Re: [RFD/RFC PATCH 4/8] sched: Split scheduler execution context

2019-05-06 Thread Claudio Scordino
Dear Juri,

just a small comment for the next round of patches (I guess after
OSPM)...


On 091018, 11:24, Juri Lelli wrote:
> From: Peter Zijlstra 
> 
> Let's define the scheduling context as all the scheduler state in
> task_struct and the execution context as all state required to run the
> task.
> 
> Currently both are intertwined in task_struct. We want to logically
> split these such that we can run the execution context of one task
> with the scheduling context of another.
> 
> To this purpose introduce rq::proxy to point to the task_struct used
> for scheduler state and preserve rq::curr to denote the execution
> context.
> 
> Signed-off-by: Peter Zijlstra (Intel) 
> [added lot of comments/questions - identifiable by XXX]
> Signed-off-by: Juri Lelli 
> ---
>  kernel/sched/core.c  | 62 ++--
>  kernel/sched/fair.c  |  4 +++
>  kernel/sched/sched.h | 30 -
>  3 files changed, 82 insertions(+), 14 deletions(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index fe0223121883..d3c481b734dd 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -224,12 +224,13 @@ static enum hrtimer_restart hrtick(struct hrtimer 
> *timer)
>  {
>   struct rq *rq = container_of(timer, struct rq, hrtick_timer);
>   struct rq_flags rf;
> + struct task_struct *curr = rq->proxy;

You may want to use a different name for these local variables (e.g.
"proxy") to help the reader avoid confusing curr (i.e. the scheduling
context) with rq::curr (i.e. the execution context).

Best regards,

   Claudio


[RFD/RFC PATCH 4/8] sched: Split scheduler execution context

2018-10-09 Thread Juri Lelli
From: Peter Zijlstra 

Let's define the scheduling context as all the scheduler state in
task_struct and the execution context as all state required to run the
task.

Currently both are intertwined in task_struct. We want to logically
split these such that we can run the execution context of one task
with the scheduling context of another.

To this purpose introduce rq::proxy to point to the task_struct used
for scheduler state and preserve rq::curr to denote the execution
context.

Signed-off-by: Peter Zijlstra (Intel) 
[added lot of comments/questions - identifiable by XXX]
Signed-off-by: Juri Lelli 
---
 kernel/sched/core.c  | 62 ++--
 kernel/sched/fair.c  |  4 +++
 kernel/sched/sched.h | 30 -
 3 files changed, 82 insertions(+), 14 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fe0223121883..d3c481b734dd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -224,12 +224,13 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
 {
struct rq *rq = container_of(timer, struct rq, hrtick_timer);
struct rq_flags rf;
+   struct task_struct *curr = rq->proxy;
 
WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
 
rq_lock(rq, &rf);
update_rq_clock(rq);
-   rq->curr->sched_class->task_tick(rq, rq->curr, 1);
+   curr->sched_class->task_tick(rq, curr, 1);
rq_unlock(rq, &rf);
 
return HRTIMER_NORESTART;
@@ -836,13 +837,18 @@ static inline void check_class_changed(struct rq *rq, 
struct task_struct *p,
 
 void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 {
+   struct task_struct *curr = rq->proxy;
const struct sched_class *class;
 
-   if (p->sched_class == rq->curr->sched_class) {
-   rq->curr->sched_class->check_preempt_curr(rq, p, flags);
+   if (p->sched_class == curr->sched_class) {
+   /*
+* XXX check_preempt_curr will check rq->curr against p, looks
+* like we want to check rq->proxy against p though?
+*/
+   curr->sched_class->check_preempt_curr(rq, p, flags);
} else {
for_each_class(class) {
-   if (class == rq->curr->sched_class)
+   if (class == curr->sched_class)
break;
if (class == p->sched_class) {
resched_curr(rq);
@@ -855,7 +861,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct 
*p, int flags)
 * A queue event has occurred, and we're going to schedule.  In
 * this case, we can save a useless back to back clock update.
 */
-   if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr))
+   if (task_on_rq_queued(curr) && test_tsk_need_resched(rq->curr))
rq_clock_skip_update(rq);
 }
 
@@ -1016,7 +1022,11 @@ void do_set_cpus_allowed(struct task_struct *p, const 
struct cpumask *new_mask)
lockdep_assert_held(&p->pi_lock);
 
queued = task_on_rq_queued(p);
-   running = task_current(rq, p);
+   /*
+* XXX is changing affinity of a proxy a problem?
+* Consider for example put_prev_ set_curr_ below...
+*/
+   running = task_current_proxy(rq, p);
 
if (queued) {
/*
@@ -3021,7 +3031,7 @@ unsigned long long task_sched_runtime(struct task_struct 
*p)
 * project cycles that may never be accounted to this
 * thread, breaking clock_gettime().
 */
-   if (task_current(rq, p) && task_on_rq_queued(p)) {
+   if (task_current_proxy(rq, p) && task_on_rq_queued(p)) {
prefetch_curr_exec_start(p);
update_rq_clock(rq);
p->sched_class->update_curr(rq);
@@ -3040,8 +3050,9 @@ void scheduler_tick(void)
 {
int cpu = smp_processor_id();
struct rq *rq = cpu_rq(cpu);
-   struct task_struct *curr = rq->curr;
struct rq_flags rf;
+   /* accounting goes to the proxy task */
+   struct task_struct *curr = rq->proxy;
 
sched_clock_tick();
 
@@ -3096,6 +3107,13 @@ static void sched_tick_remote(struct work_struct *work)
if (is_idle_task(curr))
goto out_unlock;
 
+   /*
+* XXX don't we need to account to rq->proxy?
+* Maybe, since this is a remote tick for full dynticks mode, we are
+* always sure that there is no proxy (only a single task is running).
+*/
+   SCHED_WARN_ON(rq->curr != rq->proxy);
+
update_rq_clock(rq);
delta = rq_clock_task(rq) - curr->se.exec_start;
 
@@ -3804,7 +3822,10 @@ void rt_mutex_setprio(struct task_struct *p, struct 
task_struct *pi_task)
 
prev_class = p->sched_class;
queued = task_on_rq_queued(p);
-   running = task_current(rq, p);
+   /*
+* XXX how does (proxy exec) mutexes and RT_mutexes work together?!
+   

[RFD/RFC PATCH 4/8] sched: Split scheduler execution context

2018-10-09 Thread Juri Lelli
From: Peter Zijlstra 

Let's define the scheduling context as all the scheduler state in
task_struct and the execution context as all state required to run the
task.

Currently both are intertwined in task_struct. We want to logically
split these such that we can run the execution context of one task
with the scheduling context of another.

To this purpose introduce rq::proxy to point to the task_struct used
for scheduler state and preserve rq::curr to denote the execution
context.

Signed-off-by: Peter Zijlstra (Intel) 
[added lot of comments/questions - identifiable by XXX]
Signed-off-by: Juri Lelli 
---
 kernel/sched/core.c  | 62 ++--
 kernel/sched/fair.c  |  4 +++
 kernel/sched/sched.h | 30 -
 3 files changed, 82 insertions(+), 14 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fe0223121883..d3c481b734dd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -224,12 +224,13 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
 {
struct rq *rq = container_of(timer, struct rq, hrtick_timer);
struct rq_flags rf;
+   struct task_struct *curr = rq->proxy;
 
WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
 
rq_lock(rq, &rf);
update_rq_clock(rq);
-   rq->curr->sched_class->task_tick(rq, rq->curr, 1);
+   curr->sched_class->task_tick(rq, curr, 1);
rq_unlock(rq, &rf);
 
return HRTIMER_NORESTART;
@@ -836,13 +837,18 @@ static inline void check_class_changed(struct rq *rq, 
struct task_struct *p,
 
 void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 {
+   struct task_struct *curr = rq->proxy;
const struct sched_class *class;
 
-   if (p->sched_class == rq->curr->sched_class) {
-   rq->curr->sched_class->check_preempt_curr(rq, p, flags);
+   if (p->sched_class == curr->sched_class) {
+   /*
+* XXX check_preempt_curr will check rq->curr against p, looks
+* like we want to check rq->proxy against p though?
+*/
+   curr->sched_class->check_preempt_curr(rq, p, flags);
} else {
for_each_class(class) {
-   if (class == rq->curr->sched_class)
+   if (class == curr->sched_class)
break;
if (class == p->sched_class) {
resched_curr(rq);
@@ -855,7 +861,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct 
*p, int flags)
 * A queue event has occurred, and we're going to schedule.  In
 * this case, we can save a useless back to back clock update.
 */
-   if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr))
+   if (task_on_rq_queued(curr) && test_tsk_need_resched(rq->curr))
rq_clock_skip_update(rq);
 }
 
@@ -1016,7 +1022,11 @@ void do_set_cpus_allowed(struct task_struct *p, const 
struct cpumask *new_mask)
lockdep_assert_held(&p->pi_lock);
 
queued = task_on_rq_queued(p);
-   running = task_current(rq, p);
+   /*
+* XXX is changing affinity of a proxy a problem?
+* Consider for example put_prev_ set_curr_ below...
+*/
+   running = task_current_proxy(rq, p);
 
if (queued) {
/*
@@ -3021,7 +3031,7 @@ unsigned long long task_sched_runtime(struct task_struct 
*p)
 * project cycles that may never be accounted to this
 * thread, breaking clock_gettime().
 */
-   if (task_current(rq, p) && task_on_rq_queued(p)) {
+   if (task_current_proxy(rq, p) && task_on_rq_queued(p)) {
prefetch_curr_exec_start(p);
update_rq_clock(rq);
p->sched_class->update_curr(rq);
@@ -3040,8 +3050,9 @@ void scheduler_tick(void)
 {
int cpu = smp_processor_id();
struct rq *rq = cpu_rq(cpu);
-   struct task_struct *curr = rq->curr;
struct rq_flags rf;
+   /* accounting goes to the proxy task */
+   struct task_struct *curr = rq->proxy;
 
sched_clock_tick();
 
@@ -3096,6 +3107,13 @@ static void sched_tick_remote(struct work_struct *work)
if (is_idle_task(curr))
goto out_unlock;
 
+   /*
+* XXX don't we need to account to rq->proxy?
+* Maybe, since this is a remote tick for full dynticks mode, we are
+* always sure that there is no proxy (only a single task is running).
+*/
+   SCHED_WARN_ON(rq->curr != rq->proxy);
+
update_rq_clock(rq);
delta = rq_clock_task(rq) - curr->se.exec_start;
 
@@ -3804,7 +3822,10 @@ void rt_mutex_setprio(struct task_struct *p, struct 
task_struct *pi_task)
 
prev_class = p->sched_class;
queued = task_on_rq_queued(p);
-   running = task_current(rq, p);
+   /*
+* XXX how does (proxy exec) mutexes and RT_mutexes work together?!
+