Re: [PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-11-13 Thread Juri Lelli
On 11/13/2013 03:31 AM, Steven Rostedt wrote:
> On Thu,  7 Nov 2013 14:43:37 +0100
> Juri Lelli  wrote:
> 
>> From: Dario Faggioli 
> 
>> --- /dev/null
>> +++ b/include/linux/sched/deadline.h
>> @@ -0,0 +1,24 @@
>> +#ifndef _SCHED_DEADLINE_H
>> +#define _SCHED_DEADLINE_H
>> +
>> +/*
>> + * SCHED_DEADLINE tasks has negative priorities, reflecting
>> + * the fact that any of them has higher prio than RT and
>> + * NORMAL/BATCH tasks.
>> + */
>> +
>> +#define MAX_DL_PRIO 0
>> +
>> +static inline int dl_prio(int prio)
>> +{
>> +if (unlikely(prio < MAX_DL_PRIO))
>> +return 1;
>> +return 0;
>> +}
>> +
>> +static inline int dl_task(struct task_struct *p)
>> +{
>> +return dl_prio(p->prio);
>> +}
>> +
>> +#endif /* _SCHED_DEADLINE_H */
>> diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
>> index 440434d..a157797 100644
>> --- a/include/linux/sched/rt.h
>> +++ b/include/linux/sched/rt.h
>> @@ -22,7 +22,7 @@
>>  
>>  static inline int rt_prio(int prio)
>>  {
>> -if (unlikely(prio < MAX_RT_PRIO))
>> +if ((unsigned)prio < MAX_RT_PRIO)
> 
> Why remove the "unlikely" here?
>

No reason that I can recall, most probably something went wrong with successive
rebases. G! Fixed.

>>  return 1;
>>  return 0;
>>  }
>> diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
>> index 5a0f945..2d5e49a 100644
>> --- a/include/uapi/linux/sched.h
>> +++ b/include/uapi/linux/sched.h
>> @@ -39,6 +39,7 @@
>>  #define SCHED_BATCH 3
>>  /* SCHED_ISO: reserved but not implemented yet */
>>  #define SCHED_IDLE  5
>> +#define SCHED_DEADLINE  6
>>  /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL 
>> on fork */
>>  #define SCHED_RESET_ON_FORK 0x4000
>>  
>> diff --git a/kernel/fork.c b/kernel/fork.c
>> index 086fe73..55fc95f 100644
>> --- a/kernel/fork.c
>> +++ b/kernel/fork.c
>> @@ -1313,7 +1313,9 @@ static struct task_struct *copy_process(unsigned long 
>> clone_flags,
>>  #endif
>>  
>>  /* Perform scheduler related setup. Assign this task to a CPU. */
>> -sched_fork(p);
>> +retval = sched_fork(p);
>> +if (retval)
>> +goto bad_fork_cleanup_policy;
>>  
>>  retval = perf_event_init_task(p);
>>  if (retval)
>> diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
>> index 383319b..0909436 100644
>> --- a/kernel/hrtimer.c
>> +++ b/kernel/hrtimer.c
>> @@ -46,6 +46,7 @@
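>>  #include <linux/sched.h>
>>  #include <linux/sched/sysctl.h>
>>  #include <linux/sched/rt.h>
>> +#include <linux/sched/deadline.h>
>>  #include <linux/timer.h>
>>  #include <linux/freezer.h>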
>>  
>> @@ -1610,7 +1611,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct 
>> timespec __user *rmtp,
>>  unsigned long slack;
>>  
>>  slack = current->timer_slack_ns;
>> -if (rt_task(current))
>> +if (dl_task(current) || rt_task(current))
> 
> Since dl_task() checks if prio is less than 0, and rt_task checks for
> prio < MAX_RT_PRIO, I wonder if we can introduce a
> 
>   dl_or_rt_task(current)
> 
> that does a signed compare against MAX_RT_PRIO to eliminate the double
> compare (in case gcc doesn't figure it out).
> 
> Not something that we need to change now, but something in the future
> maybe.
> 

Ok.

>>  slack = 0;
>>  
>>  hrtimer_init_on_stack(&t.timer, clockid, mode);
>> diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
>> index 54adcf3..d77282f 100644
>> --- a/kernel/sched/Makefile
>> +++ b/kernel/sched/Makefile
>> @@ -11,7 +11,7 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
>>  CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
>>  endif
>>  
>> -obj-y += core.o proc.o clock.o cputime.o idle_task.o fair.o rt.o stop_task.o
>> +obj-y += core.o proc.o clock.o cputime.o idle_task.o fair.o rt.o deadline.o 
>> stop_task.o
>>  obj-$(CONFIG_SMP) += cpupri.o
>>  obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
>>  obj-$(CONFIG_SCHEDSTATS) += stats.o
>> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
>> index 4fcbf13..cfe15bfc 100644
>> --- a/kernel/sched/core.c
>> +++ b/kernel/sched/core.c
>> @@ -903,7 +903,9 @@ static inline int normal_prio(struct task_struct *p)
>>  {
>>  int prio;
>>  
>> -if (task_has_rt_policy(p))
>> +if (task_has_dl_policy(p))
>> +prio = MAX_DL_PRIO-1;
>> +else if (task_has_rt_policy(p))
>>  prio = MAX_RT_PRIO-1 - p->rt_priority;
>>  else
>>  prio = __normal_prio(p);
>> @@ -1611,6 +1613,12 @@ static void __sched_fork(struct task_struct *p)
>>  memset(&p->se.statistics, 0, sizeof(p->se.statistics));
>>  #endif
>>  
>> +RB_CLEAR_NODE(&p->dl.rb_node);
>> +hrtimer_init(&p->dl.dl_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
>> +p->dl.dl_runtime = p->dl.runtime = 0;
>> +p->dl.dl_deadline = p->dl.deadline = 0;
>> +p->dl.flags = 0;
>> +
>>  INIT_LIST_HEAD(&p->rt.run_list);
>>  
>>  #ifdef CONFIG_PREEMPT_NOTIFIERS
>> @@ -1654,7 +1662,7 @@ void set_numabalancing_state(bool enabled)
>>  /*
>>   * fork()/clone()-time setup:
>>   */
>> -void sched_fork(struct task_struct *p)
>> +int 

Re: [PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-11-13 Thread Juri Lelli
On 11/13/2013 03:31 AM, Steven Rostedt wrote:
 On Thu,  7 Nov 2013 14:43:37 +0100
 Juri Lelli juri.le...@gmail.com wrote:
 
 From: Dario Faggioli raist...@linux.it
 
 --- /dev/null
 +++ b/include/linux/sched/deadline.h
 @@ -0,0 +1,24 @@
 +#ifndef _SCHED_DEADLINE_H
 +#define _SCHED_DEADLINE_H
 +
 +/*
 + * SCHED_DEADLINE tasks has negative priorities, reflecting
 + * the fact that any of them has higher prio than RT and
 + * NORMAL/BATCH tasks.
 + */
 +
 +#define MAX_DL_PRIO 0
 +
 +static inline int dl_prio(int prio)
 +{
 +if (unlikely(prio < MAX_DL_PRIO))
 +return 1;
 +return 0;
 +}
 +
 +static inline int dl_task(struct task_struct *p)
 +{
 +return dl_prio(p->prio);
 +}
 +
 +#endif /* _SCHED_DEADLINE_H */
 diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
 index 440434d..a157797 100644
 --- a/include/linux/sched/rt.h
 +++ b/include/linux/sched/rt.h
 @@ -22,7 +22,7 @@
  
  static inline int rt_prio(int prio)
  {
 -if (unlikely(prio < MAX_RT_PRIO))
 +if ((unsigned)prio < MAX_RT_PRIO)
 
 Why remove the "unlikely" here?


No reason that I can recall, most probably something went wrong with successive
rebases. G! Fixed.

  return 1;
  return 0;
  }
 diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
 index 5a0f945..2d5e49a 100644
 --- a/include/uapi/linux/sched.h
 +++ b/include/uapi/linux/sched.h
 @@ -39,6 +39,7 @@
  #define SCHED_BATCH 3
  /* SCHED_ISO: reserved but not implemented yet */
  #define SCHED_IDLE  5
 +#define SCHED_DEADLINE  6
  /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL 
 on fork */
  #define SCHED_RESET_ON_FORK 0x4000
  
 diff --git a/kernel/fork.c b/kernel/fork.c
 index 086fe73..55fc95f 100644
 --- a/kernel/fork.c
 +++ b/kernel/fork.c
 @@ -1313,7 +1313,9 @@ static struct task_struct *copy_process(unsigned long 
 clone_flags,
  #endif
  
  /* Perform scheduler related setup. Assign this task to a CPU. */
 -sched_fork(p);
 +retval = sched_fork(p);
 +if (retval)
 +goto bad_fork_cleanup_policy;
  
  retval = perf_event_init_task(p);
  if (retval)
 diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
 index 383319b..0909436 100644
 --- a/kernel/hrtimer.c
 +++ b/kernel/hrtimer.c
 @@ -46,6 +46,7 @@
  #include <linux/sched.h>
  #include <linux/sched/sysctl.h>
  #include <linux/sched/rt.h>
 +#include <linux/sched/deadline.h>
  #include <linux/timer.h>
  #include <linux/freezer.h>
  
 @@ -1610,7 +1611,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct 
 timespec __user *rmtp,
  unsigned long slack;
  
  slack = current->timer_slack_ns;
 -if (rt_task(current))
 +if (dl_task(current) || rt_task(current))
 
 Since dl_task() checks if prio is less than 0, and rt_task checks for
 prio < MAX_RT_PRIO, I wonder if we can introduce a
 
   dl_or_rt_task(current)
 
 that does a signed compare against MAX_RT_PRIO to eliminate the double
 compare (in case gcc doesn't figure it out).
 
 Not something that we need to change now, but something in the future
 maybe.
 

Ok.

  slack = 0;
  
  hrtimer_init_on_stack(&t.timer, clockid, mode);
 diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
 index 54adcf3..d77282f 100644
 --- a/kernel/sched/Makefile
 +++ b/kernel/sched/Makefile
 @@ -11,7 +11,7 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
  CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
  endif
  
 -obj-y += core.o proc.o clock.o cputime.o idle_task.o fair.o rt.o stop_task.o
 +obj-y += core.o proc.o clock.o cputime.o idle_task.o fair.o rt.o deadline.o 
 stop_task.o
  obj-$(CONFIG_SMP) += cpupri.o
  obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
  obj-$(CONFIG_SCHEDSTATS) += stats.o
 diff --git a/kernel/sched/core.c b/kernel/sched/core.c
 index 4fcbf13..cfe15bfc 100644
 --- a/kernel/sched/core.c
 +++ b/kernel/sched/core.c
 @@ -903,7 +903,9 @@ static inline int normal_prio(struct task_struct *p)
  {
  int prio;
  
 -if (task_has_rt_policy(p))
 +if (task_has_dl_policy(p))
 +prio = MAX_DL_PRIO-1;
 +else if (task_has_rt_policy(p))
  prio = MAX_RT_PRIO-1 - p->rt_priority;
  else
  prio = __normal_prio(p);
 @@ -1611,6 +1613,12 @@ static void __sched_fork(struct task_struct *p)
  memset(&p->se.statistics, 0, sizeof(p->se.statistics));
  #endif
  
 +RB_CLEAR_NODE(&p->dl.rb_node);
 +hrtimer_init(&p->dl.dl_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 +p->dl.dl_runtime = p->dl.runtime = 0;
 +p->dl.dl_deadline = p->dl.deadline = 0;
 +p->dl.flags = 0;
 +
  INIT_LIST_HEAD(&p->rt.run_list);
  
  #ifdef CONFIG_PREEMPT_NOTIFIERS
 @@ -1654,7 +1662,7 @@ void set_numabalancing_state(bool enabled)
  /*
   * fork()/clone()-time setup:
   */
 -void sched_fork(struct task_struct *p)
 +int sched_fork(struct task_struct *p)
  {
  unsigned long flags;
  int cpu = get_cpu();
 @@ -1676,7 +1684,7 @@ void sched_fork(struct task_struct *p)
   * 

Re: [PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-11-12 Thread Steven Rostedt
On Thu,  7 Nov 2013 14:43:37 +0100
Juri Lelli  wrote:

> From: Dario Faggioli 

> --- /dev/null
> +++ b/include/linux/sched/deadline.h
> @@ -0,0 +1,24 @@
> +#ifndef _SCHED_DEADLINE_H
> +#define _SCHED_DEADLINE_H
> +
> +/*
> + * SCHED_DEADLINE tasks has negative priorities, reflecting
> + * the fact that any of them has higher prio than RT and
> + * NORMAL/BATCH tasks.
> + */
> +
> +#define MAX_DL_PRIO  0
> +
> +static inline int dl_prio(int prio)
> +{
> + if (unlikely(prio < MAX_DL_PRIO))
> + return 1;
> + return 0;
> +}
> +
> +static inline int dl_task(struct task_struct *p)
> +{
> + return dl_prio(p->prio);
> +}
> +
> +#endif /* _SCHED_DEADLINE_H */
> diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
> index 440434d..a157797 100644
> --- a/include/linux/sched/rt.h
> +++ b/include/linux/sched/rt.h
> @@ -22,7 +22,7 @@
>  
>  static inline int rt_prio(int prio)
>  {
> - if (unlikely(prio < MAX_RT_PRIO))
> + if ((unsigned)prio < MAX_RT_PRIO)

Why remove the "unlikely" here?

>   return 1;
>   return 0;
>  }
> diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
> index 5a0f945..2d5e49a 100644
> --- a/include/uapi/linux/sched.h
> +++ b/include/uapi/linux/sched.h
> @@ -39,6 +39,7 @@
>  #define SCHED_BATCH  3
>  /* SCHED_ISO: reserved but not implemented yet */
>  #define SCHED_IDLE   5
> +#define SCHED_DEADLINE   6
>  /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL 
> on fork */
>  #define SCHED_RESET_ON_FORK 0x4000
>  
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 086fe73..55fc95f 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -1313,7 +1313,9 @@ static struct task_struct *copy_process(unsigned long 
> clone_flags,
>  #endif
>  
>   /* Perform scheduler related setup. Assign this task to a CPU. */
> - sched_fork(p);
> + retval = sched_fork(p);
> + if (retval)
> + goto bad_fork_cleanup_policy;
>  
>   retval = perf_event_init_task(p);
>   if (retval)
> diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
> index 383319b..0909436 100644
> --- a/kernel/hrtimer.c
> +++ b/kernel/hrtimer.c
> @@ -46,6 +46,7 @@
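>  #include <linux/sched.h>
>  #include <linux/sched/sysctl.h>
>  #include <linux/sched/rt.h>
> +#include <linux/sched/deadline.h>
>  #include <linux/timer.h>
>  #include <linux/freezer.h>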
>  
> @@ -1610,7 +1611,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct 
> timespec __user *rmtp,
>   unsigned long slack;
>  
>   slack = current->timer_slack_ns;
> - if (rt_task(current))
> + if (dl_task(current) || rt_task(current))

Since dl_task() checks if prio is less than 0, and rt_task checks for
prio < MAX_RT_PRIO, I wonder if we can introduce a

dl_or_rt_task(current)

that does a signed compare against MAX_RT_PRIO to eliminate the double
compare (in case gcc doesn't figure it out).

Not something that we need to change now, but something in the future
maybe.

>   slack = 0;
>  
>   hrtimer_init_on_stack(&t.timer, clockid, mode);
> diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
> index 54adcf3..d77282f 100644
> --- a/kernel/sched/Makefile
> +++ b/kernel/sched/Makefile
> @@ -11,7 +11,7 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
>  CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
>  endif
>  
> -obj-y += core.o proc.o clock.o cputime.o idle_task.o fair.o rt.o stop_task.o
> +obj-y += core.o proc.o clock.o cputime.o idle_task.o fair.o rt.o deadline.o 
> stop_task.o
>  obj-$(CONFIG_SMP) += cpupri.o
>  obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
>  obj-$(CONFIG_SCHEDSTATS) += stats.o
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 4fcbf13..cfe15bfc 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -903,7 +903,9 @@ static inline int normal_prio(struct task_struct *p)
>  {
>   int prio;
>  
> - if (task_has_rt_policy(p))
> + if (task_has_dl_policy(p))
> + prio = MAX_DL_PRIO-1;
> + else if (task_has_rt_policy(p))
>   prio = MAX_RT_PRIO-1 - p->rt_priority;
>   else
>   prio = __normal_prio(p);
> @@ -1611,6 +1613,12 @@ static void __sched_fork(struct task_struct *p)
>   memset(&p->se.statistics, 0, sizeof(p->se.statistics));
>  #endif
>  
> + RB_CLEAR_NODE(&p->dl.rb_node);
> + hrtimer_init(&p->dl.dl_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> + p->dl.dl_runtime = p->dl.runtime = 0;
> + p->dl.dl_deadline = p->dl.deadline = 0;
> + p->dl.flags = 0;
> +
>   INIT_LIST_HEAD(&p->rt.run_list);
>  
>  #ifdef CONFIG_PREEMPT_NOTIFIERS
> @@ -1654,7 +1662,7 @@ void set_numabalancing_state(bool enabled)
>  /*
>   * fork()/clone()-time setup:
>   */
> -void sched_fork(struct task_struct *p)
> +int sched_fork(struct task_struct *p)
>  {
>   unsigned long flags;
>   int cpu = get_cpu();
> @@ -1676,7 +1684,7 @@ void sched_fork(struct task_struct *p)
>* Revert to default priority/policy on fork if requested.
>*/
>   if (unlikely(p->sched_reset_on_fork)) {
> -  

Re: [PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-11-12 Thread Steven Rostedt
On Thu,  7 Nov 2013 14:43:37 +0100
Juri Lelli juri.le...@gmail.com wrote:

 From: Dario Faggioli raist...@linux.it

 --- /dev/null
 +++ b/include/linux/sched/deadline.h
 @@ -0,0 +1,24 @@
 +#ifndef _SCHED_DEADLINE_H
 +#define _SCHED_DEADLINE_H
 +
 +/*
 + * SCHED_DEADLINE tasks has negative priorities, reflecting
 + * the fact that any of them has higher prio than RT and
 + * NORMAL/BATCH tasks.
 + */
 +
 +#define MAX_DL_PRIO  0
 +
 +static inline int dl_prio(int prio)
 +{
 + if (unlikely(prio < MAX_DL_PRIO))
 + return 1;
 + return 0;
 +}
 +
 +static inline int dl_task(struct task_struct *p)
 +{
 + return dl_prio(p->prio);
 +}
 +
 +#endif /* _SCHED_DEADLINE_H */
 diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h
 index 440434d..a157797 100644
 --- a/include/linux/sched/rt.h
 +++ b/include/linux/sched/rt.h
 @@ -22,7 +22,7 @@
  
  static inline int rt_prio(int prio)
  {
 - if (unlikely(prio < MAX_RT_PRIO))
 + if ((unsigned)prio < MAX_RT_PRIO)

Why remove the "unlikely" here?

   return 1;
   return 0;
  }
 diff --git a/include/uapi/linux/sched.h b/include/uapi/linux/sched.h
 index 5a0f945..2d5e49a 100644
 --- a/include/uapi/linux/sched.h
 +++ b/include/uapi/linux/sched.h
 @@ -39,6 +39,7 @@
  #define SCHED_BATCH  3
  /* SCHED_ISO: reserved but not implemented yet */
  #define SCHED_IDLE   5
 +#define SCHED_DEADLINE   6
  /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL 
 on fork */
  #define SCHED_RESET_ON_FORK 0x4000
  
 diff --git a/kernel/fork.c b/kernel/fork.c
 index 086fe73..55fc95f 100644
 --- a/kernel/fork.c
 +++ b/kernel/fork.c
 @@ -1313,7 +1313,9 @@ static struct task_struct *copy_process(unsigned long 
 clone_flags,
  #endif
  
   /* Perform scheduler related setup. Assign this task to a CPU. */
 - sched_fork(p);
 + retval = sched_fork(p);
 + if (retval)
 + goto bad_fork_cleanup_policy;
  
   retval = perf_event_init_task(p);
   if (retval)
 diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
 index 383319b..0909436 100644
 --- a/kernel/hrtimer.c
 +++ b/kernel/hrtimer.c
 @@ -46,6 +46,7 @@
  #include <linux/sched.h>
  #include <linux/sched/sysctl.h>
  #include <linux/sched/rt.h>
 +#include <linux/sched/deadline.h>
  #include <linux/timer.h>
  #include <linux/freezer.h>
  
 @@ -1610,7 +1611,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct 
 timespec __user *rmtp,
   unsigned long slack;
  
   slack = current->timer_slack_ns;
 - if (rt_task(current))
 + if (dl_task(current) || rt_task(current))

Since dl_task() checks if prio is less than 0, and rt_task checks for
prio < MAX_RT_PRIO, I wonder if we can introduce a

dl_or_rt_task(current)

that does a signed compare against MAX_RT_PRIO to eliminate the double
compare (in case gcc doesn't figure it out).

Not something that we need to change now, but something in the future
maybe.

   slack = 0;
  
   hrtimer_init_on_stack(&t.timer, clockid, mode);
 diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
 index 54adcf3..d77282f 100644
 --- a/kernel/sched/Makefile
 +++ b/kernel/sched/Makefile
 @@ -11,7 +11,7 @@ ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
  CFLAGS_core.o := $(PROFILING) -fno-omit-frame-pointer
  endif
  
 -obj-y += core.o proc.o clock.o cputime.o idle_task.o fair.o rt.o stop_task.o
 +obj-y += core.o proc.o clock.o cputime.o idle_task.o fair.o rt.o deadline.o 
 stop_task.o
  obj-$(CONFIG_SMP) += cpupri.o
  obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
  obj-$(CONFIG_SCHEDSTATS) += stats.o
 diff --git a/kernel/sched/core.c b/kernel/sched/core.c
 index 4fcbf13..cfe15bfc 100644
 --- a/kernel/sched/core.c
 +++ b/kernel/sched/core.c
 @@ -903,7 +903,9 @@ static inline int normal_prio(struct task_struct *p)
  {
   int prio;
  
 - if (task_has_rt_policy(p))
 + if (task_has_dl_policy(p))
 + prio = MAX_DL_PRIO-1;
 + else if (task_has_rt_policy(p))
   prio = MAX_RT_PRIO-1 - p->rt_priority;
   else
   prio = __normal_prio(p);
 @@ -1611,6 +1613,12 @@ static void __sched_fork(struct task_struct *p)
   memset(&p->se.statistics, 0, sizeof(p->se.statistics));
  #endif
  
 + RB_CLEAR_NODE(&p->dl.rb_node);
 + hrtimer_init(&p->dl.dl_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 + p->dl.dl_runtime = p->dl.runtime = 0;
 + p->dl.dl_deadline = p->dl.deadline = 0;
 + p->dl.flags = 0;
 +
   INIT_LIST_HEAD(&p->rt.run_list);
  
  #ifdef CONFIG_PREEMPT_NOTIFIERS
 @@ -1654,7 +1662,7 @@ void set_numabalancing_state(bool enabled)
  /*
   * fork()/clone()-time setup:
   */
 -void sched_fork(struct task_struct *p)
 +int sched_fork(struct task_struct *p)
  {
   unsigned long flags;
   int cpu = get_cpu();
 @@ -1676,7 +1684,7 @@ void sched_fork(struct task_struct *p)
* Revert to default priority/policy on fork if requested.
*/
   if (unlikely(p->sched_reset_on_fork)) {
 - if 

[PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-11-07 Thread Juri Lelli
From: Dario Faggioli 

Introduces the data structures, constants and symbols needed for
SCHED_DEADLINE implementation.

Core data structure of SCHED_DEADLINE are defined, along with their
initializers. Hooks for checking if a task belong to the new policy
are also added where they are needed.

Adds a scheduling class, in sched/dl.c and a new policy called
SCHED_DEADLINE. It is an implementation of the Earliest Deadline
First (EDF) scheduling algorithm, augmented with a mechanism (called
Constant Bandwidth Server, CBS) that makes it possible to isolate
the behaviour of tasks between each other.

The typical -deadline task will be made up of a computation phase
(instance) which is activated on a periodic or sporadic fashion. The
expected (maximum) duration of such computation is called the task's
runtime; the time interval by which each instance need to be completed
is called the task's relative deadline. The task's absolute deadline
is dynamically calculated as the time instant a task (better, an
instance) activates plus the relative deadline.

The EDF algorithms selects the task with the smallest absolute
deadline as the one to be executed first, while the CBS ensures each
task to run for at most its runtime every (relative) deadline
length time interval, avoiding any interference between different
tasks (bandwidth isolation).
Thanks to this feature, also tasks that do not strictly comply with
the computational model sketched above can effectively use the new
policy.

To summarize, this patch:
 - introduces the data structures, constants and symbols needed;
 - implements the core logic of the scheduling algorithm in the new
   scheduling class file;
 - provides all the glue code between the new scheduling class and
   the core scheduler and refines the interactions between sched/dl
   and the other existing scheduling classes.

Signed-off-by: Dario Faggioli 
Signed-off-by: Michael Trimarchi 
Signed-off-by: Fabio Checconi 
Signed-off-by: Juri Lelli 
---
 arch/arm/include/asm/unistd.h  |2 +-
 include/linux/sched.h  |   46 ++-
 include/linux/sched/deadline.h |   24 ++
 include/linux/sched/rt.h   |2 +-
 include/uapi/linux/sched.h |1 +
 kernel/fork.c  |4 +-
 kernel/hrtimer.c   |3 +-
 kernel/sched/Makefile  |2 +-
 kernel/sched/core.c|  111 ++-
 kernel/sched/deadline.c|  682 
 kernel/sched/sched.h   |   28 ++
 kernel/sched/stop_task.c   |2 +-
 12 files changed, 884 insertions(+), 23 deletions(-)
 create mode 100644 include/linux/sched/deadline.h
 create mode 100644 kernel/sched/deadline.c

diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 5f260fd..acabef1 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -15,7 +15,7 @@
 
 #include <uapi/asm/unistd.h>
 
-#define __NR_syscalls  (383)
+#define __NR_syscalls  (384)
 #define __ARM_NR_cmpxchg   (__ARM_NR_BASE+0x00fff0)
 
 #define __ARCH_WANT_STAT64
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9f7d633..fdf957c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -92,6 +92,10 @@ struct sched_param {
  * timing constraints.
  *
  * @__unused   padding to allow future expansion without ABI issues
+ *
+ * As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the
+ * only user of this new interface. More information about the algorithm
+ * available in the scheduling class file or in Documentation/.
  */
 struct sched_param2 {
int sched_priority;
@@ -1054,6 +1058,45 @@ struct sched_rt_entity {
 #endif
 };
 
+struct sched_dl_entity {
+   struct rb_node  rb_node;
+
+   /*
+* Original scheduling parameters. Copied here from sched_param2
+* during sched_setscheduler2(), they will remain the same until
+* the next sched_setscheduler2().
+*/
+   u64 dl_runtime; /* maximum runtime for each instance*/
+   u64 dl_deadline;/* relative deadline of each instance   */
+
+   /*
+* Actual scheduling parameters. Initialized with the values above,
+* they are continously updated during task execution. Note that
+* the remaining runtime could be < 0 in case we are in overrun.
+*/
+   s64 runtime;/* remaining runtime for this instance  */
+   u64 deadline;   /* absolute deadline for this instance  */
+   unsigned int flags; /* specifying the scheduler behaviour   */
+
+   /*
+* Some bool flags:
+*
+* @dl_throttled tells if we exhausted the runtime. If so, the
+* task has to wait for a replenishment to be performed at the
+* next firing of dl_timer.
+*
+* @dl_new tells if a new instance arrived. If so we must
+* start executing it with full runtime and reset its absolute
+* deadline;
+*/
+   int 

[PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-11-07 Thread Juri Lelli
From: Dario Faggioli raist...@linux.it

Introduces the data structures, constants and symbols needed for
SCHED_DEADLINE implementation.

Core data structure of SCHED_DEADLINE are defined, along with their
initializers. Hooks for checking if a task belong to the new policy
are also added where they are needed.

Adds a scheduling class, in sched/dl.c and a new policy called
SCHED_DEADLINE. It is an implementation of the Earliest Deadline
First (EDF) scheduling algorithm, augmented with a mechanism (called
Constant Bandwidth Server, CBS) that makes it possible to isolate
the behaviour of tasks between each other.

The typical -deadline task will be made up of a computation phase
(instance) which is activated on a periodic or sporadic fashion. The
expected (maximum) duration of such computation is called the task's
runtime; the time interval by which each instance need to be completed
is called the task's relative deadline. The task's absolute deadline
is dynamically calculated as the time instant a task (better, an
instance) activates plus the relative deadline.

The EDF algorithms selects the task with the smallest absolute
deadline as the one to be executed first, while the CBS ensures each
task to run for at most its runtime every (relative) deadline
length time interval, avoiding any interference between different
tasks (bandwidth isolation).
Thanks to this feature, also tasks that do not strictly comply with
the computational model sketched above can effectively use the new
policy.

To summarize, this patch:
 - introduces the data structures, constants and symbols needed;
 - implements the core logic of the scheduling algorithm in the new
   scheduling class file;
 - provides all the glue code between the new scheduling class and
   the core scheduler and refines the interactions between sched/dl
   and the other existing scheduling classes.

Signed-off-by: Dario Faggioli raist...@linux.it
Signed-off-by: Michael Trimarchi mich...@amarulasolutions.com
Signed-off-by: Fabio Checconi fchecc...@gmail.com
Signed-off-by: Juri Lelli juri.le...@gmail.com
---
 arch/arm/include/asm/unistd.h  |2 +-
 include/linux/sched.h  |   46 ++-
 include/linux/sched/deadline.h |   24 ++
 include/linux/sched/rt.h   |2 +-
 include/uapi/linux/sched.h |1 +
 kernel/fork.c  |4 +-
 kernel/hrtimer.c   |3 +-
 kernel/sched/Makefile  |2 +-
 kernel/sched/core.c|  111 ++-
 kernel/sched/deadline.c|  682 
 kernel/sched/sched.h   |   28 ++
 kernel/sched/stop_task.c   |2 +-
 12 files changed, 884 insertions(+), 23 deletions(-)
 create mode 100644 include/linux/sched/deadline.h
 create mode 100644 kernel/sched/deadline.c

diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 5f260fd..acabef1 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -15,7 +15,7 @@
 
 #include <uapi/asm/unistd.h>
 
-#define __NR_syscalls  (383)
+#define __NR_syscalls  (384)
 #define __ARM_NR_cmpxchg   (__ARM_NR_BASE+0x00fff0)
 
 #define __ARCH_WANT_STAT64
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9f7d633..fdf957c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -92,6 +92,10 @@ struct sched_param {
  * timing constraints.
  *
  * @__unused   padding to allow future expansion without ABI issues
+ *
+ * As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the
+ * only user of this new interface. More information about the algorithm
+ * available in the scheduling class file or in Documentation/.
  */
 struct sched_param2 {
int sched_priority;
@@ -1054,6 +1058,45 @@ struct sched_rt_entity {
 #endif
 };
 
+struct sched_dl_entity {
+   struct rb_node  rb_node;
+
+   /*
+* Original scheduling parameters. Copied here from sched_param2
+* during sched_setscheduler2(), they will remain the same until
+* the next sched_setscheduler2().
+*/
+   u64 dl_runtime; /* maximum runtime for each instance*/
+   u64 dl_deadline;/* relative deadline of each instance   */
+
+   /*
+* Actual scheduling parameters. Initialized with the values above,
+* they are continously updated during task execution. Note that
+* the remaining runtime could be < 0 in case we are in overrun.
+*/
+   s64 runtime;/* remaining runtime for this instance  */
+   u64 deadline;   /* absolute deadline for this instance  */
+   unsigned int flags; /* specifying the scheduler behaviour   */
+
+   /*
+* Some bool flags:
+*
+* @dl_throttled tells if we exhausted the runtime. If so, the
+* task has to wait for a replenishment to be performed at the
+* next firing of dl_timer.
+*
+* @dl_new tells if a new instance arrived. If so we must
+   

Re: [PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-10-14 Thread Juri Lelli
On 10/14/2013 01:51 PM, Peter Zijlstra wrote:
> On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
>> +static void set_cpus_allowed_dl(struct task_struct *p,
>> +const struct cpumask *new_mask)
>> +{
>> +int weight = cpumask_weight(new_mask);
>> +
>> +BUG_ON(!dl_task(p));
>> +
>> +cpumask_copy(&p->cpus_allowed, new_mask);
>> +p->dl.nr_cpus_allowed = weight;
>> +}
> 
> This seems identical to the default in do_set_cpus_allowed(); can we
> leave this function out and use the default?
> 

Another thing that is changed completely in next patch. Anyway, I removed it
from here too.

Thanks,

- Juri
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-10-14 Thread Juri Lelli
On 10/14/2013 07:34 PM, Peter Zijlstra wrote:
> On Mon, Oct 14, 2013 at 06:58:51PM +0200, Juri Lelli wrote:
>> On 10/14/2013 01:44 PM, Peter Zijlstra wrote:
>>> On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
>> We discussed on this point in the past...
> 
> Ah, completely forgot about that; please update the comment that we
> indeed use different clocks for deadline and runtime and that the full
> ramifications need further study; but that deadline needs hard walltime
> and clock_task is more natural for runtime.
> 

Ok, done.

Thanks,

- Juri
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-10-14 Thread Juri Lelli
On 10/14/2013 01:49 PM, Peter Zijlstra wrote:
> On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
>> +/*
>> + * Yield task semantic for -deadline tasks is:
>> + *
>> + *   get off from the CPU until our next instance, with
>> + *   a new runtime.
>> + */
> 
> Could you amend that comment with a reason for why this is so? I have
> vague recollections of a discussion on this subject but can't recall. It
> seems like a useful thing to have.
> 

I think discussion happened before I started maintaining the patchset, but I'm
quite sure this would be helpful for bandwidth reclaiming mechanisms.
Basically, if I'm able to report that I didn't use all the budget of my current
instance, I could donate that remaining budget to other task instances.
Bandwidth reclaiming is another nice thing to have, and we actually have some
ideas on how to implement it (TODO list always grows :)).

I'll amend the comment saying that this function is of little use now, but will
be helpful in the future.

Thanks,

- Juri
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-10-14 Thread Peter Zijlstra
On Mon, Oct 14, 2013 at 06:58:51PM +0200, Juri Lelli wrote:
> On 10/14/2013 01:44 PM, Peter Zijlstra wrote:
> > On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
> We discussed on this point in the past...

Ah, completely forgot about that; please update the comment that we
indeed use different clocks for deadline and runtime and that the full
ramifications need further study; but that deadline needs hard walltime
and clock_task is more natural for runtime.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-10-14 Thread Juri Lelli
On 10/14/2013 01:44 PM, Peter Zijlstra wrote:
> On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
>> +static void update_curr_dl(struct rq *rq)
>> +{
>> +struct task_struct *curr = rq->curr;
>> +struct sched_dl_entity *dl_se = &curr->dl;
>> +u64 delta_exec;
>> +
>> +if (!dl_task(curr) || !on_dl_rq(dl_se))
>> +return;
>> +
>> +/*
>> + * Consumed budget is computed considering the time as
>> + * observed by schedulable tasks (excluding time spent
>> + * in hardirq context, etc.)
>> + */
>> +delta_exec = rq->clock_task - curr->se.exec_start;
> 
> Oh, cute.. So we compute deadlines from rq->clock but compute runtime
> from rq->clock_task.
> 
> So won't that give funny results in that clock_task is generally slower
> than clock; so people get more 'time'.
> 
> Maybe there's some illumination on this point further on; I'll continue
> reading.
> 

We discussed on this point in the past...

On 04/23/2012 12:31 PM, Peter Zijlstra wrote:> On Fri, 2012-04-06 at 09:14
+0200, Juri Lelli wrote:
>> +   dl_se->deadline = rq->clock + dl_se->dl_deadline;
>
> You might want to use rq->clock_task, this clock excludes times spend in
> hardirq context and steal-time (when paravirt).
>
> Then again, it might not want to use that.. but its something you might
> want to consider and make explicit by means of a comment.
>

On 04/24/2012 08:29 AM, Dario Faggioli wrote:> On Tue, 2012-04-24 at 00:25
+0100, Tommaso Cucinotta wrote:
>>> The idea is that ->clock_task gives the time as observed by schedulable
>>> tasks and excludes other muck.
>>
>> so clock_task might be better to compute the consumed budget at task
>> deschedule, but for setting deadlines one period ahead in the future
>> guess the regular wall-time rq->clock is the one to be used?
>>
> Yep, that was the idea, unless my recollection has completely gone
> flaky! :-P
>
> Perhaps adding a comment saying right this thing above, as Peter
> suggested?

And we kind of agreed on the current use of the different clocks. Do you think
we have to reason (test) more about this? Or do we live with that and see if
something strange happens? (I actually didn't see anything suspicious in my
use of the patchset).

Thanks,

- Juri
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-10-14 Thread Peter Zijlstra
On Mon, Oct 14, 2013 at 06:16:50PM +0200, Juri Lelli wrote:
> 
> When disassembled everything seems fine, at least for x86 and ARM. Do I add 
> the
> fake data hazard anyway?

nah, lets add it when we find it's needed.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-10-14 Thread Juri Lelli
On 10/14/2013 01:33 PM, Peter Zijlstra wrote:
> On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
>> +static void replenish_dl_entity(struct sched_dl_entity *dl_se)
>> +{
>> +struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
>> +struct rq *rq = rq_of_dl_rq(dl_rq);
>> +
>> +/*
>> + * We keep moving the deadline away until we get some
>> + * available runtime for the entity. This ensures correct
>> + * handling of situations where the runtime overrun is
>> + * arbitrary large.
>> + */
>> +while (dl_se->runtime <= 0) {
>> +dl_se->deadline += dl_se->dl_deadline;
>> +dl_se->runtime += dl_se->dl_runtime;
>> +}
> 
> Are we sure GCC won't be 'smart' and bite us; that is do we need
> something like:
> 
>   asm("" : "+rm" (dl_se->runtime));
> 
> in there? See:
> 
> 0d98bb2656e9 sched: Prevent compiler from optimising the sched_avg_update() 
> loop
> 

When disassembled everything seems fine, at least for x86 and ARM. Do I add the
fake data hazard anyway?

Thanks,

- Juri
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-10-14 Thread Juri Lelli
On 10/14/2013 01:24 PM, Peter Zijlstra wrote:
> On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
>> +/*
>> + * We are being explicitly informed that a new instance is starting,
>> + * and this means that:
>> + *  - the absolute deadline of the entity has to be placed at
>> + *current time + relative deadline;
>> + *  - the runtime of the entity has to be set to the maximum value.
>> + *
>> + * The capability of specifying such event is useful whenever a -deadline
>> + * entity wants to (try to!) synchronize its behaviour with the scheduler's
>> + * one, and to (try to!) reconcile itself with its own scheduling
>> + * parameters.
>> + */
>> +static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
>> +{
>> +struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
>> +struct rq *rq = rq_of_dl_rq(dl_rq);
>> +
>> +WARN_ON(!dl_se->dl_new || dl_se->dl_throttled);
>> +
>> +/*
>> + * We use the regular wall clock time to set deadlines in the
>> + * future; in fact, we must consider execution overheads (time
>> + * spent on hardirq context, etc.).
>> + */
>> +dl_se->deadline = rq->clock + dl_se->dl_deadline;
>> +dl_se->runtime = dl_se->dl_runtime;
>> +dl_se->dl_new = 0;
>> +}
> 
> 78becc270975 sched: Use an accessor to read the rq clock
> 
> wants you to use rq_clock(rq) there I think.
> 
> 

Modified here and below.

Thanks,

- Juri
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-10-14 Thread Peter Zijlstra
On Mon, Oct 14, 2013 at 03:05:56PM +0200, Juri Lelli wrote:
> Yes, I already considered and used that. But, it is slipped into next patch 
> :\.
> I'll bring the change to this patch.

Ah yes, the wandering hunks problem. I'm only too familiar with it :-(


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-10-14 Thread Juri Lelli
On 10/14/2013 01:10 PM, Peter Zijlstra wrote:
> On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
>> +struct sched_dl_entity {
>> +struct rb_node  rb_node;
>> +int nr_cpus_allowed;
>> +
> 
> Please see:
> 
> 29baa7478ba4 sched: Move nr_cpus_allowed out of 'struct sched_rt_entity'
> 
> 

Yes, I already considered and used that. But, it is slipped into next patch :\.
I'll bring the change to this patch.

Thanks,

- Juri
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-10-14 Thread Peter Zijlstra
On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
> +static void set_cpus_allowed_dl(struct task_struct *p,
> + const struct cpumask *new_mask)
> +{
> + int weight = cpumask_weight(new_mask);
> +
> + BUG_ON(!dl_task(p));
> +
> + cpumask_copy(&p->cpus_allowed, new_mask);
> + p->dl.nr_cpus_allowed = weight;
> +}

This seems identical to the default in do_set_cpus_allowed(); can we
leave this function out and use the default?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-10-14 Thread Peter Zijlstra
On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
> +/*
> + * Yield task semantic for -deadline tasks is:
> + *
> + *   get off from the CPU until our next instance, with
> + *   a new runtime.
> + */

Could you amend that comment with a reason for why this is so? I have
vague recollections of a discussion on this subject but can't recall. It
seems like a useful thing to have.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-10-14 Thread Peter Zijlstra
On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
> +static void update_curr_dl(struct rq *rq)
> +{
> + struct task_struct *curr = rq->curr;
> + struct sched_dl_entity *dl_se = &curr->dl;
> + u64 delta_exec;
> +
> + if (!dl_task(curr) || !on_dl_rq(dl_se))
> + return;
> +
> + /*
> +  * Consumed budget is computed considering the time as
> +  * observed by schedulable tasks (excluding time spent
> +  * in hardirq context, etc.)
> +  */
> + delta_exec = rq->clock_task - curr->se.exec_start;

Oh, cute.. So we compute deadlines from rq->clock but compute runtime
from rq->clock_task.

So won't that give funny results in that clock_task is generally slower
than clock; so people get more 'time'.

Maybe there's some illumination on this point further on; I'll continue
reading.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-10-14 Thread Peter Zijlstra
On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
> +static void replenish_dl_entity(struct sched_dl_entity *dl_se)
> +{
> + struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
> + struct rq *rq = rq_of_dl_rq(dl_rq);
> +
> + /*
> +  * We keep moving the deadline away until we get some
> +  * available runtime for the entity. This ensures correct
> +  * handling of situations where the runtime overrun is
> +  * arbitrary large.
> +  */
> + while (dl_se->runtime <= 0) {
> + dl_se->deadline += dl_se->dl_deadline;
> + dl_se->runtime += dl_se->dl_runtime;
> + }

Are we sure GCC won't be 'smart' and bite us; that is do we need
something like:

  asm("" : "+rm" (dl_se->runtime));

in there? See:

0d98bb2656e9 sched: Prevent compiler from optimising the sched_avg_update() loop

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-10-14 Thread Peter Zijlstra
On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
> +/*
> + * We are being explicitly informed that a new instance is starting,
> + * and this means that:
> + *  - the absolute deadline of the entity has to be placed at
> + *current time + relative deadline;
> + *  - the runtime of the entity has to be set to the maximum value.
> + *
> + * The capability of specifying such event is useful whenever a -deadline
> + * entity wants to (try to!) synchronize its behaviour with the scheduler's
> + * one, and to (try to!) reconcile itself with its own scheduling
> + * parameters.
> + */
> +static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
> +{
> + struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
> + struct rq *rq = rq_of_dl_rq(dl_rq);
> +
> + WARN_ON(!dl_se->dl_new || dl_se->dl_throttled);
> +
> + /*
> +  * We use the regular wall clock time to set deadlines in the
> +  * future; in fact, we must consider execution overheads (time
> +  * spent on hardirq context, etc.).
> +  */
> + dl_se->deadline = rq->clock + dl_se->dl_deadline;
> + dl_se->runtime = dl_se->dl_runtime;
> + dl_se->dl_new = 0;
> +}

78becc270975 sched: Use an accessor to read the rq clock

wants you to use rq_clock(rq) there I think.


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-10-14 Thread Peter Zijlstra
On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
> @@ -1693,8 +1701,14 @@ void sched_fork(struct task_struct *p)
>   p->sched_reset_on_fork = 0;
>   }
>  
> - if (!rt_prio(p->prio))
> + if (dl_prio(p->prio)) {
> + put_cpu();
> + return -EAGAIN;

Is this really the error we want to return on fork()?

EAGAIN to me indicates a spurious error and we should try again later;
however as it obvious from the code above; we'll always fail, there's no
point in trying again later.

I would think something like EINVAL; even though there are no arguments
to fork(); would be a better option.

Then again; I really don't care too much; anybody any preferences?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-10-14 Thread Peter Zijlstra
On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
> +struct sched_dl_entity {
> + struct rb_node  rb_node;
> + int nr_cpus_allowed;
> +

Please see:

29baa7478ba4 sched: Move nr_cpus_allowed out of 'struct sched_rt_entity'


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-10-14 Thread Juri Lelli
From: Dario Faggioli 

Introduces the data structures, constants and symbols needed for
SCHED_DEADLINE implementation.

Core data structure of SCHED_DEADLINE are defined, along with their
initializers. Hooks for checking if a task belong to the new policy
are also added where they are needed.

Adds a scheduling class, in sched/dl.c and a new policy called
SCHED_DEADLINE. It is an implementation of the Earliest Deadline
First (EDF) scheduling algorithm, augmented with a mechanism (called
Constant Bandwidth Server, CBS) that makes it possible to isolate
the behaviour of tasks between each other.

The typical -deadline task will be made up of a computation phase
(instance) which is activated on a periodic or sporadic fashion. The
expected (maximum) duration of such computation is called the task's
runtime; the time interval by which each instance need to be completed
is called the task's relative deadline. The task's absolute deadline
is dynamically calculated as the time instant a task (better, an
instance) activates plus the relative deadline.

The EDF algorithms selects the task with the smallest absolute
deadline as the one to be executed first, while the CBS ensures each
task to run for at most its runtime every (relative) deadline
length time interval, avoiding any interference between different
tasks (bandwidth isolation).
Thanks to this feature, also tasks that do not strictly comply with
the computational model sketched above can effectively use the new
policy.

To summarize, this patch:
 - introduces the data structures, constants and symbols needed;
 - implements the core logic of the scheduling algorithm in the new
   scheduling class file;
 - provides all the glue code between the new scheduling class and
   the core scheduler and refines the interactions between sched/dl
   and the other existing scheduling classes.

Signed-off-by: Dario Faggioli 
Signed-off-by: Michael Trimarchi 
Signed-off-by: Fabio Checconi 
Signed-off-by: Juri Lelli 
---
 arch/arm/include/asm/unistd.h  |2 +-
 include/linux/sched.h  |   47 ++-
 include/linux/sched/deadline.h |   24 ++
 include/linux/sched/rt.h   |2 +-
 include/uapi/linux/sched.h |1 +
 kernel/fork.c  |4 +-
 kernel/hrtimer.c   |3 +-
 kernel/sched/Makefile  |2 +-
 kernel/sched/core.c|  111 ++-
 kernel/sched/deadline.c|  692 
 kernel/sched/sched.h   |   28 ++
 kernel/sched/stop_task.c   |2 +-
 12 files changed, 895 insertions(+), 23 deletions(-)
 create mode 100644 include/linux/sched/deadline.h
 create mode 100644 kernel/sched/deadline.c

diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 5f260fd..acabef1 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -15,7 +15,7 @@
 
 #include <uapi/asm/unistd.h>
 
-#define __NR_syscalls  (383)
+#define __NR_syscalls  (384)
 #define __ARM_NR_cmpxchg   (__ARM_NR_BASE+0x00fff0)
 
 #define __ARCH_WANT_STAT64
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6cc7193..7370f3e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -92,6 +92,10 @@ struct sched_param {
  * timing constraints.
  *
  * @__unused   padding to allow future expansion without ABI issues
+ *
+ * As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the
+ * only user of this new interface. More information about the algorithm
+ * available in the scheduling class file or in Documentation/.
  */
 struct sched_param2 {
int sched_priority;
@@ -1054,6 +1058,46 @@ struct sched_rt_entity {
 #endif
 };
 
+struct sched_dl_entity {
+   struct rb_node  rb_node;
+   int nr_cpus_allowed;
+
+   /*
+* Original scheduling parameters. Copied here from sched_param2
+* during sched_setscheduler2(), they will remain the same until
+* the next sched_setscheduler2().
+*/
+   u64 dl_runtime; /* maximum runtime for each instance*/
+   u64 dl_deadline;/* relative deadline of each instance   */
+
+   /*
+* Actual scheduling parameters. Initialized with the values above,
+* they are continously updated during task execution. Note that
+* the remaining runtime could be < 0 in case we are in overrun.
+*/
+   s64 runtime;/* remaining runtime for this instance  */
+   u64 deadline;   /* absolute deadline for this instance  */
+   unsigned int flags; /* specifying the scheduler behaviour   */
+
+   /*
+* Some bool flags:
+*
+* @dl_throttled tells if we exhausted the runtime. If so, the
+* task has to wait for a replenishment to be performed at the
+* next firing of dl_timer.
+*
+* @dl_new tells if a new instance arrived. If so we must
+* start executing it with full runtime and reset its absolute
+* deadline;

[PATCH 03/14] sched: SCHED_DEADLINE structures implementation.

2013-10-14 Thread Juri Lelli
From: Dario Faggioli raist...@linux.it

Introduces the data structures, constants and symbols needed for
SCHED_DEADLINE implementation.

Core data structure of SCHED_DEADLINE are defined, along with their
initializers. Hooks for checking if a task belong to the new policy
are also added where they are needed.

Adds a scheduling class, in sched/dl.c and a new policy called
SCHED_DEADLINE. It is an implementation of the Earliest Deadline
First (EDF) scheduling algorithm, augmented with a mechanism (called
Constant Bandwidth Server, CBS) that makes it possible to isolate
the behaviour of tasks between each other.

The typical -deadline task will be made up of a computation phase
(instance) which is activated on a periodic or sporadic fashion. The
expected (maximum) duration of such computation is called the task's
runtime; the time interval by which each instance need to be completed
is called the task's relative deadline. The task's absolute deadline
is dynamically calculated as the time instant a task (better, an
instance) activates plus the relative deadline.

The EDF algorithms selects the task with the smallest absolute
deadline as the one to be executed first, while the CBS ensures each
task to run for at most its runtime every (relative) deadline
length time interval, avoiding any interference between different
tasks (bandwidth isolation).
Thanks to this feature, also tasks that do not strictly comply with
the computational model sketched above can effectively use the new
policy.

To summarize, this patch:
 - introduces the data structures, constants and symbols needed;
 - implements the core logic of the scheduling algorithm in the new
   scheduling class file;
 - provides all the glue code between the new scheduling class and
   the core scheduler and refines the interactions between sched/dl
   and the other existing scheduling classes.

Signed-off-by: Dario Faggioli raist...@linux.it
Signed-off-by: Michael Trimarchi mich...@amarulasolutions.com
Signed-off-by: Fabio Checconi fchecc...@gmail.com
Signed-off-by: Juri Lelli juri.le...@gmail.com
---
 arch/arm/include/asm/unistd.h  |2 +-
 include/linux/sched.h  |   47 ++-
 include/linux/sched/deadline.h |   24 ++
 include/linux/sched/rt.h   |2 +-
 include/uapi/linux/sched.h |1 +
 kernel/fork.c  |4 +-
 kernel/hrtimer.c   |3 +-
 kernel/sched/Makefile  |2 +-
 kernel/sched/core.c|  111 ++-
 kernel/sched/deadline.c|  692 
 kernel/sched/sched.h   |   28 ++
 kernel/sched/stop_task.c   |2 +-
 12 files changed, 895 insertions(+), 23 deletions(-)
 create mode 100644 include/linux/sched/deadline.h
 create mode 100644 kernel/sched/deadline.c

diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 5f260fd..acabef1 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -15,7 +15,7 @@
 
 #include <uapi/asm/unistd.h>
 
-#define __NR_syscalls  (383)
+#define __NR_syscalls  (384)
 #define __ARM_NR_cmpxchg   (__ARM_NR_BASE+0x00fff0)
 
 #define __ARCH_WANT_STAT64
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6cc7193..7370f3e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -92,6 +92,10 @@ struct sched_param {
  * timing constraints.
  *
  * @__unused   padding to allow future expansion without ABI issues
+ *
+ * As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the
+ * only user of this new interface. More information about the algorithm
+ * available in the scheduling class file or in Documentation/.
  */
 struct sched_param2 {
int sched_priority;
@@ -1054,6 +1058,46 @@ struct sched_rt_entity {
 #endif
 };
 
+struct sched_dl_entity {
+   struct rb_node  rb_node;
+   int nr_cpus_allowed;
+
+   /*
+* Original scheduling parameters. Copied here from sched_param2
+* during sched_setscheduler2(), they will remain the same until
+* the next sched_setscheduler2().
+*/
+   u64 dl_runtime; /* maximum runtime for each instance*/
+   u64 dl_deadline;/* relative deadline of each instance   */
+
+   /*
+* Actual scheduling parameters. Initialized with the values above,
+* they are continously updated during task execution. Note that
+* the remaining runtime could be < 0 in case we are in overrun.
+*/
+   s64 runtime;/* remaining runtime for this instance  */
+   u64 deadline;   /* absolute deadline for this instance  */
+   unsigned int flags; /* specifying the scheduler behaviour   */
+
+   /*
+* Some bool flags:
+*
+* @dl_throttled tells if we exhausted the runtime. If so, the
+* task has to wait for a replenishment to be performed at the
+* next firing of dl_timer.
+*
+* @dl_new tells if a new 

Re: [PATCH 03/14] sched: SCHED_DEADLINE structures implementation.

2013-10-14 Thread Peter Zijlstra
On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
 +struct sched_dl_entity {
 + struct rb_node  rb_node;
 + int nr_cpus_allowed;
 +

Please see:

29baa7478ba4 sched: Move nr_cpus_allowed out of 'struct sched_rt_entity'


--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures implementation.

2013-10-14 Thread Peter Zijlstra
On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
> @@ -1693,8 +1701,14 @@ void sched_fork(struct task_struct *p)
>   p->sched_reset_on_fork = 0;
>   }
>  
> - if (!rt_prio(p->prio))
> + if (dl_prio(p->prio)) {
> + put_cpu();
> + return -EAGAIN;

Is this really the error we want to return on fork()?

EAGAIN to me indicates a spurious error and we should try again later;
however as it obvious from the code above; we'll always fail, there's no
point in trying again later.

I would think something like EINVAL; even though there are no arguments
to fork(); would be a better option.

Then again; I really don't care too much; anybody any preferences?
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures implementation.

2013-10-14 Thread Peter Zijlstra
On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
> +/*
> + * We are being explicitly informed that a new instance is starting,
> + * and this means that:
> + *  - the absolute deadline of the entity has to be placed at
> + *current time + relative deadline;
> + *  - the runtime of the entity has to be set to the maximum value.
> + *
> + * The capability of specifying such event is useful whenever a -deadline
> + * entity wants to (try to!) synchronize its behaviour with the scheduler's
> + * one, and to (try to!) reconcile itself with its own scheduling
> + * parameters.
> + */
> +static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
> +{
> + struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
> + struct rq *rq = rq_of_dl_rq(dl_rq);
> +
> + WARN_ON(!dl_se->dl_new || dl_se->dl_throttled);
> +
> + /*
> +  * We use the regular wall clock time to set deadlines in the
> +  * future; in fact, we must consider execution overheads (time
> +  * spent on hardirq context, etc.).
> +  */
> + dl_se->deadline = rq->clock + dl_se->dl_deadline;
> + dl_se->runtime = dl_se->dl_runtime;
> + dl_se->dl_new = 0;
> +}

78becc270975 sched: Use an accessor to read the rq clock

wants you to use rq_clock(rq) there I think.


--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures implementation.

2013-10-14 Thread Peter Zijlstra
On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
> +static void replenish_dl_entity(struct sched_dl_entity *dl_se)
> +{
> + struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
> + struct rq *rq = rq_of_dl_rq(dl_rq);
> +
> + /*
> +  * We keep moving the deadline away until we get some
> +  * available runtime for the entity. This ensures correct
> +  * handling of situations where the runtime overrun is
> +  * arbitrary large.
> +  */
> + while (dl_se->runtime <= 0) {
> + dl_se->deadline += dl_se->dl_deadline;
> + dl_se->runtime += dl_se->dl_runtime;
> + }

Are we sure GCC won't be 'smart' and bite us; that is do we need
something like:

  asm("" : "+rm" (dl_se->runtime));

in there? See:

0d98bb2656e9 sched: Prevent compiler from optimising the sched_avg_update() loop

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures implementation.

2013-10-14 Thread Peter Zijlstra
On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
> +static void update_curr_dl(struct rq *rq)
> +{
> + struct task_struct *curr = rq->curr;
> + struct sched_dl_entity *dl_se = &curr->dl;
> + u64 delta_exec;
> +
> + if (!dl_task(curr) || !on_dl_rq(dl_se))
> + return;
> +
> + /*
> +  * Consumed budget is computed considering the time as
> +  * observed by schedulable tasks (excluding time spent
> +  * in hardirq context, etc.)
> +  */
> + delta_exec = rq->clock_task - curr->se.exec_start;

Oh, cute.. So we compute deadlines from rq->clock but compute runtime
from rq->clock_task.

So won't that give funny results in that clock_task is generally slower
than clock; so people get more 'time'.

Maybe there's some illumination on this point further on; I'll continue
reading.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures implementation.

2013-10-14 Thread Peter Zijlstra
On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
 +/*
 + * Yield task semantic for -deadline tasks is:
 + *
 + *   get off from the CPU until our next instance, with
 + *   a new runtime.
 + */

Could you amend that comment with a reason for why this is so? I have
vague recollections of a discussion on this subject but can't recall. It
seems like a useful thing to have.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures implementation.

2013-10-14 Thread Peter Zijlstra
On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
> +static void set_cpus_allowed_dl(struct task_struct *p,
> + const struct cpumask *new_mask)
> +{
> + int weight = cpumask_weight(new_mask);
> +
> + BUG_ON(!dl_task(p));
> +
> + cpumask_copy(&p->cpus_allowed, new_mask);
> + p->dl.nr_cpus_allowed = weight;
> +}

This seems identical to the default in do_set_cpus_allowed(); can we
leave this function out and use the default?
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures implementation.

2013-10-14 Thread Juri Lelli
On 10/14/2013 01:10 PM, Peter Zijlstra wrote:
 On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
 +struct sched_dl_entity {
 +struct rb_node  rb_node;
 +int nr_cpus_allowed;
 +
 
 Please see:
 
 29baa7478ba4 sched: Move nr_cpus_allowed out of 'struct sched_rt_entity'
 
 

Yes, I already considered and used that. But, it is slipped into next patch :\.
I'll bring the change to this patch.

Thanks,

- Juri
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures implementation.

2013-10-14 Thread Peter Zijlstra
On Mon, Oct 14, 2013 at 03:05:56PM +0200, Juri Lelli wrote:
 Yes, I already considered and used that. But, it is slipped into next patch 
 :\.
 I'll bring the change to this patch.

Ah yes, the wandering hunks problem. I'm only too familiar with it :-(


--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures implementation.

2013-10-14 Thread Juri Lelli
On 10/14/2013 01:24 PM, Peter Zijlstra wrote:
> On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
>> +/*
>> + * We are being explicitly informed that a new instance is starting,
>> + * and this means that:
>> + *  - the absolute deadline of the entity has to be placed at
>> + *current time + relative deadline;
>> + *  - the runtime of the entity has to be set to the maximum value.
>> + *
>> + * The capability of specifying such event is useful whenever a -deadline
>> + * entity wants to (try to!) synchronize its behaviour with the scheduler's
>> + * one, and to (try to!) reconcile itself with its own scheduling
>> + * parameters.
>> + */
>> +static inline void setup_new_dl_entity(struct sched_dl_entity *dl_se)
>> +{
>> +struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
>> +struct rq *rq = rq_of_dl_rq(dl_rq);
>> +
>> +WARN_ON(!dl_se->dl_new || dl_se->dl_throttled);
>> +
>> +/*
>> + * We use the regular wall clock time to set deadlines in the
>> + * future; in fact, we must consider execution overheads (time
>> + * spent on hardirq context, etc.).
>> + */
>> +dl_se->deadline = rq->clock + dl_se->dl_deadline;
>> +dl_se->runtime = dl_se->dl_runtime;
>> +dl_se->dl_new = 0;
>> +}
> 
> 78becc270975 sched: Use an accessor to read the rq clock
> 
> wants you to use rq_clock(rq) there I think.
> 
> 

Modified here and below.

Thanks,

- Juri
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures implementation.

2013-10-14 Thread Juri Lelli
On 10/14/2013 01:33 PM, Peter Zijlstra wrote:
> On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
>> +static void replenish_dl_entity(struct sched_dl_entity *dl_se)
>> +{
>> +struct dl_rq *dl_rq = dl_rq_of_se(dl_se);
>> +struct rq *rq = rq_of_dl_rq(dl_rq);
>> +
>> +/*
>> + * We keep moving the deadline away until we get some
>> + * available runtime for the entity. This ensures correct
>> + * handling of situations where the runtime overrun is
>> + * arbitrary large.
>> + */
>> +while (dl_se->runtime <= 0) {
>> +dl_se->deadline += dl_se->dl_deadline;
>> +dl_se->runtime += dl_se->dl_runtime;
>> +}
> 
> Are we sure GCC won't be 'smart' and bite us; that is do we need
> something like:
> 
>   asm("" : "+rm" (dl_se->runtime));
> 
> in there? See:
> 
> 0d98bb2656e9 sched: Prevent compiler from optimising the sched_avg_update() 
> loop
> 

When disassembled everything seems fine, at least for x86 and ARM. Do I add the
fake data hazard anyway?

Thanks,

- Juri
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures implementation.

2013-10-14 Thread Peter Zijlstra
On Mon, Oct 14, 2013 at 06:16:50PM +0200, Juri Lelli wrote:
 
 When disassembled everything seems fine, at least for x86 and ARM. Do I add 
 the
 fake data hazard anyway?

nah, lets add it when we find it's needed.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures implementation.

2013-10-14 Thread Juri Lelli
On 10/14/2013 01:44 PM, Peter Zijlstra wrote:
> On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
>> +static void update_curr_dl(struct rq *rq)
>> +{
>> +struct task_struct *curr = rq->curr;
>> +struct sched_dl_entity *dl_se = &curr->dl;
>> +u64 delta_exec;
>> +
>> +if (!dl_task(curr) || !on_dl_rq(dl_se))
>> +return;
>> +
>> +/*
>> + * Consumed budget is computed considering the time as
>> + * observed by schedulable tasks (excluding time spent
>> + * in hardirq context, etc.)
>> + */
>> +delta_exec = rq->clock_task - curr->se.exec_start;
> 
> Oh, cute.. So we compute deadlines from rq->clock but compute runtime
> from rq->clock_task.
> 
> So won't that give funny results in that clock_task is generally slower
> than clock; so people get more 'time'.
> 
> Maybe there's some illumination on this point further on; I'll continue
> reading.
> 

We discussed on this point in the past...

On 04/23/2012 12:31 PM, Peter Zijlstra wrote:> On Fri, 2012-04-06 at 09:14
+0200, Juri Lelli wrote:
>> +   dl_se->deadline = rq->clock + dl_se->dl_deadline;
>
> You might want to use rq->clock_task, this clock excludes times spend in
> hardirq context and steal-time (when paravirt).
>
> Then again, it might not want to use that.. but its something you might
> want to consider and make explicit by means of a comment.
>

On 04/24/2012 08:29 AM, Dario Faggioli wrote:> On Tue, 2012-04-24 at 00:25
+0100, Tommaso Cucinotta wrote:
>>> The idea is that ->clock_task gives the time as observed by schedulable
>>> tasks and excludes other muck.
>>
>> so clock_task might be better to compute the consumed budget at task
>> deschedule, but for setting deadlines one period ahead in the future
>> guess the regular wall-time rq->clock is the one to be used?
>>
> Yep, that was the idea, unless my recollection has completely gone
> flaky! :-P
>
> Perhaps adding a comment saying right this thing above, as Peter
> suggested?

And we kind of agreed on the current use of the different clocks. Do you think
we have to reason (test) more about this? Or do we live with that and see if
something strange happens? (I actually didn't see anything suspicious in my
use of the patchset).

Thanks,

- Juri
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-10-14 Thread Peter Zijlstra
On Mon, Oct 14, 2013 at 06:58:51PM +0200, Juri Lelli wrote:
 On 10/14/2013 01:44 PM, Peter Zijlstra wrote:
  On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
 We discussed on this point in the past...

Ah, completely forgot about that; please update the comment that we
indeed use different clocks for deadline and runtime and that the full
ramifications need further study; but that deadline needs hard walltime
and clock_task is more natural for runtime.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-10-14 Thread Juri Lelli
On 10/14/2013 01:49 PM, Peter Zijlstra wrote:
 On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
 +/*
 + * Yield task semantic for -deadline tasks is:
 + *
 + *   get off from the CPU until our next instance, with
 + *   a new runtime.
 + */
 
 Could you amend that comment with a reason for why this is so? I have
 vague recollections of a discussion on this subject but can't recall. It
 seems like a useful thing to have.
 

I think discussion happened before I started maintaining the patchset, but I'm
quite sure this would be helpful for bandwidth reclaiming mechanisms.
Basically, if I'm able to report that I didn't use all the budget of my current
instance, I could donate that remaining budget to other task instances.
Bandwidth reclaiming is another nice thing to have, and we actually have some
ideas on how to implement it (TODO list always grows :)).

I'll amend the comment saying that this function is of little use now, but will
be helpful in the future.

Thanks,

- Juri
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-10-14 Thread Juri Lelli
On 10/14/2013 07:34 PM, Peter Zijlstra wrote:
 On Mon, Oct 14, 2013 at 06:58:51PM +0200, Juri Lelli wrote:
 On 10/14/2013 01:44 PM, Peter Zijlstra wrote:
 On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
 We discussed on this point in the past...
 
 Ah, completely forgot about that; please update the comment that we
 indeed use different clocks for deadline and runtime and that the full
 ramifications need further study; but that deadline needs hard walltime
 and clock_task is more natural for runtime.
 

Ok, done.

Thanks,

- Juri
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-10-14 Thread Juri Lelli
On 10/14/2013 01:51 PM, Peter Zijlstra wrote:
 On Mon, Oct 14, 2013 at 12:43:35PM +0200, Juri Lelli wrote:
 +static void set_cpus_allowed_dl(struct task_struct *p,
 +const struct cpumask *new_mask)
 +{
 +int weight = cpumask_weight(new_mask);
 +
 +BUG_ON(!dl_task(p));
 +
 +cpumask_copy(&p->cpus_allowed, new_mask);
 +p->dl.nr_cpus_allowed = weight;
 +}
 
 This seems identical to the default in do_set_cpus_allowed(); can we
 leave this function out and use the default?
 

Another thing that is changed completely in next patch. Anyway, I removed it
from here too.

Thanks,

- Juri
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-02-11 Thread Juri Lelli
From: Dario Faggioli 

Introduces the data structures, constants and symbols needed for
SCHED_DEADLINE implementation.

Core data structure of SCHED_DEADLINE are defined, along with their
initializers. Hooks for checking if a task belong to the new policy
are also added where they are needed.

Adds a scheduling class, in sched/dl.c and a new policy called
SCHED_DEADLINE. It is an implementation of the Earliest Deadline
First (EDF) scheduling algorithm, augmented with a mechanism (called
Constant Bandwidth Server, CBS) that makes it possible to isolate
the behaviour of tasks between each other.

The typical -deadline task will be made up of a computation phase
(instance) which is activated on a periodic or sporadic fashion. The
expected (maximum) duration of such computation is called the task's
runtime; the time interval by which each instance need to be completed
is called the task's relative deadline. The task's absolute deadline
is dynamically calculated as the time instant a task (better, an
instance) activates plus the relative deadline.

The EDF algorithms selects the task with the smallest absolute
deadline as the one to be executed first, while the CBS ensures each
task to run for at most its runtime every (relative) deadline
length time interval, avoiding any interference between different
tasks (bandwidth isolation).
Thanks to this feature, also tasks that do not strictly comply with
the computational model sketched above can effectively use the new
policy.

To summarize, this patch:
 - introduces the data structures, constants and symbols needed;
 - implements the core logic of the scheduling algorithm in the new
   scheduling class file;
 - provides all the glue code between the new scheduling class and
   the core scheduler and refines the interactions between sched/dl
   and the other existing scheduling classes.

Signed-off-by: Dario Faggioli 
Signed-off-by: Michael Trimarchi 
Signed-off-by: Fabio Checconi 
Signed-off-by: Juri Lelli 
---
 arch/arm/include/asm/unistd.h |2 +-
 include/linux/sched.h |   69 +++-
 include/uapi/linux/sched.h|1 +
 kernel/fork.c |4 +-
 kernel/hrtimer.c  |2 +-
 kernel/sched/Makefile |2 +-
 kernel/sched/core.c   |  111 ++-
 kernel/sched/deadline.c   |  692 +
 kernel/sched/sched.h  |   26 ++
 kernel/sched/stop_task.c  |2 +-
 10 files changed, 888 insertions(+), 23 deletions(-)
 create mode 100644 kernel/sched/deadline.c

diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 8e2ebbe..16f45f6 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -15,7 +15,7 @@
 
 #include <uapi/asm/unistd.h>
 
-#define __NR_syscalls  (383)
+#define __NR_syscalls  (384)
 #define __ARM_NR_cmpxchg   (__ARM_NR_BASE+0x00fff0)
 
 #define __ARCH_WANT_STAT64
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d779ecf..22bb2cf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -91,6 +91,10 @@ struct sched_param {
  * timing constraints.
  *
  * @__unused   padding to allow future expansion without ABI issues
+ *
+ * As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the
+ * only user of this new interface. More information about the algorithm
+ * available in the scheduling class file or in Documentation/.
  */
 struct sched_param2 {
int sched_priority;
@@ -1117,6 +1121,7 @@ struct sched_domain;
 #else
 #define ENQUEUE_WAKING 0
 #endif
+#define ENQUEUE_REPLENISH  8
 
 #define DEQUEUE_SLEEP  1
 
@@ -1269,6 +1274,47 @@ struct sched_rt_entity {
 #endif
 };
 
+struct sched_dl_entity {
+   struct rb_node  rb_node;
+   int nr_cpus_allowed;
+
+   /*
+* Original scheduling parameters. Copied here from sched_param2
+* during sched_setscheduler2(), they will remain the same until
+* the next sched_setscheduler2().
+*/
+   u64 dl_runtime; /* maximum runtime for each instance*/
+   u64 dl_deadline;/* relative deadline of each instance   */
+
+   /*
+* Actual scheduling parameters. Initialized with the values above,
+* they are continously updated during task execution. Note that
+* the remaining runtime could be < 0 in case we are in overrun.
+*/
+   s64 runtime;/* remaining runtime for this instance  */
+   u64 deadline;   /* absolute deadline for this instance  */
+   unsigned int flags; /* specifying the scheduler behaviour   */
+
+   /*
+* Some bool flags:
+*
+* @dl_throttled tells if we exhausted the runtime. If so, the
+* task has to wait for a replenishment to be performed at the
+* next firing of dl_timer.
+*
+* @dl_new tells if a new instance arrived. If so we must
+* start executing it with full runtime and reset its absolute

[PATCH 03/14] sched: SCHED_DEADLINE structures & implementation.

2013-02-11 Thread Juri Lelli
From: Dario Faggioli <raist...@linux.it>

Introduces the data structures, constants and symbols needed for
SCHED_DEADLINE implementation.

Core data structure of SCHED_DEADLINE are defined, along with their
initializers. Hooks for checking if a task belong to the new policy
are also added where they are needed.

Adds a scheduling class, in sched/dl.c and a new policy called
SCHED_DEADLINE. It is an implementation of the Earliest Deadline
First (EDF) scheduling algorithm, augmented with a mechanism (called
Constant Bandwidth Server, CBS) that makes it possible to isolate
the behaviour of tasks between each other.

The typical -deadline task will be made up of a computation phase
(instance) which is activated on a periodic or sporadic fashion. The
expected (maximum) duration of such computation is called the task's
runtime; the time interval by which each instance need to be completed
is called the task's relative deadline. The task's absolute deadline
is dynamically calculated as the time instant a task (better, an
instance) activates plus the relative deadline.

The EDF algorithms selects the task with the smallest absolute
deadline as the one to be executed first, while the CBS ensures each
task to run for at most its runtime every (relative) deadline
length time interval, avoiding any interference between different
tasks (bandwidth isolation).
Thanks to this feature, also tasks that do not strictly comply with
the computational model sketched above can effectively use the new
policy.

To summarize, this patch:
 - introduces the data structures, constants and symbols needed;
 - implements the core logic of the scheduling algorithm in the new
   scheduling class file;
 - provides all the glue code between the new scheduling class and
   the core scheduler and refines the interactions between sched/dl
   and the other existing scheduling classes.

Signed-off-by: Dario Faggioli <raist...@linux.it>
Signed-off-by: Michael Trimarchi <mich...@amarulasolutions.com>
Signed-off-by: Fabio Checconi <fchecc...@gmail.com>
Signed-off-by: Juri Lelli <juri.le...@gmail.com>
---
 arch/arm/include/asm/unistd.h |2 +-
 include/linux/sched.h |   69 +++-
 include/uapi/linux/sched.h|1 +
 kernel/fork.c |4 +-
 kernel/hrtimer.c  |2 +-
 kernel/sched/Makefile |2 +-
 kernel/sched/core.c   |  111 ++-
 kernel/sched/deadline.c   |  692 +
 kernel/sched/sched.h  |   26 ++
 kernel/sched/stop_task.c  |2 +-
 10 files changed, 888 insertions(+), 23 deletions(-)
 create mode 100644 kernel/sched/deadline.c

diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h
index 8e2ebbe..16f45f6 100644
--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -15,7 +15,7 @@
 
 #include <uapi/asm/unistd.h>
 
-#define __NR_syscalls  (383)
+#define __NR_syscalls  (384)
 #define __ARM_NR_cmpxchg   (__ARM_NR_BASE+0x00fff0)
 
 #define __ARCH_WANT_STAT64
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d779ecf..22bb2cf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -91,6 +91,10 @@ struct sched_param {
  * timing constraints.
  *
  * @__unused   padding to allow future expansion without ABI issues
+ *
+ * As of now, the SCHED_DEADLINE policy (sched_dl scheduling class) is the
+ * only user of this new interface. More information about the algorithm
+ * available in the scheduling class file or in Documentation/.
  */
 struct sched_param2 {
int sched_priority;
@@ -1117,6 +1121,7 @@ struct sched_domain;
 #else
 #define ENQUEUE_WAKING 0
 #endif
+#define ENQUEUE_REPLENISH  8
 
 #define DEQUEUE_SLEEP  1
 
@@ -1269,6 +1274,47 @@ struct sched_rt_entity {
 #endif
 };
 
+struct sched_dl_entity {
+   struct rb_node  rb_node;
+   int nr_cpus_allowed;
+
+   /*
+* Original scheduling parameters. Copied here from sched_param2
+* during sched_setscheduler2(), they will remain the same until
+* the next sched_setscheduler2().
+*/
+   u64 dl_runtime; /* maximum runtime for each instance*/
+   u64 dl_deadline;/* relative deadline of each instance   */
+
+   /*
+* Actual scheduling parameters. Initialized with the values above,
+* they are continously updated during task execution. Note that
+* the remaining runtime could be < 0 in case we are in overrun.
+*/
+   s64 runtime;/* remaining runtime for this instance  */
+   u64 deadline;   /* absolute deadline for this instance  */
+   unsigned int flags; /* specifying the scheduler behaviour   */
+
+   /*
+* Some bool flags:
+*
+* @dl_throttled tells if we exhausted the runtime. If so, the
+* task has to wait for a replenishment to be performed at the
+* next firing of dl_timer.
+*
+* @dl_new