Expand sched_{set,get}attr() to include the policy and nice value.

This obviates the need for sched_setscheduler2().

The new sched_setattr() call now covers the functionality of:

  sched_setscheduler(),
  sched_setparam(),
  setpriority(.which = PRIO_PROCESS)

And sched_getattr() now covers:

  sched_getscheduler(),
  sched_getparam(),
  getpriority(.which = PRIO_PROCESS)

Signed-off-by: Peter Zijlstra <[email protected]>
---
 arch/arm/include/asm/unistd.h      |    2 
 arch/arm/include/uapi/asm/unistd.h |    5 -
 arch/arm/kernel/calls.S            |    3 
 arch/x86/syscalls/syscall_32.tbl   |    1 
 arch/x86/syscalls/syscall_64.tbl   |    1 
 include/linux/sched.h              |   24 +++--
 include/linux/syscalls.h           |    2 
 kernel/sched/core.c                |  173 +++++++++++++++++++------------------
 kernel/sched/sched.h               |   13 +-
 9 files changed, 119 insertions(+), 105 deletions(-)

--- a/arch/arm/include/asm/unistd.h
+++ b/arch/arm/include/asm/unistd.h
@@ -15,7 +15,7 @@
 
 #include <uapi/asm/unistd.h>
 
-#define __NR_syscalls  (383)
+#define __NR_syscalls  (382)
 #define __ARM_NR_cmpxchg               (__ARM_NR_BASE+0x00fff0)
 
 #define __ARCH_WANT_STAT64
--- a/arch/arm/include/uapi/asm/unistd.h
+++ b/arch/arm/include/uapi/asm/unistd.h
@@ -406,9 +406,8 @@
 #define __NR_process_vm_writev         (__NR_SYSCALL_BASE+377)
 #define __NR_kcmp                      (__NR_SYSCALL_BASE+378)
 #define __NR_finit_module              (__NR_SYSCALL_BASE+379)
-#define __NR_sched_setscheduler2       (__NR_SYSCALL_BASE+380)
-#define __NR_sched_setattr             (__NR_SYSCALL_BASE+381)
-#define __NR_sched_getattr             (__NR_SYSCALL_BASE+382)
+#define __NR_sched_setattr             (__NR_SYSCALL_BASE+380)
+#define __NR_sched_getattr             (__NR_SYSCALL_BASE+381)
 
 /*
  * This may need to be greater than __NR_last_syscall+1 in order to
--- a/arch/arm/kernel/calls.S
+++ b/arch/arm/kernel/calls.S
@@ -389,8 +389,7 @@
                CALL(sys_process_vm_writev)
                CALL(sys_kcmp)
                CALL(sys_finit_module)
-/* 380 */      CALL(sys_sched_setscheduler2)
-               CALL(sys_sched_setattr)
+/* 380 */      CALL(sys_sched_setattr)
                CALL(sys_sched_getattr)
 #ifndef syscalls_counted
 .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -359,4 +359,3 @@
 350    i386    finit_module            sys_finit_module
 351    i386    sched_setattr           sys_sched_setattr
 352    i386    sched_getattr           sys_sched_getattr
-353    i386    sched_setscheduler2     sys_sched_setscheduler2
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -322,7 +322,6 @@
 313    common  finit_module            sys_finit_module
 314    common  sched_setattr           sys_sched_setattr
 315    common  sched_getattr           sys_sched_getattr
-316    common  sched_setscheduler2     sys_sched_setscheduler2
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -57,7 +57,7 @@ struct sched_param {
 
 #include <asm/processor.h>
 
-#define SCHED_ATTR_SIZE_VER0   40      /* sizeof first published struct */
+#define SCHED_ATTR_SIZE_VER0   48      /* sizeof first published struct */
 
 /*
  * Extended scheduling parameters data structure.
@@ -85,7 +85,9 @@ struct sched_param {
  *
  * This is reflected by the actual fields of the sched_attr structure:
  *
- *  @sched_priority     task's priority (might still be useful)
+ *  @sched_policy      task's scheduling policy
+ *  @sched_nice                task's nice value      (SCHED_NORMAL/BATCH)
+ *  @sched_priority     task's static priority (SCHED_FIFO/RR)
  *  @sched_flags        for customizing the scheduler behaviour
  *  @sched_deadline     representative of the task's deadline
  *  @sched_runtime      representative of the task's runtime
@@ -102,15 +104,21 @@ struct sched_param {
  * available in the scheduling class file or in Documentation/.
  */
 struct sched_attr {
-       int sched_priority;
-       unsigned int sched_flags;
+       u32 size;
+
+       u32 sched_policy;
+       u64 sched_flags;
+
+       /* SCHED_NORMAL, SCHED_BATCH */
+       s32 sched_nice;
+
+       /* SCHED_FIFO, SCHED_RR */
+       u32 sched_priority;
+
+       /* SCHED_DEADLINE */
        u64 sched_runtime;
        u64 sched_deadline;
        u64 sched_period;
-       u32 size;
-
-       /* Align to u64. */
-       u32 __reserved;
 };
 
 struct exec_domain;
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -278,8 +278,6 @@ asmlinkage long sys_clock_nanosleep(cloc
 asmlinkage long sys_nice(int increment);
 asmlinkage long sys_sched_setscheduler(pid_t pid, int policy,
                                        struct sched_param __user *param);
-asmlinkage long sys_sched_setscheduler2(pid_t pid, int policy,
-                                       struct sched_attr __user *attr);
 asmlinkage long sys_sched_setparam(pid_t pid,
                                        struct sched_param __user *param);
 asmlinkage long sys_sched_setattr(pid_t pid,
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2973,6 +2973,7 @@ void rt_mutex_setprio(struct task_struct
        __task_rq_unlock(rq);
 }
 #endif
+
 void set_user_nice(struct task_struct *p, long nice)
 {
        int old_prio, delta, on_rq;
@@ -3147,24 +3148,6 @@ static struct task_struct *find_process_
        return pid ? find_task_by_vpid(pid) : current;
 }
 
-/* Actually do priority change: must hold rq lock. */
-static void
-__setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
-{
-       p->policy = policy;
-       p->rt_priority = prio;
-       p->normal_prio = normal_prio(p);
-       /* we are holding p->pi_lock already */
-       p->prio = rt_mutex_getprio(p);
-       if (dl_prio(p->prio))
-               p->sched_class = &dl_sched_class;
-       else if (rt_prio(p->prio))
-               p->sched_class = &rt_sched_class;
-       else
-               p->sched_class = &fair_sched_class;
-       set_load_weight(p);
-}
-
 /*
  * This function initializes the sched_dl_entity of a newly becoming
  * SCHED_DEADLINE task.
@@ -3188,6 +3171,34 @@ __setparam_dl(struct task_struct *p, con
        dl_se->dl_new = 1;
 }
 
+/* Actually do priority change: must hold pi & rq lock. */
+static void __setscheduler(struct rq *rq, struct task_struct *p,
+                          const struct sched_attr *attr)
+{
+       int policy = attr->sched_policy;
+
+       p->policy = policy;
+
+       if (fair_policy(policy))
+               p->static_prio = NICE_TO_PRIO(attr->sched_nice);
+       if (rt_policy(policy))
+               p->rt_priority = attr->sched_priority;
+       if (dl_policy(policy))
+               __setparam_dl(p, attr);
+
+       p->normal_prio = normal_prio(p);
+       p->prio = rt_mutex_getprio(p);
+
+       if (dl_prio(p->prio))
+               p->sched_class = &dl_sched_class;
+       else if (rt_prio(p->prio))
+               p->sched_class = &rt_sched_class;
+       else
+               p->sched_class = &fair_sched_class;
+
+       set_load_weight(p);
+}
+
 static void
 __getparam_dl(struct task_struct *p, struct sched_attr *attr)
 {
@@ -3234,11 +3245,12 @@ static bool check_same_owner(struct task
        return match;
 }
 
-static int __sched_setscheduler(struct task_struct *p, int policy,
+static int __sched_setscheduler(struct task_struct *p,
                                const struct sched_attr *attr,
                                bool user)
 {
        int retval, oldprio, oldpolicy = -1, on_rq, running;
+       int policy = attr->sched_policy;
        unsigned long flags;
        const struct sched_class *prev_class;
        struct rq *rq;
@@ -3271,6 +3283,7 @@ static int __sched_setscheduler(struct t
            (p->mm && attr->sched_priority > MAX_USER_RT_PRIO-1) ||
            (!p->mm && attr->sched_priority > MAX_RT_PRIO-1))
                return -EINVAL;
+
        if ((dl_policy(policy) && !__checkparam_dl(attr)) ||
            (rt_policy(policy) != (attr->sched_priority != 0)))
                return -EINVAL;
@@ -3279,6 +3292,11 @@ static int __sched_setscheduler(struct t
         * Allow unprivileged RT tasks to decrease priority:
         */
        if (user && !capable(CAP_SYS_NICE)) {
+               if (fair_policy(policy)) {
+                       if (!can_nice(p, attr->sched_nice))
+                               return -EPERM;
+               }
+
                if (rt_policy(policy)) {
                        unsigned long rlim_rtprio =
                                        task_rlimit(p, RLIMIT_RTPRIO);
@@ -3337,12 +3355,18 @@ static int __sched_setscheduler(struct t
        /*
         * If not changing anything there's no need to proceed further:
         */
-       if (unlikely(policy == p->policy && (!rt_policy(policy) ||
-                       attr->sched_priority == p->rt_priority) &&
-                       !dl_policy(policy))) {
+       if (unlikely(policy == p->policy)) {
+               if (fair_policy(policy) && attr->sched_nice != TASK_NICE(p))
+                       goto change;
+               if (rt_policy(policy) && attr->sched_priority != p->rt_priority)
+                       goto change;
+               if (dl_policy(policy))
+                       goto change;
+
                task_rq_unlock(rq, p, &flags);
                return 0;
        }
+change:
 
        if (user) {
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -3399,8 +3423,7 @@ static int __sched_setscheduler(struct t
         */
        if ((dl_policy(policy) || dl_task(p)) &&
            dl_overflow(p, policy, attr)) {
-               __task_rq_unlock(rq);
-               raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+               task_rq_unlock(rq, p, &flags);
                return -EBUSY;
        }
 
@@ -3415,9 +3438,7 @@ static int __sched_setscheduler(struct t
 
        oldprio = p->prio;
        prev_class = p->sched_class;
-       if (dl_policy(policy))
-               __setparam_dl(p, attr);
-       __setscheduler(rq, p, policy, attr->sched_priority);
+       __setscheduler(rq, p, attr);
 
        if (running)
                p->sched_class->set_curr_task(rq);
@@ -3446,18 +3467,18 @@ int sched_setscheduler(struct task_struc
                       const struct sched_param *param)
 {
        struct sched_attr attr = {
+               .sched_policy   = policy,
                .sched_priority = param->sched_priority
        };
-       return __sched_setscheduler(p, policy, &attr, true);
+       return __sched_setscheduler(p, &attr, true);
 }
 EXPORT_SYMBOL_GPL(sched_setscheduler);
 
-int sched_setscheduler2(struct task_struct *p, int policy,
-                       const struct sched_attr *attr)
+int sched_setattr(struct task_struct *p, const struct sched_attr *attr)
 {
-       return __sched_setscheduler(p, policy, attr, true);
+       return __sched_setscheduler(p, attr, true);
 }
-EXPORT_SYMBOL_GPL(sched_setscheduler2);
+EXPORT_SYMBOL_GPL(sched_setattr);
 
 /**
 * sched_setscheduler_nocheck - change the scheduling policy and/or RT priority of a thread from kernelspace.
@@ -3476,9 +3497,10 @@ int sched_setscheduler_nocheck(struct ta
                               const struct sched_param *param)
 {
        struct sched_attr attr = {
+               .sched_policy   = policy,
                .sched_priority = param->sched_priority
        };
-       return __sched_setscheduler(p, policy, &attr, false);
+       return __sched_setscheduler(p, &attr, false);
 }
 
 static int
@@ -3561,6 +3583,12 @@ static int sched_copy_attr(struct sched_
        if (ret)
                return -EFAULT;
 
+       /*
+        * XXX: do we want to be lenient like existing syscalls; or do we want
+        * to be strict and return an error on out-of-bounds values?
+        */
+       attr->sched_nice = clamp(attr->sched_nice, -20, 19);
+
 out:
        return ret;
 
@@ -3570,33 +3598,6 @@ static int sched_copy_attr(struct sched_
        goto out;
 }
 
-static int
-do_sched_setscheduler2(pid_t pid, int policy,
-                      struct sched_attr __user *attr_uptr)
-{
-       struct sched_attr attr;
-       struct task_struct *p;
-       int retval;
-
-       if (!attr_uptr || pid < 0)
-               return -EINVAL;
-
-       if (sched_copy_attr(attr_uptr, &attr))
-               return -EFAULT;
-
-       rcu_read_lock();
-       retval = -ESRCH;
-       p = find_process_by_pid(pid);
-       if (p != NULL) {
-               if (dl_policy(policy))
-                       attr.sched_priority = 0;
-               retval = sched_setscheduler2(p, policy, &attr);
-       }
-       rcu_read_unlock();
-
-       return retval;
-}
-
 /**
  * sys_sched_setscheduler - set/change the scheduler policy and RT priority
  * @pid: the pid in question.
@@ -3616,21 +3617,6 @@ SYSCALL_DEFINE3(sched_setscheduler, pid_
 }
 
 /**
- * sys_sched_setscheduler2 - same as above, but with extended sched_param
- * @pid: the pid in question.
- * @policy: new policy (could use extended sched_param).
- * @attr: structure containg the extended parameters.
- */
-SYSCALL_DEFINE3(sched_setscheduler2, pid_t, pid, int, policy,
-               struct sched_attr __user *, attr)
-{
-       if (policy < 0)
-               return -EINVAL;
-
-       return do_sched_setscheduler2(pid, policy, attr);
-}
-
-/**
  * sys_sched_setparam - set/change the RT priority of a thread
  * @pid: the pid in question.
  * @param: structure containing the new RT priority.
@@ -3647,10 +3633,26 @@ SYSCALL_DEFINE2(sched_setparam, pid_t, p
  * @pid: the pid in question.
  * @attr: structure containing the extended parameters.
  */
-SYSCALL_DEFINE2(sched_setattr, pid_t, pid,
-               struct sched_attr __user *, attr)
+SYSCALL_DEFINE2(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr)
 {
-       return do_sched_setscheduler2(pid, -1, attr);
+       struct sched_attr attr;
+       struct task_struct *p;
+       int retval;
+
+       if (!uattr || pid < 0)
+               return -EINVAL;
+
+       if (sched_copy_attr(uattr, &attr))
+               return -EFAULT;
+
+       rcu_read_lock();
+       retval = -ESRCH;
+       p = find_process_by_pid(pid);
+       if (p != NULL)
+               retval = sched_setattr(p, &attr);
+       rcu_read_unlock();
+
+       return retval;
 }
 
 /**
@@ -3797,8 +3799,14 @@ SYSCALL_DEFINE3(sched_getattr, pid_t, pi
        if (retval)
                goto out_unlock;
 
-       __getparam_dl(p, &attr);
-       attr.sched_priority = p->rt_priority;
+       attr.sched_policy = p->policy;
+       if (task_has_dl_policy(p))
+               __getparam_dl(p, &attr);
+       else if (task_has_rt_policy(p))
+               attr.sched_priority = p->rt_priority;
+       else
+               attr.sched_nice = TASK_NICE(p);
+
        rcu_read_unlock();
 
        retval = sched_read_attr(uattr, &attr, size);
@@ -6948,13 +6956,16 @@ EXPORT_SYMBOL(__might_sleep);
 static void normalize_task(struct rq *rq, struct task_struct *p)
 {
        const struct sched_class *prev_class = p->sched_class;
+       struct sched_attr attr = {
+               .sched_policy = SCHED_NORMAL,
+       };
        int old_prio = p->prio;
        int on_rq;
 
        on_rq = p->on_rq;
        if (on_rq)
                dequeue_task(rq, p, 0);
-       __setscheduler(rq, p, SCHED_NORMAL, 0);
+       __setscheduler(rq, p, &attr);
        if (on_rq) {
                enqueue_task(rq, p, 0);
                resched_task(rq->curr);
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -90,18 +90,19 @@ extern void update_cpu_load_active(struc
  */
 #define DL_SCALE (10)
 
+static inline int fair_policy(int policy)
+{
+       return policy == SCHED_NORMAL || policy == SCHED_BATCH;
+}
+
 static inline int rt_policy(int policy)
 {
-       if (policy == SCHED_FIFO || policy == SCHED_RR)
-               return 1;
-       return 0;
+       return policy == SCHED_FIFO || policy == SCHED_RR;
 }
 
 static inline int dl_policy(int policy)
 {
-       if (unlikely(policy == SCHED_DEADLINE))
-               return 1;
-       return 0;
+       return unlikely(policy == SCHED_DEADLINE);
 }
 
 static inline int task_has_rt_policy(struct task_struct *p)


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to