Re: Please test: kill `p_priority'

2020-01-19 Thread Martin Pieuchot
On 14/01/20(Tue) 17:16, Martin Pieuchot wrote:
> On 13/01/20(Mon) 21:31, Martin Pieuchot wrote:
> > I'd like hardclock() to be free of SCHED_LOCK(); the diff below brings
> > us one step closer to that goal.  Please test with your favorite
> > benchmark and report any regression :o)
> > 
> > The reason for moving the SCHED_LOCK() away from hardclock() is that
> > it will allow us to re-commit the diff moving accounting out of the
> > SCHED_LOCK().  In the end we shrink the SCHED_LOCK(), which is generally
> > a good thing(tm).
> > 
> > `p_priority' is not very well named.  It's currently a placeholder for
> > three values:
> > 
> >   - the sleeping priority, corresponding to the value passed to tsleep(9)
> > which will later be used by setrunnable() to place the thread on the
> > appropriate runqueue.
> > 
> >   - the per-CPU runqueue priority which is used to add/remove a thread
> > to/from a runqueue.
> > 
> >   - the scheduling priority, also named `p_usrpri', calculated from the
> > estimated amount of CPU time used.
> > 
> > 
> > The diff below splits the current `p_priority' into two fields `p_runpri'
> > and `p_slppri'.  The important part is the schedclock() chunk:
> > 
> > @@ -519,8 +515,6 @@ schedclock(struct proc *p)
> > SCHED_LOCK(s);
> > newcpu = ESTCPULIM(p->p_estcpu + 1);
> > setpriority(p, newcpu, p->p_p->ps_nice);
> > -   if (p->p_priority >= PUSER)
> > -   p->p_priority = p->p_usrpri;
> > SCHED_UNLOCK(s);
> >   
> > `p_priority' had to be updated because it was overwritten during each
> > tsleep()/setrunnable() cycle.  The same happened in schedcpu():
> > 
> > schedcpu: reseting priority for zerothread(44701) 127 -> 90
> > schedclock: reseting priority for zerothread(44701) 127 -> 91
> > 
> > Getting rid of this assignment means we can then protect `p_estcpu' and
> > `p_usrpri' with a custom per-thread lock.
> > 
> > With this division, `p_runpri' becomes part of the scheduler fields while
> > `p_slppri' is obviously part of the global sleepqueue.
> > 
> > The rest of the diff is quite straightforward, including the userland
> > compatibility bits.
> > 
> > Tests, comments and oks welcome :o)
> 
Updated diff that changes getpriority() into a macro so that libkvm
builds again; breakage reported by anton@.

New diff that initializes `p_runpri' and fixes a parenthesis inconsistency
in the macro, as pointed out by visa@.
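
For illustration only, the parenthesization pitfall visa@'s report refers
to has roughly this shape (hypothetical macro name and body; the real
macro is the one in the diff below):

	/*
	 * Hypothetical accessor macro.  The parameter must be wrapped in
	 * parentheses so that an argument such as "p + 1" expands to
	 * "((p + 1)->p_runpri)" instead of mis-parsing; wrapping the whole
	 * expansion is the usual defensive convention for expression macros.
	 */
	#define getpri(_p)	((_p)->p_runpri)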

This has been tested by various people without regression so far, so I'd
like to move forward, ok?

Index: arch/sparc64/sparc64/db_interface.c
===================================================================
RCS file: /cvs/src/sys/arch/sparc64/sparc64/db_interface.c,v
retrieving revision 1.54
diff -u -p -r1.54 db_interface.c
--- arch/sparc64/sparc64/db_interface.c 7 Nov 2019 14:44:53 -   1.54
+++ arch/sparc64/sparc64/db_interface.c 19 Jan 2020 13:45:36 -
@@ -958,10 +958,10 @@ db_proc_cmd(addr, have_addr, count, modi
return;
}
db_printf("process %p:", p);
-   db_printf("pid:%d vmspace:%p pmap:%p ctx:%x wchan:%p pri:%d upri:%d\n",
+   db_printf("pid:%d vmspace:%p pmap:%p ctx:%x wchan:%p rpri:%d upri:%d\n",
p->p_p->ps_pid, p->p_vmspace, p->p_vmspace->vm_map.pmap,
p->p_vmspace->vm_map.pmap->pm_ctx,
-   p->p_wchan, p->p_priority, p->p_usrpri);
+   p->p_wchan, p->p_runpri, p->p_usrpri);
db_printf("maxsaddr:%p ssiz:%dpg or %llxB\n",
p->p_vmspace->vm_maxsaddr, p->p_vmspace->vm_ssize,
(unsigned long long)ptoa(p->p_vmspace->vm_ssize));
Index: dev/pci/drm/i915/intel_breadcrumbs.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/i915/intel_breadcrumbs.c,v
retrieving revision 1.4
diff -u -p -r1.4 intel_breadcrumbs.c
--- dev/pci/drm/i915/intel_breadcrumbs.c	10 Jul 2019 07:56:30 -0000	1.4
+++ dev/pci/drm/i915/intel_breadcrumbs.c	19 Jan 2020 13:45:36 -0000
@@ -451,7 +451,7 @@ static bool __intel_engine_add_wait(stru
 #ifdef __linux__
if (wait->tsk->prio > to_wait(parent)->tsk->prio) {
 #else
-   if (wait->tsk->p_priority > to_wait(parent)->tsk->p_priority) {
+   if (wait->tsk->p_usrpri > to_wait(parent)->tsk->p_usrpri) {
 #endif
p = &parent->rb_right;
first = false;
@@ -538,7 +538,7 @@ static inline bool chain_wakeup(struct r
 #else
 static inline bool chain_wakeup(struct rb_node *rb, int priority)
 {
-   return rb && to_wait(rb)->tsk->p_priority <= priority;
+   return rb && to_wait(rb)->tsk->p_usrpri <= priority;
 }
 #endif
 
@@ -558,7 +558,7 @@ static inline int wakeup_priority(struct
if (p == b->signaler)
return INT_MIN;
else
-   return p->p_priority;
+   return p->p_usrpri;
 }
 #endif

Re: Please test: kill `p_priority'

2020-01-14 Thread Martin Pieuchot
On 13/01/20(Mon) 21:31, Martin Pieuchot wrote:
> I'd like hardclock() to be free of SCHED_LOCK(); the diff below brings
> us one step closer to that goal.  Please test with your favorite
> benchmark and report any regression :o)
> 
> The reason for moving the SCHED_LOCK() away from hardclock() is that
> it will allow us to re-commit the diff moving accounting out of the
> SCHED_LOCK().  In the end we shrink the SCHED_LOCK(), which is generally
> a good thing(tm).
> 
> `p_priority' is not very well named.  It's currently a placeholder for
> three values:
> 
>   - the sleeping priority, corresponding to the value passed to tsleep(9)
> which will later be used by setrunnable() to place the thread on the
> appropriate runqueue.
> 
>   - the per-CPU runqueue priority which is used to add/remove a thread
> to/from a runqueue.
> 
>   - the scheduling priority, also named `p_usrpri', calculated from the
> estimated amount of CPU time used.
> 
> 
> The diff below splits the current `p_priority' into two fields `p_runpri'
> and `p_slppri'.  The important part is the schedclock() chunk:
> 
>   @@ -519,8 +515,6 @@ schedclock(struct proc *p)
>   SCHED_LOCK(s);
>   newcpu = ESTCPULIM(p->p_estcpu + 1);
>   setpriority(p, newcpu, p->p_p->ps_nice);
>   -   if (p->p_priority >= PUSER)
>   -   p->p_priority = p->p_usrpri;
>   SCHED_UNLOCK(s);
> 
> `p_priority' had to be updated because it was overwritten during each
> tsleep()/setrunnable() cycle.  The same happened in schedcpu():
> 
>   schedcpu: reseting priority for zerothread(44701) 127 -> 90
>   schedclock: reseting priority for zerothread(44701) 127 -> 91
> 
> Getting rid of this assignment means we can then protect `p_estcpu' and
> `p_usrpri' with a custom per-thread lock.
> 
> With this division, `p_runpri' becomes part of the scheduler fields while
> `p_slppri' is obviously part of the global sleepqueue.
> 
> The rest of the diff is quite straightforward, including the userland
> compatibility bits.
> 
> Tests, comments and oks welcome :o)

Updated diff that changes getpriority() into a macro so that libkvm
builds again; breakage reported by anton@.
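
As a hedged aside on why a macro helps: libkvm compiles parts of this
logic in userland, where a kernel-only function symbol cannot be resolved
at link time, while a macro expands in place and needs no linkage.  A
hypothetical sketch of the shape of the change:

	/* Before (hypothetical): a real function would force userland
	 * consumers of the header, such as libkvm, to resolve a
	 * kernel-only symbol. */
	u_char	getpriority(struct proc *p);

	/* After (hypothetical): the macro expands in place, so no
	 * kernel symbol is needed. */
	#define getpriority(_p)	((_p)->p_runpri)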

Index: arch/sparc64/sparc64/db_interface.c
===================================================================
RCS file: /cvs/src/sys/arch/sparc64/sparc64/db_interface.c,v
retrieving revision 1.54
diff -u -p -r1.54 db_interface.c
--- arch/sparc64/sparc64/db_interface.c 7 Nov 2019 14:44:53 -   1.54
+++ arch/sparc64/sparc64/db_interface.c 14 Jan 2020 16:02:08 -
@@ -958,10 +958,10 @@ db_proc_cmd(addr, have_addr, count, modi
return;
}
db_printf("process %p:", p);
-   db_printf("pid:%d vmspace:%p pmap:%p ctx:%x wchan:%p pri:%d upri:%d\n",
+   db_printf("pid:%d vmspace:%p pmap:%p ctx:%x wchan:%p rpri:%d upri:%d\n",
p->p_p->ps_pid, p->p_vmspace, p->p_vmspace->vm_map.pmap,
p->p_vmspace->vm_map.pmap->pm_ctx,
-   p->p_wchan, p->p_priority, p->p_usrpri);
+   p->p_wchan, p->p_runpri, p->p_usrpri);
db_printf("maxsaddr:%p ssiz:%dpg or %llxB\n",
p->p_vmspace->vm_maxsaddr, p->p_vmspace->vm_ssize,
(unsigned long long)ptoa(p->p_vmspace->vm_ssize));
Index: dev/pci/drm/i915/intel_breadcrumbs.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/i915/intel_breadcrumbs.c,v
retrieving revision 1.4
diff -u -p -r1.4 intel_breadcrumbs.c
--- dev/pci/drm/i915/intel_breadcrumbs.c	10 Jul 2019 07:56:30 -0000	1.4
+++ dev/pci/drm/i915/intel_breadcrumbs.c	14 Jan 2020 16:02:08 -0000
@@ -451,7 +451,7 @@ static bool __intel_engine_add_wait(stru
 #ifdef __linux__
if (wait->tsk->prio > to_wait(parent)->tsk->prio) {
 #else
-   if (wait->tsk->p_priority > to_wait(parent)->tsk->p_priority) {
+   if (wait->tsk->p_usrpri > to_wait(parent)->tsk->p_usrpri) {
 #endif
p = &parent->rb_right;
first = false;
@@ -538,7 +538,7 @@ static inline bool chain_wakeup(struct r
 #else
 static inline bool chain_wakeup(struct rb_node *rb, int priority)
 {
-   return rb && to_wait(rb)->tsk->p_priority <= priority;
+   return rb && to_wait(rb)->tsk->p_usrpri <= priority;
 }
 #endif
 
@@ -558,7 +558,7 @@ static inline int wakeup_priority(struct
if (p == b->signaler)
return INT_MIN;
else
-   return p->p_priority;
+   return p->p_usrpri;
 }
 #endif
 
Index: kern/kern_fork.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_fork.c,v
retrieving revision 1.220
diff -u -p -r1.220 kern_fork.c
--- kern/kern_fork.c	6 Jan 2020 10:25:10 -0000	1.220
+++ kern/kern_fork.c	14 Jan 2020 16:02:08 -0000
@@ -312,7 +312,7 @@ fork_thread_start(struct proc *p, struct
 
SCHED_LOCK(s);
ci = sched_choosecpu_fork(parent, flags);
-   setrunqueue(ci, p, p->p_priority);
+   setrunqueue(ci, p, p->p_usrpri);
SCHED_UNLOCK(s);
 }

Please test: kill `p_priority'

2020-01-14 Thread Martin Pieuchot
I'd like hardclock() to be free of SCHED_LOCK(); the diff below brings
us one step closer to that goal.  Please test with your favorite
benchmark and report any regression :o)

The reason for moving the SCHED_LOCK() away from hardclock() is that
it will allow us to re-commit the diff moving accounting out of the
SCHED_LOCK().  In the end we shrink the SCHED_LOCK(), which is generally
a good thing(tm).

`p_priority' is not very well named.  It's currently a placeholder for
three values:

  - the sleeping priority, corresponding to the value passed to tsleep(9)
which will later be used by setrunnable() to place the thread on the
appropriate runqueue.

  - the per-CPU runqueue priority which is used to add/remove a thread
to/from a runqueue.

  - the scheduling priority, also named `p_usrpri', calculated from the
estimated amount of CPU time used.


The diff below splits the current `p_priority' into two fields `p_runpri'
and `p_slppri'.  The important part is the schedclock() chunk:

@@ -519,8 +515,6 @@ schedclock(struct proc *p)
SCHED_LOCK(s);
newcpu = ESTCPULIM(p->p_estcpu + 1);
setpriority(p, newcpu, p->p_p->ps_nice);
-   if (p->p_priority >= PUSER)
-   p->p_priority = p->p_usrpri;
SCHED_UNLOCK(s);
  
`p_priority' had to be updated because it was overwritten during each
tsleep()/setrunnable() cycle.  The same happened in schedcpu():

schedcpu: reseting priority for zerothread(44701) 127 -> 90
schedclock: reseting priority for zerothread(44701) 127 -> 91
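
To make the overwrite cycle concrete, here is a simplified sketch of the
pre-split behaviour; the first assignment is what the sleep path does
(the exact kern_synch.c code may differ), the second is the chunk quoted
above:

	/* At tsleep(9) time the sleep priority clobbered the field: */
	p->p_priority = prio & PRIMASK;

	/* ...so schedcpu()/schedclock() had to put the recomputed
	 * scheduling priority back: */
	if (p->p_priority >= PUSER)
		p->p_priority = p->p_usrpri;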

Getting rid of this assignment means we can then protect `p_estcpu' and
`p_usrpri' with a custom per-thread lock.

With this division, `p_runpri' becomes part of the scheduler fields while
`p_slppri' is obviously part of the global sleepqueue.
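
In struct proc terms the split could look like the sketch below (field
order, types and locking annotations are assumptions, not a quote of
the diff):

	struct proc {
		/* ... scheduler fields ... */
		u_char	p_runpri;	/* runqueue priority, set via setrunqueue() */
		/* ... sleepqueue fields ... */
		u_char	p_slppri;	/* sleeping priority, set at tsleep(9) time */
		/* ... */
		u_char	p_usrpri;	/* priority from p_estcpu and ps_nice */
	};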

The rest of the diff is quite straightforward, including the userland
compatibility bits.

Tests, comments and oks welcome :o)

Index: arch/sparc64/sparc64/db_interface.c
===================================================================
RCS file: /cvs/src/sys/arch/sparc64/sparc64/db_interface.c,v
retrieving revision 1.54
diff -u -p -r1.54 db_interface.c
--- arch/sparc64/sparc64/db_interface.c 7 Nov 2019 14:44:53 -   1.54
+++ arch/sparc64/sparc64/db_interface.c 13 Jan 2020 18:04:23 -
@@ -958,10 +958,10 @@ db_proc_cmd(addr, have_addr, count, modi
return;
}
db_printf("process %p:", p);
-   db_printf("pid:%d vmspace:%p pmap:%p ctx:%x wchan:%p pri:%d upri:%d\n",
+   db_printf("pid:%d vmspace:%p pmap:%p ctx:%x wchan:%p rpri:%d upri:%d\n",
p->p_p->ps_pid, p->p_vmspace, p->p_vmspace->vm_map.pmap,
p->p_vmspace->vm_map.pmap->pm_ctx,
-   p->p_wchan, p->p_priority, p->p_usrpri);
+   p->p_wchan, p->p_runpri, p->p_usrpri);
db_printf("maxsaddr:%p ssiz:%dpg or %llxB\n",
p->p_vmspace->vm_maxsaddr, p->p_vmspace->vm_ssize,
(unsigned long long)ptoa(p->p_vmspace->vm_ssize));
Index: dev/pci/drm/i915/intel_breadcrumbs.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/drm/i915/intel_breadcrumbs.c,v
retrieving revision 1.4
diff -u -p -r1.4 intel_breadcrumbs.c
--- dev/pci/drm/i915/intel_breadcrumbs.c	10 Jul 2019 07:56:30 -0000	1.4
+++ dev/pci/drm/i915/intel_breadcrumbs.c	13 Jan 2020 17:40:41 -0000
@@ -451,7 +451,7 @@ static bool __intel_engine_add_wait(stru
 #ifdef __linux__
if (wait->tsk->prio > to_wait(parent)->tsk->prio) {
 #else
-   if (wait->tsk->p_priority > to_wait(parent)->tsk->p_priority) {
+   if (wait->tsk->p_usrpri > to_wait(parent)->tsk->p_usrpri) {
 #endif
p = &parent->rb_right;
first = false;
@@ -538,7 +538,7 @@ static inline bool chain_wakeup(struct r
 #else
 static inline bool chain_wakeup(struct rb_node *rb, int priority)
 {
-   return rb && to_wait(rb)->tsk->p_priority <= priority;
+   return rb && to_wait(rb)->tsk->p_usrpri <= priority;
 }
 #endif
 
@@ -558,7 +558,7 @@ static inline int wakeup_priority(struct
if (p == b->signaler)
return INT_MIN;
else
-   return p->p_priority;
+   return p->p_usrpri;
 }
 #endif
 
Index: kern/kern_fork.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_fork.c,v
retrieving revision 1.220
diff -u -p -r1.220 kern_fork.c
--- kern/kern_fork.c	6 Jan 2020 10:25:10 -0000	1.220
+++ kern/kern_fork.c	13 Jan 2020 18:04:35 -0000
@@ -312,7 +312,7 @@ fork_thread_start(struct proc *p, struct
 
SCHED_LOCK(s);
ci = sched_choosecpu_fork(parent, flags);
-   setrunqueue(ci, p, p->p_priority);
+   setrunqueue(ci, p, p->p_usrpri);
SCHED_UNLOCK(s);
 }
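
For readers without the full diff: the chunk above implies that
setrunqueue() now takes the runqueue priority explicitly and records it
in `p_runpri'.  A minimal sketch of that side, assuming the per-CPU
queue layout of kern_sched.c (the actual change is not shown in this
excerpt):

	void
	setrunqueue(struct cpu_info *ci, struct proc *p, uint8_t prio)
	{
		struct schedstate_percpu *spc = &ci->ci_schedstate;
		int queue = prio >> 2;

		SCHED_ASSERT_LOCKED();
		p->p_runpri = prio;	/* the runqueue role of `p_priority' */
		TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);
		spc->spc_whichqs |= (1 << queue);
	}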