Re: put procs on rb tree

2013-06-06 Thread Alexandre Ratchov
On Wed, Jun 05, 2013 at 02:12:36PM -0400, Ted Unangst wrote:
 
 Conclusion? You'll never notice the difference. Personally I have
 a slight preference for improving worst case behavior.

I've a preference for simpler structures; as long as pid lookup is
not a real problem.

-- Alexandre



Re: put procs on rb tree

2013-06-05 Thread Alexandre Ratchov
On Tue, Jun 04, 2013 at 11:33:12PM -0400, Ted Unangst wrote:
 Instead of using a fixed size hash table for procs, use an rb tree.
 
 Makes thread/process lookup even more web scale.
 

any measurement?

-- Alexandre



Re: put procs on rb tree

2013-06-05 Thread Ted Unangst
On Wed, Jun 05, 2013 at 14:13, Alexandre Ratchov wrote:
 On Tue, Jun 04, 2013 at 11:33:12PM -0400, Ted Unangst wrote:
 Instead of using a fixed size hash table for procs, use an rb tree.

 Makes thread/process lookup even more web scale.

 
 any measurement?

o ye of little faith...

stock
54.65
57.29
54.54

rbproc
47.27
47.34
47.16

Benchmark code is below. Now you may well complain that I cooked the
test to give me the result I want. If you remove the pid colliding
code pertaining to 42, the results are different.

stock
37.16
37.52
37.07

rbproc
47.20
46.84
46.51

Perhaps a more realistic test is called for, like building a kernel.

stock
1m24.51s real 4m6.24s user 0m43.34s system
1m24.12s real 4m7.64s user 0m41.98s system

rbproc
1m24.02s real 4m6.90s user 0m43.65s system
1m23.88s real 4m6.07s user 0m41.73s system

rbproc wins by a hair, but I am happy to call it a draw.

Conclusion? You'll never notice the difference. Personally I have a
slight preference for improving worst case behavior. The hash table
isn't *that* large today, but as we increase maxthreads it will get
bigger. I prefer dynamic structures to statically sized structures.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

static __inline uint64_t
rdtsc(void)
{
	uint32_t hi, lo;

	__asm __volatile("rdtsc" : "=d" (hi), "=a" (lo));
	return (((uint64_t)hi << 32) | (uint64_t) lo);
}

const int loops = 256;
const int maxproc = 50;

int
main()
{
	int pidhash = 511;
	int nprocs;
	int pids[200];
	int i, p, pid;
	uint64_t before, after;

	printf("creating pids\n");
	nprocs = 0;
	while (nprocs < maxproc) {
		pid = fork();
		switch (pid) {
		case 0:
			if ((getpid() & pidhash) != 42)
				_exit(0);
			while (1)
				sleep(1);
			break;
		case -1:
			for (p = 0; p < nprocs; p++)
				kill(pids[p], 9);
			printf("failed after %d %d\n", nprocs, errno);
			exit(1);
		default:
			if ((pid & pidhash) == 42)
				pids[nprocs++] = pid;
			else
				waitpid(pid, NULL, 0);
		}
	}
	printf("benchmarking...\n");
	before = rdtsc();
	for (i = 0; i < loops; i++)
		for (p = 0; p < nprocs; p++)
			kill(pids[p], 0);
	after = rdtsc();
	for (p = 0; p < nprocs; p++)
		kill(pids[p], 9);
	printf("%.2f\n", (after - before) / 10.0);
}



Re: put procs on rb tree

2013-06-05 Thread Bob Beck

I'm ok with this. 


On Wed, Jun 05, 2013 at 02:12:36PM -0400, Ted Unangst wrote:
 On Wed, Jun 05, 2013 at 14:13, Alexandre Ratchov wrote:
  On Tue, Jun 04, 2013 at 11:33:12PM -0400, Ted Unangst wrote:
  Instead of using a fixed size hash table for procs, use an rb tree.
 
  Makes thread/process lookup even more web scale.
 
  
  any measurement?
 
 o ye of little faith...
 
 stock
 54.65
 57.29
 54.54
 
 rbproc
 47.27
 47.34
 47.16
 
 Benchmark code is below. Now you may well complain that I cooked the
 test to give me the result I want. If you remove the pid colliding
 code pertaining to 42, the results are different.
 
 stock
 37.16
 37.52
 37.07
 
 rbproc
 47.20
 46.84
 46.51
 
 Perhaps a more realistic test is called for, like building a kernel.
 
 stock
 1m24.51s real 4m6.24s user 0m43.34s system
 1m24.12s real 4m7.64s user 0m41.98s system
 
 rbproc
 1m24.02s real 4m6.90s user 0m43.65s system
 1m23.88s real 4m6.07s user 0m41.73s system
 
 rbproc wins by a hair, but I am happy to call it a draw.
 
 Conclusion? You'll never notice the difference. Personally I have a
 slight preference for improving worst case behavior. The hash table
 isn't *that* large today, but as we increase maxthreads it will get
 bigger. I prefer dynamic structures to statically sized structures.
 
 #include errno.h
 #include stdio.h
 #include stdlib.h
 #include unistd.h
 
 static __inline uint64_t
 rdtsc(void)
 {
   uint32_t hi, lo;
 
   __asm __volatile(rdtsc : =d (hi), =a (lo));
   return (((uint64_t)hi  32) | (uint64_t) lo);
 }
 
 const int loops = 256;
 const int maxproc = 50;
 
 int
 main()
 {
   int pidhash = 511;
   int nprocs;
   int pids[200];
   int i, p, pid;
   uint64_t before, after;
 
   printf(creating pids\n);
   nprocs = 0;
   while (nprocs  maxproc) {
   pid = fork();
   switch (pid) {
   case 0:
   if ((getpid()  pidhash) != 42)
   _exit(0);
   while (1)
   sleep(1);
   break;
   case -1:
   for (p = 0; p  nprocs; p++)
   kill(pids[p], 9);
   printf(failed after %d %d\n, nprocs, errno);
   exit(1);
   default:
   if ((pid  pidhash) == 42)
   pids[nprocs++] = pid;
   else
   waitpid(pid, NULL, 0);
   }
   }
   printf(benchmarking...\n);
   before = rdtsc();
   for (i = 0; i  loops; i++)
   for (p = 0; p  nprocs; p++)
   kill(pids[p], 0);
   after = rdtsc();
   for (p = 0; p  nprocs; p++)
   kill(pids[p], 9);
   printf(%.2f\n, (after - before) / 10.0);
 }
 



Re: put procs on rb tree

2013-06-05 Thread Mark Kettenis
 Date: Wed, 05 Jun 2013 14:12:36 -0400
 From: Ted Unangst t...@tedunangst.com
 
 On Wed, Jun 05, 2013 at 14:13, Alexandre Ratchov wrote:
  On Tue, Jun 04, 2013 at 11:33:12PM -0400, Ted Unangst wrote:
  Instead of using a fixed size hash table for procs, use an rb tree.
 
  Makes thread/process lookup even more web scale.
 
  
  any measurement?
 
 o ye of little faith...
 
 stock
 54.65
 57.29
 54.54
 
 rbproc
 47.27
 47.34
 47.16
 
 Benchmark code is below. Now you may well complain that I cooked the
 test to give me the result I want. If you remove the pid colliding
 code pertaining to 42, the results are different.
 
 stock
 37.16
 37.52
 37.07
 
 rbproc
 47.20
 46.84
 46.51
 
 Perhaps a more realistic test is called for, like building a kernel.
 
 stock
 1m24.51s real 4m6.24s user 0m43.34s system
 1m24.12s real 4m7.64s user 0m41.98s system
 
 rbproc
 1m24.02s real 4m6.90s user 0m43.65s system
 1m23.88s real 4m6.07s user 0m41.73s system
 
 rbproc wins by a hair, but I am happy to call it a draw.
 
 Conclusion? You'll never notice the difference. Personally I have a
 slight preference for improving worst case behavior. The hash table
 isn't *that* large today, but as we increase maxthreads it will get
 bigger. I prefer dynamic structures to statically sized structures.

If it ain't broke, don't fix it?

And I think there are some downsides you may be overlooking:

 * red-black trees tend to add a significant amount of code to the kernel

 * red-black trees are a PITA when debugging; it's difficult to find
   anything in them when you're in ddb/gdb

Cheers,

Mark



put procs on rb tree

2013-06-04 Thread Ted Unangst
Instead of using a fixed size hash table for procs, use an rb tree.

Makes thread/process lookup even more web scale.

Index: kern/init_main.c
===
RCS file: /cvs/src/sys/kern/init_main.c,v
retrieving revision 1.189
diff -u -p -r1.189 init_main.c
--- kern/init_main.c3 Jun 2013 16:55:22 -   1.189
+++ kern/init_main.c5 Jun 2013 02:36:56 -
@@ -274,7 +274,7 @@ main(void *framep)
 
 	LIST_INSERT_HEAD(&allproc, p, p_list);
 	pr->ps_pgrp = &pgrp0;
-	LIST_INSERT_HEAD(PIDHASH(0), p, p_hash);
+	RB_INSERT(proc_rb_procs, &proc_rb_root, p);
 	LIST_INSERT_HEAD(PGRPHASH(0), &pgrp0, pg_hash);
 	LIST_INIT(&pgrp0.pg_members);
 	LIST_INSERT_HEAD(&pgrp0.pg_members, pr, ps_pglist);
Index: kern/kern_exit.c
===
RCS file: /cvs/src/sys/kern/kern_exit.c,v
retrieving revision 1.125
diff -u -p -r1.125 kern_exit.c
--- kern/kern_exit.c5 Jun 2013 00:53:26 -   1.125
+++ kern/kern_exit.c5 Jun 2013 02:40:47 -
@@ -254,7 +254,7 @@ exit1(struct proc *p, int rv, int flags)
  * Remove proc from pidhash chain so looking it up won't
  * work.  Move it from allproc to zombproc, but do not yet
  * wake up the reaper.  We will put the proc on the
- * deadproc list later (using the p_hash member), and
+ * deadproc list later (using the p_rbtree member), and
  * wake up the reaper when we do.
  */
/*
@@ -262,7 +262,7 @@ exit1(struct proc *p, int rv, int flags)
 	 */
 	p->p_stat = SDEAD;
 
-	LIST_REMOVE(p, p_hash);
+	RB_REMOVE(proc_rb_procs, &proc_rb_root, p);
 	LIST_REMOVE(p, p_list);
 	LIST_INSERT_HEAD(&zombproc, p, p_list);
 
@@ -366,10 +366,10 @@ exit1(struct proc *p, int rv, int flags)
  * critical section of process exit, and thus locking it can't
  * modify interrupt state.  We use a simple spin lock for this
  * proclist.  Processes on this proclist are also on zombproc;
- * we use the p_hash member to linkup to deadproc.
+ * we use the p_rbtree member to linkup to deadproc.
  */
 struct mutex deadproc_mutex = MUTEX_INITIALIZER(IPL_NONE);
-struct proclist deadproc = LIST_HEAD_INITIALIZER(deadproc);
+struct proc_rb_procs deadproc = RB_INITIALIZER(deadproc);
 
 /*
  * We are called from cpu_exit() once it is safe to schedule the
@@ -380,13 +380,13 @@ struct proclist deadproc = LIST_HEAD_INI
  * we should refrain from changing any interrupt state.
  *
  * We lock the deadproc list, place the proc on that list (using
- * the p_hash member), and wake up the reaper.
+ * the p_rbtree member), and wake up the reaper.
  */
 void
 exit2(struct proc *p)
 {
 	mtx_enter(&deadproc_mutex);
-	LIST_INSERT_HEAD(&deadproc, p, p_hash);
+	RB_INSERT(proc_rb_procs, &deadproc, p);
 	mtx_leave(&deadproc_mutex);
 
 	wakeup(&deadproc);
@@ -408,11 +408,11 @@ reaper(void)
 
for (;;) {
 		mtx_enter(&deadproc_mutex);
-		while ((p = LIST_FIRST(&deadproc)) == NULL)
+		while ((p = RB_ROOT(&deadproc)) == NULL)
 			msleep(&deadproc, &deadproc_mutex, PVM, "reaper", 0);
 
 		/* Remove us from the deadproc list. */
-		LIST_REMOVE(p, p_hash);
+		RB_REMOVE(proc_rb_procs, &deadproc, p);
 		mtx_leave(&deadproc_mutex);
 
KERNEL_LOCK();
Index: kern/kern_fork.c
===
RCS file: /cvs/src/sys/kern/kern_fork.c,v
retrieving revision 1.150
diff -u -p -r1.150 kern_fork.c
--- kern/kern_fork.c5 Jun 2013 00:53:26 -   1.150
+++ kern/kern_fork.c5 Jun 2013 02:24:32 -
@@ -438,7 +438,7 @@ fork1(struct proc *curp, int exitsig, in
 	p->p_pid = allocpid();
 
 	LIST_INSERT_HEAD(&allproc, p, p_list);
-	LIST_INSERT_HEAD(PIDHASH(p->p_pid), p, p_hash);
+	RB_INSERT(proc_rb_procs, &proc_rb_root, p);
 	if ((flags & FORK_THREAD) == 0) {
 		LIST_INSERT_AFTER(curpr, pr, ps_pglist);
 		LIST_INSERT_HEAD(&curpr->ps_children, pr, ps_sibling);
Index: kern/kern_proc.c
===
RCS file: /cvs/src/sys/kern/kern_proc.c,v
retrieving revision 1.50
diff -u -p -r1.50 kern_proc.c
--- kern/kern_proc.c17 Feb 2013 17:39:29 -  1.50
+++ kern/kern_proc.c5 Jun 2013 02:54:12 -
@@ -56,8 +56,9 @@ u_long uihash;/* size of hash table - 
 /*
  * Other process lists
  */
-struct pidhashhead *pidhashtbl;
-u_long pidhash;
+int	rb_proc_compare(struct proc *, struct proc *);
+RB_GENERATE(proc_rb_procs, proc, p_rbtree, rb_proc_compare);
+struct proc_rb_procs proc_rb_root;
 struct pgrphashhead *pgrphashtbl;
 u_long pgrphash;
 struct proclist allproc;
@@ -84,12 +85,11 @@ procinit(void)
 {
LIST_INIT(allproc);
LIST_INIT(zombproc);
+   RB_INIT(proc_rb_root);
 
-
-