this looks awesome .. :) :)

On Wed, Mar 28, 2012 at 10:58 AM, Eduardo Silva <[email protected]> wrote:

> People, you have to see this patch. I have implemented the "two
> queues" idea proposed here, check the results by your self:
>
>   - Patch:    http://goo.gl/mWven
>   - Profiling:
> http://edsiper.linuxchile.cl/monkey_scheduler_optimization.png
>
> cheers!
>
> On Mon, Mar 26, 2012 at 12:59 PM, Davidlohr Bueso <[email protected]> wrote:
> > On Mon, 2012-03-26 at 09:32 -0600, Eduardo Silva wrote:
> >> On Mon, Mar 26, 2012 at 4:08 AM, Davidlohr Bueso <[email protected]> wrote:
> >> > On Mon, 2012-03-26 at 02:56 -0300, Felipe Astroza Araya wrote:
> >> >> Good aproach. It's like a stack (LIFO) of sched_connections BUT I'd
> prefer a linked list, because it's simpler. You could use just a "free
> list" and not two arrays (stack and queue). When a connection is closed his
> sched_connection is returned to the "free list" (head).
> >> >>
> >> >
> >> > It seems to be this is an overkill and the optimization is too small.
> We
> >> > can always maintain a global variable with the size of current
> capacity
> >> > - since in a threaded context it would require locking which might
> lead
> >> > to contention. Another alternative is to keep a bitmap instead of a
> new
> >> > free_in_queue array. So the bitmap would have a size of work_capacity
> >> > each time a slot is occupied, the corresponding bit is set. Bitmaps
> are
> >> > O(1) as well and the overhead is just 1 bit per setting
> >>
> >> would be possible to create a test case using bitmaps ?
> >
> > The following patch is simply a proof-of-concept, I believe that
> > optimizations here are pretty worthless - optimize where it matters!
> > Times are pretty similar to what Mahesh's patch does (I tested with
> > siege), except the overhead is minimal. This is to be expected, and is
> > why a simple linked lists works nicely with small worker_capacity
> > values.
> >
> > current O(n):
> > Transaction rate:             581.51 trans/sec
> >
> > bitmaps O(1):
> > Transaction rate:             607.66 trans/sec
> >
> > I have not run valgrind on it. Another issue with fast bitmaps is that
> > it is quite architecture dependent, not only dealing with 32 and 64bits,
> > but also using the bsf instruction for x86. This allows searching for a
> > specific bit in a word
> > (http://www.posix.nl/linuxassembly/nasmdochtml/nasmdoca.html).
> >
> >
> > That said, maybe we could eventually find a specific use for bitmaps,
> > who knows.
> >
> > Cheers.
> > Dave
> >
> > From 688947564cda4fad5c5d3da4d4257ebbcd3632e3 Mon Sep 17 00:00:00 2001
> > From: Davidlohr Bueso <[email protected]>
> > Date: Mon, 26 Mar 2012 20:49:42 +0200
> > Subject: [PATCH] proof of concept for client request insertions with
> O(1), using bitmaps.
> >
> > ---
> >  configure                  |    2 +-
> >  src/include/mk_bitmap.h    |   37 ++++++++++++++++++++++++++++++
> >  src/include/mk_scheduler.h |    3 ++
> >  src/mk_bitmap.c            |   23 ++++++++++++++++++
> >  src/mk_scheduler.c         |   54
> +++++++++++++++++++++++++++----------------
> >  5 files changed, 98 insertions(+), 21 deletions(-)
> >  create mode 100644 src/include/mk_bitmap.h
> >  create mode 100644 src/mk_bitmap.c
> >
> > diff --git a/configure b/configure
> > index 18e05b8..c2bfa17 100755
> > --- a/configure
> > +++ b/configure
> > @@ -384,7 +384,7 @@ INCDIR  = ./include
> >  LDFLAGS        = $LDFLAGS
> >  DESTDIR        = ../bin/monkey
> >  LIBS   = -ldl $libs
> > -OBJ    = monkey.o mk_method.o mk_mimetype.o mk_request.o \\
> > +OBJ    = monkey.o mk_bitmap.o mk_method.o mk_mimetype.o mk_request.o \\
> >        mk_header.o mk_config.o mk_signals.o \\
> >        mk_user.o mk_utils.o mk_epoll.o mk_scheduler.o \\
> >        mk_string.o mk_memory.o mk_connection.o mk_iov.o mk_http.o \\
> > diff --git a/src/include/mk_bitmap.h b/src/include/mk_bitmap.h
> > new file mode 100644
> > index 0000000..e51fd7e
> > --- /dev/null
> > +++ b/src/include/mk_bitmap.h
> > @@ -0,0 +1,37 @@
> > +#ifndef MK_BITMAP_H
> > +#define MK_BITMAP_H
> > +
> > +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
> > +#define BITS_PER_BYTE           8
> > +#define BITS_TO_LONGS(nr)       DIV_ROUND_UP(nr, BITS_PER_BYTE *
> sizeof(long))
> > +#define BITS_PER_LONG 64
> > +
> > +#define BIT_MASK(nr)           (1UL << ((nr) % BITS_PER_LONG))
> > +#define BIT_WORD(nr)           ((nr) / BITS_PER_LONG)
> > +
> > +static inline unsigned long ffz(unsigned long word)
> > +{
> > +       asm("bsf %1,%0"
> > +           : "=r" (word)
> > +           : "r" (~word));
> > +       return word;
> > +}
> > +static inline void __set_bit(int nr, volatile unsigned long *addr)
> > +{
> > +       unsigned long mask = BIT_MASK(nr);
> > +       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
> > +
> > +       *p  |= mask;
> > +}
> > +
> > +static inline void __clear_bit(int nr, volatile unsigned long *addr)
> > +{
> > +       unsigned long mask = BIT_MASK(nr);
> > +       unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
> > +
> > +       *p &= ~mask;
> > +}
> > +
> > +unsigned long find_first_zero_bit(const unsigned long *addr, unsigned
> long size);
> > +
> > +#endif
> > diff --git a/src/include/mk_scheduler.h b/src/include/mk_scheduler.h
> > index fd2e442..fe95fd4 100644
> > --- a/src/include/mk_scheduler.h
> > +++ b/src/include/mk_scheduler.h
> > @@ -25,6 +25,7 @@
> >  #include <arpa/inet.h>
> >
> >  #include "mk_list.h"
> > +#include "mk_bitmap.h"
> >
> >  #ifndef MK_SCHEDULER_H
> >  #define MK_SCHEDULER_H
> > @@ -37,6 +38,7 @@ struct sched_connection
> >  {
> >     int status;
> >     int socket;
> > +    unsigned long pos_in_queue;
> >
> >     time_t arrive_time;
> >  };
> > @@ -46,6 +48,7 @@ struct sched_list_node
> >  {
> >     unsigned short int active_connections;
> >     struct sched_connection *queue;
> > +    unsigned long free_in_queue[BITS_TO_LONGS(102)];
> >
> >     short int idx;
> >     pthread_t tid;
> > diff --git a/src/mk_bitmap.c b/src/mk_bitmap.c
> > new file mode 100644
> > index 0000000..be58313
> > --- /dev/null
> > +++ b/src/mk_bitmap.c
> > @@ -0,0 +1,23 @@
> > +#include "mk_bitmap.h"
> > +
> > +unsigned long find_first_zero_bit(const unsigned long *addr, unsigned
> long size)
> > +{
> > +       const unsigned long *p = addr;
> > +       unsigned long result = 0;
> > +       unsigned long tmp;
> > +
> > +       while (size & ~(BITS_PER_LONG-1)) {
> > +               if (~(tmp = *(p++)))
> > +                       goto found;
> > +               result += BITS_PER_LONG;
> > +               size -= BITS_PER_LONG;
> > +       }
> > +       if (!size)
> > +               return result;
> > +
> > +       tmp = (*p) | (~0UL << size);
> > +       if (tmp == ~0UL)        /* Are any bits zero? */
> > +               return result + size;
> > +found:
> > +       return result + ffz(tmp);
> > +}
> > diff --git a/src/mk_scheduler.c b/src/mk_scheduler.c
> > index 11ee748..08a82a7 100644
> > --- a/src/mk_scheduler.c
> > +++ b/src/mk_scheduler.c
> > @@ -112,28 +112,38 @@ int mk_sched_register_client(int remote_fd, struct
> sched_list_node *sched)
> >  {
> >     unsigned int i, ret;
> >
> > -    for (i = 0; i < config->worker_capacity; i++) {
> > -        if (sched->queue[i].status == MK_SCHEDULER_CONN_AVAILABLE) {
> > -            MK_TRACE("[FD %i] Add in slot %i", remote_fd, i);
> > -
> > -            /* Before to continue, we need to run plugin stage 10 */
> > -            ret = mk_plugin_stage_run(MK_PLUGIN_STAGE_10,
> > -                                      remote_fd,
> > -                                      &sched->queue[i], NULL, NULL);
> > -
> > -            /* Close connection, otherwise continue */
> > -            if (ret == MK_PLUGIN_RET_CLOSE_CONX) {
> > -                mk_conn_close(remote_fd);
> > -                return MK_PLUGIN_RET_CLOSE_CONX;
> > -            }
> > +    i = find_first_zero_bit(sched->free_in_queue,
> config->worker_capacity);
> > +    if (i >= config->worker_capacity) {
> > +        return -1;
> > +    }
> >
> > -            /* Socket and status */
> > -            sched->queue[i].socket = remote_fd;
> > -            sched->queue[i].status = MK_SCHEDULER_CONN_PENDING;
> > -            sched->queue[i].arrive_time = log_current_utime;
> > -            return 0;
> > +    if (sched->queue[i].status == MK_SCHEDULER_CONN_AVAILABLE) {
> > +        sched->queue[i].pos_in_queue = i;
> > +        MK_TRACE("[FD %i] Add in slot %i", remote_fd, i);
> > +
> > +        /* Before to continue, we need to run plugin stage 10 */
> > +        ret = mk_plugin_stage_run(MK_PLUGIN_STAGE_10,
> > +                                  remote_fd,
> > +                                  &sched->queue[i], NULL, NULL);
> > +
> > +        /* Close connection, otherwise continue */
> > +        if (ret == MK_PLUGIN_RET_CLOSE_CONX) {
> > +            mk_conn_close(remote_fd);
> > +            return MK_PLUGIN_RET_CLOSE_CONX;
> >         }
> > +
> > +        /* Socket and status */
> > +        sched->queue[i].socket = remote_fd;
> > +        sched->queue[i].status = MK_SCHEDULER_CONN_PENDING;
> > +        sched->queue[i].arrive_time = log_current_utime;
> > +        __set_bit(i, sched->free_in_queue);
> > +        return 0;
> >     }
> > +    else {
> > +        /*This should never happen*/
> > +        mk_err("worker : invalid free position in queue found");
> > +    }
> > +
> >
> >     return -1;
> >  }
> > @@ -229,10 +239,13 @@ int mk_sched_register_thread(int efd)
> >     __sync_bool_compare_and_swap(&sl->epoll_fd, sl->epoll_fd, efd);
> >     sl->queue = mk_mem_malloc_z(sizeof(struct sched_connection) *
> >                                 config->worker_capacity);
> > +
> >     sl->request_handler = NULL;
> >
> >     for (i = 0; i < config->worker_capacity; i++) {
> >         sl->queue[i].status = MK_SCHEDULER_CONN_AVAILABLE;
> > +        __clear_bit(i, sl->free_in_queue);
> > +        sl->queue[i].pos_in_queue = i;
> >     }
> >     return sl->idx;
> >  }
> > @@ -328,6 +341,7 @@ int mk_sched_remove_client(struct sched_list_node
> *sched, int remote_fd)
> >         __sync_fetch_and_sub(&sched->active_connections, 1);
> >         sc->status = MK_SCHEDULER_CONN_AVAILABLE;
> >         sc->socket = -1;
> > +        __clear_bit(sc->pos_in_queue, sched->free_in_queue);
> >         return 0;
> >     }
> >     else {
> > --
> > 1.7.4.1
> >
> >
> >
>
>
>
> --
> Eduardo Silva
> http://edsiper.linuxchile.cl
> http://www.monkey-project.com
> _______________________________________________
> Monkey mailing list
> [email protected]
> http://lists.monkey-project.com/listinfo/monkey
>
_______________________________________________
Monkey mailing list
[email protected]
http://lists.monkey-project.com/listinfo/monkey

Reply via email to