As it turned out, event_loopbreak() does not awaken the thread that
exectutes event_dispatch(), but our code expected that it would.
One easily noticeable effect was that there was a noticeable delay
between the state transition to DB master and listening on sockets.
I knew the mysterious delay existed for a while, but never got around
to investigate. For ncld API, I moved the processing of CLD packets
to its own thread and suddenly everything else froze. Apparently the
existing code only works because of extra packets of CLD protocol.

For a fix, dispose of event_loopbreak and use a loopback pipe.
Also gone is state_tdb_new. That thing was just disgusting.

Notice that we still have one event_loopbreak remaining, because it
works correctly thanks to UNIX signal awakening the polling thread.

Signed-Off-By: Pete Zaitcev <[email protected]>

---
 server/server.c |   90 +++++++++++++++++++++++++++++++++-------------
 server/tabled.h |    4 +-
 2 files changed, 68 insertions(+), 26 deletions(-)

commit fb1aaf280f14e020e6a0110b828f4879b5eaa11e
Author: Master <[email protected]>
Date:   Wed Feb 3 19:48:42 2010 -0700

    Replace event_loopbreak with ev_pipe.

diff --git a/server/server.c b/server/server.c
index e5580c5..b205340 100644
--- a/server/server.c
+++ b/server/server.c
@@ -89,7 +89,6 @@ static error_t parse_opt (int key, char *arg, struct 
argp_state *state);
 static const struct argp argp = { options, parse_opt, NULL, doc };
 
 static bool server_running = true;
-static bool dump_stats;
 static bool use_syslog = true;
 int debugging = 0;
 
@@ -99,6 +98,12 @@ struct server tabled_srv = {
 
 struct tabledb tdb;
 
+enum {
+       TT_CMD_DUMP,
+       TT_CMD_TDBST_MASTER,
+       TT_CMD_TDBST_SLAVE
+};
+
 struct compiled_pat patterns[] = {
        [pat_auth] =
        { "^AWS (\\w+):(\\S+)", 0, },
@@ -361,8 +366,8 @@ static void term_signal(int signo)
 
 static void stats_signal(int signo)
 {
-       dump_stats = true;
-       event_loopbreak();
+       static const unsigned char cmd = TT_CMD_DUMP;
+       write(tabled_srv.ev_pipe[1], &cmd, 1);
 }
 
 #define X(stat) \
@@ -1353,6 +1358,7 @@ static void tdb_checkpoint(int fd, short events, void 
*userdata)
 
 static void tdb_state_cb(enum db_event event)
 {
+       unsigned char cmd;
 
        switch (event) {
        case TDB_EV_ELECTED:
@@ -1369,25 +1375,20 @@ static void tdb_state_cb(enum db_event event)
                 * This callback runs on the context of the replication
                 * manager thread, and calling any of our functions thus
                 * turns our program into a multi-threaded one. Instead
-                * we do a loopbreak and postpone the processing.
+                * we signal them main thread to do the processing.
                 */
                if (tabled_srv.state_tdb != ST_TDB_INIT &&
                    tabled_srv.state_tdb != ST_TDB_OPEN) {
                        if (event == TDB_EV_MASTER)
-                               tabled_srv.state_tdb_new = ST_TDB_MASTER;
+                               cmd = TT_CMD_TDBST_MASTER;
                        else
-                               tabled_srv.state_tdb_new = ST_TDB_SLAVE;
-                       if (debugging) {
-                               applog(LOG_DEBUG, "TDB state > %s",
-                                      
state_name_tdb[tabled_srv.state_tdb_new]);
-                       }
-                       event_loopbreak();
+                               cmd = TT_CMD_TDBST_SLAVE;
+                       write(tabled_srv.ev_pipe[1], &cmd, 1);
                }
                break;
        default:
                applog(LOG_WARNING, "API confusion with TDB, event 0x%x", 
event);
                tabled_srv.state_tdb = ST_TDB_OPEN;  /* wrong, stub for now */
-               tabled_srv.state_tdb_new = ST_TDB_INIT;
        }
 }
 
@@ -1727,6 +1728,8 @@ static void tdb_state_process(enum st_tdb new_state)
 {
        unsigned int db_flags;
 
+       if (debugging)
+               applog(LOG_DEBUG, "TDB state > %s", state_name_tdb[new_state]);
        if ((new_state == ST_TDB_MASTER || new_state == ST_TDB_SLAVE) &&
            tabled_srv.state_tdb == ST_TDB_ACTIVE) {
 
@@ -1744,6 +1747,38 @@ static void tdb_state_process(enum st_tdb new_state)
        }
 }
 
+static void internal_event(int fd, short events, void *userdata)
+{
+       unsigned char cmd;
+       ssize_t rrc;
+
+       rrc = read(tabled_srv.ev_pipe[0], &cmd, 1);
+       if (rrc < 0) {
+               applog(LOG_WARNING, "pipe read error: %s", strerror(errno));
+               abort();
+       }
+       if (rrc < 1) {
+               applog(LOG_WARNING, "pipe short read");
+               abort();
+       }
+
+       if (cmd == TT_CMD_DUMP) {
+               stats_dump();
+       } else if (cmd == TT_CMD_TDBST_MASTER) {
+               if (tabled_srv.state_tdb != ST_TDB_MASTER) {
+                       tdb_state_process(ST_TDB_MASTER);
+                       tabled_srv.state_tdb = ST_TDB_MASTER;
+               }
+       } else if (cmd == TT_CMD_TDBST_SLAVE) {
+               if (tabled_srv.state_tdb != ST_TDB_SLAVE) {
+                       tdb_state_process(ST_TDB_SLAVE);
+                       tabled_srv.state_tdb = ST_TDB_SLAVE;
+               }
+       } else {
+               applog(LOG_WARNING, "internal error: command 0x%x", cmd);
+       }
+}
+
 int main (int argc, char *argv[])
 {
        error_t aprc;
@@ -1820,9 +1855,22 @@ int main (int argc, char *argv[])
        signal(SIGTERM, term_signal);
        signal(SIGUSR1, stats_signal);
 
+       /*
+        * Prepare the libevent paraphernalia
+        */
        tabled_srv.evbase_main = event_init();
        event_base_rep = event_base_new();
        evtimer_set(&tabled_srv.chkpt_timer, tdb_checkpoint, NULL);
+       if (pipe(tabled_srv.ev_pipe) < 0) {
+               applogerr("pipe");
+               goto err_evpipe;
+       }
+       event_set(&tabled_srv.pevt, tabled_srv.ev_pipe[0], EV_READ | EV_PERSIST,
+                 internal_event, NULL);
+       if (event_add(&tabled_srv.pevt, NULL) < 0) {
+               applog(LOG_WARNING, "pevt event_add");
+               goto err_pevt;
+       }
 
        /* set up server networking */
        rc = net_open();
@@ -1839,21 +1887,9 @@ int main (int argc, char *argv[])
        applog(LOG_INFO, "initialized (%s)",
           (tabled_srv.flags & SFL_FOREGROUND)? "fg": "bg");
 
-       while (server_running) {
+       while (server_running)
                event_dispatch();
 
-               if (dump_stats) {
-                       dump_stats = false;
-                       stats_dump();
-               }
-
-               if (tabled_srv.state_tdb_new != ST_TDB_INIT &&
-                   tabled_srv.state_tdb_new != tabled_srv.state_tdb) {
-                       tdb_state_process(tabled_srv.state_tdb_new);
-                       tabled_srv.state_tdb = tabled_srv.state_tdb_new;
-               }
-       }
-
        applog(LOG_INFO, "shutting down");
 
        rc = 0;
@@ -1871,6 +1907,10 @@ err_out_net:
                tdb_fini(&tdb);
        }
 /* err_tdb_init: */
+err_pevt:
+       close(tabled_srv.ev_pipe[0]);
+       close(tabled_srv.ev_pipe[1]);
+err_evpipe:
        unlink(tabled_srv.pid_file);
        close(tabled_srv.pid_fd);
 err_out:
diff --git a/server/tabled.h b/server/tabled.h
index a2ffd8b..91cccae 100644
--- a/server/tabled.h
+++ b/server/tabled.h
@@ -226,6 +226,8 @@ struct server {
        int                     pid_fd;         /* fd of pid_file */
        GMutex                  *bigmutex;
        struct event_base       *evbase_main;
+       int                     ev_pipe[2];
+       struct event            pevt;
 
        char                    *config;        /* config file (static) */
 
@@ -248,7 +250,7 @@ struct server {
        int                     num_stor;       /* number of storage_node's  */
        uint64_t                object_count;
 
-       enum st_tdb             state_tdb, state_tdb_new;
+       enum st_tdb             state_tdb;
        enum st_net             state_net;
 
        struct event            chkpt_timer;    /* db4 checkpoint timer */
--
To unsubscribe from this list: send the line "unsubscribe hail-devel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to