Hi Jeff, Do you still plan to replace bdb (and its replication) with something based on Paxos? I'm considering replacing the Ceph monitors (which currently implement Paxos, but in a very Ceph-specific way) with cld if it can meet the basic requirements.
What I'd kind of like to see is a clean implementation of a paxos library--one that leaves out message transport and storage--to build a replicated write-ahead log. And then a separate library for handling the database/namespace served up by cld (be it regular files, bdb, whatever) that leaves replication up to paxos. It looks like Google ended up doing something similar with Chubby (see http://labs.google.com/papers/paxos_made_live.html). Does this sound like the direction you guys are heading in? sage On Fri, 31 Jul 2009, Jeff Garzik wrote: > > Below is the current CLD replication patch, which takes CLD from being > a single-node service to a fully replicated, highly available service. > > The server implementation should be complete. > > The current merge blocker is needed code in libcldc, which does not > yet properly "hunt" for a master, among a group of peer CLD replicas > in a CLD cell. > > This will be a big milestone for CLD, when merged. The next milestone > will be adding the needed strict-cache-coherence caching semantics > to CLD server and client lib. > > This patch was generated against git commit > 511b8dafb233ee85e60ddf7eda212f87963e150c. 
> > --- > server/cld.h | 20 +++ > server/cldb.c | 69 +++++++++++-- > server/cldb.h | 9 + > server/cldbadm.c | 8 - > server/server.c | 286 > +++++++++++++++++++++++++++++++++++++++++++++++++++--- > test/pid-exists | 13 +- > test/prep-db | 19 ++- > test/start-daemon | 26 ++++ > test/stop-daemon | 32 ++++-- > 9 files changed, 428 insertions(+), 54 deletions(-) > > diff --git a/server/cld.h b/server/cld.h > index 21f103d..08e6b12 100644 > --- a/server/cld.h > +++ b/server/cld.h > @@ -91,6 +91,15 @@ struct msg_params { > size_t msg_len; > }; > > +enum st_cldb { > + ST_CLDB_INIT, > + ST_CLDB_OPEN, > + ST_CLDB_ACTIVE, > + ST_CLDB_MASTER, > + ST_CLDB_SLAVE, > + ST_CLDBNUM > +}; > + > struct server_stats { > unsigned long poll; /* number polls */ > unsigned long event; /* events dispatched */ > @@ -114,6 +123,17 @@ struct server { > int pid_fd; > > char *port; /* bind port */ > + unsigned short rep_port; /* db4 replication port */ > + > + char *myhost; > + char *force_myhost; > + GList *rep_remotes; > + > + unsigned int n_peers; /* total peers in cell */ > + > + int rep_pipe[2]; > + > + enum st_cldb state_cldb, state_cldb_new; > > struct cldb cldb; /* database info */ > > diff --git a/server/cldb.c b/server/cldb.c > index 3e7c95c..254decd 100644 > --- a/server/cldb.c > +++ b/server/cldb.c > @@ -25,8 +25,6 @@ > #include <glib.h> > #include "cld.h" > > -static int cldb_up(struct cldb *cldb, unsigned int flags); > - > /* > * db4 page sizes for our various databases. Filesystem block size > * is recommended, so 4096 was chosen (default ext3 block size). 
> @@ -202,6 +200,30 @@ err_out: > return -EIO; > } > > +static int add_remote_sites(DB_ENV *dbenv, GList *remotes, int *nsites) > +{ > + int rc; > + struct db_remote *rp; > + GList *tmp; > + > + *nsites = 0; > + for (tmp = remotes; tmp; tmp = tmp->next) { > + rp = tmp->data; > + > + rc = dbenv->repmgr_add_remote_site(dbenv, rp->host, rp->port, > + NULL, 0); > + if (rc) { > + dbenv->err(dbenv, rc, > + "dbenv->add.remote.site host %s port %u", > + rp->host, rp->port); > + return rc; > + } > + (*nsites)++; > + } > + > + return 0; > +} > + > static void db4_event(DB_ENV *dbenv, u_int32_t event, void *event_info) > { > struct cldb *cldb = dbenv->app_private; > @@ -229,12 +251,13 @@ static void db4_event(DB_ENV *dbenv, u_int32_t event, > void *event_info) > > int cldb_init(struct cldb *cldb, const char *db_home, const char > *db_password, > unsigned int env_flags, const char *errpfx, bool do_syslog, > - unsigned int flags, void (*cb)(enum db_event)) > + GList *remotes, char *rep_host, unsigned short rep_port, > + int n_peers, void (*cb)(enum db_event)) > { > - int rc; > + int rc, nsites = 0; > DB_ENV *dbenv; > > - cldb->is_master = true; > + cldb->is_master = false; > cldb->home = db_home; > cldb->state_cb = cb; > > @@ -281,25 +304,55 @@ int cldb_init(struct cldb *cldb, const char *db_home, > const char *db_password, > cldb->keyed = true; > } > > + rc = dbenv->repmgr_set_local_site(dbenv, rep_host, rep_port, 0); > + if (rc) { > + dbenv->err(dbenv, rc, "dbenv->set_local_site"); > + goto err_out; > + } > + > rc = dbenv->set_event_notify(dbenv, db4_event); > if (rc) { > dbenv->err(dbenv, rc, "dbenv->set_event_notify"); > goto err_out; > } > > + rc = dbenv->rep_set_priority(dbenv, 100); > + if (rc) { > + dbenv->err(dbenv, rc, "dbenv->rep_set_priority"); > + goto err_out; > + } > + > + rc = dbenv->rep_set_nsites(dbenv, n_peers); > + if (rc) { > + dbenv->err(dbenv, rc, "dbenv->rep_set_nsites"); > + goto err_out; > + } > + > + rc = dbenv->repmgr_set_ack_policy(dbenv, 
DB_REPMGR_ACKS_QUORUM); > + if (rc) { > + dbenv->err(dbenv, rc, "dbenv->rep_ack_policy"); > + goto err_out; > + } > + > /* init DB transactional environment, stored in directory db_home */ > env_flags |= DB_INIT_LOG | DB_INIT_LOCK | DB_INIT_MPOOL; > - env_flags |= DB_INIT_TXN; > + env_flags |= DB_INIT_TXN | DB_INIT_REP; > rc = dbenv->open(dbenv, db_home, env_flags, S_IRUSR | S_IWUSR); > if (rc) { > dbenv->err(dbenv, rc, "dbenv->open"); > goto err_out; > } > > - rc = cldb_up(cldb, flags); > + rc = add_remote_sites(dbenv, remotes, &nsites); > if (rc) > goto err_out; > > + rc = dbenv->repmgr_start(dbenv, 2, DB_REP_ELECTION); > + if (rc) { > + dbenv->err(dbenv, rc, "dbenv->repmgr_start"); > + goto err_out; > + } > + > return 0; > > err_out: > @@ -310,7 +363,7 @@ err_out: > /* > * open databases > */ > -static int cldb_up(struct cldb *cldb, unsigned int flags) > +int cldb_up(struct cldb *cldb, unsigned int flags) > { > DB_ENV *dbenv = cldb->env; > int rc; > diff --git a/server/cldb.h b/server/cldb.h > index d28f732..f8f26db 100644 > --- a/server/cldb.h > +++ b/server/cldb.h > @@ -107,6 +107,11 @@ enum db_event { > CLDB_EV_NONE, CLDB_EV_CLIENT, CLDB_EV_MASTER, CLDB_EV_ELECTED > }; > > +struct db_remote { /* remotes for cldb_init */ > + char *host; > + unsigned short port; > +}; > + > struct cldb { > bool is_master; > bool keyed; /* using encryption? 
*/ > @@ -133,7 +138,9 @@ struct cldb { > > extern int cldb_init(struct cldb *cldb, const char *db_home, const char > *db_password, > unsigned int env_flags, const char *errpfx, bool do_syslog, > - unsigned int flags, void (*cb)(enum db_event)); > + GList *remotes, char *rep_host, unsigned short rep_port, > + int n_peers, void (*cb)(enum db_event)); > +extern int cldb_up(struct cldb *cldb, unsigned int flags); > extern void cldb_down(struct cldb *cldb); > extern void cldb_fini(struct cldb *cldb); > > diff --git a/server/cldbadm.c b/server/cldbadm.c > index 37e8e36..9342f66 100644 > --- a/server/cldbadm.c > +++ b/server/cldbadm.c > @@ -78,7 +78,8 @@ int main(int argc, char *argv[]) > } > > if (cldb_init(&cld_adm.cldb, cld_adm.data_dir, NULL, > - DB_RECOVER, "cldbadm", false, 0, NULL)) > + DB_RECOVER, "cldbadm", false, > + NULL, NULL, 0, 0, NULL)) > goto err_dbopen; > > switch (cld_adm.mode) { > @@ -142,8 +143,9 @@ static error_t parse_opt(int key, char *arg, struct > argp_state *state) > * Stubs for contents of cldb.c > */ > int cldb_init(struct cldb *cldb, const char *db_home, const char > *db_password, > - unsigned int env_flags, const char *errpfx, bool do_syslog, > - unsigned int flags, void (*cb)(enum db_event)) > + unsigned int env_flags, const char *errpfx, bool do_syslog, > + GList *remotes, char *rep_host, unsigned short rep_port, > + int n_peers, void (*cb)(enum db_event)) > { > > return 0; > diff --git a/server/server.c b/server/server.c > index 02e6231..fb51a42 100644 > --- a/server/server.c > +++ b/server/server.c > @@ -29,6 +29,7 @@ > #include <errno.h> > #include <syslog.h> > #include <locale.h> > +#include <ctype.h> > #include <argp.h> > #include <netdb.h> > #include <signal.h> > @@ -46,6 +47,12 @@ const char *argp_program_version = PACKAGE_VERSION; > > enum { > CLD_RAW_MSG_SZ = 4096, > + > + CLD_DEF_REP_PORT = 9081, > + > + CLD_DEF_PEERS = 5, > + CLD_MIN_PEERS = 3, > + CLD_MAX_PEERS = 400, /* arbitrary "sanity" limit */ > }; > > static struct 
argp_option options[] = { > @@ -58,10 +65,18 @@ static struct argp_option options[] = { > "Switch the log to standard error" }, > { "foreground", 'F', NULL, 0, > "Run in foreground, do not fork" }, > + { "myhost", 'm', "HOST", 0, > + "Force local hostname to HOST (def: autodetect)" }, > { "port", 'p', "PORT", 0, > "bind to UDP port PORT. Default: " CLD_DEF_PORT }, > { "pid", 'P', "FILE", 0, > "Write daemon process id to FILE. Default: " CLD_DEF_PIDFN }, > + { "rep-port", 'r', "PORT", 0, > + "bind replication engine to port PORT (def: 9081)" }, > + { "remote", 'R', "HOST:PORT", 0, > + "Add a HOST:PORT pair to list of remote hosts. Use this argument > multiple times to build cell's peer list." }, > + { "cell-size", 'S', "PEERS", 0, > + "Total number of PEERS in cell. (PEERS/2)+1 required for quorum. > Must be an odd number (def: 5)" }, > { } > }; > > @@ -79,10 +94,15 @@ static bool use_syslog = true; > int debugging = 0; > struct timeval current_time; > > +static const char *state_name_cldb[ST_CLDBNUM] = { > + "Init", "Open", "Active", "Master", "Slave" > +}; > struct server cld_srv = { > - .data_dir = CLD_DEF_DATADIR, > - .pid_file = CLD_DEF_PIDFN, > + .data_dir = "/spare/tmp/cld/lib", > + .pid_file = "/var/run/cld.pid", > .port = CLD_DEF_PORT, > + .rep_port = CLD_DEF_REP_PORT, > + .n_peers = CLD_DEF_PEERS, > }; > > static void ensure_root(void); > @@ -108,6 +128,33 @@ void cldlog(int prio, const char *fmt, ...) > va_end(ap); > } > > +/* > + * Find out own hostname. > + * This is needed for: > + * - finding the local domain and its SRV records > + * Do this before our state machines start ticking, so we can quit with > + * a meaningful message easily. 
> + */ > +static char *get_hostname(void) > +{ > + enum { hostsz = 64 }; > + char hostb[hostsz]; > + char *ret; > + > + if (gethostname(hostb, hostsz-1) < 0) { > + cldlog(LOG_ERR, "get_hostname: gethostname error (%d): %s", > + errno, strerror(errno)); > + exit(1); > + } > + hostb[hostsz-1] = 0; > + if ((ret = strdup(hostb)) == NULL) { > + cldlog(LOG_ERR, "get_hostname: no core (%ld)", > + (long)strlen(hostb)); > + exit(1); > + } > + return ret; > +} > + > int udp_tx(struct server_socket *sock, struct sockaddr *addr, > socklen_t addr_len, const void *data, size_t data_len) > { > @@ -484,6 +531,55 @@ static void cldb_checkpoint(struct timer *timer) > add_chkpt_timer(); > } > > +static void cldb_state_cb(enum db_event event) > +{ > + > + switch (event) { > + case CLDB_EV_ELECTED: > + /* > + * Safe to stop ignoring bogus client indication, > + * so unmute us by advancing the state. > + */ > + if (cld_srv.state_cldb == ST_CLDB_OPEN) > + cld_srv.state_cldb = ST_CLDB_ACTIVE; > + break; > + case CLDB_EV_CLIENT: > + case CLDB_EV_MASTER: > + /* > + * This callback runs on the context of the replication > + * manager thread, and calling any of our functions thus > + * turns our program into a multi-threaded one. Instead > + * we do a loopbreak and postpone the processing. 
> + */ > + if (cld_srv.state_cldb != ST_CLDB_INIT && > + cld_srv.state_cldb != ST_CLDB_OPEN) { > + char c = 0x42; > + > + if (event == CLDB_EV_MASTER) > + cld_srv.state_cldb_new = ST_CLDB_MASTER; > + else > + cld_srv.state_cldb_new = ST_CLDB_SLAVE; > + if (debugging) { > + cldlog(LOG_DEBUG, "CLDB state > %s", > + state_name_cldb[cld_srv.state_cldb_new]); > + } > + > + /* wake up main loop */ > + write(cld_srv.rep_pipe[1], &c, 1); > + } > + break; > + default: > + cldlog(LOG_WARNING, "API confusion with CLDB, event 0x%x", > event); > + cld_srv.state_cldb = ST_CLDB_OPEN; /* wrong, stub for now */ > + cld_srv.state_cldb_new = ST_CLDB_INIT; > + } > +} > + > +static bool noop_event(int fd, short events, void *userdata) > +{ > + return true; /* continue main loop; do NOT terminate server */ > +} > + > static int net_open(void) > { > int ipv6_found; > @@ -575,6 +671,32 @@ err_addr: > return rc; > } > > +static void cldb_state_process(enum st_cldb new_state) > +{ > + unsigned int db_flags; > + > + if ((new_state == ST_CLDB_MASTER || new_state == ST_CLDB_SLAVE) && > + cld_srv.state_cldb == ST_CLDB_ACTIVE) { > + > + db_flags = DB_CREATE | DB_THREAD; > + if (cldb_up(&cld_srv.cldb, db_flags)) > + return; > + > + ensure_root(); > + > + if (sess_load(cld_srv.sessions) != 0) { > + cldlog(LOG_ERR, "session load failed. 
FIXME: I want > error handling"); > + return; > + } > + > + add_chkpt_timer(); > + } else { > + if (debugging) > + cldlog(LOG_DEBUG, "unhandled state transition %d -> %d", > + cld_srv.state_cldb, new_state); > + } > +} > + > static void segv_signal(int signal) > { > cldlog(LOG_ERR, "SIGSEGV"); > @@ -598,10 +720,59 @@ static void stats_dump(void) > { > X(poll); > X(event); > + cldlog(LOG_INFO, "State: CLDB %s", > + state_name_cldb[cld_srv.state_cldb]); > } > > #undef X > > +static bool add_remote(const char *arg) > +{ > + size_t arg_len = strlen(arg); > + int i, port; > + struct db_remote *rp; > + char *s_port, *colon; > + > + if (!arg_len) > + return false; > + > + /* verify no whitespace in input */ > + for (i = 0; i < arg_len; i++) > + if (isspace(arg[i])) > + return false; > + > + /* find colon delimiter */ > + colon = strchr(arg, ':'); > + if (!colon || (colon == arg)) > + return false; > + s_port = colon + 1; > + > + /* parse replication port number */ > + port = atoi(s_port); > + if (port < 1 || port > 65535) > + return false; > + > + /* alloc and fill in remote-host record */ > + rp = malloc(sizeof(*rp)); > + if (!rp) > + return false; > + > + rp->port = port; > + rp->host = strdup(arg); > + if (!rp->host) { > + free(rp); > + return false; > + } > + > + /* truncate string down to simply hostname portion */ > + rp->host[colon - arg] = 0; > + > + /* add remote host to global list */ > + cld_srv.rep_remotes = g_list_append(cld_srv.rep_remotes, rp); > + > + return true; > +} > + > static error_t parse_opt (int key, char *arg, struct argp_state *state) > { > switch(key) { > @@ -622,6 +793,15 @@ static error_t parse_opt (int key, char *arg, struct > argp_state *state) > case 'F': > cld_srv.flags |= SFL_FOREGROUND; > break; > + case 'm': > + if ((strlen(arg) > 3) && (strlen(arg) < 64) && > + (strchr(arg, '.'))) > + cld_srv.force_myhost = arg; > + else { > + fprintf(stderr, "invalid myhost: '%s'\n", arg); > + argp_usage(state); > + } > + break; > case 'p': > if 
(atoi(arg) > 0 && atoi(arg) < 65536) > cld_srv.port = arg; > @@ -633,6 +813,31 @@ static error_t parse_opt (int key, char *arg, struct > argp_state *state) > case 'P': > cld_srv.pid_file = arg; > break; > + case 'r': > + if (atoi(arg) > 0 && atoi(arg) < 65536) > + cld_srv.rep_port = atoi(arg); > + else { > + fprintf(stderr, "invalid rep-port: '%s'\n", arg); > + argp_usage(state); > + } > + break; > + case 'R': > + if (!add_remote(arg)) { > + fprintf(stderr, "invalid remote host:port: '%s'\n", > arg); > + argp_usage(state); > + } > + break; > + case 'S': { > + int n_peers = atoi(arg); > + if ((n_peers >= CLD_MIN_PEERS) && (n_peers < CLD_MAX_PEERS) && > + (n_peers & 0x01)) > + cld_srv.n_peers = atoi(arg); > + else { > + fprintf(stderr, "invalid peer count: '%s'\n", arg); > + argp_usage(state); > + } > + break; > + } > case ARGP_KEY_ARG: > argp_usage(state); /* too many args */ > break; > @@ -648,9 +853,12 @@ static error_t parse_opt (int key, char *arg, struct > argp_state *state) > int main (int argc, char *argv[]) > { > error_t aprc; > - int rc = 1; > + int rc = 1, env_flags; > time_t next_timeout; > > + cld_srv.state_cldb = > + cld_srv.state_cldb_new = ST_CLDB_INIT; > + > /* isspace() and strcasecmp() consistency requires this */ > setlocale(LC_ALL, "C"); > > @@ -674,6 +882,20 @@ int main (int argc, char *argv[]) > if (use_syslog) > openlog(PROGRAM_NAME, LOG_PID, LOG_LOCAL3); > > + if (cld_srv.force_myhost) > + cld_srv.myhost = strdup(cld_srv.force_myhost); > + else > + cld_srv.myhost = get_hostname(); > + > + if (debugging) > + cldlog(LOG_DEBUG, "our hostname: %s", cld_srv.myhost); > + > + /* remotes file should list all in peer group, except for us */ > + if ((cld_srv.n_peers - 1) != g_list_length(cld_srv.rep_remotes)) { > + cldlog(LOG_ERR, "n_peers does not match remotes file loaded"); > + goto err_out; > + } > + > if (!(cld_srv.flags & SFL_FOREGROUND) && (daemon(1, !use_syslog) < 0)) { > syslogerr("daemon"); > goto err_out; > @@ -694,16 +916,7 @@ int main (int 
argc, char *argv[]) > signal(SIGTERM, term_signal); > signal(SIGUSR1, stats_signal); > > - if (cldb_init(&cld_srv.cldb, cld_srv.data_dir, NULL, > - DB_CREATE | DB_THREAD | DB_RECOVER, > - "cld", use_syslog, > - DB_CREATE | DB_THREAD, NULL)) > - exit(1); > - > - ensure_root(); > - > timer_init(&cld_srv.chkpt_timer, cldb_checkpoint, NULL); > - add_chkpt_timer(); > > rc = 1; > > @@ -716,17 +929,53 @@ int main (int argc, char *argv[]) > !cld_srv.polls) > goto err_out_pid; > > - if (sess_load(cld_srv.sessions) != 0) > - goto err_out_pid; > + /* init pipe for replication manager notifications to us */ > + if (pipe(cld_srv.rep_pipe) < 0) { > + syslogerr("pipe"); > + goto err_out; > + } > > /* set up server networking */ > rc = net_open(); > if (rc) > goto err_out_pid; > > + { > + struct pollfd pfd; > + struct server_poll sp; > + > + /* > + * add pipe to poll list, after doing so with our net sockets > + */ > + sp.fd = cld_srv.rep_pipe[0]; > + sp.cb = noop_event; > + sp.userdata = NULL; > + g_array_append_val(cld_srv.poll_data, sp); > + > + pfd.fd = cld_srv.rep_pipe[0]; > + pfd.events = POLLIN; > + pfd.revents = 0; > + g_array_append_val(cld_srv.polls, pfd); > + } > + > + env_flags = DB_RECOVER | DB_CREATE | DB_THREAD; > + if (cldb_init(&cld_srv.cldb, cld_srv.data_dir, NULL, > + env_flags, "cld", true, > + cld_srv.rep_remotes, > + cld_srv.myhost, cld_srv.rep_port, > + cld_srv.n_peers, cldb_state_cb)) { > + cldlog(LOG_ERR, "Failed to open CLDB, limping"); > + } else { > + cld_srv.state_cldb = > + cld_srv.state_cldb_new = ST_CLDB_OPEN; > + } > + > cldlog(LOG_INFO, "initialized: cport %s, dbg %u", > cld_srv.port, > debugging); > + cldlog(LOG_INFO, "replication: %s:%u", > + cld_srv.myhost, > + cld_srv.rep_port); > > next_timeout = timers_run(); > > @@ -789,13 +1038,20 @@ int main (int argc, char *argv[]) > } > > next_timeout = timers_run(); > + > + if (cld_srv.state_cldb_new != ST_CLDB_INIT && > + cld_srv.state_cldb_new != cld_srv.state_cldb) { > + 
cldb_state_process(cld_srv.state_cldb_new); > + cld_srv.state_cldb = cld_srv.state_cldb_new; > + } > } > > cldlog(LOG_INFO, "shutting down"); > > if (cld_srv.cldb.up) > cldb_down(&cld_srv.cldb); > - cldb_fini(&cld_srv.cldb); > + if (cld_srv.state_cldb >= ST_CLDB_OPEN) > + cldb_fini(&cld_srv.cldb); > > rc = 0; > > diff --git a/test/pid-exists b/test/pid-exists > index 351b4f1..4fa2275 100755 > --- a/test/pid-exists > +++ b/test/pid-exists > @@ -1,9 +1,12 @@ > #!/bin/sh > > -if [ ! -f cld.pid ] > -then > - echo "pid file not found." > - exit 1 > -fi > +for n in 1 2 3 > +do > + if [ ! -f cld$n.pid ] > + then > + echo "cld$n.pid not found." > + exit 1 > + fi > +done > > exit 0 > diff --git a/test/prep-db b/test/prep-db > index 353ca4a..3e4fb60 100755 > --- a/test/prep-db > +++ b/test/prep-db > @@ -2,13 +2,16 @@ > > DATADIR=data > > -mkdir -p $DATADIR > - > -if [ ! -d $DATADIR ] > -then > - rm -rf $DATADIR > - echo "test database dir not found." > - exit 1 > -fi > +for n in 1 2 3 > +do > + mkdir -p $DATADIR/n$n/data > + > + if [ ! -d $DATADIR/n$n/data ] > + then > + rm -rf $DATADIR > + echo "test database dir for node $n not found." > + exit 1 > + fi > +done > > exit 0 > diff --git a/test/start-daemon b/test/start-daemon > index 4cb9fd7..06b3250 100755 > --- a/test/start-daemon > +++ b/test/start-daemon > @@ -1,13 +1,31 @@ > #!/bin/sh > > -if [ -f cld.pid ] > +if [ -f cld1.pid -o -f cld2.pid -o -f cld3.pid ] > then > - echo "pid file found. daemon still running?" > + echo "pid file found. daemons still running?" 
> exit 1 > fi > > -../server/cld -P cld.pid -d "$PWD/data" -p 18181 -E > +../server/cld -d "$PWD/data/n1/data" -p 18181 -r 19181 -P cld1.pid -E > \ > + -D 2 -S 3 \ > + -m localhost.localdomain \ > + -R localhost.localdomain:19182 \ > + -R localhost.localdomain:19183 > > -sleep 3 > +../server/cld -d "$PWD/data/n2/data" -p 18182 -r 19182 -P cld2.pid -E > \ > + -D 2 -S 3 \ > + -m localhost.localdomain \ > + -R localhost.localdomain:19181 \ > + -R localhost.localdomain:19183 > + > +../server/cld -d "$PWD/data/n3/data" -p 18183 -r 19183 -P cld3.pid -E > \ > + -D 2 -S 3 \ > + -m localhost.localdomain \ > + -R localhost.localdomain:19181 \ > + -R localhost.localdomain:19182 > +sleep 1 > + > +echo " start-daemon: Waiting 20s, for daemons to start up..." > +sleep 20 > > exit 0 > diff --git a/test/stop-daemon b/test/stop-daemon > index 221dc46..d00fda6 100755 > --- a/test/stop-daemon > +++ b/test/stop-daemon > @@ -1,23 +1,35 @@ > #!/bin/sh > > -if [ ! -f cld.pid ] > -then > - echo no daemon pid file found. > - exit 1 > -fi > +for n in 1 2 3 > +do > + if [ ! -f cld$n.pid ] > + then > + echo " stop-daemon: cld$n.pid not found." > + exit 1 > + fi > +done > + > > -kill `cat cld.pid` > +kill `cat cld1.pid cld2.pid cld3.pid` > > for ((n = 0; n < 10; n++)) > do > - if [ ! -f cld.pid ] > + if [ -f cld1.pid -o -f cld2.pid -o -f cld3.pid ] > then > + sleep 1 > + else > exit 0 > fi > > - sleep 1 > done > > -echo "PID file not removed, after signal sent." > -rm -f cld.pid > +for n in 1 2 3 > +do > + if [ -f cld$n.pid ] > + then > + echo " stop-daemon: cld$n.pid found, after signal sent." 
> + fi > +done > + > +rm -f cld?.pid > exit 1 > -- > To unsubscribe from this list: send the line "unsubscribe hail-devel" in > the body of a message to [email protected] > More majordomo info at http://vger.kernel.org/majordomo-info.html > > -- To unsubscribe from this list: send the line "unsubscribe hail-devel" in the body of a message to [email protected] More majordomo info at http://vger.kernel.org/majordomo-info.html
