commit 38e47f046d4d3f9657e9644417ad8b154b46a6e7
Author: Jeff Garzik <[email protected]>
Date: Fri Mar 5 08:03:48 2010 -0500
Self-check cleanups.
- remove self-check period. self-check period is externally controlled.
- self-check initiation limited to specified list of administrative users
- clear up confusion about check "start" versus "poke." "start" is
preferred.
- thread is not spawned until first check
- add some whitespace for readability
- Unrelated: test/server-test.cfg: s/spaces/tab/ in <CLD> clause
Signed-off-by: Jeff Garzik <[email protected]>
diff --git a/doc/setup.txt b/doc/setup.txt
index f8d9f00..a5e7c66 100644
--- a/doc/setup.txt
+++ b/doc/setup.txt
@@ -104,15 +104,12 @@ _cld._udp.phx2.ex.com has SRV record 10 50 8081
maika.phx2.ex.com.
<Rack>F3R18</Rack>
</Geo>
-*) configure the self-check period, if desired. Note that the actual amount
- of time that the daemon sleeps between self-check sweeps is randomized
- in order to prevent the convoy effect, so the value below is approximate.
+*) configure the list of users authorized to initiate background self-check
- <!-- 30 * 60 * 60 == 108000 seconds -->
- <SelfCheckPeriod>108000</SelfCheckPeriod>
-
- The default is zero, which disables the periodic self-check. It may be
- appropriate if the application performs its own end-to-end checking.
+ <Check>
+ <User>admin_user</User>
+ <User>another_user</User>
+ </Check>
*) start daemon (it will put itself into the background) with the
configuration file just created:
diff --git a/include/chunkc.h b/include/chunkc.h
index 5ebc936..683992e 100644
--- a/include/chunkc.h
+++ b/include/chunkc.h
@@ -89,7 +89,7 @@ extern bool stc_put_inline(struct st_client *stc, const void
*key,
extern bool stc_del(struct st_client *stc, const void *key, size_t key_len);
extern bool stc_ping(struct st_client *stc);
-extern bool stc_check_poke(struct st_client *stc);
+extern bool stc_check_start(struct st_client *stc);
extern bool stc_check_status(struct st_client *stc,
struct chunk_check_status *out);
diff --git a/lib/chunkdc.c b/lib/chunkdc.c
index 93783bf..e9e0e0f 100644
--- a/lib/chunkdc.c
+++ b/lib/chunkdc.c
@@ -978,13 +978,13 @@ bool stc_ping(struct st_client *stc)
return true;
}
-bool stc_check_poke(struct st_client *stc)
+bool stc_check_start(struct st_client *stc)
{
struct chunksrv_resp resp;
struct chunksrv_req *req = (struct chunksrv_req *) stc->req_buf;
if (stc->verbose)
- fprintf(stderr, "libstc: CHECK_POKE\n");
+ fprintf(stderr, "libstc: CHECK_START\n");
/* initialize request */
req_init(stc, req);
@@ -1004,7 +1004,7 @@ bool stc_check_poke(struct st_client *stc)
/* check response code */
if (resp.resp_code != che_Success) {
if (stc->verbose)
- fprintf(stderr, "CHECK_POKE resp code: %d\n",
+ fprintf(stderr, "CHECK_START resp code: %d\n",
resp.resp_code);
return false;
}
diff --git a/server/chunkd.h b/server/chunkd.h
index 41e0a4b..42bcb21 100644
--- a/server/chunkd.h
+++ b/server/chunkd.h
@@ -223,8 +223,9 @@ struct server {
char *group;
uint32_t nid;
struct geo loc;
- time_t chk_period;
+
int chk_pipe[2];
+ GList *chk_users;
TCHDB *tbl_master;
struct objcache actives;
@@ -342,8 +343,7 @@ extern void resp_init_req(struct chunksrv_resp *resp,
extern void read_config(void);
/* selfcheck.c */
-extern void chk_init(void);
-extern int chk_spawn(time_t period, TCHDB *hdb);
+extern int chk_spawn(TCHDB *hdb);
static inline bool use_sendfile(struct client *cli)
{
diff --git a/server/config.c b/server/config.c
index fee2c0c..69b5e9b 100644
--- a/server/config.c
+++ b/server/config.c
@@ -41,6 +41,7 @@ struct config_context {
bool badnid;
bool in_ssl;
bool in_listen;
+ bool in_chk;
bool have_ssl;
char *vol_path;
@@ -133,6 +134,13 @@ static void cfg_elm_start (GMarkupParseContext *context,
applog(LOG_ERR, "Nested CLD in configuration");
}
}
+ else if (!strcmp(element_name, "Check")) {
+ if (!cc->in_chk) {
+ cc->in_chk = true;
+ } else {
+ applog(LOG_ERR, "Nested Check in configuration");
+ }
+ }
}
static void cfg_elm_end_listen(struct config_context *cc)
@@ -293,7 +301,16 @@ static void cfg_elm_end (GMarkupParseContext *context,
else if (!strcmp(element_name, "SSL"))
cc->in_ssl = false;
- else if (cc->in_ssl && cc->text && !strcmp(element_name, "PrivateKey"))
{
+ else if (cc->in_chk && cc->text && !strcmp(element_name, "User")) {
+ chunkd_srv.chk_users =
+ g_list_append(chunkd_srv.chk_users, cc->text);
+ cc->text = NULL;
+ }
+
+ else if (!strcmp(element_name, "Check"))
+ cc->in_chk = false;
+
+ else if (cc->in_ssl && cc->text && !strcmp(element_name, "PrivateKey")){
if (SSL_CTX_use_PrivateKey_file(ssl_ctx, cc->text,
SSL_FILETYPE_PEM) <= 0)
applog(LOG_ERR, "Failed to read SSL private key '%s'",
@@ -442,24 +459,6 @@ static void cfg_elm_end (GMarkupParseContext *context,
cc->text = NULL;
}
- else if (!strcmp(element_name, "SelfCheckPeriod")) {
- if (!cc->text) {
- applog(LOG_WARNING, "SelfCheckPeriod element empty");
- return;
- }
- n = strtol(cc->text, NULL, 10);
- if (n < 0 || n >= LONG_MAX) {
- applog(LOG_ERR, "SelfCheckPeriod '%s' is invalid",
- cc->text);
- free(cc->text);
- cc->text = NULL;
- return;
- }
- chunkd_srv.chk_period = n;
- free(cc->text);
- cc->text = NULL;
- }
-
else {
applog(LOG_WARNING, "Unknown element \"%s\"", element_name);
}
diff --git a/server/selfcheck.c b/server/selfcheck.c
index f8cbaef..5207e7f 100644
--- a/server/selfcheck.c
+++ b/server/selfcheck.c
@@ -13,7 +13,6 @@
#include "chunkd.h"
struct chk_arg {
- time_t period;
TCHDB *hdb;
// GThread *gthread;
};
@@ -183,62 +182,46 @@ static void chk_thread_command(struct chk_tls *tls)
static gpointer chk_thread_func(gpointer data)
{
- struct chk_tls _tls;
- struct chk_tls *tls;
+ struct chk_tls _tls = { .arg = data };
+ struct chk_tls *tls = &_tls;
struct pollfd pfd[1];
- int tmo;
- time_t dt;
int i;
int rc;
- tls = &_tls;
- memset(tls, 0, sizeof(struct chk_tls));
- tls->arg = data;
-
for (;;) {
g_mutex_lock(chunkd_srv.bigmutex);
chunkd_srv.chk_state = CHK_ST_IDLE;
g_mutex_unlock(chunkd_srv.bigmutex);
- /*
- * Without the randomization, all systems in a low loaded
- * datacenter are virtually guaranteed to start checks in the
- * same time, blow fuses and/or disrupt applications.
- */
- dt = tls->arg->period;
- if (dt == 0) {
- tmo = -1;
- } else {
- if (dt >= 3)
- dt += rand() % (dt/3);
- if (debugging)
- applog(LOG_DEBUG, "chk: sleeping %lu s", dt);
- tmo = dt * 1000;
- }
-
memset(pfd, 0, sizeof(pfd));
pfd[0].fd = chunkd_srv.chk_pipe[0];
pfd[0].events = POLLIN;
- rc = poll(pfd, 1, tmo);
+ rc = poll(pfd, ARRAY_SIZE(pfd), -1);
if (rc < 0) {
applog(LOG_WARNING, "chk: poll error: %s",
strerror(errno));
break; /* don't flood, just die */
}
- if (rc) {
- for (i = 0; i < ARRAY_SIZE(pfd); i++) {
- if (pfd[i].revents) {
- if (i == 0) {
- chk_thread_command(tls);
- }
- }
+ if (rc == 0)
+ continue; /* should never happen */
+
+ for (i = 0; i < ARRAY_SIZE(pfd); i++) {
+ if (!pfd[i].revents)
+ continue;
+
+ switch (i) {
+ case 0:
+ chk_thread_command(tls);
+ break;
+ default:
+ /* do nothing */
+ break;
}
- } else {
- chk_thread_scan(tls);
}
}
+
return NULL;
}
@@ -249,7 +232,7 @@ static gpointer chk_thread_func(gpointer data)
*/
static struct chk_arg *thread;
-int chk_spawn(time_t period, TCHDB *hdb)
+int chk_spawn(TCHDB *hdb)
{
GThread *gthread;
struct chk_arg *arg;
@@ -260,7 +243,6 @@ int chk_spawn(time_t period, TCHDB *hdb)
applog(LOG_ERR, "No core");
return -1;
}
- arg->period = period;
arg->hdb = hdb;
gthread = g_thread_create(chk_thread_func, arg, FALSE, &error);
@@ -276,18 +258,3 @@ int chk_spawn(time_t period, TCHDB *hdb)
return 0;
}
-void chk_init()
-{
- int rc;
-
- if (!chunkd_srv.chk_period) {
- if (debugging)
- applog(LOG_DEBUG, "chk: disabled by configuration");
- return;
- }
-
- chunkd_srv.chk_state = CHK_ST_INIT;
- rc = chk_spawn(chunkd_srv.chk_period, chunkd_srv.tbl_master);
- if (rc)
- exit(1); /* message already logged */
-}
diff --git a/server/server.c b/server/server.c
index 1dd9d8f..0d83311 100644
--- a/server/server.c
+++ b/server/server.c
@@ -933,24 +933,47 @@ err_out:
return cli_err(cli, err, false);
}
-static bool chk_poke(struct client *cli)
+static bool chk_user_authorized(struct client *cli)
+{
+ GList *tmp = chunkd_srv.chk_users;
+
+ while (tmp) {
+ char *s;
+
+ s = tmp->data;
+ if (!strcmp(cli->user, s))
+ return true;
+
+ tmp = tmp->next;
+ }
+
+ return false;
+}
+
+static bool chk_start(struct client *cli)
{
unsigned char cmd;
int rc;
+ if (!chk_user_authorized(cli))
+ return cli_err(cli, che_AccessDenied, true);
+
g_mutex_lock(chunkd_srv.bigmutex);
+
switch (chunkd_srv.chk_state) {
case CHK_ST_OFF:
chunkd_srv.chk_state = CHK_ST_INIT;
g_mutex_unlock(chunkd_srv.bigmutex);
- rc = chk_spawn(chunkd_srv.chk_period, chunkd_srv.tbl_master);
+ rc = chk_spawn(chunkd_srv.tbl_master);
if (rc)
return cli_err(cli, che_InternalError, true);
break;
+
case CHK_ST_INIT:
case CHK_ST_RUNNING:
g_mutex_unlock(chunkd_srv.bigmutex);
return cli_err(cli, che_Busy, true);
+
default:
chunkd_srv.chk_state = CHK_ST_RUNNING;
g_mutex_unlock(chunkd_srv.bigmutex);
@@ -966,8 +989,11 @@ static bool chk_status(struct client *cli)
struct chunk_check_status outbuf;
memset(&outbuf, 0, sizeof(struct chunk_check_status));
+
g_mutex_lock(chunkd_srv.bigmutex);
+
outbuf.lastdone = cpu_to_le64(chunkd_srv.chk_done);
+
switch (chunkd_srv.chk_state) {
case CHK_ST_IDLE:
outbuf.state = chk_Idle;
@@ -979,6 +1005,7 @@ static bool chk_status(struct client *cli)
default:
outbuf.state = chk_Off;
}
+
g_mutex_unlock(chunkd_srv.bigmutex);
return cli_resp_bin(cli, &outbuf, sizeof(struct chunk_check_status));
@@ -1110,7 +1137,7 @@ static bool cli_evt_exec_req(struct client *cli, unsigned
int events)
rcb = volume_open(cli);
break;
case CHO_CHECK_START:
- rcb = chk_poke(cli);
+ rcb = chk_start(cli);
break;
case CHO_CHECK_STATUS:
rcb = chk_status(cli);
@@ -1804,8 +1831,6 @@ int main (int argc, char *argv[])
goto err_out_cld;
}
- chk_init();
-
/* set up server networking */
for (tmpl = chunkd_srv.listeners; tmpl; tmpl = tmpl->next) {
rc = net_open(tmpl->data);
diff --git a/test/selfcheck-unit.c b/test/selfcheck-unit.c
index fbd9a09..ead5568 100644
--- a/test/selfcheck-unit.c
+++ b/test/selfcheck-unit.c
@@ -37,7 +37,6 @@ struct config_context {
char *text;
char *path;
- long period;
};
static void cfg_elm_start(GMarkupParseContext *context,
@@ -56,20 +55,12 @@ static void cfg_elm_end(GMarkupParseContext *context,
GError **error)
{
struct config_context *cc = user_data;
- long n;
if (!strcmp(element_name, "Path")) {
OK(cc->text);
free(cc->path);
cc->path = cc->text;
cc->text = NULL;
- } else if (!strcmp(element_name, "SelfCheckPeriod")) {
- OK(cc->text);
- n = strtol(cc->text, NULL, 10);
- OK(n >= 0 && n < LONG_MAX);
- cc->period = n;
- free(cc->text);
- cc->text = NULL;
} else {
free(cc->text);
cc->text = NULL;
@@ -251,7 +242,6 @@ int main(int argc, char *argv[])
memset(&ctx, 0, sizeof(struct config_context));
read_config(&ctx);
OK(ctx.path); /* must have a path */
- OK(!ctx.period); /* should have a SelfCheckPeriod set to off */
/*
* Step 1: create the object
@@ -275,11 +265,9 @@ int main(int argc, char *argv[])
OK(rcb);
/*
- * Step 3: damage the back-end file and wait
- * We sleep for longer than the self-check period because:
- * 1) the server sleeps for up to period*1.5
- * 2) the server may be quite busy walking numerous objects
- * 3) the build system may be overloaded
+ * Step 3: damage the back-end file and wait, because:
+ * 1) the server may be quite busy walking numerous objects
+ * 2) the build system may be overloaded
*/
rcb = be_file_damage(fn);
OK(rcb);
@@ -291,7 +279,7 @@ int main(int argc, char *argv[])
OK(stc);
rcb = stc_check_status(stc, &status1);
OK(rcb);
- rcb = stc_check_poke(stc);
+ rcb = stc_check_start(stc);
OK(rcb);
cnt = 0;
for (;;) {
diff --git a/test/server-test.cfg b/test/server-test.cfg
index 80833d2..d98fca4 100644
--- a/test/server-test.cfg
+++ b/test/server-test.cfg
@@ -23,7 +23,12 @@
<NID>1</NID>
<CLD>
- <PortFile>cld.port</PortFile>
- <Host>localhost</Host>
+ <PortFile>cld.port</PortFile>
+ <Host>localhost</Host>
</CLD>
+<Check>
+ <User>testuser</User>
+ <User>testuser2</User>
+</Check>
+
diff --git a/tools/chcli.c b/tools/chcli.c
index 24b3e4b..6fde35f 100644
--- a/tools/chcli.c
+++ b/tools/chcli.c
@@ -185,7 +185,7 @@ static void show_cmds(void)
"DEL key Delete key\n"
"PING Ping server\n"
"CHECKSTATUS Fetch status of server self-check\n"
-"CHECKPOKE Force server self-check\n"
+"CHECKSTART Begin server self-check\n"
"\n"
"Keys provided on the command line (as opposed to via -k) are stored\n"
"with a C-style nul terminating character appended, adding 1 byte to\n"
@@ -639,7 +639,7 @@ static int cmd_get(void)
return 0;
}
-static int cmd_check_poke(void)
+static int cmd_check_start(void)
{
struct st_client *stc;
@@ -647,8 +647,8 @@ static int cmd_check_poke(void)
if (!stc)
return 1;
- if (!stc_check_poke(stc)) {
- fprintf(stderr, "CHECK POKE failed\n");
+ if (!stc_check_start(stc)) {
+ fprintf(stderr, "CHECK START failed\n");
stc_free(stc);
return 1;
}
@@ -758,7 +758,7 @@ int main (int argc, char *argv[])
case CHC_CHECKSTATUS:
return cmd_check_status();
case CHC_CHECKSTART:
- return cmd_check_poke();
+ return cmd_check_start();
}
return 0;
--
To unsubscribe from this list: send the line "unsubscribe hail-devel" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html