The following reply was made to PR user/6509; it has been noted by GNATS.
From: Reyk Floeter <[email protected]>
To: Stuart Henderson <[email protected]>
Cc: [email protected], [email protected], Jonathan Gray <[email protected]>
Subject: Re: user/6509: relayctl show sessions make relayd crash
Date: Thu, 19 May 2011 13:59:36 +0200
On Thu, May 19, 2011 at 12:33:10PM +0200, Reyk Floeter wrote:
> I still get the problem with your diff applied on -current.
>
> fatal: wrong message for session
> hce exiting, pid 28577
> I'm thinking about another solution to make it fully async. I
> generally dislike this while(!done) loop in show_sessions() which is
> causing all kinds of difficulties. I try to come up with a diff.
>
ok, the attached diff reimplements the relayd show sessions part in an
asynchronous way. it does not lock the pfe and uses a simple
per-control connection reference counter to wait for the replies. the
positive side effect is that it should remove conflicts that leaded to
the crash before. hce can still talk to pfe while a control
connections is requesting session data.
ok?
reyk
Index: usr.sbin/relayd/control.c
===================================================================
RCS file: /cvs/src/usr.sbin/relayd/control.c,v
retrieving revision 1.39
diff -u -p -r1.39 control.c
--- usr.sbin/relayd/control.c 19 May 2011 08:56:49 -0000 1.39
+++ usr.sbin/relayd/control.c 19 May 2011 11:50:48 -0000
@@ -235,6 +235,14 @@ control_dispatch_imsg(int fd, short even
if (n == 0)
break;
+ if (c->waiting) {
+ log_debug("%s: unexpected imsg %d",
+ __func__, imsg.hdr.type);
+ imsg_free(&imsg);
+ control_close(fd);
+ return;
+ }
+
switch (imsg.hdr.type) {
case IMSG_CTL_SHOW_SUM:
show(c);
Index: usr.sbin/relayd/pfe.c
===================================================================
RCS file: /cvs/src/usr.sbin/relayd/pfe.c,v
retrieving revision 1.69
diff -u -p -r1.69 pfe.c
--- usr.sbin/relayd/pfe.c 19 May 2011 08:56:49 -0000 1.69
+++ usr.sbin/relayd/pfe.c 19 May 2011 11:50:49 -0000
@@ -222,6 +222,9 @@ pfe_dispatch_relay(int fd, struct privse
struct ctl_natlook cnl;
struct ctl_stats crs;
struct relay *rlay;
+ struct ctl_conn *c;
+ struct rsession con;
+ int cid;
switch (imsg->hdr.type) {
case IMSG_NATLOOK:
@@ -247,6 +250,35 @@ pfe_dispatch_relay(int fd, struct privse
rlay->rl_stats[crs.proc].interval =
env->sc_statinterval.tv_sec;
break;
+ case IMSG_CTL_SESSION:
+ IMSG_SIZE_CHECK(imsg, &con);
+ memcpy(&con, imsg->data, sizeof(con));
+ if ((c = control_connbyfd(con.se_cid)) == NULL) {
+ log_debug("%s: control connection %d not found",
+ __func__, con.se_cid);
+ return (0);
+ }
+ imsg_compose_event(&c->iev,
+ IMSG_CTL_SESSION, 0, 0, -1,
+ &con, sizeof(con));
+ break;
+ case IMSG_CTL_END:
+ IMSG_SIZE_CHECK(imsg, &cid);
+ memcpy(&cid, imsg->data, sizeof(cid));
+ if ((c = control_connbyfd(cid)) == NULL) {
+ log_debug("%s: control connection %d not found",
+ __func__, cid);
+ return (0);
+ }
+ if (c->waiting == 0) {
+ log_debug("%s: unexpected imsg", __func__);
+ return (-1);
+ } else if (--c->waiting == 0) {
+ /* Last ack for a previous request */
+ imsg_compose_event(&c->iev, IMSG_CTL_END,
+ 0, 0, -1, NULL, 0);
+ }
+ break;
default:
return (-1);
}
@@ -346,59 +378,18 @@ end:
void
show_sessions(struct ctl_conn *c)
{
- int n, proc, done;
- struct imsg imsg;
- struct imsgbuf *ibuf;
- struct rsession con;
+ int proc, cid;
for (proc = 0; proc < env->sc_prefork_relay; proc++) {
- ibuf = proc_ibuf(env->sc_ps, PROC_RELAY, proc);
+ cid = c->iev.ibuf.fd;
/*
* Request all the running sessions from the process
*/
proc_compose_imsg(env->sc_ps, PROC_RELAY, proc,
- IMSG_CTL_SESSION, -1, NULL, 0);
- proc_flush_imsg(env->sc_ps, PROC_RELAY, proc);
-
- /*
- * Wait for the reply and forward the messages to the
- * control connection.
- */
- done = 0;
- while (!done) {
- do {
- if ((n = imsg_read(ibuf)) == -1)
- fatalx("imsg_read error");
- } while (n == -2); /* handle non-blocking I/O */
- while (!done) {
- if ((n = imsg_get(ibuf, &imsg)) == -1)
- fatalx("imsg_get error");
- if (n == 0)
- break;
-
- switch (imsg.hdr.type) {
- case IMSG_CTL_SESSION:
- IMSG_SIZE_CHECK(&imsg, &con);
- memcpy(&con, imsg.data, sizeof(con));
-
- imsg_compose_event(&c->iev,
- IMSG_CTL_SESSION, 0, 0, -1,
- &con, sizeof(con));
- break;
- case IMSG_CTL_END:
- done = 1;
- break;
- default:
- fatalx("wrong message for session");
- break;
- }
- imsg_free(&imsg);
- }
- }
+ IMSG_CTL_SESSION, -1, &cid, sizeof(cid));
+ c->waiting++;
}
-
- imsg_compose_event(&c->iev, IMSG_CTL_END, 0, 0, -1, NULL, 0);
}
int
Index: usr.sbin/relayd/relay.c
===================================================================
RCS file: /cvs/src/usr.sbin/relayd/relay.c,v
retrieving revision 1.137
diff -u -p -r1.137 relay.c
--- usr.sbin/relayd/relay.c 19 May 2011 08:56:49 -0000 1.137
+++ usr.sbin/relayd/relay.c 19 May 2011 11:50:49 -0000
@@ -1939,6 +1939,7 @@ relay_accept(int fd, short sig, void *ar
con->se_relay = rlay;
con->se_id = ++relay_conid;
con->se_relayid = rlay->rl_conf.id;
+ con->se_pid = getpid();
con->se_hashkey = rlay->rl_dstkey;
con->se_in.tree = &proto->request_tree;
con->se_out.tree = &proto->response_tree;
@@ -2375,13 +2376,14 @@ int
relay_dispatch_pfe(int fd, struct privsep_proc *p, struct imsg *imsg)
{
struct relay *rlay;
- struct rsession *con;
+ struct rsession *con, se;
struct ctl_natlook cnl;
struct timeval tv;
struct host *host;
struct table *table;
struct ctl_status st;
objid_t id;
+ int cid;
switch (imsg->hdr.type) {
case IMSG_HOST_DISABLE:
@@ -2468,16 +2470,20 @@ relay_dispatch_pfe(int fd, struct privse
evtimer_add(&con->se_ev, &tv);
break;
case IMSG_CTL_SESSION:
+ IMSG_SIZE_CHECK(imsg, &cid);
+ memcpy(&cid, imsg->data, sizeof(cid));
TAILQ_FOREACH(rlay, env->sc_relays, rl_entry) {
SPLAY_FOREACH(con, session_tree,
&rlay->rl_sessions) {
+ memcpy(&se, con, sizeof(se));
+ se.se_cid = cid;
proc_compose_imsg(env->sc_ps, p->p_id, -1,
IMSG_CTL_SESSION,
- -1, con, sizeof(*con));
+ -1, &se, sizeof(se));
}
}
proc_compose_imsg(env->sc_ps, p->p_id, -1, IMSG_CTL_END,
- -1, NULL, 0);
+ -1, &cid, sizeof(cid));
break;
default:
return (-1);
Index: usr.sbin/relayd/relayd.h
===================================================================
RCS file: /cvs/src/usr.sbin/relayd/relayd.h,v
retrieving revision 1.147
diff -u -p -r1.147 relayd.h
--- usr.sbin/relayd/relayd.h 19 May 2011 08:56:49 -0000 1.147
+++ usr.sbin/relayd/relayd.h 19 May 2011 11:50:50 -0000
@@ -428,6 +428,8 @@ struct rsession {
struct ctl_natlook *se_cnl;
int se_bnds;
+ int se_cid;
+ pid_t se_pid;
SPLAY_ENTRY(rsession) se_nodes;
};
SPLAY_HEAD(session_tree, rsession);
@@ -705,6 +707,7 @@ struct imsgev {
struct ctl_conn {
TAILQ_ENTRY(ctl_conn) entry;
u_int8_t flags;
+ u_int waiting;
#define CTL_CONN_NOTIFY 0x01
struct imsgev iev;
Index: usr.sbin/relayctl/relayctl.c
===================================================================
RCS file: /cvs/src/usr.sbin/relayctl/relayctl.c,v
retrieving revision 1.44
diff -u -p -r1.44 relayctl.c
--- usr.sbin/relayctl/relayctl.c 19 May 2011 08:56:49 -0000 1.44
+++ usr.sbin/relayctl/relayctl.c 19 May 2011 11:50:50 -0000
@@ -447,7 +447,8 @@ show_session_msg(struct imsg *imsg)
fatal("show_session_msg: gettimeofday");
print_time(&tv_now, &con->se_tv_start, a, sizeof(a));
print_time(&tv_now, &con->se_tv_last, b, sizeof(b));
- printf("\tage %s, idle %s, relay %u", a, b, con->se_relayid);
+ printf("\tage %s, idle %s, relay %u, pid %u",
+ a, b, con->se_relayid, con->se_pid);
if (con->se_mark)
printf(", mark %u", con->se_mark);
printf("\n");