Hello.
Pinging on this one hoping to get some feedback. I've reattached the
diff below.
Thanks.
Ashton Fagg <[email protected]> writes:
> Hello tech,
>
> Recently I encountered a problem with automounting iscsi volumes at boot
> time. This came down to a timing issue, where iscsictl reload was
> returning before the volumes were attached - causing the machine to
> enter single user mode when it would try to fsck the iscsi volumes.
>
> See this thread:
>
> https://marc.info/?l=openbsd-bugs&m=161404736018432&w=2
>
> Adding "&& sleep 5" to /etc/rc.d/iscsid did mitigate this, however
> that's both hacky and precarious.
>
> After some discussion with claudio@, the principled solution is to make
> iscsictl wait until the sessions are up and the devices are attached
> before returning. Since "iscsictl reload" is called in the rc script,
> this would halt the boot process long enough for all the volumes to be
> available before progressing - and thus, eliminating my problem.
>
> The following diff is an attempt at that, with some caveats. It does the
> following things:
>
> - adds a control command to iscsid to allow for polling of session and
> connection status during iscsictl reload.
>
> - book-keeping code for working out if there are sessions still
> initializing
>
> - some poll-wait mechanisms in iscsictl, which (a) waits for iscsid to
> tell it everything is up and running or (b) returns after 10 poll
> attempts 1 second apart (thus, the max delay here is currently 10
> seconds).
>
> I have confirmed that the current implementation does indeed solve my
> problem, however there's a couple of things I still question:
>
> 1. In the poll_and_wait() function in iscsictl, you'll notice there's an
> extra sleep with the comment that it is there to give time for the
> devices to attach. In my case, the extra second was needed for both my
> devices to attach. without the extra sleep, the first device would
> attach successfully while iscsictl was still waiting, but the second
> still had not fully attached even though my book-keeping code said that
> nothing further was in flight. I question whether there's a better way
> to do this and welcome suggestions about that.
>
> 2. It's probably too chatty. Is it also perhaps better to have a
> separate command that does this, rather than hijacking "reload"?
>
> 3. I don't know whether the book-keeping code is entirely
> reasonable. I've tried to capture every state, but I do wonder if there
> are cases where things slip through the cracks.
>
> For context, here is the machine booting (and hitting single user mode)
> with the standard iscsid:
>
> https://www.youtube.com/watch?v=F09PaT-8IJU&feature=youtu.be
>
> Whereas, here is the same machine configured identically with my iscsid
> changes applied:
>
> https://www.youtube.com/watch?v=TZzmQBVDRxo&feature=youtu.be
>
> The delay appears to be somewhere around 2-3 seconds, which is less than
> the full 10 allowable by the poll_and_wait() loop - so it does appear to
> be detecting the status correctly (at least in this configuration).
>
> Anyhow, I'd love to hear some comments and suggestions. Please note this
> is my first submission outside of ports that's bigger than a 1 or 2
> liner, and it's been a while since I've written C (I write C++ at my day
> job, and it's not systems programming...), so hopefully I haven't done
> anything too silly.
>
> Thanks,
diff --git a/usr.sbin/iscsictl/iscsictl.c b/usr.sbin/iscsictl/iscsictl.c
index 77f9c74abde..46404b5512b 100644
--- a/usr.sbin/iscsictl/iscsictl.c
+++ b/usr.sbin/iscsictl/iscsictl.c
@@ -40,6 +40,10 @@ struct pdu *ctl_getpdu(char *, size_t);
int ctl_sendpdu(int, struct pdu *);
void show_config(struct ctrlmsghdr *, struct pdu *);
void show_vscsi_stats(struct ctrlmsghdr *, struct pdu *);
+void poll_and_wait(void);
+void poll_session_status(void);
+void register_poll(struct ctrlmsghdr *, struct pdu *);
+void poll_print(struct session_poll *);
char cbuf[CONTROL_READ_SIZE];
@@ -48,6 +52,12 @@ struct control {
int fd;
} control;
+
+struct session_poll poll_result; /* Poll result */
+
+#define POLL_DELAY 1 /* Delay between poll attempts (seconds) */
+#define POLL_ATTEMPTS 10 /* Number of poll attempts before returning */
+
__dead void
usage(void)
{
@@ -68,7 +78,7 @@ main (int argc, char* argv[])
char *sockname = ISCSID_CONTROL;
struct session_ctlcfg *s;
struct iscsi_config *cf;
- int ch, val = 0;
+ int ch, poll = 0, val = 0;
/* check flags */
while ((ch = getopt(argc, argv, "f:s:")) != -1) {
@@ -135,6 +145,9 @@ main (int argc, char* argv[])
&cf->initiator, sizeof(cf->initiator)) == -1)
err(1, "control_compose");
}
+
+ /* Reloading, so poll afterwards. */
+ poll = 1;
SIMPLEQ_FOREACH(s, &cf->sessions, entry) {
struct ctrldata cdv[3];
bzero(cdv, sizeof(cdv));
@@ -174,8 +187,13 @@ main (int argc, char* argv[])
run();
- close(control.fd);
+ /* If we've reloaded, we probably should wait in case any new connections
+ need to come up (or fail). */
+ if (poll) {
+ poll_and_wait();
+ }
+ close(control.fd);
return (0);
}
@@ -237,6 +255,11 @@ run_command(struct pdu *pdu)
show_vscsi_stats(cmh, pdu);
done = 1;
break;
+ case CTRL_SESS_POLL:
+ done = 1;
+ register_poll(cmh, pdu);
+ break;
+
}
}
}
@@ -383,3 +406,73 @@ show_vscsi_stats(struct ctrlmsghdr *cmh, struct pdu *pdu)
vs->cnt_t2i_status[1],
vs->cnt_t2i_status[2]);
}
+
+void
+poll_session_status(void)
+{
+ if (control_compose(NULL, CTRL_SESS_POLL, NULL, 0) == -1)
+ err(1, "control_compose");
+
+ struct pdu *pdu;
+ while ((pdu = TAILQ_FIRST(&control.channel)) != NULL) {
+ TAILQ_REMOVE(&control.channel, pdu, entry);
+ run_command(pdu);
+ }
+}
+
+void
+poll_and_wait(void)
+{
+ int attempts;
+
+ printf("polling...");
+ fflush(stdout);
+
+ for (attempts = 1; attempts <= POLL_ATTEMPTS; ++attempts) {
+
+ poll_session_status();
+
+ /* Poll says we are good to go. */
+ if (poll_result.sess_conn_status != 0) {
+ printf("ok!\n");
+ goto attach_delay;
+ }
+
+ /* Poll says we should wait... */
+ printf("%d...", attempts);
+ fflush(stdout);
+ sleep(POLL_DELAY);
+ }
+
+ printf("timer elapsed.\n");
+ goto finish;
+
+attach_delay:
+ /* For good measure, wait a bit longer while devs attach. */
+ sleep(POLL_DELAY);
+
+finish:
+ poll_print(&poll_result);
+ fflush(stdout);
+}
+
+void
+register_poll(struct ctrlmsghdr *cmh, struct pdu *pdu)
+{
+ if (cmh->len[0] != sizeof(poll_result))
+ errx(1, "poll: bad size of response");
+
+ poll_result = *((struct session_poll *)pdu_getbuf(pdu, NULL, 1));
+}
+
+void
+poll_print(struct session_poll *p)
+{
+ printf("------------------------------------\n");
+ printf("Configured sessions: %d\n", p->session_count);
+ printf("Sessions initializing: %d\n", p->session_init_count);
+ printf("Sessions started/failed: %d\n", p->session_running_count);
+ printf("Sessions logged in: %d\n", p->conn_logged_in_count);
+ printf("Sessions with failed connections: %d\n", p->conn_failed_count);
+ printf("Sessions still attempting to connection: %d\n", p->conn_waiting_count);
+}
diff --git a/usr.sbin/iscsid/Makefile b/usr.sbin/iscsid/Makefile
index 7a62024e68b..bac59d934f8 100644
--- a/usr.sbin/iscsid/Makefile
+++ b/usr.sbin/iscsid/Makefile
@@ -2,7 +2,7 @@
PROG= iscsid
SRCS= connection.c control.c initiator.c iscsid.c log.c logmsg.c pdu.c \
- session.c task.c util.c vscsi.c
+ poll.c session.c task.c util.c vscsi.c
MAN= iscsid.8
diff --git a/usr.sbin/iscsid/iscsid.c b/usr.sbin/iscsid/iscsid.c
index d3526e96363..044888b8ade 100644
--- a/usr.sbin/iscsid/iscsid.c
+++ b/usr.sbin/iscsid/iscsid.c
@@ -211,6 +211,7 @@ iscsid_ctrl_dispatch(void *ch, struct pdu *pdu)
struct initiator_config *ic;
struct session_config *sc;
struct session *s;
+ struct session_poll p;
int *valp;
cmh = pdu_getbuf(pdu, NULL, 0);
@@ -303,6 +304,17 @@ iscsid_ctrl_dispatch(void *ch, struct pdu *pdu)
}
control_compose(ch, CTRL_SUCCESS, NULL, 0);
+ break;
+ case CTRL_SESS_POLL:
+ poll_init(&p);
+
+ TAILQ_FOREACH(s, &initiator->sessions, entry) {
+ poll_session(&p, s);
+ }
+
+ poll_finalize(&p);
+ control_compose(ch, CTRL_SESS_POLL, &p, sizeof(p));
+
break;
default:
log_warnx("unknown control message type %d", cmh->type);
@@ -314,6 +326,7 @@ done:
pdu_free(pdu);
}
+
#define MERGE_MIN(r, a, b, v) \
r->v = (a->v < b->v ? a->v : b->v)
#define MERGE_MAX(r, a, b, v) \
diff --git a/usr.sbin/iscsid/iscsid.h b/usr.sbin/iscsid/iscsid.h
index b43fb5dcd99..24e3e389aaf 100644
--- a/usr.sbin/iscsid/iscsid.h
+++ b/usr.sbin/iscsid/iscsid.h
@@ -67,7 +67,7 @@ struct ctrldata {
#define CTRL_LOG_VERBOSE 6
#define CTRL_VSCSI_STATS 7
#define CTRL_SHOW_SUM 8
-
+#define CTRL_SESS_POLL 9
TAILQ_HEAD(session_head, session);
TAILQ_HEAD(connection_head, connection);
@@ -251,6 +251,16 @@ struct session {
int action;
};
+struct session_poll {
+ int session_count; /* Total number of configured sessions*/
+ int session_init_count; /* Number of sessions in init state */
+ int session_running_count; /* Number of sessions in running state */
+ int conn_logged_in_count; /* Number of sessions indicating logged in. */
+ int conn_failed_count; /* Number of sessions with failed connections. */
+ int conn_waiting_count; /* Number of sessions with connections init in flight */
+ int sess_conn_status; /* Status flag */
+};
+
struct connection {
struct event ev;
struct event wev;
@@ -391,6 +401,12 @@ void vscsi_status(int, int, void *, size_t);
void vscsi_event(unsigned long, u_int, u_int);
struct vscsi_stats *vscsi_stats(void);
+/* Session polling */
+void poll_init(struct session_poll *);
+void poll_session(struct session_poll *, struct session *);
+void poll_finalize(struct session_poll *);
+void poll_print(struct session_poll *);
+
/* logmsg.c */
void log_hexdump(void *, size_t);
void log_pdu(struct pdu *, int);
diff --git a/usr.sbin/iscsid/poll.c b/usr.sbin/iscsid/poll.c
new file mode 100644
index 00000000000..8eb08107f39
--- /dev/null
+++ b/usr.sbin/iscsid/poll.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021 Dr Ashton Fagg <[email protected]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <event.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "iscsid.h"
+#include "log.h"
+
+
+void
+poll_init(struct session_poll *p)
+{
+ p->session_count = 0;
+ p->session_init_count = 0;
+ p->session_running_count = 0;
+ p->conn_logged_in_count = 0;
+ p->conn_failed_count = 0;
+ p->conn_waiting_count = 0;
+ p->sess_conn_status = 0;
+}
+
+void
+poll_session(struct session_poll *p, struct session *s)
+{
+ if (!s)
+ fatal("poll_session failed: invalid session");
+
+ ++p->session_count;
+
+ /* If SESS_RUNNING, this determines the session has either
+ been brought up successfully, or has failed. */
+ if (s->state & SESS_RUNNING) {
+ ++p->session_running_count;
+
+ /* Next, check what state the connections are in. */
+ struct connection *c;
+ int is_logged_in = 0;
+ int is_failed = 0;
+ int is_waiting = 0;
+
+ TAILQ_FOREACH(c, &s->connections, entry) {
+ if (c->state & CONN_LOGGED_IN)
+ ++is_logged_in;
+ else if (c->state & CONN_FAILED)
+ ++is_failed;
+ else if (c->state & CONN_NEVER_LOGGED_IN)
+ ++is_waiting;
+
+ }
+
+ /* Potentially, we have multiple connections for each session.
+ We handle this by saying that a single connection logging in takes
+ precedent over a failed connection. We only say the session login has
+ failed if none of the connections have logged in and nothing is in
+ flight. */
+
+ if (is_logged_in)
+ ++p->conn_logged_in_count;
+ if (is_failed)
+ ++p->conn_failed_count;
+ if (is_waiting)
+ ++p->conn_waiting_count;
+
+ /* Otherwise, it is in SESS_INIT. These need to be waited on. */
+ } else if (s->state & SESS_INIT)
+ ++p->session_init_count;
+ else
+ fatal("poll_session: unknown state.");
+}
+
+void
+poll_finalize(struct session_poll *p)
+{
+ /* total number of sessions that have finished initializing (and ended
+ sucessfully or otherwise). */
+ int num_accounted_conn = p->conn_logged_in_count + p->conn_failed_count;
+
+ /* Set status flag to signal iscsictl to stop polling */
+ p->sess_conn_status = p->session_count == num_accounted_conn;
+}