Hello tech,

Recently I encountered a problem with automounting iscsi volumes at boot
time. This came down to a timing issue, where iscsictl reload was
returning before the volumes were attached - causing the machine to
enter single user mode when it would try to fsck the iscsi volumes.

See this thread:

https://marc.info/?l=openbsd-bugs&m=161404736018432&w=2

Adding "&& sleep 5" to /etc/rc.d/iscsid did mitigate this, however
that's both hacky and precarious.

After some discussion with claudio@, the principled solution is to make
iscsictl wait until the sessions are up and the devices are attached
before returning. Since "iscsictl reload" is called in the rc script,
this would halt the boot process long enough for all the volumes to be
available before progressing - and thus, eliminating my problem.

The following diff is an attempt at that, with some caveats. It does the
following things:

- adds a control command to iscsid to allow for polling of session and
  connection status during iscsictl reload.  

- book-keeping code for working out if there are sessions still
  initializing

- some poll-wait mechanisms in iscsictl, which (a) waits for iscsid to
  tell it everything is up and running or (b) returns after 10 poll
  attempts 1 second apart (thus, the max delay here is currently 10
  seconds).

I have confirmed that the current implementation does indeed solve my
problem, however there's a couple of things I still question:

1. In the poll_and_wait() function in iscsictl, you'll notice there's an
extra sleep with the comment that it is there to give time for the
devices to attach. In my case, the extra second was needed for both my
devices to attach. without the extra sleep, the first device would
attach successfully while iscsictl was still waiting, but the second
still had not fully attached even though my book-keeping code said that
nothing further was in flight. I question whether there's a better way
to do this and welcome suggestions about that.

2. It's probably too chatty. Is it also perhaps better to have a
separate command that does this, rather than hijacking "reload"?

3. I don't know whether the book-keeping code is entirely
reasonable. I've tried to capture every state, but I do wonder if there
are cases where things slip through the cracks.

For context, here is the machine booting (and hitting single user mode)
with the standard iscsid:

https://www.youtube.com/watch?v=F09PaT-8IJU&feature=youtu.be

Whereas, here is the same machine configured identically with my iscsid
changes applied:

https://www.youtube.com/watch?v=TZzmQBVDRxo&feature=youtu.be

The delay appears to be somewhere around 2-3 seconds, which is less than
the full 10 allowable by the poll_and_wait() loop - so it does appear to
be detecting the status correctly (at least in this configuration).

Anyhow, I'd love to hear some comments and suggestions. Please note this
is my first submission outside of ports that's bigger than a 1 or 2
liner, and it's been a while since I've written C (I write C++ at my day
job, and it's not systems programming...), so hopefully I haven't done
anything too silly.

Thanks,

Ash

diff --git a/usr.sbin/iscsictl/iscsictl.c b/usr.sbin/iscsictl/iscsictl.c
index 77f9c74abde..46404b5512b 100644
--- a/usr.sbin/iscsictl/iscsictl.c
+++ b/usr.sbin/iscsictl/iscsictl.c
@@ -40,6 +40,10 @@ struct pdu	*ctl_getpdu(char *, size_t);
 int		 ctl_sendpdu(int, struct pdu *);
 void		 show_config(struct ctrlmsghdr *, struct pdu *);
 void		 show_vscsi_stats(struct ctrlmsghdr *, struct pdu *);
+void             poll_and_wait(void);
+void             poll_session_status(void);
+void             register_poll(struct ctrlmsghdr *, struct pdu *);
+void             poll_print(struct session_poll *);
 
 char		cbuf[CONTROL_READ_SIZE];
 
@@ -48,6 +52,12 @@ struct control {
 	int		fd;
 } control;
 
+
+struct session_poll poll_result; /* Poll result */
+
+#define POLL_DELAY     1         /* Delay between poll attempts (seconds) */
+#define POLL_ATTEMPTS  10        /* Number of poll attempts before returning */
+
 __dead void
 usage(void)
 {
@@ -68,7 +78,7 @@ main (int argc, char* argv[])
 	char *sockname = ISCSID_CONTROL;
 	struct session_ctlcfg *s;
 	struct iscsi_config *cf;
-	int ch, val = 0;
+	int ch, poll = 0, val = 0;
 
 	/* check flags */
 	while ((ch = getopt(argc, argv, "f:s:")) != -1) {
@@ -135,6 +145,9 @@ main (int argc, char* argv[])
 			    &cf->initiator, sizeof(cf->initiator)) == -1)
 				err(1, "control_compose");
 		}
+
+		/* Reloading, so poll afterwards. */
+		poll = 1;
 		SIMPLEQ_FOREACH(s, &cf->sessions, entry) {
 			struct ctrldata cdv[3];
 			bzero(cdv, sizeof(cdv));
@@ -174,8 +187,13 @@ main (int argc, char* argv[])
 
 	run();
 
-	close(control.fd);
+	/* If we've reloaded, we probably should wait in case any new connections
+	   need to come up (or fail). */
+	if (poll) {
+		poll_and_wait();
+	}
 
+	close(control.fd);
 	return (0);
 }
 
@@ -237,6 +255,11 @@ run_command(struct pdu *pdu)
 			show_vscsi_stats(cmh, pdu);
 			done = 1;
 			break;
+		case CTRL_SESS_POLL:
+			done = 1;
+			register_poll(cmh, pdu);
+			break;
+
 		}
 	}
 }
@@ -383,3 +406,73 @@ show_vscsi_stats(struct ctrlmsghdr *cmh, struct pdu *pdu)
 	    vs->cnt_t2i_status[1], 
 	    vs->cnt_t2i_status[2]);
 }
+
+void
+poll_session_status(void)
+{
+	if (control_compose(NULL, CTRL_SESS_POLL, NULL, 0) == -1)
+		err(1, "control_compose");
+
+	struct pdu *pdu;
+	while ((pdu = TAILQ_FIRST(&control.channel)) != NULL) {
+		TAILQ_REMOVE(&control.channel, pdu, entry);
+		run_command(pdu);
+	}
+}
+
+void
+poll_and_wait(void)
+{
+	int attempts;
+
+	printf("polling...");
+	fflush(stdout);
+
+	for (attempts = 1; attempts <= POLL_ATTEMPTS; ++attempts) {
+
+		poll_session_status();
+
+		/* Poll says we are good to go. */
+		if (poll_result.sess_conn_status != 0) {
+			printf("ok!\n");
+			goto attach_delay;
+		}
+
+		/* Poll says we should wait... */
+		printf("%d...", attempts);
+		fflush(stdout);
+		sleep(POLL_DELAY);
+	}
+
+	printf("timer elapsed.\n");
+	goto finish;
+
+attach_delay:
+	/* For good measure, wait a bit longer while devs attach. */
+	sleep(POLL_DELAY);
+
+finish:
+	poll_print(&poll_result);
+	fflush(stdout);
+}
+
+void
+register_poll(struct ctrlmsghdr *cmh, struct pdu *pdu)
+{
+	if (cmh->len[0] != sizeof(poll_result))
+		errx(1, "poll: bad size of response");
+
+	poll_result = *((struct session_poll *)pdu_getbuf(pdu, NULL, 1));
+}
+
+void
+poll_print(struct session_poll *p)
+{
+	printf("------------------------------------\n");
+	printf("Configured sessions: %d\n", p->session_count);
+	printf("Sessions initializing: %d\n", p->session_init_count);
+	printf("Sessions started/failed: %d\n", p->session_running_count);
+	printf("Sessions logged in: %d\n", p->conn_logged_in_count);
+	printf("Sessions with failed connections: %d\n", p->conn_failed_count);
+	printf("Sessions still attempting to connection: %d\n", p->conn_waiting_count);
+}
diff --git a/usr.sbin/iscsid/Makefile b/usr.sbin/iscsid/Makefile
index 7a62024e68b..bac59d934f8 100644
--- a/usr.sbin/iscsid/Makefile
+++ b/usr.sbin/iscsid/Makefile
@@ -2,7 +2,7 @@
 
 PROG=	iscsid
 SRCS=	connection.c control.c initiator.c iscsid.c log.c logmsg.c pdu.c \
-	session.c task.c util.c vscsi.c
+	poll.c session.c task.c util.c vscsi.c
 
 MAN=	iscsid.8
 
diff --git a/usr.sbin/iscsid/iscsid.c b/usr.sbin/iscsid/iscsid.c
index d3526e96363..044888b8ade 100644
--- a/usr.sbin/iscsid/iscsid.c
+++ b/usr.sbin/iscsid/iscsid.c
@@ -211,6 +211,7 @@ iscsid_ctrl_dispatch(void *ch, struct pdu *pdu)
 	struct initiator_config *ic;
 	struct session_config *sc;
 	struct session *s;
+	struct session_poll p;
 	int *valp;
 
 	cmh = pdu_getbuf(pdu, NULL, 0);
@@ -303,6 +304,17 @@ iscsid_ctrl_dispatch(void *ch, struct pdu *pdu)
 		}
 
 		control_compose(ch, CTRL_SUCCESS, NULL, 0);
+		break;
+	case CTRL_SESS_POLL:
+		poll_init(&p);
+
+		TAILQ_FOREACH(s, &initiator->sessions, entry) {
+			poll_session(&p, s);
+		}
+
+		poll_finalize(&p);
+		control_compose(ch, CTRL_SESS_POLL, &p, sizeof(p));
+
 		break;
 	default:
 		log_warnx("unknown control message type %d", cmh->type);
@@ -314,6 +326,7 @@ done:
 	pdu_free(pdu);
 }
 
+
 #define MERGE_MIN(r, a, b, v)				\
 	r->v = (a->v < b->v ? a->v : b->v)
 #define MERGE_MAX(r, a, b, v)				\
diff --git a/usr.sbin/iscsid/iscsid.h b/usr.sbin/iscsid/iscsid.h
index b43fb5dcd99..24e3e389aaf 100644
--- a/usr.sbin/iscsid/iscsid.h
+++ b/usr.sbin/iscsid/iscsid.h
@@ -67,7 +67,7 @@ struct ctrldata {
 #define CTRL_LOG_VERBOSE	6
 #define CTRL_VSCSI_STATS	7
 #define CTRL_SHOW_SUM		8
-
+#define CTRL_SESS_POLL          9
 
 TAILQ_HEAD(session_head, session);
 TAILQ_HEAD(connection_head, connection);
@@ -251,6 +251,16 @@ struct session {
 	int			 action;
 };
 
+struct session_poll {
+	int session_count;            /* Total number of configured sessions*/
+	int session_init_count;       /* Number of sessions in init state */
+	int session_running_count;    /* Number of sessions in running state */
+	int conn_logged_in_count;     /* Number of sessions indicating logged in. */
+	int conn_failed_count;        /* Number of sessions with failed connections. */
+	int conn_waiting_count;       /* Number of sessions with connections init in flight */
+	int sess_conn_status;         /* Status flag */
+};
+
 struct connection {
 	struct event		 ev;
 	struct event		 wev;
@@ -391,6 +401,12 @@ void	vscsi_status(int, int, void *, size_t);
 void	vscsi_event(unsigned long, u_int, u_int);
 struct vscsi_stats *vscsi_stats(void);
 
+/* Session polling */
+void    poll_init(struct session_poll *);
+void    poll_session(struct session_poll *, struct session *);
+void    poll_finalize(struct session_poll *);
+void    poll_print(struct session_poll *);
+
 /* logmsg.c */
 void	log_hexdump(void *, size_t);
 void	log_pdu(struct pdu *, int);
diff --git a/usr.sbin/iscsid/poll.c b/usr.sbin/iscsid/poll.c
new file mode 100644
index 00000000000..8eb08107f39
--- /dev/null
+++ b/usr.sbin/iscsid/poll.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021 Dr Ashton Fagg <[email protected]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <event.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "iscsid.h"
+#include "log.h"
+
+
+void
+poll_init(struct session_poll *p)
+{
+	p->session_count         = 0;
+	p->session_init_count    = 0;
+	p->session_running_count = 0;
+	p->conn_logged_in_count  = 0;
+	p->conn_failed_count     = 0;
+	p->conn_waiting_count    = 0;
+	p->sess_conn_status      = 0;
+}
+
+void
+poll_session(struct session_poll *p, struct session *s)
+{
+	if (!s)
+		fatal("poll_session failed: invalid session");
+
+	++p->session_count;
+
+	/* If SESS_RUNNING, this determines the session has either
+           been brought up successfully, or has failed. */
+	if (s->state & SESS_RUNNING) {
+		++p->session_running_count;
+
+		/* Next, check what state the connections are in. */
+		struct connection *c;
+		int is_logged_in = 0;
+		int is_failed = 0;
+		int is_waiting = 0;
+
+		TAILQ_FOREACH(c, &s->connections, entry) {
+			if (c->state & CONN_LOGGED_IN)
+				++is_logged_in;
+			else if (c->state & CONN_FAILED)
+				++is_failed;
+			else if (c->state & CONN_NEVER_LOGGED_IN)
+				++is_waiting;
+
+		}
+
+		/* Potentially, we have multiple connections for each session.
+		We handle this by saying that a single connection logging in takes
+		precedent over a failed connection. We only say the session login has
+		failed if none of the connections have logged in and nothing is in
+		flight. */
+
+		if (is_logged_in)
+			++p->conn_logged_in_count;
+		if (is_failed)
+			++p->conn_failed_count;
+		if (is_waiting)
+			++p->conn_waiting_count;
+
+	/* Otherwise, it is in SESS_INIT. These need to be waited on. */
+	} else if (s->state & SESS_INIT)
+		++p->session_init_count;
+	else
+		fatal("poll_session: unknown state.");
+}
+
+void
+poll_finalize(struct session_poll *p)
+{
+	/* total number of sessions that have finished initializing (and ended
+	sucessfully or otherwise). */
+	int num_accounted_conn = p->conn_logged_in_count + p->conn_failed_count;
+
+	/* Set status flag to signal iscsictl to stop polling */
+	p->sess_conn_status = p->session_count == num_accounted_conn;
+}

Reply via email to