Hello.

Pinging on this one hoping to get some feedback. I've reattached the
diff below.

Thanks.


Ashton Fagg <[email protected]> writes:

> Hello tech,
>
> Recently I encountered a problem with automounting iscsi volumes at boot
> time. This came down to a timing issue, where iscsictl reload was
> returning before the volumes were attached - causing the machine to
> enter single user mode when it would try to fsck the iscsi volumes.
>
> See this thread:
>
> https://marc.info/?l=openbsd-bugs&m=161404736018432&w=2
>
> Adding "&& sleep 5" to /etc/rc.d/iscsid did mitigate this, however
> that's both hacky and precarious.
>
> After some discussion with claudio@, the principled solution is to make
> iscsictl wait until the sessions are up and the devices are attached
> before returning. Since "iscsictl reload" is called in the rc script,
> this would halt the boot process long enough for all the volumes to be
> available before progressing - and thus, eliminating my problem.
>
> The following diff is an attempt at that, with some caveats. It does the
> following things:
>
> - adds a control command to iscsid to allow for polling of session and
>   connection status during iscsictl reload.  
>
> - book-keeping code for working out if there are sessions still
>   initializing
>
> - some poll-wait mechanisms in iscsictl, which (a) waits for iscsid to
>   tell it everything is up and running or (b) returns after 10 poll
>   attempts 1 second apart (thus, the max delay here is currently 10
>   seconds).
>
> I have confirmed that the current implementation does indeed solve my
> problem, however there's a couple of things I still question:
>
> 1. In the poll_and_wait() function in iscsictl, you'll notice there's an
> extra sleep with the comment that it is there to give time for the
> devices to attach. In my case, the extra second was needed for both my
> devices to attach. without the extra sleep, the first device would
> attach successfully while iscsictl was still waiting, but the second
> still had not fully attached even though my book-keeping code said that
> nothing further was in flight. I question whether there's a better way
> to do this and welcome suggestions about that.
>
> 2. It's probably too chatty. Is it also perhaps better to have a
> separate command that does this, rather than hijacking "reload"?
>
> 3. I don't know whether the book-keeping code is entirely
> reasonable. I've tried to capture every state, but I do wonder if there
> are cases where things slip through the cracks.
>
> For context, here is the machine booting (and hitting single user mode)
> with the standard iscsid:
>
> https://www.youtube.com/watch?v=F09PaT-8IJU&feature=youtu.be
>
> Whereas, here is the same machine configured identically with my iscsid
> changes applied:
>
> https://www.youtube.com/watch?v=TZzmQBVDRxo&feature=youtu.be
>
> The delay appears to be somewhere around 2-3 seconds, which is less than
> the full 10 allowable by the poll_and_wait() loop - so it does appear to
> be detecting the status correctly (at least in this configuration).
>
> Anyhow, I'd love to hear some comments and suggestions. Please note this
> is my first submission outside of ports that's bigger than a 1 or 2
> liner, and it's been a while since I've written C (I write C++ at my day
> job, and it's not systems programming...), so hopefully I haven't done
> anything too silly.
>
> Thanks,

diff --git a/usr.sbin/iscsictl/iscsictl.c b/usr.sbin/iscsictl/iscsictl.c
index 77f9c74abde..46404b5512b 100644
--- a/usr.sbin/iscsictl/iscsictl.c
+++ b/usr.sbin/iscsictl/iscsictl.c
@@ -40,6 +40,10 @@ struct pdu	*ctl_getpdu(char *, size_t);
 int		 ctl_sendpdu(int, struct pdu *);
 void		 show_config(struct ctrlmsghdr *, struct pdu *);
 void		 show_vscsi_stats(struct ctrlmsghdr *, struct pdu *);
+void             poll_and_wait(void);
+void             poll_session_status(void);
+void             register_poll(struct ctrlmsghdr *, struct pdu *);
+void             poll_print(struct session_poll *);
 
 char		cbuf[CONTROL_READ_SIZE];
 
@@ -48,6 +52,12 @@ struct control {
 	int		fd;
 } control;
 
+
+struct session_poll poll_result; /* Poll result */
+
+#define POLL_DELAY     1         /* Delay between poll attempts (seconds) */
+#define POLL_ATTEMPTS  10        /* Number of poll attempts before returning */
+
 __dead void
 usage(void)
 {
@@ -68,7 +78,7 @@ main (int argc, char* argv[])
 	char *sockname = ISCSID_CONTROL;
 	struct session_ctlcfg *s;
 	struct iscsi_config *cf;
-	int ch, val = 0;
+	int ch, poll = 0, val = 0;
 
 	/* check flags */
 	while ((ch = getopt(argc, argv, "f:s:")) != -1) {
@@ -135,6 +145,9 @@ main (int argc, char* argv[])
 			    &cf->initiator, sizeof(cf->initiator)) == -1)
 				err(1, "control_compose");
 		}
+
+		/* Reloading, so poll afterwards. */
+		poll = 1;
 		SIMPLEQ_FOREACH(s, &cf->sessions, entry) {
 			struct ctrldata cdv[3];
 			bzero(cdv, sizeof(cdv));
@@ -174,8 +187,13 @@ main (int argc, char* argv[])
 
 	run();
 
-	close(control.fd);
+	/* If we've reloaded, we probably should wait in case any new connections
+	   need to come up (or fail). */
+	if (poll) {
+		poll_and_wait();
+	}
 
+	close(control.fd);
 	return (0);
 }
 
@@ -237,6 +255,11 @@ run_command(struct pdu *pdu)
 			show_vscsi_stats(cmh, pdu);
 			done = 1;
 			break;
+		case CTRL_SESS_POLL:
+			done = 1;
+			register_poll(cmh, pdu);
+			break;
+
 		}
 	}
 }
@@ -383,3 +406,73 @@ show_vscsi_stats(struct ctrlmsghdr *cmh, struct pdu *pdu)
 	    vs->cnt_t2i_status[1], 
 	    vs->cnt_t2i_status[2]);
 }
+
+void
+poll_session_status(void)
+{
+	if (control_compose(NULL, CTRL_SESS_POLL, NULL, 0) == -1)
+		err(1, "control_compose");
+
+	struct pdu *pdu;
+	while ((pdu = TAILQ_FIRST(&control.channel)) != NULL) {
+		TAILQ_REMOVE(&control.channel, pdu, entry);
+		run_command(pdu);
+	}
+}
+
+void
+poll_and_wait(void)
+{
+	int attempts;
+
+	printf("polling...");
+	fflush(stdout);
+
+	for (attempts = 1; attempts <= POLL_ATTEMPTS; ++attempts) {
+
+		poll_session_status();
+
+		/* Poll says we are good to go. */
+		if (poll_result.sess_conn_status != 0) {
+			printf("ok!\n");
+			goto attach_delay;
+		}
+
+		/* Poll says we should wait... */
+		printf("%d...", attempts);
+		fflush(stdout);
+		sleep(POLL_DELAY);
+	}
+
+	printf("timer elapsed.\n");
+	goto finish;
+
+attach_delay:
+	/* For good measure, wait a bit longer while devs attach. */
+	sleep(POLL_DELAY);
+
+finish:
+	poll_print(&poll_result);
+	fflush(stdout);
+}
+
+void
+register_poll(struct ctrlmsghdr *cmh, struct pdu *pdu)
+{
+	if (cmh->len[0] != sizeof(poll_result))
+		errx(1, "poll: bad size of response");
+
+	poll_result = *((struct session_poll *)pdu_getbuf(pdu, NULL, 1));
+}
+
+void
+poll_print(struct session_poll *p)
+{
+	printf("------------------------------------\n");
+	printf("Configured sessions: %d\n", p->session_count);
+	printf("Sessions initializing: %d\n", p->session_init_count);
+	printf("Sessions started/failed: %d\n", p->session_running_count);
+	printf("Sessions logged in: %d\n", p->conn_logged_in_count);
+	printf("Sessions with failed connections: %d\n", p->conn_failed_count);
+	printf("Sessions still attempting to connection: %d\n", p->conn_waiting_count);
+}
diff --git a/usr.sbin/iscsid/Makefile b/usr.sbin/iscsid/Makefile
index 7a62024e68b..bac59d934f8 100644
--- a/usr.sbin/iscsid/Makefile
+++ b/usr.sbin/iscsid/Makefile
@@ -2,7 +2,7 @@
 
 PROG=	iscsid
 SRCS=	connection.c control.c initiator.c iscsid.c log.c logmsg.c pdu.c \
-	session.c task.c util.c vscsi.c
+	poll.c session.c task.c util.c vscsi.c
 
 MAN=	iscsid.8
 
diff --git a/usr.sbin/iscsid/iscsid.c b/usr.sbin/iscsid/iscsid.c
index d3526e96363..044888b8ade 100644
--- a/usr.sbin/iscsid/iscsid.c
+++ b/usr.sbin/iscsid/iscsid.c
@@ -211,6 +211,7 @@ iscsid_ctrl_dispatch(void *ch, struct pdu *pdu)
 	struct initiator_config *ic;
 	struct session_config *sc;
 	struct session *s;
+	struct session_poll p;
 	int *valp;
 
 	cmh = pdu_getbuf(pdu, NULL, 0);
@@ -303,6 +304,17 @@ iscsid_ctrl_dispatch(void *ch, struct pdu *pdu)
 		}
 
 		control_compose(ch, CTRL_SUCCESS, NULL, 0);
+		break;
+	case CTRL_SESS_POLL:
+		poll_init(&p);
+
+		TAILQ_FOREACH(s, &initiator->sessions, entry) {
+			poll_session(&p, s);
+		}
+
+		poll_finalize(&p);
+		control_compose(ch, CTRL_SESS_POLL, &p, sizeof(p));
+
 		break;
 	default:
 		log_warnx("unknown control message type %d", cmh->type);
@@ -314,6 +326,7 @@ done:
 	pdu_free(pdu);
 }
 
+
 #define MERGE_MIN(r, a, b, v)				\
 	r->v = (a->v < b->v ? a->v : b->v)
 #define MERGE_MAX(r, a, b, v)				\
diff --git a/usr.sbin/iscsid/iscsid.h b/usr.sbin/iscsid/iscsid.h
index b43fb5dcd99..24e3e389aaf 100644
--- a/usr.sbin/iscsid/iscsid.h
+++ b/usr.sbin/iscsid/iscsid.h
@@ -67,7 +67,7 @@ struct ctrldata {
 #define CTRL_LOG_VERBOSE	6
 #define CTRL_VSCSI_STATS	7
 #define CTRL_SHOW_SUM		8
-
+#define CTRL_SESS_POLL          9
 
 TAILQ_HEAD(session_head, session);
 TAILQ_HEAD(connection_head, connection);
@@ -251,6 +251,16 @@ struct session {
 	int			 action;
 };
 
+struct session_poll {
+	int session_count;            /* Total number of configured sessions*/
+	int session_init_count;       /* Number of sessions in init state */
+	int session_running_count;    /* Number of sessions in running state */
+	int conn_logged_in_count;     /* Number of sessions indicating logged in. */
+	int conn_failed_count;        /* Number of sessions with failed connections. */
+	int conn_waiting_count;       /* Number of sessions with connections init in flight */
+	int sess_conn_status;         /* Status flag */
+};
+
 struct connection {
 	struct event		 ev;
 	struct event		 wev;
@@ -391,6 +401,12 @@ void	vscsi_status(int, int, void *, size_t);
 void	vscsi_event(unsigned long, u_int, u_int);
 struct vscsi_stats *vscsi_stats(void);
 
+/* Session polling */
+void    poll_init(struct session_poll *);
+void    poll_session(struct session_poll *, struct session *);
+void    poll_finalize(struct session_poll *);
+void    poll_print(struct session_poll *);
+
 /* logmsg.c */
 void	log_hexdump(void *, size_t);
 void	log_pdu(struct pdu *, int);
diff --git a/usr.sbin/iscsid/poll.c b/usr.sbin/iscsid/poll.c
new file mode 100644
index 00000000000..8eb08107f39
--- /dev/null
+++ b/usr.sbin/iscsid/poll.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2021 Dr Ashton Fagg <[email protected]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <event.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "iscsid.h"
+#include "log.h"
+
+
+void
+poll_init(struct session_poll *p)
+{
+	p->session_count         = 0;
+	p->session_init_count    = 0;
+	p->session_running_count = 0;
+	p->conn_logged_in_count  = 0;
+	p->conn_failed_count     = 0;
+	p->conn_waiting_count    = 0;
+	p->sess_conn_status      = 0;
+}
+
+void
+poll_session(struct session_poll *p, struct session *s)
+{
+	if (!s)
+		fatal("poll_session failed: invalid session");
+
+	++p->session_count;
+
+	/* If SESS_RUNNING, this determines the session has either
+           been brought up successfully, or has failed. */
+	if (s->state & SESS_RUNNING) {
+		++p->session_running_count;
+
+		/* Next, check what state the connections are in. */
+		struct connection *c;
+		int is_logged_in = 0;
+		int is_failed = 0;
+		int is_waiting = 0;
+
+		TAILQ_FOREACH(c, &s->connections, entry) {
+			if (c->state & CONN_LOGGED_IN)
+				++is_logged_in;
+			else if (c->state & CONN_FAILED)
+				++is_failed;
+			else if (c->state & CONN_NEVER_LOGGED_IN)
+				++is_waiting;
+
+		}
+
+		/* Potentially, we have multiple connections for each session.
+		We handle this by saying that a single connection logging in takes
+		precedent over a failed connection. We only say the session login has
+		failed if none of the connections have logged in and nothing is in
+		flight. */
+
+		if (is_logged_in)
+			++p->conn_logged_in_count;
+		if (is_failed)
+			++p->conn_failed_count;
+		if (is_waiting)
+			++p->conn_waiting_count;
+
+	/* Otherwise, it is in SESS_INIT. These need to be waited on. */
+	} else if (s->state & SESS_INIT)
+		++p->session_init_count;
+	else
+		fatal("poll_session: unknown state.");
+}
+
+void
+poll_finalize(struct session_poll *p)
+{
+	/* total number of sessions that have finished initializing (and ended
+	sucessfully or otherwise). */
+	int num_accounted_conn = p->conn_logged_in_count + p->conn_failed_count;
+
+	/* Set status flag to signal iscsictl to stop polling */
+	p->sess_conn_status = p->session_count == num_accounted_conn;
+}

Reply via email to