Attention is currently required from: plaisthos.
Hello plaisthos,
I'd like you to do a code review.
Please visit
http://gerrit.openvpn.net/c/openvpn/+/1737?usp=email
to review the following change.
Change subject: dco_linux: read multicast notifications on a dedicated netlink
socket
......................................................................
dco_linux: read multicast notifications on a dedicated netlink socket
ovpn-dco shared a single netlink socket for both synchronous
request/reply transactions (peer create/delete/get, key operations,
stats) and the asynchronous multicast notification group (peer
del/float, key swap). Because every ovpn_nl_msg_send() drains its reply
with nl_recvmsgs() on that same socket, a notification queued by the
kernel could be dispatched re-entrantly in the middle of an unrelated
request/reply: the NL_CB_VALID handler runs ovpn_handle_msg() ->
multi_process_incoming_dco() -> multi_close_instance() while the caller
is still walking an instance collection. For example multi_print_status()
refreshes stats with dco_get_peer_stats_multi() and then iterates
m->hash, and multi_delete_dup() iterates m->instances[] while closing
duplicates; an instance freed underneath either iterator becomes a
use-after-free and the server crashes (observed under mass simultaneous
client reconnects).
Subscribe the multicast group on a dedicated socket (nl_sock_notify) and
read it only from dco_read_and_process(), the top-level event-loop point
where closing an instance is safe. The request/reply socket is no longer
a member of the group, so parsing a command or stats reply can never
dispatch a notification. The event loop now monitors the notification
socket; the request/reply socket keeps being drained synchronously by
ovpn_nl_msg_send().
Change-Id: Ia7894fc360b12c5a0f63fad2abb9bbb1ad9c2bd5
GitHub: closes OpenVPN/openvpn#1067
Signed-off-by: Antonio Quartulli <[email protected]>
---
M src/openvpn/dco_linux.c
M src/openvpn/dco_linux.h
2 files changed, 74 insertions(+), 11 deletions(-)
git pull ssh://gerrit.openvpn.net:29418/openvpn refs/changes/37/1737/1
diff --git a/src/openvpn/dco_linux.c b/src/openvpn/dco_linux.c
index 40746bd..7a0f620 100644
--- a/src/openvpn/dco_linux.c
+++ b/src/openvpn/dco_linux.c
@@ -134,12 +134,8 @@
}
static int
-ovpn_nl_recvmsgs(dco_context_t *dco, const char *prefix)
+ovpn_nl_recvmsgs_report(int ret, const char *prefix)
{
- __is_locked = true;
- int ret = nl_recvmsgs(dco->nl_sock, dco->nl_cb);
- __is_locked = false;
-
switch (ret)
{
case -NLE_INTR:
@@ -174,6 +170,16 @@
return ret;
}
+static int
+ovpn_nl_recvmsgs(dco_context_t *dco, const char *prefix)
+{
+ __is_locked = true;
+ int ret = nl_recvmsgs(dco->nl_sock, dco->nl_cb);
+ __is_locked = false;
+
+ return ovpn_nl_recvmsgs_report(ret, prefix);
+}
+
/**
* Send a prepared netlink message.
*
@@ -398,9 +404,11 @@
}
/* Register for ovpn-dco specific multicast messages that the kernel may
- * send
+ * send. These are subscribed on the dedicated notification socket only, so
+ * that they are never delivered while a request/reply transaction on
+ * dco->nl_sock is being parsed.
*/
- int ret = nl_socket_add_membership(dco->nl_sock, dco->ovpn_dco_mcast_id);
+ int ret = nl_socket_add_membership(dco->nl_sock_notify,
dco->ovpn_dco_mcast_id);
if (ret)
{
msg(M_FATAL, "%s: failed to join groups: %d", __func__, ret);
@@ -449,17 +457,49 @@
nl_cb_set(dco->nl_cb, NL_CB_ACK, NL_CB_CUSTOM, ovpn_nl_cb_finish,
&dco->status);
nl_cb_set(dco->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, dco);
+ /* Set up the dedicated multicast notification socket. It shares the
message
+ * handler (ovpn_handle_msg) with the request/reply socket, but is read
only
+ * from dco_read_and_process() in the event loop, so notifications never
+ * fire while a request/reply transaction is being parsed. */
+ dco->nl_sock_notify = nl_socket_alloc();
+ if (!dco->nl_sock_notify)
+ {
+ msg(M_FATAL, "Cannot create netlink notification socket");
+ }
+
+ ret = genl_connect(dco->nl_sock_notify);
+ if (ret)
+ {
+ msg(M_FATAL, "Cannot connect to generic netlink (notify): %s",
nl_geterror(ret));
+ }
+
+ set_cloexec(nl_socket_get_fd(dco->nl_sock_notify));
+ set_nonblock(nl_socket_get_fd(dco->nl_sock_notify));
+
+ dco->nl_cb_notify = nl_cb_alloc(NL_CB_DEFAULT);
+ if (!dco->nl_cb_notify)
+ {
+ msg(M_FATAL, "failed to allocate netlink notification callback");
+ }
+
+ nl_socket_set_cb(dco->nl_sock_notify, dco->nl_cb_notify);
+ nl_cb_err(dco->nl_cb_notify, NL_CB_CUSTOM, ovpn_nl_cb_error, &dco->status);
+ nl_cb_set(dco->nl_cb_notify, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg,
dco);
+
ovpn_dco_register(dco);
/* The async PACKET messages confuse libnl and it will drop them with
* wrong sequence numbers (NLE_SEQ_MISMATCH), so disable libnl's sequence
- * number check */
+ * number check. Multicast notifications are unsolicited and likewise carry
+ * no matching sequence number, so disable the check on both sockets. */
nl_socket_disable_seq_check(dco->nl_sock);
+ nl_socket_disable_seq_check(dco->nl_sock_notify);
/* nl library sets the buffer size to 32k/32k by default which is sometimes
* overrun with very fast connecting/disconnecting clients.
* TODO: fix this in a better and more reliable way */
ASSERT(!nl_socket_set_buffer_size(dco->nl_sock, 1024 * 1024, 1024 * 1024));
+ ASSERT(!nl_socket_set_buffer_size(dco->nl_sock_notify, 1024 * 1024, 1024 *
1024));
}
bool
@@ -495,8 +535,12 @@
nl_socket_free(dco->nl_sock);
dco->nl_sock = NULL;
+ nl_socket_free(dco->nl_sock_notify);
+ dco->nl_sock_notify = NULL;
+
/* Decrease reference count */
nl_cb_put(dco->nl_cb);
+ nl_cb_put(dco->nl_cb_notify);
CLEAR(dco);
}
@@ -1164,7 +1208,13 @@
{
msg(D_DCO_DEBUG, __func__);
- return ovpn_nl_recvmsgs(dco, __func__);
+ /* Drain the multicast notification socket. This is the only place where
+ * asynchronous notifications (peer del/float, key swap) are processed, so
+ * the multi_close_instance() they may trigger happens at a safe point in
+ * the event loop rather than re-entrantly inside a request/reply. */
+ int ret = nl_recvmsgs(dco->nl_sock_notify, dco->nl_cb_notify);
+
+ return ovpn_nl_recvmsgs_report(ret, __func__);
}
static int
@@ -1326,9 +1376,12 @@
void
dco_event_set(dco_context_t *dco, struct event_set *es, void *arg)
{
- if (dco && dco->nl_sock)
+ if (dco && dco->nl_sock_notify)
{
- event_ctl(es, nl_socket_get_fd(dco->nl_sock), EVENT_READ, arg);
+ /* Only the notification socket is monitored by the event loop. The
+ * request/reply socket (dco->nl_sock) is drained synchronously by
+ * ovpn_nl_msg_send(). */
+ event_ctl(es, nl_socket_get_fd(dco->nl_sock_notify), EVENT_READ, arg);
}
}
diff --git a/src/openvpn/dco_linux.h b/src/openvpn/dco_linux.h
index e3e4824..c487581 100644
--- a/src/openvpn/dco_linux.h
+++ b/src/openvpn/dco_linux.h
@@ -62,8 +62,18 @@
typedef struct
{
+ /* socket used for synchronous request/reply transactions (commands and
+ * stats queries). It is NOT subscribed to the multicast group, so its
+ * recvmsgs never dispatch an asynchronous notification. */
struct nl_sock *nl_sock;
struct nl_cb *nl_cb;
+ /* socket dedicated to multicast notifications (peer del/float, key swap).
+ * It is read only at the top-level dco_read_and_process() point, where it
+ * is safe to close instances. Keeping it separate from nl_sock prevents
+ * notifications from being processed re-entrantly while a request/reply is
+ * in flight. */
+ struct nl_sock *nl_sock_notify;
+ struct nl_cb *nl_cb_notify;
int status;
struct context *c;
--
To view, visit http://gerrit.openvpn.net/c/openvpn/+/1737?usp=email
To unsubscribe, or for help writing mail filters, visit
http://gerrit.openvpn.net/settings?usp=email
Gerrit-MessageType: newchange
Gerrit-Project: openvpn
Gerrit-Branch: master
Gerrit-Change-Id: Ia7894fc360b12c5a0f63fad2abb9bbb1ad9c2bd5
Gerrit-Change-Number: 1737
Gerrit-PatchSet: 1
Gerrit-Owner: ordex <[email protected]>
Gerrit-Reviewer: plaisthos <[email protected]>
Gerrit-CC: openvpn-devel <[email protected]>
Gerrit-Attention: plaisthos <[email protected]>
_______________________________________________
Openvpn-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/openvpn-devel