This is a robustness improvement for a specific case where very long
ovn-controller iterations (about 20s) and long JSONRPC message queues
in the ovsdb client synchronization layer lead to unanswered echo
requests that in turn lead to connections being dropped.
In such a case, the echo request is "stuck" in the incoming message
queue and might not be processed in time, because we process everything
in small batches.
Thus, instead of waiting until we can process an incoming echo request,
we remember our last send activity and preemptively send an echo reply
when needed.
Signed-off-by: Martin Morgenstern <martin.morgenst...@cloudandheat.com>
---
lib/jsonrpc.c | 35 +++++++++++++++++++++++++++++++++++
lib/jsonrpc.h | 2 ++
lib/ovsdb-cs.c | 7 +++++++
3 files changed, 44 insertions(+)
diff --git a/lib/jsonrpc.c b/lib/jsonrpc.c
index 2e35180f8..ae55e2113 100644
--- a/lib/jsonrpc.c
+++ b/lib/jsonrpc.c
@@ -21,6 +21,7 @@
#include <errno.h>
#include "byteq.h"
+#include "coverage.h"
#include "openvswitch/dynamic-string.h"
#include "fatal-signal.h"
#include "openvswitch/json.h"
@@ -36,6 +37,8 @@
#include "openvswitch/vlog.h"
VLOG_DEFINE_THIS_MODULE(jsonrpc);
+
+COVERAGE_DEFINE(jsonrpc_gratuitous_echo);
struct jsonrpc {
struct stream *stream;
@@ -825,6 +828,9 @@ struct jsonrpc_session {
/* Limits for jsonrpc. */
size_t max_n_msgs;
size_t max_backlog_bytes;
+
+ /* Used to decide whether we need to send a gratuitous echo reply. */
+ long long int last_send_timestamp;
};
static void
@@ -879,6 +885,7 @@ jsonrpc_session_open_multiple(const struct svec *remotes,
bool retry)
s->seqno = 0;
s->dscp = 0;
s->last_error = 0;
+ s->last_send_timestamp = 0;
jsonrpc_session_set_backlog_threshold(s, 0, 0);
@@ -1159,6 +1166,7 @@ int
jsonrpc_session_send(struct jsonrpc_session *s, struct jsonrpc_msg *msg)
{
if (s->rpc) {
+ s->last_send_timestamp = time_msec();
return jsonrpc_send(s->rpc, msg);
} else {
jsonrpc_msg_destroy(msg);
@@ -1207,6 +1215,33 @@ jsonrpc_session_recv(struct jsonrpc_session *s)
return NULL;
}
+/* Preemptively sends an echo reply on 's' if nothing went out through
+ * jsonrpc_session_send() for at least one probe interval. */
+void
+jsonrpc_session_gratuitous_echo_reply(struct jsonrpc_session *s)
+{
+ /* XXX: This is actually the wrong interval (our side's interval),
+ * but usually it is configured the same on both sides. */
+ int probe_interval = reconnect_get_probe_interval(s->reconnect);
+
+ /* When 's->rpc' is null, jsonrpc_session_send() just drops the message. */
+ if (!s->rpc || !probe_interval
+ || time_msec() < s->last_send_timestamp + probe_interval) {
+ return;
+ }
+
+ struct json *params = json_array_create_empty();
+ struct json *id = json_string_create("echo");
+ struct jsonrpc_msg *reply = jsonrpc_create_reply(params, id);
+ /* Calling jsonrpc_create_reply() creates a clone of id, so we can
+ * destroy it already (this just decreases the reference count again). */
+ json_destroy(id);
+
+ VLOG_DBG("Sending gratuitous echo reply.");
+ jsonrpc_session_send(s, reply);
+ COVERAGE_INC(jsonrpc_gratuitous_echo);
+}
+
void
jsonrpc_session_recv_wait(struct jsonrpc_session *s)
{
diff --git a/lib/jsonrpc.h b/lib/jsonrpc.h
index 1baffcd80..d3796f094 100644
--- a/lib/jsonrpc.h
+++ b/lib/jsonrpc.h
@@ -119,6 +119,8 @@ void jsonrpc_session_replace(struct jsonrpc_session *,
struct jsonrpc *);
void jsonrpc_session_run(struct jsonrpc_session *);
void jsonrpc_session_wait(struct jsonrpc_session *);
+void jsonrpc_session_gratuitous_echo_reply(struct jsonrpc_session *s);
+
size_t jsonrpc_session_get_backlog(const struct jsonrpc_session *);
const char *jsonrpc_session_get_name(const struct jsonrpc_session *);
size_t jsonrpc_session_get_n_remotes(const struct jsonrpc_session *);
diff --git a/lib/ovsdb-cs.c b/lib/ovsdb-cs.c
index b5eda88ad..9250db267 100644
--- a/lib/ovsdb-cs.c
+++ b/lib/ovsdb-cs.c
@@ -647,6 +647,13 @@ ovsdb_cs_run(struct ovsdb_cs *cs, struct ovs_list *events)
ovsdb_cs_process_msg(cs, msg);
jsonrpc_msg_destroy(msg);
}
+
+
+ /* Send a gratuitous (unsolicited) echo reply if necessary and we didn't
+ * do it already in the above batch. This is a preemptive activity
+ * signal which doesn't hurt the other side. */
+ jsonrpc_session_gratuitous_echo_reply(cs->session);
+
ovs_list_push_back_all(events, &cs->data.events);
}
--
2.45.2
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev