OVS periodically polls a netlink socket for route and rule updates from the
kernel and triggers a full routing table reset upon any relevant change.
With a high volume of notifications, combined with a large overall number of
routes and rules, the control thread may become starved, spending most of
its cycles parsing route updates:
  wakeup due to [POLLIN] on fd 91 (NETLINK_ROUTE<->NETLINK_ROUTE) at lib/netlink-socket.c:1418 (96% CPU usage)
  wakeup due to [POLLIN] on fd 96 (FIFO pipe:[140158448]) at lib/ovs-rcu.c:259 (96% CPU usage)
  wakeup due to [POLLIN] on fd 96 (FIFO pipe:[140158448]) at lib/ovs-rcu.c:259 (99% CPU usage)
  wakeup due to [POLLIN] on fd 14 (<->/var/run/openvswitch/db.sock) at lib/stream-fd.c:157 (99% CPU usage)
  wakeup due to [POLLIN] on fd 96 (FIFO pipe:[140158448]) at vswitchd/bridge.c:431 (99% CPU usage)
  wakeup due to 105-ms timeout at vswitchd/bridge.c:3195 (99% CPU usage)

Such behavior was triggered, for example, by adding 500 rules with a table
lookup action, where each rule referenced a unique table and each table held
100 routes, amounting to 50k routes in the custom tables in total.

To optimize CPU usage under such conditions, a back-off mechanism is applied
before doing a routing table reset, which reduces the frequency of full dumps
of routing tables and rules.  The back-off is dynamic in the sense that it
takes into account the duration of the last reset operation, so the back-off
delay also grows with the total volume of routes and rules.  (A standalone
sketch of this timing logic is included after the patch below.)

Signed-off-by: Dima Chumak <dchu...@nvidia.com>
---
 lib/route-table.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/lib/route-table.c b/lib/route-table.c
index 3b12e7fa92ec..7b32d5de3c9e 100644
--- a/lib/route-table.c
+++ b/lib/route-table.c
@@ -45,6 +45,9 @@
  * old headers.  (We can't test for it with #ifdef because it's an enum.) */
 #define RTA_MARK 16
 
+#define ROUTE_TABLE_RESET_BACKOFF_MIN 1
+#define ROUTE_TABLE_RESET_BACKOFF_MAX 32
+
 VLOG_DEFINE_THIS_MODULE(route_table);
 
 COVERAGE_DEFINE(route_table_dump);
@@ -66,6 +69,7 @@ static struct nln_notifier *name_notifier = NULL;
 
 static bool route_table_valid = false;
 static bool rules_valid = false;
+static long long route_table_reset_last_ms;
 
 static int route_nln_parse(struct ofpbuf *, void *change);
 
@@ -119,6 +123,8 @@ route_table_init(void)
     ovs_assert(!rule_notifier);
     ovs_assert(!rule6_notifier);
 
+    route_table_reset_last_ms = time_msec();
+
     ovs_router_init();
 
     nln = nln_create(NETLINK_ROUTE, route_nln_parse, &nln_rtmsg_change);
@@ -147,15 +153,51 @@ void
 route_table_run(void)
     OVS_EXCLUDED(route_table_mutex)
 {
+    static uint64_t backoff = ROUTE_TABLE_RESET_BACKOFF_MIN;
+    static long long last_reset_duration_ms = 1;
+    static long long last_backoff_ms;
+
     ovs_mutex_lock(&route_table_mutex);
     if (nln) {
+        long long prev_reset_duration_ms = last_reset_duration_ms;
+        long long ms_since_backoff;
+        long long ms_since_reset;
+
         rtnetlink_run();
         nln_run(nln);
 
+        ms_since_reset = time_msec() - route_table_reset_last_ms;
         if (!route_table_valid || !rules_valid) {
+            struct timeval start, end;
+
+            if (ms_since_reset < backoff * last_reset_duration_ms) {
+                goto out;
+            }
+
+            if (ms_since_reset < 2 * backoff * last_reset_duration_ms) {
+                if (backoff < ROUTE_TABLE_RESET_BACKOFF_MAX) {
+                    last_backoff_ms = time_msec();
+                    backoff <<= 1;
+                }
+            }
+
+            xgettimeofday(&start);
             route_table_reset();
+            xgettimeofday(&end);
+
+            last_reset_duration_ms =
+                timeval_to_msec(&end) - timeval_to_msec(&start);
+        }
+
+        ms_since_backoff = time_msec() - last_backoff_ms;
+        if (ms_since_backoff > 2 * backoff * prev_reset_duration_ms) {
+            if (backoff > ROUTE_TABLE_RESET_BACKOFF_MIN) {
+                last_backoff_ms = time_msec();
+                backoff >>= 1;
+            }
         }
     }
+out:
     ovs_mutex_unlock(&route_table_mutex);
 }
 
@@ -276,6 +318,7 @@ route_table_reset(void)
         }
     }
     rules_dump();
+    route_table_reset_last_ms = time_msec();
 }
 
 static void
-- 
2.49.0

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev
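For illustration, here is a minimal standalone sketch of the same dynamic
back-off idea that can be compiled and run outside of OVS.  It is not the
patched OVS code: maybe_reset(), fake_reset(), now_msec() and the
BACKOFF_MIN/BACKOFF_MAX constants are hypothetical stand-ins for
route_table_run(), route_table_reset(), time_msec() and the
ROUTE_TABLE_RESET_BACKOFF_* limits, and the 50 ms simulated reset duration
is an arbitrary assumption.  Only the gating condition and the
doubling/halving of the back-off factor follow the logic of the patch above.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/time.h>
    #include <unistd.h>

    #define BACKOFF_MIN 1
    #define BACKOFF_MAX 32

    /* Hypothetical helper: current time in milliseconds. */
    static long long
    now_msec(void)
    {
        struct timeval tv;

        gettimeofday(&tv, NULL);
        return (long long) tv.tv_sec * 1000 + tv.tv_usec / 1000;
    }

    /* Hypothetical stand-in for route_table_reset(): pretend a full dump of
     * all routes and rules takes about 50 ms. */
    static void
    fake_reset(void)
    {
        usleep(50 * 1000);
    }

    /* Loosely mirrors the gating added to route_table_run(): defer the reset
     * while less than backoff * last_reset_duration_ms has elapsed since the
     * previous reset, double the back-off factor while resets keep arriving
     * quickly, and halve it again after a quiet period.  Returns true if a
     * reset actually ran. */
    static bool
    maybe_reset(bool cache_invalid)
    {
        static uint64_t backoff = BACKOFF_MIN;
        static long long last_reset_duration_ms = 1;
        static long long last_reset_ms;
        static long long last_backoff_ms;
        long long prev_reset_duration_ms = last_reset_duration_ms;
        long long ms_since_reset = now_msec() - last_reset_ms;
        bool did_reset = false;

        if (cache_invalid) {
            if (ms_since_reset < backoff * last_reset_duration_ms) {
                return false;           /* Deferred; try again later. */
            }
            if (ms_since_reset < 2 * backoff * last_reset_duration_ms
                && backoff < BACKOFF_MAX) {
                last_backoff_ms = now_msec();
                backoff <<= 1;          /* Still resetting often: back off more. */
            }

            long long start = now_msec();
            fake_reset();
            last_reset_duration_ms = now_msec() - start;
            last_reset_ms = now_msec();
            did_reset = true;
        }

        /* Quiet for a while: gradually shrink the back-off factor. */
        if (now_msec() - last_backoff_ms > 2 * backoff * prev_reset_duration_ms
            && backoff > BACKOFF_MIN) {
            last_backoff_ms = now_msec();
            backoff >>= 1;
        }
        return did_reset;
    }

    int
    main(void)
    {
        /* Pretend the route cache is invalidated on every iteration, as it
         * would be under a steady stream of netlink notifications. */
        for (int i = 0; i < 20; i++) {
            printf("iteration %2d: %s\n", i,
                   maybe_reset(true) ? "reset" : "deferred");
            usleep(20 * 1000);
        }
        return 0;
    }

With these assumed numbers, once backoff has grown to 4 and a reset takes
about 50 ms, a new invalidation arriving within roughly 200 ms of the
previous reset is simply deferred, which is the effect described in the
commit message: the reset rate is bounded by the cost of the resets
themselves rather than by the notification rate.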