The tc module combines the use of the `tc_transact` helper function for communication with the in-kernel tc infrastructure with assertions on the reply data by `ofpbuf_at_assert` on the received data prior to further processing.
`tc_transact` in turn calls `nl_transact`, which via `nl_transact_multiple__` ultimately calls and handles return value from `recvmsg`. On error a check for EAGAIN is performed and a consequence of this condition is effectively to provide a non-error (0) result and an empty reply buffer. Before this change, the `tc_transact` and, consumers of it, were unaware of this condition. The use of assertions on the reply buffer can as such lead to a fatal crash of OVS. To be fair, the behavior of `nl_transact` when handling an EAGAIN return is currently not documented, so this change also adds that documentation. While fixing the problem, it led me to find potential problems with several of the one-time initialization functions in the netdev-offload-tc module. Before this patch they would happily accept temporary failure as permanent one, with consequences for the remaining lifetime of the process. For the record, the symptom of the crash is this in the log: EMER|../include/openvswitch/ofpbuf.h:194: assertion offset + size <= b->size failed in ofpbuf_at_assert() And an excerpt of the backtrace looks like this: 0x0000561dac1396d1 in ofpbuf_at_assert (b=0x7fb650005d20, b=0x7fb650005d20, offset=16, size=20) at ../include/openvswitch/ofpbuf.h:194 tc_replace_flower (id=<optimized out>, flower=<optimized out>) at ../lib/tc.c:3223 0x0000561dac128155 in netdev_tc_flow_put (netdev=0x561dacf91840, match=<optimized out>, actions=<optimized out>, actions_len=<optimized out>, ufid=<optimized out>, info=<optimized out>, stats=<optimized out>) at ../lib/netdev-offload-tc.c:2096 0x0000561dac117541 in netdev_flow_put (stats=<optimized out>, info=0x7fb65b7ba780, ufid=<optimized out>, act_len=<optimized out>, actions=<optimized out>, match=0x7fb65b7ba980, netdev=0x561dacf91840) at ../lib/netdev-offload.c:257 parse_flow_put (put=0x7fb65b7bcc50, dpif=0x561dad0ad550) at ../lib/dpif-netlink.c:2297 try_send_to_netdev (op=0x7fb65b7bcc48, dpif=0x561dad0ad550) at ../lib/dpif-netlink.c:2384 Reported-At: https://launchpad.net/bugs/2018500 Fixes f98e418fbdb6 (tc: Add tc flower functions) Fixes 407556ac6c90 (netlink-socket: Avoid forcing a reply for final message in a transaction.) Signed-off-by: Frode Nordahl <[email protected]> --- lib/dpif-netlink.c | 4 +++- lib/dpif.c | 2 +- lib/netdev-linux.c | 40 ++++++++++++++++++++++++++-------------- lib/netdev-offload-tc.c | 31 ++++++++++++++++++++++--------- lib/netlink-socket.c | 5 +++++ lib/tc.c | 18 ++++++++++++++++++ 6 files changed, 75 insertions(+), 25 deletions(-) diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 60bd39643..75537b2f8 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -2340,7 +2340,9 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put) } netdev_set_hw_info(oor_netdev, HW_INFO_TYPE_OOR, true); } - level = (err == ENOSPC || err == EOPNOTSUPP) ? VLL_DBG : VLL_ERR; + level = (err == EAGAIN || + err == ENOSPC || + err == EOPNOTSUPP) ? VLL_DBG : VLL_ERR; VLOG_RL(&rl, level, "failed to offload flow: %s: %s", ovs_strerror(err), (oor_netdev ? oor_netdev->name : dev->name)); diff --git a/lib/dpif.c b/lib/dpif.c index 3305401fe..2bbb0aac4 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -1751,7 +1751,7 @@ flow_message_log_level(int error) * Kernels that support flow wildcarding will reject these flows as * duplicates (EEXIST), so lower the log level to debug for these * types of messages. */ - return (error && error != EEXIST) ? VLL_WARN : VLL_DBG; + return (error && error != EEXIST && error != EAGAIN) ? VLL_WARN : VLL_DBG; } static bool diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 36620199e..a6065bf69 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -5817,8 +5817,11 @@ tc_get_policer_action(uint32_t index, struct ofputil_meter_stats *stats) error = tc_transact(&request, &replyp); if (error) { - VLOG_ERR_RL(&rl, "Failed to dump police action (index: %u), err=%d", - index, error); + VLOG_RL(&rl, + (error == EAGAIN) ? VLL_DBG : VLL_ERR, + "Failed to dump police action (index: %u), err=%d", + index, + error); return error; } @@ -5849,8 +5852,11 @@ tc_del_policer_action(uint32_t index, struct ofputil_meter_stats *stats) error = tc_transact(&request, &replyp); if (error) { - VLOG_ERR_RL(&rl, "Failed to delete police action (index: %u), err=%d", - index, error); + VLOG_RL(&rl, + (error == EAGAIN) ? VLL_DBG : VLL_ERR, + "Failed to delete police action (index: %u), err=%d", + index, + error); return error; } @@ -6114,11 +6120,13 @@ tc_query_class(const struct netdev *netdev, error = tc_transact(&request, replyp); if (error) { - VLOG_WARN_RL(&rl, "query %s class %u:%u (parent %u:%u) failed (%s)", - netdev_get_name(netdev), - tc_get_major(handle), tc_get_minor(handle), - tc_get_major(parent), tc_get_minor(parent), - ovs_strerror(error)); + VLOG_RL(&rl, + (error == EAGAIN) ? VLL_DBG : VLL_WARN, + "query %s class %u:%u (parent %u:%u) failed (%s)", + netdev_get_name(netdev), + tc_get_major(handle), tc_get_minor(handle), + tc_get_major(parent), tc_get_minor(parent), + ovs_strerror(error)); } return error; } @@ -6140,10 +6148,12 @@ tc_delete_class(const struct netdev *netdev, unsigned int handle) error = tc_transact(&request, NULL); if (error) { - VLOG_WARN_RL(&rl, "delete %s class %u:%u failed (%s)", - netdev_get_name(netdev), - tc_get_major(handle), tc_get_minor(handle), - ovs_strerror(error)); + VLOG_RL(&rl, + (error == EAGAIN) ? VLL_DBG : VLL_WARN, + "delete %s class %u:%u failed (%s)", + netdev_get_name(netdev), + tc_get_major(handle), tc_get_minor(handle), + ovs_strerror(error)); } return error; } @@ -6262,7 +6272,9 @@ tc_query_qdisc(const struct netdev *netdev_) ops = &tc_ops_other; } } - } else if ((!error && !qdisc->size) || error == ENOENT) { + } else if ((!error && !qdisc->size) || + error == ENOENT || error == EAGAIN) + { /* Either it's a built-in qdisc, or (on Linux pre-2.6.35) it's a qdisc * set up by some other entity that doesn't have a handle 1:0. We will * assume that it's the system default qdisc. */ diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index 4f26dd8cc..4c15989d6 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -2505,10 +2505,11 @@ netdev_tc_flow_get(struct netdev *netdev, err = tc_get_flower(&id, &flower); if (err) { - VLOG_ERR_RL(&error_rl, - "flow get failed (dev %s prio %d handle %d): %s", - netdev_get_name(netdev), id.prio, id.handle, - ovs_strerror(err)); + VLOG_RL(&error_rl, + (err == EAGAIN) ? VLL_DBG : VLL_ERR, + "flow get failed (dev %s prio %d handle %d): %s", + netdev_get_name(netdev), id.prio, id.handle, + ovs_strerror(err)); return err; } @@ -2594,7 +2595,9 @@ probe_multi_mask_per_prio(int ifindex) memset(&flower.mask.dst_mac, 0xff, sizeof flower.mask.dst_mac); id1 = tc_make_tcf_id(ifindex, block_id, prio, TC_INGRESS); - error = tc_replace_flower(&id1, &flower); + do { + error = tc_replace_flower(&id1, &flower); + } while (error == EAGAIN); if (error) { goto out; } @@ -2603,7 +2606,9 @@ probe_multi_mask_per_prio(int ifindex) memset(&flower.mask.src_mac, 0xff, sizeof flower.mask.src_mac); id2 = tc_make_tcf_id(ifindex, block_id, prio, TC_INGRESS); - error = tc_replace_flower(&id2, &flower); + do { + error = tc_replace_flower(&id2, &flower); + } while (error == EAGAIN); tc_del_flower_filter(&id1); if (error) { @@ -2625,6 +2630,7 @@ probe_insert_ct_state_rule(int ifindex, uint16_t ct_state, struct tcf_id *id) { int prio = TC_RESERVED_PRIORITY_MAX + 1; struct tc_flower flower; + int error; memset(&flower, 0, sizeof flower); flower.key.ct_state = ct_state; @@ -2634,7 +2640,10 @@ probe_insert_ct_state_rule(int ifindex, uint16_t ct_state, struct tcf_id *id) flower.mask.eth_type = OVS_BE16_MAX; *id = tc_make_tcf_id(ifindex, 0, prio, TC_INGRESS); - return tc_replace_flower(id, &flower); + do { + error = tc_replace_flower(id, &flower); + } while (error == EAGAIN); + return error; } static void @@ -2657,7 +2666,9 @@ probe_ct_state_support(int ifindex) goto out; } - error = tc_get_flower(&id, &flower); + do { + error = tc_get_flower(&id, &flower); + } while (error == EAGAIN); if (error || flower.mask.ct_state != ct_state) { goto out_del; } @@ -2732,7 +2743,9 @@ probe_tc_block_support(int ifindex) memset(&flower.mask.dst_mac, 0xff, sizeof flower.mask.dst_mac); id = tc_make_tcf_id(ifindex, block_id, prio, TC_INGRESS); - error = tc_replace_flower(&id, &flower); + do { + error = tc_replace_flower(&id, &flower); + } while (error == EAGAIN); tc_add_del_qdisc(ifindex, false, block_id, TC_INGRESS); diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c index 80da20d9f..dea060fc3 100644 --- a/lib/netlink-socket.c +++ b/lib/netlink-socket.c @@ -1798,6 +1798,11 @@ nl_pool_release(struct nl_sock *sock) * * 2. Resending the request causes it to be re-executed, so the request * needs to be idempotent. + * + * 3. In the event that the kernel is too busy to handle the request to + * receive the response (i.e. EAGAIN), this function will still return + * 0. The caller can check for this condition by checking for a zero + * size of the 'replyp' ofpbuf buffer. */ int nl_transact(int protocol, const struct ofpbuf *request, diff --git a/lib/tc.c b/lib/tc.c index 5c32c6f97..7df6a10c5 100644 --- a/lib/tc.c +++ b/lib/tc.c @@ -237,11 +237,29 @@ static void request_from_tcf_id(struct tcf_id *id, uint16_t eth_type, } } +/* The `tc_transact` function is a wrapper around `nl_transact` with the + * addition of: + * + * 1. the 'request' ofpbuf buffer is freed after `nl_transact` returns, + * regardless of success or failure. + * + * 2. When a 'replyp' pointer is provided; in the event of the kernel + * being too busy to process the request for the response, a positive + * error return will be provided with the value of EAGAIN. + * + * Please acquaint yourself with the documentation of the `nl_transact` + * function in the netlink-sock module before making use of this function. + */ int tc_transact(struct ofpbuf *request, struct ofpbuf **replyp) { int error = nl_transact(NETLINK_ROUTE, request, replyp); ofpbuf_uninit(request); + + if (!error && replyp && !(*replyp)->size) { + return EAGAIN; + } + return error; } -- 2.39.2 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
