Current offloading code supports only limited number of tunnel keys and silently ignores everything it doesn't understand. This is causing, for example, offloaded ERSPAN tunnels to not work, because flow is offloaded, but ERSPAN options are not provided to TC.
There is a number of tunnel keys, which are supported by the userspace, but silently ignored during offloading: OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT OVS_TUNNEL_KEY_ATTR_OAM OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS OVS_TUNNEL_KEY_ATTR_CSUM is kind of supported, but only for actions and for some reason is set from the tunnel port instead of the provided action, and not currently supported for the tunnel key in the match. Addig a default case to fail offloading of unknown attributes. For now explicitly allowing incorrect behavior for the DONT_FRAGMENT flag, otherwise we'll break all tunnel offloading by default. VXLAN and ERSPAN options has to fail offloading, because the tunnel will not work otherwise. OAM is not a default configurations, so failing it as well. The missing DONT_FRAGMENT flag though should, probably, cause frequent flow revalidation, but that is not new with this patch. Same for the 'match' key, only clearing masks that was actually consumed, except for the DONT_FRAGMENT and CSUM flags, which are explicitly allowed and highlighted as broken. Also, destination port as well as CSUM configuration for unknown reason was not taken from the actions list and were passed via HW offload info instead of being consumed from the set() action. Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2022-July/395522.html Reported-by: Eelco Chaudron <[email protected]> Fixes: 8f283af89298 ("netdev-tc-offloads: Implement netdev flow put using tc interface") Signed-off-by: Ilya Maximets <[email protected]> --- lib/dpif-netlink.c | 14 +------ lib/netdev-offload-tc.c | 92 +++++++++++++++++++++++++++++++++++------ lib/netdev-offload.h | 3 -- 3 files changed, 80 insertions(+), 29 deletions(-) diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 89e1d4325..c219fb527 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -2237,8 +2237,6 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put) size_t left; struct netdev *dev; struct offload_info info; - ovs_be16 dst_port = 0; - uint8_t csum_on = false; int err; info.tc_modify_flow_deleted = false; @@ -2258,10 +2256,9 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put) return EOPNOTSUPP; } - /* Get tunnel dst port */ + /* Check the output port for a tunnel. */ NL_ATTR_FOR_EACH(nla, left, put->actions, put->actions_len) { if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) { - const struct netdev_tunnel_config *tnl_cfg; struct netdev *outdev; odp_port_t out_port; @@ -2271,19 +2268,10 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put) err = EOPNOTSUPP; goto out; } - tnl_cfg = netdev_get_tunnel_config(outdev); - if (tnl_cfg && tnl_cfg->dst_port != 0) { - dst_port = tnl_cfg->dst_port; - } - if (tnl_cfg) { - csum_on = tnl_cfg->csum; - } netdev_close(outdev); } } - info.tp_dst_port = dst_port; - info.tunnel_csum_on = csum_on; info.recirc_id_shared_with_tc = (dpif->user_features & OVS_DP_F_TC_RECIRC_SHARING); err = netdev_flow_put(dev, &match, diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c index 1ab12ecfe..92d424951 100644 --- a/lib/netdev-offload-tc.c +++ b/lib/netdev-offload-tc.c @@ -1399,6 +1399,7 @@ static int parse_put_flow_set_action(struct tc_flower *flower, struct tc_action *action, const struct nlattr *set, size_t set_len) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); const struct nlattr *tunnel; const struct nlattr *tun_attr; size_t tun_left, tunnel_len; @@ -1417,6 +1418,7 @@ parse_put_flow_set_action(struct tc_flower *flower, struct tc_action *action, action->type = TC_ACT_ENCAP; action->encap.id_present = false; + action->encap.no_csum = 1; flower->action_count++; NL_ATTR_FOR_EACH_UNSAFE(tun_attr, tun_left, tunnel, tunnel_len) { switch (nl_attr_type(tun_attr)) { @@ -1441,6 +1443,18 @@ parse_put_flow_set_action(struct tc_flower *flower, struct tc_action *action, action->encap.ttl = nl_attr_get_u8(tun_attr); } break; + case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: { + /* XXX: This is wrong! We're ignoring the DF flag configuration + * requested by the user. However, TC for now has no way to pass + * that flag and it is set by default, meaning tunnel offloading + * will not work if 'options:df_default=false' is not set. + * Keeping incorrect behavior for now. */ + } + break; + case OVS_TUNNEL_KEY_ATTR_CSUM: { + action->encap.no_csum = 0; + } + break; case OVS_TUNNEL_KEY_ATTR_IPV6_SRC: { action->encap.ipv6.ipv6_src = nl_attr_get_in6_addr(tun_attr); @@ -1465,6 +1479,10 @@ parse_put_flow_set_action(struct tc_flower *flower, struct tc_action *action, action->encap.data.present.len = nl_attr_get_size(tun_attr); } break; + default: + VLOG_DBG_RL(&rl, "unsupported tunnel key attribute %d", + nl_attr_type(tun_attr)); + return EOPNOTSUPP; } } @@ -1593,18 +1611,51 @@ test_key_and_mask(struct match *match) static void flower_match_to_tun_opt(struct tc_flower *flower, const struct flow_tnl *tnl, - const struct flow_tnl *tnl_mask) + struct flow_tnl *tnl_mask) { struct geneve_opt *opt, *opt_mask; int len, cnt = 0; - memcpy(flower->key.tunnel.metadata.opts.gnv, tnl->metadata.opts.gnv, - tnl->metadata.present.len); + /* 'flower' always has an exact match on tunnel metadata length, so having + * it in a wrong format is not acceptable unless it is empty. */ + if (!(tnl->flags & FLOW_TNL_F_UDPIF)) { + if (tnl->metadata.present.map) { + /* XXX: Add non-UDPIF format parsing here? */ + VLOG_WARN_RL(&warn_rl, "Tunnel options are in the wrong format."); + } else { + /* There are no options, that equals for them to be in UDPIF format + * with a zero 'len'. Clearing the 'map' mask as consumed. + * No need to explicitly set 'len' to zero in the 'flower'. */ + tnl_mask->flags &= ~FLOW_TNL_F_UDPIF; + memset(&tnl_mask->metadata.present.map, 0, + sizeof tnl_mask->metadata.present.map); + } + return; + } + + tnl_mask->flags &= ~FLOW_TNL_F_UDPIF; + flower->key.tunnel.metadata.present.len = tnl->metadata.present.len; + /* Copying from the key and not from the mask, since in the 'flower' + * the length for a mask is not a mask, but the actual length. TC + * will use an exact match for the length. */ + flower->mask.tunnel.metadata.present.len = tnl->metadata.present.len; + memset(&tnl_mask->metadata.present.len, 0, + sizeof tnl_mask->metadata.present.len); + + if (!tnl->metadata.present.len) { + return; + } + memcpy(flower->key.tunnel.metadata.opts.gnv, tnl->metadata.opts.gnv, + tnl->metadata.present.len); memcpy(flower->mask.tunnel.metadata.opts.gnv, tnl_mask->metadata.opts.gnv, tnl->metadata.present.len); + memset(tnl_mask->metadata.opts.gnv, 0, tnl->metadata.present.len); + + /* Fixing up 'length' fields of particular options, since these are + * also not masks, but actual lengths in the 'flower' structure. */ len = flower->key.tunnel.metadata.present.len; while (len) { opt = &flower->key.tunnel.metadata.opts.gnv[cnt]; @@ -1615,10 +1666,6 @@ flower_match_to_tun_opt(struct tc_flower *flower, const struct flow_tnl *tnl, cnt += sizeof(struct geneve_opt) / 4 + opt->length; len -= sizeof(struct geneve_opt) + opt->length * 4; } - - /* Copying from the key and not from the mask, since in the 'flower' - * the length for a mask is not a mask, but the actual length. */ - flower->mask.tunnel.metadata.present.len = tnl->metadata.present.len; } static void @@ -1907,10 +1954,6 @@ netdev_tc_parse_nl_actions(struct netdev *netdev, struct tc_flower *flower, if (err) { return err; } - if (action->type == TC_ACT_ENCAP) { - action->encap.tp_dst = info->tp_dst_port; - action->encap.no_csum = !info->tunnel_csum_on; - } } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SET_MASKED) { const struct nlattr *set = nl_attr_get(nla); const size_t set_len = nl_attr_get_size(nla); @@ -1986,7 +2029,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, const struct flow *key = &match->flow; struct flow *mask = &match->wc.masks; const struct flow_tnl *tnl = &match->flow.tunnel; - const struct flow_tnl *tnl_mask = &mask->tunnel; + struct flow_tnl *tnl_mask = &mask->tunnel; bool recirc_act = false; uint32_t block_id = 0; struct tcf_id id; @@ -2024,6 +2067,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, flower.key.tunnel.ttl = tnl->ip_ttl; flower.key.tunnel.tp_src = tnl->tp_src; flower.key.tunnel.tp_dst = tnl->tp_dst; + flower.mask.tunnel.ipv4.ipv4_src = tnl_mask->ip_src; flower.mask.tunnel.ipv4.ipv4_dst = tnl_mask->ip_dst; flower.mask.tunnel.ipv6.ipv6_src = tnl_mask->ipv6_src; @@ -2036,10 +2080,32 @@ netdev_tc_flow_put(struct netdev *netdev, struct match *match, * Degrading the flow down to exact match for now as a workaround. */ flower.mask.tunnel.tp_dst = OVS_BE16_MAX; flower.mask.tunnel.id = (tnl->flags & FLOW_TNL_F_KEY) ? tnl_mask->tun_id : 0; + + memset(&tnl_mask->ip_src, 0, sizeof tnl_mask->ip_src); + memset(&tnl_mask->ip_dst, 0, sizeof tnl_mask->ip_dst); + memset(&tnl_mask->ipv6_src, 0, sizeof tnl_mask->ipv6_src); + memset(&tnl_mask->ipv6_dst, 0, sizeof tnl_mask->ipv6_dst); + memset(&tnl_mask->ip_tos, 0, sizeof tnl_mask->ip_tos); + memset(&tnl_mask->ip_ttl, 0, sizeof tnl_mask->ip_ttl); + memset(&tnl_mask->tp_dst, 0, sizeof tnl_mask->tp_dst); + + memset(&tnl_mask->tun_id, 0, sizeof tnl_mask->tun_id); + tnl_mask->flags &= ~FLOW_TNL_F_KEY; + + /* XXX: This is wrong! We're ignoring DF and CSUM flags configuration + * requested by the user. However, TC for now has no way to pass + * these flags in a flower key and their masks are set by default, + * meaning tunnel offloading will not work at all if not cleared. + * Keeping incorrect behavior for now. */ + tnl_mask->flags &= ~(FLOW_TNL_F_DONT_FRAGMENT | FLOW_TNL_F_CSUM); + flower_match_to_tun_opt(&flower, tnl, tnl_mask); flower.tunnel = true; + } else { + /* There is no tunnel metadata to match on, but there could be some + * mask bits set due to flow translation artifacts. Clear them. */ + memset(&mask->tunnel, 0, sizeof mask->tunnel); } - memset(&mask->tunnel, 0, sizeof mask->tunnel); flower.key.eth_type = key->dl_type; flower.mask.eth_type = mask->dl_type; diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h index 249a3102a..180d3f95f 100644 --- a/lib/netdev-offload.h +++ b/lib/netdev-offload.h @@ -66,9 +66,6 @@ struct netdev_flow_dump { /* Flow offloading. */ struct offload_info { - ovs_be16 tp_dst_port; /* Destination port for tunnel in SET action */ - uint8_t tunnel_csum_on; /* Tunnel header with checksum */ - bool recirc_id_shared_with_tc; /* Indicates whever tc chains will be in * sync with datapath recirc ids. */ -- 2.34.3 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
