On 8/16/22 13:57, Ilya Maximets wrote:
> On 8/16/22 13:42, Roi Dayan wrote:
>>
>>
>> On 2022-08-14 5:46 PM, Ilya Maximets wrote:
>>> Current offloading code supports only limited number of tunnel keys
>>> and silently ignores everything it doesn't understand. This is
>>> causing, for example, offloaded ERSPAN tunnels to not work, because
>>> flow is offloaded, but ERSPAN options are not provided to TC.
>>>
>>> There are a number of tunnel keys that are supported by the userspace,
>>> but silently ignored during offloading:
>>>
>>> OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT
>>> OVS_TUNNEL_KEY_ATTR_OAM
>>> OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS
>>> OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS
>>>
>>> OVS_TUNNEL_KEY_ATTR_CSUM is kind of supported, but only for actions
>>> and for some reason is set from the tunnel port instead of the
>>> provided action, and not currently supported for the tunnel key in
>>> the match.
>>>
>>> Adding a default case to fail offloading of unknown attributes. For
>>> now explicitly allowing incorrect behavior for the DONT_FRAGMENT flag,
>>> otherwise we'll break all tunnel offloading by default. VXLAN and
>>> ERSPAN options have to fail offloading, because the tunnel will not
>>> work otherwise. OAM is not a default configuration, so failing it
>>> as well. The missing DONT_FRAGMENT flag though should, probably,
>>> cause frequent flow revalidation, but that is not new with this patch.
>>>
>>> Same for the 'match' key, only clearing masks that were actually
>>> consumed, except for the DONT_FRAGMENT and CSUM flags, which are
>>> explicitly allowed and highlighted as broken.
>>>
>>> Also, the destination port as well as the CSUM configuration, for an
>>> unknown reason, were not taken from the actions list and were passed via
>>> HW offload info instead of being consumed from the set() action.
>>>
>>> Reported-at:
>>> https://mail.openvswitch.org/pipermail/ovs-dev/2022-July/395522.html
>>> Reported-by: Eelco Chaudron <[email protected]>
>>> Fixes: 8f283af89298 ("netdev-tc-offloads: Implement netdev flow put using
>>> tc interface")
>>> Signed-off-by: Ilya Maximets <[email protected]>
>>> ---
>>> lib/dpif-netlink.c | 14 +------
>>> lib/netdev-offload-tc.c | 92 +++++++++++++++++++++++++++++++++++------
>>> lib/netdev-offload.h | 3 --
>>> 3 files changed, 80 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
>>> index 89e1d4325..c219fb527 100644
>>> --- a/lib/dpif-netlink.c
>>> +++ b/lib/dpif-netlink.c
>>> @@ -2237,8 +2237,6 @@ parse_flow_put(struct dpif_netlink *dpif, struct
>>> dpif_flow_put *put)
>>> size_t left;
>>> struct netdev *dev;
>>> struct offload_info info;
>>> - ovs_be16 dst_port = 0;
>>> - uint8_t csum_on = false;
>>> int err;
>>> info.tc_modify_flow_deleted = false;
>>> @@ -2258,10 +2256,9 @@ parse_flow_put(struct dpif_netlink *dpif, struct
>>> dpif_flow_put *put)
>>> return EOPNOTSUPP;
>>> }
>>> - /* Get tunnel dst port */
>>> + /* Check the output port for a tunnel. */
>>> NL_ATTR_FOR_EACH(nla, left, put->actions, put->actions_len) {
>>> if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
>>> - const struct netdev_tunnel_config *tnl_cfg;
>>> struct netdev *outdev;
>>> odp_port_t out_port;
>>> @@ -2271,19 +2268,10 @@ parse_flow_put(struct dpif_netlink *dpif, struct
>>> dpif_flow_put *put)
>>> err = EOPNOTSUPP;
>>> goto out;
>>> }
>>> - tnl_cfg = netdev_get_tunnel_config(outdev);
>>> - if (tnl_cfg && tnl_cfg->dst_port != 0) {
>>> - dst_port = tnl_cfg->dst_port;
>>> - }
>>> - if (tnl_cfg) {
>>> - csum_on = tnl_cfg->csum;
>>> - }
>>> netdev_close(outdev);
>>> }
>>> }
>>> - info.tp_dst_port = dst_port;
>>> - info.tunnel_csum_on = csum_on;
>>> info.recirc_id_shared_with_tc = (dpif->user_features
>>> & OVS_DP_F_TC_RECIRC_SHARING);
>>> err = netdev_flow_put(dev, &match,
>>> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
>>> index 1ab12ecfe..92d424951 100644
>>> --- a/lib/netdev-offload-tc.c
>>> +++ b/lib/netdev-offload-tc.c
>>> @@ -1399,6 +1399,7 @@ static int
>>> parse_put_flow_set_action(struct tc_flower *flower, struct tc_action
>>> *action,
>>> const struct nlattr *set, size_t set_len)
>>> {
>>> + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>>> const struct nlattr *tunnel;
>>> const struct nlattr *tun_attr;
>>> size_t tun_left, tunnel_len;
>>> @@ -1417,6 +1418,7 @@ parse_put_flow_set_action(struct tc_flower *flower,
>>> struct tc_action *action,
>>> action->type = TC_ACT_ENCAP;
>>> action->encap.id_present = false;
>>> + action->encap.no_csum = 1;
>>> flower->action_count++;
>>> NL_ATTR_FOR_EACH_UNSAFE(tun_attr, tun_left, tunnel, tunnel_len) {
>>> switch (nl_attr_type(tun_attr)) {
>>> @@ -1441,6 +1443,18 @@ parse_put_flow_set_action(struct tc_flower *flower,
>>> struct tc_action *action,
>>> action->encap.ttl = nl_attr_get_u8(tun_attr);
>>> }
>>> break;
>>> + case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: {
>>> + /* XXX: This is wrong! We're ignoring the DF flag
>>> configuration
>>> + * requested by the user. However, TC for now has no way to
>>> pass
>>> + * that flag and it is set by default, meaning tunnel
>>> offloading
>>> + * will not work if 'options:df_default=false' is not set.
>>> + * Keeping incorrect behavior for now. */
>>> + }
>>> + break;
>>> + case OVS_TUNNEL_KEY_ATTR_CSUM: {
>>> + action->encap.no_csum = 0;
>>> + }
>>> + break;
>>> case OVS_TUNNEL_KEY_ATTR_IPV6_SRC: {
>>> action->encap.ipv6.ipv6_src =
>>> nl_attr_get_in6_addr(tun_attr);
>>> @@ -1465,6 +1479,10 @@ parse_put_flow_set_action(struct tc_flower *flower,
>>> struct tc_action *action,
>>> action->encap.data.present.len = nl_attr_get_size(tun_attr);
>>> }
>>> break;
>>> + default:
>>> + VLOG_DBG_RL(&rl, "unsupported tunnel key attribute %d",
>>> + nl_attr_type(tun_attr));
>>> + return EOPNOTSUPP;
>>> }
>>> }
>>> @@ -1593,18 +1611,51 @@ test_key_and_mask(struct match *match)
>>> static void
>>> flower_match_to_tun_opt(struct tc_flower *flower, const struct flow_tnl
>>> *tnl,
>>> - const struct flow_tnl *tnl_mask)
>>> + struct flow_tnl *tnl_mask)
>>> {
>>> struct geneve_opt *opt, *opt_mask;
>>> int len, cnt = 0;
>>> - memcpy(flower->key.tunnel.metadata.opts.gnv, tnl->metadata.opts.gnv,
>>> - tnl->metadata.present.len);
>>> + /* 'flower' always has an exact match on tunnel metadata length, so
>>> having
>>> + * it in a wrong format is not acceptable unless it is empty. */
>>> + if (!(tnl->flags & FLOW_TNL_F_UDPIF)) {
>>> + if (tnl->metadata.present.map) {
>>> + /* XXX: Add non-UDPIF format parsing here? */
>>> + VLOG_WARN_RL(&warn_rl, "Tunnel options are in the wrong
>>> format.");
>>> + } else {
>>> + /* There are no options, that equals for them to be in UDPIF
>>> format
>>> + * with a zero 'len'. Clearing the 'map' mask as consumed.
>>> + * No need to explicitly set 'len' to zero in the 'flower'. */
>>> + tnl_mask->flags &= ~FLOW_TNL_F_UDPIF;
>>> + memset(&tnl_mask->metadata.present.map, 0,
>>> + sizeof tnl_mask->metadata.present.map);
>>> + }
>>> + return;
>>> + }
>>> +
>>> + tnl_mask->flags &= ~FLOW_TNL_F_UDPIF;
>>> +
>>> flower->key.tunnel.metadata.present.len = tnl->metadata.present.len;
>>> + /* Copying from the key and not from the mask, since in the 'flower'
>>> + * the length for a mask is not a mask, but the actual length. TC
>>> + * will use an exact match for the length. */
>>> + flower->mask.tunnel.metadata.present.len = tnl->metadata.present.len;
>>> + memset(&tnl_mask->metadata.present.len, 0,
>>> + sizeof tnl_mask->metadata.present.len);
>>> +
>>> + if (!tnl->metadata.present.len) {
>>> + return;
>>> + }
>>> + memcpy(flower->key.tunnel.metadata.opts.gnv, tnl->metadata.opts.gnv,
>>> + tnl->metadata.present.len);
>>> memcpy(flower->mask.tunnel.metadata.opts.gnv,
>>> tnl_mask->metadata.opts.gnv,
>>> tnl->metadata.present.len);
>>> + memset(tnl_mask->metadata.opts.gnv, 0, tnl->metadata.present.len);
>>> +
>>> + /* Fixing up 'length' fields of particular options, since these are
>>> + * also not masks, but actual lengths in the 'flower' structure. */
>>> len = flower->key.tunnel.metadata.present.len;
>>> while (len) {
>>> opt = &flower->key.tunnel.metadata.opts.gnv[cnt];
>>> @@ -1615,10 +1666,6 @@ flower_match_to_tun_opt(struct tc_flower *flower,
>>> const struct flow_tnl *tnl,
>>> cnt += sizeof(struct geneve_opt) / 4 + opt->length;
>>> len -= sizeof(struct geneve_opt) + opt->length * 4;
>>> }
>>> -
>>> - /* Copying from the key and not from the mask, since in the 'flower'
>>> - * the length for a mask is not a mask, but the actual length. */
>>> - flower->mask.tunnel.metadata.present.len = tnl->metadata.present.len;
>>> }
>>> static void
>>> @@ -1907,10 +1954,6 @@ netdev_tc_parse_nl_actions(struct netdev *netdev,
>>> struct tc_flower *flower,
>>> if (err) {
>>> return err;
>>> }
>>> - if (action->type == TC_ACT_ENCAP) {
>>> - action->encap.tp_dst = info->tp_dst_port;
>>> - action->encap.no_csum = !info->tunnel_csum_on;
>>> - }
>>> } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SET_MASKED) {
>>> const struct nlattr *set = nl_attr_get(nla);
>>> const size_t set_len = nl_attr_get_size(nla);
>>> @@ -1986,7 +2029,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct
>>> match *match,
>>> const struct flow *key = &match->flow;
>>> struct flow *mask = &match->wc.masks;
>>> const struct flow_tnl *tnl = &match->flow.tunnel;
>>> - const struct flow_tnl *tnl_mask = &mask->tunnel;
>>> + struct flow_tnl *tnl_mask = &mask->tunnel;
>>> bool recirc_act = false;
>>> uint32_t block_id = 0;
>>> struct tcf_id id;
>>> @@ -2024,6 +2067,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct
>>> match *match,
>>> flower.key.tunnel.ttl = tnl->ip_ttl;
>>> flower.key.tunnel.tp_src = tnl->tp_src;
>>> flower.key.tunnel.tp_dst = tnl->tp_dst;
>>> +
>>> flower.mask.tunnel.ipv4.ipv4_src = tnl_mask->ip_src;
>>> flower.mask.tunnel.ipv4.ipv4_dst = tnl_mask->ip_dst;
>>> flower.mask.tunnel.ipv6.ipv6_src = tnl_mask->ipv6_src;
>>> @@ -2036,10 +2080,32 @@ netdev_tc_flow_put(struct netdev *netdev, struct
>>> match *match,
>>> * Degrading the flow down to exact match for now as a
>>> workaround. */
>>> flower.mask.tunnel.tp_dst = OVS_BE16_MAX;
>>> flower.mask.tunnel.id = (tnl->flags & FLOW_TNL_F_KEY) ?
>>> tnl_mask->tun_id : 0;
>>> +
>>> + memset(&tnl_mask->ip_src, 0, sizeof tnl_mask->ip_src);
>>> + memset(&tnl_mask->ip_dst, 0, sizeof tnl_mask->ip_dst);
>>> + memset(&tnl_mask->ipv6_src, 0, sizeof tnl_mask->ipv6_src);
>>> + memset(&tnl_mask->ipv6_dst, 0, sizeof tnl_mask->ipv6_dst);
>>> + memset(&tnl_mask->ip_tos, 0, sizeof tnl_mask->ip_tos);
>>> + memset(&tnl_mask->ip_ttl, 0, sizeof tnl_mask->ip_ttl);
>>> + memset(&tnl_mask->tp_dst, 0, sizeof tnl_mask->tp_dst);
>>> +
>>> + memset(&tnl_mask->tun_id, 0, sizeof tnl_mask->tun_id);
>>> + tnl_mask->flags &= ~FLOW_TNL_F_KEY;
>>> +
>>> + /* XXX: This is wrong! We're ignoring DF and CSUM flags
>>> configuration
>>> + * requested by the user. However, TC for now has no way to pass
>>> + * these flags in a flower key and their masks are set by default,
>>> + * meaning tunnel offloading will not work at all if not cleared.
>>> + * Keeping incorrect behavior for now. */
>>> + tnl_mask->flags &= ~(FLOW_TNL_F_DONT_FRAGMENT | FLOW_TNL_F_CSUM);
>>> +
>>> flower_match_to_tun_opt(&flower, tnl, tnl_mask);
>>> flower.tunnel = true;
>>> + } else {
>>> + /* There is no tunnel metadata to match on, but there could be some
>>> + * mask bits set due to flow translation artifacts. Clear them. */
>>> + memset(&mask->tunnel, 0, sizeof mask->tunnel);
>>> }
>>> - memset(&mask->tunnel, 0, sizeof mask->tunnel);
>>> flower.key.eth_type = key->dl_type;
>>> flower.mask.eth_type = mask->dl_type;
>>> diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h
>>> index 249a3102a..180d3f95f 100644
>>> --- a/lib/netdev-offload.h
>>> +++ b/lib/netdev-offload.h
>>> @@ -66,9 +66,6 @@ struct netdev_flow_dump {
>>> /* Flow offloading. */
>>> struct offload_info {
>>> - ovs_be16 tp_dst_port; /* Destination port for tunnel in SET action */
>>> - uint8_t tunnel_csum_on; /* Tunnel header with checksum */
>>> -
>>> bool recirc_id_shared_with_tc; /* Indicates whever tc chains will be
>>> in
>>> * sync with datapath recirc ids. */
>>>
>>
>>
>> Hi,
>>
>> I didn't check the source of the issue but now dump-flows always
>> shows geneve(). I ran some vxlan test but got geneve() in the match
>>
>> tunnel(tun_id=0x2a,src=7.7.7.8,dst=7.7.7.7,tp_dst=4789,geneve(),flags(+key)),recirc_id(0),in_port(vxlan_sys_4789),eth(src=a6:96:4b:ab:b3:9a,dst=de:a8:d5:4d:39:48),eth_type(0x0800),ipv4(frag=no),
>> packets:9577, bytes:1150576, used:0.800s, actions:enp8s0f0_0
>
> Yes, that is expected, because we can not distinguish geneve with
> zero-length options from other tunnel types while parsing the
> tunnel() attribute. There is just not enough information.
>
> I highlighted that in the commit message for the first patch:
>
> "Also, flower always has an exact match on the present.len field
> regardless of its value and regardless of this field being masked
> by OVS flow translation layer while installing the flow. Hence,
> all tunnel flows dumped from TC should have an exact match on
> present.len and also UDPIF flag, because present.len doesn't make
> sense without that flag. Without the change, zero-length options
> match is incorrectly reported as a wildcard match. The side effect
> though is that zero-length match on geneve options is reported even
> for other tunnel types, e.g. vxlan. But that should be fairly
> harmless. To avoid reporting a match on empty geneve options for
> vxlan/etc. tunnels we'll need to check the tunnel port type, there
> is not enough information in the TUNNEL attribute itself."
Something like this should do the trick, I guess:
diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index 487e13940..417545db7 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -940,7 +940,8 @@ parse_tc_flower_to_actions(struct tc_flower *flower,
}
static int
-parse_tc_flower_to_match(struct tc_flower *flower,
+parse_tc_flower_to_match(const struct netdev *netdev,
+ struct tc_flower *flower,
struct match *match,
struct nlattr **actions,
struct dpif_flow_stats *stats,
@@ -1141,7 +1142,11 @@ parse_tc_flower_to_match(struct tc_flower *flower,
flower->mask.tunnel.tp_dst);
}
- flower_tun_opt_to_match(match, flower);
+ if (!strcmp(netdev_get_type(netdev), "geneve")) {
+ flower_tun_opt_to_match(match, flower);
+ } else {
+ VLOG_INFO("%s: Non geneve tunnel: %s", netdev_get_name(netdev),
netdev_get_type(netdev));
+ }
}
act_off = nl_msg_start_nested(buf, OVS_FLOW_ATTR_ACTIONS);
@@ -1182,8 +1187,8 @@ netdev_tc_flow_dump_next(struct netdev_flow_dump *dump,
continue;
}
- if (parse_tc_flower_to_match(&flower, match, actions, stats, attrs,
- wbuffer, dump->terse)) {
+ if (parse_tc_flower_to_match(netdev, &flower, match, actions,
+ stats, attrs, wbuffer, dump->terse)) {
continue;
}
@@ -2120,7 +2125,9 @@ netdev_tc_flow_put(struct netdev *netdev, struct match
*match,
* Keeping incorrect behavior for now. */
tnl_mask->flags &= ~(FLOW_TNL_F_DONT_FRAGMENT | FLOW_TNL_F_CSUM);
- flower_match_to_tun_opt(&flower, tnl, tnl_mask);
+ if (!strcmp(netdev_get_type(netdev), "geneve")) {
+ flower_match_to_tun_opt(&flower, tnl, tnl_mask);
+ }
flower.tunnel = true;
} else {
/* There is no tunnel metadata to match on, but there could be some
@@ -2400,7 +2407,8 @@ netdev_tc_flow_get(struct netdev *netdev,
}
in_port = netdev_ifindex_to_odp_port(id.ifindex);
- parse_tc_flower_to_match(&flower, match, actions, stats, attrs, buf,
false);
+ parse_tc_flower_to_match(netdev, &flower, match, actions,
+ stats, attrs, buf, false);
match->wc.masks.in_port.odp_port = u32_to_odp(UINT32_MAX);
match->flow.in_port.odp_port = in_port;
---
I can add this to the series in v4 or send as a separate patch later if
v3 otherwise is fine. Let me know what you think.
Best regards, Ilya Maximets.
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev