On 8/16/22 14:37, Ilya Maximets wrote:
> On 8/16/22 13:57, Ilya Maximets wrote:
>> On 8/16/22 13:42, Roi Dayan wrote:
>>>
>>>
>>> On 2022-08-14 5:46 PM, Ilya Maximets wrote:
>>>> Current offloading code supports only limited number of tunnel keys
>>>> and silently ignores everything it doesn't understand. This is
>>>> causing, for example, offloaded ERSPAN tunnels to not work, because
>>>> flow is offloaded, but ERSPAN options are not provided to TC.
>>>>
>>>> There is a number of tunnel keys, which are supported by the userspace,
>>>> but silently ignored during offloading:
>>>>
>>>> OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT
>>>> OVS_TUNNEL_KEY_ATTR_OAM
>>>> OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS
>>>> OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS
>>>>
>>>> OVS_TUNNEL_KEY_ATTR_CSUM is kind of supported, but only for actions
>>>> and for some reason is set from the tunnel port instead of the
>>>> provided action, and not currently supported for the tunnel key in
>>>> the match.
>>>>
>>>> Addig a default case to fail offloading of unknown attributes. For
>>>> now explicitly allowing incorrect behavior for the DONT_FRAGMENT flag,
>>>> otherwise we'll break all tunnel offloading by default. VXLAN and
>>>> ERSPAN options has to fail offloading, because the tunnel will not
>>>> work otherwise. OAM is not a default configurations, so failing it
>>>> as well. The missing DONT_FRAGMENT flag though should, probably,
>>>> cause frequent flow revalidation, but that is not new with this patch.
>>>>
>>>> Same for the 'match' key, only clearing masks that was actually
>>>> consumed, except for the DONT_FRAGMENT and CSUM flags, which are
>>>> explicitly allowed and highlighted as broken.
>>>>
>>>> Also, destination port as well as CSUM configuration for unknown
>>>> reason was not taken from the actions list and were passed via HW
>>>> offload info instead of being consumed from the set() action.
>>>>
>>>> Reported-at:
>>>> https://mail.openvswitch.org/pipermail/ovs-dev/2022-July/395522.html
>>>> Reported-by: Eelco Chaudron <[email protected]>
>>>> Fixes: 8f283af89298 ("netdev-tc-offloads: Implement netdev flow put using
>>>> tc interface")
>>>> Signed-off-by: Ilya Maximets <[email protected]>
>>>> ---
>>>> lib/dpif-netlink.c | 14 +------
>>>> lib/netdev-offload-tc.c | 92 +++++++++++++++++++++++++++++++++++------
>>>> lib/netdev-offload.h | 3 --
>>>> 3 files changed, 80 insertions(+), 29 deletions(-)
>>>>
>>>> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
>>>> index 89e1d4325..c219fb527 100644
>>>> --- a/lib/dpif-netlink.c
>>>> +++ b/lib/dpif-netlink.c
>>>> @@ -2237,8 +2237,6 @@ parse_flow_put(struct dpif_netlink *dpif, struct
>>>> dpif_flow_put *put)
>>>> size_t left;
>>>> struct netdev *dev;
>>>> struct offload_info info;
>>>> - ovs_be16 dst_port = 0;
>>>> - uint8_t csum_on = false;
>>>> int err;
>>>> info.tc_modify_flow_deleted = false;
>>>> @@ -2258,10 +2256,9 @@ parse_flow_put(struct dpif_netlink *dpif, struct
>>>> dpif_flow_put *put)
>>>> return EOPNOTSUPP;
>>>> }
>>>> - /* Get tunnel dst port */
>>>> + /* Check the output port for a tunnel. */
>>>> NL_ATTR_FOR_EACH(nla, left, put->actions, put->actions_len) {
>>>> if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
>>>> - const struct netdev_tunnel_config *tnl_cfg;
>>>> struct netdev *outdev;
>>>> odp_port_t out_port;
>>>> @@ -2271,19 +2268,10 @@ parse_flow_put(struct dpif_netlink *dpif, struct
>>>> dpif_flow_put *put)
>>>> err = EOPNOTSUPP;
>>>> goto out;
>>>> }
>>>> - tnl_cfg = netdev_get_tunnel_config(outdev);
>>>> - if (tnl_cfg && tnl_cfg->dst_port != 0) {
>>>> - dst_port = tnl_cfg->dst_port;
>>>> - }
>>>> - if (tnl_cfg) {
>>>> - csum_on = tnl_cfg->csum;
>>>> - }
>>>> netdev_close(outdev);
>>>> }
>>>> }
>>>> - info.tp_dst_port = dst_port;
>>>> - info.tunnel_csum_on = csum_on;
>>>> info.recirc_id_shared_with_tc = (dpif->user_features
>>>> & OVS_DP_F_TC_RECIRC_SHARING);
>>>> err = netdev_flow_put(dev, &match,
>>>> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
>>>> index 1ab12ecfe..92d424951 100644
>>>> --- a/lib/netdev-offload-tc.c
>>>> +++ b/lib/netdev-offload-tc.c
>>>> @@ -1399,6 +1399,7 @@ static int
>>>> parse_put_flow_set_action(struct tc_flower *flower, struct tc_action
>>>> *action,
>>>> const struct nlattr *set, size_t set_len)
>>>> {
>>>> + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>>>> const struct nlattr *tunnel;
>>>> const struct nlattr *tun_attr;
>>>> size_t tun_left, tunnel_len;
>>>> @@ -1417,6 +1418,7 @@ parse_put_flow_set_action(struct tc_flower *flower,
>>>> struct tc_action *action,
>>>> action->type = TC_ACT_ENCAP;
>>>> action->encap.id_present = false;
>>>> + action->encap.no_csum = 1;
>>>> flower->action_count++;
>>>> NL_ATTR_FOR_EACH_UNSAFE(tun_attr, tun_left, tunnel, tunnel_len) {
>>>> switch (nl_attr_type(tun_attr)) {
>>>> @@ -1441,6 +1443,18 @@ parse_put_flow_set_action(struct tc_flower *flower,
>>>> struct tc_action *action,
>>>> action->encap.ttl = nl_attr_get_u8(tun_attr);
>>>> }
>>>> break;
>>>> + case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: {
>>>> + /* XXX: This is wrong! We're ignoring the DF flag
>>>> configuration
>>>> + * requested by the user. However, TC for now has no way to
>>>> pass
>>>> + * that flag and it is set by default, meaning tunnel
>>>> offloading
>>>> + * will not work if 'options:df_default=false' is not set.
>>>> + * Keeping incorrect behavior for now. */
>>>> + }
>>>> + break;
>>>> + case OVS_TUNNEL_KEY_ATTR_CSUM: {
>>>> + action->encap.no_csum = 0;
>>>> + }
>>>> + break;
>>>> case OVS_TUNNEL_KEY_ATTR_IPV6_SRC: {
>>>> action->encap.ipv6.ipv6_src =
>>>> nl_attr_get_in6_addr(tun_attr);
>>>> @@ -1465,6 +1479,10 @@ parse_put_flow_set_action(struct tc_flower *flower,
>>>> struct tc_action *action,
>>>> action->encap.data.present.len = nl_attr_get_size(tun_attr);
>>>> }
>>>> break;
>>>> + default:
>>>> + VLOG_DBG_RL(&rl, "unsupported tunnel key attribute %d",
>>>> + nl_attr_type(tun_attr));
>>>> + return EOPNOTSUPP;
>>>> }
>>>> }
>>>> @@ -1593,18 +1611,51 @@ test_key_and_mask(struct match *match)
>>>> static void
>>>> flower_match_to_tun_opt(struct tc_flower *flower, const struct flow_tnl
>>>> *tnl,
>>>> - const struct flow_tnl *tnl_mask)
>>>> + struct flow_tnl *tnl_mask)
>>>> {
>>>> struct geneve_opt *opt, *opt_mask;
>>>> int len, cnt = 0;
>>>> - memcpy(flower->key.tunnel.metadata.opts.gnv, tnl->metadata.opts.gnv,
>>>> - tnl->metadata.present.len);
>>>> + /* 'flower' always has an exact match on tunnel metadata length, so
>>>> having
>>>> + * it in a wrong format is not acceptable unless it is empty. */
>>>> + if (!(tnl->flags & FLOW_TNL_F_UDPIF)) {
>>>> + if (tnl->metadata.present.map) {
>>>> + /* XXX: Add non-UDPIF format parsing here? */
>>>> + VLOG_WARN_RL(&warn_rl, "Tunnel options are in the wrong
>>>> format.");
>>>> + } else {
>>>> + /* There are no options, that equals for them to be in UDPIF
>>>> format
>>>> + * with a zero 'len'. Clearing the 'map' mask as consumed.
>>>> + * No need to explicitly set 'len' to zero in the 'flower'. */
>>>> + tnl_mask->flags &= ~FLOW_TNL_F_UDPIF;
>>>> + memset(&tnl_mask->metadata.present.map, 0,
>>>> + sizeof tnl_mask->metadata.present.map);
>>>> + }
>>>> + return;
>>>> + }
>>>> +
>>>> + tnl_mask->flags &= ~FLOW_TNL_F_UDPIF;
>>>> +
>>>> flower->key.tunnel.metadata.present.len = tnl->metadata.present.len;
>>>> + /* Copying from the key and not from the mask, since in the 'flower'
>>>> + * the length for a mask is not a mask, but the actual length. TC
>>>> + * will use an exact match for the length. */
>>>> + flower->mask.tunnel.metadata.present.len = tnl->metadata.present.len;
>>>> + memset(&tnl_mask->metadata.present.len, 0,
>>>> + sizeof tnl_mask->metadata.present.len);
>>>> +
>>>> + if (!tnl->metadata.present.len) {
>>>> + return;
>>>> + }
>>>> + memcpy(flower->key.tunnel.metadata.opts.gnv, tnl->metadata.opts.gnv,
>>>> + tnl->metadata.present.len);
>>>> memcpy(flower->mask.tunnel.metadata.opts.gnv,
>>>> tnl_mask->metadata.opts.gnv,
>>>> tnl->metadata.present.len);
>>>> + memset(tnl_mask->metadata.opts.gnv, 0, tnl->metadata.present.len);
>>>> +
>>>> + /* Fixing up 'length' fields of particular options, since these are
>>>> + * also not masks, but actual lengths in the 'flower' structure. */
>>>> len = flower->key.tunnel.metadata.present.len;
>>>> while (len) {
>>>> opt = &flower->key.tunnel.metadata.opts.gnv[cnt];
>>>> @@ -1615,10 +1666,6 @@ flower_match_to_tun_opt(struct tc_flower *flower,
>>>> const struct flow_tnl *tnl,
>>>> cnt += sizeof(struct geneve_opt) / 4 + opt->length;
>>>> len -= sizeof(struct geneve_opt) + opt->length * 4;
>>>> }
>>>> -
>>>> - /* Copying from the key and not from the mask, since in the 'flower'
>>>> - * the length for a mask is not a mask, but the actual length. */
>>>> - flower->mask.tunnel.metadata.present.len = tnl->metadata.present.len;
>>>> }
>>>> static void
>>>> @@ -1907,10 +1954,6 @@ netdev_tc_parse_nl_actions(struct netdev *netdev,
>>>> struct tc_flower *flower,
>>>> if (err) {
>>>> return err;
>>>> }
>>>> - if (action->type == TC_ACT_ENCAP) {
>>>> - action->encap.tp_dst = info->tp_dst_port;
>>>> - action->encap.no_csum = !info->tunnel_csum_on;
>>>> - }
>>>> } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SET_MASKED) {
>>>> const struct nlattr *set = nl_attr_get(nla);
>>>> const size_t set_len = nl_attr_get_size(nla);
>>>> @@ -1986,7 +2029,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct
>>>> match *match,
>>>> const struct flow *key = &match->flow;
>>>> struct flow *mask = &match->wc.masks;
>>>> const struct flow_tnl *tnl = &match->flow.tunnel;
>>>> - const struct flow_tnl *tnl_mask = &mask->tunnel;
>>>> + struct flow_tnl *tnl_mask = &mask->tunnel;
>>>> bool recirc_act = false;
>>>> uint32_t block_id = 0;
>>>> struct tcf_id id;
>>>> @@ -2024,6 +2067,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct
>>>> match *match,
>>>> flower.key.tunnel.ttl = tnl->ip_ttl;
>>>> flower.key.tunnel.tp_src = tnl->tp_src;
>>>> flower.key.tunnel.tp_dst = tnl->tp_dst;
>>>> +
>>>> flower.mask.tunnel.ipv4.ipv4_src = tnl_mask->ip_src;
>>>> flower.mask.tunnel.ipv4.ipv4_dst = tnl_mask->ip_dst;
>>>> flower.mask.tunnel.ipv6.ipv6_src = tnl_mask->ipv6_src;
>>>> @@ -2036,10 +2080,32 @@ netdev_tc_flow_put(struct netdev *netdev, struct
>>>> match *match,
>>>> * Degrading the flow down to exact match for now as a
>>>> workaround. */
>>>> flower.mask.tunnel.tp_dst = OVS_BE16_MAX;
>>>> flower.mask.tunnel.id = (tnl->flags & FLOW_TNL_F_KEY) ?
>>>> tnl_mask->tun_id : 0;
>>>> +
>>>> + memset(&tnl_mask->ip_src, 0, sizeof tnl_mask->ip_src);
>>>> + memset(&tnl_mask->ip_dst, 0, sizeof tnl_mask->ip_dst);
>>>> + memset(&tnl_mask->ipv6_src, 0, sizeof tnl_mask->ipv6_src);
>>>> + memset(&tnl_mask->ipv6_dst, 0, sizeof tnl_mask->ipv6_dst);
>>>> + memset(&tnl_mask->ip_tos, 0, sizeof tnl_mask->ip_tos);
>>>> + memset(&tnl_mask->ip_ttl, 0, sizeof tnl_mask->ip_ttl);
>>>> + memset(&tnl_mask->tp_dst, 0, sizeof tnl_mask->tp_dst);
>>>> +
>>>> + memset(&tnl_mask->tun_id, 0, sizeof tnl_mask->tun_id);
>>>> + tnl_mask->flags &= ~FLOW_TNL_F_KEY;
>>>> +
>>>> + /* XXX: This is wrong! We're ignoring DF and CSUM flags
>>>> configuration
>>>> + * requested by the user. However, TC for now has no way to pass
>>>> + * these flags in a flower key and their masks are set by default,
>>>> + * meaning tunnel offloading will not work at all if not cleared.
>>>> + * Keeping incorrect behavior for now. */
>>>> + tnl_mask->flags &= ~(FLOW_TNL_F_DONT_FRAGMENT | FLOW_TNL_F_CSUM);
>>>> +
>>>> flower_match_to_tun_opt(&flower, tnl, tnl_mask);
>>>> flower.tunnel = true;
>>>> + } else {
>>>> + /* There is no tunnel metadata to match on, but there could be
>>>> some
>>>> + * mask bits set due to flow translation artifacts. Clear them.
>>>> */
>>>> + memset(&mask->tunnel, 0, sizeof mask->tunnel);
>>>> }
>>>> - memset(&mask->tunnel, 0, sizeof mask->tunnel);
>>>> flower.key.eth_type = key->dl_type;
>>>> flower.mask.eth_type = mask->dl_type;
>>>> diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h
>>>> index 249a3102a..180d3f95f 100644
>>>> --- a/lib/netdev-offload.h
>>>> +++ b/lib/netdev-offload.h
>>>> @@ -66,9 +66,6 @@ struct netdev_flow_dump {
>>>> /* Flow offloading. */
>>>> struct offload_info {
>>>> - ovs_be16 tp_dst_port; /* Destination port for tunnel in SET action */
>>>> - uint8_t tunnel_csum_on; /* Tunnel header with checksum */
>>>> -
>>>> bool recirc_id_shared_with_tc; /* Indicates whever tc chains will
>>>> be in
>>>> * sync with datapath recirc ids. */
>>>>
>>>
>>>
>>> Hi,
>>>
>>> I didn't check the source of the issue but now dump-flows always
>>> shows geneve(). I ran some vxlan test but got geneve() in the match
>>>
>>> tunnel(tun_id=0x2a,src=7.7.7.8,dst=7.7.7.7,tp_dst=4789,geneve(),flags(+key)),recirc_id(0),in_port(vxlan_sys_4789),eth(src=a6:96:4b:ab:b3:9a,dst=de:a8:d5:4d:39:48),eth_type(0x0800),ipv4(frag=no),
>>> packets:9577, bytes:1150576, used:0.800s, actions:enp8s0f0_0
>>
>> Yes, that is expected, because we can not distinguish geneve with
>> zero-length options from other tunnel types while parsing the
>> tunnel() attribute. There is just no enough information.
>>
>> I highlighted that in the commit message for the first patch:
>>
>> "Also, flower always has an exact match on the present.len field
>> regardless of its value and regardless of this field being masked
>> by OVS flow translation layer while installing the flow. Hence,
>> all tunnel flows dumped from TC should have an exact match on
>> present.len and also UDPIF flag, because present.len doesn't make
>> sense without that flag. Without the change, zero-length options
>> match is incorrectly reported as a wildcard match. The side effect
>> though is that zero-length match on geneve options is reported even
>> for other tunnel types, e.g. vxlan. But that should be fairly
>> harmless. To avoid reporting a match on empty geneve options for
>> vxlan/etc. tunnels we'll need to check the tunnel port type, there
>> is no enough information in the TUNNEL attribute itself."
>
> Something like this should do the trick, I guess:
>
> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> index 487e13940..417545db7 100644
> --- a/lib/netdev-offload-tc.c
> +++ b/lib/netdev-offload-tc.c
> @@ -940,7 +940,8 @@ parse_tc_flower_to_actions(struct tc_flower *flower,
> }
>
> static int
> -parse_tc_flower_to_match(struct tc_flower *flower,
> +parse_tc_flower_to_match(const struct netdev *netdev,
> + struct tc_flower *flower,
> struct match *match,
> struct nlattr **actions,
> struct dpif_flow_stats *stats,
> @@ -1141,7 +1142,11 @@ parse_tc_flower_to_match(struct tc_flower *flower,
> flower->mask.tunnel.tp_dst);
> }
>
> - flower_tun_opt_to_match(match, flower);
> + if (!strcmp(netdev_get_type(netdev), "geneve")) {
> + flower_tun_opt_to_match(match, flower);
> + } else {
> + VLOG_INFO("%s: Non geneve tunnel: %s", netdev_get_name(netdev),
> netdev_get_type(netdev));
This 'else' is not neccessary. Added just for local testing.
Will remove in the actual patch.
> + }
> }
>
> act_off = nl_msg_start_nested(buf, OVS_FLOW_ATTR_ACTIONS);
> @@ -1182,8 +1187,8 @@ netdev_tc_flow_dump_next(struct netdev_flow_dump *dump,
> continue;
> }
>
> - if (parse_tc_flower_to_match(&flower, match, actions, stats, attrs,
> - wbuffer, dump->terse)) {
> + if (parse_tc_flower_to_match(netdev, &flower, match, actions,
> + stats, attrs, wbuffer, dump->terse)) {
> continue;
> }
>
> @@ -2120,7 +2125,9 @@ netdev_tc_flow_put(struct netdev *netdev, struct match
> *match,
> * Keeping incorrect behavior for now. */
> tnl_mask->flags &= ~(FLOW_TNL_F_DONT_FRAGMENT | FLOW_TNL_F_CSUM);
>
> - flower_match_to_tun_opt(&flower, tnl, tnl_mask);
> + if (!strcmp(netdev_get_type(netdev), "geneve")) {
> + flower_match_to_tun_opt(&flower, tnl, tnl_mask);
> + }
> flower.tunnel = true;
> } else {
> /* There is no tunnel metadata to match on, but there could be some
> @@ -2400,7 +2407,8 @@ netdev_tc_flow_get(struct netdev *netdev,
> }
>
> in_port = netdev_ifindex_to_odp_port(id.ifindex);
> - parse_tc_flower_to_match(&flower, match, actions, stats, attrs, buf,
> false);
> + parse_tc_flower_to_match(netdev, &flower, match, actions,
> + stats, attrs, buf, false);
>
> match->wc.masks.in_port.odp_port = u32_to_odp(UINT32_MAX);
> match->flow.in_port.odp_port = in_port;
> ---
>
> I can add this to the series in v4 or send as a separate patch later if
> v3 otherwise is fine. Let me know what do you think.
>
> Best regards, Ilya Maximets.
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev