On 8/16/22 14:37, Ilya Maximets wrote:
> On 8/16/22 13:57, Ilya Maximets wrote:
>> On 8/16/22 13:42, Roi Dayan wrote:
>>>
>>>
>>> On 2022-08-14 5:46 PM, Ilya Maximets wrote:
>>>> Current offloading code supports only limited number of tunnel keys
>>>> and silently ignores everything it doesn't understand.  This is
>>>> causing, for example, offloaded ERSPAN tunnels to not work, because
>>>> flow is offloaded, but ERSPAN options are not provided to TC.
>>>>
>>>> There is a number of tunnel keys, which are supported by the userspace,
>>>> but silently ignored during offloading:
>>>>
>>>>    OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT
>>>>    OVS_TUNNEL_KEY_ATTR_OAM
>>>>    OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS
>>>>    OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS
>>>>
>>>> OVS_TUNNEL_KEY_ATTR_CSUM is kind of supported, but only for actions
>>>> and for some reason is set from the tunnel port instead of the
>>>> provided action, and not currently supported for the tunnel key in
>>>> the match.
>>>>
>>>> Addig a default case to fail offloading of unknown attributes.  For
>>>> now explicitly allowing incorrect behavior for the DONT_FRAGMENT flag,
>>>> otherwise we'll break all tunnel offloading by default.  VXLAN and
>>>> ERSPAN options has to fail offloading, because the tunnel will not
>>>> work otherwise.  OAM is not a default configurations, so failing it
>>>> as well. The missing DONT_FRAGMENT flag though should, probably,
>>>> cause frequent flow revalidation, but that is not new with this patch.
>>>>
>>>> Same for the 'match' key, only clearing masks that was actually
>>>> consumed, except for the DONT_FRAGMENT and CSUM flags, which are
>>>> explicitly allowed and highlighted as broken.
>>>>
>>>> Also, destination port as well as CSUM configuration for unknown
>>>> reason was not taken from the actions list and were passed via HW
>>>> offload info instead of being consumed from the set() action.
>>>>
>>>> Reported-at: 
>>>> https://mail.openvswitch.org/pipermail/ovs-dev/2022-July/395522.html
>>>> Reported-by: Eelco Chaudron <[email protected]>
>>>> Fixes: 8f283af89298 ("netdev-tc-offloads: Implement netdev flow put using 
>>>> tc interface")
>>>> Signed-off-by: Ilya Maximets <[email protected]>
>>>> ---
>>>>   lib/dpif-netlink.c      | 14 +------
>>>>   lib/netdev-offload-tc.c | 92 +++++++++++++++++++++++++++++++++++------
>>>>   lib/netdev-offload.h    |  3 --
>>>>   3 files changed, 80 insertions(+), 29 deletions(-)
>>>>
>>>> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
>>>> index 89e1d4325..c219fb527 100644
>>>> --- a/lib/dpif-netlink.c
>>>> +++ b/lib/dpif-netlink.c
>>>> @@ -2237,8 +2237,6 @@ parse_flow_put(struct dpif_netlink *dpif, struct 
>>>> dpif_flow_put *put)
>>>>       size_t left;
>>>>       struct netdev *dev;
>>>>       struct offload_info info;
>>>> -    ovs_be16 dst_port = 0;
>>>> -    uint8_t csum_on = false;
>>>>       int err;
>>>>         info.tc_modify_flow_deleted = false;
>>>> @@ -2258,10 +2256,9 @@ parse_flow_put(struct dpif_netlink *dpif, struct 
>>>> dpif_flow_put *put)
>>>>           return EOPNOTSUPP;
>>>>       }
>>>>   -    /* Get tunnel dst port */
>>>> +    /* Check the output port for a tunnel. */
>>>>       NL_ATTR_FOR_EACH(nla, left, put->actions, put->actions_len) {
>>>>           if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
>>>> -            const struct netdev_tunnel_config *tnl_cfg;
>>>>               struct netdev *outdev;
>>>>               odp_port_t out_port;
>>>>   @@ -2271,19 +2268,10 @@ parse_flow_put(struct dpif_netlink *dpif, struct 
>>>> dpif_flow_put *put)
>>>>                   err = EOPNOTSUPP;
>>>>                   goto out;
>>>>               }
>>>> -            tnl_cfg = netdev_get_tunnel_config(outdev);
>>>> -            if (tnl_cfg && tnl_cfg->dst_port != 0) {
>>>> -                dst_port = tnl_cfg->dst_port;
>>>> -            }
>>>> -            if (tnl_cfg) {
>>>> -                csum_on = tnl_cfg->csum;
>>>> -            }
>>>>               netdev_close(outdev);
>>>>           }
>>>>       }
>>>>   -    info.tp_dst_port = dst_port;
>>>> -    info.tunnel_csum_on = csum_on;
>>>>       info.recirc_id_shared_with_tc = (dpif->user_features
>>>>                                        & OVS_DP_F_TC_RECIRC_SHARING);
>>>>       err = netdev_flow_put(dev, &match,
>>>> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
>>>> index 1ab12ecfe..92d424951 100644
>>>> --- a/lib/netdev-offload-tc.c
>>>> +++ b/lib/netdev-offload-tc.c
>>>> @@ -1399,6 +1399,7 @@ static int
>>>>   parse_put_flow_set_action(struct tc_flower *flower, struct tc_action 
>>>> *action,
>>>>                             const struct nlattr *set, size_t set_len)
>>>>   {
>>>> +    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>>>>       const struct nlattr *tunnel;
>>>>       const struct nlattr *tun_attr;
>>>>       size_t tun_left, tunnel_len;
>>>> @@ -1417,6 +1418,7 @@ parse_put_flow_set_action(struct tc_flower *flower, 
>>>> struct tc_action *action,
>>>>         action->type = TC_ACT_ENCAP;
>>>>       action->encap.id_present = false;
>>>> +    action->encap.no_csum = 1;
>>>>       flower->action_count++;
>>>>       NL_ATTR_FOR_EACH_UNSAFE(tun_attr, tun_left, tunnel, tunnel_len) {
>>>>           switch (nl_attr_type(tun_attr)) {
>>>> @@ -1441,6 +1443,18 @@ parse_put_flow_set_action(struct tc_flower *flower, 
>>>> struct tc_action *action,
>>>>               action->encap.ttl = nl_attr_get_u8(tun_attr);
>>>>           }
>>>>           break;
>>>> +        case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: {
>>>> +            /* XXX: This is wrong!  We're ignoring the DF flag 
>>>> configuration
>>>> +             * requested by the user.  However, TC for now has no way to 
>>>> pass
>>>> +             * that flag and it is set by default, meaning tunnel 
>>>> offloading
>>>> +             * will not work if 'options:df_default=false' is not set.
>>>> +             * Keeping incorrect behavior for now. */
>>>> +        }
>>>> +        break;
>>>> +        case OVS_TUNNEL_KEY_ATTR_CSUM: {
>>>> +            action->encap.no_csum = 0;
>>>> +        }
>>>> +        break;
>>>>           case OVS_TUNNEL_KEY_ATTR_IPV6_SRC: {
>>>>               action->encap.ipv6.ipv6_src =
>>>>                   nl_attr_get_in6_addr(tun_attr);
>>>> @@ -1465,6 +1479,10 @@ parse_put_flow_set_action(struct tc_flower *flower, 
>>>> struct tc_action *action,
>>>>               action->encap.data.present.len = nl_attr_get_size(tun_attr);
>>>>           }
>>>>           break;
>>>> +        default:
>>>> +            VLOG_DBG_RL(&rl, "unsupported tunnel key attribute %d",
>>>> +                        nl_attr_type(tun_attr));
>>>> +            return EOPNOTSUPP;
>>>>           }
>>>>       }
>>>>   @@ -1593,18 +1611,51 @@ test_key_and_mask(struct match *match)
>>>>     static void
>>>>   flower_match_to_tun_opt(struct tc_flower *flower, const struct flow_tnl 
>>>> *tnl,
>>>> -                        const struct flow_tnl *tnl_mask)
>>>> +                        struct flow_tnl *tnl_mask)
>>>>   {
>>>>       struct geneve_opt *opt, *opt_mask;
>>>>       int len, cnt = 0;
>>>>   -    memcpy(flower->key.tunnel.metadata.opts.gnv, tnl->metadata.opts.gnv,
>>>> -           tnl->metadata.present.len);
>>>> +    /* 'flower' always has an exact match on tunnel metadata length, so 
>>>> having
>>>> +     * it in a wrong format is not acceptable unless it is empty. */
>>>> +    if (!(tnl->flags & FLOW_TNL_F_UDPIF)) {
>>>> +        if (tnl->metadata.present.map) {
>>>> +            /* XXX: Add non-UDPIF format parsing here? */
>>>> +            VLOG_WARN_RL(&warn_rl, "Tunnel options are in the wrong 
>>>> format.");
>>>> +        } else {
>>>> +            /* There are no options, that equals for them to be in UDPIF 
>>>> format
>>>> +             * with a zero 'len'.  Clearing the 'map' mask as consumed.
>>>> +             * No need to explicitly set 'len' to zero in the 'flower'. */
>>>> +            tnl_mask->flags &= ~FLOW_TNL_F_UDPIF;
>>>> +            memset(&tnl_mask->metadata.present.map, 0,
>>>> +                   sizeof tnl_mask->metadata.present.map);
>>>> +        }
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    tnl_mask->flags &= ~FLOW_TNL_F_UDPIF;
>>>> +
>>>>       flower->key.tunnel.metadata.present.len = tnl->metadata.present.len;
>>>> +    /* Copying from the key and not from the mask, since in the 'flower'
>>>> +     * the length for a mask is not a mask, but the actual length.  TC
>>>> +     * will use an exact match for the length. */
>>>> +    flower->mask.tunnel.metadata.present.len = tnl->metadata.present.len;
>>>> +    memset(&tnl_mask->metadata.present.len, 0,
>>>> +           sizeof tnl_mask->metadata.present.len);
>>>> +
>>>> +    if (!tnl->metadata.present.len) {
>>>> +        return;
>>>> +    }
>>>>   +    memcpy(flower->key.tunnel.metadata.opts.gnv, tnl->metadata.opts.gnv,
>>>> +           tnl->metadata.present.len);
>>>>       memcpy(flower->mask.tunnel.metadata.opts.gnv, 
>>>> tnl_mask->metadata.opts.gnv,
>>>>              tnl->metadata.present.len);
>>>>   +    memset(tnl_mask->metadata.opts.gnv, 0, tnl->metadata.present.len);
>>>> +
>>>> +    /* Fixing up 'length' fields of particular options, since these are
>>>> +     * also not masks, but actual lengths in the 'flower' structure. */
>>>>       len = flower->key.tunnel.metadata.present.len;
>>>>       while (len) {
>>>>           opt = &flower->key.tunnel.metadata.opts.gnv[cnt];
>>>> @@ -1615,10 +1666,6 @@ flower_match_to_tun_opt(struct tc_flower *flower, 
>>>> const struct flow_tnl *tnl,
>>>>           cnt += sizeof(struct geneve_opt) / 4 + opt->length;
>>>>           len -= sizeof(struct geneve_opt) + opt->length * 4;
>>>>       }
>>>> -
>>>> -    /* Copying from the key and not from the mask, since in the 'flower'
>>>> -     * the length for a mask is not a mask, but the actual length. */
>>>> -    flower->mask.tunnel.metadata.present.len = tnl->metadata.present.len;
>>>>   }
>>>>     static void
>>>> @@ -1907,10 +1954,6 @@ netdev_tc_parse_nl_actions(struct netdev *netdev, 
>>>> struct tc_flower *flower,
>>>>               if (err) {
>>>>                   return err;
>>>>               }
>>>> -            if (action->type == TC_ACT_ENCAP) {
>>>> -                action->encap.tp_dst = info->tp_dst_port;
>>>> -                action->encap.no_csum = !info->tunnel_csum_on;
>>>> -            }
>>>>           } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SET_MASKED) {
>>>>               const struct nlattr *set = nl_attr_get(nla);
>>>>               const size_t set_len = nl_attr_get_size(nla);
>>>> @@ -1986,7 +2029,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct 
>>>> match *match,
>>>>       const struct flow *key = &match->flow;
>>>>       struct flow *mask = &match->wc.masks;
>>>>       const struct flow_tnl *tnl = &match->flow.tunnel;
>>>> -    const struct flow_tnl *tnl_mask = &mask->tunnel;
>>>> +    struct flow_tnl *tnl_mask = &mask->tunnel;
>>>>       bool recirc_act = false;
>>>>       uint32_t block_id = 0;
>>>>       struct tcf_id id;
>>>> @@ -2024,6 +2067,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct 
>>>> match *match,
>>>>           flower.key.tunnel.ttl = tnl->ip_ttl;
>>>>           flower.key.tunnel.tp_src = tnl->tp_src;
>>>>           flower.key.tunnel.tp_dst = tnl->tp_dst;
>>>> +
>>>>           flower.mask.tunnel.ipv4.ipv4_src = tnl_mask->ip_src;
>>>>           flower.mask.tunnel.ipv4.ipv4_dst = tnl_mask->ip_dst;
>>>>           flower.mask.tunnel.ipv6.ipv6_src = tnl_mask->ipv6_src;
>>>> @@ -2036,10 +2080,32 @@ netdev_tc_flow_put(struct netdev *netdev, struct 
>>>> match *match,
>>>>            * Degrading the flow down to exact match for now as a 
>>>> workaround. */
>>>>           flower.mask.tunnel.tp_dst = OVS_BE16_MAX;
>>>>           flower.mask.tunnel.id = (tnl->flags & FLOW_TNL_F_KEY) ? 
>>>> tnl_mask->tun_id : 0;
>>>> +
>>>> +        memset(&tnl_mask->ip_src, 0, sizeof tnl_mask->ip_src);
>>>> +        memset(&tnl_mask->ip_dst, 0, sizeof tnl_mask->ip_dst);
>>>> +        memset(&tnl_mask->ipv6_src, 0, sizeof tnl_mask->ipv6_src);
>>>> +        memset(&tnl_mask->ipv6_dst, 0, sizeof tnl_mask->ipv6_dst);
>>>> +        memset(&tnl_mask->ip_tos, 0, sizeof tnl_mask->ip_tos);
>>>> +        memset(&tnl_mask->ip_ttl, 0, sizeof tnl_mask->ip_ttl);
>>>> +        memset(&tnl_mask->tp_dst, 0, sizeof tnl_mask->tp_dst);
>>>> +
>>>> +        memset(&tnl_mask->tun_id, 0, sizeof tnl_mask->tun_id);
>>>> +        tnl_mask->flags &= ~FLOW_TNL_F_KEY;
>>>> +
>>>> +        /* XXX: This is wrong!  We're ignoring DF and CSUM flags 
>>>> configuration
>>>> +         * requested by the user.  However, TC for now has no way to pass
>>>> +         * these flags in a flower key and their masks are set by default,
>>>> +         * meaning tunnel offloading will not work at all if not cleared.
>>>> +         * Keeping incorrect behavior for now. */
>>>> +        tnl_mask->flags &= ~(FLOW_TNL_F_DONT_FRAGMENT | FLOW_TNL_F_CSUM);
>>>> +
>>>>           flower_match_to_tun_opt(&flower, tnl, tnl_mask);
>>>>           flower.tunnel = true;
>>>> +    } else {
>>>> +        /* There is no tunnel metadata to match on, but there could be 
>>>> some
>>>> +         * mask bits set due to flow translation artifacts.  Clear them. 
>>>> */
>>>> +        memset(&mask->tunnel, 0, sizeof mask->tunnel);
>>>>       }
>>>> -    memset(&mask->tunnel, 0, sizeof mask->tunnel);
>>>>         flower.key.eth_type = key->dl_type;
>>>>       flower.mask.eth_type = mask->dl_type;
>>>> diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h
>>>> index 249a3102a..180d3f95f 100644
>>>> --- a/lib/netdev-offload.h
>>>> +++ b/lib/netdev-offload.h
>>>> @@ -66,9 +66,6 @@ struct netdev_flow_dump {
>>>>     /* Flow offloading. */
>>>>   struct offload_info {
>>>> -    ovs_be16 tp_dst_port; /* Destination port for tunnel in SET action */
>>>> -    uint8_t tunnel_csum_on; /* Tunnel header with checksum */
>>>> -
>>>>       bool recirc_id_shared_with_tc;  /* Indicates whever tc chains will 
>>>> be in
>>>>                                        * sync with datapath recirc ids. */
>>>>   
>>>
>>>
>>> Hi,
>>>
>>> I didn't check the source of the issue but now dump-flows always
>>> shows geneve(). I ran some vxlan test but got geneve() in the match
>>>
>>> tunnel(tun_id=0x2a,src=7.7.7.8,dst=7.7.7.7,tp_dst=4789,geneve(),flags(+key)),recirc_id(0),in_port(vxlan_sys_4789),eth(src=a6:96:4b:ab:b3:9a,dst=de:a8:d5:4d:39:48),eth_type(0x0800),ipv4(frag=no),
>>>  packets:9577, bytes:1150576, used:0.800s, actions:enp8s0f0_0
>>
>> Yes, that is expected, because we can not distinguish geneve with
>> zero-length options from other tunnel types while parsing the
>> tunnel() attribute.  There is just no enough information.
>>
>> I highlighted that in the commit message for the first patch:
>>
>> "Also, flower always has an exact match on the present.len field
>> regardless of its value and regardless of this field being masked
>> by OVS flow translation layer while installing the flow.  Hence,
>> all tunnel flows dumped from TC should have an exact match on
>> present.len and also UDPIF flag, because present.len doesn't make
>> sense without that flag.  Without the change, zero-length options
>> match is incorrectly reported as a wildcard match.  The side effect
>> though is that zero-length match on geneve options is reported even
>> for other tunnel types, e.g. vxlan.  But that should be fairly
>> harmless.  To avoid reporting a match on empty geneve options for
>> vxlan/etc. tunnels we'll need to check the tunnel port type, there
>> is no enough information in the TUNNEL attribute itself."
> 
> Something like this should do the trick, I guess:
> 
> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> index 487e13940..417545db7 100644
> --- a/lib/netdev-offload-tc.c
> +++ b/lib/netdev-offload-tc.c
> @@ -940,7 +940,8 @@ parse_tc_flower_to_actions(struct tc_flower *flower,
>  }
>  
>  static int
> -parse_tc_flower_to_match(struct tc_flower *flower,
> +parse_tc_flower_to_match(const struct netdev *netdev,
> +                         struct tc_flower *flower,
>                           struct match *match,
>                           struct nlattr **actions,
>                           struct dpif_flow_stats *stats,
> @@ -1141,7 +1142,11 @@ parse_tc_flower_to_match(struct tc_flower *flower,
>                                          flower->mask.tunnel.tp_dst);
>          }
>  
> -        flower_tun_opt_to_match(match, flower);
> +        if (!strcmp(netdev_get_type(netdev), "geneve")) {
> +            flower_tun_opt_to_match(match, flower);
> +        } else {
> +            VLOG_INFO("%s: Non geneve tunnel: %s", netdev_get_name(netdev), 
> netdev_get_type(netdev));

This 'else' is not neccessary.  Added just for local testing.
Will remove in the actual patch.

> +        }
>      }
>  
>      act_off = nl_msg_start_nested(buf, OVS_FLOW_ATTR_ACTIONS);
> @@ -1182,8 +1187,8 @@ netdev_tc_flow_dump_next(struct netdev_flow_dump *dump,
>              continue;
>          }
>  
> -        if (parse_tc_flower_to_match(&flower, match, actions, stats, attrs,
> -                                     wbuffer, dump->terse)) {
> +        if (parse_tc_flower_to_match(netdev, &flower, match, actions,
> +                                     stats, attrs, wbuffer, dump->terse)) {
>              continue;
>          }
>  
> @@ -2120,7 +2125,9 @@ netdev_tc_flow_put(struct netdev *netdev, struct match 
> *match,
>           * Keeping incorrect behavior for now. */
>          tnl_mask->flags &= ~(FLOW_TNL_F_DONT_FRAGMENT | FLOW_TNL_F_CSUM);
>  
> -        flower_match_to_tun_opt(&flower, tnl, tnl_mask);
> +        if (!strcmp(netdev_get_type(netdev), "geneve")) {
> +            flower_match_to_tun_opt(&flower, tnl, tnl_mask);
> +        }
>          flower.tunnel = true;
>      } else {
>          /* There is no tunnel metadata to match on, but there could be some
> @@ -2400,7 +2407,8 @@ netdev_tc_flow_get(struct netdev *netdev,
>      }
>  
>      in_port = netdev_ifindex_to_odp_port(id.ifindex);
> -    parse_tc_flower_to_match(&flower, match, actions, stats, attrs, buf, 
> false);
> +    parse_tc_flower_to_match(netdev, &flower, match, actions,
> +                             stats, attrs, buf, false);
>  
>      match->wc.masks.in_port.odp_port = u32_to_odp(UINT32_MAX);
>      match->flow.in_port.odp_port = in_port;
> ---
> 
> I can add this to the series in v4 or send as a separate patch later if
> v3 otherwise is fine.  Let me know what do you think.
> 
> Best regards, Ilya Maximets.

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to