On 8/16/22 13:57, Ilya Maximets wrote:
> On 8/16/22 13:42, Roi Dayan wrote:
>>
>>
>> On 2022-08-14 5:46 PM, Ilya Maximets wrote:
>>> Current offloading code supports only limited number of tunnel keys
>>> and silently ignores everything it doesn't understand.  This is
>>> causing, for example, offloaded ERSPAN tunnels to not work, because
>>> flow is offloaded, but ERSPAN options are not provided to TC.
>>>
>>> There is a number of tunnel keys, which are supported by the userspace,
>>> but silently ignored during offloading:
>>>
>>>    OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT
>>>    OVS_TUNNEL_KEY_ATTR_OAM
>>>    OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS
>>>    OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS
>>>
>>> OVS_TUNNEL_KEY_ATTR_CSUM is kind of supported, but only for actions
>>> and for some reason is set from the tunnel port instead of the
>>> provided action, and not currently supported for the tunnel key in
>>> the match.
>>>
>>> Adding a default case to fail offloading of unknown attributes.  For
>>> now explicitly allowing incorrect behavior for the DONT_FRAGMENT flag,
>>> otherwise we'll break all tunnel offloading by default.  VXLAN and
>>> ERSPAN options have to fail offloading, because the tunnel will not
>>> work otherwise.  OAM is not a default configuration, so failing it
>>> as well. The missing DONT_FRAGMENT flag though should, probably,
>>> cause frequent flow revalidation, but that is not new with this patch.
>>>
>>> Same for the 'match' key, only clearing masks that were actually
>>> consumed, except for the DONT_FRAGMENT and CSUM flags, which are
>>> explicitly allowed and highlighted as broken.
>>>
>>> Also, destination port as well as CSUM configuration for an unknown
>>> reason were not taken from the actions list and were passed via HW
>>> offload info instead of being consumed from the set() action.
>>>
>>> Reported-at: 
>>> https://mail.openvswitch.org/pipermail/ovs-dev/2022-July/395522.html
>>> Reported-by: Eelco Chaudron <[email protected]>
>>> Fixes: 8f283af89298 ("netdev-tc-offloads: Implement netdev flow put using 
>>> tc interface")
>>> Signed-off-by: Ilya Maximets <[email protected]>
>>> ---
>>>   lib/dpif-netlink.c      | 14 +------
>>>   lib/netdev-offload-tc.c | 92 +++++++++++++++++++++++++++++++++++------
>>>   lib/netdev-offload.h    |  3 --
>>>   3 files changed, 80 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
>>> index 89e1d4325..c219fb527 100644
>>> --- a/lib/dpif-netlink.c
>>> +++ b/lib/dpif-netlink.c
>>> @@ -2237,8 +2237,6 @@ parse_flow_put(struct dpif_netlink *dpif, struct 
>>> dpif_flow_put *put)
>>>       size_t left;
>>>       struct netdev *dev;
>>>       struct offload_info info;
>>> -    ovs_be16 dst_port = 0;
>>> -    uint8_t csum_on = false;
>>>       int err;
>>>         info.tc_modify_flow_deleted = false;
>>> @@ -2258,10 +2256,9 @@ parse_flow_put(struct dpif_netlink *dpif, struct 
>>> dpif_flow_put *put)
>>>           return EOPNOTSUPP;
>>>       }
>>>   -    /* Get tunnel dst port */
>>> +    /* Check the output port for a tunnel. */
>>>       NL_ATTR_FOR_EACH(nla, left, put->actions, put->actions_len) {
>>>           if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
>>> -            const struct netdev_tunnel_config *tnl_cfg;
>>>               struct netdev *outdev;
>>>               odp_port_t out_port;
>>>   @@ -2271,19 +2268,10 @@ parse_flow_put(struct dpif_netlink *dpif, struct 
>>> dpif_flow_put *put)
>>>                   err = EOPNOTSUPP;
>>>                   goto out;
>>>               }
>>> -            tnl_cfg = netdev_get_tunnel_config(outdev);
>>> -            if (tnl_cfg && tnl_cfg->dst_port != 0) {
>>> -                dst_port = tnl_cfg->dst_port;
>>> -            }
>>> -            if (tnl_cfg) {
>>> -                csum_on = tnl_cfg->csum;
>>> -            }
>>>               netdev_close(outdev);
>>>           }
>>>       }
>>>   -    info.tp_dst_port = dst_port;
>>> -    info.tunnel_csum_on = csum_on;
>>>       info.recirc_id_shared_with_tc = (dpif->user_features
>>>                                        & OVS_DP_F_TC_RECIRC_SHARING);
>>>       err = netdev_flow_put(dev, &match,
>>> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
>>> index 1ab12ecfe..92d424951 100644
>>> --- a/lib/netdev-offload-tc.c
>>> +++ b/lib/netdev-offload-tc.c
>>> @@ -1399,6 +1399,7 @@ static int
>>>   parse_put_flow_set_action(struct tc_flower *flower, struct tc_action 
>>> *action,
>>>                             const struct nlattr *set, size_t set_len)
>>>   {
>>> +    static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>>>       const struct nlattr *tunnel;
>>>       const struct nlattr *tun_attr;
>>>       size_t tun_left, tunnel_len;
>>> @@ -1417,6 +1418,7 @@ parse_put_flow_set_action(struct tc_flower *flower, 
>>> struct tc_action *action,
>>>         action->type = TC_ACT_ENCAP;
>>>       action->encap.id_present = false;
>>> +    action->encap.no_csum = 1;
>>>       flower->action_count++;
>>>       NL_ATTR_FOR_EACH_UNSAFE(tun_attr, tun_left, tunnel, tunnel_len) {
>>>           switch (nl_attr_type(tun_attr)) {
>>> @@ -1441,6 +1443,18 @@ parse_put_flow_set_action(struct tc_flower *flower, 
>>> struct tc_action *action,
>>>               action->encap.ttl = nl_attr_get_u8(tun_attr);
>>>           }
>>>           break;
>>> +        case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: {
>>> +            /* XXX: This is wrong!  We're ignoring the DF flag 
>>> configuration
>>> +             * requested by the user.  However, TC for now has no way to 
>>> pass
>>> +             * that flag and it is set by default, meaning tunnel 
>>> offloading
>>> +             * will not work if 'options:df_default=false' is not set.
>>> +             * Keeping incorrect behavior for now. */
>>> +        }
>>> +        break;
>>> +        case OVS_TUNNEL_KEY_ATTR_CSUM: {
>>> +            action->encap.no_csum = 0;
>>> +        }
>>> +        break;
>>>           case OVS_TUNNEL_KEY_ATTR_IPV6_SRC: {
>>>               action->encap.ipv6.ipv6_src =
>>>                   nl_attr_get_in6_addr(tun_attr);
>>> @@ -1465,6 +1479,10 @@ parse_put_flow_set_action(struct tc_flower *flower, 
>>> struct tc_action *action,
>>>               action->encap.data.present.len = nl_attr_get_size(tun_attr);
>>>           }
>>>           break;
>>> +        default:
>>> +            VLOG_DBG_RL(&rl, "unsupported tunnel key attribute %d",
>>> +                        nl_attr_type(tun_attr));
>>> +            return EOPNOTSUPP;
>>>           }
>>>       }
>>>   @@ -1593,18 +1611,51 @@ test_key_and_mask(struct match *match)
>>>     static void
>>>   flower_match_to_tun_opt(struct tc_flower *flower, const struct flow_tnl 
>>> *tnl,
>>> -                        const struct flow_tnl *tnl_mask)
>>> +                        struct flow_tnl *tnl_mask)
>>>   {
>>>       struct geneve_opt *opt, *opt_mask;
>>>       int len, cnt = 0;
>>>   -    memcpy(flower->key.tunnel.metadata.opts.gnv, tnl->metadata.opts.gnv,
>>> -           tnl->metadata.present.len);
>>> +    /* 'flower' always has an exact match on tunnel metadata length, so 
>>> having
>>> +     * it in a wrong format is not acceptable unless it is empty. */
>>> +    if (!(tnl->flags & FLOW_TNL_F_UDPIF)) {
>>> +        if (tnl->metadata.present.map) {
>>> +            /* XXX: Add non-UDPIF format parsing here? */
>>> +            VLOG_WARN_RL(&warn_rl, "Tunnel options are in the wrong 
>>> format.");
>>> +        } else {
>>> +            /* There are no options, that equals for them to be in UDPIF 
>>> format
>>> +             * with a zero 'len'.  Clearing the 'map' mask as consumed.
>>> +             * No need to explicitly set 'len' to zero in the 'flower'. */
>>> +            tnl_mask->flags &= ~FLOW_TNL_F_UDPIF;
>>> +            memset(&tnl_mask->metadata.present.map, 0,
>>> +                   sizeof tnl_mask->metadata.present.map);
>>> +        }
>>> +        return;
>>> +    }
>>> +
>>> +    tnl_mask->flags &= ~FLOW_TNL_F_UDPIF;
>>> +
>>>       flower->key.tunnel.metadata.present.len = tnl->metadata.present.len;
>>> +    /* Copying from the key and not from the mask, since in the 'flower'
>>> +     * the length for a mask is not a mask, but the actual length.  TC
>>> +     * will use an exact match for the length. */
>>> +    flower->mask.tunnel.metadata.present.len = tnl->metadata.present.len;
>>> +    memset(&tnl_mask->metadata.present.len, 0,
>>> +           sizeof tnl_mask->metadata.present.len);
>>> +
>>> +    if (!tnl->metadata.present.len) {
>>> +        return;
>>> +    }
>>>   +    memcpy(flower->key.tunnel.metadata.opts.gnv, tnl->metadata.opts.gnv,
>>> +           tnl->metadata.present.len);
>>>       memcpy(flower->mask.tunnel.metadata.opts.gnv, 
>>> tnl_mask->metadata.opts.gnv,
>>>              tnl->metadata.present.len);
>>>   +    memset(tnl_mask->metadata.opts.gnv, 0, tnl->metadata.present.len);
>>> +
>>> +    /* Fixing up 'length' fields of particular options, since these are
>>> +     * also not masks, but actual lengths in the 'flower' structure. */
>>>       len = flower->key.tunnel.metadata.present.len;
>>>       while (len) {
>>>           opt = &flower->key.tunnel.metadata.opts.gnv[cnt];
>>> @@ -1615,10 +1666,6 @@ flower_match_to_tun_opt(struct tc_flower *flower, 
>>> const struct flow_tnl *tnl,
>>>           cnt += sizeof(struct geneve_opt) / 4 + opt->length;
>>>           len -= sizeof(struct geneve_opt) + opt->length * 4;
>>>       }
>>> -
>>> -    /* Copying from the key and not from the mask, since in the 'flower'
>>> -     * the length for a mask is not a mask, but the actual length. */
>>> -    flower->mask.tunnel.metadata.present.len = tnl->metadata.present.len;
>>>   }
>>>     static void
>>> @@ -1907,10 +1954,6 @@ netdev_tc_parse_nl_actions(struct netdev *netdev, 
>>> struct tc_flower *flower,
>>>               if (err) {
>>>                   return err;
>>>               }
>>> -            if (action->type == TC_ACT_ENCAP) {
>>> -                action->encap.tp_dst = info->tp_dst_port;
>>> -                action->encap.no_csum = !info->tunnel_csum_on;
>>> -            }
>>>           } else if (nl_attr_type(nla) == OVS_ACTION_ATTR_SET_MASKED) {
>>>               const struct nlattr *set = nl_attr_get(nla);
>>>               const size_t set_len = nl_attr_get_size(nla);
>>> @@ -1986,7 +2029,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct 
>>> match *match,
>>>       const struct flow *key = &match->flow;
>>>       struct flow *mask = &match->wc.masks;
>>>       const struct flow_tnl *tnl = &match->flow.tunnel;
>>> -    const struct flow_tnl *tnl_mask = &mask->tunnel;
>>> +    struct flow_tnl *tnl_mask = &mask->tunnel;
>>>       bool recirc_act = false;
>>>       uint32_t block_id = 0;
>>>       struct tcf_id id;
>>> @@ -2024,6 +2067,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct 
>>> match *match,
>>>           flower.key.tunnel.ttl = tnl->ip_ttl;
>>>           flower.key.tunnel.tp_src = tnl->tp_src;
>>>           flower.key.tunnel.tp_dst = tnl->tp_dst;
>>> +
>>>           flower.mask.tunnel.ipv4.ipv4_src = tnl_mask->ip_src;
>>>           flower.mask.tunnel.ipv4.ipv4_dst = tnl_mask->ip_dst;
>>>           flower.mask.tunnel.ipv6.ipv6_src = tnl_mask->ipv6_src;
>>> @@ -2036,10 +2080,32 @@ netdev_tc_flow_put(struct netdev *netdev, struct 
>>> match *match,
>>>            * Degrading the flow down to exact match for now as a 
>>> workaround. */
>>>           flower.mask.tunnel.tp_dst = OVS_BE16_MAX;
>>>           flower.mask.tunnel.id = (tnl->flags & FLOW_TNL_F_KEY) ? 
>>> tnl_mask->tun_id : 0;
>>> +
>>> +        memset(&tnl_mask->ip_src, 0, sizeof tnl_mask->ip_src);
>>> +        memset(&tnl_mask->ip_dst, 0, sizeof tnl_mask->ip_dst);
>>> +        memset(&tnl_mask->ipv6_src, 0, sizeof tnl_mask->ipv6_src);
>>> +        memset(&tnl_mask->ipv6_dst, 0, sizeof tnl_mask->ipv6_dst);
>>> +        memset(&tnl_mask->ip_tos, 0, sizeof tnl_mask->ip_tos);
>>> +        memset(&tnl_mask->ip_ttl, 0, sizeof tnl_mask->ip_ttl);
>>> +        memset(&tnl_mask->tp_dst, 0, sizeof tnl_mask->tp_dst);
>>> +
>>> +        memset(&tnl_mask->tun_id, 0, sizeof tnl_mask->tun_id);
>>> +        tnl_mask->flags &= ~FLOW_TNL_F_KEY;
>>> +
>>> +        /* XXX: This is wrong!  We're ignoring DF and CSUM flags 
>>> configuration
>>> +         * requested by the user.  However, TC for now has no way to pass
>>> +         * these flags in a flower key and their masks are set by default,
>>> +         * meaning tunnel offloading will not work at all if not cleared.
>>> +         * Keeping incorrect behavior for now. */
>>> +        tnl_mask->flags &= ~(FLOW_TNL_F_DONT_FRAGMENT | FLOW_TNL_F_CSUM);
>>> +
>>>           flower_match_to_tun_opt(&flower, tnl, tnl_mask);
>>>           flower.tunnel = true;
>>> +    } else {
>>> +        /* There is no tunnel metadata to match on, but there could be some
>>> +         * mask bits set due to flow translation artifacts.  Clear them. */
>>> +        memset(&mask->tunnel, 0, sizeof mask->tunnel);
>>>       }
>>> -    memset(&mask->tunnel, 0, sizeof mask->tunnel);
>>>         flower.key.eth_type = key->dl_type;
>>>       flower.mask.eth_type = mask->dl_type;
>>> diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h
>>> index 249a3102a..180d3f95f 100644
>>> --- a/lib/netdev-offload.h
>>> +++ b/lib/netdev-offload.h
>>> @@ -66,9 +66,6 @@ struct netdev_flow_dump {
>>>     /* Flow offloading. */
>>>   struct offload_info {
>>> -    ovs_be16 tp_dst_port; /* Destination port for tunnel in SET action */
>>> -    uint8_t tunnel_csum_on; /* Tunnel header with checksum */
>>> -
>>>       bool recirc_id_shared_with_tc;  /* Indicates whever tc chains will be 
>>> in
>>>                                        * sync with datapath recirc ids. */
>>>   
>>
>>
>> Hi,
>>
>> I didn't check the source of the issue but now dump-flows always
>> shows geneve(). I ran some vxlan test but got geneve() in the match
>>
>> tunnel(tun_id=0x2a,src=7.7.7.8,dst=7.7.7.7,tp_dst=4789,geneve(),flags(+key)),recirc_id(0),in_port(vxlan_sys_4789),eth(src=a6:96:4b:ab:b3:9a,dst=de:a8:d5:4d:39:48),eth_type(0x0800),ipv4(frag=no),
>>  packets:9577, bytes:1150576, used:0.800s, actions:enp8s0f0_0
> 
> Yes, that is expected, because we can not distinguish geneve with
> zero-length options from other tunnel types while parsing the
> tunnel() attribute.  There is just not enough information.
> 
> I highlighted that in the commit message for the first patch:
> 
> "Also, flower always has an exact match on the present.len field
> regardless of its value and regardless of this field being masked
> by OVS flow translation layer while installing the flow.  Hence,
> all tunnel flows dumped from TC should have an exact match on
> present.len and also UDPIF flag, because present.len doesn't make
> sense without that flag.  Without the change, zero-length options
> match is incorrectly reported as a wildcard match.  The side effect
> though is that zero-length match on geneve options is reported even
> for other tunnel types, e.g. vxlan.  But that should be fairly
> harmless.  To avoid reporting a match on empty geneve options for
> vxlan/etc. tunnels we'll need to check the tunnel port type, there
> is not enough information in the TUNNEL attribute itself."

Something like this should do the trick, I guess:

diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index 487e13940..417545db7 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -940,7 +940,8 @@ parse_tc_flower_to_actions(struct tc_flower *flower,
 }
 
 static int
-parse_tc_flower_to_match(struct tc_flower *flower,
+parse_tc_flower_to_match(const struct netdev *netdev,
+                         struct tc_flower *flower,
                          struct match *match,
                          struct nlattr **actions,
                          struct dpif_flow_stats *stats,
@@ -1141,7 +1142,11 @@ parse_tc_flower_to_match(struct tc_flower *flower,
                                         flower->mask.tunnel.tp_dst);
         }
 
-        flower_tun_opt_to_match(match, flower);
+        if (!strcmp(netdev_get_type(netdev), "geneve")) {
+            flower_tun_opt_to_match(match, flower);
+        } else {
+            VLOG_INFO("%s: Non geneve tunnel: %s", netdev_get_name(netdev), 
netdev_get_type(netdev));
+        }
     }
 
     act_off = nl_msg_start_nested(buf, OVS_FLOW_ATTR_ACTIONS);
@@ -1182,8 +1187,8 @@ netdev_tc_flow_dump_next(struct netdev_flow_dump *dump,
             continue;
         }
 
-        if (parse_tc_flower_to_match(&flower, match, actions, stats, attrs,
-                                     wbuffer, dump->terse)) {
+        if (parse_tc_flower_to_match(netdev, &flower, match, actions,
+                                     stats, attrs, wbuffer, dump->terse)) {
             continue;
         }
 
@@ -2120,7 +2125,9 @@ netdev_tc_flow_put(struct netdev *netdev, struct match 
*match,
          * Keeping incorrect behavior for now. */
         tnl_mask->flags &= ~(FLOW_TNL_F_DONT_FRAGMENT | FLOW_TNL_F_CSUM);
 
-        flower_match_to_tun_opt(&flower, tnl, tnl_mask);
+        if (!strcmp(netdev_get_type(netdev), "geneve")) {
+            flower_match_to_tun_opt(&flower, tnl, tnl_mask);
+        }
         flower.tunnel = true;
     } else {
         /* There is no tunnel metadata to match on, but there could be some
@@ -2400,7 +2407,8 @@ netdev_tc_flow_get(struct netdev *netdev,
     }
 
     in_port = netdev_ifindex_to_odp_port(id.ifindex);
-    parse_tc_flower_to_match(&flower, match, actions, stats, attrs, buf, 
false);
+    parse_tc_flower_to_match(netdev, &flower, match, actions,
+                             stats, attrs, buf, false);
 
     match->wc.masks.in_port.odp_port = u32_to_odp(UINT32_MAX);
     match->flow.in_port.odp_port = in_port;
---

I can add this to the series in v4 or send as a separate patch later if
v3 otherwise is fine.  Let me know what you think.

Best regards, Ilya Maximets.
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to