Current offloading code supports only limited number of tunnel keys
and silently ignores everything it doesn't understand. This is
causing, for example, offloaded ERSPAN tunnels to not work, because
flow is offloaded, but ERSPAN options are not provided to TC.
There is a number of tunnel keys, which are supported by the
userspace,
but silently ignored during offloading:
OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT
OVS_TUNNEL_KEY_ATTR_OAM
OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS
OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS
OVS_TUNNEL_KEY_ATTR_CSUM is kind of supported, but only for actions
and for some reason is set from the tunnel port instead of the
provided action, and not currently supported for the tunnel key in
the match.
Addig a default case to fail offloading of unknown attributes. For
now explicitly allowing incorrect behavior for the DONT_FRAGMENT
flag,
otherwise we'll break all tunnel offloading by default. VXLAN and
ERSPAN options has to fail offloading, because the tunnel will not
work otherwise. OAM is not a default configurations, so failing it
as well. The missing DONT_FRAGMENT flag though should, probably,
cause frequent flow revalidation, but that is not new with this
patch.
Same for the 'match' key, only clearing masks that was actually
consumed, except for the DONT_FRAGMENT and CSUM flags, which are
explicitly allowed and highlighted as broken.
Also, ttl and tos were incorrectly checked by the value instead of a
mask during the flow key dump. Destination port as well as CSUM
configuration for unknown reason was not taken from the actions list
and were passed via HW offload info.
Reported-at:
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fpipermail%2Fovs-dev%2F2022-July%2F395522.html&data=05%7C01%7Croid%40nvidia.com%7Cba6998f8523f4fcf52eb08da64ecba35%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C637933265159187181%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&sdata=QDm8f%2FFzZE0X7q26i09UI8zaGK0UkD9tcEChI2XYNZ4%3D&reserved=0
Reported-by: Eelco Chaudron <[email protected]>
Fixes: 8f283af89298 ("netdev-tc-offloads: Implement netdev flow
put using tc interface")
Signed-off-by: Ilya Maximets <[email protected]>
---
lib/dpif-netlink.c | 14 +---------
lib/netdev-offload-tc.c | 57
++++++++++++++++++++++++++++++++++-------
lib/netdev-offload.h | 3 ---
3 files changed, 49 insertions(+), 25 deletions(-)
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 06e1e8ca0..1e116b4ad 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -2237,8 +2237,6 @@ parse_flow_put(struct dpif_netlink *dpif,
struct dpif_flow_put *put)
size_t left;
struct netdev *dev;
struct offload_info info;
- ovs_be16 dst_port = 0;
- uint8_t csum_on = false;
int err;
info.tc_modify_flow_deleted = false;
@@ -2258,10 +2256,9 @@ parse_flow_put(struct dpif_netlink *dpif,
struct dpif_flow_put *put)
return EOPNOTSUPP;
}
- /* Get tunnel dst port */
+ /* Check the output port for a tunnel. */
NL_ATTR_FOR_EACH(nla, left, put->actions, put->actions_len) {
if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
- const struct netdev_tunnel_config *tnl_cfg;
struct netdev *outdev;
odp_port_t out_port;
@@ -2271,19 +2268,10 @@ parse_flow_put(struct dpif_netlink *dpif,
struct dpif_flow_put *put)
err = EOPNOTSUPP;
goto out;
}
- tnl_cfg = netdev_get_tunnel_config(outdev);
- if (tnl_cfg && tnl_cfg->dst_port != 0) {
- dst_port = tnl_cfg->dst_port;
- }
- if (tnl_cfg) {
- csum_on = tnl_cfg->csum;
- }
netdev_close(outdev);
}
}
- info.tp_dst_port = dst_port;
- info.tunnel_csum_on = csum_on;
info.recirc_id_shared_with_tc = (dpif->user_features
&
OVS_DP_F_TC_RECIRC_SHARING);
err = netdev_flow_put(dev, &match,
diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index 262faf3c6..e62687c82 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -805,11 +805,11 @@ parse_tc_flower_to_match(struct tc_flower
*flower,
&flower->key.tunnel.ipv6.ipv6_src,
&flower->mask.tunnel.ipv6.ipv6_src);
}
- if (flower->key.tunnel.tos) {
+ if (flower->mask.tunnel.tos) {
match_set_tun_tos_masked(match,
flower->key.tunnel.tos,
flower->mask.tunnel.tos);
}
- if (flower->key.tunnel.ttl) {
+ if (flower->mask.tunnel.ttl) {
match_set_tun_ttl_masked(match,
flower->key.tunnel.ttl,
flower->mask.tunnel.ttl);
}
@@ -1284,9 +1284,11 @@ static int
parse_put_flow_set_action(struct tc_flower *flower, struct
tc_action *action,
const struct nlattr *set, size_t
set_len)
{
+ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
const struct nlattr *tunnel;
const struct nlattr *tun_attr;
size_t tun_left, tunnel_len;
+ bool attr_csum_present;
if (nl_attr_type(set) == OVS_KEY_ATTR_MPLS) {
return parse_mpls_set_action(flower, action, set);
@@ -1300,6 +1302,7 @@ parse_put_flow_set_action(struct tc_flower
*flower, struct tc_action *action,
tunnel = nl_attr_get(set);
tunnel_len = nl_attr_get_size(set);
+ attr_csum_present = false;
action->type = TC_ACT_ENCAP;
action->encap.id_present = false;
flower->action_count++;
@@ -1326,6 +1329,19 @@ parse_put_flow_set_action(struct tc_flower
*flower, struct tc_action *action,
action->encap.ttl = nl_attr_get_u8(tun_attr);
}
break;
+ case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: {
+ /* XXX: This is wrong! We're ignoring the DF flag
configuration
+ * requested by the user. However, TC for now has
no way to pass
+ * that flag and it is set by default, meaning
tunnel offloading
+ * will not work if 'options:df_default=false' is
not set.
+ * Keeping incorrect behavior for now. */
+ }
+ break;
+ case OVS_TUNNEL_KEY_ATTR_CSUM: {
+ attr_csum_present = true;
+ action->encap.no_csum = !nl_attr_get_u8(tun_attr);
+ }
+ break;
case OVS_TUNNEL_KEY_ATTR_IPV6_SRC: {
action->encap.ipv6.ipv6_src =
nl_attr_get_in6_addr(tun_attr);
@@ -1350,9 +1366,17 @@ parse_put_flow_set_action(struct tc_flower
*flower, struct tc_action *action,
action->encap.data.present.len =
nl_attr_get_size(tun_attr);
}
break;
+ default:
+ VLOG_DBG_RL(&rl, "unsupported tunnel key attribute %d",
+ nl_attr_type(tun_attr));
+ return EOPNOTSUPP;
}
}
+ if (!attr_csum_present) {
+ action->encap.no_csum = 1;
+ }
+
return 0;
}
@@ -1448,7 +1472,7 @@ test_key_and_mask(struct match *match)
static void
flower_match_to_tun_opt(struct tc_flower *flower, const struct
flow_tnl *tnl,
- const struct flow_tnl *tnl_mask)
+ struct flow_tnl *tnl_mask)
{
struct geneve_opt *opt, *opt_mask;
int len, cnt = 0;
@@ -1472,6 +1496,10 @@ flower_match_to_tun_opt(struct tc_flower
*flower, const struct flow_tnl *tnl,
}
flower->mask.tunnel.metadata.present.len =
tnl->metadata.present.len;
+
+ memset(tnl_mask->metadata.opts.gnv, 0,
tnl->metadata.present.len);
+ memset(&tnl_mask->metadata.present.len, 0,
+ sizeof tnl_mask->metadata.present.len);
}
static void
@@ -1576,7 +1604,7 @@ netdev_tc_flow_put(struct netdev *netdev,
struct match *match,
const struct flow *key = &match->flow;
struct flow *mask = &match->wc.masks;
const struct flow_tnl *tnl = &match->flow.tunnel;
- const struct flow_tnl *tnl_mask = &mask->tunnel;
+ struct flow_tnl *tnl_mask = &mask->tunnel;
struct tc_action *action;
bool recirc_act = false;
uint32_t block_id = 0;
@@ -1624,10 +1652,25 @@ netdev_tc_flow_put(struct netdev *netdev,
struct match *match,
flower.mask.tunnel.tos = tnl_mask->ip_tos;
flower.mask.tunnel.ttl = tnl_mask->ip_ttl;
flower.mask.tunnel.id = (tnl->flags & FLOW_TNL_F_KEY)
? tnl_mask->tun_id : 0;
+ memset(&tnl_mask->tun_id, 0, sizeof tnl_mask->tun_id);
+ memset(&tnl_mask->ip_src, 0, sizeof tnl_mask->ip_src);
+ memset(&tnl_mask->ip_dst, 0, sizeof tnl_mask->ip_dst);
+ memset(&tnl_mask->ipv6_src, 0, sizeof tnl_mask->ipv6_src);
+ memset(&tnl_mask->ipv6_dst, 0, sizeof tnl_mask->ipv6_dst);
+ memset(&tnl_mask->ip_tos, 0, sizeof tnl_mask->ip_tos);
+ memset(&tnl_mask->ip_ttl, 0, sizeof tnl_mask->ip_ttl);
+ tnl_mask->flags &= ~FLOW_TNL_F_KEY;
+
+ /* XXX: This is wrong! We're ignoring DF and CSUM flags
configuration
+ * requested by the user. However, TC for now has no
way to pass
+ * these flags in a flower key and their masks are set
by default,
+ * meaning tunnel offloading will not work at all if not
cleared.
+ * Keeping incorrect behavior for now. */
+ tnl_mask->flags &= ~(FLOW_TNL_F_DONT_FRAGMENT |
FLOW_TNL_F_CSUM);
+
flower_match_to_tun_opt(&flower, tnl, tnl_mask);
flower.tunnel = true;
}
- memset(&mask->tunnel, 0, sizeof mask->tunnel);
flower.key.eth_type = key->dl_type;
flower.mask.eth_type = mask->dl_type;
@@ -1899,10 +1942,6 @@ netdev_tc_flow_put(struct netdev *netdev,
struct match *match,
if (err) {
return err;
}
- if (action->type == TC_ACT_ENCAP) {
- action->encap.tp_dst = info->tp_dst_port;
- action->encap.no_csum = !info->tunnel_csum_on;
- }
} else if (nl_attr_type(nla) ==
OVS_ACTION_ATTR_SET_MASKED) {
const struct nlattr *set = nl_attr_get(nla);
const size_t set_len = nl_attr_get_size(nla);
diff --git a/lib/netdev-offload.h b/lib/netdev-offload.h
index 8237a85dd..93eb2df48 100644
--- a/lib/netdev-offload.h
+++ b/lib/netdev-offload.h
@@ -65,9 +65,6 @@ struct netdev_flow_dump {
/* Flow offloading. */
struct offload_info {
- ovs_be16 tp_dst_port; /* Destination port for tunnel in SET
action */
- uint8_t tunnel_csum_on; /* Tunnel header with checksum */
-
bool recirc_id_shared_with_tc; /* Indicates whever tc
chains will be in
* sync with datapath
recirc ids. */