In the future it is likely that our vlan support will expand to include multiply tagged packets. When this happens, we would ideally like for it to be consistent with our current tagging.
Currently, if we receive a packet with a partial VLAN tag we will automatically drop it in the kernel, which is unique among the protocols we support. The only other reason to drop a packet is a memory allocation error. For a doubly tagged packet, we will parse the first tag and indicate that another tag was present but do not drop if the second tag is incorrect as we do not parse it. This changes the behavior of the vlan parser to match other protocols and also deeper tags by indicating the presence of a broken tag with the 802.1Q EtherType but no vlan information. This shifts the policy decision to userspace on whether to drop broken tags and allows us to uniformly add new levels of tag parsing. Although additional levels of control are provided to userspace, this maintains the current behavior of dropping packets with a broken tag when using the NORMAL action because that is the correct behavior for an 802.1Q-aware switch. The userspace flow parser actually already had the new behavior so this corrects an inconsistency. --- datapath/README | 35 ++++++++++++++++++++++++++++ datapath/actions.c | 2 +- datapath/datapath.c | 2 +- datapath/flow.c | 37 ++++++++++++++++++++-------- lib/dpif-netdev.c | 2 +- lib/odp-util.c | 60 ++++++++++++++++++++++++++++++----------------- ofproto/ofproto-dpif.c | 11 +++++++- 7 files changed, 111 insertions(+), 38 deletions(-) diff --git a/datapath/README b/datapath/README index 2d8a141..23a5a81 100644 --- a/datapath/README +++ b/datapath/README @@ -145,6 +145,41 @@ not understand the "vlan" key will not see either of those attributes and therefore will not misinterpret them. (Also, the outer eth_type is still 0x8100, not changed to 0x0800.) +Handling malformed packets +-------------------------- + +Don't drop packets in the kernel for malformed protocols headers, bad +checksums, etc. This would prevent userspace from implementing a +simple Ethernet switch that forwards every packet. + +Instead, in such a case, include an attribute with "empty" content. +It doesn't matter if the empty content could be valid protocol values, +as long as those values are rarely seen in practice, because userspace +can always forward all packets with those values to userspace and +handle them individually. + +For example, consider a packet that contains an IP header that +indicates protocol 6 for TCP, but which is truncated just after the IP +header, so that the TCP header is missing. The flow key for this +packet would include a tcp attribute with all-zero src and dst, like +this: + + eth(...), eth_type(0x0800), ip(proto=6, ...), tcp(src=0, dst=0) + +As another example, consider a packet with an Ethernet type of 0x8100, +indicating that a VLAN TCI should follow, but which is truncated just +after the Ethernet type. The flow key for this packet would include +an all-zero-bits vlan and an empty encap attribute, like this: + + eth(...), eth_type(0x8100), vlan(0), encap() + +Unlike a TCP packet with source and destination ports 0, an +all-zero-bits VLAN TCI is not that rare, so the CFI bit (aka +VLAN_TAG_PRESENT inside the kernel) is ordinarily set in a vlan +attribute expressly to allow this situation to be distinguished. +Thus, the flow key in this second example unambiguously indicates a +missing or malformed VLAN TCI. + Other rules ----------- diff --git a/datapath/actions.c b/datapath/actions.c index f3d7de9..ef4f8ce 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -112,7 +112,7 @@ static int push_vlan(struct sk_buff *skb, __be16 tci) + ETH_HLEN, VLAN_HLEN, 0)); } - __vlan_hwaccel_put_tag(skb, ntohs(tci)); + __vlan_hwaccel_put_tag(skb, ntohs(tci) & ~VLAN_TAG_PRESENT); return 0; } diff --git a/datapath/datapath.c b/datapath/datapath.c index a21b0fa..ebc2942 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -653,7 +653,7 @@ static int validate_actions(const struct nlattr *attr, break; case OVS_ACTION_ATTR_PUSH_VLAN: - if (nla_get_be16(a) & htons(VLAN_TAG_PRESENT)) + if (!(nla_get_be16(a) & htons(VLAN_TAG_PRESENT))) return -EINVAL; break; diff --git a/datapath/flow.c b/datapath/flow.c index 4d16caa..b331ab6 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -480,9 +480,12 @@ static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) }; struct qtag_prefix *qp; + if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16))) + return 0; + if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) + sizeof(__be16)))) - return -ENOMEM; + return 0; qp = (struct qtag_prefix *) skb->data; key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT); @@ -1049,14 +1052,25 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp, (1 << OVS_KEY_ATTR_ETHERTYPE) | (1 << OVS_KEY_ATTR_ENCAP)) && nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q)) { - swkey->eth.tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); - if (swkey->eth.tci & htons(VLAN_TAG_PRESENT)) - return -EINVAL; - swkey->eth.tci |= htons(VLAN_TAG_PRESENT); + const struct nlattr *encap = a[OVS_KEY_ATTR_ENCAP]; + __be16 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); - err = parse_flow_nlattrs(a[OVS_KEY_ATTR_ENCAP], a, &attrs); - if (err) - return err; + if (tci & htons(VLAN_TAG_PRESENT)) { + swkey->eth.tci = tci; + + err = parse_flow_nlattrs(encap, a, &attrs); + if (err) + return err; + } else if (!tci) { + /* Used for a truncated 802.1Q header. */ + if (nla_len(encap)) + return -EINVAL; + + swkey->eth.type = htons(ETH_P_8021Q); + *key_lenp = key_len; + return 0; + } else + return -EINVAL; } if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { @@ -1215,11 +1229,12 @@ int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) memcpy(eth_key->eth_src, swkey->eth.src, ETH_ALEN); memcpy(eth_key->eth_dst, swkey->eth.dst, ETH_ALEN); - if (swkey->eth.tci != htons(0)) { + if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { NLA_PUT_BE16(skb, OVS_KEY_ATTR_ETHERTYPE, htons(ETH_P_8021Q)); - NLA_PUT_BE16(skb, OVS_KEY_ATTR_VLAN, - swkey->eth.tci & ~htons(VLAN_TAG_PRESENT)); + NLA_PUT_BE16(skb, OVS_KEY_ATTR_VLAN, swkey->eth.tci); encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); + if (!swkey->eth.tci) + goto unencap; } else encap = NULL; diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index cf9c893..be74a8c 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -1311,7 +1311,7 @@ dp_netdev_execute_actions(struct dp_netdev *dp, break; case OVS_ACTION_ATTR_PUSH_VLAN: - eth_push_vlan(packet, nl_attr_get_be16(a)); + eth_push_vlan(packet, nl_attr_get_be16(a) & ~htons(VLAN_CFI)); break; case OVS_ACTION_ATTR_POP_VLAN: diff --git a/lib/odp-util.c b/lib/odp-util.c index 0ca616b..193f28f 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -200,8 +200,9 @@ format_odp_userspace_action(struct ds *ds, const struct nlattr *attr) static void format_odp_action(struct ds *ds, const struct nlattr *a) { - int expected_len; enum ovs_action_attr type = nl_attr_type(a); + int expected_len; + ovs_be16 tci; expected_len = odp_action_len(nl_attr_type(a)); if (expected_len != -2 && nl_attr_get_size(a) != expected_len) { @@ -224,9 +225,10 @@ format_odp_action(struct ds *ds, const struct nlattr *a) ds_put_cstr(ds, ")"); break; case OVS_ACTION_ATTR_PUSH_VLAN: - ds_put_format(ds, "push_vlan(vid=%"PRIu16",pcp=%d)", - vlan_tci_to_vid(nl_attr_get_be16(a)), - vlan_tci_to_pcp(nl_attr_get_be16(a))); + tci = nl_attr_get_be16(a); + ds_put_format(ds, "push_vlan(vid=%"PRIu16",pcp=%d%s)", + vlan_tci_to_vid(tci), vlan_tci_to_pcp(tci), + tci & htons(VLAN_CFI) ? "" : ",cfi=0"); break; case OVS_ACTION_ATTR_POP_VLAN: ds_put_cstr(ds, "pop_vlan"); @@ -349,6 +351,7 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds) const struct ovs_key_nd *nd_key; enum ovs_key_attr attr = nl_attr_type(a); int expected_len; + ovs_be16 tci; ds_put_cstr(ds, ovs_key_attr_to_string(attr)); expected_len = odp_flow_key_attr_len(nl_attr_type(a)); @@ -381,9 +384,10 @@ format_odp_key_attr(const struct nlattr *a, struct ds *ds) break; case OVS_KEY_ATTR_VLAN: - ds_put_format(ds, "(vid=%"PRIu16",pcp=%d)", - vlan_tci_to_vid(nl_attr_get_be16(a)), - vlan_tci_to_pcp(nl_attr_get_be16(a))); + tci = nl_attr_get_be16(a); + ds_put_format(ds, "(vid=%"PRIu16",pcp=%d%s)", + vlan_tci_to_vid(tci), vlan_tci_to_pcp(tci), + tci & htons(VLAN_CFI) ? "" : ",cfi=0"); break; case OVS_KEY_ATTR_ETHERTYPE: @@ -616,7 +620,8 @@ parse_odp_key_attr(const char *s, struct ofpbuf *key) && n > 0)) { nl_msg_put_be16(key, OVS_KEY_ATTR_VLAN, htons((vid << VLAN_VID_SHIFT) | - (pcp << VLAN_PCP_SHIFT))); + (pcp << VLAN_PCP_SHIFT) | + VLAN_CFI)); return n; } } @@ -914,11 +919,13 @@ odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow) memcpy(eth_key->eth_src, flow->dl_src, ETH_ADDR_LEN); memcpy(eth_key->eth_dst, flow->dl_dst, ETH_ADDR_LEN); - if (flow->vlan_tci != htons(0)) { + if (flow->vlan_tci != htons(0) || flow->dl_type == htons(ETH_TYPE_VLAN)) { nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, htons(ETH_TYPE_VLAN)); - nl_msg_put_be16(buf, OVS_KEY_ATTR_VLAN, - flow->vlan_tci & ~htons(VLAN_CFI)); + nl_msg_put_be16(buf, OVS_KEY_ATTR_VLAN, flow->vlan_tci); encap = nl_msg_start_nested(buf, OVS_KEY_ATTR_ENCAP); + if (flow->vlan_tci == htons(0)) { + goto unencap; + } } else { encap = 0; } @@ -1201,20 +1208,29 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t key_len, && (nl_attr_get_be16(attrs[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_TYPE_VLAN))) { const struct nlattr *encap = attrs[OVS_KEY_ATTR_ENCAP]; - const struct nlattr *vlan = attrs[OVS_KEY_ATTR_VLAN]; + ovs_be16 tci = nl_attr_get_be16(attrs[OVS_KEY_ATTR_VLAN]); - flow->vlan_tci = nl_attr_get_be16(vlan); - if (flow->vlan_tci & htons(VLAN_CFI)) { - return EINVAL; - } - flow->vlan_tci |= htons(VLAN_CFI); + if (tci & htons(VLAN_CFI)) { + flow->vlan_tci = tci; + + error = parse_flow_nlattrs(nl_attr_get(encap), + nl_attr_get_size(encap), + attrs, &present_attrs); + if (error) { + return error; + } + expected_attrs = 0; + } else if (tci == htons(0)) { + /* Used for a truncated 802.1Q header. */ + if (nl_attr_get_size(encap)) { + return EINVAL; + } - error = parse_flow_nlattrs(nl_attr_get(encap), nl_attr_get_size(encap), - attrs, &present_attrs); - if (error) { - return error; + flow->dl_type = htons(ETH_TYPE_VLAN); + return 0; + } else { + return EINVAL; } - expected_attrs = 0; } if (attrs[OVS_KEY_ATTR_ETHERTYPE]) { diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 90f02dc..42d7976 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -3633,8 +3633,7 @@ commit_vlan_action(struct action_xlate_ctx *ctx, ovs_be16 new_tci) } if (new_tci & htons(VLAN_CFI)) { - nl_msg_put_be16(ctx->odp_actions, OVS_ACTION_ATTR_PUSH_VLAN, - new_tci & ~htons(VLAN_CFI)); + nl_msg_put_be16(ctx->odp_actions, OVS_ACTION_ATTR_PUSH_VLAN, new_tci); } base->vlan_tci = new_tci; } @@ -4713,6 +4712,14 @@ flow_get_vlan(struct ofproto_dpif *ofproto, const struct flow *flow, return -1; } } else { + if (flow->dl_type == htons(ETH_TYPE_VLAN) && + !(flow->vlan_tci & htons(VLAN_CFI))) { + static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5); + VLOG_WARN_RL(&rl, "bridge %s: dropping packet with partial " + "VLAN tag received on port %s", + ofproto->up.name, in_bundle->name); + return -1; + } if (in_bundle->vlan_mode != PORT_VLAN_TRUNK) { return in_bundle->vlan; } else { -- 1.7.2.5 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev