Until now, the tun_id and in_port have been lost when a packet is sent from the kernel to userspace and then back to the kernel. I didn't think that this was a problem, but recent behavior made me look closer and see that it makes a difference if sFlow is turned on or if an ODP_ATTR_ACTION_CONTROLLER action is present. We could possibly kluge around those, but for future-proofing it seems better to pass the packet metadata from userspace to the kernel. That is what this commit does.
This commit introduces a user-kernel protocol break. We could avoid that, if it is desirable, by making ODP_PACKET_ATTR_KEY optional for ODP_PACKET_CMD_EXECUTE commands. Signed-off-by: Ben Pfaff <b...@nicira.com> Bug #4769. --- datapath/datapath.c | 10 +++++- datapath/flow.c | 89 +++++++++++++++++++++++++++++++++++++++++--------- datapath/flow.h | 2 + lib/dpif-linux.c | 3 ++ lib/dpif-netdev.c | 1 + lib/dpif-provider.h | 11 ++++-- lib/dpif.c | 9 ++++- lib/dpif.h | 6 ++- ofproto/ofproto.c | 30 +++++++++++++---- 9 files changed, 130 insertions(+), 31 deletions(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index c948944..9c9740b 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -679,7 +679,8 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) int err; err = -EINVAL; - if (!a[ODP_PACKET_ATTR_PACKET] || !a[ODP_PACKET_ATTR_ACTIONS] || + if (!a[ODP_PACKET_ATTR_PACKET] || !a[ODP_PACKET_ATTR_KEY] || + !a[ODP_PACKET_ATTR_ACTIONS] || nla_len(a[ODP_PACKET_ATTR_PACKET]) < ETH_HLEN) goto exit; @@ -708,6 +709,12 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) /* Initialize OVS_CB (it came from Netlink so might not be zeroed). */ memset(OVS_CB(packet), 0, sizeof(struct ovs_skb_cb)); + /* Get flow metadata provided by userspace. */ + err = flow_metadata_from_nlattrs(&key.in_port, &key.tun_id, + a[ODP_PACKET_ATTR_KEY]); + if (err) + goto exit; + err = flow_extract(packet, -1, &key, &is_frag); if (err) goto exit; @@ -727,6 +734,7 @@ exit: static const struct nla_policy packet_policy[ODP_PACKET_ATTR_MAX + 1] = { [ODP_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC }, + [ODP_PACKET_ATTR_KEY] = { .type = NLA_NESTED }, [ODP_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED }, }; diff --git a/datapath/flow.c b/datapath/flow.c index fe05df3..f0a97a9 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -580,6 +580,23 @@ int flow_cmp(const struct tbl_node *node, void *key2_) return !memcmp(key1, key2, sizeof(struct sw_flow_key)); } +/* The size of the argument for each %ODP_KEY_ATTR_* Netlink attribute. */ +static const u32 key_lens[ODP_KEY_ATTR_MAX + 1] = { + [ODP_KEY_ATTR_TUN_ID] = 8, + [ODP_KEY_ATTR_IN_PORT] = 4, + [ODP_KEY_ATTR_ETHERNET] = sizeof(struct odp_key_ethernet), + [ODP_KEY_ATTR_8021Q] = sizeof(struct odp_key_8021q), + [ODP_KEY_ATTR_ETHERTYPE] = 2, + [ODP_KEY_ATTR_IPV4] = sizeof(struct odp_key_ipv4), + [ODP_KEY_ATTR_IPV6] = sizeof(struct odp_key_ipv6), + [ODP_KEY_ATTR_TCP] = sizeof(struct odp_key_tcp), + [ODP_KEY_ATTR_UDP] = sizeof(struct odp_key_udp), + [ODP_KEY_ATTR_ICMP] = sizeof(struct odp_key_icmp), + [ODP_KEY_ATTR_ICMPV6] = sizeof(struct odp_key_icmpv6), + [ODP_KEY_ATTR_ARP] = sizeof(struct odp_key_arp), + [ODP_KEY_ATTR_ND] = sizeof(struct odp_key_nd), +}; + /** * flow_from_nlattrs - parses Netlink attributes into a flow key. * @swkey: receives the extracted flow key. @@ -603,22 +620,6 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, const struct nlattr *attr) prev_type = ODP_KEY_ATTR_UNSPEC; nla_for_each_nested(nla, attr, rem) { - static const u32 key_lens[ODP_KEY_ATTR_MAX + 1] = { - [ODP_KEY_ATTR_TUN_ID] = 8, - [ODP_KEY_ATTR_IN_PORT] = 4, - [ODP_KEY_ATTR_ETHERNET] = sizeof(struct odp_key_ethernet), - [ODP_KEY_ATTR_8021Q] = sizeof(struct odp_key_8021q), - [ODP_KEY_ATTR_ETHERTYPE] = 2, - [ODP_KEY_ATTR_IPV4] = sizeof(struct odp_key_ipv4), - [ODP_KEY_ATTR_IPV6] = sizeof(struct odp_key_ipv6), - [ODP_KEY_ATTR_TCP] = sizeof(struct odp_key_tcp), - [ODP_KEY_ATTR_UDP] = sizeof(struct odp_key_udp), - [ODP_KEY_ATTR_ICMP] = sizeof(struct odp_key_icmp), - [ODP_KEY_ATTR_ICMPV6] = sizeof(struct odp_key_icmpv6), - [ODP_KEY_ATTR_ARP] = sizeof(struct odp_key_arp), - [ODP_KEY_ATTR_ND] = sizeof(struct odp_key_nd), - }; - const struct odp_key_ethernet *eth_key; const struct odp_key_8021q *q_key; const struct odp_key_ipv4 *ipv4_key; @@ -814,6 +815,62 @@ int flow_from_nlattrs(struct sw_flow_key *swkey, const struct nlattr *attr) return -EINVAL; } +/** + * flow_metadata_from_nlattrs - parses Netlink attributes into a flow key. + * @in_port: receives the extracted input port. + * @tun_id: receives the extracted tunnel ID. + * @key: Netlink attribute holding nested %ODP_KEY_ATTR_* Netlink attribute + * sequence. + * + * This parses a series of Netlink attributes that form a flow key, which must + * take the same form accepted by flow_from_nlattrs(), but only enough of it to + * get the metadata, that is, the parts of the flow key that cannot be + * extracted from the packet itself. + */ +int flow_metadata_from_nlattrs(u16 *in_port, __be64 *tun_id, + const struct nlattr *attr) +{ + const struct nlattr *nla; + u16 prev_type; + int rem; + + *tun_id = 0; + + prev_type = ODP_KEY_ATTR_UNSPEC; + nla_for_each_nested(nla, attr, rem) { + int type = nla_type(nla); + + if (type > ODP_KEY_ATTR_MAX || nla_len(nla) != key_lens[type]) + return -EINVAL; + + switch (TRANSITION(prev_type, type)) { + case TRANSITION(ODP_KEY_ATTR_UNSPEC, ODP_KEY_ATTR_TUN_ID): + *tun_id = nla_get_be64(nla); + break; + + case TRANSITION(ODP_KEY_ATTR_UNSPEC, ODP_KEY_ATTR_IN_PORT): + case TRANSITION(ODP_KEY_ATTR_TUN_ID, ODP_KEY_ATTR_IN_PORT): + if (nla_get_u32(nla) >= DP_MAX_PORTS) + return -EINVAL; + *in_port = nla_get_u32(nla); + break; + + default: + goto done; + } + + prev_type = type; + } + if (rem) + return -EINVAL; + +done: + if (prev_type == ODP_KEY_ATTR_UNSPEC || + prev_type == ODP_KEY_ATTR_TUN_ID) + return -EINVAL; + return 0; +} + int flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb) { struct odp_key_ethernet *eth_key; diff --git a/datapath/flow.h b/datapath/flow.h index a40073a..0b2e0a4 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -127,6 +127,8 @@ int flow_cmp(const struct tbl_node *, void *target); int flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *); int flow_from_nlattrs(struct sw_flow_key *swkey, const struct nlattr *); +int flow_metadata_from_nlattrs(u16 *in_port, __be64 *tun_id, + const struct nlattr *); static inline struct sw_flow *flow_cast(const struct tbl_node *node) { diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index 509174c..d79ef91 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -33,6 +33,7 @@ #include <unistd.h> #include "dpif-provider.h" +#include "dynamic-string.h" #include "netdev.h" #include "netdev-vport.h" #include "netlink-socket.h" @@ -698,6 +699,7 @@ dpif_linux_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_) static int dpif_linux_execute(struct dpif *dpif_, + const struct nlattr *key, size_t key_len, const struct nlattr *actions, size_t actions_len, const struct ofpbuf *packet) { @@ -715,6 +717,7 @@ dpif_linux_execute(struct dpif *dpif_, execute->dp_ifindex = dpif->dp_ifindex; nl_msg_put_unspec(buf, ODP_PACKET_ATTR_PACKET, packet->data, packet->size); + nl_msg_put_unspec(buf, ODP_PACKET_ATTR_KEY, key, key_len); nl_msg_put_unspec(buf, ODP_PACKET_ATTR_ACTIONS, actions, actions_len); error = nl_sock_transact(genl_sock, buf, NULL); diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 486ba48..daa260d 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -944,6 +944,7 @@ dpif_netdev_flow_dump_done(const struct dpif *dpif OVS_UNUSED, void *state_) static int dpif_netdev_execute(struct dpif *dpif, + const struct nlattr *k OVS_UNUSED, size_t k_len OVS_UNUSED, const struct nlattr *actions, size_t actions_len, const struct ofpbuf *packet) { diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h index 91074d5..e0d9151 100644 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@ -283,9 +283,14 @@ struct dpif_class { int (*flow_dump_done)(const struct dpif *dpif, void *state); /* Performs the 'actions_len' bytes of actions in 'actions' on the Ethernet - * frame specified in 'packet'. */ - int (*execute)(struct dpif *dpif, const struct nlattr *actions, - size_t actions_len, const struct ofpbuf *packet); + * frame specified in 'packet' taken from the flow specified in the + * 'key_len' bytes of 'key'. ('key' is mostly redundant with 'packet', but + * it contains some metadata that cannot be recovered from 'packet', such + * as tun_id and in_port.) */ + int (*execute)(struct dpif *dpif, + const struct nlattr *key, size_t key_len, + const struct nlattr *actions, size_t actions_len, + const struct ofpbuf *packet); /* Retrieves 'dpif''s "listen mask" into '*listen_mask'. A 1-bit of value * 2**X set in '*listen_mask' indicates that 'dpif' will receive messages diff --git a/lib/dpif.c b/lib/dpif.c index a754613..649436e 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -913,11 +913,15 @@ dpif_flow_dump_done(struct dpif_flow_dump *dump) } /* Causes 'dpif' to perform the 'actions_len' bytes of actions in 'actions' on - * the Ethernet frame specified in 'packet'. + * the Ethernet frame specified in 'packet' taken from the flow specified in + * the 'key_len' bytes of 'key'. ('key' is mostly redundant with 'packet', but + * it contains some metadata that cannot be recovered from 'packet', such as + * tun_id and in_port.) * * Returns 0 if successful, otherwise a positive errno value. */ int dpif_execute(struct dpif *dpif, + const struct nlattr *key, size_t key_len, const struct nlattr *actions, size_t actions_len, const struct ofpbuf *buf) { @@ -925,7 +929,8 @@ dpif_execute(struct dpif *dpif, COVERAGE_INC(dpif_execute); if (actions_len > 0) { - error = dpif->dpif_class->execute(dpif, actions, actions_len, buf); + error = dpif->dpif_class->execute(dpif, key, key_len, + actions, actions_len, buf); } else { error = 0; } diff --git a/lib/dpif.h b/lib/dpif.h index 8872a2e..f0f9801 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -146,8 +146,10 @@ bool dpif_flow_dump_next(struct dpif_flow_dump *, const struct dpif_flow_stats **); int dpif_flow_dump_done(struct dpif_flow_dump *); -int dpif_execute(struct dpif *, const struct nlattr *actions, - size_t actions_len, const struct ofpbuf *); +int dpif_execute(struct dpif *, + const struct nlattr *key, size_t key_len, + const struct nlattr *actions, size_t actions_len, + const struct ofpbuf *); enum dpif_upcall_type { DPIF_UC_MISS, /* Miss in flow table. */ diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c index aac9edd..6ba0030 100644 --- a/ofproto/ofproto.c +++ b/ofproto/ofproto.c @@ -1464,17 +1464,22 @@ ofproto_send_packet(struct ofproto *p, const struct flow *flow, const union ofp_action *actions, size_t n_actions, const struct ofpbuf *packet) { + struct ofpbuf key, *odp_actions; + struct odputil_keybuf keybuf; struct action_xlate_ctx ctx; - struct ofpbuf *odp_actions; action_xlate_ctx_init(&ctx, p, flow, packet); /* Always xlate packets originated in this function. */ ctx.check_special = false; odp_actions = xlate_actions(&ctx, actions, n_actions); + ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); + odp_flow_key_from_flow(&key, flow); + /* XXX Should we translate the dpif_execute() errno value into an OpenFlow * error code? */ - dpif_execute(p->dpif, odp_actions->data, odp_actions->size, packet); + dpif_execute(p->dpif, key.data, key.size, + odp_actions->data, odp_actions->size, packet); ofpbuf_delete(odp_actions); @@ -2132,9 +2137,15 @@ execute_odp_actions(struct ofproto *ofproto, const struct flow *flow, return true; } else { + struct odputil_keybuf keybuf; + struct ofpbuf key; int error; - error = dpif_execute(ofproto->dpif, odp_actions, actions_len, packet); + ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); + odp_flow_key_from_flow(&key, flow); + + error = dpif_execute(ofproto->dpif, key.data, key.size, + odp_actions, actions_len, packet); ofpbuf_delete(packet); return !error; } @@ -3209,8 +3220,9 @@ handle_packet_out(struct ofconn *ofconn, const struct ofp_header *oh) { struct ofproto *p = ofconn->ofproto; struct ofp_packet_out *opo; - struct ofpbuf payload, *buffer; + struct ofpbuf payload, *buffer, key; union ofp_action *ofp_actions; + struct odputil_keybuf keybuf; struct action_xlate_ctx ctx; struct ofpbuf *odp_actions; struct ofpbuf request; @@ -3258,10 +3270,14 @@ handle_packet_out(struct ofconn *ofconn, const struct ofp_header *oh) goto exit; } + ofpbuf_use_stack(&key, &keybuf, sizeof keybuf); + odp_flow_key_from_flow(&key, &flow); + /* Send. */ action_xlate_ctx_init(&ctx, p, &flow, &payload); odp_actions = xlate_actions(&ctx, ofp_actions, n_ofp_actions); - dpif_execute(p->dpif, odp_actions->data, odp_actions->size, &payload); + dpif_execute(p->dpif, key.data, key.size, + odp_actions->data, odp_actions->size, &payload); ofpbuf_delete(odp_actions); exit: @@ -4463,8 +4479,8 @@ handle_miss_upcall(struct ofproto *p, struct dpif_upcall *upcall) ofpbuf_init(&odp_actions, 32); nl_msg_put_u32(&odp_actions, ODP_ACTION_ATTR_OUTPUT, ODPP_LOCAL); - dpif_execute(p->dpif, odp_actions.data, odp_actions.size, - upcall->packet); + dpif_execute(p->dpif, upcall->key, upcall->key_len, + odp_actions.data, odp_actions.size, upcall->packet); ofpbuf_uninit(&odp_actions); } -- 1.7.1 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev_openvswitch.org