From: Justin Pettit <jpet...@nicira.com> An RFC to get early feedback on exposing Linux's kernel connection tracker to OVS. The code has a few rough spots that will be addressed in the next version:
- Need a better interface than setting individual flags for the state. - Need support for IP frags. - Need support for zones. - Should have the ability to match on "invalid" connection states. - Should only allow conntrack() if conn_state is 0x00 to prevent loops. I'd be interested in hearing suggestions for improvements other than those mentioned above. Here's a simple example flow table that allows outbound TCP traffic from port 1 and drops traffic from port 2 that was not initiated by port 1: ovs-ofctl add-flow br0 \ "in_port=1,conn_state=0x00/0x80,tcp,action=conntrack(zone=0),normal" ovs-ofctl add-flow br0 \ "in_port=2,conn_state=0x00/0x80,tcp,action=conntrack(flags=1,zone=0)" ovs-ofctl add-flow br0 in_port=2,conn_state=0x82/0x83,tcp,action=1 ovs-ofctl add-flow br0 in_port=2,conn_state=0x81/0x83,tcp,action=drop ovs-ofctl add-flow br0 priority=10,action=normal --- datapath/actions.c | 35 +++++++ datapath/datapath.c | 22 +++- datapath/flow.c | 25 +++++ datapath/flow.h | 8 ++ datapath/flow_netlink.c | 117 ++++++++++++++++++++-- datapath/flow_netlink.h | 3 +- datapath/linux/compat/include/linux/openvswitch.h | 15 +++ include/openflow/nicira-ext.h | 22 ++++ lib/dpif-netdev.c | 1 + lib/dpif.c | 1 + lib/flow.c | 63 ++++++++++-- lib/flow.h | 9 +- lib/match.c | 40 +++++++- lib/match.h | 3 + lib/meta-flow.c | 36 +++++++ lib/meta-flow.h | 1 + lib/nx-match.c | 6 +- lib/odp-execute.c | 13 +++ lib/odp-util.c | 62 ++++++++++++ lib/odp-util.h | 5 +- lib/ofp-actions.c | 90 +++++++++++++++++ lib/ofp-actions.h | 18 ++++ lib/ofp-print.c | 4 + lib/ofp-util.c | 7 +- lib/packets.h | 1 + ofproto/ofproto-dpif-xlate.c | 27 ++++- ofproto/ofproto-unixctl.man | 2 + tests/dpif-netdev.at | 16 +-- tests/odp.at | 20 ++-- tests/ofproto-dpif.at | 18 ++-- tests/ofproto.at | 5 +- utilities/ovs-ofctl.8.in | 20 ++++ 32 files changed, 654 insertions(+), 61 deletions(-) diff --git a/datapath/actions.c b/datapath/actions.c index b527cb6..beed5d8 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -32,6 
+32,7 @@ #include <net/ipv6.h> #include <net/checksum.h> #include <net/dsfield.h> +#include <net/netfilter/nf_conntrack_core.h> #include <net/sctp/checksum.h> #include "datapath.h" @@ -743,6 +744,36 @@ static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key, key->ovs_flow_hash = hash; } +static int conntrack(struct datapath *dp, struct sk_buff *skb, + struct sw_flow_key *key, + const struct ovs_conntrack_info *info) +{ + int nh_ofs = skb_network_offset(skb); + struct net *net = ovs_dp_get_net(dp); + + if (skb->nfct) { + pr_warn_once("Attempt to run through conntrack again\n"); + return 0; + } + + /* The conntrack module expects to be working at L3. */ + skb_pull(skb, nh_ofs); + + /* xxx What's the best return val? */ + if (nf_conntrack_in(net, PF_INET, NF_INET_PRE_ROUTING, skb) != NF_ACCEPT) + return EINVAL; + + if (nf_conntrack_confirm(skb) != NF_ACCEPT) + return EINVAL; + + /* Point back to L2, which OVS expects. */ + skb_push(skb, nh_ofs); + + key->phy.conn_state = ovs_map_nfctinfo(skb); + + return 0; +} + static int execute_set_action(struct sk_buff *skb, struct sw_flow_key *key, const struct nlattr *nested_attr) { @@ -913,6 +944,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, case OVS_ACTION_ATTR_SAMPLE: err = sample(dp, skb, key, a); break; + + case OVS_ACTION_ATTR_CONNTRACK: + err = conntrack(dp, skb, key, nla_data(a)); + break; } if (unlikely(err)) { diff --git a/datapath/datapath.c b/datapath/datapath.c index 61d6c0f..60a5b93 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -265,8 +265,16 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) stats = this_cpu_ptr(dp->stats_percpu); /* Look up flow. */ + /* xxx Are we better off resetting the SKB hash, since we've changed + * xxx the value of a field? Will we always have collision for packets + * xxx that only vary based on the conn_state? 
*/ +#if 0 flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb), &n_mask_hit); +#else + /* xxx Gross, clearing hash. */ + flow = ovs_flow_tbl_lookup_stats(&dp->table, key, 0, &n_mask_hit); +#endif if (unlikely(!flow)) { struct dp_upcall_info upcall; int error; @@ -527,6 +535,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct ethhdr *eth; struct vport *input_vport; + struct net *net = sock_net(skb->sk); int len; int err; @@ -566,7 +575,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) if (err) goto err_flow_free; - err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], + err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS], &flow->key, &acts); if (err) goto err_flow_free; @@ -858,6 +867,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct sw_flow_actions *acts; struct sw_flow_match match; + struct net *net = sock_net(skb->sk); int error; /* Must have key and actions. */ @@ -889,7 +899,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask); /* Validate actions. */ - error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, + error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, &acts); if (error) { OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); @@ -986,7 +996,8 @@ error: } /* Factor out action copy to avoid "Wframe-larger-than=1024" warning. 
*/ -static struct sw_flow_actions *get_flow_actions(const struct nlattr *a, +static struct sw_flow_actions *get_flow_actions(struct net *net, + const struct nlattr *a, const struct sw_flow_key *key, const struct sw_flow_mask *mask) { @@ -995,7 +1006,7 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a, int error; ovs_flow_mask_key(&masked_key, key, mask); - error = ovs_nla_copy_actions(a, &masked_key, &acts); + error = ovs_nla_copy_actions(net, a, &masked_key, &acts); if (error) { OVS_NLERR("Actions may not be safe on all matching packets.\n"); return ERR_PTR(error); @@ -1015,6 +1026,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct sw_flow_actions *old_acts = NULL, *acts = NULL; struct sw_flow_match match; + struct net *net = sock_net(skb->sk); int error; /* Extract key. */ @@ -1032,7 +1044,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) /* Validate actions. */ if (a[OVS_FLOW_ATTR_ACTIONS]) { - acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask); + acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &key, &mask); if (IS_ERR(acts)) { error = PTR_ERR(acts); goto error; diff --git a/datapath/flow.c b/datapath/flow.c index a69f4e1..0fd7c21 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -42,6 +42,7 @@ #include <net/ip.h> #include <net/ipv6.h> #include <net/ndisc.h> +#include <linux/netfilter/nf_conntrack_common.h> #include "datapath.h" #include "flow.h" @@ -677,6 +678,29 @@ int ovs_flow_key_update(struct sk_buff *skb, struct sw_flow_key *key) return key_extract(skb, key); } +/* Map SKB connection state into the values used by flow definition. */ +u8 ovs_map_nfctinfo(struct sk_buff *skb) +{ + if (!skb->nfct) + return 0; + + /* xxx This should use #defines instead of numbers. 
*/ + if (skb->nfctinfo == IP_CT_ESTABLISHED) + return 0x82; + else if (skb->nfctinfo == IP_CT_RELATED) + return 0x84; + else if (skb->nfctinfo == IP_CT_NEW) + return 0x81; + else if (skb->nfctinfo == IP_CT_ESTABLISHED_REPLY) + return 0xc2; + else if (skb->nfctinfo == IP_CT_RELATED_REPLY) + return 0xc4; + else if (skb->nfctinfo == IP_CT_NEW_REPLY) + return 0xc1; + else + return 0x80; +} + int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info, struct sk_buff *skb, struct sw_flow_key *key) @@ -703,6 +727,7 @@ int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info, key->phy.priority = skb->priority; key->phy.in_port = OVS_CB(skb)->input_vport->port_no; key->phy.skb_mark = skb->mark; + key->phy.conn_state = ovs_map_nfctinfo(skb); key->ovs_flow_hash = 0; key->recirc_id = 0; diff --git a/datapath/flow.h b/datapath/flow.h index eb9246a..2b26232 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -34,6 +34,7 @@ #include <net/inet_ecn.h> #include <net/ip_tunnels.h> +#include <net/netfilter/nf_conntrack.h> struct sk_buff; @@ -118,6 +119,10 @@ static inline void ovs_flow_tun_info_init(struct ovs_tunnel_info *tun_info, opts, opts_len); } +struct ovs_conntrack_info { + u16 zone; +}; + #define OVS_SW_FLOW_KEY_METADATA_SIZE \ (offsetof(struct sw_flow_key, recirc_id) + \ FIELD_SIZEOF(struct sw_flow_key, recirc_id)) @@ -130,6 +135,7 @@ struct sw_flow_key { u32 priority; /* Packet QoS priority. */ u32 skb_mark; /* SKB mark. */ u16 in_port; /* Input switch port (or DP_MAX_PORTS). */ + u8 conn_state; /* Connection state. */ } __packed phy; /* Safe when right after 'tun_key'. */ u32 ovs_flow_hash; /* Datapath computed hash value. */ u32 recirc_id; /* Recirculation ID. 
*/ @@ -252,6 +258,8 @@ void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *, void ovs_flow_stats_clear(struct sw_flow *); u64 ovs_flow_used_time(unsigned long flow_jiffies); +u8 ovs_map_nfctinfo(struct sk_buff *skb); + int ovs_flow_key_extract(const struct ovs_tunnel_info *tun_info, struct sk_buff *skb, struct sw_flow_key *key); /* Extract key from packet coming from userspace. */ diff --git a/datapath/flow_netlink.c b/datapath/flow_netlink.c index ecb6631..3c57ead 100644 --- a/datapath/flow_netlink.c +++ b/datapath/flow_netlink.c @@ -267,7 +267,7 @@ size_t ovs_key_attr_size(void) { /* Whenever adding new OVS_KEY_ FIELDS, we should consider * updating this function. */ - BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 22); + BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 23); return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ @@ -276,6 +276,7 @@ size_t ovs_key_attr_size(void) + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */ + nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */ + + nla_total_size(1) /* OVS_KEY_ATTR_CONN_STATE */ + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ @@ -292,6 +293,7 @@ static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_PRIORITY] = sizeof(u32), [OVS_KEY_ATTR_IN_PORT] = sizeof(u32), [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32), + [OVS_KEY_ATTR_CONN_STATE] = sizeof(u8), [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet), [OVS_KEY_ATTR_VLAN] = sizeof(__be16), [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16), @@ -663,6 +665,13 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, return -EINVAL; *attrs &= ~(1ULL << OVS_KEY_ATTR_TUNNEL); } + + if (*attrs & (1ULL << OVS_KEY_ATTR_CONN_STATE)) { + uint8_t conn_state = nla_get_u8(a[OVS_KEY_ATTR_CONN_STATE]); + + SW_FLOW_KEY_PUT(match, phy.conn_state, conn_state, is_mask); + 
*attrs &= ~(1ULL << OVS_KEY_ATTR_CONN_STATE); + } return 0; } @@ -1146,6 +1155,9 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark)) goto nla_put_failure; + if (nla_put_u8(skb, OVS_KEY_ATTR_CONN_STATE, output->phy.conn_state)) + goto nla_put_failure; + nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key)); if (!nla) goto nla_put_failure; @@ -1447,12 +1459,64 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa, a->nla_len = sfa->actions_len - st_offset; } -static int __ovs_nla_copy_actions(const struct nlattr *attr, +static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, int depth, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci); -static int validate_and_copy_sample(const struct nlattr *attr, +static int validate_and_copy_conntrack(struct net *net, + const struct nlattr *attr, + const struct sw_flow_key *key, + struct sw_flow_actions **sfa) +{ + struct ovs_conntrack_info ct_info; + struct nf_conntrack_tuple t; + struct nlattr *a; + int rem; + + memset(&ct_info, 0, sizeof(ct_info)); + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + static const u32 ovs_ct_attr_lens[OVS_CT_ATTR_MAX + 1] = { + [OVS_CT_ATTR_ZONE] = sizeof(u16), + }; + + if (type > OVS_CT_ATTR_MAX) { + OVS_NLERR("Unknown conntrack attribute (type=%d, max=%d).\n", + type, OVS_CT_ATTR_MAX); + return -EINVAL; + } + + if (ovs_ct_attr_lens[type] != nla_len(a) && + ovs_ct_attr_lens[type] != -1) { + OVS_NLERR("Conntrack attribute type has unexpected " + " length (type=%d, length=%d, expected=%d).\n", + type, nla_len(a), ovs_ct_attr_lens[type]); + return -EINVAL; + } + + switch (type) { + case OVS_CT_ATTR_ZONE: + memset(&t, 0, sizeof(t)); + ct_info.zone = nla_get_u16(a); + break; + default: + OVS_NLERR("Unknown conntrack attribute (%d).\n", type); + return -EINVAL; + } + } + + if (rem > 0) { + OVS_NLERR("Conntrack attribute 
has %d unknown bytes.\n", rem); + return -EINVAL; + } + + return add_action(sfa, OVS_ACTION_ATTR_CONNTRACK, &ct_info, + sizeof(ct_info)); +} + +static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, int depth, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci) @@ -1492,7 +1556,7 @@ static int validate_and_copy_sample(const struct nlattr *attr, if (st_acts < 0) return st_acts; - err = __ovs_nla_copy_actions(actions, key, depth + 1, sfa, + err = __ovs_nla_copy_actions(net, actions, key, depth + 1, sfa, eth_type, vlan_tci); if (err) return err; @@ -1622,6 +1686,7 @@ static int validate_set(const struct nlattr *a, case OVS_KEY_ATTR_PRIORITY: case OVS_KEY_ATTR_SKB_MARK: + case OVS_KEY_ATTR_CONN_STATE: case OVS_KEY_ATTR_ETHERNET: break; @@ -1733,7 +1798,7 @@ static int copy_action(const struct nlattr *from, return 0; } -static int __ovs_nla_copy_actions(const struct nlattr *attr, +static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, int depth, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci) @@ -1756,7 +1821,8 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, [OVS_ACTION_ATTR_POP_VLAN] = 0, [OVS_ACTION_ATTR_SET] = (u32)-1, [OVS_ACTION_ATTR_SAMPLE] = (u32)-1, - [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash) + [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash), + [OVS_ACTION_ATTR_CONNTRACK] = (u32)-1, }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); @@ -1855,13 +1921,20 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, break; case OVS_ACTION_ATTR_SAMPLE: - err = validate_and_copy_sample(a, key, depth, sfa, + err = validate_and_copy_sample(net, a, key, depth, sfa, eth_type, vlan_tci); if (err) return err; skip_copy = true; break; + case OVS_ACTION_ATTR_CONNTRACK: + err = validate_and_copy_conntrack(net, a, key, sfa); + if (err) + return err; + skip_copy = true; + break; + 
default: return -EINVAL; } @@ -1878,7 +1951,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, return 0; } -int ovs_nla_copy_actions(const struct nlattr *attr, +int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa) { @@ -1888,7 +1961,7 @@ int ovs_nla_copy_actions(const struct nlattr *attr, if (IS_ERR(*sfa)) return PTR_ERR(*sfa); - err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type, + err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type, key->eth.tci); if (err) ovs_nla_free_flow_actions(*sfa); @@ -1965,6 +2038,25 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) return 0; } +static int conntrack_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) +{ + struct ovs_conntrack_info *info; + struct nlattr *start; + + start = nla_nest_start(skb, OVS_ACTION_ATTR_CONNTRACK); + if (!start) + return -EMSGSIZE; + + info = nla_data(attr); + + if (nla_put_u16(skb, OVS_CT_ATTR_ZONE, info->zone)) + return -EMSGSIZE; + + nla_nest_end(skb, start); + + return 0; +} + int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) { const struct nlattr *a; @@ -1985,6 +2077,13 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) if (err) return err; break; + + case OVS_ACTION_ATTR_CONNTRACK: + err = conntrack_action_to_attr(a, skb); + if (err) + return err; + break; + default: if (nla_put(skb, type, nla_len(a), nla_data(a))) return -EMSGSIZE; diff --git a/datapath/flow_netlink.h b/datapath/flow_netlink.h index d1802c4..322c7fd 100644 --- a/datapath/flow_netlink.h +++ b/datapath/flow_netlink.h @@ -53,12 +53,13 @@ int ovs_nla_get_match(struct sw_flow_match *match, int ovs_nla_put_egress_tunnel_key(struct sk_buff *, const struct ovs_tunnel_info *); -int ovs_nla_copy_actions(const struct nlattr *attr, +int ovs_nla_copy_actions(struct net*net, const struct nlattr *attr, const struct 
sw_flow_key *key, struct sw_flow_actions **sfa); int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb); +void __ovs_nla_free_flow_actions(struct sw_flow_actions *); void ovs_nla_free_flow_actions(struct sw_flow_actions *); void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *); diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h index 306ea86..f3654de 100644 --- a/datapath/linux/compat/include/linux/openvswitch.h +++ b/datapath/linux/compat/include/linux/openvswitch.h @@ -329,6 +329,7 @@ enum ovs_key_attr { OVS_KEY_ATTR_MPLS, /* array of struct ovs_key_mpls. * The implementation may restrict * the accepted length of the array. */ + OVS_KEY_ATTR_CONN_STATE,/* u8 conn state */ #ifdef __KERNEL__ /* Only used within kernel data path. */ @@ -579,6 +580,18 @@ struct ovs_action_hash { }; /** + * enum ovs_conntrack_attr - Attributes for %OVS_ACTION_ATTR_CONNTRACK action. + * @OVS_CT_ATTR_ZONE: u16 connection tracking zone. + */ +enum ovs_conntrack_attr { + OVS_CT_ATTR_UNSPEC, + OVS_CT_ATTR_ZONE, + __OVS_CT_ATTR_MAX +}; + +#define OVS_CT_ATTR_MAX (__OVS_CT_ATTR_MAX - 1) + +/** * enum ovs_action_attr - Action types. * * @OVS_ACTION_ATTR_OUTPUT: Output packet to port. @@ -609,6 +622,7 @@ struct ovs_action_hash { * indicate the new packet contents. This could potentially still be * %ETH_P_MPLS if the resulting MPLS label stack is not empty. If there * is no MPLS label stack, as determined by ethertype, no action is taken. + * @OVS_ACTION_ATTR_CONNTRACK: Track the connection. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -631,6 +645,7 @@ enum ovs_action_attr { * data immediately followed by a mask. * The data must be zero for the unmasked * bits. 
*/ + OVS_ACTION_ATTR_CONNTRACK, /* One nested OVS_CT_ATTR_* */ __OVS_ACTION_ATTR_MAX }; diff --git a/include/openflow/nicira-ext.h b/include/openflow/nicira-ext.h index bbf3388..57f0133 100644 --- a/include/openflow/nicira-ext.h +++ b/include/openflow/nicira-ext.h @@ -968,6 +968,28 @@ OFP_ASSERT(sizeof(struct nx_async_config) == 24); * Masking: not maskable. */ #define NXM_NX_RECIRC_ID NXM_HEADER (0x0001, 36, 4) +/* Connection tracking state. + * + * The connection tracking state is populated by the NXAST_CONNTRACK + * action. The following flags are defined: + * + * - CONN_STATE_TRACKED (0x80): Connection tracking has occurred. + * - CONN_STATE_REPLY (0x40): This flow did not initiate the connection. + * + * The following values describe the state of the connection: + * + * - New (0x01): This is the beginning of a new connection. + * - Established (0x02): This is part of an already existing connection. + * - Related (0x04): This is a new connection that is "expected". + * + * Prereqs: None. + * + * Format: 8-bit fully maskable + * + * Masking: Fully maskable. */ +#define NXM_NX_CONN_STATE NXM_HEADER (0x0001, 37, 1) +#define NXM_NX_CONN_STATE_W NXM_HEADER_W(0x0001, 37, 1) + /* ## --------------------- ## */ /* ## Requests and replies. 
## */ /* ## --------------------- ## */ diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 6b8201b..82dbfd0 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -2907,6 +2907,7 @@ dp_execute_cb(void *aux_, struct dpif_packet **packets, int cnt, case OVS_ACTION_ATTR_SET_MASKED: case OVS_ACTION_ATTR_SAMPLE: case OVS_ACTION_ATTR_UNSPEC: + case OVS_ACTION_ATTR_CONNTRACK: case __OVS_ACTION_ATTR_MAX: OVS_NOT_REACHED(); } diff --git a/lib/dpif.c b/lib/dpif.c index bdefdcc..bdb0564 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -1044,6 +1044,7 @@ dpif_execute_helper_cb(void *aux_, struct dpif_packet **packets, int cnt, case OVS_ACTION_ATTR_SET_MASKED: case OVS_ACTION_ATTR_SAMPLE: case OVS_ACTION_ATTR_UNSPEC: + case OVS_ACTION_ATTR_CONNTRACK: case __OVS_ACTION_ATTR_MAX: OVS_NOT_REACHED(); } diff --git a/lib/flow.c b/lib/flow.c index b9f1820..0a5b010 100644 --- a/lib/flow.c +++ b/lib/flow.c @@ -70,11 +70,12 @@ BUILD_ASSERT_DECL(offsetof(struct flow, nw_frag) + 3 offsetof(struct flow, nw_proto) / 4 == offsetof(struct flow, nw_tos) / 4); -/* TCP flags in the first half of a BE32, zeroes in the other half. */ +/* TCP flags in the first half of a BE32, 'conn_state' and pad in the + * other half. */ BUILD_ASSERT_DECL(offsetof(struct flow, tcp_flags) + 2 - == offsetof(struct flow, pad) && + == offsetof(struct flow, conn_state) && offsetof(struct flow, tcp_flags) / 4 - == offsetof(struct flow, pad) / 4); + == offsetof(struct flow, conn_state) / 4); #if WORDS_BIGENDIAN #define TCP_FLAGS_BE32(tcp_ctl) ((OVS_FORCE ovs_be32)TCP_FLAGS_BE16(tcp_ctl) \ << 16) @@ -121,7 +122,7 @@ struct mf_ctx { * away. Some GCC versions gave warnings on ALWAYS_INLINE, so these are * defined as macros. */ -#if (FLOW_WC_SEQ != 27) +#if (FLOW_WC_SEQ != 28) #define MINIFLOW_ASSERT(X) ovs_assert(X) BUILD_MESSAGE("FLOW_WC_SEQ changed: miniflow_extract() will have runtime " "assertions enabled. 
Consider updating FLOW_WC_SEQ after " @@ -141,6 +142,19 @@ BUILD_MESSAGE("FLOW_WC_SEQ changed: miniflow_extract() will have runtime " #define miniflow_push_be32_(MF, OFS, VALUE) \ miniflow_push_uint32_(MF, OFS, (OVS_FORCE uint32_t)(VALUE)) +/* xxx Possibly clean this up. Assert if another value has been pushed. */ +/* Caller must have previously called a miniflow_push_* macro for "OFS" + * with no other push calls in between. */ +#define miniflow_update_uint32_(MF, OFS, VALUE, MASK) \ +{ \ + MINIFLOW_ASSERT(MF.data < MF.end && (OFS) % 4 == 0); \ + *(MF.data-1) |= (VALUE & MASK); \ +} + +#define miniflow_update_be32_(MF, OFS, VALUE, MASK) \ + miniflow_update_uint32_(MF, OFS, (OVS_FORCE uint32_t)(VALUE), \ + (OVS_FORCE uint32_t)(MASK)) + #define miniflow_push_uint16_(MF, OFS, VALUE) \ { \ MINIFLOW_ASSERT(MF.data < MF.end && \ @@ -191,6 +205,12 @@ BUILD_MESSAGE("FLOW_WC_SEQ changed: miniflow_extract() will have runtime " } \ } +#define miniflow_update_uint32(MF, FIELD, VALUE, MASK) \ + miniflow_update_uint32_(MF, offsetof(struct flow, FIELD), VALUE, MASK) + +#define miniflow_update_be32(MF, FIELD, VALUE, MASK) \ + miniflow_update_be32_(MF, offsetof(struct flow, FIELD), VALUE, MASK) + #define miniflow_push_uint16(MF, FIELD, VALUE) \ miniflow_push_uint16_(MF, offsetof(struct flow, FIELD), VALUE) @@ -573,13 +593,28 @@ miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md, miniflow_push_be32(mf, nw_frag, BYTES_TO_BE32(nw_frag, nw_tos, nw_ttl, nw_proto)); + /* xxx This is hacky to get around ICMPv6 issues. 
*/ + if ((nw_frag & FLOW_NW_FRAG_LATER) || (nw_proto != IPPROTO_ICMPV6)) { + if (md) { + /* xxx Can't be use _check() version, since state may be 0 */ + miniflow_push_be32(mf, tcp_flags, + BYTES_TO_BE32(0, 0, md->conn_state, 0)); + } else { + /* xxx Hack so tcp_flags always has pushed entry */ + miniflow_push_be32(mf, tcp_flags, + BYTES_TO_BE32(0, 0, 0, 0)); + } + } + if (OVS_LIKELY(!(nw_frag & FLOW_NW_FRAG_LATER))) { if (OVS_LIKELY(nw_proto == IPPROTO_TCP)) { if (OVS_LIKELY(size >= TCP_HEADER_LEN)) { const struct tcp_header *tcp = data; - miniflow_push_be32(mf, tcp_flags, - TCP_FLAGS_BE32(tcp->tcp_ctl)); + miniflow_update_be32(mf, tcp_flags, + TCP_FLAGS_BE32(tcp->tcp_ctl), + htonl(0xffff0000)); + miniflow_push_words(mf, tp_src, &tcp->tcp_src, 1); } } else if (OVS_LIKELY(nw_proto == IPPROTO_UDP)) { @@ -625,6 +660,17 @@ miniflow_extract(struct ofpbuf *packet, const struct pkt_metadata *md, miniflow_push_words(mf, nd_target, nd_target, sizeof *nd_target / 4); } + /* xxx This is gross. */ + if (md) { + /* xxx Can't be use _check() version, since + * xxx state may be 0 */ + miniflow_push_be32(mf, tcp_flags, + BYTES_TO_BE32(0, 0, md->conn_state, 0)); + } else { + /* xxx Hack so tcp_flags always has pushed entry */ + miniflow_push_be32(mf, tcp_flags, + BYTES_TO_BE32(0, 0, 0, 0)); + } miniflow_push_be16(mf, tp_src, htons(icmp->icmp6_type)); miniflow_push_be16(mf, tp_dst, htons(icmp->icmp6_code)); } @@ -668,7 +714,7 @@ flow_unwildcard_tp_ports(const struct flow *flow, struct flow_wildcards *wc) void flow_get_metadata(const struct flow *flow, struct flow_metadata *fmd) { - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28); fmd->dp_hash = flow->dp_hash; fmd->recirc_id = flow->recirc_id; @@ -678,6 +724,7 @@ flow_get_metadata(const struct flow *flow, struct flow_metadata *fmd) fmd->metadata = flow->metadata; memcpy(fmd->regs, flow->regs, sizeof fmd->regs); fmd->pkt_mark = flow->pkt_mark; + fmd->conn_state = flow->conn_state; fmd->in_port = 
flow->in_port.ofp_port; } @@ -1338,7 +1385,7 @@ flow_push_mpls(struct flow *flow, int n, ovs_be16 mpls_eth_type, flow->mpls_lse[0] = set_mpls_lse_values(ttl, tc, 1, htonl(label)); /* Clear all L3 and L4 fields. */ - BUILD_ASSERT(FLOW_WC_SEQ == 27); + BUILD_ASSERT(FLOW_WC_SEQ == 28); memset((char *) flow + FLOW_SEGMENT_2_ENDS_AT, 0, sizeof(struct flow) - FLOW_SEGMENT_2_ENDS_AT); } diff --git a/lib/flow.h b/lib/flow.h index 2764916..a985ca9 100644 --- a/lib/flow.h +++ b/lib/flow.h @@ -38,7 +38,7 @@ struct pkt_metadata; /* This sequence number should be incremented whenever anything involving flows * or the wildcarding of flows changes. This will cause build assertion * failures in places which likely need to be updated. */ -#define FLOW_WC_SEQ 27 +#define FLOW_WC_SEQ 28 /* Number of Open vSwitch extension 32-bit registers. */ #define FLOW_N_REGS 8 @@ -124,7 +124,8 @@ struct flow { uint8_t arp_tha[6]; /* ARP/ND target hardware address. */ struct in6_addr nd_target; /* IPv6 neighbor discovery (ND) target. */ ovs_be16 tcp_flags; /* TCP flags. With L3 to avoid matching L4. */ - ovs_be16 pad; /* Padding. */ + uint8_t conn_state ; /* Connection state. */ + uint8_t pad; /* Padding. */ /* L4 */ ovs_be16 tp_src; /* TCP/UDP/SCTP source port. */ @@ -141,7 +142,7 @@ BUILD_ASSERT_DECL(sizeof(struct flow) % 4 == 0); /* Remember to update FLOW_WC_SEQ when changing 'struct flow'. */ BUILD_ASSERT_DECL(offsetof(struct flow, dp_hash) + sizeof(uint32_t) == sizeof(struct flow_tnl) + 176 - && FLOW_WC_SEQ == 27); + && FLOW_WC_SEQ == 28); /* Incremental points at which flow classification may be performed in * segments. @@ -174,6 +175,7 @@ struct flow_metadata { ovs_be64 metadata; /* OpenFlow 1.1+ metadata field. */ uint32_t regs[FLOW_N_REGS]; /* Registers. */ uint32_t pkt_mark; /* Packet mark. */ + uint8_t conn_state; /* Connection state. */ ofp_port_t in_port; /* OpenFlow port or zero. 
*/ }; @@ -669,6 +671,7 @@ pkt_metadata_from_flow(const struct flow *flow) md.skb_priority = flow->skb_priority; md.pkt_mark = flow->pkt_mark; md.in_port = flow->in_port; + md.conn_state = flow->conn_state; return md; } diff --git a/lib/match.c b/lib/match.c index c4edbfb..cba569c 100644 --- a/lib/match.c +++ b/lib/match.c @@ -60,6 +60,10 @@ match_wc_init(struct match *match, const struct flow *flow) memset(&wc->masks.pkt_mark, 0xff, sizeof wc->masks.pkt_mark); } + if (flow->conn_state) { + memset(&wc->masks.conn_state, 0xff, sizeof wc->masks.conn_state); + } + for (i = 0; i < FLOW_N_REGS; i++) { if (flow->regs[i]) { memset(&wc->masks.regs[i], 0xff, sizeof wc->masks.regs[i]); @@ -335,6 +339,20 @@ match_set_pkt_mark_masked(struct match *match, uint32_t pkt_mark, uint32_t mask) } void +match_set_conn_state(struct match *match, uint8_t conn_state) +{ + match_set_conn_state_masked(match, conn_state, UINT8_MAX); +} + +void +match_set_conn_state_masked(struct match *match, uint8_t conn_state, + uint8_t mask) +{ + match->flow.conn_state = conn_state & mask; + match->wc.masks.conn_state = mask; +} + +void match_set_dl_type(struct match *match, ovs_be16 dl_type) { match->wc.masks.dl_type = OVS_BE16_MAX; @@ -867,6 +885,19 @@ format_ipv6_netmask(struct ds *s, const char *name, } static void +format_uint8_masked(struct ds *s, const char *name, + uint8_t value, uint8_t mask) +{ + if (mask) { + ds_put_format(s, "%s=%#"PRIx8, name, value); + if (mask != UINT8_MAX) { + ds_put_format(s, "/%#"PRIx8, mask); + } + ds_put_char(s, ','); + } +} + +static void format_be16_masked(struct ds *s, const char *name, ovs_be16 value, ovs_be16 mask) { @@ -959,7 +990,7 @@ match_format(const struct match *match, struct ds *s, unsigned int priority) int i; - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28); if (priority != OFP_DEFAULT_PRIORITY) { ds_put_format(s, "priority=%u,", priority); @@ -981,6 +1012,13 @@ match_format(const struct match *match, struct ds *s, unsigned 
int priority) ds_put_format(s, "skb_priority=%#"PRIx32",", f->skb_priority); } + if (wc->masks.conn_state) { + /* xxx Spell out the flags? To be prettier? */ + /* xxx If pretty print, remove format_uint8_masked(). */ + format_uint8_masked(s, "conn_state", f->conn_state, + wc->masks.conn_state); + } + if (wc->masks.dl_type) { skip_type = true; if (f->dl_type == htons(ETH_TYPE_IP)) { diff --git a/lib/match.h b/lib/match.h index ce9fb28..b8e3745 100644 --- a/lib/match.h +++ b/lib/match.h @@ -71,6 +71,9 @@ void match_set_tun_flags_masked(struct match *match, uint16_t flags, uint16_t ma void match_set_in_port(struct match *, ofp_port_t ofp_port); void match_set_pkt_mark(struct match *, uint32_t pkt_mark); void match_set_pkt_mark_masked(struct match *, uint32_t pkt_mark, uint32_t mask); +void match_set_conn_state(struct match *, uint8_t conn_state); +void match_set_conn_state_masked(struct match *, uint8_t conn_state, + uint8_t mask); void match_set_skb_priority(struct match *, uint32_t skb_priority); void match_set_dl_type(struct match *, ovs_be16); void match_set_dl_src(struct match *, const uint8_t[6]); diff --git a/lib/meta-flow.c b/lib/meta-flow.c index 3b82e62..e81a49f 100644 --- a/lib/meta-flow.c +++ b/lib/meta-flow.c @@ -207,6 +207,18 @@ const struct mf_field mf_fields[MFF_N_IDS] = { OFPUTIL_P_NXM_OXM_ANY, OFPUTIL_P_NXM_OXM_ANY, -1, + }, { + MFF_CONN_STATE, "conn_state", NULL, + MF_FIELD_SIZES(u8), + MFM_FULLY, + MFS_HEXADECIMAL, + MFP_NONE, + true, + NXM_NX_CONN_STATE, "NXM_NX_CONN_STATE", + NXM_NX_CONN_STATE, "NXM_NX_CONN_STATE", 0, + OFPUTIL_P_NXM_OXM_ANY, + OFPUTIL_P_NXM_OXM_ANY, + -1, }, #define REGISTER(IDX) \ @@ -943,6 +955,8 @@ mf_is_all_wild(const struct mf_field *mf, const struct flow_wildcards *wc) return !wc->masks.skb_priority; case MFF_PKT_MARK: return !wc->masks.pkt_mark; + case MFF_CONN_STATE: + return !wc->masks.conn_state; CASE_MFF_REGS: return !wc->masks.regs[mf->id - MFF_REG0]; CASE_MFF_XREGS: @@ -1184,6 +1198,7 @@ mf_is_value_valid(const 
struct mf_field *mf, const union mf_value *value) case MFF_IN_PORT: case MFF_SKB_PRIORITY: case MFF_PKT_MARK: + case MFF_CONN_STATE: CASE_MFF_REGS: CASE_MFF_XREGS: case MFF_ETH_SRC: @@ -1312,6 +1327,10 @@ mf_get_value(const struct mf_field *mf, const struct flow *flow, value->be32 = htonl(flow->pkt_mark); break; + case MFF_CONN_STATE: + value->u8 = flow->conn_state; + break; + CASE_MFF_REGS: value->be32 = htonl(flow->regs[mf->id - MFF_REG0]); break; @@ -1518,6 +1537,10 @@ mf_set_value(const struct mf_field *mf, match_set_pkt_mark(match, ntohl(value->be32)); break; + case MFF_CONN_STATE: + match_set_conn_state(match, value->u8); + break; + CASE_MFF_REGS: match_set_reg(match, mf->id - MFF_REG0, ntohl(value->be32)); break; @@ -1741,6 +1764,10 @@ mf_set_flow_value(const struct mf_field *mf, flow->pkt_mark = ntohl(value->be32); break; + case MFF_CONN_STATE: + flow->conn_state = value->u8; + break; + CASE_MFF_REGS: flow->regs[mf->id - MFF_REG0] = ntohl(value->be32); break; @@ -1962,6 +1989,11 @@ mf_set_wild(const struct mf_field *mf, struct match *match) match->wc.masks.pkt_mark = 0; break; + case MFF_CONN_STATE: + match->flow.conn_state = 0; + match->wc.masks.conn_state = 0; + break; + CASE_MFF_REGS: match_set_reg_masked(match, mf->id - MFF_REG0, 0, 0); break; @@ -2203,6 +2235,10 @@ mf_set(const struct mf_field *mf, ntohl(mask->be32)); break; + case MFF_CONN_STATE: + match_set_conn_state_masked(match, value->u8, mask->u8); + break; + case MFF_ETH_DST: match_set_dl_dst_masked(match, value->mac, mask->mac); break; diff --git a/lib/meta-flow.h b/lib/meta-flow.h index c11f7ab..865ce59 100644 --- a/lib/meta-flow.h +++ b/lib/meta-flow.h @@ -46,6 +46,7 @@ enum OVS_PACKED_ENUM mf_field_id { MFF_IN_PORT_OXM, /* be32 */ MFF_SKB_PRIORITY, /* be32 */ MFF_PKT_MARK, /* be32 */ + MFF_CONN_STATE, /* u8 */ #if FLOW_N_REGS == 8 MFF_REG0, /* be32 */ diff --git a/lib/nx-match.c b/lib/nx-match.c index 05be3b5..2d879b1 100644 --- a/lib/nx-match.c +++ b/lib/nx-match.c @@ -617,7 +617,7 @@ 
nx_put_raw(struct ofpbuf *b, enum ofp_version oxm, const struct match *match, int match_len; int i; - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28); /* Metadata. */ if (match->wc.masks.dp_hash) { @@ -741,6 +741,10 @@ nx_put_raw(struct ofpbuf *b, enum ofp_version oxm, const struct match *match, nxm_put_32m(b, mf_oxm_header(MFF_PKT_MARK, oxm), htonl(flow->pkt_mark), htonl(match->wc.masks.pkt_mark)); + /* Connection state. */ + nxm_put_8m(b, NXM_NX_CONN_STATE, flow->conn_state, + match->wc.masks.conn_state); + /* OpenFlow 1.1+ Metadata. */ nxm_put_64m(b, mf_oxm_header(MFF_METADATA, oxm), flow->metadata, match->wc.masks.metadata); diff --git a/lib/odp-execute.c b/lib/odp-execute.c index 78b1f24..59ce7e3 100644 --- a/lib/odp-execute.c +++ b/lib/odp-execute.c @@ -198,6 +198,10 @@ odp_execute_set_action(struct dpif_packet *packet, const struct nlattr *a, md->pkt_mark = nl_attr_get_u32(a); break; + case OVS_KEY_ATTR_CONN_STATE: + md->conn_state = nl_attr_get_u8(a); + break; + case OVS_KEY_ATTR_ETHERNET: odp_eth_set_addrs(&packet->ofpbuf, nl_attr_get(a), NULL); break; @@ -287,6 +291,11 @@ odp_execute_masked_set_action(struct dpif_packet *packet, | (md->pkt_mark & ~*get_mask(a, uint32_t)); break; + case OVS_KEY_ATTR_CONN_STATE: + md->conn_state = nl_attr_get_u8(a) + | (md->conn_state & ~*get_mask(a, uint8_t)); + break; + case OVS_KEY_ATTR_ETHERNET: odp_eth_set_addrs(&packet->ofpbuf, nl_attr_get(a), get_mask(a, struct ovs_key_ethernet)); @@ -519,6 +528,10 @@ odp_execute_actions__(void *dp, struct dpif_packet **packets, int cnt, } break; + case OVS_ACTION_ATTR_CONNTRACK: + /* xxx I don't think there's anything we can do here. 
*/ + break; + case OVS_ACTION_ATTR_UNSPEC: case __OVS_ACTION_ATTR_MAX: OVS_NOT_REACHED(); diff --git a/lib/odp-util.c b/lib/odp-util.c index 77e6ec5..061c8d6 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -84,6 +84,7 @@ odp_action_len(uint16_t type) case OVS_ACTION_ATTR_SET: return -2; case OVS_ACTION_ATTR_SET_MASKED: return -2; case OVS_ACTION_ATTR_SAMPLE: return -2; + case OVS_ACTION_ATTR_CONNTRACK: return -2; case OVS_ACTION_ATTR_UNSPEC: case __OVS_ACTION_ATTR_MAX: @@ -105,6 +106,7 @@ ovs_key_attr_to_string(enum ovs_key_attr attr, char *namebuf, size_t bufsize) case OVS_KEY_ATTR_ENCAP: return "encap"; case OVS_KEY_ATTR_PRIORITY: return "skb_priority"; case OVS_KEY_ATTR_SKB_MARK: return "skb_mark"; + case OVS_KEY_ATTR_CONN_STATE: return "conn_state"; case OVS_KEY_ATTR_TUNNEL: return "tunnel"; case OVS_KEY_ATTR_IN_PORT: return "in_port"; case OVS_KEY_ATTR_ETHERNET: return "eth"; @@ -507,6 +509,23 @@ format_odp_hash_action(struct ds *ds, const struct ovs_action_hash *hash_act) } static void +format_odp_conntrack_action(struct ds *ds, const struct nlattr *attr) +{ + static const struct nl_policy ovs_conntrack_policy[] = { + [OVS_CT_ATTR_ZONE] = { .type = NL_A_U16 }, + }; + struct nlattr *a[ARRAY_SIZE(ovs_conntrack_policy)]; + + if (!nl_parse_nested(attr, ovs_conntrack_policy, a, ARRAY_SIZE(a))) { + ds_put_cstr(ds, "conntrack(error)"); + return; + } + + ds_put_format(ds, "conntrack(zone=%"PRIu16")", + nl_attr_get_u16(a[OVS_CT_ATTR_ZONE])); +} + +static void format_odp_action(struct ds *ds, const struct nlattr *a) { int expected_len; @@ -589,6 +608,10 @@ format_odp_action(struct ds *ds, const struct nlattr *a) case OVS_ACTION_ATTR_SAMPLE: format_odp_sample_action(ds, a); break; + case OVS_ACTION_ATTR_CONNTRACK: { + format_odp_conntrack_action(ds,a); + break; + } case OVS_ACTION_ATTR_UNSPEC: case __OVS_ACTION_ATTR_MAX: default: @@ -877,6 +900,21 @@ parse_odp_action(const char *s, const struct simap *port_names, } } + { + int zone; + int n = -1; + + if (ovs_scan(s, 
"conntrack(zone=%i)%n", &zone, &n)) { + size_t ct_ofs; + + ct_ofs = nl_msg_start_nested(actions, OVS_ACTION_ATTR_CONNTRACK); + nl_msg_put_u16(actions, OVS_CT_ATTR_ZONE, zone); + nl_msg_end_nested(actions, ct_ofs); + + return n; + } + } + return -EINVAL; } @@ -931,6 +969,7 @@ odp_flow_key_attr_len(uint16_t type) case OVS_KEY_ATTR_SKB_MARK: return 4; case OVS_KEY_ATTR_DP_HASH: return 4; case OVS_KEY_ATTR_RECIRC_ID: return 4; + case OVS_KEY_ATTR_CONN_STATE: return 1; case OVS_KEY_ATTR_TUNNEL: return -2; case OVS_KEY_ATTR_IN_PORT: return 4; case OVS_KEY_ATTR_ETHERNET: return sizeof(struct ovs_key_ethernet); @@ -1520,6 +1559,13 @@ format_odp_key_attr(const struct nlattr *a, const struct nlattr *ma, } break; + case OVS_KEY_ATTR_CONN_STATE: + ds_put_format(ds, "%#"PRIx8, nl_attr_get_u8(a)); + if (!is_exact) { + ds_put_format(ds, "/%#"PRIx8, nl_attr_get_u8(ma)); + } + break; + case OVS_KEY_ATTR_TUNNEL: { struct flow_tnl key, mask_; struct flow_tnl *mask = ma ? &mask_ : NULL; @@ -2346,6 +2392,7 @@ parse_odp_key_mask_attr(const char *s, const struct simap *port_names, SCAN_SINGLE("skb_mark(", uint32_t, u32, OVS_KEY_ATTR_SKB_MARK); SCAN_SINGLE_NO_MASK("recirc_id(", uint32_t, u32, OVS_KEY_ATTR_RECIRC_ID); SCAN_SINGLE("dp_hash(", uint32_t, u32, OVS_KEY_ATTR_DP_HASH); + SCAN_SINGLE("conn_state(", uint8_t, u8, OVS_KEY_ATTR_CONN_STATE); SCAN_BEGIN("tunnel(", struct flow_tnl) { SCAN_FIELD("tun_id=", be64, tun_id); @@ -2574,6 +2621,7 @@ odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *flow, } nl_msg_put_u32(buf, OVS_KEY_ATTR_SKB_MARK, data->pkt_mark); + nl_msg_put_u8(buf, OVS_KEY_ATTR_CONN_STATE, data->conn_state); if (recirc) { nl_msg_put_u32(buf, OVS_KEY_ATTR_RECIRC_ID, data->recirc_id); @@ -2771,6 +2819,9 @@ odp_key_from_pkt_metadata(struct ofpbuf *buf, const struct pkt_metadata *md) } nl_msg_put_u32(buf, OVS_KEY_ATTR_SKB_MARK, md->pkt_mark); +#if 0 + nl_msg_put_u8(buf, OVS_KEY_ATTR_CONN_STATE, md->conn_state); +#endif /* Add an ingress port attribute if 
'odp_in_port' is not the magical * value "ODPP_NONE". */ @@ -2818,6 +2869,12 @@ odp_key_to_pkt_metadata(const struct nlattr *key, size_t key_len, md->pkt_mark = nl_attr_get_u32(nla); wanted_attrs &= ~(1u << OVS_KEY_ATTR_SKB_MARK); break; +#if 0 + case OVS_KEY_ATTR_CONN_STATE: + md->conn_state = nl_attr_get_u8(nla); + wanted_attrs &= ~(1u << OVS_KEY_ATTR_CONN_STATE); + break; +#endif case OVS_KEY_ATTR_TUNNEL: { enum odp_key_fitness res; @@ -3374,6 +3431,11 @@ odp_flow_key_to_flow__(const struct nlattr *key, size_t key_len, expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_SKB_MARK; } + if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_CONN_STATE)) { + flow->conn_state = nl_attr_get_u8(attrs[OVS_KEY_ATTR_CONN_STATE]); + expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_CONN_STATE; + } + if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_TUNNEL)) { enum odp_key_fitness res; diff --git a/lib/odp-util.h b/lib/odp-util.h index 11b54dd..14570ce 100644 --- a/lib/odp-util.h +++ b/lib/odp-util.h @@ -118,6 +118,7 @@ void odp_portno_names_destroy(struct hmap *portno_names); * OVS_KEY_ATTR_SKB_MARK 4 -- 4 8 * OVS_KEY_ATTR_DP_HASH 4 -- 4 8 * OVS_KEY_ATTR_RECIRC_ID 4 -- 4 8 + * OVS_KEY_ATTR_CONN_STATE 1 3 4 8 * OVS_KEY_ATTR_ETHERNET 12 -- 4 16 * OVS_KEY_ATTR_ETHERTYPE 2 2 4 8 (outer VLAN ethertype) * OVS_KEY_ATTR_VLAN 2 2 4 8 @@ -127,13 +128,13 @@ void odp_portno_names_destroy(struct hmap *portno_names); * OVS_KEY_ATTR_ICMPV6 2 2 4 8 * OVS_KEY_ATTR_ND 28 -- 4 32 * ---------------------------------------------------------- - * total 488 + * total 496 * * We include some slack space in case the calculation isn't quite right or we * add another field and forget to adjust this value. */ #define ODPUTIL_FLOW_KEY_BYTES 512 -BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27); +BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28); /* A buffer with sufficient size and alignment to hold an nlattr-formatted flow * key. 
An array of "struct nlattr" might not, in theory, be sufficiently diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c index 46651da..0ab8a94 100644 --- a/lib/ofp-actions.c +++ b/lib/ofp-actions.c @@ -267,6 +267,9 @@ enum ofp_raw_action_type { /* NX1.0+(29): struct nx_action_sample. */ NXAST_RAW_SAMPLE, + + /* NX1.0+(32): struct nx_action_conntrack. */ + NXAST_RAW_CONNTRACK, }; /* OpenFlow actions are always a multiple of 8 bytes in length. */ @@ -3924,6 +3927,86 @@ format_SAMPLE(const struct ofpact_sample *a, struct ds *s) a->obs_domain_id, a->obs_point_id); } +/* Action structure for NXAST_CONNTRACK. + * + * Pass traffic to the connection tracker. If 'flags' is + * NX_CONNTRACK_F_RECIRC, traffic is recirculated back to flow table + * with the NXM_NX_CONN_STATE and NXM_NX_CONN_STATE_W matches set. A + * standard "resubmit" action is not sufficient, since connection + * tracking occurs outside of the classifier. The 'zone' argument + * specifies a context within which the tracking is done. */ +struct nx_action_conntrack { + ovs_be16 type; /* OFPAT_VENDOR. */ + ovs_be16 len; /* 16. */ + ovs_be32 vendor; /* NX_VENDOR_ID. */ + ovs_be16 subtype; /* NXAST_CONNTRACK. */ + ovs_be16 flags; /* Either 0 or NX_CONNTRACK_F_RECIRC. */ + ovs_be16 zone; /* Connection tracking context. 
*/ + uint8_t pad[2]; +}; +OFP_ASSERT(sizeof(struct nx_action_conntrack) == 16); + +static enum ofperr +decode_NXAST_RAW_CONNTRACK(const struct nx_action_conntrack *nac, + struct ofpbuf *out) +{ + struct ofpact_conntrack *conntrack; + + conntrack = ofpact_put_CONNTRACK(out); + conntrack->flags = ntohs(nac->flags); + conntrack->zone = ntohs(nac->zone); + + return 0; +} + +static void +encode_CONNTRACK(const struct ofpact_conntrack *conntrack, + enum ofp_version ofp_version OVS_UNUSED, struct ofpbuf *out) +{ + struct nx_action_conntrack *nac; + + nac = put_NXAST_CONNTRACK(out); + nac->flags = htons(conntrack->flags); + nac->zone = htons(conntrack->zone); +} + +/* Parses 'arg' as the argument to a "conntrack" action, and appends such an + * action to 'ofpacts'. + * + * Returns NULL if successful, otherwise a malloc()'d string describing the + * error. The caller is responsible for freeing the returned string. */ +static char * WARN_UNUSED_RESULT +parse_CONNTRACK(char *arg, struct ofpbuf *ofpacts, + enum ofputil_protocol *usable_protocols OVS_UNUSED) +{ + struct ofpact_conntrack *oc = ofpact_put_CONNTRACK(ofpacts); + char *key, *value; + + while (ofputil_parse_key_value(&arg, &key, &value)) { + char *error = NULL; + + if (!strcmp(key, "flags")) { + error = str_to_u16(value, "flags", &oc->flags); + } else if (!strcmp(key, "zone")) { + error = str_to_u16(value, "zone", &oc->zone); + } else { + error = xasprintf("invalid key \"%s\" in \"conntrack\" argument", + key); + } + if (error) { + return error; + } + } + return NULL; +} + +static void +format_CONNTRACK(const struct ofpact_conntrack *a, struct ds *s) +{ + ds_put_format(s, "conntrack(flags=%"PRIu16",zone=%"PRIu16")", + a->flags, a->zone); +} + /* Meter instruction. 
*/ static void @@ -4304,6 +4387,7 @@ ofpact_is_set_or_move_action(const struct ofpact *a) return true; case OFPACT_BUNDLE: case OFPACT_CLEAR_ACTIONS: + case OFPACT_CONNTRACK: case OFPACT_CONTROLLER: case OFPACT_DEC_MPLS_TTL: case OFPACT_DEC_TTL: @@ -4376,6 +4460,7 @@ ofpact_is_allowed_in_actions_set(const struct ofpact *a) * in the action set is undefined. */ case OFPACT_BUNDLE: case OFPACT_CONTROLLER: + case OFPACT_CONNTRACK: case OFPACT_ENQUEUE: case OFPACT_EXIT: case OFPACT_FIN_TIMEOUT: @@ -4600,6 +4685,7 @@ ovs_instruction_type_from_ofpact_type(enum ofpact_type type) case OFPACT_NOTE: case OFPACT_EXIT: case OFPACT_SAMPLE: + case OFPACT_CONNTRACK: default: return OVSINST_OFPIT11_APPLY_ACTIONS; } @@ -5161,6 +5247,9 @@ ofpact_check__(enum ofputil_protocol *usable_protocols, struct ofpact *a, case OFPACT_SAMPLE: return 0; + case OFPACT_CONNTRACK: + return 0; + case OFPACT_CLEAR_ACTIONS: return 0; @@ -5580,6 +5669,7 @@ ofpact_outputs_to_port(const struct ofpact *ofpact, ofp_port_t port) case OFPACT_GOTO_TABLE: case OFPACT_METER: case OFPACT_GROUP: + case OFPACT_CONNTRACK: default: return false; } diff --git a/lib/ofp-actions.h b/lib/ofp-actions.h index 5436f24..a3a4b41 100644 --- a/lib/ofp-actions.h +++ b/lib/ofp-actions.h @@ -105,6 +105,7 @@ OFPACT(NOTE, ofpact_note, data, "note") \ OFPACT(EXIT, ofpact_null, ofpact, "exit") \ OFPACT(SAMPLE, ofpact_sample, ofpact, "sample") \ + OFPACT(CONNTRACK, ofpact_conntrack, ofpact, "conntrack") \ \ /* Instructions. */ \ OFPACT(METER, ofpact_meter, ofpact, "meter") \ @@ -472,6 +473,23 @@ BUILD_ASSERT_DECL(offsetof(struct ofpact_nest, actions) % OFPACT_ALIGNTO == 0); BUILD_ASSERT_DECL(offsetof(struct ofpact_nest, actions) == sizeof(struct ofpact_nest)); +/* Bits for 'flags' in struct nx_action_conntrack. + * + * If NX_CONNTRACK_F_RECIRC is set, then the packet will be recirculated + * through the datapath after running through the connection tracker. 
*/ +enum nx_conntrack_flags { + NX_CONNTRACK_F_RECIRC = 1 << 0 +}; + +/* OFPACT_CONNTRACK. + * + * Used for NXAST_CONNTRACK. */ +struct ofpact_conntrack { + struct ofpact ofpact; + uint16_t flags; + uint16_t zone; +}; + static inline size_t ofpact_nest_get_action_len(const struct ofpact_nest *on) { diff --git a/lib/ofp-print.c b/lib/ofp-print.c index 43bfa17..12e0f6c 100644 --- a/lib/ofp-print.c +++ b/lib/ofp-print.c @@ -141,6 +141,10 @@ ofp_print_packet_in(struct ds *string, const struct ofp_header *oh, ds_put_format(string, " pkt_mark=0x%"PRIx32, pin.fmd.pkt_mark); } + if (pin.fmd.conn_state != 0) { + ds_put_format(string, " conn_state=0x%"PRIx8, pin.fmd.conn_state); + } + ds_put_format(string, " (via %s)", ofputil_packet_in_reason_to_string(pin.reason, reasonbuf, sizeof reasonbuf)); diff --git a/lib/ofp-util.c b/lib/ofp-util.c index c8d38e8..f352336 100644 --- a/lib/ofp-util.c +++ b/lib/ofp-util.c @@ -185,7 +185,7 @@ ofputil_netmask_to_wcbits(ovs_be32 netmask) void ofputil_wildcard_from_ofpfw10(uint32_t ofpfw, struct flow_wildcards *wc) { - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28); /* Initialize most of wc. */ flow_wildcards_init_catchall(wc); @@ -3276,6 +3276,7 @@ ofputil_decode_packet_in_finish(struct ofputil_packet_in *pin, pin->fmd.metadata = match->flow.metadata; memcpy(pin->fmd.regs, match->flow.regs, sizeof pin->fmd.regs); pin->fmd.pkt_mark = match->flow.pkt_mark; + pin->fmd.conn_state = match->flow.conn_state; } enum ofperr @@ -3412,6 +3413,10 @@ ofputil_packet_in_to_match(const struct ofputil_packet_in *pin, match_set_pkt_mark(match, pin->fmd.pkt_mark); } + if (pin->fmd.conn_state != 0) { + match_set_conn_state(match, pin->fmd.conn_state); + } + match_set_in_port(match, pin->fmd.in_port); } diff --git a/lib/packets.h b/lib/packets.h index 26c6ff1..8408b36 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -62,6 +62,7 @@ struct pkt_metadata { uint32_t skb_priority; /* Packet priority for QoS. 
*/ uint32_t pkt_mark; /* Packet mark. */ union flow_in_port in_port; /* Input port. */ + uint8_t conn_state; /* Connection state. */ }; #define PKT_METADATA_INITIALIZER(PORT) \ diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index 1d46456..bb187a1 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -2478,13 +2478,14 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, struct flow *flow = &ctx->xin->flow; ovs_be16 flow_vlan_tci; uint32_t flow_pkt_mark; + uint8_t flow_conn_state; uint8_t flow_nw_tos; odp_port_t out_port, odp_port; uint8_t dscp; /* If 'struct flow' gets additional metadata, we'll need to zero it out * before traversing a patch port. */ - BUILD_ASSERT_DECL(FLOW_WC_SEQ == 27); + BUILD_ASSERT_DECL(FLOW_WC_SEQ == 28); if (!xport) { xlate_report(ctx, "Nonexistent output port"); @@ -2577,6 +2578,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, flow_vlan_tci = flow->vlan_tci; flow_pkt_mark = flow->pkt_mark; + flow_conn_state = flow->conn_state; flow_nw_tos = flow->nw_tos; if (count_skb_priorities(xport)) { @@ -2667,6 +2669,7 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, /* Restore flow */ flow->vlan_tci = flow_vlan_tci; flow->pkt_mark = flow_pkt_mark; + flow->conn_state = flow_conn_state; flow->nw_tos = flow_nw_tos; } @@ -3587,6 +3590,7 @@ ofpact_needs_recirculation_after_mpls(const struct xlate_ctx *ctx, case OFPACT_WRITE_ACTIONS: case OFPACT_CLEAR_ACTIONS: case OFPACT_SAMPLE: + case OFPACT_CONNTRACK: return false; case OFPACT_SET_IPV4_SRC: @@ -3631,6 +3635,23 @@ ofpact_needs_recirculation_after_mpls(const struct xlate_ctx *ctx, } static void +compose_conntrack_action(struct xlate_ctx *ctx, struct ofpact_conntrack *ofc) +{ + size_t ct_offset; + struct ofpbuf *odp_actions = ctx->xout->odp_actions; + + ct_offset = nl_msg_start_nested(odp_actions, OVS_ACTION_ATTR_CONNTRACK); + nl_msg_put_u16(odp_actions, OVS_CT_ATTR_ZONE, ofc->zone); + 
nl_msg_end_nested(odp_actions, ct_offset); + + /* xxx Need to put the recirc here. */ + if (ofc->flags & NX_CONNTRACK_F_RECIRC) { + /* xxx Choose real recirc id */ + nl_msg_put_u32(ctx->xout->odp_actions, OVS_ACTION_ATTR_RECIRC, 0); + } +} + +static void do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, struct xlate_ctx *ctx) { @@ -3932,6 +3953,10 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, case OFPACT_SAMPLE: xlate_sample_action(ctx, ofpact_get_SAMPLE(a)); break; + + case OFPACT_CONNTRACK: + compose_conntrack_action(ctx, ofpact_get_CONNTRACK(a)); + break; } } } diff --git a/ofproto/ofproto-unixctl.man b/ofproto/ofproto-unixctl.man index 89013d9..83820ee 100644 --- a/ofproto/ofproto-unixctl.man +++ b/ofproto/ofproto-unixctl.man @@ -103,6 +103,8 @@ only metadata. The metadata can be: Packet QoS priority. .IP \fIpkt_mark\fR Mark of the packet. +.IP \fIconn_state\fR +Connection state of the packet. .IP \fItun_id\fR The tunnel ID on which the packet arrived. 
.IP \fIin_port\fR diff --git a/tests/dpif-netdev.at b/tests/dpif-netdev.at index af7845c..e0a7170 100644 --- a/tests/dpif-netdev.at +++ b/tests/dpif-netdev.at @@ -65,7 +65,7 @@ AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00: sleep 1 AT_CHECK([cat ovs-vswitchd.log | grep -A 1 'miss upcall' | tail -n 1], [0], [dnl -skb_priority(0),skb_mark(0),recirc_id(0),dp_hash(0),in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0) +skb_priority(0),skb_mark(0),conn_state(0),recirc_id(0),dp_hash(0),in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0) ]) AT_CHECK([cat ovs-vswitchd.log | FILTER_FLOW_INSTALL | STRIP_XOUT], [0], [dnl recirc_id=0,ip,in_port=1,vlan_tci=0x0000/0x1fff,dl_src=50:54:00:00:00:09,dl_dst=50:54:00:00:00:0a,nw_frag=no, actions: <del> @@ -78,10 +78,10 @@ AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00: sleep 1 AT_CHECK([cat ovs-vswitchd.log | grep -A 1 'miss upcall' | tail -n 1], [0], [dnl -skb_priority(0),skb_mark(0),recirc_id(0),dp_hash(0),in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0) +skb_priority(0),skb_mark(0),conn_state(0),recirc_id(0),dp_hash(0),in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0) ]) AT_CHECK([cat ovs-vswitchd.log | FILTER_FLOW_INSTALL | STRIP_XOUT], [0], [dnl 
-pkt_mark=0,recirc_id=0,skb_priority=0,icmp,tun_id=0,tun_src=0.0.0.0,tun_dst=0.0.0.0,tun_tos=0,tun_ttl=0,,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:09,dl_dst=50:54:00:00:00:0a,nw_src=10.0.0.2,nw_dst=10.0.0.1,nw_tos=0,nw_ecn=0,nw_ttl=64,mpls_label=0,mpls_tc=0,mpls_ttl=0,mpls_bos=0,mpls_lse1=0,mpls_lse2=0,icmp_type=8,icmp_code=0, actions: <del> +pkt_mark=0,recirc_id=0,skb_priority=0,conn_state=0,icmp,tun_id=0,tun_src=0.0.0.0,tun_dst=0.0.0.0,tun_tos=0,tun_ttl=0,,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:09,dl_dst=50:54:00:00:00:0a,nw_src=10.0.0.2,nw_dst=10.0.0.1,nw_tos=0,nw_ecn=0,nw_ttl=64,mpls_label=0,mpls_tc=0,mpls_ttl=0,mpls_bos=0,mpls_lse1=0,mpls_lse2=0,icmp_type=8,icmp_code=0, actions: <del> recirc_id=0,ip,in_port=1,vlan_tci=0x0000/0x1fff,dl_src=50:54:00:00:00:09,dl_dst=50:54:00:00:00:0a,nw_frag=no, actions: <del> ]) @@ -100,10 +100,10 @@ AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00: sleep 1 AT_CHECK([cat ovs-vswitchd.log | grep -A 1 'miss upcall' | tail -n 1], [0], [dnl -skb_priority(0),skb_mark(0),recirc_id(0),dp_hash(0),in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0) +skb_priority(0),skb_mark(0),conn_state(0),recirc_id(0),dp_hash(0),in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0) ]) AT_CHECK([cat ovs-vswitchd.log | FILTER_FLOW_DUMP | STRIP_XOUT], [0], [dnl -skb_priority(0/0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no),icmp(type=8/0,code=0/0), packets:0, bytes:0, used:never, actions: <del> 
+skb_priority(0/0),skb_mark(0/0),conn_state(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no),icmp(type=8/0,code=0/0), packets:0, bytes:0, used:never, actions: <del> ]) # Now, the same again without megaflows. @@ -113,11 +113,11 @@ AT_CHECK([ovs-appctl netdev-dummy/receive p1 'in_port(1),eth(src=50:54:00:00:00: sleep 1 AT_CHECK([cat ovs-vswitchd.log | grep -A 1 'miss upcall' | tail -n 1], [0], [dnl -skb_priority(0),skb_mark(0),recirc_id(0),dp_hash(0),in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0) +skb_priority(0),skb_mark(0),conn_state(0),recirc_id(0),dp_hash(0),in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0) ]) AT_CHECK([cat ovs-vswitchd.log | FILTER_FLOW_DUMP | STRIP_XOUT], [0], [dnl -skb_priority(0),skb_mark(0),recirc_id(0),dp_hash(0),in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0), packets:0, bytes:0, used:never, actions: <del> -skb_priority(0/0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no),icmp(type=8/0,code=0/0), packets:0, bytes:0, used:never, actions: <del> +skb_priority(0),skb_mark(0),conn_state(0),recirc_id(0),dp_hash(0),in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0), packets:0, bytes:0, used:never, actions: <del> 
+skb_priority(0/0),skb_mark(0/0),conn_state(0/0),recirc_id(0),dp_hash(0/0),in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no),icmp(type=8/0,code=0/0), packets:0, bytes:0, used:never, actions: <del> ]) OVS_VSWITCHD_STOP diff --git a/tests/odp.at b/tests/odp.at index cf12df4..04ee87f 100644 --- a/tests/odp.at +++ b/tests/odp.at @@ -30,52 +30,52 @@ in_port(1),eth(src=00:01:02:03:04:05,dst=10:11:12:13:14:15),eth_type(0x8848),mpl ]) (echo '# Valid forms without tun_id or VLAN header.' - set 's/^/skb_priority(0),skb_mark(0),recirc_id(0),dp_hash(0),/' odp-base.txt + set 's/^/skb_priority(0),skb_mark(0),conn_state(0),recirc_id(0),dp_hash(0),/' odp-base.txt set ' -s/^/skb_priority(0),skb_mark(0),recirc_id(0),dp_hash(0),/ +s/^/skb_priority(0),skb_mark(0),conn_state(0),recirc_id(0),dp_hash(0),/ ' odp-base.txt echo echo '# Valid forms with tunnel header.' - sed 's/^/skb_priority(0),tunnel(tun_id=0x7f10354,src=10.10.10.10,dst=20.20.20.20,tos=0,ttl=64,tp_src=0,tp_dst=0,flags(csum,key)),skb_mark(0x1234),recirc_id(0),dp_hash(0),/' odp-base.txt + sed 's/^/skb_priority(0),tunnel(tun_id=0x7f10354,src=10.10.10.10,dst=20.20.20.20,tos=0,ttl=64,tp_src=0,tp_dst=0,flags(csum,key)),skb_mark(0x1234),conn_state(0),recirc_id(0),dp_hash(0),/' odp-base.txt echo echo '# Valid forms with VLAN header.' - sed 's/^/skb_priority(0),skb_mark(0),recirc_id(0),dp_hash(0),/ + sed 's/^/skb_priority(0),skb_mark(0),conn_state(0),recirc_id(0),dp_hash(0),/ s/\(eth([[^)]]*)\),*/\1,eth_type(0x8100),vlan(vid=99,pcp=7),encap(/ s/$/)/' odp-base.txt echo echo '# Valid forms with MPLS header.' - sed 's/^/skb_priority(0),skb_mark(0),recirc_id(0),dp_hash(0),/ + sed 's/^/skb_priority(0),skb_mark(0),conn_state(0),recirc_id(0),dp_hash(0),/ s/\(eth([[^)]]*),?\)/\1,eth_type(0x8847),mpls(label=100,tc=7,ttl=64,bos=1)/' odp-base.txt echo echo '# Valid forms with MPLS multicast header.' 
- sed 's/^/skb_priority(0),skb_mark(0),recirc_id(0),dp_hash(0),/ + sed 's/^/skb_priority(0),skb_mark(0),conn_state(0),recirc_id(0),dp_hash(0),/ s/\(eth([[^)]]*),?\)/\1,eth_type(0x8848),mpls(label=100,tc=7,ttl=64,bos=1)/' odp-base.txt echo echo '# Valid forms with tunnel and VLAN headers.' - sed 's/^/skb_priority(0),tunnel(tun_id=0xfedcba9876543210,src=10.0.0.1,dst=10.0.0.2,tos=0x8,ttl=128,tp_src=0,tp_dst=0,flags(key)),skb_mark(0),recirc_id(0),dp_hash(0),/ + sed 's/^/skb_priority(0),tunnel(tun_id=0xfedcba9876543210,src=10.0.0.1,dst=10.0.0.2,tos=0x8,ttl=128,tp_src=0,tp_dst=0,flags(key)),skb_mark(0),conn_state(0),recirc_id(0),dp_hash(0),/ s/\(eth([[^)]]*)\),*/\1,eth_type(0x8100),vlan(vid=99,pcp=7),encap(/ s/$/)/' odp-base.txt echo echo '# Valid forms with QOS priority, tunnel, and VLAN headers.' - sed 's/^/skb_priority(0x1234),tunnel(tun_id=0xfedcba9876543210,src=10.10.10.10,dst=20.20.20.20,tos=0x8,ttl=64,tp_src=0,tp_dst=0,flags(key)),skb_mark(0),recirc_id(0),dp_hash(0),/ + sed 's/^/skb_priority(0x1234),tunnel(tun_id=0xfedcba9876543210,src=10.10.10.10,dst=20.20.20.20,tos=0x8,ttl=64,tp_src=0,tp_dst=0,flags(key)),skb_mark(0),conn_state(0),recirc_id(0),dp_hash(0),/ s/\(eth([[^)]]*)\),*/\1,eth_type(0x8100),vlan(vid=99,pcp=7),encap(/ s/$/)/' odp-base.txt echo echo '# Valid forms with IP first fragment.' -sed 's/^/skb_priority(0),skb_mark(0),recirc_id(0),dp_hash(0),/' odp-base.txt | sed -n 's/,frag=no),/,frag=first),/p' +sed 's/^/skb_priority(0),skb_mark(0),conn_state(0),recirc_id(0),dp_hash(0),/' odp-base.txt | sed -n 's/,frag=no),/,frag=first),/p' echo echo '# Valid forms with IP later fragment.' 
-sed 's/^/skb_priority(0),skb_mark(0),recirc_id(0),dp_hash(0),/' odp-base.txt | sed -n 's/,frag=no),.*/,frag=later)/p' +sed 's/^/skb_priority(0),skb_mark(0),conn_state(0),recirc_id(0),dp_hash(0),/' odp-base.txt | sed -n 's/,frag=no),.*/,frag=later)/p' ) > odp-in.txt AT_CAPTURE_FILE([odp-in.txt]) diff --git a/tests/ofproto-dpif.at b/tests/ofproto-dpif.at index 181e740..0468f30 100644 --- a/tests/ofproto-dpif.at +++ b/tests/ofproto-dpif.at @@ -4636,12 +4636,12 @@ recirc_id(0),in_port(3),eth_type(0x0800),ipv4(frag=no), packets:0, bytes:0, used ]) AT_CHECK([ovs-appctl dpif/dump-flows -m br0 | sort | STRIP_USED], [0], [dnl -skb_priority(0/0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(p1),eth(src=50:54:00:00:00:05/00:00:00:00:00:00,dst=50:54:00:00:00:07/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=192.168.0.1/0.0.0.0,dst=192.168.0.2/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no),icmp(type=8/0,code=0/0), packets:0, bytes:0, used:never, actions:drop -skb_priority(0/0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(p2),eth(src=50:54:00:00:00:07/00:00:00:00:00:00,dst=50:54:00:00:00:05/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=192.168.0.2/0.0.0.0,dst=192.168.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no),icmp(type=0/0,code=0/0), packets:0, bytes:0, used:never, actions:drop +skb_priority(0/0),skb_mark(0/0),conn_state(0/0),recirc_id(0),dp_hash(0/0),in_port(p1),eth(src=50:54:00:00:00:05/00:00:00:00:00:00,dst=50:54:00:00:00:07/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=192.168.0.1/0.0.0.0,dst=192.168.0.2/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no),icmp(type=8/0,code=0/0), packets:0, bytes:0, used:never, actions:drop +skb_priority(0/0),skb_mark(0/0),conn_state(0/0),recirc_id(0),dp_hash(0/0),in_port(p2),eth(src=50:54:00:00:00:07/00:00:00:00:00:00,dst=50:54:00:00:00:05/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=192.168.0.2/0.0.0.0,dst=192.168.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no),icmp(type=0/0,code=0/0), packets:0, bytes:0, used:never, actions:drop ]) 
AT_CHECK([ovs-appctl dpif/dump-flows -m br1 | sort | STRIP_USED], [0], [dnl -skb_priority(0/0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(p3),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no),icmp(type=8/0,code=0/0), packets:0, bytes:0, used:never, actions:drop +skb_priority(0/0),skb_mark(0/0),conn_state(0/0),recirc_id(0),dp_hash(0/0),in_port(p3),eth(src=50:54:00:00:00:09/00:00:00:00:00:00,dst=50:54:00:00:00:0a/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=10.0.0.2/0.0.0.0,dst=10.0.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no),icmp(type=8/0,code=0/0), packets:0, bytes:0, used:never, actions:drop ]) OVS_VSWITCHD_STOP @@ -4775,10 +4775,10 @@ recirc_id=0,ip,in_port=101,nw_frag=no, actions:100,2,3 ]) AT_CHECK([cat ovs-vswitchd.log | grep -e 'in_port(100).*packets:9' | FILTER_FLOW_DUMP], [0], [dnl -skb_priority(0/0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(100),eth(src=50:54:00:00:00:05/00:00:00:00:00:00,dst=50:54:00:00:00:07/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=192.168.0.1/0.0.0.0,dst=192.168.0.2/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no),icmp(type=8/0,code=0/0), packets:9, bytes:540, used:0.0s, actions:101,3,2 +skb_priority(0/0),skb_mark(0/0),conn_state(0/0),recirc_id(0),dp_hash(0/0),in_port(100),eth(src=50:54:00:00:00:05/00:00:00:00:00:00,dst=50:54:00:00:00:07/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=192.168.0.1/0.0.0.0,dst=192.168.0.2/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no),icmp(type=8/0,code=0/0), packets:9, bytes:540, used:0.0s, actions:101,3,2 ]) AT_CHECK([cat ovs-vswitchd.log | grep -e 'in_port(101).*packets:4' | FILTER_FLOW_DUMP], [0], [dnl 
-skb_priority(0/0),skb_mark(0/0),recirc_id(0),dp_hash(0/0),in_port(101),eth(src=50:54:00:00:00:07/00:00:00:00:00:00,dst=50:54:00:00:00:05/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=192.168.0.2/0.0.0.0,dst=192.168.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no),icmp(type=8/0,code=0/0), packets:4, bytes:240, used:0.0s, actions:100,2,3 +skb_priority(0/0),skb_mark(0/0),conn_state(0/0),recirc_id(0),dp_hash(0/0),in_port(101),eth(src=50:54:00:00:00:07/00:00:00:00:00:00,dst=50:54:00:00:00:05/00:00:00:00:00:00),eth_type(0x0800),ipv4(src=192.168.0.2/0.0.0.0,dst=192.168.0.1/0.0.0.0,proto=1/0,tos=0/0,ttl=64/0,frag=no),icmp(type=8/0,code=0/0), packets:4, bytes:240, used:0.0s, actions:100,2,3 ]) AT_CHECK([ovs-ofctl dump-ports br0 pbr0], [0], [dnl @@ -5318,12 +5318,12 @@ for i in 1 2 3 4; do done sleep 1 AT_CHECK([cat ovs-vswitchd.log | FILTER_FLOW_INSTALL | STRIP_USED], [0], [dnl -pkt_mark=0,recirc_id=0,skb_priority=0,icmp,tun_id=0,tun_src=0.0.0.0,tun_dst=0.0.0.0,tun_tos=0,tun_ttl=0,,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:09,dl_dst=50:54:00:00:00:0a,nw_src=10.0.0.2,nw_dst=10.0.0.1,nw_tos=0,nw_ecn=0,nw_ttl=64,mpls_label=0,mpls_tc=0,mpls_ttl=0,mpls_bos=0,mpls_lse1=0,mpls_lse2=0,icmp_type=8,icmp_code=0, actions:2 -pkt_mark=0,recirc_id=0,skb_priority=0,icmp,tun_id=0,tun_src=0.0.0.0,tun_dst=0.0.0.0,tun_tos=0,tun_ttl=0,,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:0b,dl_dst=50:54:00:00:00:0c,nw_src=10.0.0.4,nw_dst=10.0.0.3,nw_tos=0,nw_ecn=0,nw_ttl=64,mpls_label=0,mpls_tc=0,mpls_ttl=0,mpls_bos=0,mpls_lse1=0,mpls_lse2=0,icmp_type=8,icmp_code=0, actions:drop +pkt_mark=0,recirc_id=0,skb_priority=0,conn_state=0,icmp,tun_id=0,tun_src=0.0.0.0,tun_dst=0.0.0.0,tun_tos=0,tun_ttl=0,,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:09,dl_dst=50:54:00:00:00:0a,nw_src=10.0.0.2,nw_dst=10.0.0.1,nw_tos=0,nw_ecn=0,nw_ttl=64,mpls_label=0,mpls_tc=0,mpls_ttl=0,mpls_bos=0,mpls_lse1=0,mpls_lse2=0,icmp_type=8,icmp_code=0, actions:2 
+pkt_mark=0,recirc_id=0,skb_priority=0,conn_state=0,icmp,tun_id=0,tun_src=0.0.0.0,tun_dst=0.0.0.0,tun_tos=0,tun_ttl=0,,in_port=1,vlan_tci=0x0000,dl_src=50:54:00:00:00:0b,dl_dst=50:54:00:00:00:0c,nw_src=10.0.0.4,nw_dst=10.0.0.3,nw_tos=0,nw_ecn=0,nw_ttl=64,mpls_label=0,mpls_tc=0,mpls_ttl=0,mpls_bos=0,mpls_lse1=0,mpls_lse2=0,icmp_type=8,icmp_code=0, actions:drop ]) AT_CHECK([cat ovs-vswitchd.log | FILTER_FLOW_DUMP | grep 'packets:3'], [0], [dnl -skb_priority(0),skb_mark(0),recirc_id(0),dp_hash(0),in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0), packets:3, bytes:180, used:0.0s, actions:2 -skb_priority(0),skb_mark(0),recirc_id(0),dp_hash(0),in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0), packets:3, bytes:180, used:0.0s, actions:drop +skb_priority(0),skb_mark(0),conn_state(0),recirc_id(0),dp_hash(0),in_port(1),eth(src=50:54:00:00:00:09,dst=50:54:00:00:00:0a),eth_type(0x0800),ipv4(src=10.0.0.2,dst=10.0.0.1,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0), packets:3, bytes:180, used:0.0s, actions:2 +skb_priority(0),skb_mark(0),conn_state(0),recirc_id(0),dp_hash(0),in_port(1),eth(src=50:54:00:00:00:0b,dst=50:54:00:00:00:0c),eth_type(0x0800),ipv4(src=10.0.0.4,dst=10.0.0.3,proto=1,tos=0,ttl=64,frag=no),icmp(type=8,code=0), packets:3, bytes:180, used:0.0s, actions:drop ]) OVS_VSWITCHD_STOP AT_CLEANUP diff --git a/tests/ofproto.at b/tests/ofproto.at index 3f31cd6..b94d124 100644 --- a/tests/ofproto.at +++ b/tests/ofproto.at @@ -1167,7 +1167,7 @@ OVS_VSWITCHD_START instructions: meter,apply_actions,clear_actions,write_actions,write_metadata,goto_table Write-Actions and Apply-Actions features: actions: output group set_field strip_vlan push_vlan mod_nw_ttl dec_ttl set_mpls_ttl dec_mpls_ttl push_mpls pop_mpls set_queue - supported on Set-Field: tun_id tun_src 
tun_dst metadata in_port in_port_oxm pkt_mark reg0 reg1 reg2 reg3 reg4 reg5 reg6 reg7 xreg0 xreg1 xreg2 xreg3 eth_src eth_dst vlan_tci vlan_vid vlan_pcp mpls_label mpls_tc ip_src ip_dst ipv6_src ipv6_dst nw_tos ip_dscp nw_ecn nw_ttl arp_op arp_spa arp_tpa arp_sha arp_tha tcp_src tcp_dst udp_src udp_dst sctp_src sctp_dst + supported on Set-Field: tun_id tun_src tun_dst metadata in_port in_port_oxm pkt_mark conn_state reg0 reg1 reg2 reg3 reg4 reg5 reg6 reg7 xreg0 xreg1 xreg2 xreg3 eth_src eth_dst vlan_tci vlan_vid vlan_pcp mpls_label mpls_tc ip_src ip_dst ipv6_src ipv6_dst nw_tos ip_dscp nw_ecn nw_ttl arp_op arp_spa arp_tpa arp_sha arp_tha tcp_src tcp_dst udp_src udp_dst sctp_src sctp_dst matching: dp_hash: arbitrary mask recirc_id: exact match or wildcard @@ -1178,6 +1178,7 @@ OVS_VSWITCHD_START in_port: exact match or wildcard in_port_oxm: exact match or wildcard pkt_mark: arbitrary mask + conn_state: arbitrary mask reg0: arbitrary mask reg1: arbitrary mask reg2: arbitrary mask @@ -1247,7 +1248,7 @@ AT_CHECK( # Check that the configuration was updated. mv expout orig-expout sed 's/classifier/main/ -73s/1000000/1024/' < orig-expout > expout +74s/1000000/1024/' < orig-expout > expout AT_CHECK([ovs-ofctl -O OpenFlow13 dump-table-features br0 | sed '/^$/d /^OFPST_TABLE_FEATURES/d'], [0], [expout]) OVS_VSWITCHD_STOP diff --git a/utilities/ovs-ofctl.8.in b/utilities/ovs-ofctl.8.in index 64171e7..7323086 100644 --- a/utilities/ovs-ofctl.8.in +++ b/utilities/ovs-ofctl.8.in @@ -1108,6 +1108,26 @@ system components in order to facilitate interaction between subsystems. On Linux this corresponds to the skb mark but the exact implementation is platform-dependent. . +.IP \fBconn_state=\fIvalue\fR[\fB/\fImask\fR] +Matches packet connection state \fIvalue\fR either exactly or with optional +\fImask\fR. The following flags are defined: +.RS +.IP \fB0x80\fR +Connection tracking has occurred. +.IP \fB0x40\fR +The flow did not initiate the connection. 
+.RE +.IP +The following values describe the state of the connection: +.RS +.IP \fB0x01\fR +This is the beginning of a new connection. +.IP \fB0x02\fR +This is part of an already existing connection. +.IP \fB0x04\fR +This is a new connection that is "expected". +.RE +. .PP Defining IPv6 flows (those with \fBdl_type\fR equal to 0x86dd) requires support for NXM. The following shorthand notations are available for -- 1.9.3 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev