Now that struct sw_flow_key's fields have been reordered to put optional
information last, hashing of a flow key can be limited to the portion that is
actually relevant to a given flow, as a performance optimization.
flow_extract() is modified to report, through a new key_len output parameter,
the length of the leading part of the flow key that contains useful
information, and flow_hash() now requires that length as a parameter.

We don't care as much about the performance of hashing flows from userspace, so
those paths still hash the full flow key struct by passing sizeof(key) to
flow_hash().

Suggested-by: Jesse Gross <[email protected]>
Signed-off-by: Andrew Evans <[email protected]>
---
 datapath/datapath.c |   15 +++++++++------
 datapath/flow.c     |   43 ++++++++++++++++++++++++++++++++-----------
 datapath/flow.h     |    4 ++--
 datapath/tunnel.c   |    7 +++++--
 4 files changed, 48 insertions(+), 21 deletions(-)

diff --git a/datapath/datapath.c b/datapath/datapath.c
index 733acad..482437a 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -276,10 +276,11 @@ void dp_process_received_packet(struct vport *p, struct 
sk_buff *skb)
        if (!OVS_CB(skb)->flow) {
                struct sw_flow_key key;
                struct tbl_node *flow_node;
+               int key_len;
                bool is_frag;
 
                /* Extract flow from 'skb' into 'key'. */
-               error = flow_extract(skb, p->port_no, &key, &is_frag);
+               error = flow_extract(skb, p->port_no, &key, &key_len, &is_frag);
                if (unlikely(error)) {
                        kfree_skb(skb);
                        return;
@@ -293,7 +294,7 @@ void dp_process_received_packet(struct vport *p, struct 
sk_buff *skb)
 
                /* Look up flow. */
                flow_node = tbl_lookup(rcu_dereference(dp->table), &key,
-                                       flow_hash(&key), flow_cmp);
+                               flow_hash(&key, key_len), flow_cmp);
                if (unlikely(!flow_node)) {
                        struct dp_upcall_info upcall;
 
@@ -675,6 +676,7 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, 
struct genl_info *info)
        struct sw_flow_key key;
        struct datapath *dp;
        struct ethhdr *eth;
+       int key_len;
        bool is_frag;
        int err;
 
@@ -705,7 +707,7 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, 
struct genl_info *info)
        else
                packet->protocol = htons(ETH_P_802_2);
 
-       err = flow_extract(packet, -1, &key, &is_frag);
+       err = flow_extract(packet, -1, &key, &key_len, &is_frag);
        if (err)
                goto exit;
 
@@ -963,7 +965,7 @@ static int odp_flow_cmd_new_or_set(struct sk_buff *skb, 
struct genl_info *info)
        if (!dp)
                goto error;
 
-       hash = flow_hash(&key);
+       hash = flow_hash(&key, sizeof(key));
        table = get_table_protected(dp);
        flow_node = tbl_lookup(table, &key, hash, flow_cmp);
        if (!flow_node) {
@@ -1087,7 +1089,7 @@ static int odp_flow_cmd_get(struct sk_buff *skb, struct 
genl_info *info)
                return -ENODEV;
 
        table = get_table_protected(dp);
-       flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
+       flow_node = tbl_lookup(table, &key, flow_hash(&key, sizeof(key)), 
flow_cmp);
        if (!flow_node)
                return -ENOENT;
 
@@ -1122,7 +1124,8 @@ static int odp_flow_cmd_del(struct sk_buff *skb, struct 
genl_info *info)
                return -ENODEV;
 
        table = get_table_protected(dp);
-       flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp);
+       flow_node = tbl_lookup(table, &key, flow_hash(&key, sizeof(key)),
+                              flow_cmp);
        if (!flow_node)
                return -ENOENT;
        flow = flow_cast(flow_node);
diff --git a/datapath/flow.c b/datapath/flow.c
index e3a1a6d..475e4ee 100644
--- a/datapath/flow.c
+++ b/datapath/flow.c
@@ -101,7 +101,11 @@ u64 flow_used_time(unsigned long flow_jiffies)
        return cur_ms - idle_ms;
 }
 
-static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key)
+#define SW_FLOW_KEY_OFFSET(field)                      \
+       offsetof(struct sw_flow_key, field) +           \
+       sizeof(((struct sw_flow_key *)0)->field)
+
+static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key, int 
*key_len)
 {
        unsigned int nh_ofs = skb_network_offset(skb);
        unsigned int nh_len;
@@ -118,10 +122,11 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct 
sw_flow_key *key)
        payload_ofs = (u8 *)(nh + 1) - skb->data;
        payload_len = ntohs(nh->payload_len);
 
+       key->nw_proto = NEXTHDR_NONE;
+       key->nw_tos = ipv6_get_dsfield(nh) & ~INET_ECN_MASK;
        memcpy(key->ipv6.src, nh->saddr.in6_u.u6_addr8, sizeof(key->ipv6.src));
        memcpy(key->ipv6.dst, nh->daddr.in6_u.u6_addr8, sizeof(key->ipv6.dst));
-       key->nw_tos = ipv6_get_dsfield(nh) & ~INET_ECN_MASK;
-       key->nw_proto = NEXTHDR_NONE;
+       *key_len = SW_FLOW_KEY_OFFSET(ipv6.dst);
 
        /* We don't process jumbograms. */
        if (!payload_len)
@@ -318,7 +323,7 @@ static __be16 parse_ethertype(struct sk_buff *skb)
 }
 
 static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
-               int nh_len)
+                       int *key_len, int nh_len)
 {
        struct ipv6hdr *nh = ipv6_hdr(skb);
        int icmp_len = ntohs(nh->payload_len) + sizeof(*nh) - nh_len;
@@ -328,6 +333,7 @@ static int parse_icmpv6(struct sk_buff *skb, struct 
sw_flow_key *key,
         * fields, so we need to store them in 16-bit network byte order. */
        key->ipv6.tp.src = htons(icmp->icmp6_type);
        key->ipv6.tp.dst = htons(icmp->icmp6_code);
+       *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp.dst);
 
        if (!icmp->icmp6_code
                        && ((icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION)
@@ -344,6 +350,7 @@ static int parse_icmpv6(struct sk_buff *skb, struct 
sw_flow_key *key,
 
                nd = (struct nd_msg *)skb_transport_header(skb);
                memcpy(key->ipv6.nd_target, &nd->target, 
sizeof(key->ipv6.nd_target));
+               *key_len = SW_FLOW_KEY_OFFSET(ipv6.nd_target);
 
                icmp_len -= sizeof(*nd);
                offset = 0;
@@ -363,12 +370,14 @@ static int parse_icmpv6(struct sk_buff *skb, struct 
sw_flow_key *key,
                                        goto invalid;
                                memcpy(key->ipv6.nd_sha,
                                                
&nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+                               *key_len = SW_FLOW_KEY_OFFSET(ipv6.nd_sha);
                        } else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR
                                        && opt_len == 8) {
                                if (!is_zero_ether_addr(key->ipv6.nd_tha))
                                        goto invalid;
                                memcpy(key->ipv6.nd_tha,
                                                
&nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+                               *key_len = SW_FLOW_KEY_OFFSET(ipv6.nd_tha);
                        }
 
                        icmp_len -= opt_len;
@@ -382,6 +391,7 @@ invalid:
        memset(key->ipv6.nd_target, 0, sizeof(key->ipv6.nd_target));
        memset(key->ipv6.nd_sha, 0, sizeof(key->ipv6.nd_sha));
        memset(key->ipv6.nd_tha, 0, sizeof(key->ipv6.nd_tha));
+       *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp.dst);
 
        return 0;
 }
@@ -412,11 +422,12 @@ invalid:
  *      For other key->dl_type values it is left untouched.
  */
 int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
-                bool *is_frag)
+               int *key_len, bool *is_frag)
 {
        struct ethhdr *eth;
 
        memset(key, 0, sizeof(*key));
+       *key_len = 0;
        key->tun_id = OVS_CB(skb)->tun_id;
        key->in_port = in_port;
        *is_frag = false;
@@ -458,6 +469,7 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct 
sw_flow_key *key,
                parse_vlan(skb, key);
 
        key->dl_type = parse_ethertype(skb);
+       *key_len = SW_FLOW_KEY_OFFSET(dl_type);
        skb_reset_network_header(skb);
        __skb_push(skb, skb->data - (unsigned char *)eth);
 
@@ -478,8 +490,9 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct 
sw_flow_key *key,
                nh = ip_hdr(skb);
                key->ipv4.src = nh->saddr;
                key->ipv4.dst = nh->daddr;
-               key->nw_tos = nh->tos & ~INET_ECN_MASK;
                key->nw_proto = nh->protocol;
+               key->nw_tos = nh->tos & ~INET_ECN_MASK;
+               *key_len = SW_FLOW_KEY_OFFSET(nw_tos);
 
                /* Transport layer. */
                if (!(nh->frag_off & htons(IP_MF | IP_OFFSET)) &&
@@ -489,12 +502,14 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct 
sw_flow_key *key,
                                        struct tcphdr *tcp = tcp_hdr(skb);
                                        key->ipv4.tp.src = tcp->source;
                                        key->ipv4.tp.dst = tcp->dest;
+                                       *key_len = 
SW_FLOW_KEY_OFFSET(ipv4.tp.dst);
                                }
                        } else if (key->nw_proto == IPPROTO_UDP) {
                                if (udphdr_ok(skb)) {
                                        struct udphdr *udp = udp_hdr(skb);
                                        key->ipv4.tp.src = udp->source;
                                        key->ipv4.tp.dst = udp->dest;
+                                       *key_len = 
SW_FLOW_KEY_OFFSET(ipv4.tp.dst);
                                }
                        } else if (key->nw_proto == IPPROTO_ICMP) {
                                if (icmphdr_ok(skb)) {
@@ -504,6 +519,7 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct 
sw_flow_key *key,
                                         * in 16-bit network byte order. */
                                        key->ipv4.tp.src = htons(icmp->type);
                                        key->ipv4.tp.dst = htons(icmp->code);
+                                       *key_len = 
SW_FLOW_KEY_OFFSET(ipv4.tp.dst);
                                }
                        }
                } else
@@ -520,8 +536,10 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct 
sw_flow_key *key,
                                && arp->ar_pln == 4) {
 
                        /* We only match on the lower 8 bits of the opcode. */
-                       if (ntohs(arp->ar_op) <= 0xff)
+                       if (ntohs(arp->ar_op) <= 0xff) {
                                key->nw_proto = ntohs(arp->ar_op);
+                               *key_len = SW_FLOW_KEY_OFFSET(nw_proto);
+                       }
 
                        if (key->nw_proto == ARPOP_REQUEST
                                        || key->nw_proto == ARPOP_REPLY) {
@@ -529,12 +547,13 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct 
sw_flow_key *key,
                                memcpy(&key->ipv4.dst, arp->ar_tip, 
sizeof(key->ipv4.dst));
                                memcpy(key->ipv4.arp.sha, arp->ar_sha, 
ETH_ALEN);
                                memcpy(key->ipv4.arp.tha, arp->ar_tha, 
ETH_ALEN);
+                               *key_len = SW_FLOW_KEY_OFFSET(ipv4.arp.tha);
                        }
                }
        } else if (key->dl_type == htons(ETH_P_IPV6)) {
                int nh_len;             /* IPv6 Header + Extensions */
 
-               nh_len = parse_ipv6hdr(skb, key);
+               nh_len = parse_ipv6hdr(skb, key, key_len);
                if (unlikely(nh_len < 0)) {
                        if (nh_len == -EINVAL) {
                                skb->transport_header = skb->network_header;
@@ -549,16 +568,18 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct 
sw_flow_key *key,
                                struct tcphdr *tcp = tcp_hdr(skb);
                                key->ipv6.tp.src = tcp->source;
                                key->ipv6.tp.dst = tcp->dest;
+                               *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp.dst);
                        }
                } else if (key->nw_proto == NEXTHDR_UDP) {
                        if (udphdr_ok(skb)) {
                                struct udphdr *udp = udp_hdr(skb);
                                key->ipv6.tp.src = udp->source;
                                key->ipv6.tp.dst = udp->dest;
+                               *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp.dst);
                        }
                } else if (key->nw_proto == NEXTHDR_ICMP) {
                        if (icmp6hdr_ok(skb)) {
-                               int error = parse_icmpv6(skb, key, nh_len);
+                               int error = parse_icmpv6(skb, key, key_len, 
nh_len);
                                if (error < 0)
                                        return error;
                        }
@@ -567,9 +588,9 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct 
sw_flow_key *key,
        return 0;
 }
 
-u32 flow_hash(const struct sw_flow_key *key)
+u32 flow_hash(const struct sw_flow_key *key, int key_len)
 {
-       return jhash2((u32*)key, sizeof(*key) / sizeof(u32), hash_seed);
+       return jhash2((u32*)key, key_len / sizeof(u32), hash_seed);
 }
 
 int flow_cmp(const struct tbl_node *node, void *key2_)
diff --git a/datapath/flow.h b/datapath/flow.h
index 4b304d4..5413c7d 100644
--- a/datapath/flow.h
+++ b/datapath/flow.h
@@ -114,11 +114,11 @@ void flow_deferred_free_acts(struct sw_flow_actions *);
 void flow_hold(struct sw_flow *);
 void flow_put(struct sw_flow *);
 
-int flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, bool 
*is_frag);
+int flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, int 
*key_len, bool *is_frag);
 void flow_used(struct sw_flow *, struct sk_buff *);
 u64 flow_used_time(unsigned long flow_jiffies);
 
-u32 flow_hash(const struct sw_flow_key *);
+u32 flow_hash(const struct sw_flow_key *, int key_len);
 int flow_cmp(const struct tbl_node *, void *target);
 
 /* Upper bound on the length of a nlattr-formatted flow key.  The longest
diff --git a/datapath/tunnel.c b/datapath/tunnel.c
index f1711f1..d29234d 100644
--- a/datapath/tunnel.c
+++ b/datapath/tunnel.c
@@ -930,6 +930,7 @@ static struct tnl_cache *build_cache(struct vport *vport,
                struct tbl_node *flow_node;
                struct vport *dst_vport;
                struct sk_buff *skb;
+               int flow_key_len;
                bool is_frag;
                int err;
 
@@ -944,14 +945,16 @@ static struct tnl_cache *build_cache(struct vport *vport,
                __skb_put(skb, cache->len);
                memcpy(skb->data, get_cached_header(cache), cache->len);
 
-               err = flow_extract(skb, dst_vport->port_no, &flow_key, 
&is_frag);
+               err = flow_extract(skb, dst_vport->port_no, &flow_key,
+                               &flow_key_len, &is_frag);
 
                kfree_skb(skb);
                if (err || is_frag)
                        goto done;
 
                flow_node = tbl_lookup(rcu_dereference(dst_vport->dp->table),
-                                      &flow_key, flow_hash(&flow_key),
+                                      &flow_key,
+                                      flow_hash(&flow_key, flow_key_len),
                                       flow_cmp);
                if (flow_node) {
                        struct sw_flow *flow = flow_cast(flow_node);
-- 
1.7.2.3

_______________________________________________
dev mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/dev

Reply via email to