Now that struct sw_flow_key's fields have been reordered to put optional information last, hashing of a flow key can be limited, as a performance optimization, to the leading portion of the key that actually contains information for that flow. flow_extract() is modified to report the length of that useful portion through a new 'key_len' output parameter (its return value is still an error code), and flow_hash() now requires the length as a parameter.
We don't care as much about the performance of hashing flows from userspace, so the full flow key struct is still hashed in those cases. Suggested-by: Jesse Gross <[email protected]> Signed-off-by: Andrew Evans <[email protected]> --- datapath/datapath.c | 15 +++++++++------ datapath/flow.c | 43 ++++++++++++++++++++++++++++++++----------- datapath/flow.h | 4 ++-- datapath/tunnel.c | 7 +++++-- 4 files changed, 48 insertions(+), 21 deletions(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index 733acad..482437a 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -276,10 +276,11 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb) if (!OVS_CB(skb)->flow) { struct sw_flow_key key; struct tbl_node *flow_node; + int key_len; bool is_frag; /* Extract flow from 'skb' into 'key'. */ - error = flow_extract(skb, p->port_no, &key, &is_frag); + error = flow_extract(skb, p->port_no, &key, &key_len, &is_frag); if (unlikely(error)) { kfree_skb(skb); return; @@ -293,7 +294,7 @@ void dp_process_received_packet(struct vport *p, struct sk_buff *skb) /* Look up flow. 
*/ flow_node = tbl_lookup(rcu_dereference(dp->table), &key, - flow_hash(&key), flow_cmp); + flow_hash(&key, key_len), flow_cmp); if (unlikely(!flow_node)) { struct dp_upcall_info upcall; @@ -675,6 +676,7 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) struct sw_flow_key key; struct datapath *dp; struct ethhdr *eth; + int key_len; bool is_frag; int err; @@ -705,7 +707,7 @@ static int odp_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) else packet->protocol = htons(ETH_P_802_2); - err = flow_extract(packet, -1, &key, &is_frag); + err = flow_extract(packet, -1, &key, &key_len, &is_frag); if (err) goto exit; @@ -963,7 +965,7 @@ static int odp_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) if (!dp) goto error; - hash = flow_hash(&key); + hash = flow_hash(&key, sizeof(key)); table = get_table_protected(dp); flow_node = tbl_lookup(table, &key, hash, flow_cmp); if (!flow_node) { @@ -1087,7 +1089,7 @@ static int odp_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) return -ENODEV; table = get_table_protected(dp); - flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp); + flow_node = tbl_lookup(table, &key, flow_hash(&key, sizeof(key)), flow_cmp); if (!flow_node) return -ENOENT; @@ -1122,7 +1124,8 @@ static int odp_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) return -ENODEV; table = get_table_protected(dp); - flow_node = tbl_lookup(table, &key, flow_hash(&key), flow_cmp); + flow_node = tbl_lookup(table, &key, flow_hash(&key, sizeof(key)), + flow_cmp); if (!flow_node) return -ENOENT; flow = flow_cast(flow_node); diff --git a/datapath/flow.c b/datapath/flow.c index e3a1a6d..475e4ee 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -101,7 +101,11 @@ u64 flow_used_time(unsigned long flow_jiffies) return cur_ms - idle_ms; } -static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key) +#define SW_FLOW_KEY_OFFSET(field) \ + offsetof(struct sw_flow_key, field) + \ + 
sizeof(((struct sw_flow_key *)0)->field) + +static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key, int *key_len) { unsigned int nh_ofs = skb_network_offset(skb); unsigned int nh_len; @@ -118,10 +122,11 @@ static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key) payload_ofs = (u8 *)(nh + 1) - skb->data; payload_len = ntohs(nh->payload_len); + key->nw_proto = NEXTHDR_NONE; + key->nw_tos = ipv6_get_dsfield(nh) & ~INET_ECN_MASK; memcpy(key->ipv6.src, nh->saddr.in6_u.u6_addr8, sizeof(key->ipv6.src)); memcpy(key->ipv6.dst, nh->daddr.in6_u.u6_addr8, sizeof(key->ipv6.dst)); - key->nw_tos = ipv6_get_dsfield(nh) & ~INET_ECN_MASK; - key->nw_proto = NEXTHDR_NONE; + *key_len = SW_FLOW_KEY_OFFSET(ipv6.dst); /* We don't process jumbograms. */ if (!payload_len) @@ -318,7 +323,7 @@ static __be16 parse_ethertype(struct sk_buff *skb) } static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, - int nh_len) + int *key_len, int nh_len) { struct ipv6hdr *nh = ipv6_hdr(skb); int icmp_len = ntohs(nh->payload_len) + sizeof(*nh) - nh_len; @@ -328,6 +333,7 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, * fields, so we need to store them in 16-bit network byte order. 
*/ key->ipv6.tp.src = htons(icmp->icmp6_type); key->ipv6.tp.dst = htons(icmp->icmp6_code); + *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp.dst); if (!icmp->icmp6_code && ((icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) @@ -344,6 +350,7 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, nd = (struct nd_msg *)skb_transport_header(skb); memcpy(key->ipv6.nd_target, &nd->target, sizeof(key->ipv6.nd_target)); + *key_len = SW_FLOW_KEY_OFFSET(ipv6.nd_target); icmp_len -= sizeof(*nd); offset = 0; @@ -363,12 +370,14 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key, goto invalid; memcpy(key->ipv6.nd_sha, &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN); + *key_len = SW_FLOW_KEY_OFFSET(ipv6.nd_sha); } else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR && opt_len == 8) { if (!is_zero_ether_addr(key->ipv6.nd_tha)) goto invalid; memcpy(key->ipv6.nd_tha, &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN); + *key_len = SW_FLOW_KEY_OFFSET(ipv6.nd_tha); } icmp_len -= opt_len; @@ -382,6 +391,7 @@ invalid: memset(key->ipv6.nd_target, 0, sizeof(key->ipv6.nd_target)); memset(key->ipv6.nd_sha, 0, sizeof(key->ipv6.nd_sha)); memset(key->ipv6.nd_tha, 0, sizeof(key->ipv6.nd_tha)); + *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp.dst); return 0; } @@ -412,11 +422,12 @@ invalid: * For other key->dl_type values it is left untouched. 
*/ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, - bool *is_frag) + int *key_len, bool *is_frag) { struct ethhdr *eth; memset(key, 0, sizeof(*key)); + *key_len = 0; key->tun_id = OVS_CB(skb)->tun_id; key->in_port = in_port; *is_frag = false; @@ -458,6 +469,7 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, parse_vlan(skb, key); key->dl_type = parse_ethertype(skb); + *key_len = SW_FLOW_KEY_OFFSET(dl_type); skb_reset_network_header(skb); __skb_push(skb, skb->data - (unsigned char *)eth); @@ -478,8 +490,9 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, nh = ip_hdr(skb); key->ipv4.src = nh->saddr; key->ipv4.dst = nh->daddr; - key->nw_tos = nh->tos & ~INET_ECN_MASK; key->nw_proto = nh->protocol; + key->nw_tos = nh->tos & ~INET_ECN_MASK; + *key_len = SW_FLOW_KEY_OFFSET(nw_tos); /* Transport layer. */ if (!(nh->frag_off & htons(IP_MF | IP_OFFSET)) && @@ -489,12 +502,14 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, struct tcphdr *tcp = tcp_hdr(skb); key->ipv4.tp.src = tcp->source; key->ipv4.tp.dst = tcp->dest; + *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp.dst); } } else if (key->nw_proto == IPPROTO_UDP) { if (udphdr_ok(skb)) { struct udphdr *udp = udp_hdr(skb); key->ipv4.tp.src = udp->source; key->ipv4.tp.dst = udp->dest; + *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp.dst); } } else if (key->nw_proto == IPPROTO_ICMP) { if (icmphdr_ok(skb)) { @@ -504,6 +519,7 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, * in 16-bit network byte order. */ key->ipv4.tp.src = htons(icmp->type); key->ipv4.tp.dst = htons(icmp->code); + *key_len = SW_FLOW_KEY_OFFSET(ipv4.tp.dst); } } } else @@ -520,8 +536,10 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, && arp->ar_pln == 4) { /* We only match on the lower 8 bits of the opcode. 
*/ - if (ntohs(arp->ar_op) <= 0xff) + if (ntohs(arp->ar_op) <= 0xff) { key->nw_proto = ntohs(arp->ar_op); + *key_len = SW_FLOW_KEY_OFFSET(nw_proto); + } if (key->nw_proto == ARPOP_REQUEST || key->nw_proto == ARPOP_REPLY) { @@ -529,12 +547,13 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, memcpy(&key->ipv4.dst, arp->ar_tip, sizeof(key->ipv4.dst)); memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN); memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN); + *key_len = SW_FLOW_KEY_OFFSET(ipv4.arp.tha); } } } else if (key->dl_type == htons(ETH_P_IPV6)) { int nh_len; /* IPv6 Header + Extensions */ - nh_len = parse_ipv6hdr(skb, key); + nh_len = parse_ipv6hdr(skb, key, key_len); if (unlikely(nh_len < 0)) { if (nh_len == -EINVAL) { skb->transport_header = skb->network_header; @@ -549,16 +568,18 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, struct tcphdr *tcp = tcp_hdr(skb); key->ipv6.tp.src = tcp->source; key->ipv6.tp.dst = tcp->dest; + *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp.dst); } } else if (key->nw_proto == NEXTHDR_UDP) { if (udphdr_ok(skb)) { struct udphdr *udp = udp_hdr(skb); key->ipv6.tp.src = udp->source; key->ipv6.tp.dst = udp->dest; + *key_len = SW_FLOW_KEY_OFFSET(ipv6.tp.dst); } } else if (key->nw_proto == NEXTHDR_ICMP) { if (icmp6hdr_ok(skb)) { - int error = parse_icmpv6(skb, key, nh_len); + int error = parse_icmpv6(skb, key, key_len, nh_len); if (error < 0) return error; } @@ -567,9 +588,9 @@ int flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key, return 0; } -u32 flow_hash(const struct sw_flow_key *key) +u32 flow_hash(const struct sw_flow_key *key, int key_len) { - return jhash2((u32*)key, sizeof(*key) / sizeof(u32), hash_seed); + return jhash2((u32*)key, key_len / sizeof(u32), hash_seed); } int flow_cmp(const struct tbl_node *node, void *key2_) diff --git a/datapath/flow.h b/datapath/flow.h index 4b304d4..5413c7d 100644 --- a/datapath/flow.h +++ b/datapath/flow.h @@ -114,11 +114,11 @@ 
void flow_deferred_free_acts(struct sw_flow_actions *); void flow_hold(struct sw_flow *); void flow_put(struct sw_flow *); -int flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, bool *is_frag); +int flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *, int *key_len, bool *is_frag); void flow_used(struct sw_flow *, struct sk_buff *); u64 flow_used_time(unsigned long flow_jiffies); -u32 flow_hash(const struct sw_flow_key *); +u32 flow_hash(const struct sw_flow_key *, int key_len); int flow_cmp(const struct tbl_node *, void *target); /* Upper bound on the length of a nlattr-formatted flow key. The longest diff --git a/datapath/tunnel.c b/datapath/tunnel.c index f1711f1..d29234d 100644 --- a/datapath/tunnel.c +++ b/datapath/tunnel.c @@ -930,6 +930,7 @@ static struct tnl_cache *build_cache(struct vport *vport, struct tbl_node *flow_node; struct vport *dst_vport; struct sk_buff *skb; + int flow_key_len; bool is_frag; int err; @@ -944,14 +945,16 @@ static struct tnl_cache *build_cache(struct vport *vport, __skb_put(skb, cache->len); memcpy(skb->data, get_cached_header(cache), cache->len); - err = flow_extract(skb, dst_vport->port_no, &flow_key, &is_frag); + err = flow_extract(skb, dst_vport->port_no, &flow_key, + &flow_key_len, &is_frag); kfree_skb(skb); if (err || is_frag) goto done; flow_node = tbl_lookup(rcu_dereference(dst_vport->dp->table), - &flow_key, flow_hash(&flow_key), + &flow_key, + flow_hash(&flow_key, flow_key_len), flow_cmp); if (flow_node) { struct sw_flow *flow = flow_cast(flow_node); -- 1.7.2.3 _______________________________________________ dev mailing list [email protected] http://openvswitch.org/mailman/listinfo/dev
