On 25/02/18 07:44, Roopa Prabhu wrote: > From: Roopa Prabhu <ro...@cumulusnetworks.com> > > Dissect flow in fwd path if fib rules require it. Controlled by > a flag to avoid penatly for the common case. Flag is set when fib > rules with sport, dport and proto match that require flow dissect > are installed. Also passes the dissected hash keys to the multipath > hash function when applicable to avoid dissecting the flow again. > icmp packets will continue to use inner header for hash > calculations (Thanks to Nikolay Aleksandrov for some review here). > > Signed-off-by: Roopa Prabhu <ro...@cumulusnetworks.com> > --- > include/net/ip_fib.h | 2 +- > include/net/netns/ipv4.h | 1 + > net/ipv4/fib_rules.c | 6 ++++++ > net/ipv4/fib_semantics.c | 2 +- > net/ipv4/route.c | 52 > +++++++++++++++++++++++++++++++++++------------- > 5 files changed, 47 insertions(+), 16 deletions(-) > > diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h > index f805243..5ada772 100644 > --- a/include/net/ip_fib.h > +++ b/include/net/ip_fib.h > @@ -371,7 +371,7 @@ int fib_sync_up(struct net_device *dev, unsigned int > nh_flags); > > #ifdef CONFIG_IP_ROUTE_MULTIPATH > int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, > - const struct sk_buff *skb); > + const struct sk_buff *skb, struct flow_keys *flkeys); > #endif > void fib_select_multipath(struct fib_result *res, int hash); > void fib_select_path(struct net *net, struct fib_result *res, > diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h > index 44668c2..87b8fdc 100644 > --- a/include/net/netns/ipv4.h > +++ b/include/net/netns/ipv4.h > @@ -52,6 +52,7 @@ struct netns_ipv4 { > #ifdef CONFIG_IP_MULTIPLE_TABLES > struct fib_rules_ops *rules_ops; > bool fib_has_custom_rules; > + bool fib_rules_require_fldissect; > struct fib_table __rcu *fib_main; > struct fib_table __rcu *fib_default; > #endif > diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c > index 9d55c90..83aa786 100644 > --- a/net/ipv4/fib_rules.c > +++ b/net/ipv4/fib_rules.c > @@ -253,6 +253,11 @@ static int fib4_rule_configure(struct fib_rule *rule, > struct sk_buff *skb, > } > #endif > > + if (rule->ip_proto || > + fib_rule_port_range_valid(&rule->sport_range) || > + fib_rule_port_range_valid(&rule->dport_range)) > + net->ipv4.fib_rules_require_fldissect = true; > + > rule4->src_len = frh->src_len; > rule4->srcmask = inet_make_mask(rule4->src_len); > rule4->dst_len = frh->dst_len; > @@ -398,6 +403,7 @@ int __net_init fib4_rules_init(struct net *net) > goto fail; > net->ipv4.rules_ops = ops; > net->ipv4.fib_has_custom_rules = false; > + net->ipv4.fib_rules_require_fldissect = false; > return 0; > > fail: > diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c > index cd46d76..b0c398a 100644 > --- a/net/ipv4/fib_semantics.c > +++ b/net/ipv4/fib_semantics.c > @@ -1770,7 +1770,7 @@ void fib_select_path(struct net *net, struct fib_result > *res, > > #ifdef CONFIG_IP_ROUTE_MULTIPATH > if (res->fi->fib_nhs > 1) { > - int h = fib_multipath_hash(res->fi, fl4, skb); > + int h = fib_multipath_hash(res->fi, fl4, skb, NULL); > > fib_select_multipath(res, h); > } > diff --git a/net/ipv4/route.c b/net/ipv4/route.c > index 26eefa2..72dd6c6 100644 > --- a/net/ipv4/route.c > +++ b/net/ipv4/route.c > @@ -1783,7 +1783,7 @@ static void ip_multipath_l3_keys(const struct sk_buff > *skb, > > /* if skb is set it will be used and fl4 can be NULL */ > int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, > - const struct sk_buff *skb) > + const struct sk_buff *skb, struct flow_keys *flkeys) > { > struct net *net = fi->fib_net; > struct flow_keys hash_keys; > @@ -1810,14 +1810,23 @@ int fib_multipath_hash(const struct fib_info *fi, > const struct flowi4 *fl4, > if (skb->l4_hash) > return skb_get_hash_raw(skb) >> 1; > memset(&hash_keys, 0, sizeof(hash_keys)); > - skb_flow_dissect_flow_keys(skb, &keys, flag); > > - hash_keys.control.addr_type = > FLOW_DISSECTOR_KEY_IPV4_ADDRS; > - hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; > - hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; > - hash_keys.ports.src = keys.ports.src; > - hash_keys.ports.dst = keys.ports.dst; > - hash_keys.basic.ip_proto = keys.basic.ip_proto; > + if (flkeys) { > + hash_keys.control.addr_type = > FLOW_DISSECTOR_KEY_IPV4_ADDRS; > + hash_keys.addrs.v4addrs.src = > flkeys->addrs.v4addrs.src; > + hash_keys.addrs.v4addrs.dst = > flkeys->addrs.v4addrs.dst; > + hash_keys.ports.src = flkeys->ports.src; > + hash_keys.ports.dst = flkeys->ports.dst; > + hash_keys.basic.ip_proto = > flkeys->basic.ip_proto; > + } else { > + skb_flow_dissect_flow_keys(skb, &keys, flag); > + hash_keys.control.addr_type = > FLOW_DISSECTOR_KEY_IPV4_ADDRS; > + hash_keys.addrs.v4addrs.src = > keys.addrs.v4addrs.src; > + hash_keys.addrs.v4addrs.dst = > keys.addrs.v4addrs.dst; > + hash_keys.ports.src = keys.ports.src; > + hash_keys.ports.dst = keys.ports.dst; > + hash_keys.basic.ip_proto = keys.basic.ip_proto; > + } > } else { > memset(&hash_keys, 0, sizeof(hash_keys)); > hash_keys.control.addr_type = > FLOW_DISSECTOR_KEY_IPV4_ADDRS; > @@ -1838,11 +1847,12 @@ int fib_multipath_hash(const struct fib_info *fi, > const struct flowi4 *fl4, > static int ip_mkroute_input(struct sk_buff *skb, > struct fib_result *res, > struct in_device *in_dev, > - __be32 daddr, __be32 saddr, u32 tos) > + __be32 daddr, __be32 saddr, u32 tos, > + struct flow_keys *hkeys) > { > #ifdef CONFIG_IP_ROUTE_MULTIPATH > if (res->fi && res->fi->fib_nhs > 1) { > - int h = fib_multipath_hash(res->fi, NULL, skb); > + int h = fib_multipath_hash(res->fi, NULL, skb, hkeys); > > fib_select_multipath(res, h); > } > @@ -1868,13 +1878,14 @@ static int ip_route_input_slow(struct sk_buff *skb, > __be32 daddr, __be32 saddr, > struct fib_result *res) > { > struct in_device *in_dev = __in_dev_get_rcu(dev); > + struct flow_keys *flkeys = NULL, _flkeys; > + struct net *net = dev_net(dev); > struct ip_tunnel_info *tun_info; > - struct flowi4 fl4; > + int err = -EINVAL; > unsigned int flags = 0; > u32 itag = 0; > struct rtable *rth; > - int err = -EINVAL; > - struct net *net = dev_net(dev); > + struct flowi4 fl4; > bool do_cache; > > /* IP on this device is disabled. */ > @@ -1933,6 +1944,19 @@ static int ip_route_input_slow(struct sk_buff *skb, > __be32 daddr, __be32 saddr, > fl4.daddr = daddr; > fl4.saddr = saddr; > fl4.flowi4_uid = sock_net_uid(net, NULL); > + > +#ifdef CONFIG_IP_MULTIPLE_TABLES > + if (net->ipv4.fib_rules_require_fldissect) { > + unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; > + > + memset(&_flkeys, 0, sizeof(_flkeys));
skb_flow_dissect_flow_keys() does the memset for you. > + skb_flow_dissect_flow_keys(skb, &_flkeys, flag); > + fl4.fl4_sport = _flkeys.ports.src; > + fl4.fl4_dport = _flkeys.ports.dst; > + fl4.flowi4_proto = _flkeys.basic.ip_proto; > + flkeys = &_flkeys; > + } > +#endif > err = fib_lookup(net, &fl4, res, 0); > if (err != 0) { > if (!IN_DEV_FORWARD(in_dev)) > @@ -1958,7 +1982,7 @@ static int ip_route_input_slow(struct sk_buff *skb, > __be32 daddr, __be32 saddr, > if (res->type != RTN_UNICAST) > goto martian_destination; > > - err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos); > + err = ip_mkroute_input(skb, res, in_dev, daddr, saddr, tos, flkeys); > out: return err; > > brd_input: >