Re: [PATCH nf-next v8 2/3] net: netfilter: Add IPIP flowtable tx sw acceleration

Pablo Neira Ayuso Tue, 04 Nov 2025 14:52:39 -0800

On Thu, Oct 23, 2025 at 10:50:16AM +0200, Lorenzo Bianconi wrote:
[...]
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 
> 0355461960ce3c0db49e00a6f77f48b031a635dc..eb8058fd7139a2b5457008146f979590f9f03c1d
>  100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -897,6 +897,9 @@ struct net_device_path {
>                       };
>  
>                       u8      l3_proto;
> +                     u8      tos;
> +                     u8      ttl;
> +                     __be16  df;
>               } tun;
>               struct {
>                       enum {
> diff --git a/include/net/netfilter/nf_flow_table.h 
> b/include/net/netfilter/nf_flow_table.h
> index 
> 6d00a8aa52584ad96d200683297c1b02bf1f6d4f..fe792f5a8f0528de021c27382b235688532614e4
>  100644
> --- a/include/net/netfilter/nf_flow_table.h
> +++ b/include/net/netfilter/nf_flow_table.h
> @@ -119,6 +119,9 @@ struct flow_offload_tunnel {
>       };
>  
>       u8      l3_proto;
> +     u8      tos;
> +     u8      ttl;
> +     __be16  df;


These fields are now included in the hash that is used for the lookup.
Is it intentional to include them here? For rx, can we even know the
ttl of the received packet?

Maybe this needs to be moved after the placeholder:

        struct { }                      __hash;

>  };
>  
>  struct flow_offload_tuple {
[...]
> diff --git a/net/netfilter/nf_flow_table_ip.c 
> b/net/netfilter/nf_flow_table_ip.c
> index 
> 76081d5d2f71c10e0c65e906b3fb2769e3ab1466..a66ffa0c7fbe780a9f9a545e42d44dfe408e7cb2
>  100644
> --- a/net/netfilter/nf_flow_table_ip.c
> +++ b/net/netfilter/nf_flow_table_ip.c
[...]
> @@ -533,6 +589,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
>       struct flow_offload *flow;
>       struct neighbour *neigh;
>       struct rtable *rt;
> +     __be32 dest;
>       int ret;
>  
>       tuplehash = nf_flow_offload_lookup(&ctx, flow_table, skb);
> @@ -555,8 +612,9 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
>  
>       dir = tuplehash->tuple.dir;
>       flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
> +     reply_tuple = &flow->tuplehash[!dir].tuple;

Nit: I'd suggest 'other_tuple' instead of 'reply_tuple', given this is
not strictly the reply tuple, just the tuple from the other direction.

> -     if (nf_flow_encap_push(skb, &flow->tuplehash[!dir].tuple) < 0)
> +     if (nf_flow_encap_push(state->net, skb, reply_tuple))
>               return NF_DROP;
>  
>       switch (tuplehash->tuple.xmit_type) {
> @@ -567,7 +625,9 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
>                       flow_offload_teardown(flow);
>                       return NF_DROP;
>               }
> -             neigh = ip_neigh_gw4(rt->dst.dev, rt_nexthop(rt, 
> flow->tuplehash[!dir].tuple.src_v4.s_addr));
> +             dest = reply_tuple->tun_num ? reply_tuple->tun.src_v4.s_addr
> +                                         : reply_tuple->src_v4.s_addr;
> +             neigh = ip_neigh_gw4(rt->dst.dev, rt_nexthop(rt, dest));
>               if (IS_ERR(neigh)) {
>                       flow_offload_teardown(flow);
>                       return NF_DROP;
> diff --git a/net/netfilter/nf_flow_table_path.c 
> b/net/netfilter/nf_flow_table_path.c
> index 
> bd5e9bf1ca393ab793976ba98a027b60f84882ba..cd0be2efe97596d0947621a5ea604373d5b61da8
>  100644
> --- a/net/netfilter/nf_flow_table_path.c
> +++ b/net/netfilter/nf_flow_table_path.c
> @@ -190,7 +190,43 @@ static bool nft_flowtable_find_dev(const struct 
> net_device *dev,
>       return found;
>  }
>  
> -static void nft_dev_forward_path(struct nf_flow_route *route,
> +static int nft_flow_tunnel_update_route(const struct nft_pktinfo *pkt,
> +                                     struct nf_flow_route *route,
> +                                     enum ip_conntrack_dir dir)
> +{
> +     struct dst_entry *tun_dst = NULL;
> +     struct flowi fl = {};
> +
> +     switch (nft_pf(pkt)) {
> +     case NFPROTO_IPV4:
> +             fl.u.ip4.daddr = route->tuple[!dir].in.tun.src_v4.s_addr;
> +             fl.u.ip4.saddr = route->tuple[!dir].in.tun.dst_v4.s_addr;
> +             fl.u.ip4.flowi4_iif = nft_in(pkt)->ifindex;
> +             fl.u.ip4.flowi4_dscp = ip4h_dscp(ip_hdr(pkt->skb));
> +             fl.u.ip4.flowi4_mark = pkt->skb->mark;
> +             fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC;
> +             break;
> +     case NFPROTO_IPV6:
> +             fl.u.ip6.daddr = route->tuple[!dir].in.tun.src_v6;
> +             fl.u.ip6.saddr = route->tuple[!dir].in.tun.dst_v6;
> +             fl.u.ip6.flowi6_iif = nft_in(pkt)->ifindex;
> +             fl.u.ip6.flowlabel = ip6_flowinfo(ipv6_hdr(pkt->skb));
> +             fl.u.ip6.flowi6_mark = pkt->skb->mark;
> +             fl.u.ip6.flowi6_flags = FLOWI_FLAG_ANYSRC;
> +             break;
> +     }
> +
> +     nf_route(nft_net(pkt), &tun_dst, &fl, false, nft_pf(pkt));
> +     if (!tun_dst)
> +             return -ENOENT;
> +
> +     nft_default_forward_path(route, tun_dst, dir);

This overrides the dst that was previously set here. Does this leak
that dst?

> +
> +     return 0;
> +}
> +
> +static void nft_dev_forward_path(const struct nft_pktinfo *pkt,
> +                              struct nf_flow_route *route,
>                                const struct nf_conn *ct,
>                                enum ip_conntrack_dir dir,
>                                struct nft_flowtable *ft)
> @@ -218,6 +254,12 @@ static void nft_dev_forward_path(struct nf_flow_route 
> *route,
>               route->tuple[!dir].in.tun.src_v6 = info.tun.dst_v6;
>               route->tuple[!dir].in.tun.dst_v6 = info.tun.src_v6;
>               route->tuple[!dir].in.tun.l3_proto = info.tun.l3_proto;
> +             route->tuple[!dir].in.tun.tos = info.tun.tos;
> +             route->tuple[!dir].in.tun.ttl = info.tun.ttl;
> +             route->tuple[!dir].in.tun.df = info.tun.df;
> +
> +             if (nft_flow_tunnel_update_route(pkt, route, dir))
> +                     return;

If tunnel route is found...

>       }
>
>       route->tuple[!dir].in.num_encaps = info.num_encaps;

... num_encaps is never set?

Would you also extend the selftest to combine IPIP with vlan? Thanks.

> @@ -274,9 +316,9 @@ int nft_flow_route(const struct nft_pktinfo *pkt, const 
> struct nf_conn *ct,
>       nft_default_forward_path(route, other_dst, !dir);
>  
>       if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
> -             nft_dev_forward_path(route, ct, dir, ft);
> +             nft_dev_forward_path(pkt, route, ct, dir, ft);
>       if (route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
> -             nft_dev_forward_path(route, ct, !dir, ft);
> +             nft_dev_forward_path(pkt, route, ct, !dir, ft);
>  
>       return 0;
>  }
> 
> -- 
> 2.51.0
>

Re: [PATCH nf-next v8 2/3] net: netfilter: Add IPIP flowtable tx sw acceleration

Reply via email to