> On Thu, Oct 23, 2025 at 10:50:16AM +0200, Lorenzo Bianconi wrote:
> [...]
> > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> > index
> > 0355461960ce3c0db49e00a6f77f48b031a635dc..eb8058fd7139a2b5457008146f979590f9f03c1d
> > 100644
> > --- a/include/linux/netdevice.h
> > +++ b/include/linux/netdevice.h
> > @@ -897,6 +897,9 @@ struct net_device_path {
> > };
> >
> > u8 l3_proto;
> > + u8 tos;
> > + u8 ttl;
> > + __be16 df;
> > } tun;
> > struct {
> > enum {
> > diff --git a/include/net/netfilter/nf_flow_table.h
> > b/include/net/netfilter/nf_flow_table.h
> > index
> > 6d00a8aa52584ad96d200683297c1b02bf1f6d4f..fe792f5a8f0528de021c27382b235688532614e4
> > 100644
> > --- a/include/net/netfilter/nf_flow_table.h
> > +++ b/include/net/netfilter/nf_flow_table.h
> > @@ -119,6 +119,9 @@ struct flow_offload_tunnel {
> > };
> >
> > u8 l3_proto;
> > + u8 tos;
> > + u8 ttl;
> > + __be16 df;
>
> This is now included in the hash that is used for the lookup. Is it
> intentional to include these fields here? For rx, we cannot know the
> ttl of the received packet.
It is my mistake, I will fix that in v9. Do we really need to add tos, ttl and
df to the tuple for tx and rx acceleration? If so, we can move the ttl field
after the __hash placeholder.
>
> Maybe this needs to be moved after the placeholder:
>
> struct { } __hash;
>
> > };
> >
> > struct flow_offload_tuple {
> [...]
> > diff --git a/net/netfilter/nf_flow_table_ip.c
> > b/net/netfilter/nf_flow_table_ip.c
> > index
> > 76081d5d2f71c10e0c65e906b3fb2769e3ab1466..a66ffa0c7fbe780a9f9a545e42d44dfe408e7cb2
> > 100644
> > --- a/net/netfilter/nf_flow_table_ip.c
> > +++ b/net/netfilter/nf_flow_table_ip.c
> [...]
> > @@ -533,6 +589,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
> > struct flow_offload *flow;
> > struct neighbour *neigh;
> > struct rtable *rt;
> > + __be32 dest;
> > int ret;
> >
> > tuplehash = nf_flow_offload_lookup(&ctx, flow_table, skb);
> > @@ -555,8 +612,9 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
> >
> > dir = tuplehash->tuple.dir;
> > flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
> > + reply_tuple = &flow->tuplehash[!dir].tuple;
>
> Nit: I'd suggest 'other_tuple' instead of 'reply_tuple', given this is not
> strictly the reply tuple, just the tuple from the other direction.
ack, I will fix it in v9.
>
> > - if (nf_flow_encap_push(skb, &flow->tuplehash[!dir].tuple) < 0)
> > + if (nf_flow_encap_push(state->net, skb, reply_tuple))
> > return NF_DROP;
> >
> > switch (tuplehash->tuple.xmit_type) {
> > @@ -567,7 +625,9 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
> > flow_offload_teardown(flow);
> > return NF_DROP;
> > }
> > - neigh = ip_neigh_gw4(rt->dst.dev, rt_nexthop(rt,
> > flow->tuplehash[!dir].tuple.src_v4.s_addr));
> > + dest = reply_tuple->tun_num ? reply_tuple->tun.src_v4.s_addr
> > + : reply_tuple->src_v4.s_addr;
> > + neigh = ip_neigh_gw4(rt->dst.dev, rt_nexthop(rt, dest));
> > if (IS_ERR(neigh)) {
> > flow_offload_teardown(flow);
> > return NF_DROP;
> > diff --git a/net/netfilter/nf_flow_table_path.c
> > b/net/netfilter/nf_flow_table_path.c
> > index
> > bd5e9bf1ca393ab793976ba98a027b60f84882ba..cd0be2efe97596d0947621a5ea604373d5b61da8
> > 100644
> > --- a/net/netfilter/nf_flow_table_path.c
> > +++ b/net/netfilter/nf_flow_table_path.c
> > @@ -190,7 +190,43 @@ static bool nft_flowtable_find_dev(const struct
> > net_device *dev,
> > return found;
> > }
> >
> > -static void nft_dev_forward_path(struct nf_flow_route *route,
> > +static int nft_flow_tunnel_update_route(const struct nft_pktinfo *pkt,
> > + struct nf_flow_route *route,
> > + enum ip_conntrack_dir dir)
> > +{
> > + struct dst_entry *tun_dst = NULL;
> > + struct flowi fl = {};
> > +
> > + switch (nft_pf(pkt)) {
> > + case NFPROTO_IPV4:
> > + fl.u.ip4.daddr = route->tuple[!dir].in.tun.src_v4.s_addr;
> > + fl.u.ip4.saddr = route->tuple[!dir].in.tun.dst_v4.s_addr;
> > + fl.u.ip4.flowi4_iif = nft_in(pkt)->ifindex;
> > + fl.u.ip4.flowi4_dscp = ip4h_dscp(ip_hdr(pkt->skb));
> > + fl.u.ip4.flowi4_mark = pkt->skb->mark;
> > + fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC;
> > + break;
> > + case NFPROTO_IPV6:
> > + fl.u.ip6.daddr = route->tuple[!dir].in.tun.src_v6;
> > + fl.u.ip6.saddr = route->tuple[!dir].in.tun.dst_v6;
> > + fl.u.ip6.flowi6_iif = nft_in(pkt)->ifindex;
> > + fl.u.ip6.flowlabel = ip6_flowinfo(ipv6_hdr(pkt->skb));
> > + fl.u.ip6.flowi6_mark = pkt->skb->mark;
> > + fl.u.ip6.flowi6_flags = FLOWI_FLAG_ANYSRC;
> > + break;
> > + }
> > +
> > + nf_route(nft_net(pkt), &tun_dst, &fl, false, nft_pf(pkt));
> > + if (!tun_dst)
> > + return -ENOENT;
> > +
> > + nft_default_forward_path(route, tun_dst, dir);
>
> This overrides the dst that was previously set here; is the old dst
> being leaked?
ack, right. I will fix it in v9.
>
> > +
> > + return 0;
> > +}
> > +
> > +static void nft_dev_forward_path(const struct nft_pktinfo *pkt,
> > + struct nf_flow_route *route,
> > const struct nf_conn *ct,
> > enum ip_conntrack_dir dir,
> > struct nft_flowtable *ft)
> > @@ -218,6 +254,12 @@ static void nft_dev_forward_path(struct nf_flow_route
> > *route,
> > route->tuple[!dir].in.tun.src_v6 = info.tun.dst_v6;
> > route->tuple[!dir].in.tun.dst_v6 = info.tun.src_v6;
> > route->tuple[!dir].in.tun.l3_proto = info.tun.l3_proto;
> > + route->tuple[!dir].in.tun.tos = info.tun.tos;
> > + route->tuple[!dir].in.tun.ttl = info.tun.ttl;
> > + route->tuple[!dir].in.tun.df = info.tun.df;
> > +
> > + if (nft_flow_tunnel_update_route(pkt, route, dir))
> > + return;
>
> If tunnel route is found...
>
> > }
> >
> > route->tuple[!dir].in.num_encaps = info.num_encaps;
>
> ... num_encaps is never set?
ack, I will fix it in v9.
>
> Would you also extend the selftest to combine IPIP with vlan? Thanks.
sure, I will add it in v9.
Regards,
Lorenzo
>
> > @@ -274,9 +316,9 @@ int nft_flow_route(const struct nft_pktinfo *pkt, const
> > struct nf_conn *ct,
> > nft_default_forward_path(route, other_dst, !dir);
> >
> > if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
> > - nft_dev_forward_path(route, ct, dir, ft);
> > + nft_dev_forward_path(pkt, route, ct, dir, ft);
> > if (route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
> > - nft_dev_forward_path(route, ct, !dir, ft);
> > + nft_dev_forward_path(pkt, route, ct, !dir, ft);
> >
> > return 0;
> > }
> >
> > --
> > 2.51.0
> >
signature.asc
Description: PGP signature
