The tc_tunnel selftest only partially validated post-decap state and missed some tunnel cases. In particular, the IPXIP decap flags were never set on the IPIP/SIT paths, and skb->encapsulation was not verified after decap for non-GSO packets.
Tighten the test by: - setting DECAP_IPXIP4/6 flags for IPIP/SIT/IP6 decap paths based on the outer tunnel header family; - requiring needed DECAP enum values via CO-RE enum existence checks so missing kernel support fails fast; - validating post-decap tunnel state for both GSO and non-GSO packets: expected gso_type bits must be cleared and skb->encapsulation must match remaining tunnel flags; - removing forced TSO disable in the test harness so GSO validation is exercised. This improves coverage for decap tunnel-state regressions and ensures sit_none/ipip-style paths are checked correctly. Signed-off-by: Nick Hudson <[email protected]> --- .../selftests/bpf/prog_tests/test_tc_tunnel.c | 1 - .../selftests/bpf/progs/test_tc_tunnel.c | 91 +++++++++++++++++-- 2 files changed, 84 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c index 1aa7c9463980..67ba27d69347 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c @@ -438,7 +438,6 @@ static int setup(void) SYS(fail_close_ns_client, "ip link add %s type veth peer name %s", "veth1 mtu 1500 netns " CLIENT_NS " address " MAC_ADDR_VETH1, "veth2 mtu 1500 netns " SERVER_NS " address " MAC_ADDR_VETH2); - SYS(fail_close_ns_client, "ethtool -K veth1 tso off"); SYS(fail_close_ns_client, "ip link set veth1 up"); nstoken_server = open_netns(SERVER_NS); if (!ASSERT_OK_PTR(nstoken_server, "open server ns")) diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c index 7376df405a6b..853bca962910 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c +++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c @@ -6,6 +6,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> +#include <bpf/bpf_core_read.h> #include "bpf_tracing_net.h" #include "bpf_compiler.h" @@ -37,6 +38,22 @@ 
struct vxlanhdr___local { #define EXTPROTO_VXLAN 0x1 +#define SKB_GSO_UDP_TUNNEL_MASK (SKB_GSO_UDP_TUNNEL | \ + SKB_GSO_UDP_TUNNEL_CSUM) + +#define SKB_GSO_TUNNEL_MASK (SKB_GSO_UDP_TUNNEL_MASK | \ + SKB_GSO_GRE | \ + SKB_GSO_GRE_CSUM | \ + SKB_GSO_IPXIP4 | \ + SKB_GSO_IPXIP6 | \ + SKB_GSO_ESP) + +#define BPF_F_ADJ_ROOM_DECAP_L4_MASK (BPF_F_ADJ_ROOM_DECAP_L4_UDP | \ + BPF_F_ADJ_ROOM_DECAP_L4_GRE) + +#define BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK (BPF_F_ADJ_ROOM_DECAP_IPXIP4 | \ + BPF_F_ADJ_ROOM_DECAP_IPXIP6) + #define VXLAN_FLAGS bpf_htonl(1<<27) #define VNI_ID 1 #define VXLAN_VNI bpf_htonl(VNI_ID << 8) @@ -589,9 +606,12 @@ int __encap_ip6vxlan_eth(struct __sk_buff *skb) return TC_ACT_OK; } -static int decap_internal(struct __sk_buff *skb, int off, int len, char proto) +static int decap_internal(struct __sk_buff *skb, int off, int len, char proto, + __u64 ipxip_flag) { __u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO; + struct sk_buff *kskb; + struct skb_shared_info *shinfo; struct ipv6_opt_hdr ip6_opt_hdr; struct gre_hdr greh; struct udphdr udph; @@ -599,10 +619,12 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto) switch (proto) { case IPPROTO_IPIP: - flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4; + flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4 | + ipxip_flag; break; case IPPROTO_IPV6: - flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6; + flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6 | + ipxip_flag; break; case NEXTHDR_DEST: if (bpf_skb_load_bytes(skb, off + len, &ip6_opt_hdr, @@ -610,10 +632,12 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto) return TC_ACT_OK; switch (ip6_opt_hdr.nexthdr) { case IPPROTO_IPIP: - flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4; + flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4 | + ipxip_flag; break; case IPPROTO_IPV6: - flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6; + flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6 | + ipxip_flag; break; default: return TC_ACT_OK; @@ -621,6 +645,11 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char 
proto) break; case IPPROTO_GRE: olen += sizeof(struct gre_hdr); + if (!bpf_core_enum_value_exists(enum bpf_adj_room_flags, + BPF_F_ADJ_ROOM_DECAP_L4_GRE)) + return TC_ACT_SHOT; + flags |= BPF_F_ADJ_ROOM_DECAP_L4_GRE; + if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0) return TC_ACT_OK; switch (bpf_ntohs(greh.protocol)) { @@ -634,6 +663,10 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto) break; case IPPROTO_UDP: olen += sizeof(struct udphdr); + if (!bpf_core_enum_value_exists(enum bpf_adj_room_flags, + BPF_F_ADJ_ROOM_DECAP_L4_UDP)) + return TC_ACT_SHOT; + flags |= BPF_F_ADJ_ROOM_DECAP_L4_UDP; if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0) return TC_ACT_OK; switch (bpf_ntohs(udph.dest)) { @@ -655,6 +688,40 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto) if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, flags)) return TC_ACT_SHOT; + kskb = bpf_cast_to_kern_ctx(skb); + shinfo = bpf_core_cast(kskb->head + kskb->end, struct skb_shared_info); + if (shinfo->gso_size) { + if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_UDP) && + (shinfo->gso_type & SKB_GSO_UDP_TUNNEL_MASK)) + return TC_ACT_SHOT; + + if ((flags & BPF_F_ADJ_ROOM_DECAP_L4_GRE) && + (shinfo->gso_type & (SKB_GSO_GRE | SKB_GSO_GRE_CSUM))) + return TC_ACT_SHOT; + + if ((flags & BPF_F_ADJ_ROOM_DECAP_IPXIP4) && + (shinfo->gso_type & SKB_GSO_IPXIP4)) + return TC_ACT_SHOT; + + if ((flags & BPF_F_ADJ_ROOM_DECAP_IPXIP6) && + (shinfo->gso_type & SKB_GSO_IPXIP6)) + return TC_ACT_SHOT; + + if (flags & (BPF_F_ADJ_ROOM_DECAP_L4_MASK | + BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK)) { + if ((shinfo->gso_type & SKB_GSO_TUNNEL_MASK) && + !kskb->encapsulation) + return TC_ACT_SHOT; + if (!(shinfo->gso_type & SKB_GSO_TUNNEL_MASK) && + kskb->encapsulation) + return TC_ACT_SHOT; + } + } else if ((flags & (BPF_F_ADJ_ROOM_DECAP_L4_MASK | + BPF_F_ADJ_ROOM_DECAP_IPXIP_MASK)) && + kskb->encapsulation) { + return TC_ACT_SHOT; + } + return TC_ACT_OK; } @@ 
-662,6 +729,10 @@ static int decap_ipv4(struct __sk_buff *skb) { struct iphdr iph_outer; + if (!bpf_core_enum_value_exists(enum bpf_adj_room_flags, + BPF_F_ADJ_ROOM_DECAP_IPXIP4)) + return TC_ACT_SHOT; + if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer, sizeof(iph_outer)) < 0) return TC_ACT_OK; @@ -670,19 +741,25 @@ static int decap_ipv4(struct __sk_buff *skb) return TC_ACT_OK; return decap_internal(skb, ETH_HLEN, sizeof(iph_outer), - iph_outer.protocol); + iph_outer.protocol, + BPF_F_ADJ_ROOM_DECAP_IPXIP4); } static int decap_ipv6(struct __sk_buff *skb) { struct ipv6hdr iph_outer; + if (!bpf_core_enum_value_exists(enum bpf_adj_room_flags, + BPF_F_ADJ_ROOM_DECAP_IPXIP6)) + return TC_ACT_SHOT; + if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_outer, sizeof(iph_outer)) < 0) return TC_ACT_OK; return decap_internal(skb, ETH_HLEN, sizeof(iph_outer), - iph_outer.nexthdr); + iph_outer.nexthdr, + BPF_F_ADJ_ROOM_DECAP_IPXIP6); } SEC("tc") -- 2.34.1

