From: Petar Penkov <ppen...@google.com>

This eBPF program extracts the basic, control, IP address and port keys
from incoming packets. It supports recursive parsing of IP encapsulation
and VLAN, along with IPv4/IPv6 and IPv6 extension headers. This program
is meant to show how flow dissection and key extraction can be done in
eBPF.

Link: http://vger.kernel.org/netconf2017_files/rx_hardening_and_udp_gso.pdf
Signed-off-by: Petar Penkov <ppen...@google.com>
Signed-off-by: Willem de Bruijn <will...@google.com>
---
 tools/testing/selftests/bpf/Makefile   |   2 +-
 tools/testing/selftests/bpf/bpf_flow.c | 373 +++++++++++++++++++++++++
 2 files changed, 374 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/bpf_flow.c

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index fff7fb1285fc..e65f50f9185e 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -35,7 +35,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
        test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
        test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
        get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
-       test_skb_cgroup_id_kern.o
+       test_skb_cgroup_id_kern.o bpf_flow.o
 
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
diff --git a/tools/testing/selftests/bpf/bpf_flow.c b/tools/testing/selftests/bpf/bpf_flow.c
new file mode 100644
index 000000000000..5fb809d95867
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_flow.c
@@ -0,0 +1,373 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <limits.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/icmp.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/if_packet.h>
+#include <sys/socket.h>
+#include <linux/if_tunnel.h>
+#include <linux/mpls.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+#define PROG(F) SEC(#F) int bpf_func_##F
+
+/* These are the identifiers of the BPF programs that will be used in tail
+ * calls: parse_eth_proto() and parse_ipv6_proto() pass them to
+ * bpf_tail_call() as indices into jmp_table. PROG(F) above places each
+ * program in a section named after its identifier and names the function
+ * bpf_func_<identifier>. Name is limited to 16 characters; with the
+ * terminating character and the bpf_func_ prefix, we have only 6 to work
+ * with, anything after will be cropped.
+ */
+enum {
+       IP,
+       IPV6,
+       IPV6OP, /* Destination/Hop-by-Hop Options IPv6 Extension header */
+       IPV6FR, /* Fragmentation IPv6 Extension Header */
+       MPLS,
+       VLAN,
+};
+
+#define IP_MF          0x2000 /* IPv4 frag_off: "more fragments" flag */
+#define IP_OFFSET      0x1FFF /* IPv4 frag_off: fragment offset bits */
+#define IP6_MF         0x0001 /* IPv6 frag_off: "more fragments" flag */
+#define IP6_OFFSET     0xFFF8 /* IPv6 frag_off: fragment offset bits */
+
+struct vlan_hdr { /* VLAN tag as read by the VLAN program */
+       __be16 h_vlan_TCI; /* tag control information; not inspected here */
+       __be16 h_vlan_encapsulated_proto; /* ethertype of what follows the tag */
+};
+
+struct gre_hdr { /* leading four bytes of a GRE header */
+       __be16 flags; /* tested for version and checksum/key/sequence bits */
+       __be16 proto; /* ethertype of the encapsulated payload */
+};
+
+struct frag_hdr { /* IPv6 Fragment extension header */
+       __u8 nexthdr; /* protocol number of the header that follows */
+       __u8 reserved;
+       __be16 frag_off; /* tested against IP6_OFFSET to spot first fragments */
+       __be32 identification;
+};
+
+struct bpf_map_def SEC("maps") jmp_table = { /* targets of bpf_tail_call() */
+       .type = BPF_MAP_TYPE_PROG_ARRAY,
+       .key_size = sizeof(__u32),
+       .value_size = sizeof(__u32),
+       .max_entries = 8 /* the tail-call identifiers used in this file are 0..5 */
+};
+
+/* Returns a pointer to the hdr_size bytes located at offset
+ * skb->flow_keys->nhoff, or NULL when they cannot be obtained.
+ *
+ * If those bytes lie entirely between skb->data and skb->data_end, the
+ * returned pointer refers to the packet data itself. Otherwise they are
+ * copied into buffer with bpf_skb_load_bytes() and buffer is returned, so
+ * buffer must have room for hdr_size bytes.
+ */
+static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb,
+                                                        __u16 hdr_size,
+                                                        void *buffer)
+{
+       void *pkt_end = (void *)(long)skb->data_end;
+       void *pkt = (void *)(long)skb->data;
+       __u16 off = skb->flow_keys->nhoff;
+       __u8 *in_pkt;
+
+       /* Refuse offsets for which off + hdr_size would exceed USHRT_MAX */
+       if (off > (USHRT_MAX - hdr_size))
+               return NULL;
+
+       /* Header is directly addressable: hand out a pointer into the packet */
+       in_pkt = pkt + off;
+       if (in_pkt + hdr_size <= pkt_end)
+               return in_pkt;
+
+       /* Otherwise copy the header into the caller's buffer */
+       if (bpf_skb_load_bytes(skb, off, buffer, hdr_size) != 0)
+               return NULL;
+
+       return buffer;
+}
+
+/* Dispatches on ETHERTYPE
+ *
+ * Stores proto in flow_keys->n_proto, then tail-calls the program that
+ * handles it. Returns BPF_DROP when the protocol is not supported or when
+ * the tail call did not transfer control.
+ */
+static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
+{
+       skb->flow_keys->n_proto = proto;
+
+       if (proto == bpf_htons(ETH_P_IP))
+               bpf_tail_call(skb, &jmp_table, IP);
+       else if (proto == bpf_htons(ETH_P_IPV6))
+               bpf_tail_call(skb, &jmp_table, IPV6);
+       else if (proto == bpf_htons(ETH_P_MPLS_MC) ||
+                proto == bpf_htons(ETH_P_MPLS_UC))
+               bpf_tail_call(skb, &jmp_table, MPLS);
+       else if (proto == bpf_htons(ETH_P_8021Q) ||
+                proto == bpf_htons(ETH_P_8021AD))
+               bpf_tail_call(skb, &jmp_table, VLAN);
+
+       /* Protocol not supported, or the tail call fell through */
+       return BPF_DROP;
+}
+
+/* Entry point: dispatches on skb->vlan_proto when skb->vlan_present is set
+ * and on skb->protocol otherwise.
+ */
+SEC("dissect")
+int dissect(struct __sk_buff *skb)
+{
+       if (skb->vlan_present)
+               return parse_eth_proto(skb, skb->vlan_proto);
+
+       return parse_eth_proto(skb, skb->protocol);
+}
+
+/* Parses on IPPROTO_*
+ *
+ * Records proto in flow_keys->ip_proto and dissects the header found at
+ * flow_keys->nhoff:
+ *  - ICMP: only checks that the header can be read.
+ *  - IPIP and IPv6-in-IP: marks the flow as encapsulated and dispatches to
+ *    the inner IP program.
+ *  - GRE: for version 0, steps over the GRE header and its optional
+ *    checksum/key/sequence words, marks the flow as encapsulated and
+ *    dispatches on the inner ethertype (read from the inner Ethernet header
+ *    for ETH_P_TEB). Other versions are accepted without further parsing.
+ *  - TCP, UDP and UDP-Lite: records thoff and the source/destination ports.
+ *
+ * Returns BPF_OK when dissection is complete. Returns BPF_DROP for
+ * unsupported protocols, for headers that cannot be read or are malformed,
+ * and when a tail call issued through parse_eth_proto() falls through.
+ */
+static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto)
+{
+       struct bpf_flow_keys *keys = skb->flow_keys;
+       struct icmphdr *icmp, _icmp;
+       struct gre_hdr *gre, _gre;
+       struct ethhdr *eth, _eth;
+       struct tcphdr *tcp, _tcp;
+       struct udphdr *udp, _udp;
+       __u8 probe;
+
+       keys->ip_proto = proto;
+       switch (proto) {
+       case IPPROTO_ICMP:
+               icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp);
+               if (!icmp)
+                       return BPF_DROP;
+               return BPF_OK;
+       case IPPROTO_IPIP:
+               keys->is_encap = true;
+               return parse_eth_proto(skb, bpf_htons(ETH_P_IP));
+       case IPPROTO_IPV6:
+               keys->is_encap = true;
+               return parse_eth_proto(skb, bpf_htons(ETH_P_IPV6));
+       case IPPROTO_GRE:
+               gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre);
+               if (!gre)
+                       return BPF_DROP;
+
+               if (gre->flags & GRE_VERSION)
+                       /* Only inspect standard GRE packets with version 0 */
+                       return BPF_OK;
+
+               keys->nhoff += sizeof(*gre); /* Step over GRE Flags and Proto */
+               if (GRE_IS_CSUM(gre->flags))
+                       keys->nhoff += 4; /* Step over chksum and Padding */
+               if (GRE_IS_KEY(gre->flags))
+                       keys->nhoff += 4; /* Step over key */
+               if (GRE_IS_SEQ(gre->flags))
+                       keys->nhoff += 4; /* Step over sequence number */
+
+               keys->is_encap = true;
+
+               if (gre->proto == bpf_htons(ETH_P_TEB)) {
+                       /* Payload is an Ethernet frame: dispatch on the
+                        * ethertype of the inner Ethernet header.
+                        */
+                       eth = bpf_flow_dissect_get_header(skb, sizeof(*eth),
+                                                         &_eth);
+                       if (!eth)
+                               return BPF_DROP;
+
+                       keys->nhoff += sizeof(*eth);
+
+                       return parse_eth_proto(skb, eth->h_proto);
+               } else {
+                       return parse_eth_proto(skb, gre->proto);
+               }
+       case IPPROTO_TCP:
+               tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp);
+               if (!tcp)
+                       return BPF_DROP;
+
+               /* TCP header cannot be smaller than 20 bytes */
+               if (tcp->doff < 5)
+                       return BPF_DROP;
+
+               /* Make sure the whole header, options included, is part of
+                * the packet. tcp may point at the on-stack copy _tcp rather
+                * than into the packet, so it must not be compared against
+                * skb->data_end; probe the last header byte by offset instead.
+                */
+               if (bpf_skb_load_bytes(skb,
+                                      keys->nhoff + (tcp->doff << 2) - 1,
+                                      &probe, sizeof(probe)))
+                       return BPF_DROP;
+
+               keys->thoff = keys->nhoff;
+               keys->sport = tcp->source;
+               keys->dport = tcp->dest;
+               return BPF_OK;
+       case IPPROTO_UDP:
+       case IPPROTO_UDPLITE:
+               udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp);
+               if (!udp)
+                       return BPF_DROP;
+
+               keys->thoff = keys->nhoff;
+               keys->sport = udp->source;
+               keys->dport = udp->dest;
+               return BPF_OK;
+       default:
+               /* Protocol not supported */
+               break;
+       }
+
+       return BPF_DROP;
+}
+
+/* Dispatches on an IPv6 Next Header value.
+ *
+ * Stores nexthdr in flow_keys->ip_proto. Hop-by-Hop/Destination Options and
+ * Fragment headers are handed to their own programs through a tail call;
+ * every other value is passed to parse_ip_proto(). Returns BPF_DROP when a
+ * tail call did not transfer control.
+ */
+static __always_inline int parse_ipv6_proto(struct __sk_buff *skb,
+                                            __u8 nexthdr)
+{
+       skb->flow_keys->ip_proto = nexthdr;
+
+       if (nexthdr == IPPROTO_HOPOPTS || nexthdr == IPPROTO_DSTOPTS)
+               bpf_tail_call(skb, &jmp_table, IPV6OP);
+       else if (nexthdr == IPPROTO_FRAGMENT)
+               bpf_tail_call(skb, &jmp_table, IPV6FR);
+       else
+               return parse_ip_proto(skb, nexthdr);
+
+       /* Only reached when the tail call fell through */
+       return BPF_DROP;
+}
+
+/* IPv4 program: records the addresses, steps nhoff over the IP header
+ * (options included), flags fragments and hands the payload protocol to
+ * parse_ip_proto(). Fragments with a non-zero offset return BPF_OK right
+ * away because their payload does not start with a parsable header.
+ */
+PROG(IP)(struct __sk_buff *skb)
+{
+       void *data_end = (void *)(long)skb->data_end;
+       struct bpf_flow_keys *keys = skb->flow_keys;
+       void *data = (void *)(long)skb->data;
+       struct iphdr *ip4, ip4_copy;
+
+       ip4 = bpf_flow_dissect_get_header(skb, sizeof(*ip4), &ip4_copy);
+       if (ip4 == NULL)
+               return BPF_DROP;
+
+       /* ihl counts 32-bit words; anything below 5 is shorter than the
+        * mandatory 20 byte header.
+        */
+       if (ip4->ihl < 5)
+               return BPF_DROP;
+
+       keys->addr_proto = ETH_P_IP;
+       keys->ipv4_src = ip4->saddr;
+       keys->ipv4_dst = ip4->daddr;
+
+       /* Drop when the end of the IP header lies past skb->data_end */
+       keys->nhoff += ip4->ihl << 2;
+       if (data + keys->nhoff > data_end)
+               return BPF_DROP;
+
+       if (ip4->frag_off & bpf_htons(IP_MF | IP_OFFSET)) {
+               keys->is_frag = true;
+
+               /* From second fragment on, packets do not have headers
+                * we can parse.
+                */
+               if (ip4->frag_off & bpf_htons(IP_OFFSET))
+                       return BPF_OK;
+
+               keys->is_first_frag = true;
+       }
+
+       return parse_ip_proto(skb, ip4->protocol);
+}
+
+/* IPv6 program: records the addresses, steps nhoff over the fixed IPv6
+ * header and dispatches on its Next Header field.
+ */
+PROG(IPV6)(struct __sk_buff *skb)
+{
+       struct bpf_flow_keys *keys = skb->flow_keys;
+       struct ipv6hdr *ip6, ip6_copy;
+
+       ip6 = bpf_flow_dissect_get_header(skb, sizeof(*ip6), &ip6_copy);
+       if (ip6 == NULL)
+               return BPF_DROP;
+
+       keys->addr_proto = ETH_P_IPV6;
+
+       /* One copy of twice the address size, starting at saddr/ipv6_src.
+        * NOTE(review): relies on daddr directly following saddr and on
+        * ipv6_dst directly following ipv6_src - confirm against the
+        * struct definitions.
+        */
+       memcpy(&keys->ipv6_src, &ip6->saddr, 2 * sizeof(ip6->saddr));
+
+       keys->nhoff += sizeof(*ip6);
+
+       return parse_ipv6_proto(skb, ip6->nexthdr);
+}
+
+/* IPv6 Hop-by-Hop/Destination Options program: steps nhoff over the options
+ * header and dispatches on its Next Header field.
+ */
+PROG(IPV6OP)(struct __sk_buff *skb)
+{
+       struct ipv6_opt_hdr *opt, opt_copy;
+
+       opt = bpf_flow_dissect_get_header(skb, sizeof(*opt), &opt_copy);
+       if (opt == NULL)
+               return BPF_DROP;
+
+       /* hdrlen counts 8-octet units and leaves out the first 8 octets of
+        * the header, hence the + 1 before converting to bytes.
+        */
+       skb->flow_keys->nhoff += (1 + opt->hdrlen) << 3;
+
+       return parse_ipv6_proto(skb, opt->nexthdr);
+}
+
+/* IPv6 Fragment header program: steps nhoff over the header, flags the flow
+ * as fragmented (and as a first fragment when the offset bits are all zero),
+ * then dispatches on the Next Header field.
+ */
+PROG(IPV6FR)(struct __sk_buff *skb)
+{
+       struct bpf_flow_keys *keys = skb->flow_keys;
+       struct frag_hdr *frag, frag_copy;
+
+       frag = bpf_flow_dissect_get_header(skb, sizeof(*frag), &frag_copy);
+       if (frag == NULL)
+               return BPF_DROP;
+
+       keys->nhoff += sizeof(*frag);
+       keys->is_frag = true;
+       if ((frag->frag_off & bpf_htons(IP6_OFFSET)) == 0)
+               keys->is_first_frag = true;
+
+       return parse_ipv6_proto(skb, frag->nexthdr);
+}
+
+/* MPLS program: only checks that one label entry can be read at nhoff; no
+ * keys are extracted from it.
+ */
+PROG(MPLS)(struct __sk_buff *skb)
+{
+       struct mpls_label *label, label_copy;
+
+       label = bpf_flow_dissect_get_header(skb, sizeof(*label), &label_copy);
+       if (label == NULL)
+               return BPF_DROP;
+
+       return BPF_OK;
+}
+
+/* VLAN program: steps nhoff over a single 802.1Q tag, or over an 802.1AD tag
+ * followed by an 802.1Q tag, and dispatches on the encapsulated ethertype.
+ * Any other stacking of VLAN tags is dropped.
+ */
+PROG(VLAN)(struct __sk_buff *skb)
+{
+       struct bpf_flow_keys *keys = skb->flow_keys;
+       struct vlan_hdr *tag, tag_copy;
+       __be16 prev_proto;
+
+       /* Peek at the two bytes right before nhoff (presumably the ethertype
+        * that announced this tag) to tell single from double tagging.
+        */
+       if (bpf_skb_load_bytes(skb, keys->nhoff - sizeof(prev_proto),
+                              &prev_proto, sizeof(prev_proto)) != 0)
+               return BPF_DROP;
+
+       /* Double tagging: the outer tag must be followed by an 802.1Q tag */
+       if (prev_proto == bpf_htons(ETH_P_8021AD)) {
+               tag = bpf_flow_dissect_get_header(skb, sizeof(*tag),
+                                                 &tag_copy);
+               if (tag == NULL)
+                       return BPF_DROP;
+
+               if (tag->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q))
+                       return BPF_DROP;
+
+               keys->nhoff += sizeof(*tag);
+       }
+
+       tag = bpf_flow_dissect_get_header(skb, sizeof(*tag), &tag_copy);
+       if (tag == NULL)
+               return BPF_DROP;
+
+       keys->nhoff += sizeof(*tag);
+
+       /* Only allow 8021AD + 8021Q double tagging and no triple tagging. */
+       if (tag->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) ||
+           tag->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q))
+               return BPF_DROP;
+
+       return parse_eth_proto(skb, tag->h_vlan_encapsulated_proto);
+}
+
+char __license[] SEC("license") = "GPL"; /* license string, placed in the "license" section */
-- 
2.19.0.397.gdd90340f6a-goog

Reply via email to