Re: [PATCH 1/1] xdp: Sample xdp program implementing ip forward
Sorry for the late reply. I will include the suggested changes in the next revision of the patch. Please see inline for clarifications and questions. Thanks, Christina From: Daniel Borkmann <dan...@iogearbox.net> Sent: Tuesday, October 3, 2017 9:24 PM To: Jacob, Christina; netdev@vger.kernel.org Cc: linux-ker...@vger.kernel.org; linux-arm-ker...@lists.infradead.org; alexei.starovoi...@gmail.com Subject: Re: [PATCH 1/1] xdp: Sample xdp program implementing ip forward >On 10/03/2017 09:37 AM, cjacob wrote: >> Implements port to port forwarding with route table and arp table >> lookup for ipv4 packets using bpf_redirect helper function and >> lpm_trie map. >> >> Signed-off-by: cjacob <christina.ja...@cavium.com> > >Thanks for the patch, just few minor comments below! > >Note, should be full name, e.g.: > > Signed-off-by: Christina Jacob <christina.ja...@cavium.com> > >Also you From: only shows 'cjacob' as can be seen from the cover letter >as well, so perhaps check your git settings to make that full name: > > cjacob (1): > xdp: Sample xdp program implementing ip forward > >If there's one single patch, then cover letter is not needed, only >for >1 sets. > >[...] >> +#define KBUILD_MODNAME "foo" >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include "bpf_helpers.h" >> +#include >> +#include >> + >> +struct trie_value { >> + __u8 prefix[4]; >> + long value; >> + int gw; >> + int ifindex; >> + int metric; >> +}; >> + >> +union key_4 { >> + u32 b32[2]; >> + u8 b8[8]; >> +}; >> + >> +struct arp_entry { >> + int dst; >> + long mac; >> +}; >> + >> +struct direct_map { >> + long mac; >> + int ifindex; >> + struct arp_entry arp; >> +}; >> + >> +/* Map for trie implementation*/ >> +struct bpf_map_def SEC("maps") lpm_map = { >> + .type = BPF_MAP_TYPE_LPM_TRIE, >> + .key_size = 8, >> + .value_size = >> + sizeof(struct trie_value), > >(Nit: there are couple of such breaks throughout the patch, can we > just use single line for such cases where reasonable?) > >> + .max_entries = 50, >> + .map_flags = BPF_F_NO_PREALLOC, >> +}; >> + >> +/* Map for counter*/ >> +struct bpf_map_def SEC("maps") rxcnt = { >> + .type = BPF_MAP_TYPE_PERCPU_ARRAY, >> + .key_size = sizeof(u32), >> + .value_size = sizeof(long), >> + .max_entries = 256, >> +}; >> + >> +/* Map for ARP table*/ >> +struct bpf_map_def SEC("maps") arp_table = { >> + .type = BPF_MAP_TYPE_HASH, >> + .key_size = sizeof(int), >> + .value_size = sizeof(long), > >Perhaps these should be proper structs here, such that it >becomes easier to read/handle later on lookup. > I am not clear about this. I am defining a ebpf map. I did not understand what structure you are refering to Am I missing something here?. >> + .max_entries = 50, >> +}; >> + >> +/* Map to keep the exact match entries in the route table*/ >> +struct bpf_map_def SEC("maps") exact_match = { >> + .type = BPF_MAP_TYPE_HASH, >> + .key_size = sizeof(int), >> + .value_size = sizeof(struct direct_map), >> + .max_entries = 50, >> +}; >> + >> +/** >> + * Function to set source and destination mac of the packet >> + */ >> +static inline void set_src_dst_mac(void *data, void *src, void *dst) >> +{ >> + unsigned short *p = data; >> + unsigned short *dest = dst; >> + unsigned short *source = src; >> + >> + p[3] = source[0]; >> + p[4] = source[1]; >> + p[5] = source[2]; >> + p[0] = dest[0]; >> + p[1] = dest[1]; >> + p[2] = dest[2]; > >You could just use __builtin_memcpy() given length is >constant anyway, so LLVM will do the inlining. > >> +} >> + >> +/** >> + * Parse IPV4 packet to get SRC, DST IP and protocol >> + */ >> +static inline int parse_ipv4(void *data, u64 nh_off, void *data_end, >> + unsigned int *src, unsigned int *dest) >> +{ >> + struct iphdr *iph = data + nh_off; >> + >> + if (iph + 1 > data_end) >> + return 0; >> + *src = (unsigned int)iph->saddr; >> + *dest = (unsigned in
Re: [PATCH 1/1] xdp: Sample xdp program implementing ip forward
On 10/3/17 12:37 AM, cjacob wrote: > diff --git a/samples/bpf/xdp3_kern.c b/samples/bpf/xdp3_kern.c > new file mode 100644 > index 000..62d905d > --- /dev/null > +++ b/samples/bpf/xdp3_kern.c > @@ -0,0 +1,204 @@ > +/* Copyright (c) 2016 PLUMgrid 2016 PLUMgrid? > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of version 2 of the GNU General Public > + * License as published by the Free Software Foundation. > + */ > +#define KBUILD_MODNAME "
Re: [PATCH 1/1] xdp: Sample xdp program implementing ip forward
On 10/03/2017 09:37 AM, cjacob wrote: Implements port to port forwarding with route table and arp table lookup for ipv4 packets using bpf_redirect helper function and lpm_trie map. Signed-off-by: cjacobThanks for the patch, just few minor comments below! Note, should be full name, e.g.: Signed-off-by: Christina Jacob Also you From: only shows 'cjacob' as can be seen from the cover letter as well, so perhaps check your git settings to make that full name: cjacob (1): xdp: Sample xdp program implementing ip forward If there's one single patch, then cover letter is not needed, only for >1 sets. [...] +#define KBUILD_MODNAME "foo" +#include +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" +#include +#include + +struct trie_value { + __u8 prefix[4]; + long value; + int gw; + int ifindex; + int metric; +}; + +union key_4 { + u32 b32[2]; + u8 b8[8]; +}; + +struct arp_entry { + int dst; + long mac; +}; + +struct direct_map { + long mac; + int ifindex; + struct arp_entry arp; +}; + +/* Map for trie implementation*/ +struct bpf_map_def SEC("maps") lpm_map = { + .type = BPF_MAP_TYPE_LPM_TRIE, + .key_size = 8, + .value_size = + sizeof(struct trie_value), (Nit: there are couple of such breaks throughout the patch, can we just use single line for such cases where reasonable?) + .max_entries = 50, + .map_flags = BPF_F_NO_PREALLOC, +}; + +/* Map for counter*/ +struct bpf_map_def SEC("maps") rxcnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = 256, +}; + +/* Map for ARP table*/ +struct bpf_map_def SEC("maps") arp_table = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(int), + .value_size = sizeof(long), Perhaps these should be proper structs here, such that it becomes easier to read/handle later on lookup. + .max_entries = 50, +}; + +/* Map to keep the exact match entries in the route table*/ +struct bpf_map_def SEC("maps") exact_match = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(int), + .value_size = sizeof(struct direct_map), + .max_entries = 50, +}; + +/** + * Function to set source and destination mac of the packet + */ +static inline void set_src_dst_mac(void *data, void *src, void *dst) +{ + unsigned short *p = data; + unsigned short *dest = dst; + unsigned short *source = src; + + p[3] = source[0]; + p[4] = source[1]; + p[5] = source[2]; + p[0] = dest[0]; + p[1] = dest[1]; + p[2] = dest[2]; You could just use __builtin_memcpy() given length is constant anyway, so LLVM will do the inlining. +} + +/** + * Parse IPV4 packet to get SRC, DST IP and protocol + */ +static inline int parse_ipv4(void *data, u64 nh_off, void *data_end, +unsigned int *src, unsigned int *dest) +{ + struct iphdr *iph = data + nh_off; + + if (iph + 1 > data_end) + return 0; + *src = (unsigned int)iph->saddr; + *dest = (unsigned int)iph->daddr; Why not stay with __be32 types? + return iph->protocol; +} + +SEC("xdp3") +int xdp_prog3(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + int rc = XDP_DROP, forward_to; + long *value; + struct trie_value *prefix_value; + long *dest_mac = NULL, *src_mac = NULL; + u16 h_proto; + u64 nh_off; + u32 ipproto; + union key_4 key4; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return rc; + + h_proto = eth->h_proto; + + if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { + struct vlan_hdr *vhdr; + + vhdr = data + nh_off; + nh_off += sizeof(struct vlan_hdr); + if (data + nh_off > data_end) + return rc; + h_proto = vhdr->h_vlan_encapsulated_proto; + } + if (h_proto == htons(ETH_P_ARP)) { + return XDP_PASS; + } else if (h_proto == htons(ETH_P_IP)) { + int src_ip = 0, dest_ip = 0; + struct direct_map *direct_entry; + + ipproto = parse_ipv4(data, nh_off, data_end, _ip, _ip); + direct_entry = (struct direct_map *)bpf_map_lookup_elem + (_match, _ip); + /*check for exact match, this would give a faster lookup*/ + if (direct_entry && direct_entry->mac && + direct_entry->arp.mac) { + src_mac = _entry->mac; + dest_mac = _entry->arp.mac; + forward_to =
[PATCH 1/1] xdp: Sample xdp program implementing ip forward
Implements port to port forwarding with route table and arp table lookup for ipv4 packets using bpf_redirect helper function and lpm_trie map. Signed-off-by: cjacob--- samples/bpf/Makefile|4 + samples/bpf/xdp3_kern.c | 204 +++ samples/bpf/xdp3_user.c | 649 +++ 3 files changed, 857 insertions(+), 0 deletions(-) diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index cf17c79..cc9cc0b 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -28,6 +28,7 @@ hostprogs-y += test_cgrp2_sock hostprogs-y += test_cgrp2_sock2 hostprogs-y += xdp1 hostprogs-y += xdp2 +hostprogs-y += xdp3 hostprogs-y += test_current_task_under_cgroup hostprogs-y += trace_event hostprogs-y += sampleip @@ -73,6 +74,7 @@ test_cgrp2_sock2-objs := bpf_load.o $(LIBBPF) test_cgrp2_sock2.o xdp1-objs := bpf_load.o $(LIBBPF) xdp1_user.o # reuse xdp1 source intentionally xdp2-objs := bpf_load.o $(LIBBPF) xdp1_user.o +xdp3-objs := bpf_load.o $(LIBBPF) xdp3_user.o test_current_task_under_cgroup-objs := bpf_load.o $(LIBBPF) cgroup_helpers.o \ test_current_task_under_cgroup_user.o trace_event-objs := bpf_load.o $(LIBBPF) trace_event_user.o @@ -114,6 +116,7 @@ always += parse_varlen.o parse_simple.o parse_ldabs.o always += test_cgrp2_tc_kern.o always += xdp1_kern.o always += xdp2_kern.o +always += xdp3_kern.o always += test_current_task_under_cgroup_kern.o always += trace_event_kern.o always += sampleip_kern.o @@ -160,6 +163,7 @@ HOSTLOADLIBES_map_perf_test += -lelf -lrt HOSTLOADLIBES_test_overhead += -lelf -lrt HOSTLOADLIBES_xdp1 += -lelf HOSTLOADLIBES_xdp2 += -lelf +HOSTLOADLIBES_xdp3 += -lelf HOSTLOADLIBES_test_current_task_under_cgroup += -lelf HOSTLOADLIBES_trace_event += -lelf HOSTLOADLIBES_sampleip += -lelf diff --git a/samples/bpf/xdp3_kern.c b/samples/bpf/xdp3_kern.c new file mode 100644 index 000..62d905d --- /dev/null +++ b/samples/bpf/xdp3_kern.c @@ -0,0 +1,204 @@ +/* Copyright (c) 2016 PLUMgrid + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#define KBUILD_MODNAME "foo" +#include +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" +#include +#include + +struct trie_value { + __u8 prefix[4]; + long value; + int gw; + int ifindex; + int metric; +}; + +union key_4 { + u32 b32[2]; + u8 b8[8]; +}; + +struct arp_entry { + int dst; + long mac; +}; + +struct direct_map { + long mac; + int ifindex; + struct arp_entry arp; +}; + +/* Map for trie implementation*/ +struct bpf_map_def SEC("maps") lpm_map = { + .type = BPF_MAP_TYPE_LPM_TRIE, + .key_size = 8, + .value_size = + sizeof(struct trie_value), + .max_entries = 50, + .map_flags = BPF_F_NO_PREALLOC, +}; + +/* Map for counter*/ +struct bpf_map_def SEC("maps") rxcnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = 256, +}; + +/* Map for ARP table*/ +struct bpf_map_def SEC("maps") arp_table = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(int), + .value_size = sizeof(long), + .max_entries = 50, +}; + +/* Map to keep the exact match entries in the route table*/ +struct bpf_map_def SEC("maps") exact_match = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(int), + .value_size = sizeof(struct direct_map), + .max_entries = 50, +}; + +/** + * Function to set source and destination mac of the packet + */ +static inline void set_src_dst_mac(void *data, void *src, void *dst) +{ + unsigned short *p = data; + unsigned short *dest = dst; + unsigned short *source = src; + + p[3] = source[0]; + p[4] = source[1]; + p[5] = source[2]; + p[0] = dest[0]; + p[1] = dest[1]; + p[2] = dest[2]; +} + +/** + * Parse IPV4 packet to get SRC, DST IP and protocol + */ +static inline int parse_ipv4(void *data, u64 nh_off, void *data_end, +unsigned int *src, unsigned int *dest) +{ + struct iphdr *iph = data + nh_off; + + if (iph + 1 > data_end) + return 0; + *src = (unsigned int)iph->saddr; + *dest = (unsigned int)iph->daddr; + return iph->protocol; +} + +SEC("xdp3") +int xdp_prog3(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + int rc = XDP_DROP, forward_to; + long *value; + struct trie_value *prefix_value; + long *dest_mac = NULL, *src_mac = NULL; + u16 h_proto; + u64 nh_off; + u32 ipproto; + union key_4 key4;