On Wed, 18 Apr 2018 22:59:27 +0200 Daniel Borkmann <dan...@iogearbox.net> wrote:
> On 04/17/2018 06:48 AM, Eyal Birger wrote:
> > This commit introduces a helper which allows fetching xfrm state
> > parameters by eBPF programs attached to TC.
> >
> > Prototype:
> > bpf_skb_get_xfrm_state(skb, index, xfrm_state, size, flags)
> >
> > skb: pointer to skb
> > index: the index in the skb xfrm_state secpath array
> > xfrm_state: pointer to 'struct bpf_xfrm_state'
> > size: size of 'struct bpf_xfrm_state'
> > flags: reserved for future extensions
> >
> > The helper returns 0 on success. Non-zero if no xfrm state at the
> > index is found - or none exists at all.
> >
> > struct bpf_xfrm_state currently includes the SPI, peer IPv4/IPv6
> > address and the reqid; it can be further extended by adding
> > elements to its end - indicating the populated fields by the 'size'
> > argument - keeping backwards compatibility.
> >
> > Typical usage:
> >
> > struct bpf_xfrm_state x = {};
> > bpf_skb_get_xfrm_state(skb, 0, &x, sizeof(x), 0);
> > ...
> >
> > Signed-off-by: Eyal Birger <eyal.bir...@gmail.com>
>
> Patch looks good to me, two comments below:

Thanks! I incorporated your suggestions in v2.

Eyal.

>
> > ---
> > include/uapi/linux/bpf.h | 25 ++++++++++++++++++++++++-
> > net/core/filter.c | 46
> > ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70
> > insertions(+), 1 deletion(-)
> >
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index c5ec897..132e172 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -755,6 +755,15 @@ union bpf_attr {
> > * @addr: pointer to struct sockaddr to bind socket to
> > * @addr_len: length of sockaddr structure
> > * Return: 0 on success or negative error code
> > + *
> > + * int bpf_skb_get_xfrm_state(skb, index, xfrm_state, size, flags)
> > + * retrieve XFRM state
> > + * @skb: pointer to skb
> > + * @index: index of the xfrm state in the secpath
> > + * @key: pointer to 'struct bpf_xfrm_state'
> > + * @size: size of 'struct bpf_xfrm_state'
> > + * @flags: room for future extensions
> > + * Return: 0 on success or negative error
> > */
> > #define __BPF_FUNC_MAPPER(FN) \
> > FN(unspec), \
> > @@ -821,7 +830,8 @@ union bpf_attr {
> > FN(msg_apply_bytes), \
> > FN(msg_cork_bytes), \
> > FN(msg_pull_data), \
> > - FN(bind),
> > + FN(bind), \
> > + FN(skb_get_xfrm_state),
> >
> > /* integer value in 'imm' field of BPF_CALL instruction selects
> > which helper
> > * function eBPF program intends to call
> > @@ -927,6 +937,19 @@ struct bpf_tunnel_key {
> > __u32 tunnel_label;
> > };
> >
> > +/* user accessible mirror of in-kernel xfrm_state.
> > + * new fields can only be added to the end of this structure
> > + */
> > +struct bpf_xfrm_state {
> > + __u32 reqid;
> > + __u32 spi;
> > + __u16 family;
> > + union {
> > + __u32 remote_ipv4;
> > + __u32 remote_ipv6[4];
> > + };
> > +};
> > +
> > /* Generic BPF return codes which all BPF program types may
> > support.
> > * The values are binary compatible with their TC_ACT_*
> > counter-part to
> > * provide backwards compatibility with existing SCHED_CLS and
> > SCHED_ACT diff --git a/net/core/filter.c b/net/core/filter.c
> > index d31aff9..c06600a 100644
> > --- a/net/core/filter.c
> > +++ b/net/core/filter.c
> > @@ -57,6 +57,7 @@
> > #include <net/sock_reuseport.h>
> > #include <net/busy_poll.h>
> > #include <net/tcp.h>
> > +#include <net/xfrm.h>
> > #include <linux/bpf_trace.h>
> >
> > /**
> > @@ -3703,6 +3704,49 @@ static const struct bpf_func_proto
> > bpf_bind_proto = { .arg3_type = ARG_CONST_SIZE,
> > };
> >
> > +BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32,
> > index,
> > + struct bpf_xfrm_state *, to, u32, size, u64, flags)
> > +{
> > +#ifdef CONFIG_XFRM
> > + const struct sec_path *sp = skb_sec_path(skb);
> > + const struct xfrm_state *x;
> > +
> > + if (!sp || index >= sp->len)
>
> This should be something like:
>
>   if (!sp || unlikely(index >= sp->len || flags))
>
> Such that we unconditionally bail out on any flags currently, since
> this is reserved for future use and anything non-zero would be
> invalid and rejected until we start extending it.
>
> > + goto err_clear;
> > +
> > + x = sp->xvec[index];
> > +
> > + if (unlikely(size != sizeof(struct bpf_xfrm_state)))
> > + goto err_clear;
> > +
> > + to->reqid = x->props.reqid;
> > + to->spi = be32_to_cpu(x->id.spi);
> > + to->family = x->props.family;
> > + if (to->family == AF_INET6) {
> > + memcpy(to->remote_ipv6, x->props.saddr.a6,
> > + sizeof(to->remote_ipv6));
> > + } else {
> > + to->remote_ipv4 = be32_to_cpu(x->props.saddr.a4);
> > + }
> > +
> > + return 0;
> > +err_clear:
> > +#endif
> > + memset(to, 0, size);
> > + return -EINVAL;
> > +}
> > +
> > +static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = {
> > + .func = bpf_skb_get_xfrm_state,
> > + .gpl_only = false,
> > + .ret_type = RET_INTEGER,
> > + .arg1_type = ARG_PTR_TO_CTX,
> > + .arg2_type = ARG_ANYTHING,
> > + .arg3_type = ARG_PTR_TO_UNINIT_MEM,
> > + .arg4_type = ARG_CONST_SIZE,
> > + .arg5_type = ARG_ANYTHING,
> > +};
> > +
> > static const struct bpf_func_proto *
> > bpf_base_func_proto(enum bpf_func_id func_id)
> > {
> > @@ -3844,6 +3888,8 @@ tc_cls_act_func_proto(enum bpf_func_id
> > func_id, const struct bpf_prog *prog) return
> > &bpf_get_socket_cookie_proto; case BPF_FUNC_get_socket_uid:
> > return &bpf_get_socket_uid_proto;
> > + case BPF_FUNC_skb_get_xfrm_state:
> > + return &bpf_skb_get_xfrm_state_proto;
>
> Potentially, on kernels with !CONFIG_XFRM, you might want to let the
> program bail out at program verification phase already? Thus it would
> become ...
>
> #ifdef CONFIG_XFRM
> case BPF_FUNC_skb_get_xfrm_state:
> return &bpf_skb_get_xfrm_state_proto;
> #endif
>
> ... where you'd also wrap the helper + state_proto in CONFIG_XFRM.
>
> > default:
> > return bpf_base_func_proto(func_id);
> > }
> >
>