Re: [RFC PATCH bpf-next v4 4/7] bpf: add bpf queue and stack maps
On 10/04/2018 10:40 PM, Mauricio Vasquez wrote: On 10/04/2018 06:57 PM, Alexei Starovoitov wrote: On Thu, Oct 04, 2018 at 07:12:44PM +0200, Mauricio Vasquez B wrote: Implement two new kind of maps that support the peek, push and pop operations. A use case for this is to keep track of a pool of elements, like network ports in a SNAT. Signed-off-by: Mauricio Vasquez B --- include/linux/bpf.h | 7 + include/linux/bpf_types.h | 2 include/uapi/linux/bpf.h | 35 - kernel/bpf/Makefile | 2 kernel/bpf/core.c | 3 kernel/bpf/helpers.c | 43 ++ kernel/bpf/queue_stack_maps.c | 300 + kernel/bpf/syscall.c | 31 +++- kernel/bpf/verifier.c | 14 +- net/core/filter.c | 6 + 10 files changed, 424 insertions(+), 19 deletions(-) create mode 100644 kernel/bpf/queue_stack_maps.c diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 98c7eeb6d138..cad3bc5cffd1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -40,6 +40,9 @@ struct bpf_map_ops { int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); int (*map_delete_elem)(struct bpf_map *map, void *key); void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key); + int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags); + int (*map_pop_elem)(struct bpf_map *map, void *value); + int (*map_peek_elem)(struct bpf_map *map, void *value); /* funcs called by prog_array and perf_event_array map */ void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, @@ -139,6 +142,7 @@ enum bpf_arg_type { ARG_CONST_MAP_PTR, /* const argument used as pointer to bpf_map */ ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */ ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */ + ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */ /* the following constraints used to prototype bpf_memcmp() and other * functions that access data on eBPF program stack @@ -825,6 +829,9 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, extern 
const struct bpf_func_proto bpf_map_lookup_elem_proto; extern const struct bpf_func_proto bpf_map_update_elem_proto; extern const struct bpf_func_proto bpf_map_delete_elem_proto; +extern const struct bpf_func_proto bpf_map_push_elem_proto; +extern const struct bpf_func_proto bpf_map_pop_elem_proto; +extern const struct bpf_func_proto bpf_map_peek_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 658509daacd4..a2ec73aa1ec7 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -69,3 +69,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops) #endif #endif +BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3bb94aa2d408..bfa042273fad 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -129,6 +129,8 @@ enum bpf_map_type { BPF_MAP_TYPE_CGROUP_STORAGE, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + BPF_MAP_TYPE_QUEUE, + BPF_MAP_TYPE_STACK, }; enum bpf_prog_type { @@ -463,6 +465,28 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + * Description + * Push an element *value* in *map*. *flags* is one of: + * + * **BPF_EXIST** + * If the queue/stack is full, the oldest element is removed to + * make room for this. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_pop_elem(struct bpf_map *map, void *value) + * Description + * Pop an element from *map*. + * Return + * 0 on success, or a negative error in case of failure. 
 + * + * int bpf_map_peek_elem(struct bpf_map *map, void *value) + * Description + * Get an element from *map* without removing it. + * Return + * 0 on success, or a negative error in case of failure. + * * int bpf_probe_read(void *dst, u32 size, const void *src) * Description * For tracing programs, safely attempt to read *size* bytes from @@ -790,14 +814,14 @@ union bpf_attr { * * int ret; * struct bpf_tunnel_key key = {}; - * + * * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); * if (ret < 0) * return TC_ACT_SHOT; // drop
Re: [RFC PATCH bpf-next v4 4/7] bpf: add bpf queue and stack maps
On Thu, Oct 04, 2018 at 10:40:55PM -0500, Mauricio Vasquez wrote: > > > > + /* Round up queue size to nearest power of 2 */ > > > + max_entries = index_mask + 1; > > what's the point of roundup ? > > If the size of the buffer is power of two we can wrap the indexes with an > AND operation instead of MOD. > > > The memory waste becomes quite large when max_entries are high. > Yes, you are right, we have the different choices described below. > > > > > If queue/stack is sized to exact max_entries, > > then 'count' can be removed too, right? > > If we don't use 'count' and we want to use the AND operation for wrapping > indexes, the max entries should be 2^n - 1 because a slot is lost to > distinguish between full/empty queue/stack. > > Just to summarize, we have these options: > 1. Allow any size, round up, use the AND operation and 'count' (current). > 2. Allow only power of 2 sizes, use the AND operation and 'count'. > 3. Allow any size, no roundup, use the MOD operation and leaving an empty > slot. > > I prefer 1 or 2, but I don't have a strong opinion, maybe allowing only > power of two max entries could be too limiting. > Another consideration: is this really too bad to waste memory when user > requires a size far away of the next power of 2? I think there is a 4th option. Neither AND nor MOD is necessary. Pls take a look at ptr_ring implementation.
Re: [RFC PATCH bpf-next v4 4/7] bpf: add bpf queue and stack maps
On Thu, Oct 04, 2018 at 07:12:44PM +0200, Mauricio Vasquez B wrote: > Implement two new kind of maps that support the peek, push and pop > operations. > > A use case for this is to keep track of a pool of elements, like > network ports in a SNAT. > > Signed-off-by: Mauricio Vasquez B > --- > include/linux/bpf.h |7 + > include/linux/bpf_types.h |2 > include/uapi/linux/bpf.h | 35 - > kernel/bpf/Makefile |2 > kernel/bpf/core.c |3 > kernel/bpf/helpers.c | 43 ++ > kernel/bpf/queue_stack_maps.c | 300 > + > kernel/bpf/syscall.c | 31 +++- > kernel/bpf/verifier.c | 14 +- > net/core/filter.c |6 + > 10 files changed, 424 insertions(+), 19 deletions(-) > create mode 100644 kernel/bpf/queue_stack_maps.c > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index 98c7eeb6d138..cad3bc5cffd1 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -40,6 +40,9 @@ struct bpf_map_ops { > int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 > flags); > int (*map_delete_elem)(struct bpf_map *map, void *key); > void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key); > + int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags); > + int (*map_pop_elem)(struct bpf_map *map, void *value); > + int (*map_peek_elem)(struct bpf_map *map, void *value); > > /* funcs called by prog_array and perf_event_array map */ > void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, > @@ -139,6 +142,7 @@ enum bpf_arg_type { > ARG_CONST_MAP_PTR, /* const argument used as pointer to bpf_map */ > ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */ > ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */ > + ARG_PTR_TO_UNINIT_MAP_VALUE,/* pointer to valid memory used to > store a map value */ > > /* the following constraints used to prototype bpf_memcmp() and other >* functions that access data on eBPF program stack > @@ -825,6 +829,9 @@ static inline int > bpf_fd_reuseport_array_update_elem(struct bpf_map *map, > extern const 
struct bpf_func_proto bpf_map_lookup_elem_proto; > extern const struct bpf_func_proto bpf_map_update_elem_proto; > extern const struct bpf_func_proto bpf_map_delete_elem_proto; > +extern const struct bpf_func_proto bpf_map_push_elem_proto; > +extern const struct bpf_func_proto bpf_map_pop_elem_proto; > +extern const struct bpf_func_proto bpf_map_peek_elem_proto; > > extern const struct bpf_func_proto bpf_get_prandom_u32_proto; > extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; > diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h > index 658509daacd4..a2ec73aa1ec7 100644 > --- a/include/linux/bpf_types.h > +++ b/include/linux/bpf_types.h > @@ -69,3 +69,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops) > BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops) > #endif > #endif > +BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops) > +BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops) > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 3bb94aa2d408..bfa042273fad 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -129,6 +129,8 @@ enum bpf_map_type { > BPF_MAP_TYPE_CGROUP_STORAGE, > BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, > BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, > + BPF_MAP_TYPE_QUEUE, > + BPF_MAP_TYPE_STACK, > }; > > enum bpf_prog_type { > @@ -463,6 +465,28 @@ union bpf_attr { > * Return > * 0 on success, or a negative error in case of failure. > * > + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) > + * Description > + * Push an element *value* in *map*. *flags* is one of: > + * > + * **BPF_EXIST** > + * If the queue/stack is full, the oldest element is removed to > + * make room for this. > + * Return > + * 0 on success, or a negative error in case of failure. > + * > + * int bpf_map_pop_elem(struct bpf_map *map, void *value) > + * Description > + * Pop an element from *map*. > + * Return > + * 0 on success, or a negative error in case of failure. 
> + * > + * int bpf_map_peek_elem(struct bpf_map *map, void *value) > + * Description > + * Get an element from *map* without removing it. > + * Return > + * 0 on success, or a negative error in case of failure. > + * > * int bpf_probe_read(void *dst, u32 size, const void *src) > * Description > * For tracing programs, safely attempt to read *size* bytes from > @@ -790,14 +814,14 @@ union bpf_attr { > * > * int ret; > * struct bpf_tunnel_key key = {}; > - * > + * > * ret =
[RFC PATCH bpf-next v4 4/7] bpf: add bpf queue and stack maps
Implement two new kind of maps that support the peek, push and pop operations. A use case for this is to keep track of a pool of elements, like network ports in a SNAT. Signed-off-by: Mauricio Vasquez B --- include/linux/bpf.h |7 + include/linux/bpf_types.h |2 include/uapi/linux/bpf.h | 35 - kernel/bpf/Makefile |2 kernel/bpf/core.c |3 kernel/bpf/helpers.c | 43 ++ kernel/bpf/queue_stack_maps.c | 300 + kernel/bpf/syscall.c | 31 +++- kernel/bpf/verifier.c | 14 +- net/core/filter.c |6 + 10 files changed, 424 insertions(+), 19 deletions(-) create mode 100644 kernel/bpf/queue_stack_maps.c diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 98c7eeb6d138..cad3bc5cffd1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -40,6 +40,9 @@ struct bpf_map_ops { int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags); int (*map_delete_elem)(struct bpf_map *map, void *key); void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key); + int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags); + int (*map_pop_elem)(struct bpf_map *map, void *value); + int (*map_peek_elem)(struct bpf_map *map, void *value); /* funcs called by prog_array and perf_event_array map */ void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, @@ -139,6 +142,7 @@ enum bpf_arg_type { ARG_CONST_MAP_PTR, /* const argument used as pointer to bpf_map */ ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */ ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */ + ARG_PTR_TO_UNINIT_MAP_VALUE,/* pointer to valid memory used to store a map value */ /* the following constraints used to prototype bpf_memcmp() and other * functions that access data on eBPF program stack @@ -825,6 +829,9 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map, extern const struct bpf_func_proto bpf_map_lookup_elem_proto; extern const struct bpf_func_proto bpf_map_update_elem_proto; extern const struct bpf_func_proto 
bpf_map_delete_elem_proto; +extern const struct bpf_func_proto bpf_map_push_elem_proto; +extern const struct bpf_func_proto bpf_map_pop_elem_proto; +extern const struct bpf_func_proto bpf_map_peek_elem_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 658509daacd4..a2ec73aa1ec7 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -69,3 +69,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops) #endif #endif +BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3bb94aa2d408..bfa042273fad 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -129,6 +129,8 @@ enum bpf_map_type { BPF_MAP_TYPE_CGROUP_STORAGE, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + BPF_MAP_TYPE_QUEUE, + BPF_MAP_TYPE_STACK, }; enum bpf_prog_type { @@ -463,6 +465,28 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + * Description + * Push an element *value* in *map*. *flags* is one of: + * + * **BPF_EXIST** + * If the queue/stack is full, the oldest element is removed to + * make room for this. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_pop_elem(struct bpf_map *map, void *value) + * Description + * Pop an element from *map*. + * Return + * 0 on success, or a negative error in case of failure. + * + * int bpf_map_peek_elem(struct bpf_map *map, void *value) + * Description + * Get an element from *map* without removing it. + * Return + * 0 on success, or a negative error in case of failure. 
+ * * int bpf_probe_read(void *dst, u32 size, const void *src) * Description * For tracing programs, safely attempt to read *size* bytes from @@ -790,14 +814,14 @@ union bpf_attr { * * int ret; * struct bpf_tunnel_key key = {}; - * + * * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); * if (ret < 0) * return TC_ACT_SHOT; // drop packet - * + * * if