Re: [RFC PATCH bpf-next v4 4/7] bpf: add bpf queue and stack maps

2018-10-07 Thread Mauricio Vasquez




On 10/04/2018 10:40 PM, Mauricio Vasquez wrote:



On 10/04/2018 06:57 PM, Alexei Starovoitov wrote:

On Thu, Oct 04, 2018 at 07:12:44PM +0200, Mauricio Vasquez B wrote:

Implement two new kinds of maps that support the peek, push and pop
operations.

A use case for this is to keep track of a pool of elements, like
network ports in a SNAT.

Signed-off-by: Mauricio Vasquez B 
---
  include/linux/bpf.h   |    7 +
  include/linux/bpf_types.h |    2
  include/uapi/linux/bpf.h  |   35 -
  kernel/bpf/Makefile   |    2
  kernel/bpf/core.c |    3
  kernel/bpf/helpers.c  |   43 ++
  kernel/bpf/queue_stack_maps.c |  300 +

  kernel/bpf/syscall.c  |   31 +++-
  kernel/bpf/verifier.c |   14 +-
  net/core/filter.c |    6 +
  10 files changed, 424 insertions(+), 19 deletions(-)
  create mode 100644 kernel/bpf/queue_stack_maps.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 98c7eeb6d138..cad3bc5cffd1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -40,6 +40,9 @@ struct bpf_map_ops {
  int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
  int (*map_delete_elem)(struct bpf_map *map, void *key);
  void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key);
+    int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
+    int (*map_pop_elem)(struct bpf_map *map, void *value);
+    int (*map_peek_elem)(struct bpf_map *map, void *value);
    /* funcs called by prog_array and perf_event_array map */
  void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
@@ -139,6 +142,7 @@ enum bpf_arg_type {
  ARG_CONST_MAP_PTR,    /* const argument used as pointer to bpf_map */
  ARG_PTR_TO_MAP_KEY,    /* pointer to stack used as map key */
  ARG_PTR_TO_MAP_VALUE,    /* pointer to stack used as map value */
+    ARG_PTR_TO_UNINIT_MAP_VALUE,    /* pointer to valid memory used to store a map value */

    /* the following constraints used to prototype bpf_memcmp() and other
   * functions that access data on eBPF program stack
@@ -825,6 +829,9 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
  extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
  extern const struct bpf_func_proto bpf_map_update_elem_proto;
  extern const struct bpf_func_proto bpf_map_delete_elem_proto;
+extern const struct bpf_func_proto bpf_map_push_elem_proto;
+extern const struct bpf_func_proto bpf_map_pop_elem_proto;
+extern const struct bpf_func_proto bpf_map_peek_elem_proto;
    extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
  extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 658509daacd4..a2ec73aa1ec7 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -69,3 +69,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
  BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
  #endif
  #endif
+BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3bb94aa2d408..bfa042273fad 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -129,6 +129,8 @@ enum bpf_map_type {
  BPF_MAP_TYPE_CGROUP_STORAGE,
  BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
  BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+    BPF_MAP_TYPE_QUEUE,
+    BPF_MAP_TYPE_STACK,
  };
    enum bpf_prog_type {
@@ -463,6 +465,28 @@ union bpf_attr {
   * Return
   * 0 on success, or a negative error in case of failure.
   *
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * Description
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is removed to
+ * make room for this.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * Description
+ * Pop an element from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * Description
+ * Get an element from *map* without removing it.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
   * int bpf_probe_read(void *dst, u32 size, const void *src)
   * Description
   * For tracing programs, safely attempt to read *size* bytes from
@@ -790,14 +814,14 @@ union bpf_attr {
   *
   * int ret;
   * struct bpf_tunnel_key key = {};
- *
+ *
   * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
   * if (ret < 0)
   * return TC_ACT_SHOT;    // drop packet

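To illustrate the use case from the commit message, here is a minimal sketch of a tc program that treats a queue map as a pool of free SNAT source ports. Only BPF_MAP_TYPE_QUEUE, bpf_map_push_elem() and bpf_map_pop_elem() come from this series; the bpf_map_def/SEC() definition style, the header names, the zero key_size and the __u16 port type are assumptions made for the example.

#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include "bpf_helpers.h"

/* Pool of free SNAT source ports, consumed and refilled at run time.
 * key_size of 0 (keyless map) is assumed here.
 */
struct bpf_map_def SEC("maps") free_ports = {
	.type = BPF_MAP_TYPE_QUEUE,
	.key_size = 0,
	.value_size = sizeof(__u16),
	.max_entries = 1024,
};

SEC("classifier")
int snat_alloc_port(struct __sk_buff *skb)
{
	__u16 port;

	/* Take a free port from the pool; drop the packet if none is left. */
	if (bpf_map_pop_elem(&free_ports, &port))
		return TC_ACT_SHOT;

	/* ... rewrite the source port to 'port' and record the mapping ... */

	/* When the mapping expires, the port would go back to the pool:
	 * bpf_map_push_elem(&free_ports, &port, 0);
	 */
	return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";
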
Re: [RFC PATCH bpf-next v4 4/7] bpf: add bpf queue and stack maps

2018-10-05 Thread Alexei Starovoitov
On Thu, Oct 04, 2018 at 10:40:55PM -0500, Mauricio Vasquez wrote:
> 
> > > + /* Round up queue size to nearest power of 2 */
> > > + max_entries = index_mask + 1;
> > what's the point of roundup ?
> 
> If the size of the buffer is power of two we can wrap the indexes with an
> AND operation instead of MOD.
> 
> > The memory waste becomes quite large when max_entries are high.
> Yes, you are right, we have the different choices described below.
> 
> > 
> > If queue/stack is sized to exact max_entries,
> > then 'count' can be removed too, right?
> 
> If we don't use 'count' and we want to use the AND operation for wrapping
> indexes, the max entries should be 2^n - 1 because a slot is lost to
> distinguish between full/empty queue/stack.
> 
> Just to summarize, we have these options:
> 1. Allow any size, round up, use the AND operation and 'count' (current).
> 2. Allow only power of 2 sizes, use the AND operation and 'count'.
> 3. Allow any size, no roundup, use the MOD operation and leave an empty
> slot.
> 
> I prefer 1 or 2, but I don't have a strong opinion; maybe allowing only
> power-of-two max entries could be too limiting.
> Another consideration: is it really that bad to waste memory when the user
> requests a size far from the next power of 2?

I think there is a 4th option. Neither AND nor MOD is necessary.
Pls take a look at the ptr_ring implementation.

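To make the options concrete, here is a userspace-style sketch (illustrative only, not taken from the patch, and without any locking) contrasting options 1/2, a power-of-2 ring wrapped with AND plus a 'count', against a ptr_ring-style ring where a NULL slot means "free", so any size works and the index wraps by compare-and-reset rather than AND or MOD.

#include <stddef.h>

/* Options 1/2: size rounded up to a power of 2, wrap with AND, track 'count'. */
struct ring_pow2 {
	void **slots;
	unsigned int index_mask;	/* size - 1, size is a power of 2 */
	unsigned int head, tail, count;	/* pop (not shown) would use 'tail' */
};

static int pow2_push(struct ring_pow2 *r, void *e)
{
	if (r->count == r->index_mask + 1)
		return -1;			/* full */
	r->slots[r->head++ & r->index_mask] = e;
	r->count++;
	return 0;
}

/* Option 4 (ptr_ring-style): any size, no AND/MOD, no 'count'.
 * The slot itself says whether it is free (NULL) or occupied, and the
 * index wraps by comparing against the size and resetting to 0.
 */
struct ring_ptr {
	void **slots;			/* zero-initialized array of 'size' slots */
	unsigned int size, producer, consumer;
};

static int ptr_push(struct ring_ptr *r, void *e)	/* e must be non-NULL */
{
	if (r->slots[r->producer])
		return -1;			/* full: next slot still in use */
	r->slots[r->producer] = e;
	if (++r->producer >= r->size)
		r->producer = 0;
	return 0;
}

static void *ptr_pop(struct ring_ptr *r)
{
	void *e = r->slots[r->consumer];

	if (!e)
		return NULL;			/* empty */
	r->slots[r->consumer] = NULL;
	if (++r->consumer >= r->size)
		r->consumer = 0;
	return e;
}

The caveat for queue/stack maps is that elements are stored by copy rather than as pointers, so a NULL "free" sentinel is not directly available; the part of the ptr_ring approach that carries over is the index handling.
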


Re: [RFC PATCH bpf-next v4 4/7] bpf: add bpf queue and stack maps

2018-10-04 Thread Alexei Starovoitov
On Thu, Oct 04, 2018 at 07:12:44PM +0200, Mauricio Vasquez B wrote:
> Implement two new kinds of maps that support the peek, push and pop
> operations.
> 
> A use case for this is to keep track of a pool of elements, like
> network ports in a SNAT.
> 
> Signed-off-by: Mauricio Vasquez B 
> ---
>  include/linux/bpf.h   |7 +
>  include/linux/bpf_types.h |2 
>  include/uapi/linux/bpf.h  |   35 -
>  kernel/bpf/Makefile   |2 
>  kernel/bpf/core.c |3 
>  kernel/bpf/helpers.c  |   43 ++
>  kernel/bpf/queue_stack_maps.c |  300 +
>  kernel/bpf/syscall.c  |   31 +++-
>  kernel/bpf/verifier.c |   14 +-
>  net/core/filter.c |6 +
>  10 files changed, 424 insertions(+), 19 deletions(-)
>  create mode 100644 kernel/bpf/queue_stack_maps.c
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 98c7eeb6d138..cad3bc5cffd1 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -40,6 +40,9 @@ struct bpf_map_ops {
>   int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
>   int (*map_delete_elem)(struct bpf_map *map, void *key);
>   void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key);
> + int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
> + int (*map_pop_elem)(struct bpf_map *map, void *value);
> + int (*map_peek_elem)(struct bpf_map *map, void *value);
>  
>   /* funcs called by prog_array and perf_event_array map */
>   void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
> @@ -139,6 +142,7 @@ enum bpf_arg_type {
>   ARG_CONST_MAP_PTR,  /* const argument used as pointer to bpf_map */
>   ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */
>   ARG_PTR_TO_MAP_VALUE,   /* pointer to stack used as map value */
> + ARG_PTR_TO_UNINIT_MAP_VALUE,/* pointer to valid memory used to store a map value */
>  
>   /* the following constraints used to prototype bpf_memcmp() and other
>* functions that access data on eBPF program stack
> @@ -825,6 +829,9 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
>  extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
>  extern const struct bpf_func_proto bpf_map_update_elem_proto;
>  extern const struct bpf_func_proto bpf_map_delete_elem_proto;
> +extern const struct bpf_func_proto bpf_map_push_elem_proto;
> +extern const struct bpf_func_proto bpf_map_pop_elem_proto;
> +extern const struct bpf_func_proto bpf_map_peek_elem_proto;
>  
>  extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
>  extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
> diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
> index 658509daacd4..a2ec73aa1ec7 100644
> --- a/include/linux/bpf_types.h
> +++ b/include/linux/bpf_types.h
> @@ -69,3 +69,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
>  BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
>  #endif
>  #endif
> +BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
> +BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 3bb94aa2d408..bfa042273fad 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -129,6 +129,8 @@ enum bpf_map_type {
>   BPF_MAP_TYPE_CGROUP_STORAGE,
>   BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
>   BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
> + BPF_MAP_TYPE_QUEUE,
> + BPF_MAP_TYPE_STACK,
>  };
>  
>  enum bpf_prog_type {
> @@ -463,6 +465,28 @@ union bpf_attr {
>   *   Return
>   *   0 on success, or a negative error in case of failure.
>   *
> + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
> + *   Description
> + *   Push an element *value* in *map*. *flags* is one of:
> + *
> + *   **BPF_EXIST**
> + *   If the queue/stack is full, the oldest element is removed to
> + *   make room for this.
> + *   Return
> + *   0 on success, or a negative error in case of failure.
> + *
> + * int bpf_map_pop_elem(struct bpf_map *map, void *value)
> + *   Description
> + *   Pop an element from *map*.
> + * Return
> + *   0 on success, or a negative error in case of failure.
> + *
> + * int bpf_map_peek_elem(struct bpf_map *map, void *value)
> + *   Description
> + *   Get an element from *map* without removing it.
> + * Return
> + *   0 on success, or a negative error in case of failure.
> + *
>   * int bpf_probe_read(void *dst, u32 size, const void *src)
>   *   Description
>   *   For tracing programs, safely attempt to read *size* bytes from
> @@ -790,14 +814,14 @@ union bpf_attr {
>   *
>   *   int ret;
>   *   struct bpf_tunnel_key key = {};
> - *   
> + *
>   *   ret = 

[RFC PATCH bpf-next v4 4/7] bpf: add bpf queue and stack maps

2018-10-04 Thread Mauricio Vasquez B
Implement two new kinds of maps that support the peek, push and pop
operations.

A use case for this is to keep track of a pool of elements, like
network ports in a SNAT.

Signed-off-by: Mauricio Vasquez B 
---
 include/linux/bpf.h   |7 +
 include/linux/bpf_types.h |2 
 include/uapi/linux/bpf.h  |   35 -
 kernel/bpf/Makefile   |2 
 kernel/bpf/core.c |3 
 kernel/bpf/helpers.c  |   43 ++
 kernel/bpf/queue_stack_maps.c |  300 +
 kernel/bpf/syscall.c  |   31 +++-
 kernel/bpf/verifier.c |   14 +-
 net/core/filter.c |6 +
 10 files changed, 424 insertions(+), 19 deletions(-)
 create mode 100644 kernel/bpf/queue_stack_maps.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 98c7eeb6d138..cad3bc5cffd1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -40,6 +40,9 @@ struct bpf_map_ops {
	int (*map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags);
int (*map_delete_elem)(struct bpf_map *map, void *key);
void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key);
+   int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
+   int (*map_pop_elem)(struct bpf_map *map, void *value);
+   int (*map_peek_elem)(struct bpf_map *map, void *value);
 
/* funcs called by prog_array and perf_event_array map */
void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
@@ -139,6 +142,7 @@ enum bpf_arg_type {
ARG_CONST_MAP_PTR,  /* const argument used as pointer to bpf_map */
ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */
ARG_PTR_TO_MAP_VALUE,   /* pointer to stack used as map value */
+   ARG_PTR_TO_UNINIT_MAP_VALUE,/* pointer to valid memory used to store a map value */
 
/* the following constraints used to prototype bpf_memcmp() and other
 * functions that access data on eBPF program stack
@@ -825,6 +829,9 @@ static inline int bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
 extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
 extern const struct bpf_func_proto bpf_map_update_elem_proto;
 extern const struct bpf_func_proto bpf_map_delete_elem_proto;
+extern const struct bpf_func_proto bpf_map_push_elem_proto;
+extern const struct bpf_func_proto bpf_map_pop_elem_proto;
+extern const struct bpf_func_proto bpf_map_peek_elem_proto;
 
 extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
 extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 658509daacd4..a2ec73aa1ec7 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -69,3 +69,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
 #endif
 #endif
+BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3bb94aa2d408..bfa042273fad 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -129,6 +129,8 @@ enum bpf_map_type {
BPF_MAP_TYPE_CGROUP_STORAGE,
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+   BPF_MAP_TYPE_QUEUE,
+   BPF_MAP_TYPE_STACK,
 };
 
 enum bpf_prog_type {
@@ -463,6 +465,28 @@ union bpf_attr {
  * Return
  * 0 on success, or a negative error in case of failure.
  *
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * Description
+ * Push an element *value* in *map*. *flags* is one of:
+ *
+ * **BPF_EXIST**
+ * If the queue/stack is full, the oldest element is removed to
+ * make room for this.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * Description
+ * Pop an element from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * Description
+ * Get an element from *map* without removing it.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
  * int bpf_probe_read(void *dst, u32 size, const void *src)
  * Description
  * For tracing programs, safely attempt to read *size* bytes from
@@ -790,14 +814,14 @@ union bpf_attr {
  *
  * int ret;
  * struct bpf_tunnel_key key = {};
- * 
+ *
  * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
  * if (ret < 0)
  * return TC_ACT_SHOT; // drop packet
- * 
+ *
  * if