On Mon, Aug 03, 2020 at 06:46:51PM +0200, KP Singh wrote:
> From: KP Singh <[email protected]>
> 
> Refactor the functionality in bpf_sk_storage.c so that concept of
> storage linked to kernel objects can be extended to other objects like
> inode, task_struct etc.
> 
> Each new local storage will still be a separate map and provide its own
> set of helpers. This allows for future object specific extensions and
> still share a lot of the underlying implementation.
> 
> This includes the changes suggested by Martin in:
> 
>   https://lore.kernel.org/bpf/[email protected]/
> 
> which adds map_local_storage_charge, map_local_storage_uncharge,
> and map_owner_storage_ptr.
A description will still be useful in the commit message to talk
about the new map_ops, e.g.
they allow kernel object to optionally have different mem-charge strategy.

> 
> Co-developed-by: Martin KaFai Lau <[email protected]>
> Signed-off-by: KP Singh <[email protected]>
> ---
>  include/linux/bpf.h            |   9 ++
>  include/net/bpf_sk_storage.h   |  51 +++++++
>  include/uapi/linux/bpf.h       |   8 +-
>  net/core/bpf_sk_storage.c      | 246 +++++++++++++++++++++------------
>  tools/include/uapi/linux/bpf.h |   8 +-
>  5 files changed, 233 insertions(+), 89 deletions(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index cef4ef0d2b4e..8e1e23c60dc7 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -34,6 +34,9 @@ struct btf_type;
>  struct exception_table_entry;
>  struct seq_operations;
>  struct bpf_iter_aux_info;
> +struct bpf_local_storage;
> +struct bpf_local_storage_map;
> +struct bpf_local_storage_elem;
"struct bpf_local_storage_elem" is not needed.

>  
>  extern struct idr btf_idr;
>  extern spinlock_t btf_idr_lock;
> @@ -104,6 +107,12 @@ struct bpf_map_ops {
>       __poll_t (*map_poll)(struct bpf_map *map, struct file *filp,
>                            struct poll_table_struct *pts);
>  
> +     /* Functions called by bpf_local_storage maps */
> +     int (*map_local_storage_charge)(struct bpf_local_storage_map *smap,
> +                                     void *owner, u32 size);
> +     void (*map_local_storage_uncharge)(struct bpf_local_storage_map *smap,
> +                                        void *owner, u32 size);
> +     struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(void *owner);
>       /* BTF name and id of struct allocated by map_alloc */
>       const char * const map_btf_name;
>       int *map_btf_id;
> diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h
> index 950c5aaba15e..05b777950eb3 100644
> --- a/include/net/bpf_sk_storage.h
> +++ b/include/net/bpf_sk_storage.h
> @@ -3,8 +3,15 @@
>  #ifndef _BPF_SK_STORAGE_H
>  #define _BPF_SK_STORAGE_H
>  
> +#include <linux/rculist.h>
> +#include <linux/list.h>
> +#include <linux/hash.h>
>  #include <linux/types.h>
>  #include <linux/spinlock.h>
> +#include <linux/bpf.h>
> +#include <net/sock.h>
> +#include <uapi/linux/sock_diag.h>
> +#include <uapi/linux/btf.h>
>  
>  struct sock;
>  
> @@ -34,6 +41,50 @@ u16 bpf_local_storage_cache_idx_get(struct 
> bpf_local_storage_cache *cache);
>  void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
>                                     u16 idx);
>  
> +/* Helper functions for bpf_local_storage */
> +int bpf_local_storage_map_alloc_check(union bpf_attr *attr);
> +
> +struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr 
> *attr);
> +
> +struct bpf_local_storage_data *
> +bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
> +                      struct bpf_local_storage_map *smap,
> +                      bool cacheit_lockit);
> +
> +void bpf_local_storage_map_free(struct bpf_local_storage_map *smap);
> +
> +int bpf_local_storage_map_check_btf(const struct bpf_map *map,
> +                                 const struct btf *btf,
> +                                 const struct btf_type *key_type,
> +                                 const struct btf_type *value_type);
> +
> +void bpf_selem_link_storage(struct bpf_local_storage *local_storage,
> +                         struct bpf_local_storage_elem *selem);
> +
> +bool bpf_selem_unlink_storage(struct bpf_local_storage *local_storage,
> +                           struct bpf_local_storage_elem *selem,
> +                           bool uncharge_omem);
> +
> +void bpf_selem_unlink(struct bpf_local_storage_elem *selem);
> +
> +void bpf_selem_link_map(struct bpf_local_storage_map *smap,
> +                     struct bpf_local_storage_elem *selem);
> +
> +void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem);
> +
> +struct bpf_local_storage_elem *
> +bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
> +             bool charge_mem);
> +
> +int
> +bpf_local_storage_alloc(void *owner,
> +                     struct bpf_local_storage_map *smap,
> +                     struct bpf_local_storage_elem *first_selem);
> +
> +struct bpf_local_storage_data *
> +bpf_local_storage_update(void *owner, struct bpf_map *map, void *value,
Nit.  It may be more consistent to take "struct bpf_local_storage_map *smap"
instead of "struct bpf_map *map" here.

bpf_local_storage_map_check_btf() will be the only one taking
"struct bpf_map *map".

> +                      u64 map_flags);
> +
>  #ifdef CONFIG_BPF_SYSCALL
>  int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk);
>  struct bpf_sk_storage_diag *
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index b134e679e9db..35629752cec8 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3647,9 +3647,13 @@ enum {
>       BPF_F_SYSCTL_BASE_NAME          = (1ULL << 0),
>  };
>  
> -/* BPF_FUNC_sk_storage_get flags */
> +/* BPF_FUNC_<local>_storage_get flags */
BPF_FUNC_<kernel_obj>_storage_get flags?

>  enum {
> -     BPF_SK_STORAGE_GET_F_CREATE     = (1ULL << 0),
> +     BPF_LOCAL_STORAGE_GET_F_CREATE  = (1ULL << 0),
> +     /* BPF_SK_STORAGE_GET_F_CREATE is only kept for backward compatibility
> +      * and BPF_LOCAL_STORAGE_GET_F_CREATE must be used instead.
> +      */
> +     BPF_SK_STORAGE_GET_F_CREATE  = BPF_LOCAL_STORAGE_GET_F_CREATE,
>  };
>  
>  /* BPF_FUNC_read_branch_records flags. */
> diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
> index 99dde7b74767..bb2375769ca1 100644
> --- a/net/core/bpf_sk_storage.c
> +++ b/net/core/bpf_sk_storage.c
> @@ -84,7 +84,7 @@ struct bpf_local_storage_elem {
>  struct bpf_local_storage {
>       struct bpf_local_storage_data __rcu 
> *cache[BPF_LOCAL_STORAGE_CACHE_SIZE];
>       struct hlist_head list; /* List of bpf_local_storage_elem */
> -     struct sock *owner;     /* The object that owns the the above "list" of
> +     void *owner;            /* The object that owns the the above "list" of
>                                * bpf_local_storage_elem.
>                                */
>       struct rcu_head rcu;
> @@ -110,6 +110,33 @@ static int omem_charge(struct sock *sk, unsigned int 
> size)
>       return -ENOMEM;
>  }
>  
> +static int mem_charge(struct bpf_local_storage_map *smap, void *owner, u32 
> size)
> +{
> +     struct bpf_map *map = &smap->map;
> +
> +     if (!map->ops->map_local_storage_charge)
> +             return 0;
> +
> +     return map->ops->map_local_storage_charge(smap, owner, size);
> +}
> +
> +static void mem_uncharge(struct bpf_local_storage_map *smap, void *owner,
> +                      u32 size)
> +{
> +     struct bpf_map *map = &smap->map;
> +
> +     if (map->ops->map_local_storage_uncharge)
> +             map->ops->map_local_storage_uncharge(smap, owner, size);
> +}
> +
> +static struct bpf_local_storage __rcu **
> +owner_storage(struct bpf_local_storage_map *smap, void *owner)
> +{
> +     struct bpf_map *map = &smap->map;
> +
> +     return map->ops->map_owner_storage_ptr(owner);
> +}
> +
>  static bool selem_linked_to_storage(const struct bpf_local_storage_elem 
> *selem)
>  {
>       return !hlist_unhashed(&selem->snode);
> @@ -120,13 +147,13 @@ static bool selem_linked_to_map(const struct 
> bpf_local_storage_elem *selem)
>       return !hlist_unhashed(&selem->map_node);
>  }
>  
> -static struct bpf_local_storage_elem *
> -bpf_selem_alloc(struct bpf_local_storage_map *smap, struct sock *sk,
> -             void *value, bool charge_omem)
> +struct bpf_local_storage_elem *
> +bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
> +             void *value, bool charge_mem)
>  {
>       struct bpf_local_storage_elem *selem;
>  
> -     if (charge_omem && omem_charge(sk, smap->elem_size))
> +     if (charge_mem && mem_charge(smap, owner, smap->elem_size))
>               return NULL;
>  
>       selem = kzalloc(smap->elem_size, GFP_ATOMIC | __GFP_NOWARN);
> @@ -136,40 +163,42 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, 
> struct sock *sk,
>               return selem;
>       }
>  
> -     if (charge_omem)
> -             atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
> +     if (charge_mem)
> +             mem_uncharge(smap, owner, smap->elem_size);
>  
>       return NULL;
>  }
>  
> -/* sk_storage->lock must be held and selem->sk_storage == sk_storage.
> +/* local_storage->lock must be held and selem->sk_storage == sk_storage.
This name change belongs to patch 1.

Also,
selem->"local_"storage == "local_"storage

>   * The caller must ensure selem->smap is still valid to be
>   * dereferenced for its smap->elem_size and smap->cache_idx.
>   */

[ ... ]

> @@ -367,7 +401,7 @@ static int sk_storage_alloc(struct sock *sk,
>               /* Note that even first_selem was linked to smap's
>                * bucket->list, first_selem can be freed immediately
>                * (instead of kfree_rcu) because
> -              * bpf_sk_storage_map_free() does a
> +              * bpf_local_storage_map_free() does a
This name change belongs to patch 1 also.

>                * synchronize_rcu() before walking the bucket->list.
>                * Hence, no one is accessing selem from the
>                * bucket->list under rcu_read_lock().
> @@ -377,8 +411,8 @@ static int sk_storage_alloc(struct sock *sk,
>       return 0;
>  
>  uncharge:
> -     kfree(sk_storage);
> -     atomic_sub(sizeof(*sk_storage), &sk->sk_omem_alloc);
> +     kfree(storage);
> +     mem_uncharge(smap, owner, sizeof(*storage));
>       return err;
>  }
>  
> @@ -387,9 +421,9 @@ static int sk_storage_alloc(struct sock *sk,
>   * Otherwise, it will become a leak (and other memory issues
>   * during map destruction).
>   */
> -static struct bpf_local_storage_data *
> -bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value,
> -                      u64 map_flags)
> +struct bpf_local_storage_data *
> +bpf_local_storage_update(void *owner, struct bpf_map *map,
> +                      void *value, u64 map_flags)
>  {
>       struct bpf_local_storage_data *old_sdata = NULL;
>       struct bpf_local_storage_elem *selem;
> @@ -404,21 +438,21 @@ bpf_local_storage_update(struct sock *sk, struct 
> bpf_map *map, void *value,
>               return ERR_PTR(-EINVAL);
>  
>       smap = (struct bpf_local_storage_map *)map;
> -     local_storage = rcu_dereference(sk->sk_bpf_storage);
> +     local_storage = rcu_dereference(*owner_storage(smap, owner));
>       if (!local_storage || hlist_empty(&local_storage->list)) {
> -             /* Very first elem for this object */
> +             /* Very first elem for the owner */
>               err = check_flags(NULL, map_flags);
>               if (err)
>                       return ERR_PTR(err);
>  
> -             selem = bpf_selem_alloc(smap, sk, value, true);
> +             selem = bpf_selem_alloc(smap, owner, value, true);
>               if (!selem)
>                       return ERR_PTR(-ENOMEM);
>  
> -             err = sk_storage_alloc(sk, smap, selem);
> +             err = bpf_local_storage_alloc(owner, smap, selem);
>               if (err) {
>                       kfree(selem);
> -                     atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
> +                     mem_uncharge(smap, owner, smap->elem_size);
>                       return ERR_PTR(err);
>               }
>  
> @@ -430,8 +464,8 @@ bpf_local_storage_update(struct sock *sk, struct bpf_map 
> *map, void *value,
>                * such that it can avoid taking the local_storage->lock
>                * and changing the lists.
>                */
> -             old_sdata =
> -                     bpf_local_storage_lookup(local_storage, smap, false);
> +             old_sdata = bpf_local_storage_lookup(local_storage, smap,
> +                                                  false);
Pure indentation change.  The same line has been changed in patch 1.  Please 
change
the identation in patch 1 if the above way is preferred.

>               err = check_flags(old_sdata, map_flags);
>               if (err)
>                       return ERR_PTR(err);
> @@ -475,7 +509,7 @@ bpf_local_storage_update(struct sock *sk, struct bpf_map 
> *map, void *value,
>        * old_sdata will not be uncharged later during
>        * bpf_selem_unlink_storage().
>        */
> -     selem = bpf_selem_alloc(smap, sk, value, !old_sdata);
> +     selem = bpf_selem_alloc(smap, owner, value, !old_sdata);
>       if (!selem) {
>               err = -ENOMEM;
>               goto unlock_err;
> @@ -567,7 +601,7 @@ void bpf_sk_storage_free(struct sock *sk)
>        * Thus, no elem can be added-to or deleted-from the
>        * sk_storage->list by the bpf_prog or by the bpf-map's syscall.
>        *
> -      * It is racing with bpf_sk_storage_map_free() alone
> +      * It is racing with bpf_local_storage_map_free() alone
This name change belongs to patch 1 also.

>        * when unlinking elem from the sk_storage->list and
>        * the map's bucket->list.
>        */
> @@ -587,17 +621,12 @@ void bpf_sk_storage_free(struct sock *sk)
>               kfree_rcu(sk_storage, rcu);
>  }
>  
> -static void bpf_local_storage_map_free(struct bpf_map *map)
> +void bpf_local_storage_map_free(struct bpf_local_storage_map *smap)
>  {
>       struct bpf_local_storage_elem *selem;
> -     struct bpf_local_storage_map *smap;
>       struct bpf_local_storage_map_bucket *b;
>       unsigned int i;
>  
> -     smap = (struct bpf_local_storage_map *)map;
> -
> -     bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
> -
>       /* Note that this map might be concurrently cloned from
>        * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
>        * RCU read section to finish before proceeding. New RCU
> @@ -607,11 +636,12 @@ static void bpf_local_storage_map_free(struct bpf_map 
> *map)
>  
>       /* bpf prog and the userspace can no longer access this map
>        * now.  No new selem (of this map) can be added
> -      * to the sk->sk_bpf_storage or to the map bucket's list.
> +      * to the bpf_local_storage or to the map bucket's list.
s/bpf_local_storage/owner->storage/

>        *
>        * The elem of this map can be cleaned up here
>        * or
> -      * by bpf_sk_storage_free() during __sk_destruct().
> +      * by bpf_local_storage_free() during the destruction of the
> +      * owner object. eg. __sk_destruct.
This belongs to patch 1 also.

>        */
>       for (i = 0; i < (1U << smap->bucket_log); i++) {
>               b = &smap->buckets[i];
> @@ -627,22 +657,31 @@ static void bpf_local_storage_map_free(struct bpf_map 
> *map)
>               rcu_read_unlock();
>       }
>  
> -     /* bpf_sk_storage_free() may still need to access the map.
> -      * e.g. bpf_sk_storage_free() has unlinked selem from the map
> +     /* bpf_local_storage_free() may still need to access the map.
It is confusing.  There is no bpf_local_storage_free().

> +      * e.g. bpf_local_storage_free() has unlinked selem from the map
>        * which then made the above while((selem = ...)) loop
>        * exited immediately.
>        *
> -      * However, the bpf_sk_storage_free() still needs to access
> +      * However, the bpf_local_storage_free() still needs to access
Same here.

>        * the smap->elem_size to do the uncharging in
>        * bpf_selem_unlink_storage().
>        *
>        * Hence, wait another rcu grace period for the
> -      * bpf_sk_storage_free() to finish.
> +      * bpf_local_storage_free() to finish.
and here.

>        */
>       synchronize_rcu();
>  
>       kvfree(smap->buckets);
> -     kfree(map);
> +     kfree(smap);
> +}
> +
> +static void sk_storage_map_free(struct bpf_map *map)
> +{
> +     struct bpf_local_storage_map *smap;
> +
> +     smap = (struct bpf_local_storage_map *)map;
> +     bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
> +     bpf_local_storage_map_free(smap);
>  }
>  
>  /* U16_MAX is much more than enough for sk local storage
> @@ -654,7 +693,7 @@ static void bpf_local_storage_map_free(struct bpf_map 
> *map)
>              sizeof(struct bpf_local_storage_elem)),                  \
>             (U16_MAX - sizeof(struct bpf_local_storage_elem)))
>  
> -static int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
> +int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
>  {
>       if (attr->map_flags & ~BPF_LOCAL_STORAGE_CREATE_FLAG_MASK ||
>           !(attr->map_flags & BPF_F_NO_PREALLOC) ||
> @@ -673,7 +712,7 @@ static int bpf_local_storage_map_alloc_check(union 
> bpf_attr *attr)
>       return 0;
>  }
>  
> -static struct bpf_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
> +struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr 
> *attr)
>  {
>       struct bpf_local_storage_map *smap;
>       unsigned int i;
> @@ -711,9 +750,21 @@ static struct bpf_map *bpf_local_storage_map_alloc(union 
> bpf_attr *attr)
>               raw_spin_lock_init(&smap->buckets[i].lock);
>       }
>  
> -     smap->elem_size = sizeof(struct bpf_local_storage_elem) + 
> attr->value_size;
> -     smap->cache_idx = bpf_local_storage_cache_idx_get(&sk_cache);
> +     smap->elem_size =
> +             sizeof(struct bpf_local_storage_elem) + attr->value_size;
Same line has changed in patch 1.   Change the indentation in patch 1 also
if the above way is desired.

Others LGTM.

Reply via email to