On Mon, Apr 27, 2020 at 01:12:37PM -0700, Yonghong Song wrote:
> The bpf_map iterator is implemented.
> The bpf program is called at seq_ops show() and stop() functions.
> bpf_iter_get_prog() will retrieve bpf program and other
> parameters during seq_file object traversal. In show() function,
> bpf program will traverse every valid object, and in stop()
> function, bpf program will be called one more time after all
> objects are traversed.
> 
> The first member of the bpf context contains the meta data, namely,
> the seq_file, session_id and seq_num. Here, the session_id is
> a unique id for one specific seq_file session. The seq_num is
> the number of bpf prog invocations in the current session.
> The bpf_iter_get_prog(), which will be implemented in subsequent
> patches, will have more information on how meta data are computed.
> 
> The second member of the bpf context is a struct bpf_map pointer,
> which bpf program can examine.
> 
> The target implementation also provided the structure definition
> for bpf program and the function definition for verifier to
> verify the bpf program. Specifically for bpf_map iterator,
> the structure is "bpf_iter__bpf_map" and the function is
> "__bpf_iter__bpf_map".
> 
> More targets will be implemented later, all of which will include
> the following, similar to bpf_map iterator:
>   - seq_ops() implementation
>   - function definition for verifier to verify the bpf program
>   - seq_file private data size
>   - additional target feature
> 
> Signed-off-by: Yonghong Song <[email protected]>
> ---
>  include/linux/bpf.h   |  10 ++++
>  kernel/bpf/Makefile   |   2 +-
>  kernel/bpf/bpf_iter.c |  19 ++++++++
>  kernel/bpf/map_iter.c | 107 ++++++++++++++++++++++++++++++++++++++++++
>  kernel/bpf/syscall.c  |  13 +++++
>  5 files changed, 150 insertions(+), 1 deletion(-)
>  create mode 100644 kernel/bpf/map_iter.c
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 5e56abc1e2f1..4ac8d61f7c3e 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1078,6 +1078,7 @@ int  generic_map_update_batch(struct bpf_map *map,
>  int  generic_map_delete_batch(struct bpf_map *map,
>                             const union bpf_attr *attr,
>                             union bpf_attr __user *uattr);
> +struct bpf_map *bpf_map_get_curr_or_next(u32 *id);
>  
>  extern int sysctl_unprivileged_bpf_disabled;
>  
> @@ -1118,7 +1119,16 @@ struct bpf_iter_reg {
>       u32 target_feature;
>  };
>  
> +struct bpf_iter_meta {
> +     __bpf_md_ptr(struct seq_file *, seq);
> +     u64 session_id;
> +     u64 seq_num;
> +};
> +
>  int bpf_iter_reg_target(struct bpf_iter_reg *reg_info);
> +struct bpf_prog *bpf_iter_get_prog(struct seq_file *seq, u32 priv_data_size,
> +                                u64 *session_id, u64 *seq_num, bool is_last);
> +int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx);
>  
>  int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
>  int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
> diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
> index 6a8b0febd3f6..b2b5eefc5254 100644
> --- a/kernel/bpf/Makefile
> +++ b/kernel/bpf/Makefile
> @@ -2,7 +2,7 @@
>  obj-y := core.o
>  CFLAGS_core.o += $(call cc-disable-warning, override-init)
>  
> -obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o 
> bpf_iter.o
> +obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o 
> bpf_iter.o map_iter.o
>  obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o 
> bpf_lru_list.o lpm_trie.o map_in_map.o
>  obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
>  obj-$(CONFIG_BPF_SYSCALL) += disasm.o
> diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
> index 1115b978607a..284c95587803 100644
> --- a/kernel/bpf/bpf_iter.c
> +++ b/kernel/bpf/bpf_iter.c
> @@ -48,3 +48,22 @@ int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
>  
>       return 0;
>  }
> +
> +struct bpf_prog *bpf_iter_get_prog(struct seq_file *seq, u32 priv_data_size,
> +                                u64 *session_id, u64 *seq_num, bool is_last)
> +{
> +     return NULL;
Can this patch be moved after this function is implemented?

> +}
> +
> +int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
> +{
> +     int ret;
> +
> +     migrate_disable();
> +     rcu_read_lock();
> +     ret = BPF_PROG_RUN(prog, ctx);
> +     rcu_read_unlock();
> +     migrate_enable();
> +
> +     return ret;
> +}
> diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
> new file mode 100644
> index 000000000000..bb3ad4c3bde5
> --- /dev/null
> +++ b/kernel/bpf/map_iter.c
> @@ -0,0 +1,107 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Copyright (c) 2020 Facebook */
> +#include <linux/bpf.h>
> +#include <linux/fs.h>
> +#include <linux/filter.h>
> +#include <linux/kernel.h>
> +
> +struct bpf_iter_seq_map_info {
> +     struct bpf_map *map;
> +     u32 id;
> +};
> +
> +static void *bpf_map_seq_start(struct seq_file *seq, loff_t *pos)
> +{
> +     struct bpf_iter_seq_map_info *info = seq->private;
> +     struct bpf_map *map;
> +     u32 id = info->id;
> +
> +     map = bpf_map_get_curr_or_next(&id);
> +     if (IS_ERR_OR_NULL(map))
> +             return NULL;
> +
> +     ++*pos;
Does pos always need to be incremented here?

> +     info->map = map;
> +     info->id = id;
> +     return map;
> +}
> +
> +static void *bpf_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
> +{
> +     struct bpf_iter_seq_map_info *info = seq->private;
> +     struct bpf_map *map;
> +
> +     ++*pos;
> +     ++info->id;
> +     map = bpf_map_get_curr_or_next(&info->id);
> +     if (IS_ERR_OR_NULL(map))
> +             return NULL;
> +
> +     bpf_map_put(info->map);
> +     info->map = map;
> +     return map;
> +}
> +
> +struct bpf_iter__bpf_map {
> +     __bpf_md_ptr(struct bpf_iter_meta *, meta);
> +     __bpf_md_ptr(struct bpf_map *, map);
> +};
> +
> +int __init __bpf_iter__bpf_map(struct bpf_iter_meta *meta, struct bpf_map 
> *map)
> +{
> +     return 0;
> +}
> +
> +static int bpf_map_seq_show(struct seq_file *seq, void *v)
> +{
> +     struct bpf_iter_meta meta;
> +     struct bpf_iter__bpf_map ctx;
> +     struct bpf_prog *prog;
> +     int ret = 0;
> +
> +     ctx.meta = &meta;
> +     ctx.map = v;
> +     meta.seq = seq;
> +     prog = bpf_iter_get_prog(seq, sizeof(struct bpf_iter_seq_map_info),
> +                              &meta.session_id, &meta.seq_num,
> +                              v == (void *)0);
From looking at seq_file.c, when will show() be called with "v == NULL"?

> +     if (prog)
> +             ret = bpf_iter_run_prog(prog, &ctx);
> +
> +     return ret == 0 ? 0 : -EINVAL;
The verifier change in patch 4 should have ensured that prog
can only return 0?

> +}
> +
> +static void bpf_map_seq_stop(struct seq_file *seq, void *v)
> +{
> +     struct bpf_iter_seq_map_info *info = seq->private;
> +
> +     if (!v)
> +             bpf_map_seq_show(seq, v);
> +
> +     if (info->map) {
> +             bpf_map_put(info->map);
> +             info->map = NULL;
> +     }
> +}
> +
> +static const struct seq_operations bpf_map_seq_ops = {
> +     .start  = bpf_map_seq_start,
> +     .next   = bpf_map_seq_next,
> +     .stop   = bpf_map_seq_stop,
> +     .show   = bpf_map_seq_show,
> +};
> +
> +static int __init bpf_map_iter_init(void)
> +{
> +     struct bpf_iter_reg reg_info = {
> +             .target                 = "bpf_map",
> +             .target_func_name       = "__bpf_iter__bpf_map",
> +             .seq_ops                = &bpf_map_seq_ops,
> +             .seq_priv_size          = sizeof(struct bpf_iter_seq_map_info),
> +             .target_feature         = 0,
> +     };
> +
> +     return bpf_iter_reg_target(&reg_info);
> +}
> +
> +late_initcall(bpf_map_iter_init);
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 7626b8024471..022187640943 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -2800,6 +2800,19 @@ static int bpf_obj_get_next_id(const union bpf_attr 
> *attr,
>       return err;
>  }
>  
> +struct bpf_map *bpf_map_get_curr_or_next(u32 *id)
> +{
> +     struct bpf_map *map;
> +
> +     spin_lock_bh(&map_idr_lock);
> +     map = idr_get_next(&map_idr, id);
> +     if (map)
> +             map = __bpf_map_inc_not_zero(map, false);
nit. For the !map case, set "map = ERR_PTR(-ENOENT)" so that
the _OR_NULL() test is not needed.  It will be more consistent
with other error checking codes in syscall.c.

> +     spin_unlock_bh(&map_idr_lock);
> +
> +     return map;
> +}
> +
>  #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id
>  
>  struct bpf_prog *bpf_prog_by_id(u32 id)
> -- 
> 2.24.1
> 

Reply via email to