Re: [PATCH net-next v2 02/15] bpf: offload: add infrastructure for loading programs for a specific netdev

2017-11-09 Thread Jakub Kicinski
Hi!

Sorry for the delay!

On Mon, 06 Nov 2017 18:32:45 +0100, Daniel Borkmann wrote:
> On 11/03/2017 09:56 PM, Jakub Kicinski wrote:
> > @@ -1549,6 +1555,8 @@ static void bpf_prog_free_deferred(struct work_struct 
> > *work)
> > struct bpf_prog_aux *aux;
> >
> > aux = container_of(work, struct bpf_prog_aux, work);
> > +   if (bpf_prog_is_dev_bound(aux))
> > +   bpf_prog_offload_destroy(aux->prog);
> > bpf_jit_free(aux->prog);
> >   }  
> [...]
> > +static int bpf_offload_notification(struct notifier_block *notifier,
> > +   ulong event, void *ptr)
> > +{
> > +   struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
> > +   struct bpf_dev_offload *offload, *tmp;
> > +
> > +   ASSERT_RTNL();
> > +
> > +   switch (event) {
> > +   case NETDEV_UNREGISTER:
> > +   list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs,
> > +offloads) {
> > +   if (offload->netdev == netdev)
> > +   __bpf_prog_offload_destroy(offload->prog);  
> 
> We would be calling this twice, right? Once here and then on prog
> destruction again. __bpf_prog_offload_destroy() looks it will handle
> this just fine, but we should probably add a comment to
> __bpf_prog_offload_destroy() such that when changes are made to it
> it's obvious that we need to be extra careful.

Good point, I will add the comment.

> > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> > index 323be2473c4b..1574b9f0f24e 100644
> > --- a/kernel/bpf/syscall.c
> > +++ b/kernel/bpf/syscall.c
> > @@ -824,7 +824,10 @@ static int find_prog_type(enum bpf_prog_type type, 
> > struct bpf_prog *prog)
> > if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type])
> > return -EINVAL;
> >
> > -   prog->aux->ops = bpf_prog_types[type];
> > +   if (!bpf_prog_is_dev_bound(prog->aux))
> > +   prog->aux->ops = bpf_prog_types[type];
> > +   else
> > +   prog->aux->ops = &bpf_offload_prog_ops;
> > prog->type = type;
> > return 0;
> >   }
> > @@ -1054,7 +1057,7 @@ struct bpf_prog *bpf_prog_inc_not_zero(struct 
> > bpf_prog *prog)
> >   }
> >   EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);
> >
> > -static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
> > +static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type 
> > *attach_type)
> >   {
> > struct fd f = fdget(ufd);
> > struct bpf_prog *prog;
> > @@ -1062,7 +1065,7 @@ static struct bpf_prog *__bpf_prog_get(u32 ufd, enum 
> > bpf_prog_type *type)
> > prog = bpf_prog_get(f);
> > if (IS_ERR(prog))
> > return prog;
> > -   if (type && prog->type != *type) {
> > +   if (attach_type && (prog->type != *attach_type || prog->aux->offload)) {
> > prog = ERR_PTR(-EINVAL);
> > goto out;
> > }
> > @@ -1089,7 +1092,7 @@ struct bpf_prog *bpf_prog_get_type(u32 ufd, enum 
> > bpf_prog_type type)
> >   EXPORT_SYMBOL_GPL(bpf_prog_get_type);
> >
> >   /* last field in 'union bpf_attr' used by this command */
> > -#define BPF_PROG_LOAD_LAST_FIELD prog_name
> > +#define BPF_PROG_LOAD_LAST_FIELD prog_target_ifindex  
> 
> For program types that are neither XDP nor cls_bpf, we should reject
> the request if something calls bpf(2) with non-0 prog_target_ifindex.
> 
> That way, i) we don't burn the whole field and could perhaps reuse/union
> it for other prog types like tracing in future. Probably makes sense to
> do anyway since ii) for types like tracing, we would want to reject this
> upfront here and not when later attach happens.
> 
> I probably missed something when reading the code, but if I spotted
> that correctly, we might otherwise even go and nfp-jit simple progs
> for non-networking types (we would bail out later though on in
> __bpf_prog_get() ... but we shouldn't let syscall return in first
> place)?

Agreed, I will fix this.


Re: [PATCH net-next v2 02/15] bpf: offload: add infrastructure for loading programs for a specific netdev

2017-11-06 Thread Daniel Borkmann

On 11/03/2017 09:56 PM, Jakub Kicinski wrote:

The fact that we don't know which device the program is going
to be used on is quite limiting in current eBPF infrastructure.
We have to reverse or limit the changes which kernel makes to
the loaded bytecode if we want it to be offloaded to a networking
device.  We also have to invent new APIs for debugging and
troubleshooting support.

Make it possible to load programs for a specific netdev.  This
helps us to bring the debug information closer to the core
eBPF infrastructure (e.g. we will be able to reuse the verifier
log in device JIT).  It allows device JITs to perform translation
on the original bytecode.

__bpf_prog_get() when called to get a reference for an attachment
point will now refuse to give it if program has a device assigned.
Following patches will add a version of that function which passes
the expected netdev in. @type argument in __bpf_prog_get() is
renamed to attach_type to make it clearer that it's only set on
attachment.

All calls to ndo_bpf are protected by rtnl, only verifier callbacks
are not.  We need a wait queue to make sure netdev doesn't get
destroyed while verifier is still running and calling its driver.

Signed-off-by: Jakub Kicinski 
Reviewed-by: Simon Horman 
Reviewed-by: Quentin Monnet 


First of all, great work, I went over the series and I really like
the outcome. It's applied already anyway, but two minor comments
further below.

[...]

@@ -1549,6 +1555,8 @@ static void bpf_prog_free_deferred(struct work_struct 
*work)
struct bpf_prog_aux *aux;

aux = container_of(work, struct bpf_prog_aux, work);
+   if (bpf_prog_is_dev_bound(aux))
+   bpf_prog_offload_destroy(aux->prog);
bpf_jit_free(aux->prog);
  }

[...]

+static int bpf_offload_notification(struct notifier_block *notifier,
+   ulong event, void *ptr)
+{
+   struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+   struct bpf_dev_offload *offload, *tmp;
+
+   ASSERT_RTNL();
+
+   switch (event) {
+   case NETDEV_UNREGISTER:
+   list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs,
+offloads) {
+   if (offload->netdev == netdev)
+   __bpf_prog_offload_destroy(offload->prog);


We would be calling this twice, right? Once here and then on prog
destruction again. __bpf_prog_offload_destroy() looks it will handle
this just fine, but we should probably add a comment to
__bpf_prog_offload_destroy() such that when changes are made to it
it's obvious that we need to be extra careful.

[...]

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 323be2473c4b..1574b9f0f24e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -824,7 +824,10 @@ static int find_prog_type(enum bpf_prog_type type, struct 
bpf_prog *prog)
if (type >= ARRAY_SIZE(bpf_prog_types) || !bpf_prog_types[type])
return -EINVAL;

-   prog->aux->ops = bpf_prog_types[type];
+   if (!bpf_prog_is_dev_bound(prog->aux))
+   prog->aux->ops = bpf_prog_types[type];
+   else
+   prog->aux->ops = &bpf_offload_prog_ops;
prog->type = type;
return 0;
  }
@@ -1054,7 +1057,7 @@ struct bpf_prog *bpf_prog_inc_not_zero(struct bpf_prog 
*prog)
  }
  EXPORT_SYMBOL_GPL(bpf_prog_inc_not_zero);

-static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type)
+static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type 
*attach_type)
  {
struct fd f = fdget(ufd);
struct bpf_prog *prog;
@@ -1062,7 +1065,7 @@ static struct bpf_prog *__bpf_prog_get(u32 ufd, enum 
bpf_prog_type *type)
prog = bpf_prog_get(f);
if (IS_ERR(prog))
return prog;
-   if (type && prog->type != *type) {
+   if (attach_type && (prog->type != *attach_type || prog->aux->offload)) {
prog = ERR_PTR(-EINVAL);
goto out;
}
@@ -1089,7 +1092,7 @@ struct bpf_prog *bpf_prog_get_type(u32 ufd, enum 
bpf_prog_type type)
  EXPORT_SYMBOL_GPL(bpf_prog_get_type);

  /* last field in 'union bpf_attr' used by this command */
-#define BPF_PROG_LOAD_LAST_FIELD prog_name
+#define BPF_PROG_LOAD_LAST_FIELD prog_target_ifindex


For program types that are neither XDP nor cls_bpf, we should reject
the request if something calls bpf(2) with non-0 prog_target_ifindex.

That way, i) we don't burn the whole field and could perhaps reuse/union
it for other prog types like tracing in future. Probably makes sense to
do anyway since ii) for types like tracing, we would want to reject this
upfront here and not when later attach happens.

I probably missed something when reading the code, but if I spotted
that correctly, we might otherwise even go and nfp-jit simple progs
for non-networking types (we would bail out later though on in
__bpf_prog_get() ... but we shouldn't let syscall return in first
place)?

[PATCH net-next v2 02/15] bpf: offload: add infrastructure for loading programs for a specific netdev

2017-11-03 Thread Jakub Kicinski
The fact that we don't know which device the program is going
to be used on is quite limiting in current eBPF infrastructure.
We have to reverse or limit the changes which kernel makes to
the loaded bytecode if we want it to be offloaded to a networking
device.  We also have to invent new APIs for debugging and
troubleshooting support.

Make it possible to load programs for a specific netdev.  This
helps us to bring the debug information closer to the core
eBPF infrastructure (e.g. we will be able to reuse the verifier
log in device JIT).  It allows device JITs to perform translation
on the original bytecode.

__bpf_prog_get() when called to get a reference for an attachment
point will now refuse to give it if program has a device assigned.
Following patches will add a version of that function which passes
the expected netdev in. @type argument in __bpf_prog_get() is
renamed to attach_type to make it clearer that it's only set on
attachment.

All calls to ndo_bpf are protected by rtnl, only verifier callbacks
are not.  We need a wait queue to make sure netdev doesn't get
destroyed while verifier is still running and calling its driver.

Signed-off-by: Jakub Kicinski 
Reviewed-by: Simon Horman 
Reviewed-by: Quentin Monnet 
---
 include/linux/bpf.h  |  36 +
 include/linux/bpf_verifier.h |  10 +++
 include/linux/netdevice.h|  14 
 include/uapi/linux/bpf.h |   1 +
 kernel/bpf/Makefile  |   1 +
 kernel/bpf/core.c|  10 ++-
 kernel/bpf/offload.c | 182 +++
 kernel/bpf/syscall.c |  17 +++-
 kernel/bpf/verifier.c|  15 +++-
 9 files changed, 278 insertions(+), 8 deletions(-)
 create mode 100644 kernel/bpf/offload.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 520aeebe0d93..e45d43f9ec92 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -15,6 +15,7 @@
 #include 
 #include 
 #include 
+#include 
 
 struct perf_event;
 struct bpf_prog;
@@ -182,6 +183,16 @@ struct bpf_verifier_ops {
  struct bpf_prog *prog, u32 *target_size);
 };
 
+struct bpf_dev_offload {
+   struct bpf_prog *prog;
+   struct net_device   *netdev;
+   void*dev_priv;
+   struct list_headoffloads;
+   booldev_state;
+   boolverifier_running;
+   wait_queue_head_t   verifier_done;
+};
+
 struct bpf_prog_aux {
atomic_t refcnt;
u32 used_map_cnt;
@@ -199,6 +210,7 @@ struct bpf_prog_aux {
 #ifdef CONFIG_SECURITY
void *security;
 #endif
+   struct bpf_dev_offload *offload;
union {
struct work_struct work;
struct rcu_head rcu;
@@ -317,6 +329,7 @@ extern const struct file_operations bpf_prog_fops;
 #undef BPF_PROG_TYPE
 #undef BPF_MAP_TYPE
 
+extern const struct bpf_prog_ops bpf_offload_prog_ops;
 extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops;
 extern const struct bpf_verifier_ops xdp_analyzer_ops;
 
@@ -491,6 +504,29 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry 
*rcpu,
 }
 #endif /* CONFIG_BPF_SYSCALL */
 
+int bpf_prog_offload_compile(struct bpf_prog *prog);
+void bpf_prog_offload_destroy(struct bpf_prog *prog);
+
+#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
+int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
+
+static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux)
+{
+   return aux->offload;
+}
+#else
+static inline int bpf_prog_offload_init(struct bpf_prog *prog,
+   union bpf_attr *attr)
+{
+   return -EOPNOTSUPP;
+}
+
+static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux)
+{
+   return false;
+}
+#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */
+
 #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL)
 struct sock  *__sock_map_lookup_elem(struct bpf_map *map, u32 key);
 int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type);
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 3b0976aaac75..e45011dbc02d 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -153,6 +153,7 @@ struct bpf_verifier_env {
struct bpf_verifier_state *cur_state; /* current verifier state */
struct bpf_verifier_state_list **explored_states; /* search pruning 
optimization */
const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer 
ops */
+   const struct bpf_ext_analyzer_ops *dev_ops; /* device analyzer ops */
void *analyzer_priv; /* pointer to external analyzer's private data */
struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by 
eBPF program */
u32 used_map_cnt;   /* number of used maps */
@@ -169,6 +170,15 @@ static inline struct bpf_reg_state