Re: [PATCH bpf-next v2 6/8] bpf: offload: report device information for offloaded programs

2017-12-27 Thread Jakub Kicinski
On Wed, 27 Dec 2017 18:26:16 +0100, Daniel Borkmann wrote:
> On 12/21/2017 10:01 PM, Jakub Kicinski wrote:
> > Report to the user ifindex and namespace information of offloaded
> > programs.  If device has disappeared return -ENODEV.  Specify the
> > namespace using dev/inode combination.
> > 
> > CC: Eric W. Biederman 
> > Signed-off-by: Jakub Kicinski 
> > Reviewed-by: Quentin Monnet 
> > ---
> > v2:
> >  - take RTNL lock to grab a coherent snapshot of device state
> >(ifindex vs name space) and avoid races with name space
> >moves (based on Eric's comment on Kirill's patch to
> >peernet2id_alloc()).
> > ---
> >  fs/nsfs.c  |  2 +-
> >  include/linux/bpf.h|  2 ++
> >  include/linux/proc_ns.h|  1 +
> >  include/uapi/linux/bpf.h   |  3 +++
> >  kernel/bpf/offload.c   | 44 
> > ++
> >  kernel/bpf/syscall.c   |  6 ++
> >  tools/include/uapi/linux/bpf.h |  3 +++
> >  7 files changed, 60 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/nsfs.c b/fs/nsfs.c
> > index 7c6f76d29f56..e50628675935 100644
> > --- a/fs/nsfs.c
> > +++ b/fs/nsfs.c
> > @@ -51,7 +51,7 @@ static void nsfs_evict(struct inode *inode)
> > ns->ops->put(ns);
> >  }
> >  
> > -static void *__ns_get_path(struct path *path, struct ns_common *ns)
> > +void *__ns_get_path(struct path *path, struct ns_common *ns)
> >  {
> > struct vfsmount *mnt = nsfs_mnt;
> > struct dentry *dentry;
> > diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> > index 9a916ab34299..7810ae57b357 100644
> > --- a/include/linux/bpf.h
> > +++ b/include/linux/bpf.h
> > @@ -531,6 +531,8 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 
> > ufd,
> >  
> >  int bpf_prog_offload_compile(struct bpf_prog *prog);
> >  void bpf_prog_offload_destroy(struct bpf_prog *prog);
> > +int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
> > +  struct bpf_prog *prog);
> >  
> >  #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
> >  int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
> > diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
> > index 2ff18c9840a7..1733359cf713 100644
> > --- a/include/linux/proc_ns.h
> > +++ b/include/linux/proc_ns.h
> > @@ -76,6 +76,7 @@ static inline int ns_alloc_inum(struct ns_common *ns)
> >  
> >  extern struct file *proc_ns_fget(int fd);
> >  #define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private)
> > +extern void *__ns_get_path(struct path *path, struct ns_common *ns);
> >  extern void *ns_get_path(struct path *path, struct task_struct *task,
> > const struct proc_ns_operations *ns_ops);
> >  
> > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > index d01f1cb3cfc0..72b37fc3bc0c 100644
> > --- a/include/uapi/linux/bpf.h
> > +++ b/include/uapi/linux/bpf.h
> > @@ -921,6 +921,9 @@ struct bpf_prog_info {
> > __u32 nr_map_ids;
> > __aligned_u64 map_ids;
> > char name[BPF_OBJ_NAME_LEN];
> > +   __u32 ifindex;
> > +   __u64 netns_dev;
> > +   __u64 netns_ino;
> >  } __attribute__((aligned(8)));
> >  
> >  struct bpf_map_info {
> > diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
> > index 1e6064ea3609..4d5bd1e3 100644
> > --- a/kernel/bpf/offload.c
> > +++ b/kernel/bpf/offload.c
> > @@ -16,9 +16,11 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >  
> > @@ -181,6 +183,48 @@ int bpf_prog_offload_compile(struct bpf_prog *prog)
> > return bpf_prog_offload_translate(prog);
> >  }
> >  
> > +int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
> > +  struct bpf_prog *prog)
> > +{
> > +   struct bpf_dev_offload *offload;
> > +   struct inode *ns_inode;
> > +   struct path ns_path;
> > +   int ifindex, err;
> > +   struct net *net;
> > +
> > +again:
> > +   rtnl_lock();
> > +   down_read(&bpf_devs_lock);
> > +
> > +   offload = prog->aux->offload;
> > +   if (!offload) {
> > +   up_read(&bpf_devs_lock);
> > +   rtnl_unlock();
> > +   return -ENODEV;
> > +   }
> > +
> > +   ifindex = offload->netdev->ifindex;
> > +   net = dev_net(offload->netdev);
> > +   get_net(net); /* __ns_get_path() drops the reference */
> > +
> > +   up_read(&bpf_devs_lock);
> > +   rtnl_unlock();
> > +
> > +   err = PTR_ERR_OR_ZERO(__ns_get_path(&ns_path, &net->ns));
> > +   if (err) {
> > +   if (err == -EAGAIN)
> > +   goto again;
> > +   return err;
> > +   }
> > +   ns_inode = ns_path.dentry->d_inode;
> > +
> > +   info->ifindex = ifindex;
> > +   info->netns_dev = new_encode_dev(ns_inode->i_sb->s_dev);
> > +   info->netns_ino = ns_inode->i_ino;  
> 
> I think here, we're still missing a:
> 
>   path_put(&ns_path);
> 
> Otherwise we're leaking that 

Re: [PATCH bpf-next v2 6/8] bpf: offload: report device information for offloaded programs

2017-12-27 Thread Daniel Borkmann
On 12/21/2017 10:01 PM, Jakub Kicinski wrote:
> Report to the user ifindex and namespace information of offloaded
> programs.  If device has disappeared return -ENODEV.  Specify the
> namespace using dev/inode combination.
> 
> CC: Eric W. Biederman 
> Signed-off-by: Jakub Kicinski 
> Reviewed-by: Quentin Monnet 
> ---
> v2:
>  - take RTNL lock to grab a coherent snapshot of device state
>(ifindex vs name space) and avoid races with name space
>moves (based on Eric's comment on Kirill's patch to
>peernet2id_alloc()).
> ---
>  fs/nsfs.c  |  2 +-
>  include/linux/bpf.h|  2 ++
>  include/linux/proc_ns.h|  1 +
>  include/uapi/linux/bpf.h   |  3 +++
>  kernel/bpf/offload.c   | 44 
> ++
>  kernel/bpf/syscall.c   |  6 ++
>  tools/include/uapi/linux/bpf.h |  3 +++
>  7 files changed, 60 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/nsfs.c b/fs/nsfs.c
> index 7c6f76d29f56..e50628675935 100644
> --- a/fs/nsfs.c
> +++ b/fs/nsfs.c
> @@ -51,7 +51,7 @@ static void nsfs_evict(struct inode *inode)
>   ns->ops->put(ns);
>  }
>  
> -static void *__ns_get_path(struct path *path, struct ns_common *ns)
> +void *__ns_get_path(struct path *path, struct ns_common *ns)
>  {
>   struct vfsmount *mnt = nsfs_mnt;
>   struct dentry *dentry;
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 9a916ab34299..7810ae57b357 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -531,6 +531,8 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
>  
>  int bpf_prog_offload_compile(struct bpf_prog *prog);
>  void bpf_prog_offload_destroy(struct bpf_prog *prog);
> +int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
> +struct bpf_prog *prog);
>  
>  #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
>  int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
> diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
> index 2ff18c9840a7..1733359cf713 100644
> --- a/include/linux/proc_ns.h
> +++ b/include/linux/proc_ns.h
> @@ -76,6 +76,7 @@ static inline int ns_alloc_inum(struct ns_common *ns)
>  
>  extern struct file *proc_ns_fget(int fd);
>  #define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private)
> +extern void *__ns_get_path(struct path *path, struct ns_common *ns);
>  extern void *ns_get_path(struct path *path, struct task_struct *task,
>   const struct proc_ns_operations *ns_ops);
>  
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index d01f1cb3cfc0..72b37fc3bc0c 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -921,6 +921,9 @@ struct bpf_prog_info {
>   __u32 nr_map_ids;
>   __aligned_u64 map_ids;
>   char name[BPF_OBJ_NAME_LEN];
> + __u32 ifindex;
> + __u64 netns_dev;
> + __u64 netns_ino;
>  } __attribute__((aligned(8)));
>  
>  struct bpf_map_info {
> diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
> index 1e6064ea3609..4d5bd1e3 100644
> --- a/kernel/bpf/offload.c
> +++ b/kernel/bpf/offload.c
> @@ -16,9 +16,11 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  
> @@ -181,6 +183,48 @@ int bpf_prog_offload_compile(struct bpf_prog *prog)
>   return bpf_prog_offload_translate(prog);
>  }
>  
> +int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
> +struct bpf_prog *prog)
> +{
> + struct bpf_dev_offload *offload;
> + struct inode *ns_inode;
> + struct path ns_path;
> + int ifindex, err;
> + struct net *net;
> +
> +again:
> + rtnl_lock();
> + down_read(&bpf_devs_lock);
> +
> + offload = prog->aux->offload;
> + if (!offload) {
> + up_read(&bpf_devs_lock);
> + rtnl_unlock();
> + return -ENODEV;
> + }
> +
> + ifindex = offload->netdev->ifindex;
> + net = dev_net(offload->netdev);
> + get_net(net); /* __ns_get_path() drops the reference */
> +
> + up_read(&bpf_devs_lock);
> + rtnl_unlock();
> +
> + err = PTR_ERR_OR_ZERO(__ns_get_path(&ns_path, &net->ns));
> + if (err) {
> + if (err == -EAGAIN)
> + goto again;
> + return err;
> + }
> + ns_inode = ns_path.dentry->d_inode;
> +
> + info->ifindex = ifindex;
> + info->netns_dev = new_encode_dev(ns_inode->i_sb->s_dev);
> + info->netns_ino = ns_inode->i_ino;

I think here, we're still missing a:

path_put(&ns_path);

Otherwise we're leaking that reference, no?

Looking at perf, it does similar fetch of dev/ino pair through
perf_fill_ns_link_info(). Only difference is that task / ns_ops
are used to lookup the actual ns from a task, which we already
have in our case (netns) through netdev.

I've been thinking, 

Re: [PATCH bpf-next v2 6/8] bpf: offload: report device information for offloaded programs

2017-12-26 Thread Alexei Starovoitov
On Thu, Dec 21, 2017 at 01:01:18PM -0800, Jakub Kicinski wrote:
> Report to the user ifindex and namespace information of offloaded
> programs.  If device has disappeared return -ENODEV.  Specify the
> namespace using dev/inode combination.
> 
> CC: Eric W. Biederman 
> Signed-off-by: Jakub Kicinski 
> Reviewed-by: Quentin Monnet 
> ---
> v2:
>  - take RTNL lock to grab a coherent snapshot of device state
>(ifindex vs name space) and avoid races with name space
>moves (based on Eric's comment on Kirill's patch to
>peernet2id_alloc()).

Eric, could you please review this patch.
The whole set is blocked on it.

Thanks

> ---
>  fs/nsfs.c  |  2 +-
>  include/linux/bpf.h|  2 ++
>  include/linux/proc_ns.h|  1 +
>  include/uapi/linux/bpf.h   |  3 +++
>  kernel/bpf/offload.c   | 44 
> ++
>  kernel/bpf/syscall.c   |  6 ++
>  tools/include/uapi/linux/bpf.h |  3 +++
>  7 files changed, 60 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/nsfs.c b/fs/nsfs.c
> index 7c6f76d29f56..e50628675935 100644
> --- a/fs/nsfs.c
> +++ b/fs/nsfs.c
> @@ -51,7 +51,7 @@ static void nsfs_evict(struct inode *inode)
>   ns->ops->put(ns);
>  }
>  
> -static void *__ns_get_path(struct path *path, struct ns_common *ns)
> +void *__ns_get_path(struct path *path, struct ns_common *ns)
>  {
>   struct vfsmount *mnt = nsfs_mnt;
>   struct dentry *dentry;
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 9a916ab34299..7810ae57b357 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -531,6 +531,8 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
>  
>  int bpf_prog_offload_compile(struct bpf_prog *prog);
>  void bpf_prog_offload_destroy(struct bpf_prog *prog);
> +int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
> +struct bpf_prog *prog);
>  
>  #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
>  int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
> diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
> index 2ff18c9840a7..1733359cf713 100644
> --- a/include/linux/proc_ns.h
> +++ b/include/linux/proc_ns.h
> @@ -76,6 +76,7 @@ static inline int ns_alloc_inum(struct ns_common *ns)
>  
>  extern struct file *proc_ns_fget(int fd);
>  #define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private)
> +extern void *__ns_get_path(struct path *path, struct ns_common *ns);
>  extern void *ns_get_path(struct path *path, struct task_struct *task,
>   const struct proc_ns_operations *ns_ops);
>  
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index d01f1cb3cfc0..72b37fc3bc0c 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -921,6 +921,9 @@ struct bpf_prog_info {
>   __u32 nr_map_ids;
>   __aligned_u64 map_ids;
>   char name[BPF_OBJ_NAME_LEN];
> + __u32 ifindex;
> + __u64 netns_dev;
> + __u64 netns_ino;
>  } __attribute__((aligned(8)));
>  
>  struct bpf_map_info {
> diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
> index 1e6064ea3609..4d5bd1e3 100644
> --- a/kernel/bpf/offload.c
> +++ b/kernel/bpf/offload.c
> @@ -16,9 +16,11 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  
> @@ -181,6 +183,48 @@ int bpf_prog_offload_compile(struct bpf_prog *prog)
>   return bpf_prog_offload_translate(prog);
>  }
>  
> +int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
> +struct bpf_prog *prog)
> +{
> + struct bpf_dev_offload *offload;
> + struct inode *ns_inode;
> + struct path ns_path;
> + int ifindex, err;
> + struct net *net;
> +
> +again:
> + rtnl_lock();
> + down_read(&bpf_devs_lock);
> +
> + offload = prog->aux->offload;
> + if (!offload) {
> + up_read(&bpf_devs_lock);
> + rtnl_unlock();
> + return -ENODEV;
> + }
> +
> + ifindex = offload->netdev->ifindex;
> + net = dev_net(offload->netdev);
> + get_net(net); /* __ns_get_path() drops the reference */
> +
> + up_read(&bpf_devs_lock);
> + rtnl_unlock();
> +
> + err = PTR_ERR_OR_ZERO(__ns_get_path(&ns_path, &net->ns));
> + if (err) {
> + if (err == -EAGAIN)
> + goto again;
> + return err;
> + }
> + ns_inode = ns_path.dentry->d_inode;
> +
> + info->ifindex = ifindex;
> + info->netns_dev = new_encode_dev(ns_inode->i_sb->s_dev);
> + info->netns_ino = ns_inode->i_ino;
> +
> + return 0;
> +}
> +
>  const struct bpf_prog_ops bpf_offload_prog_ops = {
>  };
>  
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 7d9f5b0f0e49..20444fd678d0 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ 

[PATCH bpf-next v2 6/8] bpf: offload: report device information for offloaded programs

2017-12-21 Thread Jakub Kicinski
Report to the user ifindex and namespace information of offloaded
programs.  If device has disappeared return -ENODEV.  Specify the
namespace using dev/inode combination.

CC: Eric W. Biederman 
Signed-off-by: Jakub Kicinski 
Reviewed-by: Quentin Monnet 
---
v2:
 - take RTNL lock to grab a coherent snapshot of device state
   (ifindex vs name space) and avoid races with name space
   moves (based on Eric's comment on Kirill's patch to
   peernet2id_alloc()).
---
 fs/nsfs.c  |  2 +-
 include/linux/bpf.h|  2 ++
 include/linux/proc_ns.h|  1 +
 include/uapi/linux/bpf.h   |  3 +++
 kernel/bpf/offload.c   | 44 ++
 kernel/bpf/syscall.c   |  6 ++
 tools/include/uapi/linux/bpf.h |  3 +++
 7 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/fs/nsfs.c b/fs/nsfs.c
index 7c6f76d29f56..e50628675935 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -51,7 +51,7 @@ static void nsfs_evict(struct inode *inode)
ns->ops->put(ns);
 }
 
-static void *__ns_get_path(struct path *path, struct ns_common *ns)
+void *__ns_get_path(struct path *path, struct ns_common *ns)
 {
struct vfsmount *mnt = nsfs_mnt;
struct dentry *dentry;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9a916ab34299..7810ae57b357 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -531,6 +531,8 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
 
 int bpf_prog_offload_compile(struct bpf_prog *prog);
 void bpf_prog_offload_destroy(struct bpf_prog *prog);
+int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
+  struct bpf_prog *prog);
 
 #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
 int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 2ff18c9840a7..1733359cf713 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -76,6 +76,7 @@ static inline int ns_alloc_inum(struct ns_common *ns)
 
 extern struct file *proc_ns_fget(int fd);
 #define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private)
+extern void *__ns_get_path(struct path *path, struct ns_common *ns);
 extern void *ns_get_path(struct path *path, struct task_struct *task,
const struct proc_ns_operations *ns_ops);
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d01f1cb3cfc0..72b37fc3bc0c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -921,6 +921,9 @@ struct bpf_prog_info {
__u32 nr_map_ids;
__aligned_u64 map_ids;
char name[BPF_OBJ_NAME_LEN];
+   __u32 ifindex;
+   __u64 netns_dev;
+   __u64 netns_ino;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 1e6064ea3609..4d5bd1e3 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -16,9 +16,11 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -181,6 +183,48 @@ int bpf_prog_offload_compile(struct bpf_prog *prog)
return bpf_prog_offload_translate(prog);
 }
 
+int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
+  struct bpf_prog *prog)
+{
+   struct bpf_dev_offload *offload;
+   struct inode *ns_inode;
+   struct path ns_path;
+   int ifindex, err;
+   struct net *net;
+
+again:
+   rtnl_lock();
+   down_read(&bpf_devs_lock);
+
+   offload = prog->aux->offload;
+   if (!offload) {
+   up_read(&bpf_devs_lock);
+   rtnl_unlock();
+   return -ENODEV;
+   }
+
+   ifindex = offload->netdev->ifindex;
+   net = dev_net(offload->netdev);
+   get_net(net); /* __ns_get_path() drops the reference */
+
+   up_read(&bpf_devs_lock);
+   rtnl_unlock();
+
+   err = PTR_ERR_OR_ZERO(__ns_get_path(&ns_path, &net->ns));
+   if (err) {
+   if (err == -EAGAIN)
+   goto again;
+   return err;
+   }
+   ns_inode = ns_path.dentry->d_inode;
+
+   info->ifindex = ifindex;
+   info->netns_dev = new_encode_dev(ns_inode->i_sb->s_dev);
+   info->netns_ino = ns_inode->i_ino;
+
+   return 0;
+}
+
 const struct bpf_prog_ops bpf_offload_prog_ops = {
 };
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7d9f5b0f0e49..20444fd678d0 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1624,6 +1624,12 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
return -EFAULT;
}
 
+   if (bpf_prog_is_dev_bound(prog->aux)) {
+   err = bpf_prog_offload_info_fill(&info, prog);
+   if (err)
+   return err;
+   }
+
 done:
if (copy_to_user(uinfo, &info, info_len)