Re: [RFC] bpf: offload: report device information for offloaded programs

2017-11-30 Thread Jakub Kicinski
Hi Kirill,

On Thu, 30 Nov 2017 16:19:13 +0300, Kirill Tkhai wrote:
> > @@ -164,6 +166,38 @@ int bpf_prog_offload_compile(struct bpf_prog *prog)
> > return bpf_prog_offload_translate(prog);
> >  }
> >  
> > +int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
> > +  struct bpf_prog *prog)
> > +{
> > +   struct bpf_dev_offload *offload = prog->aux->offload;
> > +   struct inode *ns_inode;
> > +   struct path ns_path;
> > +   struct net *net;
> > +   int ret = 0;
> > +   void *ptr;
> > +
> > +   info->dev_bound = 1;
> > +
> > +   rtnl_lock();  
> 
> rtnl_lock() is too big lock and it is already overused in kernel.
> Can't we use smaller lock in this driver to protect bpf_prog_offload_devs?
> I suppose rwlock would be appropriate for that.
> 
> (Then, we may completely remove rtnl_lock() from bpf_prog_offload_init()
> and use readlocked dev_base_lock for __dev_get_by_index() instead and
> the new small_rwlock to link in the list.
> 
> Not sure about bpf_prog_offload_verifier_prep() and 
> bpf_prog_offload_translate()
> and which context expect net_device_ops->ndo_bpf users. Either they need rtnl
> or not).

Thanks for the comments, removing the use of rtnl_lock is definitely on
my todo list!


Re: [RFC] bpf: offload: report device information for offloaded programs

2017-11-30 Thread Kirill Tkhai
Hi, Jakub,

please, read comments below.

On 30.11.2017 03:22, Jakub Kicinski wrote:
> Report to the user ifindex and namespace information of offloaded
> programs.  Always set dev_bound to true if program was loaded for
> a device which has been since removed.  Specify the namespace
> using dev/inode combination.
> 
> Signed-off-by: Jakub Kicinski 
> Reviewed-by: Simon Horman 
> Reviewed-by: Quentin Monnet 
> ---
>  fs/nsfs.c  |  2 +-
>  include/linux/bpf.h|  2 ++
>  include/linux/proc_ns.h|  1 +
>  include/uapi/linux/bpf.h   |  5 +
>  kernel/bpf/offload.c   | 34 ++
>  kernel/bpf/syscall.c   |  6 ++
>  tools/include/uapi/linux/bpf.h |  5 +
>  7 files changed, 54 insertions(+), 1 deletion(-)

[snip]

> diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
> index 8455b89d1bbf..da98349c647d 100644
> --- a/kernel/bpf/offload.c
> +++ b/kernel/bpf/offload.c
> @@ -16,9 +16,11 @@
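>  #include <linux/bpf.h>
>  #include <linux/bpf_verifier.h>
>  #include <linux/bug.h>
> +#include <linux/kdev_t.h>
>  #include <linux/list.h>
>  #include <linux/netdevice.h>
>  #include <linux/printk.h>
> +#include <linux/proc_ns.h>
>  #include <linux/rtnetlink.h>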
>  
>  /* protected by RTNL */
> @@ -164,6 +166,38 @@ int bpf_prog_offload_compile(struct bpf_prog *prog)
>   return bpf_prog_offload_translate(prog);
>  }
>  
> +int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
> +struct bpf_prog *prog)
> +{
> + struct bpf_dev_offload *offload = prog->aux->offload;
> + struct inode *ns_inode;
> + struct path ns_path;
> + struct net *net;
> + int ret = 0;
> + void *ptr;
> +
> + info->dev_bound = 1;
> +
> + rtnl_lock();

rtnl_lock() is too big a lock, and it is already overused in the kernel.
Can't we use a smaller lock in this driver to protect bpf_prog_offload_devs?
I suppose an rwlock would be appropriate for that.

(Then we may completely remove rtnl_lock() from bpf_prog_offload_init(),
and instead use a read-locked dev_base_lock for __dev_get_by_index() and
the new small_rwlock for linking into the list.

I'm not sure about bpf_prog_offload_verifier_prep() and
bpf_prog_offload_translate(), or which context the net_device_ops->ndo_bpf
users expect, i.e. whether they need rtnl or not.)

Then the below hunk:

> + if (!offload->netdev)
> + goto out;
> +
> + net = dev_net(offload->netdev);
> + get_net(net); /* __ns_get_path() drops the reference */

will be:

read_lock(&small_rwlock);
if (!offload->netdev)
	goto out;

net = dev_net(offload->netdev);
get_net(net); /* __ns_get_path() drops the reference */
read_unlock(&small_rwlock);

and rtnl_lock() won't be touched.

> + ptr = __ns_get_path(&ns_path, &net->ns);
> + ret = PTR_ERR_OR_ZERO(ptr);
> + if (ret)
> + goto out;
> + ns_inode = ns_path.dentry->d_inode;
> +
> + info->ns_dev = new_encode_dev(ns_inode->i_sb->s_dev);
> + info->ns_inode = ns_inode->i_ino;
> + info->ifindex = offload->netdev->ifindex;
> +out:
> + rtnl_unlock();
> + return ret;
> +}
> +

[snip]

Kirill


[RFC] bpf: offload: report device information for offloaded programs

2017-11-29 Thread Jakub Kicinski
Report to the user the ifindex and namespace information of offloaded
programs.  Always set dev_bound to true, even if the device the program
was loaded for has since been removed.  Specify the namespace using a
dev/inode combination.

Signed-off-by: Jakub Kicinski 
Reviewed-by: Simon Horman 
Reviewed-by: Quentin Monnet 
---
 fs/nsfs.c  |  2 +-
 include/linux/bpf.h|  2 ++
 include/linux/proc_ns.h|  1 +
 include/uapi/linux/bpf.h   |  5 +
 kernel/bpf/offload.c   | 34 ++
 kernel/bpf/syscall.c   |  6 ++
 tools/include/uapi/linux/bpf.h |  5 +
 7 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/fs/nsfs.c b/fs/nsfs.c
index ef243e14b6eb..d2b89372544a 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -51,7 +51,7 @@ static void nsfs_evict(struct inode *inode)
ns->ops->put(ns);
 }
 
-static void *__ns_get_path(struct path *path, struct ns_common *ns)
+void *__ns_get_path(struct path *path, struct ns_common *ns)
 {
struct vfsmount *mnt = nsfs_mnt;
struct dentry *dentry;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e55e4255a210..fc7ab26e10bf 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -516,6 +516,8 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
 
 int bpf_prog_offload_compile(struct bpf_prog *prog);
 void bpf_prog_offload_destroy(struct bpf_prog *prog);
+int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
+  struct bpf_prog *prog);
 
 #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
 int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 2ff18c9840a7..1733359cf713 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -76,6 +76,7 @@ static inline int ns_alloc_inum(struct ns_common *ns)
 
 extern struct file *proc_ns_fget(int fd);
 #define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private)
+extern void *__ns_get_path(struct path *path, struct ns_common *ns);
 extern void *ns_get_path(struct path *path, struct task_struct *task,
const struct proc_ns_operations *ns_ops);
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4c223ab30293..3183674496a2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -910,6 +910,11 @@ struct bpf_prog_info {
__u32 nr_map_ids;
__aligned_u64 map_ids;
char name[BPF_OBJ_NAME_LEN];
+   __u32 dev_bound:1;
+   __u32 reserved:31;
+   __u32 ifindex;
+   __u64 ns_dev;
+   __u64 ns_inode;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
index 8455b89d1bbf..da98349c647d 100644
--- a/kernel/bpf/offload.c
+++ b/kernel/bpf/offload.c
@@ -16,9 +16,11 @@
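 #include <linux/bpf.h>
 #include <linux/bpf_verifier.h>
 #include <linux/bug.h>
+#include <linux/kdev_t.h>
 #include <linux/list.h>
 #include <linux/netdevice.h>
 #include <linux/printk.h>
+#include <linux/proc_ns.h>
 #include <linux/rtnetlink.h>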
 
 /* protected by RTNL */
@@ -164,6 +166,38 @@ int bpf_prog_offload_compile(struct bpf_prog *prog)
return bpf_prog_offload_translate(prog);
 }
 
+int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
+  struct bpf_prog *prog)
+{
+   struct bpf_dev_offload *offload = prog->aux->offload;
+   struct inode *ns_inode;
+   struct path ns_path;
+   struct net *net;
+   int ret = 0;
+   void *ptr;
+
+   info->dev_bound = 1;
+
+   rtnl_lock();
+   if (!offload->netdev)
+   goto out;
+
+   net = dev_net(offload->netdev);
+   get_net(net); /* __ns_get_path() drops the reference */
+   ptr = __ns_get_path(&ns_path, &net->ns);
+   ret = PTR_ERR_OR_ZERO(ptr);
+   if (ret)
+   goto out;
+   ns_inode = ns_path.dentry->d_inode;
+
+   info->ns_dev = new_encode_dev(ns_inode->i_sb->s_dev);
+   info->ns_inode = ns_inode->i_ino;
+   info->ifindex = offload->netdev->ifindex;
+out:
+   rtnl_unlock();
+   return ret;
+}
+
 const struct bpf_prog_ops bpf_offload_prog_ops = {
 };
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2c4cfeaa8d5e..101ee3a3e80e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1616,6 +1616,12 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
return -EFAULT;
}
 
+   if (bpf_prog_is_dev_bound(prog->aux)) {
+   err = bpf_prog_offload_info_fill(&info, prog);
+   if (err)
+   return err;
+   }
+
 done:
if (copy_to_user(uinfo, &info, info_len) ||
put_user(info_len, &uattr->info.info_len))
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4c223ab30293..3183674496a2 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -910,6 +910,11 @@ struct bpf_prog_info {
__u32