Re: [bpf-next PATCH 1/2] bpf: allow sk_msg programs to read sock fields

2018-05-17 Thread John Fastabend
On 05/17/2018 11:17 AM, Martin KaFai Lau wrote:
> On Thu, May 17, 2018 at 08:54:04AM -0700, John Fastabend wrote:
>> Currently sk_msg programs only have access to the raw data. However,
>> it is often useful when building policies to have the policies specific
>> to the socket endpoint. This allows using the socket tuple as input
>> into filters, etc.
>>
>> This patch adds ctx access to the sock fields.
>>
>> Signed-off-by: John Fastabend 
>> ---
>>  include/linux/filter.h   |1 
>>  include/uapi/linux/bpf.h |8 +++
>>  kernel/bpf/sockmap.c |1 
>>  net/core/filter.c|  114 
>> +-
> It is indeed a lot of dup lines with sock_ops_convert_ctx_access()
> as you mentioned in the cover.
> 
> Other than that, LGTM.
> 
> Acked-by: Martin KaFai Lau 
> 
>>  4 files changed, 121 insertions(+), 3 deletions(-)
>>
>> diff --git a/include/linux/filter.h b/include/linux/filter.h
>> index 9dbcb9d..d358d18 100644
>> --- a/include/linux/filter.h
>> +++ b/include/linux/filter.h
>> @@ -517,6 +517,7 @@ struct sk_msg_buff {
>>  bool sg_copy[MAX_SKB_FRAGS];
>>  __u32 flags;
>>  struct sock *sk_redir;
>> +struct sock *sk;
>>  struct sk_buff *skb;
>>  struct list_head list;
>>  };
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index d94d333..97446bb 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -2176,6 +2176,14 @@ enum sk_action {
>>  struct sk_msg_md {
>>  void *data;
>>  void *data_end;
>> +
>> +__u32 family;
>> +__u32 remote_ip4;   /* Stored in network byte order */
>> +__u32 local_ip4;/* Stored in network byte order */
>> +__u32 remote_ip6[4];/* Stored in network byte order */
>> +__u32 local_ip6[4]; /* Stored in network byte order */
>> +__u32 remote_port;  /* Stored in network byte order */
>> +__u32 local_port;   /* stored in host byte order */
> This ordering inconsistency could be a trap to write bpf_prog
> but I guess it is too late to change now considering
> bpf_sock_ops is also using this convention.
> 

Yep, when writing both bpf_sock_ops programs and sk_msg based
programs it's nice to have them both use the same semantics. The
two programs, at least in my experience, are used together
typically sharing maps and heavily dependent on one another.

> Just curious, we cannot always assume inet_sk and then uses
> its inet_sport?
> 

For now we only support SOCK_STREAM so I guess we could also
use inet_sport but then it wouldn't align with bpf_sock_ops.
If we want to add it later we can put another field in there
to use it "__u32 source_port". For now though I prefer to keep
the bpf_sock_ops and sk_msg_md sock field access aligned.

.John


Re: [bpf-next PATCH 1/2] bpf: allow sk_msg programs to read sock fields

2018-05-17 Thread Martin KaFai Lau
On Thu, May 17, 2018 at 08:54:04AM -0700, John Fastabend wrote:
> Currently sk_msg programs only have access to the raw data. However,
> it is often useful when building policies to have the policies specific
> to the socket endpoint. This allows using the socket tuple as input
> into filters, etc.
> 
> This patch adds ctx access to the sock fields.
> 
> Signed-off-by: John Fastabend 
> ---
>  include/linux/filter.h   |1 
>  include/uapi/linux/bpf.h |8 +++
>  kernel/bpf/sockmap.c |1 
>  net/core/filter.c|  114 
> +-
It is indeed a lot of dup lines with sock_ops_convert_ctx_access()
as you mentioned in the cover.

Other than that, LGTM.

Acked-by: Martin KaFai Lau 

>  4 files changed, 121 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/filter.h b/include/linux/filter.h
> index 9dbcb9d..d358d18 100644
> --- a/include/linux/filter.h
> +++ b/include/linux/filter.h
> @@ -517,6 +517,7 @@ struct sk_msg_buff {
>   bool sg_copy[MAX_SKB_FRAGS];
>   __u32 flags;
>   struct sock *sk_redir;
> + struct sock *sk;
>   struct sk_buff *skb;
>   struct list_head list;
>  };
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index d94d333..97446bb 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -2176,6 +2176,14 @@ enum sk_action {
>  struct sk_msg_md {
>   void *data;
>   void *data_end;
> +
> + __u32 family;
> + __u32 remote_ip4;   /* Stored in network byte order */
> + __u32 local_ip4;/* Stored in network byte order */
> + __u32 remote_ip6[4];/* Stored in network byte order */
> + __u32 local_ip6[4]; /* Stored in network byte order */
> + __u32 remote_port;  /* Stored in network byte order */
> + __u32 local_port;   /* stored in host byte order */
This ordering inconsistency could be a trap to write bpf_prog
but I guess it is too late to change now considering
bpf_sock_ops is also using this convention.

Just curious, can we not always assume inet_sk and then use
its inet_sport?


[bpf-next PATCH 1/2] bpf: allow sk_msg programs to read sock fields

2018-05-17 Thread John Fastabend
Currently sk_msg programs only have access to the raw data. However,
it is often useful when building policies to have the policies specific
to the socket endpoint. This allows using the socket tuple as input
into filters, etc.

This patch adds ctx access to the sock fields.

Signed-off-by: John Fastabend 
---
 include/linux/filter.h   |1 
 include/uapi/linux/bpf.h |8 +++
 kernel/bpf/sockmap.c |1 
 net/core/filter.c|  114 +-
 4 files changed, 121 insertions(+), 3 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 9dbcb9d..d358d18 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -517,6 +517,7 @@ struct sk_msg_buff {
bool sg_copy[MAX_SKB_FRAGS];
__u32 flags;
struct sock *sk_redir;
+   struct sock *sk;
struct sk_buff *skb;
struct list_head list;
 };
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d94d333..97446bb 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2176,6 +2176,14 @@ enum sk_action {
 struct sk_msg_md {
void *data;
void *data_end;
+
+   __u32 family;
+   __u32 remote_ip4;   /* Stored in network byte order */
+   __u32 local_ip4;/* Stored in network byte order */
+   __u32 remote_ip6[4];/* Stored in network byte order */
+   __u32 local_ip6[4]; /* Stored in network byte order */
+   __u32 remote_port;  /* Stored in network byte order */
+   __u32 local_port;   /* stored in host byte order */
 };
 
 #define BPF_TAG_SIZE   8
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index c6de139..0ebf157 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -523,6 +523,7 @@ static unsigned int smap_do_tx_msg(struct sock *sk,
}
 
bpf_compute_data_pointers_sg(md);
+   md->sk = sk;
rc = (*prog->bpf_func)(md, prog->insnsi);
psock->apply_bytes = md->apply_bytes;
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 6d0d156..aec5eba 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5148,18 +5148,23 @@ static bool sk_msg_is_valid_access(int off, int size,
switch (off) {
case offsetof(struct sk_msg_md, data):
info->reg_type = PTR_TO_PACKET;
+   if (size != sizeof(__u64))
+   return false;
break;
case offsetof(struct sk_msg_md, data_end):
info->reg_type = PTR_TO_PACKET_END;
+   if (size != sizeof(__u64))
+   return false;
break;
+   default:
+   if (size != sizeof(__u32))
+   return false;
}
 
if (off < 0 || off >= sizeof(struct sk_msg_md))
return false;
if (off % size != 0)
return false;
-   if (size != sizeof(__u64))
-   return false;
 
return true;
 }
@@ -5835,7 +5840,8 @@ static u32 sock_ops_convert_ctx_access(enum 
bpf_access_type type,
break;
 
case offsetof(struct bpf_sock_ops, local_ip4):
-   BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_rcv_saddr) != 
4);
+   BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ skc_rcv_saddr) != 4);
 
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
  struct bpf_sock_ops_kern, sk),
@@ -6152,6 +6158,7 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type 
type,
 struct bpf_prog *prog, u32 *target_size)
 {
struct bpf_insn *insn = insn_buf;
+   int off;
 
switch (si->off) {
case offsetof(struct sk_msg_md, data):
@@ -6164,6 +6171,107 @@ static u32 sk_msg_convert_ctx_access(enum 
bpf_access_type type,
  si->dst_reg, si->src_reg,
  offsetof(struct sk_msg_buff, data_end));
break;
+   case offsetof(struct sk_msg_md, family):
+   BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
+
+   *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+ struct sk_msg_buff, sk),
+ si->dst_reg, si->src_reg,
+ offsetof(struct sk_msg_buff, sk));
+   *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
+ offsetof(struct sock_common, skc_family));
+   break;
+
+   case offsetof(struct sk_msg_md, remote_ip4):
+   BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
+
+   *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
+   struct sk_msg_buff, sk),
+ si->dst_reg, si->src_reg,
+