[PATCH v7 bpf-next 1/2] bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB

2018-10-19 Thread Song Liu
BPF programs of BPF_PROG_TYPE_CGROUP_SKB need to access headers in the
skb. This patch enables direct access of skb for these programs.

Two helper functions bpf_compute_and_save_data_end() and
bpf_restore_data_end() are introduced. They are used in
__cgroup_bpf_run_filter_skb(), to compute proper data_end for the
BPF program, and restore original data afterwards.

Signed-off-by: Song Liu 
---
 include/linux/filter.h | 21 +
 kernel/bpf/cgroup.c|  6 ++
 net/core/filter.c  | 36 +++-
 3 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5771874bc01e..91b4c934f02e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -548,6 +548,27 @@ static inline void bpf_compute_data_pointers(struct 
sk_buff *skb)
cb->data_end  = skb->data + skb_headlen(skb);
 }
 
+/* Similar to bpf_compute_data_pointers(), except that it saves the
+ * original data_end (in *saved_data_end) so it can be restored later.
+ */
+static inline void bpf_compute_and_save_data_end(
+   struct sk_buff *skb, void **saved_data_end)
+{
+   struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+
+   *saved_data_end = cb->data_end;
+   cb->data_end  = skb->data + skb_headlen(skb);
+}
+
+/* Restore data_end saved by bpf_compute_and_save_data_end(). */
+static inline void bpf_restore_data_end(
+   struct sk_buff *skb, void *saved_data_end)
+{
+   struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+
+   cb->data_end = saved_data_end;
+}
+
 static inline u8 *bpf_skb_cb(struct sk_buff *skb)
 {
/* eBPF programs may read/write skb->cb[] area to transfer meta
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 00f6ed2e4f9a..9425c2fb872f 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -553,6 +553,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 {
unsigned int offset = skb->data - skb_network_header(skb);
struct sock *save_sk;
+   void *saved_data_end;
struct cgroup *cgrp;
int ret;
 
@@ -566,8 +567,13 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
save_sk = skb->sk;
skb->sk = sk;
__skb_push(skb, offset);
+
+   /* compute pointers for the bpf prog */
+   bpf_compute_and_save_data_end(skb, &saved_data_end);
+
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
 bpf_prog_run_save_cb);
+   bpf_restore_data_end(skb, saved_data_end);
__skb_pull(skb, offset);
skb->sk = save_sk;
return ret == 1 ? 0 : -EPERM;
diff --git a/net/core/filter.c b/net/core/filter.c
index 1a3ac6c46873..e3ca30bd6840 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5346,6 +5346,40 @@ static bool sk_filter_is_valid_access(int off, int size,
return bpf_skb_is_valid_access(off, size, type, prog, info);
 }
 
+static bool cg_skb_is_valid_access(int off, int size,
+  enum bpf_access_type type,
+  const struct bpf_prog *prog,
+  struct bpf_insn_access_aux *info)
+{
+   switch (off) {
+   case bpf_ctx_range(struct __sk_buff, tc_classid):
+   case bpf_ctx_range(struct __sk_buff, data_meta):
+   case bpf_ctx_range(struct __sk_buff, flow_keys):
+   return false;
+   }
+   if (type == BPF_WRITE) {
+   switch (off) {
+   case bpf_ctx_range(struct __sk_buff, mark):
+   case bpf_ctx_range(struct __sk_buff, priority):
+   case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+   break;
+   default:
+   return false;
+   }
+   }
+
+   switch (off) {
+   case bpf_ctx_range(struct __sk_buff, data):
+   info->reg_type = PTR_TO_PACKET;
+   break;
+   case bpf_ctx_range(struct __sk_buff, data_end):
+   info->reg_type = PTR_TO_PACKET_END;
+   break;
+   }
+
+   return bpf_skb_is_valid_access(off, size, type, prog, info);
+}
+
 static bool lwt_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
@@ -7038,7 +7072,7 @@ const struct bpf_prog_ops xdp_prog_ops = {
 
 const struct bpf_verifier_ops cg_skb_verifier_ops = {
.get_func_proto = cg_skb_func_proto,
-   .is_valid_access= sk_filter_is_valid_access,
+   .is_valid_access= cg_skb_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
 };
 
-- 
2.17.1



[PATCH v7 bpf-next 2/2] bpf: add tests for direct packet access from CGROUP_SKB

2018-10-19 Thread Song Liu
Tests are added to make sure CGROUP_SKB cannot access:
  tc_classid, data_meta, flow_keys

and can read and write:
  mark, priority, and cb[0-4]

and can read other fields.

To make selftest with skb->sk work, a dummy sk is added in
bpf_prog_test_run_skb().

Signed-off-by: Song Liu 
---
 net/bpf/test_run.c  |   8 +
 tools/testing/selftests/bpf/test_verifier.c | 171 
 2 files changed, 179 insertions(+)

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 0c423b8cd75c..ae2ab89a9291 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -10,6 +10,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
@@ -106,6 +108,8 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 
size,
return data;
 }
 
+static struct sock test_run_sk = {0};
+
 int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
  union bpf_attr __user *uattr)
 {
@@ -137,11 +141,15 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const 
union bpf_attr *kattr,
break;
}
 
+   sock_net_set(&test_run_sk, current->nsproxy->net_ns);
+   sock_init_data(NULL, &test_run_sk);
+
skb = build_skb(data, 0);
if (!skb) {
kfree(data);
return -ENOMEM;
}
+   skb->sk = &test_run_sk;
 
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
__skb_put(skb, size);
diff --git a/tools/testing/selftests/bpf/test_verifier.c 
b/tools/testing/selftests/bpf/test_verifier.c
index cf4cd32b6772..f1ae8d09770f 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -4862,6 +4862,177 @@ static struct bpf_test tests[] = {
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
+   {
+   "direct packet read test#1 for CGROUP_SKB",
+   .insns = {
+   BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+   offsetof(struct __sk_buff, data)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+   offsetof(struct __sk_buff, data_end)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+   offsetof(struct __sk_buff, len)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+   offsetof(struct __sk_buff, pkt_type)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+   offsetof(struct __sk_buff, mark)),
+   BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+   offsetof(struct __sk_buff, mark)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+   offsetof(struct __sk_buff, queue_mapping)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+   offsetof(struct __sk_buff, protocol)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+   offsetof(struct __sk_buff, vlan_present)),
+   BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+   BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+   BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+   BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+   BPF_MOV64_IMM(BPF_REG_0, 0),
+   BPF_EXIT_INSN(),
+   },
+   .result = ACCEPT,
+   .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+   },
+   {
+   "direct packet read test#2 for CGROUP_SKB",
+   .insns = {
+   BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+   offsetof(struct __sk_buff, vlan_tci)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+   offsetof(struct __sk_buff, vlan_proto)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+   offsetof(struct __sk_buff, priority)),
+   BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+   offsetof(struct __sk_buff, priority)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+   offsetof(struct __sk_buff,
+ingress_ifindex)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+   offsetof(struct __sk_buff, tc_index)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+   offsetof(struct __sk_buff, hash)),
+

[PATCH v6 bpf-next 0/2] bpf: add cg_skb_is_valid_access

2018-10-18 Thread Song Liu
Changes v5 -> v6:
1. Fixed dummy sk in bpf_prog_test_run_skb() as suggested by Eric Dumazet.

Changes v4 -> v5:
1. Replaced bpf_compute_and_save_data_pointers() with
   bpf_compute_and_save_data_end();
   Replaced bpf_restore_data_pointers() with bpf_restore_data_end().
2. Fixed indentation in test_verifier.c

Changes v3 -> v4:
1. Fixed crash issue reported by Alexei.

Changes v2 -> v3:
1. Added helper function bpf_compute_and_save_data_pointers() and
   bpf_restore_data_pointers().

Changes v1 -> v2:
1. Updated the list of read-only fields, and read-write fields.
2. Added dummy sk to bpf_prog_test_run_skb().

This set enables BPF program of type BPF_PROG_TYPE_CGROUP_SKB to access
some __skb_buff data directly.

Song Liu (2):
  bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB
  bpf: add tests for direct packet access from CGROUP_SKB

 include/linux/filter.h  |  21 +++
 kernel/bpf/cgroup.c |   6 +
 net/bpf/test_run.c  |   7 +
 net/core/filter.c   |  36 -
 tools/testing/selftests/bpf/test_verifier.c | 171 
 5 files changed, 240 insertions(+), 1 deletion(-)

--
2.17.1


[PATCH v6 bpf-next 2/2] bpf: add tests for direct packet access from CGROUP_SKB

2018-10-18 Thread Song Liu
Tests are added to make sure CGROUP_SKB cannot access:
  tc_classid, data_meta, flow_keys

and can read and write:
  mark, priority, and cb[0-4]

and can read other fields.

To make selftest with skb->sk work, a dummy sk is added in
bpf_prog_test_run_skb().

Signed-off-by: Song Liu 
---
 net/bpf/test_run.c  |   7 +
 tools/testing/selftests/bpf/test_verifier.c | 171 
 2 files changed, 178 insertions(+)

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 0c423b8cd75c..87ea279cb095 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -10,6 +10,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
@@ -115,6 +117,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const 
union bpf_attr *kattr,
u32 retval, duration;
int hh_len = ETH_HLEN;
struct sk_buff *skb;
+   struct sock sk = {0};
void *data;
int ret;
 
@@ -137,11 +140,15 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const 
union bpf_attr *kattr,
break;
}
 
+   sock_net_set(&sk, current->nsproxy->net_ns);
+   sock_init_data(NULL, &sk);
+
skb = build_skb(data, 0);
if (!skb) {
kfree(data);
return -ENOMEM;
}
+   skb->sk = &sk;
 
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
__skb_put(skb, size);
diff --git a/tools/testing/selftests/bpf/test_verifier.c 
b/tools/testing/selftests/bpf/test_verifier.c
index cf4cd32b6772..f1ae8d09770f 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -4862,6 +4862,177 @@ static struct bpf_test tests[] = {
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
+   {
+   "direct packet read test#1 for CGROUP_SKB",
+   .insns = {
+   BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+   offsetof(struct __sk_buff, data)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+   offsetof(struct __sk_buff, data_end)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+   offsetof(struct __sk_buff, len)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+   offsetof(struct __sk_buff, pkt_type)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+   offsetof(struct __sk_buff, mark)),
+   BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+   offsetof(struct __sk_buff, mark)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+   offsetof(struct __sk_buff, queue_mapping)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+   offsetof(struct __sk_buff, protocol)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+   offsetof(struct __sk_buff, vlan_present)),
+   BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+   BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+   BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+   BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+   BPF_MOV64_IMM(BPF_REG_0, 0),
+   BPF_EXIT_INSN(),
+   },
+   .result = ACCEPT,
+   .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+   },
+   {
+   "direct packet read test#2 for CGROUP_SKB",
+   .insns = {
+   BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+   offsetof(struct __sk_buff, vlan_tci)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+   offsetof(struct __sk_buff, vlan_proto)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+   offsetof(struct __sk_buff, priority)),
+   BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+   offsetof(struct __sk_buff, priority)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+   offsetof(struct __sk_buff,
+ingress_ifindex)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+   offsetof(struct __sk_buff, tc_index)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+   offsetof(struct __sk_buff, hash)),
+   BPF_MOV64_IMM(BPF_REG_0, 0),
+

[PATCH v6 bpf-next 1/2] bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB

2018-10-18 Thread Song Liu
BPF programs of BPF_PROG_TYPE_CGROUP_SKB need to access headers in the
skb. This patch enables direct access of skb for these programs.

Two helper functions bpf_compute_and_save_data_end() and
bpf_restore_data_end() are introduced. They are used in
__cgroup_bpf_run_filter_skb(), to compute proper data_end for the
BPF program, and restore original data afterwards.

Signed-off-by: Song Liu 
---
 include/linux/filter.h | 21 +
 kernel/bpf/cgroup.c|  6 ++
 net/core/filter.c  | 36 +++-
 3 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5771874bc01e..91b4c934f02e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -548,6 +548,27 @@ static inline void bpf_compute_data_pointers(struct 
sk_buff *skb)
cb->data_end  = skb->data + skb_headlen(skb);
 }
 
+/* Similar to bpf_compute_data_pointers(), except that it saves the
+ * original data_end (in *saved_data_end) so it can be restored later.
+ */
+static inline void bpf_compute_and_save_data_end(
+   struct sk_buff *skb, void **saved_data_end)
+{
+   struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+
+   *saved_data_end = cb->data_end;
+   cb->data_end  = skb->data + skb_headlen(skb);
+}
+
+/* Restore data_end saved by bpf_compute_and_save_data_end(). */
+static inline void bpf_restore_data_end(
+   struct sk_buff *skb, void *saved_data_end)
+{
+   struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+
+   cb->data_end = saved_data_end;
+}
+
 static inline u8 *bpf_skb_cb(struct sk_buff *skb)
 {
/* eBPF programs may read/write skb->cb[] area to transfer meta
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 00f6ed2e4f9a..9425c2fb872f 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -553,6 +553,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 {
unsigned int offset = skb->data - skb_network_header(skb);
struct sock *save_sk;
+   void *saved_data_end;
struct cgroup *cgrp;
int ret;
 
@@ -566,8 +567,13 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
save_sk = skb->sk;
skb->sk = sk;
__skb_push(skb, offset);
+
+   /* compute pointers for the bpf prog */
+   bpf_compute_and_save_data_end(skb, &saved_data_end);
+
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
 bpf_prog_run_save_cb);
+   bpf_restore_data_end(skb, saved_data_end);
__skb_pull(skb, offset);
skb->sk = save_sk;
return ret == 1 ? 0 : -EPERM;
diff --git a/net/core/filter.c b/net/core/filter.c
index 1a3ac6c46873..e3ca30bd6840 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5346,6 +5346,40 @@ static bool sk_filter_is_valid_access(int off, int size,
return bpf_skb_is_valid_access(off, size, type, prog, info);
 }
 
+static bool cg_skb_is_valid_access(int off, int size,
+  enum bpf_access_type type,
+  const struct bpf_prog *prog,
+  struct bpf_insn_access_aux *info)
+{
+   switch (off) {
+   case bpf_ctx_range(struct __sk_buff, tc_classid):
+   case bpf_ctx_range(struct __sk_buff, data_meta):
+   case bpf_ctx_range(struct __sk_buff, flow_keys):
+   return false;
+   }
+   if (type == BPF_WRITE) {
+   switch (off) {
+   case bpf_ctx_range(struct __sk_buff, mark):
+   case bpf_ctx_range(struct __sk_buff, priority):
+   case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+   break;
+   default:
+   return false;
+   }
+   }
+
+   switch (off) {
+   case bpf_ctx_range(struct __sk_buff, data):
+   info->reg_type = PTR_TO_PACKET;
+   break;
+   case bpf_ctx_range(struct __sk_buff, data_end):
+   info->reg_type = PTR_TO_PACKET_END;
+   break;
+   }
+
+   return bpf_skb_is_valid_access(off, size, type, prog, info);
+}
+
 static bool lwt_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
@@ -7038,7 +7072,7 @@ const struct bpf_prog_ops xdp_prog_ops = {
 
 const struct bpf_verifier_ops cg_skb_verifier_ops = {
.get_func_proto = cg_skb_func_proto,
-   .is_valid_access= sk_filter_is_valid_access,
+   .is_valid_access= cg_skb_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
 };
 
-- 
2.17.1



Re: [PATCH v5 bpf-next 2/2] bpf: add tests for direct packet access from CGROUP_SKB

2018-10-18 Thread Song Liu



> On Oct 18, 2018, at 10:22 PM, Eric Dumazet  wrote:
> 
> 
> 
> On 10/18/2018 10:01 PM, Song Liu wrote:
>> Tests are added to make sure CGROUP_SKB cannot access:
>>  tc_classid, data_meta, flow_keys
>> 
>> and can read and write:
>>  mark, priority, and cb[0-4]
>> 
>> and can read other fields.
>> 
>> To make selftest with skb->sk work, a dummy sk is added in
>> bpf_prog_test_run_skb().
>> 
>> Signed-off-by: Song Liu 
>> ---
>> net/bpf/test_run.c  |   7 +
>> tools/testing/selftests/bpf/test_verifier.c | 171 
>> 2 files changed, 178 insertions(+)
>> 
>> diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
>> index 0c423b8cd75c..8dccac305268 100644
>> --- a/net/bpf/test_run.c
>> +++ b/net/bpf/test_run.c
>> @@ -10,6 +10,8 @@
>> #include 
>> #include 
>> #include 
>> +#include 
>> +#include 
>> 
>> static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
>>  struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
>> @@ -115,6 +117,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const 
>> union bpf_attr *kattr,
>>  u32 retval, duration;
>>  int hh_len = ETH_HLEN;
>>  struct sk_buff *skb;
>> +struct sock sk = {0};
> 
> Arg another dummy :/
> 
>>  void *data;
>>  int ret;
>> 
>> @@ -137,11 +140,15 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const 
>> union bpf_attr *kattr,
>>  break;
>>  }
>> 
>> +sock_net_set(&sk, &init_net);
> 
> A few lines later we use :
> 
> skb->protocol = eth_type_trans(skb, current->nsproxy->net_ns->loopback_dev);
> 
> So it looks like you want instead for consistency :
> 
> sock_net_set(&sk, current->nsproxy->net_ns);

Thanks! Will fix this in v6.

>   
> 
>> +sock_init_data(NULL, &sk);
>> +
>>  skb = build_skb(data, 0);
>>  if (!skb) {
>>  kfree(data);
>>  return -ENOMEM;
>>  }
>> +skb->sk = &sk;
>> 
> 
> Normally this would need a skb->destructor, but I guess nothing will call 
> skb_orphan()
> from this point.

Yeah, I double checked, this should be OK. 

Song

Re: [PATCH bpf-next 2/2] samples: bpf: get ifindex from ifname

2018-10-18 Thread Y Song
On Thu, Oct 18, 2018 at 1:48 PM Matteo Croce  wrote:
>
> Find the ifindex via ioctl(SIOCGIFINDEX) instead of requiring the
> numeric ifindex.

Maybe use if_nametoindex which is simpler?

>
> Signed-off-by: Matteo Croce 
> ---
>  samples/bpf/xdp1_user.c | 26 --
>  1 file changed, 24 insertions(+), 2 deletions(-)
>
> diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c
> index 4f3d824fc044..a1d0c5dcee9c 100644
> --- a/samples/bpf/xdp1_user.c
> +++ b/samples/bpf/xdp1_user.c
> @@ -15,6 +15,9 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
> +#include 
>
>  #include "bpf_util.h"
>  #include "bpf/bpf.h"
> @@ -59,7 +62,7 @@ static void poll_stats(int map_fd, int interval)
>  static void usage(const char *prog)
>  {
> fprintf(stderr,
> -   "usage: %s [OPTS] IFINDEX\n\n"
> +   "usage: %s [OPTS] IFACE\n\n"
> "OPTS:\n"
> "-Suse skb-mode\n"
> "-Nenforce native mode\n",
> @@ -74,9 +77,11 @@ int main(int argc, char **argv)
> };
> const char *optstr = "SN";
> int prog_fd, map_fd, opt;
> +   struct ifreq ifr = { 0 };
> struct bpf_object *obj;
> struct bpf_map *map;
> char filename[256];
> +   int sock;
>
> while ((opt = getopt(argc, argv, optstr)) != -1) {
> switch (opt) {
> @@ -102,7 +107,24 @@ int main(int argc, char **argv)
> return 1;
> }
>
> -   ifindex = strtoul(argv[optind], NULL, 0);
> +   sock = socket(AF_UNIX, SOCK_DGRAM, 0);
> +   if (sock == -1) {
> +   perror("socket");
> +   return 1;
> +   }
> +
> +   if (strlen(argv[optind]) >= IFNAMSIZ) {
> +   printf("invalid ifname '%s'\n", argv[optind]);
> +   return 1;
> +   }
> +
> +   strcpy(ifr.ifr_name, argv[optind]);
> +   if (ioctl(sock, SIOCGIFINDEX, &ifr) < 0) {
> +   perror("SIOCGIFINDEX");
> +   return 1;
> +   }
> +   close(sock);
> +   ifindex = ifr.ifr_ifindex;
>
> snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
> prog_load_attr.file = filename;
> --
> 2.19.1
>


Re: [PATCH bpf-next 1/2] samples: bpf: improve xdp1 example

2018-10-18 Thread Y Song
On Thu, Oct 18, 2018 at 1:48 PM Matteo Croce  wrote:
>
> Store only the total packet count for every protocol, instead of the
> whole per-cpu array.
> Use bpf_map_get_next_key() to iterate the map, instead of looking up
> all the protocols.
>
> Signed-off-by: Matteo Croce 

Acked-by: Yonghong Song 


[PATCH v5 bpf-next 2/2] bpf: add tests for direct packet access from CGROUP_SKB

2018-10-18 Thread Song Liu
Tests are added to make sure CGROUP_SKB cannot access:
  tc_classid, data_meta, flow_keys

and can read and write:
  mark, priority, and cb[0-4]

and can read other fields.

To make selftest with skb->sk work, a dummy sk is added in
bpf_prog_test_run_skb().

Signed-off-by: Song Liu 
---
 net/bpf/test_run.c  |   7 +
 tools/testing/selftests/bpf/test_verifier.c | 171 
 2 files changed, 178 insertions(+)

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 0c423b8cd75c..8dccac305268 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -10,6 +10,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
@@ -115,6 +117,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const 
union bpf_attr *kattr,
u32 retval, duration;
int hh_len = ETH_HLEN;
struct sk_buff *skb;
+   struct sock sk = {0};
void *data;
int ret;
 
@@ -137,11 +140,15 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const 
union bpf_attr *kattr,
break;
}
 
+   sock_net_set(&sk, &init_net);
+   sock_init_data(NULL, &sk);
+
skb = build_skb(data, 0);
if (!skb) {
kfree(data);
return -ENOMEM;
}
+   skb->sk = &sk;
 
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
__skb_put(skb, size);
diff --git a/tools/testing/selftests/bpf/test_verifier.c 
b/tools/testing/selftests/bpf/test_verifier.c
index cf4cd32b6772..f1ae8d09770f 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -4862,6 +4862,177 @@ static struct bpf_test tests[] = {
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
+   {
+   "direct packet read test#1 for CGROUP_SKB",
+   .insns = {
+   BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+   offsetof(struct __sk_buff, data)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+   offsetof(struct __sk_buff, data_end)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+   offsetof(struct __sk_buff, len)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+   offsetof(struct __sk_buff, pkt_type)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+   offsetof(struct __sk_buff, mark)),
+   BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+   offsetof(struct __sk_buff, mark)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+   offsetof(struct __sk_buff, queue_mapping)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+   offsetof(struct __sk_buff, protocol)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+   offsetof(struct __sk_buff, vlan_present)),
+   BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+   BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+   BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+   BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+   BPF_MOV64_IMM(BPF_REG_0, 0),
+   BPF_EXIT_INSN(),
+   },
+   .result = ACCEPT,
+   .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+   },
+   {
+   "direct packet read test#2 for CGROUP_SKB",
+   .insns = {
+   BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+   offsetof(struct __sk_buff, vlan_tci)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+   offsetof(struct __sk_buff, vlan_proto)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+   offsetof(struct __sk_buff, priority)),
+   BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+   offsetof(struct __sk_buff, priority)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+   offsetof(struct __sk_buff,
+ingress_ifindex)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+   offsetof(struct __sk_buff, tc_index)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+   offsetof(struct __sk_buff, hash)),
+   BPF_MOV64_IMM(BPF_REG_0, 0),
+  

[PATCH v5 bpf-next 0/2] bpf: add cg_skb_is_valid_access

2018-10-18 Thread Song Liu
Changes v4 -> v5:
1. Replaced bpf_compute_and_save_data_pointers() with
   bpf_compute_and_save_data_end();
   Replaced bpf_restore_data_pointers() with bpf_restore_data_end().
2. Fixed indentation in test_verifier.c

Changes v3 -> v4:
1. Fixed crash issue reported by Alexei.

Changes v2 -> v3:
1. Added helper function bpf_compute_and_save_data_pointers() and
   bpf_restore_data_pointers().

Changes v1 -> v2:
1. Updated the list of read-only fields, and read-write fields.
2. Added dummy sk to bpf_prog_test_run_skb().

This set enables BPF program of type BPF_PROG_TYPE_CGROUP_SKB to access
some __skb_buff data directly.

Song Liu (2):
  bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB
  bpf: add tests for direct packet access from CGROUP_SKB

 include/linux/filter.h  |  21 +++
 kernel/bpf/cgroup.c |   6 +
 net/bpf/test_run.c  |   7 +
 net/core/filter.c   |  36 -
 tools/testing/selftests/bpf/test_verifier.c | 171 
 5 files changed, 240 insertions(+), 1 deletion(-)

--
2.17.1


[PATCH v5 bpf-next 1/2] bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB

2018-10-18 Thread Song Liu
BPF programs of BPF_PROG_TYPE_CGROUP_SKB need to access headers in the
skb. This patch enables direct access of skb for these programs.

Two helper functions bpf_compute_and_save_data_end() and
bpf_restore_data_end() are introduced. They are used in
__cgroup_bpf_run_filter_skb(), to compute proper data_end for the
BPF program, and restore original data afterwards.

Signed-off-by: Song Liu 
---
 include/linux/filter.h | 21 +
 kernel/bpf/cgroup.c|  6 ++
 net/core/filter.c  | 36 +++-
 3 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5771874bc01e..91b4c934f02e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -548,6 +548,27 @@ static inline void bpf_compute_data_pointers(struct 
sk_buff *skb)
cb->data_end  = skb->data + skb_headlen(skb);
 }
 
+/* Similar to bpf_compute_data_pointers(), except that it saves the
+ * original data_end (in *saved_data_end) so it can be restored later.
+ */
+static inline void bpf_compute_and_save_data_end(
+   struct sk_buff *skb, void **saved_data_end)
+{
+   struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+
+   *saved_data_end = cb->data_end;
+   cb->data_end  = skb->data + skb_headlen(skb);
+}
+
+/* Restore data_end saved by bpf_compute_and_save_data_end(). */
+static inline void bpf_restore_data_end(
+   struct sk_buff *skb, void *saved_data_end)
+{
+   struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+
+   cb->data_end = saved_data_end;
+}
+
 static inline u8 *bpf_skb_cb(struct sk_buff *skb)
 {
/* eBPF programs may read/write skb->cb[] area to transfer meta
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 00f6ed2e4f9a..9425c2fb872f 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -553,6 +553,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
 {
unsigned int offset = skb->data - skb_network_header(skb);
struct sock *save_sk;
+   void *saved_data_end;
struct cgroup *cgrp;
int ret;
 
@@ -566,8 +567,13 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
save_sk = skb->sk;
skb->sk = sk;
__skb_push(skb, offset);
+
+   /* compute pointers for the bpf prog */
+   bpf_compute_and_save_data_end(skb, &saved_data_end);
+
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
 bpf_prog_run_save_cb);
+   bpf_restore_data_end(skb, saved_data_end);
__skb_pull(skb, offset);
skb->sk = save_sk;
return ret == 1 ? 0 : -EPERM;
diff --git a/net/core/filter.c b/net/core/filter.c
index 1a3ac6c46873..e3ca30bd6840 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5346,6 +5346,40 @@ static bool sk_filter_is_valid_access(int off, int size,
return bpf_skb_is_valid_access(off, size, type, prog, info);
 }
 
+static bool cg_skb_is_valid_access(int off, int size,
+  enum bpf_access_type type,
+  const struct bpf_prog *prog,
+  struct bpf_insn_access_aux *info)
+{
+   switch (off) {
+   case bpf_ctx_range(struct __sk_buff, tc_classid):
+   case bpf_ctx_range(struct __sk_buff, data_meta):
+   case bpf_ctx_range(struct __sk_buff, flow_keys):
+   return false;
+   }
+   if (type == BPF_WRITE) {
+   switch (off) {
+   case bpf_ctx_range(struct __sk_buff, mark):
+   case bpf_ctx_range(struct __sk_buff, priority):
+   case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+   break;
+   default:
+   return false;
+   }
+   }
+
+   switch (off) {
+   case bpf_ctx_range(struct __sk_buff, data):
+   info->reg_type = PTR_TO_PACKET;
+   break;
+   case bpf_ctx_range(struct __sk_buff, data_end):
+   info->reg_type = PTR_TO_PACKET_END;
+   break;
+   }
+
+   return bpf_skb_is_valid_access(off, size, type, prog, info);
+}
+
 static bool lwt_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
@@ -7038,7 +7072,7 @@ const struct bpf_prog_ops xdp_prog_ops = {
 
 const struct bpf_verifier_ops cg_skb_verifier_ops = {
.get_func_proto = cg_skb_func_proto,
-   .is_valid_access= sk_filter_is_valid_access,
+   .is_valid_access= cg_skb_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
 };
 
-- 
2.17.1



Re: [PATCH bpf-next v3 7/7] selftests/bpf: add test cases for queue and stack maps

2018-10-18 Thread Song Liu
On Thu, Oct 18, 2018 at 10:33 AM Mauricio Vasquez
 wrote:
>
>
> On 10/18/18 11:36 AM, Song Liu wrote:
> > On Thu, Oct 18, 2018 at 6:16 AM Mauricio Vasquez B
> >  wrote:
> >> test_maps:
> >> Tests that queue/stack maps are behaving correctly even in corner cases
> >>
> >> test_progs:
> >> Tests new ebpf helpers
> >>
> >> Signed-off-by: Mauricio Vasquez B 
> >> ---
> >>   tools/lib/bpf/bpf.c|   12 ++
> >>   tools/lib/bpf/bpf.h|2
> >>   tools/testing/selftests/bpf/Makefile   |5 +
> >>   tools/testing/selftests/bpf/bpf_helpers.h  |7 +
> >>   tools/testing/selftests/bpf/test_maps.c|  122 
> >> 
> >>   tools/testing/selftests/bpf/test_progs.c   |   99 
> >> 
> >>   tools/testing/selftests/bpf/test_queue_map.c   |4 +
> >>   tools/testing/selftests/bpf/test_queue_stack_map.h |   59 ++
> >>   tools/testing/selftests/bpf/test_stack_map.c   |4 +
> >>   9 files changed, 313 insertions(+), 1 deletion(-)
> >>   create mode 100644 tools/testing/selftests/bpf/test_queue_map.c
> >>   create mode 100644 tools/testing/selftests/bpf/test_queue_stack_map.h
> >>   create mode 100644 tools/testing/selftests/bpf/test_stack_map.c
> >>
> >> diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
> >> index d70a255cb05e..03f9bcc4ef50 100644
> >> --- a/tools/lib/bpf/bpf.c
> >> +++ b/tools/lib/bpf/bpf.c
> >> @@ -278,6 +278,18 @@ int bpf_map_lookup_elem(int fd, const void *key, void 
> >> *value)
> >>  return sys_bpf(BPF_MAP_LOOKUP_ELEM, , sizeof(attr));
> >>   }
> >>
> >> +int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
> >> +{
> >> +   union bpf_attr attr;
> >> +
> >> +   bzero(, sizeof(attr));
> >> +   attr.map_fd = fd;
> >> +   attr.key = ptr_to_u64(key);
> >> +   attr.value = ptr_to_u64(value);
> >> +
> >> +   return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, , 
> >> sizeof(attr));
> >> +}
> >> +
> >>   int bpf_map_delete_elem(int fd, const void *key)
> >>   {
> >>  union bpf_attr attr;
> >> diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
> >> index 258c3c178333..26a51538213c 100644
> >> --- a/tools/lib/bpf/bpf.h
> >> +++ b/tools/lib/bpf/bpf.h
> >> @@ -99,6 +99,8 @@ LIBBPF_API int bpf_map_update_elem(int fd, const void 
> >> *key, const void *value,
> >> __u64 flags);
> >>
> >>   LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value);
> >> +LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
> >> + void *value);
> >>   LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);
> >>   LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void 
> >> *next_key);
> >>   LIBBPF_API int bpf_obj_pin(int fd, const char *pathname);
> >> diff --git a/tools/testing/selftests/bpf/Makefile 
> >> b/tools/testing/selftests/bpf/Makefile
> >> index d99dd6fc3fbe..e39dfb4e7970 100644
> >> --- a/tools/testing/selftests/bpf/Makefile
> >> +++ b/tools/testing/selftests/bpf/Makefile
> >> @@ -37,7 +37,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o 
> >> test_l4lb.o test_tcp_estats.o test
> >>  test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o 
> >> test_lirc_mode2_kern.o \
> >>  get_cgroup_id_kern.o socket_cookie_prog.o 
> >> test_select_reuseport_kern.o \
> >>  test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o \
> >> -   test_sk_lookup_kern.o test_xdp_vlan.o
> >> +   test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o 
> >> test_stack_map.o
> >>
> >>   # Order correspond to 'make run_tests' order
> >>   TEST_PROGS := test_kmod.sh \
> >> @@ -118,6 +118,9 @@ CLANG_FLAGS = -I. -I./include/uapi 
> >> -I../../../include/uapi \
> >>   $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
> >>   $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
> >>
> >> +$(OUTPUT)/test_queue_map.o: test_queue_stack_map.h
> >> +$(OUTPUT)/test_stack_map.o: test_queue_stack_map.h
> > This looks weird. You meant the .c files, right?
>
> Q

Re: [PATCH bpf-next v3 7/7] selftests/bpf: add test cases for queue and stack maps

2018-10-18 Thread Song Liu
On Thu, Oct 18, 2018 at 6:16 AM Mauricio Vasquez B
 wrote:
>
> test_maps:
> Tests that queue/stack maps are behaving correctly even in corner cases
>
> test_progs:
> Tests new ebpf helpers
>
> Signed-off-by: Mauricio Vasquez B 
> ---
>  tools/lib/bpf/bpf.c|   12 ++
>  tools/lib/bpf/bpf.h|2
>  tools/testing/selftests/bpf/Makefile   |5 +
>  tools/testing/selftests/bpf/bpf_helpers.h  |7 +
>  tools/testing/selftests/bpf/test_maps.c|  122 
> 
>  tools/testing/selftests/bpf/test_progs.c   |   99 
>  tools/testing/selftests/bpf/test_queue_map.c   |4 +
>  tools/testing/selftests/bpf/test_queue_stack_map.h |   59 ++
>  tools/testing/selftests/bpf/test_stack_map.c   |4 +
>  9 files changed, 313 insertions(+), 1 deletion(-)
>  create mode 100644 tools/testing/selftests/bpf/test_queue_map.c
>  create mode 100644 tools/testing/selftests/bpf/test_queue_stack_map.h
>  create mode 100644 tools/testing/selftests/bpf/test_stack_map.c
>
> diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
> index d70a255cb05e..03f9bcc4ef50 100644
> --- a/tools/lib/bpf/bpf.c
> +++ b/tools/lib/bpf/bpf.c
> @@ -278,6 +278,18 @@ int bpf_map_lookup_elem(int fd, const void *key, void 
> *value)
> return sys_bpf(BPF_MAP_LOOKUP_ELEM, , sizeof(attr));
>  }
>
> +int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
> +{
> +   union bpf_attr attr;
> +
> +   bzero(, sizeof(attr));
> +   attr.map_fd = fd;
> +   attr.key = ptr_to_u64(key);
> +   attr.value = ptr_to_u64(value);
> +
> +   return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, , sizeof(attr));
> +}
> +
>  int bpf_map_delete_elem(int fd, const void *key)
>  {
> union bpf_attr attr;
> diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
> index 258c3c178333..26a51538213c 100644
> --- a/tools/lib/bpf/bpf.h
> +++ b/tools/lib/bpf/bpf.h
> @@ -99,6 +99,8 @@ LIBBPF_API int bpf_map_update_elem(int fd, const void *key, 
> const void *value,
>__u64 flags);
>
>  LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value);
> +LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
> + void *value);
>  LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);
>  LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
>  LIBBPF_API int bpf_obj_pin(int fd, const char *pathname);
> diff --git a/tools/testing/selftests/bpf/Makefile 
> b/tools/testing/selftests/bpf/Makefile
> index d99dd6fc3fbe..e39dfb4e7970 100644
> --- a/tools/testing/selftests/bpf/Makefile
> +++ b/tools/testing/selftests/bpf/Makefile
> @@ -37,7 +37,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o 
> test_tcp_estats.o test
> test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o 
> test_lirc_mode2_kern.o \
> get_cgroup_id_kern.o socket_cookie_prog.o 
> test_select_reuseport_kern.o \
> test_skb_cgroup_id_kern.o bpf_flow.o netcnt_prog.o \
> -   test_sk_lookup_kern.o test_xdp_vlan.o
> +   test_sk_lookup_kern.o test_xdp_vlan.o test_queue_map.o 
> test_stack_map.o
>
>  # Order correspond to 'make run_tests' order
>  TEST_PROGS := test_kmod.sh \
> @@ -118,6 +118,9 @@ CLANG_FLAGS = -I. -I./include/uapi 
> -I../../../include/uapi \
>  $(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
>  $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
>
> +$(OUTPUT)/test_queue_map.o: test_queue_stack_map.h
> +$(OUTPUT)/test_stack_map.o: test_queue_stack_map.h

This looks weird. You meant the .c files, right?

> +
>  BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
>  BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
>  BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 
> 'usage.*llvm')
> diff --git a/tools/testing/selftests/bpf/bpf_helpers.h 
> b/tools/testing/selftests/bpf/bpf_helpers.h
> index fda8c162d0df..6407a3df0f3b 100644
> --- a/tools/testing/selftests/bpf/bpf_helpers.h
> +++ b/tools/testing/selftests/bpf/bpf_helpers.h
> @@ -16,6 +16,13 @@ static int (*bpf_map_update_elem)(void *map, void *key, 
> void *value,
> (void *) BPF_FUNC_map_update_elem;
>  static int (*bpf_map_delete_elem)(void *map, void *key) =
> (void *) BPF_FUNC_map_delete_elem;
> +static int (*bpf_map_push_elem)(void *map, void *value,
> +   unsigned long long flags) =
> +   (void *) BPF_FUNC_map_push_elem;
> +static int (*bpf_map_pop_elem)(void *map, void *value) =
> +   (void *) BPF_FUNC_map_pop_elem;
> +static int (*bpf_map_peek_elem)(void *map, void *value) =
> +   (void *) BPF_FUNC_map_peek_elem;
>  static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) =
> (void *) BPF_FUNC_probe_read;
>  static unsigned long long 

Re: [PATCH bpf-next v3 4/7] bpf: add queue and stack maps

2018-10-18 Thread Song Liu
On Thu, Oct 18, 2018 at 6:16 AM Mauricio Vasquez B
 wrote:
>
> Queue/stack maps implement a FIFO/LIFO data storage for ebpf programs.
> These maps support peek, pop and push operations that are exposed to eBPF
> programs through the new bpf_map[peek/pop/push] helpers.  Those operations
> are exposed to userspace applications through the already existing
> syscalls in the following way:
>
> BPF_MAP_LOOKUP_ELEM-> peek
> BPF_MAP_LOOKUP_AND_DELETE_ELEM -> pop
> BPF_MAP_UPDATE_ELEM-> push
>
> Queue/stack maps are implemented using a buffer, tail and head indexes,
> hence BPF_F_NO_PREALLOC is not supported.
>
> As opposite to other maps, queue and stack do not use RCU for protecting
> maps values, the bpf_map[peek/pop] have a ARG_PTR_TO_UNINIT_MAP_VALUE
> argument that is a pointer to a memory zone where to save the value of a
> map.  Basically the same as ARG_PTR_TO_UNINIT_MEM, but the size has not
> be passed as an extra argument.
>
> Our main motivation for implementing queue/stack maps was to keep track
> of a pool of elements, like network ports in a SNAT, however we forsee
> other use cases, like for exampling saving last N kernel events in a map
> and then analysing from userspace.
>
> Signed-off-by: Mauricio Vasquez B 
Acked-by: Song Liu 

> ---
>  include/linux/bpf.h   |6 +
>  include/linux/bpf_types.h |2
>  include/uapi/linux/bpf.h  |   29 
>  kernel/bpf/Makefile   |2
>  kernel/bpf/core.c |3
>  kernel/bpf/helpers.c  |   43 ++
>  kernel/bpf/queue_stack_maps.c |  288 
> +
>  kernel/bpf/syscall.c  |6 +
>  kernel/bpf/verifier.c |   19 +++
>  net/core/filter.c |6 +
>  10 files changed, 401 insertions(+), 3 deletions(-)
>  create mode 100644 kernel/bpf/queue_stack_maps.c
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 0f8b863e0229..33014ae73103 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -39,6 +39,9 @@ struct bpf_map_ops {
> void *(*map_lookup_elem)(struct bpf_map *map, void *key);
> int (*map_update_elem)(struct bpf_map *map, void *key, void *value, 
> u64 flags);
> int (*map_delete_elem)(struct bpf_map *map, void *key);
> +   int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
> +   int (*map_pop_elem)(struct bpf_map *map, void *value);
> +   int (*map_peek_elem)(struct bpf_map *map, void *value);
>
> /* funcs called by prog_array and perf_event_array map */
> void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
> @@ -811,6 +814,9 @@ static inline int 
> bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
>  extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
>  extern const struct bpf_func_proto bpf_map_update_elem_proto;
>  extern const struct bpf_func_proto bpf_map_delete_elem_proto;
> +extern const struct bpf_func_proto bpf_map_push_elem_proto;
> +extern const struct bpf_func_proto bpf_map_pop_elem_proto;
> +extern const struct bpf_func_proto bpf_map_peek_elem_proto;
>
>  extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
>  extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
> diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
> index 7bad4e1947ed..44d9ab4809bd 100644
> --- a/include/linux/bpf_types.h
> +++ b/include/linux/bpf_types.h
> @@ -69,3 +69,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops)
>  BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops)
>  #endif
>  #endif
> +BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops)
> +BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index f9187b41dff6..b8fc161c5b78 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -128,6 +128,8 @@ enum bpf_map_type {
> BPF_MAP_TYPE_CGROUP_STORAGE,
> BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
> BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
> +   BPF_MAP_TYPE_QUEUE,
> +   BPF_MAP_TYPE_STACK,
>  };
>
>  enum bpf_prog_type {
> @@ -462,6 +464,28 @@ union bpf_attr {
>   * Return
>   * 0 on success, or a negative error in case of failure.
>   *
> + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
> + * Description
> + * Push an element *value* in *map*. *flags* is one of:
> + *
> + * **BPF_EXIST**
> + * If the queue/stack is full, the oldest element is removed to
> + * make room for this.
> + * Return
> + * 0 on success, or a ne

Re: [PATCH bpf-next v3 5/7] bpf: add MAP_LOOKUP_AND_DELETE_ELEM syscall

2018-10-18 Thread Song Liu
On Thu, Oct 18, 2018 at 6:16 AM Mauricio Vasquez B
 wrote:
>
> The previous patch implemented a bpf queue/stack maps that
> provided the peek/pop/push functions.  There is not a direct
> relationship between those functions and the current maps
> syscalls, hence a new MAP_LOOKUP_AND_DELETE_ELEM syscall is added,
> this is mapped to the pop operation in the queue/stack maps
> and it is still to implement in other kind of maps.
>
> Signed-off-by: Mauricio Vasquez B 

Acked-by: Song Liu 

> ---
>  include/uapi/linux/bpf.h |1 +
>  kernel/bpf/syscall.c |   66 
> ++
>  2 files changed, 67 insertions(+)
>
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index b8fc161c5b78..c8824d5364ff 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -103,6 +103,7 @@ enum bpf_cmd {
> BPF_BTF_LOAD,
> BPF_BTF_GET_FD_BY_ID,
> BPF_TASK_FD_QUERY,
> +   BPF_MAP_LOOKUP_AND_DELETE_ELEM,
>  };
>
>  enum bpf_map_type {
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 1617407f9ee5..49ae64a26562 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -999,6 +999,69 @@ static int map_get_next_key(union bpf_attr *attr)
> return err;
>  }
>
> +#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
> +
> +static int map_lookup_and_delete_elem(union bpf_attr *attr)
> +{
> +   void __user *ukey = u64_to_user_ptr(attr->key);
> +   void __user *uvalue = u64_to_user_ptr(attr->value);
> +   int ufd = attr->map_fd;
> +   struct bpf_map *map;
> +   void *key, *value, *ptr;
> +   u32 value_size;
> +   struct fd f;
> +   int err;
> +
> +   if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
> +   return -EINVAL;
> +
> +   f = fdget(ufd);
> +   map = __bpf_map_get(f);
> +   if (IS_ERR(map))
> +   return PTR_ERR(map);
> +
> +   if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
> +   err = -EPERM;
> +   goto err_put;
> +   }
> +
> +   key = __bpf_copy_key(ukey, map->key_size);
> +   if (IS_ERR(key)) {
> +   err = PTR_ERR(key);
> +   goto err_put;
> +   }
> +
> +   value_size = map->value_size;
> +
> +   err = -ENOMEM;
> +   value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
> +   if (!value)
> +   goto free_key;
> +
> +   if (map->map_type == BPF_MAP_TYPE_QUEUE ||
> +   map->map_type == BPF_MAP_TYPE_STACK) {
> +   err = map->ops->map_pop_elem(map, value);
> +   } else {
> +   err = -ENOTSUPP;
> +   }
> +
> +   if (err)
> +   goto free_value;
> +
> +   if (copy_to_user(uvalue, value, value_size) != 0)
> +   goto free_value;
> +
> +   err = 0;
> +
> +free_value:
> +   kfree(value);
> +free_key:
> +   kfree(key);
> +err_put:
> +   fdput(f);
> +   return err;
> +}
> +
>  static const struct bpf_prog_ops * const bpf_prog_types[] = {
>  #define BPF_PROG_TYPE(_id, _name) \
> [_id] = & _name ## _prog_ops,
> @@ -2472,6 +2535,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, 
> uattr, unsigned int, siz
> case BPF_TASK_FD_QUERY:
> err = bpf_task_fd_query(, uattr);
> break;
> +   case BPF_MAP_LOOKUP_AND_DELETE_ELEM:
> +   err = map_lookup_and_delete_elem();
> +   break;
> default:
> err = -EINVAL;
> break;
>


[PATCH v4 bpf-next 0/2] bpf: add cg_skb_is_valid_access

2018-10-18 Thread Song Liu
Changes v3 -> v4:
1. Fixed crash issue reported by Alexei.

Changes v2 -> v3:
1. Added helper function bpf_compute_and_save_data_pointers() and
   bpf_restore_data_pointers().

Changes v1 -> v2:
1. Updated the list of read-only fields, and read-write fields.
2. Added dummy sk to bpf_prog_test_run_skb().

This set enables BPF program of type BPF_PROG_TYPE_CGROUP_SKB to access
some __skb_buff data directly.

Song Liu (2):
  bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB
  bpf: add tests for direct packet access from CGROUP_SKB

 include/linux/filter.h  |  24 +++
 kernel/bpf/cgroup.c |   6 +
 net/bpf/test_run.c  |   7 +
 net/core/filter.c   |  36 -
 tools/testing/selftests/bpf/test_verifier.c | 170 
 5 files changed, 242 insertions(+), 1 deletion(-)

--
2.17.1


[PATCH v4 bpf-next 1/2] bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB

2018-10-18 Thread Song Liu
BPF programs of BPF_PROG_TYPE_CGROUP_SKB need to access headers in the
skb. This patch enables direct access of skb for these programs.

Two helper functions bpf_compute_and_save_data_pointers() and
bpf_restore_data_pointers() are introduced. They are used in
__cgroup_bpf_run_filter_skb(), to compute proper data_end for the
BPF program, and restore original data afterwards.

Signed-off-by: Song Liu 
---
 include/linux/filter.h | 24 
 kernel/bpf/cgroup.c|  6 ++
 net/core/filter.c  | 36 +++-
 3 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5771874bc01e..96b3ee7f14c9 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -548,6 +548,30 @@ static inline void bpf_compute_data_pointers(struct 
sk_buff *skb)
cb->data_end  = skb->data + skb_headlen(skb);
 }
 
+/* Similar to bpf_compute_data_pointers(), except that save orginal
+ * data in cb->data and cb->meta_data for restore.
+ */
+static inline void bpf_compute_and_save_data_pointers(
+   struct sk_buff *skb, void *saved_pointers[2])
+{
+   struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+
+   saved_pointers[0] = cb->data_meta;
+   saved_pointers[1] = cb->data_end;
+   cb->data_meta = skb->data - skb_metadata_len(skb);
+   cb->data_end  = skb->data + skb_headlen(skb);
+}
+
+/* Restore data saved by bpf_compute_data_pointers(). */
+static inline void bpf_restore_data_pointers(
+   struct sk_buff *skb, void *saved_pointers[2])
+{
+   struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+
+   cb->data_meta = saved_pointers[0];
+   cb->data_end = saved_pointers[1];;
+}
+
 static inline u8 *bpf_skb_cb(struct sk_buff *skb)
 {
/* eBPF programs may read/write skb->cb[] area to transfer meta
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 00f6ed2e4f9a..5f5180104ddc 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -554,6 +554,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
unsigned int offset = skb->data - skb_network_header(skb);
struct sock *save_sk;
struct cgroup *cgrp;
+   void *saved_pointers[2];
int ret;
 
if (!sk || !sk_fullsock(sk))
@@ -566,8 +567,13 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
save_sk = skb->sk;
skb->sk = sk;
__skb_push(skb, offset);
+
+   /* compute pointers for the bpf prog */
+   bpf_compute_and_save_data_pointers(skb, saved_pointers);
+
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
 bpf_prog_run_save_cb);
+   bpf_restore_data_pointers(skb, saved_pointers);
__skb_pull(skb, offset);
skb->sk = save_sk;
return ret == 1 ? 0 : -EPERM;
diff --git a/net/core/filter.c b/net/core/filter.c
index 1a3ac6c46873..e3ca30bd6840 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5346,6 +5346,40 @@ static bool sk_filter_is_valid_access(int off, int size,
return bpf_skb_is_valid_access(off, size, type, prog, info);
 }
 
+static bool cg_skb_is_valid_access(int off, int size,
+  enum bpf_access_type type,
+  const struct bpf_prog *prog,
+  struct bpf_insn_access_aux *info)
+{
+   switch (off) {
+   case bpf_ctx_range(struct __sk_buff, tc_classid):
+   case bpf_ctx_range(struct __sk_buff, data_meta):
+   case bpf_ctx_range(struct __sk_buff, flow_keys):
+   return false;
+   }
+   if (type == BPF_WRITE) {
+   switch (off) {
+   case bpf_ctx_range(struct __sk_buff, mark):
+   case bpf_ctx_range(struct __sk_buff, priority):
+   case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+   break;
+   default:
+   return false;
+   }
+   }
+
+   switch (off) {
+   case bpf_ctx_range(struct __sk_buff, data):
+   info->reg_type = PTR_TO_PACKET;
+   break;
+   case bpf_ctx_range(struct __sk_buff, data_end):
+   info->reg_type = PTR_TO_PACKET_END;
+   break;
+   }
+
+   return bpf_skb_is_valid_access(off, size, type, prog, info);
+}
+
 static bool lwt_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
@@ -7038,7 +7072,7 @@ const struct bpf_prog_ops xdp_prog_ops = {
 
 const struct bpf_verifier_ops cg_skb_verifier_ops = {
.get_func_proto = cg_skb_func_proto,
-   .is_valid_access= sk_filter_is_valid_access,
+   .is_valid_access= cg_skb_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
 };
 
-- 
2.17.1



[PATCH v4 bpf-next 2/2] bpf: add tests for direct packet access from CGROUP_SKB

2018-10-18 Thread Song Liu
Tests are added to make sure CGROUP_SKB cannot access:
  tc_classid, data_meta, flow_keys

and can read and write:
  mark, priority, and cb[0-4]

and can read other fields.

To make selftest with skb->sk work, a dummy sk is added in
bpf_prog_test_run_skb().

Signed-off-by: Song Liu 
---
 net/bpf/test_run.c  |   7 +
 tools/testing/selftests/bpf/test_verifier.c | 170 
 2 files changed, 177 insertions(+)

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 0c423b8cd75c..8dccac305268 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -10,6 +10,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
@@ -115,6 +117,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const 
union bpf_attr *kattr,
u32 retval, duration;
int hh_len = ETH_HLEN;
struct sk_buff *skb;
+   struct sock sk = {0};
void *data;
int ret;
 
@@ -137,11 +140,15 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const 
union bpf_attr *kattr,
break;
}
 
+   sock_net_set(, _net);
+   sock_init_data(NULL, );
+
skb = build_skb(data, 0);
if (!skb) {
kfree(data);
return -ENOMEM;
}
+   skb->sk = 
 
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
__skb_put(skb, size);
diff --git a/tools/testing/selftests/bpf/test_verifier.c 
b/tools/testing/selftests/bpf/test_verifier.c
index cf4cd32b6772..5bfba7e8afd7 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -4862,6 +4862,176 @@ static struct bpf_test tests[] = {
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
+   {
+   "direct packet read test#1 for CGROUP_SKB",
+   .insns = {
+   BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+   offsetof(struct __sk_buff, data)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+   offsetof(struct __sk_buff, data_end)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+   offsetof(struct __sk_buff, len)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+   offsetof(struct __sk_buff, pkt_type)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+   offsetof(struct __sk_buff, mark)),
+   BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+   offsetof(struct __sk_buff, mark)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+   offsetof(struct __sk_buff, queue_mapping)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+   offsetof(struct __sk_buff, protocol)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+   offsetof(struct __sk_buff, vlan_present)),
+   BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+   BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+   BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+   BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+   BPF_MOV64_IMM(BPF_REG_0, 0),
+   BPF_EXIT_INSN(),
+   },
+   .result = ACCEPT,
+   .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+   },
+   {
+   "direct packet read test#2 for CGROUP_SKB",
+   .insns = {
+   BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+   offsetof(struct __sk_buff, vlan_tci)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+   offsetof(struct __sk_buff, vlan_proto)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+   offsetof(struct __sk_buff, priority)),
+   BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+   offsetof(struct __sk_buff, priority)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+   offsetof(struct __sk_buff, 
ingress_ifindex)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+   offsetof(struct __sk_buff, tc_index)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+   offsetof(struct __sk_buff, hash)),
+   BPF_MOV64_IMM(BPF_REG_0, 0),
+   BPF_EXIT_INSN(),
+   },
+ 

Re: [PATCH v3 bpf-next 2/2] bpf: add tests for direct packet access from CGROUP_SKB

2018-10-18 Thread Song Liu



> On Oct 17, 2018, at 11:25 PM, Alexei Starovoitov 
>  wrote:
> 
> On Wed, Oct 17, 2018 at 10:39:49PM -0700, Song Liu wrote:
>> Tests are added to make sure CGROUP_SKB cannot access:
>>  tc_classid, data_meta, flow_keys
>> 
>> and can read and write:
>>  mark, prority, and cb[0-4]
>> 
>> and can read other fields.
>> 
>> To make selftest with skb->sk work, a dummy sk is added in
>> bpf_prog_test_run_skb().
>> 
>> Signed-off-by: Song Liu 
>> ---
>> net/bpf/test_run.c  |   4 +
>> tools/testing/selftests/bpf/test_verifier.c | 170 
>> 2 files changed, 174 insertions(+)
>> 
>> diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
>> index 0c423b8cd75c..c7210e2f1ae9 100644
>> --- a/net/bpf/test_run.c
>> +++ b/net/bpf/test_run.c
>> @@ -10,6 +10,7 @@
>> #include 
>> #include 
>> #include 
>> +#include 
>> 
>> static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
>>  struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
>> @@ -115,6 +116,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const 
>> union bpf_attr *kattr,
>>  u32 retval, duration;
>>  int hh_len = ETH_HLEN;
>>  struct sk_buff *skb;
>> +struct sock sk;
>>  void *data;
>>  int ret;
>> 
>> @@ -142,6 +144,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const 
>> union bpf_attr *kattr,
>>  kfree(data);
>>  return -ENOMEM;
>>  }
>> +sock_init_data(NULL, );
>> +skb->sk = 
> 
> I was about to apply it, but it crashes as:
> [   16.830822] BUG: unable to handle kernel paging request at 00014427b974
> [   16.831363] PGD 800135ecf067 P4D 800135ecf067 PUD 0
> [   16.831792] Oops:  [#1] SMP PTI
> [   16.832061] CPU: 1 PID: 1965 Comm: test_verifier Not tainted 
> 4.19.0-rc7-02550-ga76dee97ff12 #1153
> [   16.832712] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
> 1.11.0-2.el7 04/01/2014
> [   16.833358] RIP: 0010:cmp_map_id+0x10/0x50
> [   16.835036] RSP: 0018:c980faf8 EFLAGS: 00010246
> [   16.835429] RAX:  RBX: 36069ee8 RCX: 
> 
> [   16.835958] RDX: 00014427b970 RSI: 00014427b970 RDI: 
> c980fb44
> [   16.836496] RBP: 000c R08: 810f7330 R09: 
> 36069ee8
> [   16.837026] R10:  R11: 0001 R12: 
> 
> [   16.837554] R13: 810f7330 R14: 00014427b970 R15: 
> 1b034f74
> [   16.838083] FS:  7fae50663700() GS:88013ba8() 
> knlGS:
> [   16.838677] CS:  0010 DS:  ES:  CR0: 80050033
> [   16.839105] CR2: 00014427b974 CR3: 000135934005 CR4: 
> 003606e0
> [   16.839632] DR0:  DR1:  DR2: 
> 
> [   16.840157] DR3:  DR6: fffe0ff0 DR7: 
> 0400
> [   16.840682] Call Trace:
> [   16.840897]  bsearch+0x50/0x90
> [   16.841144]  map_id_range_down+0x81/0xa0
> [   16.841438]  make_kuid+0xf/0x10
> [   16.841677]  sock_init_data+0x24f/0x260
> [   16.841979]  bpf_prog_test_run_skb+0x9e/0x270
> 
> I suspect sock_net_set(sk, _net) is necessary before sock_init_data() 
> call.

I am not able to repro this, even with CONFIG_KASAN and CONFIG_PAGE_POISONING. 

Let me try a better approach on this.

Thanks,
Song



[PATCH v3 bpf-next 1/2] bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB

2018-10-17 Thread Song Liu
BPF programs of BPF_PROG_TYPE_CGROUP_SKB need to access headers in the
skb. This patch enables direct access of skb for these programs.

Two helper functions bpf_compute_and_save_data_pointers() and
bpf_restore_data_pointers() are introduced. They are used in
__cgroup_bpf_run_filter_skb(), to compute proper data_end for the
BPF program, and restore original data afterwards.

Signed-off-by: Song Liu 
---
 include/linux/filter.h | 24 
 kernel/bpf/cgroup.c|  6 ++
 net/core/filter.c  | 36 +++-
 3 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5771874bc01e..96b3ee7f14c9 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -548,6 +548,30 @@ static inline void bpf_compute_data_pointers(struct 
sk_buff *skb)
cb->data_end  = skb->data + skb_headlen(skb);
 }
 
+/* Similar to bpf_compute_data_pointers(), except that save orginal
+ * data in cb->data and cb->meta_data for restore.
+ */
+static inline void bpf_compute_and_save_data_pointers(
+   struct sk_buff *skb, void *saved_pointers[2])
+{
+   struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+
+   saved_pointers[0] = cb->data_meta;
+   saved_pointers[1] = cb->data_end;
+   cb->data_meta = skb->data - skb_metadata_len(skb);
+   cb->data_end  = skb->data + skb_headlen(skb);
+}
+
+/* Restore data saved by bpf_compute_data_pointers(). */
+static inline void bpf_restore_data_pointers(
+   struct sk_buff *skb, void *saved_pointers[2])
+{
+   struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
+
+   cb->data_meta = saved_pointers[0];
+   cb->data_end = saved_pointers[1];;
+}
+
 static inline u8 *bpf_skb_cb(struct sk_buff *skb)
 {
/* eBPF programs may read/write skb->cb[] area to transfer meta
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 00f6ed2e4f9a..5f5180104ddc 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -554,6 +554,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
unsigned int offset = skb->data - skb_network_header(skb);
struct sock *save_sk;
struct cgroup *cgrp;
+   void *saved_pointers[2];
int ret;
 
if (!sk || !sk_fullsock(sk))
@@ -566,8 +567,13 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
save_sk = skb->sk;
skb->sk = sk;
__skb_push(skb, offset);
+
+   /* compute pointers for the bpf prog */
+   bpf_compute_and_save_data_pointers(skb, saved_pointers);
+
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
 bpf_prog_run_save_cb);
+   bpf_restore_data_pointers(skb, saved_pointers);
__skb_pull(skb, offset);
skb->sk = save_sk;
return ret == 1 ? 0 : -EPERM;
diff --git a/net/core/filter.c b/net/core/filter.c
index 1a3ac6c46873..e3ca30bd6840 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5346,6 +5346,40 @@ static bool sk_filter_is_valid_access(int off, int size,
return bpf_skb_is_valid_access(off, size, type, prog, info);
 }
 
+static bool cg_skb_is_valid_access(int off, int size,
+  enum bpf_access_type type,
+  const struct bpf_prog *prog,
+  struct bpf_insn_access_aux *info)
+{
+   switch (off) {
+   case bpf_ctx_range(struct __sk_buff, tc_classid):
+   case bpf_ctx_range(struct __sk_buff, data_meta):
+   case bpf_ctx_range(struct __sk_buff, flow_keys):
+   return false;
+   }
+   if (type == BPF_WRITE) {
+   switch (off) {
+   case bpf_ctx_range(struct __sk_buff, mark):
+   case bpf_ctx_range(struct __sk_buff, priority):
+   case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+   break;
+   default:
+   return false;
+   }
+   }
+
+   switch (off) {
+   case bpf_ctx_range(struct __sk_buff, data):
+   info->reg_type = PTR_TO_PACKET;
+   break;
+   case bpf_ctx_range(struct __sk_buff, data_end):
+   info->reg_type = PTR_TO_PACKET_END;
+   break;
+   }
+
+   return bpf_skb_is_valid_access(off, size, type, prog, info);
+}
+
 static bool lwt_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
@@ -7038,7 +7072,7 @@ const struct bpf_prog_ops xdp_prog_ops = {
 
 const struct bpf_verifier_ops cg_skb_verifier_ops = {
.get_func_proto = cg_skb_func_proto,
-   .is_valid_access= sk_filter_is_valid_access,
+   .is_valid_access= cg_skb_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
 };
 
-- 
2.17.1



[PATCH v3 bpf-next 0/2] bpf: add cg_skb_is_valid_access

2018-10-17 Thread Song Liu
Changes v2 -> v3:
1. Added helper functions bpf_compute_and_save_data_pointers() and
   bpf_restore_data_pointers().

Changes v1 -> v2:
1. Updated the list of read-only fields, and read-write fields.
2. Added dummy sk to bpf_prog_test_run_skb().

This set enables BPF programs of type BPF_PROG_TYPE_CGROUP_SKB to access
some __sk_buff data directly.

Song Liu (2):
  bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB
  bpf: add tests for direct packet access from CGROUP_SKB

 include/linux/filter.h  |  24 +++
 kernel/bpf/cgroup.c |   6 +
 net/bpf/test_run.c  |   4 +
 net/core/filter.c   |  36 -
 tools/testing/selftests/bpf/test_verifier.c | 170 
 5 files changed, 239 insertions(+), 1 deletion(-)

--
2.17.1


[PATCH v3 bpf-next 2/2] bpf: add tests for direct packet access from CGROUP_SKB

2018-10-17 Thread Song Liu
Tests are added to make sure CGROUP_SKB cannot access:
  tc_classid, data_meta, flow_keys

and can read and write:
  mark, priority, and cb[0-4]

and can read other fields.

To make selftest with skb->sk work, a dummy sk is added in
bpf_prog_test_run_skb().

Signed-off-by: Song Liu 
---
 net/bpf/test_run.c  |   4 +
 tools/testing/selftests/bpf/test_verifier.c | 170 
 2 files changed, 174 insertions(+)

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 0c423b8cd75c..c7210e2f1ae9 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
@@ -115,6 +116,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const 
union bpf_attr *kattr,
u32 retval, duration;
int hh_len = ETH_HLEN;
struct sk_buff *skb;
+   struct sock sk;
void *data;
int ret;
 
@@ -142,6 +144,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const 
union bpf_attr *kattr,
kfree(data);
return -ENOMEM;
}
+   sock_init_data(NULL, );
+   skb->sk = 
 
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
__skb_put(skb, size);
diff --git a/tools/testing/selftests/bpf/test_verifier.c 
b/tools/testing/selftests/bpf/test_verifier.c
index cf4cd32b6772..5bfba7e8afd7 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -4862,6 +4862,176 @@ static struct bpf_test tests[] = {
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
+   {
+   "direct packet read test#1 for CGROUP_SKB",
+   .insns = {
+   BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+   offsetof(struct __sk_buff, data)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+   offsetof(struct __sk_buff, data_end)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+   offsetof(struct __sk_buff, len)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+   offsetof(struct __sk_buff, pkt_type)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+   offsetof(struct __sk_buff, mark)),
+   BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+   offsetof(struct __sk_buff, mark)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+   offsetof(struct __sk_buff, queue_mapping)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+   offsetof(struct __sk_buff, protocol)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+   offsetof(struct __sk_buff, vlan_present)),
+   BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+   BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+   BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+   BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+   BPF_MOV64_IMM(BPF_REG_0, 0),
+   BPF_EXIT_INSN(),
+   },
+   .result = ACCEPT,
+   .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+   },
+   {
+   "direct packet read test#2 for CGROUP_SKB",
+   .insns = {
+   BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+   offsetof(struct __sk_buff, vlan_tci)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+   offsetof(struct __sk_buff, vlan_proto)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+   offsetof(struct __sk_buff, priority)),
+   BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+   offsetof(struct __sk_buff, priority)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+   offsetof(struct __sk_buff, 
ingress_ifindex)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+   offsetof(struct __sk_buff, tc_index)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+   offsetof(struct __sk_buff, hash)),
+   BPF_MOV64_IMM(BPF_REG_0, 0),
+   BPF_EXIT_INSN(),
+   },
+   .result = ACCEPT,
+   .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+   },
+   {
+   "direct packet read test#3 for CGRO

Re: [PATCH v2 bpf-next 1/2] bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB

2018-10-17 Thread Song Liu



> On Oct 17, 2018, at 9:44 PM, Alexei Starovoitov 
>  wrote:
> 
> On Wed, Oct 17, 2018 at 04:36:15PM -0700, Song Liu wrote:
>> BPF programs of BPF_PROG_TYPE_CGROUP_SKB need to access headers in the
>> skb. This patch enables direct access of skb for these programs.
>> 
>> In __cgroup_bpf_run_filter_skb(), bpf_compute_data_pointers() is called
>> to compute proper data_end for the BPF program.
>> 
>> Signed-off-by: Song Liu 
>> ---
>> kernel/bpf/cgroup.c |  4 
>> net/core/filter.c   | 36 +++-
>> 2 files changed, 39 insertions(+), 1 deletion(-)
>> 
>> diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
>> index 00f6ed2e4f9a..340d496f35bd 100644
>> --- a/kernel/bpf/cgroup.c
>> +++ b/kernel/bpf/cgroup.c
>> @@ -566,6 +566,10 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
>>  save_sk = skb->sk;
>>  skb->sk = sk;
>>  __skb_push(skb, offset);
>> +
>> +/* compute pointers for the bpf prog */
>> +bpf_compute_data_pointers(skb);
> 
> cg_skb_is_valid_access() below looks good to me now,
> but I just realized that above change is not safe for all sockets.
> After sk_filter_trim_cap() is called in udp_queue_rcv_skb()
> it needs to see valid UDP_SKB_CB.
> But sizeof(struct udp_skb_cb)==28, so bpf_compute_data_pointers()
> would mangle the end of it.
> So we have to save/restore data_end/data_meta pointers as well.
> 
> I'm thinking that new helper like:
>  bpf_compute_and_save_data_pointers(skb, _of_16_bytes);
>  BPF_PROG_RUN_ARRAY();
>  bpf_restore_data_pointers(skb, _of_16_bytes);
> would be decent interface.

Thanks Alexei!

Will send v3 shortly.

Song

[PATCH v2 bpf-next 0/2] bpf: add cg_skb_is_valid_access

2018-10-17 Thread Song Liu
Changes v1 -> v2:
1. Updated the list of read-only fields, and read-write fields.
2. Added dummy sk to bpf_prog_test_run_skb().

This set enables BPF programs of type BPF_PROG_TYPE_CGROUP_SKB to access
some __sk_buff data directly.

Song Liu (2):
  bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB
  bpf: add tests for direct packet access from CGROUP_SKB

 kernel/bpf/cgroup.c |   4 +
 net/core/filter.c   |  37 -
 tools/testing/selftests/bpf/test_verifier.c | 159 
 3 files changed, 199 insertions(+), 1 deletion(-)

--
2.17.1


[PATCH v2 bpf-next 1/2] bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB

2018-10-17 Thread Song Liu
BPF programs of BPF_PROG_TYPE_CGROUP_SKB need to access headers in the
skb. This patch enables direct access of skb for these programs.

In __cgroup_bpf_run_filter_skb(), bpf_compute_data_pointers() is called
to compute proper data_end for the BPF program.

Signed-off-by: Song Liu 
---
 kernel/bpf/cgroup.c |  4 
 net/core/filter.c   | 36 +++-
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 00f6ed2e4f9a..340d496f35bd 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -566,6 +566,10 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
save_sk = skb->sk;
skb->sk = sk;
__skb_push(skb, offset);
+
+   /* compute pointers for the bpf prog */
+   bpf_compute_data_pointers(skb);
+
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
 bpf_prog_run_save_cb);
__skb_pull(skb, offset);
diff --git a/net/core/filter.c b/net/core/filter.c
index 1a3ac6c46873..e3ca30bd6840 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5346,6 +5346,40 @@ static bool sk_filter_is_valid_access(int off, int size,
return bpf_skb_is_valid_access(off, size, type, prog, info);
 }
 
+static bool cg_skb_is_valid_access(int off, int size,
+  enum bpf_access_type type,
+  const struct bpf_prog *prog,
+  struct bpf_insn_access_aux *info)
+{
+   switch (off) {
+   case bpf_ctx_range(struct __sk_buff, tc_classid):
+   case bpf_ctx_range(struct __sk_buff, data_meta):
+   case bpf_ctx_range(struct __sk_buff, flow_keys):
+   return false;
+   }
+   if (type == BPF_WRITE) {
+   switch (off) {
+   case bpf_ctx_range(struct __sk_buff, mark):
+   case bpf_ctx_range(struct __sk_buff, priority):
+   case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
+   break;
+   default:
+   return false;
+   }
+   }
+
+   switch (off) {
+   case bpf_ctx_range(struct __sk_buff, data):
+   info->reg_type = PTR_TO_PACKET;
+   break;
+   case bpf_ctx_range(struct __sk_buff, data_end):
+   info->reg_type = PTR_TO_PACKET_END;
+   break;
+   }
+
+   return bpf_skb_is_valid_access(off, size, type, prog, info);
+}
+
 static bool lwt_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
@@ -7038,7 +7072,7 @@ const struct bpf_prog_ops xdp_prog_ops = {
 
 const struct bpf_verifier_ops cg_skb_verifier_ops = {
.get_func_proto = cg_skb_func_proto,
-   .is_valid_access= sk_filter_is_valid_access,
+   .is_valid_access= cg_skb_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
 };
 
-- 
2.17.1



[PATCH v2 bpf-next 2/2] bpf: add tests for direct packet access from CGROUP_SKB

2018-10-17 Thread Song Liu
Tests are added to make sure CGROUP_SKB cannot access:
  tc_classid, data_meta, flow_keys

and can read and write:
  mark, priority, and cb[0-4]

and can read other fields.

To make selftest with skb->sk work, a dummy sk is added in
bpf_prog_test_run_skb().

Signed-off-by: Song Liu 
---
 net/bpf/test_run.c  |   4 +
 tools/testing/selftests/bpf/test_verifier.c | 170 
 2 files changed, 174 insertions(+)

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 0c423b8cd75c..c7210e2f1ae9 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static __always_inline u32 bpf_test_run_one(struct bpf_prog *prog, void *ctx,
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
@@ -115,6 +116,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const 
union bpf_attr *kattr,
u32 retval, duration;
int hh_len = ETH_HLEN;
struct sk_buff *skb;
+   struct sock sk;
void *data;
int ret;
 
@@ -142,6 +144,8 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const 
union bpf_attr *kattr,
kfree(data);
return -ENOMEM;
}
+   sock_init_data(NULL, );
+   skb->sk = 
 
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
__skb_put(skb, size);
diff --git a/tools/testing/selftests/bpf/test_verifier.c 
b/tools/testing/selftests/bpf/test_verifier.c
index cf4cd32b6772..5bfba7e8afd7 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -4862,6 +4862,176 @@ static struct bpf_test tests[] = {
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
+   {
+   "direct packet read test#1 for CGROUP_SKB",
+   .insns = {
+   BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+   offsetof(struct __sk_buff, data)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+   offsetof(struct __sk_buff, data_end)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+   offsetof(struct __sk_buff, len)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+   offsetof(struct __sk_buff, pkt_type)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+   offsetof(struct __sk_buff, mark)),
+   BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+   offsetof(struct __sk_buff, mark)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+   offsetof(struct __sk_buff, queue_mapping)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+   offsetof(struct __sk_buff, protocol)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+   offsetof(struct __sk_buff, vlan_present)),
+   BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+   BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+   BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+   BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+   BPF_MOV64_IMM(BPF_REG_0, 0),
+   BPF_EXIT_INSN(),
+   },
+   .result = ACCEPT,
+   .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+   },
+   {
+   "direct packet read test#2 for CGROUP_SKB",
+   .insns = {
+   BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+   offsetof(struct __sk_buff, vlan_tci)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
+   offsetof(struct __sk_buff, vlan_proto)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+   offsetof(struct __sk_buff, priority)),
+   BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+   offsetof(struct __sk_buff, priority)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+   offsetof(struct __sk_buff, 
ingress_ifindex)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+   offsetof(struct __sk_buff, tc_index)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+   offsetof(struct __sk_buff, hash)),
+   BPF_MOV64_IMM(BPF_REG_0, 0),
+   BPF_EXIT_INSN(),
+   },
+   .result = ACCEPT,
+   .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+   },
+   {
+   "direct packet read test#3 for CGRO

[PATCH bpf-next v3 13/13] tools/bpf: bpftool: add support for jited func types

2018-10-17 Thread Yonghong Song
This patch added support to print function signature
if btf func_info is available. Note that ksym
now uses function name instead of prog_name as
prog_name has a limit of 16 bytes including
ending '\0'.

The following is a sample output for selftests
test_btf with file test_btf_haskv.o:

  $ bpftool prog dump jited id 1
  int _dummy_tracepoint(struct dummy_tracepoint_args * ):
  bpf_prog_b07ccb89267cf242__dummy_tracepoint:
 0:   push   %rbp
 1:   mov%rsp,%rbp
..
3c:   add$0x28,%rbp
40:   leaveq
41:   retq

  int test_long_fname_1(struct dummy_tracepoint_args * ):
  bpf_prog_2dcecc18072623fc_test_long_fname_1:
 0:   push   %rbp
 1:   mov%rsp,%rbp
..
3a:   add$0x28,%rbp
3e:   leaveq
3f:   retq

  int test_long_fname_2(struct dummy_tracepoint_args * ):
  bpf_prog_89d64e4abf0f0126_test_long_fname_2:
 0:   push   %rbp
 1:   mov%rsp,%rbp
..
80:   add$0x28,%rbp
84:   leaveq
85:   retq

Signed-off-by: Yonghong Song 
---
 tools/bpf/bpftool/btf_dumper.c | 90 ++
 tools/bpf/bpftool/main.h   |  2 +
 tools/bpf/bpftool/prog.c   | 54 
 3 files changed, 146 insertions(+)

diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 55bc512a1831..6122b735ddcc 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -249,3 +249,93 @@ int btf_dumper_type(const struct btf_dumper *d, __u32 
type_id,
 {
return btf_dumper_do_type(d, type_id, 0, data);
 }
+
+#define BTF_PRINT_ARG(...) \
+   do {\
+   pos += snprintf(func_sig + pos, size - pos, \
+   __VA_ARGS__);   \
+   if (pos >= size)\
+   return -1;  \
+   } while (0)
+#define BTF_PRINT_TYPE(type)   \
+   do {\
+   pos = __btf_dumper_type_only(btf, type, func_sig,   \
+pos, size);\
+   if (pos == -1)  \
+   return -1;  \
+   } while (0)
+
+static int __btf_dumper_type_only(struct btf *btf, __u32 type_id,
+ char *func_sig, int pos, int size)
+{
+   const struct btf_type *t = btf__type_by_id(btf, type_id);
+   const struct btf_array *array;
+   int i, vlen;
+
+   switch (BTF_INFO_KIND(t->info)) {
+   case BTF_KIND_INT:
+   BTF_PRINT_ARG("%s ", btf__name_by_offset(btf, t->name_off));
+   break;
+   case BTF_KIND_STRUCT:
+   BTF_PRINT_ARG("struct %s ",
+ btf__name_by_offset(btf, t->name_off));
+   break;
+   case BTF_KIND_UNION:
+   BTF_PRINT_ARG("union %s ",
+ btf__name_by_offset(btf, t->name_off));
+   break;
+   case BTF_KIND_ENUM:
+   BTF_PRINT_ARG("enum %s ",
+ btf__name_by_offset(btf, t->name_off));
+   break;
+   case BTF_KIND_ARRAY:
+   array = (struct btf_array *)(t + 1);
+   BTF_PRINT_TYPE(array->type);
+   BTF_PRINT_ARG("[%d]", array->nelems);
+   break;
+   case BTF_KIND_PTR:
+   BTF_PRINT_TYPE(t->type);
+   BTF_PRINT_ARG("* ");
+   break;
+   case BTF_KIND_UNKN:
+   case BTF_KIND_FWD:
+   case BTF_KIND_TYPEDEF:
+   return -1;
+   case BTF_KIND_VOLATILE:
+   BTF_PRINT_ARG("volatile ");
+   BTF_PRINT_TYPE(t->type);
+   break;
+   case BTF_KIND_CONST:
+   BTF_PRINT_ARG("const ");
+   BTF_PRINT_TYPE(t->type);
+   break;
+   case BTF_KIND_RESTRICT:
+   BTF_PRINT_ARG("restrict ");
+   BTF_PRINT_TYPE(t->type);
+   break;
+   case BTF_KIND_FUNC:
+   case BTF_KIND_FUNC_PROTO:
+   BTF_PRINT_TYPE(t->type);
+   BTF_PRINT_ARG("%s(", btf__name_by_offset(btf, t->name_off));
+   vlen = BTF_INFO_VLEN(t->info);
+   for (i = 0; i < vlen; i++) {
+   __u32 arg_type = ((__u32 *)(t + 1))[i];
+
+   if (i)
+   BTF_PRINT_ARG(", ");
+   BTF_PRINT_TYPE(arg_type);
+   }
+   BTF_PRINT_ARG(")");
+   

[PATCH bpf-next v3 06/13] tools/bpf: sync kernel uapi bpf.h header to tools directory

2018-10-17 Thread Yonghong Song
The kernel uapi bpf.h is synced to tools directory.

Signed-off-by: Yonghong Song 
---
 tools/include/uapi/linux/bpf.h | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f9187b41dff6..7ebbf4f06a65 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -332,6 +332,9 @@ union bpf_attr {
 * (context accesses, allowed helpers, etc).
 */
__u32   expected_attach_type;
+   __u32   prog_btf_fd;/* fd pointing to BTF type data 
*/
+   __u32   func_info_len;  /* func_info length */
+   __aligned_u64   func_info;  /* func type info */
};
 
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -2585,6 +2588,9 @@ struct bpf_prog_info {
__u32 nr_jited_func_lens;
__aligned_u64 jited_ksyms;
__aligned_u64 jited_func_lens;
+   __u32 btf_id;
+   __u32 nr_jited_func_types;
+   __aligned_u64 jited_func_types;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
@@ -2896,4 +2902,9 @@ struct bpf_flow_keys {
};
 };
 
+struct bpf_func_info {
+   __u32   insn_offset;
+   __u32   type_id;
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
-- 
2.17.1



[PATCH bpf-next v3 10/13] tools/bpf: do not use pahole if clang/llvm can generate BTF sections

2018-10-17 Thread Yonghong Song
Add additional checks in tools/testing/selftests/bpf and
samples/bpf such that if clang/llvm compiler can generate
BTF sections, do not use pahole.

Signed-off-by: Yonghong Song 
---
 samples/bpf/Makefile | 8 
 tools/testing/selftests/bpf/Makefile | 8 
 2 files changed, 16 insertions(+)

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index be0a961450bc..870fe7ee2b69 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -208,12 +208,20 @@ endif
 BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
 BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
 BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 
'usage.*llvm')
+BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \
+ clang -target bpf -O2 -g -c -x c - -o 
./llvm_btf_verify.o; \
+ readelf -S ./llvm_btf_verify.o | grep BTF; \
+ /bin/rm -f ./llvm_btf_verify.o)
 
+ifneq ($(BTF_LLVM_PROBE),)
+   EXTRA_CFLAGS += -g
+else
 ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),)
EXTRA_CFLAGS += -g
LLC_FLAGS += -mattr=dwarfris
DWARF2BTF = y
 endif
+endif
 
 # Trick to allow make to be run from this directory
 all:
diff --git a/tools/testing/selftests/bpf/Makefile 
b/tools/testing/selftests/bpf/Makefile
index d99dd6fc3fbe..8d5612724db8 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -121,7 +121,14 @@ $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
 BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
 BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
 BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 
'usage.*llvm')
+BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \
+ clang -target bpf -O2 -g -c -x c - -o 
./llvm_btf_verify.o; \
+ readelf -S ./llvm_btf_verify.o | grep BTF; \
+ /bin/rm -f ./llvm_btf_verify.o)
 
+ifneq ($(BTF_LLVM_PROBE),)
+   CLANG_FLAGS += -g
+else
 ifneq ($(BTF_LLC_PROBE),)
 ifneq ($(BTF_PAHOLE_PROBE),)
 ifneq ($(BTF_OBJCOPY_PROBE),)
@@ -131,6 +138,7 @@ ifneq ($(BTF_OBJCOPY_PROBE),)
 endif
 endif
 endif
+endif
 
 $(OUTPUT)/%.o: %.c
$(CLANG) $(CLANG_FLAGS) \
-- 
2.17.1



[PATCH bpf-next v3 12/13] tools/bpf: enhance test_btf file testing to test func info

2018-10-17 Thread Yonghong Song
Change the bpf programs test_btf_haskv.c and test_btf_nokv.c to
have two sections, and enhance test_btf.c test_file feature
to test btf func_info returned by the kernel.

Signed-off-by: Yonghong Song 
---
 tools/testing/selftests/bpf/test_btf.c   | 87 ++--
 tools/testing/selftests/bpf/test_btf_haskv.c | 16 +++-
 tools/testing/selftests/bpf/test_btf_nokv.c  | 16 +++-
 3 files changed, 106 insertions(+), 13 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_btf.c 
b/tools/testing/selftests/bpf/test_btf.c
index e03a8cea4bb7..38ca942eba28 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -2180,13 +2180,13 @@ static struct btf_file_test file_tests[] = {
 },
 };
 
-static int file_has_btf_elf(const char *fn)
+static int file_has_btf_elf(const char *fn, bool *has_btf_ext)
 {
Elf_Scn *scn = NULL;
GElf_Ehdr ehdr;
+   int ret = 0;
int elf_fd;
Elf *elf;
-   int ret;
 
if (CHECK(elf_version(EV_CURRENT) == EV_NONE,
  "elf_version(EV_CURRENT) == EV_NONE"))
@@ -2218,14 +2218,12 @@ static int file_has_btf_elf(const char *fn)
}
 
sh_name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
-   if (!strcmp(sh_name, BTF_ELF_SEC)) {
+   if (!strcmp(sh_name, BTF_ELF_SEC))
ret = 1;
-   goto done;
-   }
+   if (!strcmp(sh_name, BTF_EXT_ELF_SEC))
+   *has_btf_ext = true;
}
 
-   ret = 0;
-
 done:
close(elf_fd);
elf_end(elf);
@@ -2235,15 +2233,22 @@ static int file_has_btf_elf(const char *fn)
 static int do_test_file(unsigned int test_num)
 {
const struct btf_file_test *test = _tests[test_num - 1];
+   const char *expected_fnames[] = {"_dummy_tracepoint",
+"test_long_fname_1",
+"test_long_fname_2"};
+   __u32 func_lens[10], func_types[10], info_len;
+   struct bpf_prog_info info = {};
struct bpf_object *obj = NULL;
struct bpf_program *prog;
+   bool has_btf_ext = false;
+   struct btf *btf = NULL;
struct bpf_map *map;
-   int err;
+   int i, err, prog_fd;
 
fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num,
test->file);
 
-   err = file_has_btf_elf(test->file);
+   err = file_has_btf_elf(test->file, _btf_ext);
if (err == -1)
return err;
 
@@ -2271,6 +2276,7 @@ static int do_test_file(unsigned int test_num)
err = bpf_object__load(obj);
if (CHECK(err < 0, "bpf_object__load: %d", err))
goto done;
+   prog_fd = bpf_program__fd(prog);
 
map = bpf_object__find_map_by_name(obj, "btf_map");
if (CHECK(!map, "btf_map not found")) {
@@ -2285,6 +2291,69 @@ static int do_test_file(unsigned int test_num)
  test->btf_kv_notfound))
goto done;
 
+   if (!jit_enabled || !has_btf_ext)
+   goto skip_jit;
+
+   info_len = sizeof(struct bpf_prog_info);
+   info.nr_jited_func_types = ARRAY_SIZE(func_types);
+   info.nr_jited_func_lens = ARRAY_SIZE(func_lens);
+   info.jited_func_types = ptr_to_u64(_types[0]);
+   info.jited_func_lens = ptr_to_u64(_lens[0]);
+
+   err = bpf_obj_get_info_by_fd(prog_fd, , _len);
+
+   if (CHECK(err == -1, "invalid get info errno:%d", errno)) {
+   fprintf(stderr, "%s\n", btf_log_buf);
+   err = -1;
+   goto done;
+   }
+   if (CHECK(info.nr_jited_func_lens != 3,
+ "incorrect info.nr_jited_func_lens %d",
+ info.nr_jited_func_lens)) {
+   err = -1;
+   goto done;
+   }
+   if (CHECK(info.nr_jited_func_types != 3,
+ "incorrect info.nr_jited_func_types %d",
+ info.nr_jited_func_types)) {
+   err = -1;
+   goto done;
+   }
+   if (CHECK(info.btf_id == 0, "incorrect btf_id = 0")) {
+   err = -1;
+   goto done;
+   }
+
+   err = btf_get_from_id(info.btf_id, );
+   if (CHECK(err, "cannot get btf from kernel, err: %d", err))
+   goto done;
+
+   /* check three functions */
+   for (i = 0; i < 3; i++) {
+   const struct btf_type *t;
+   const char *fname;
+
+   t = btf__type_by_id(btf, func_types[i]);
+   if (CHECK(!t, "btf__type_by_id failure: id %u",
+ func_types[i])) {
+   err = -1;
+   goto done;
+   }
+
+   fname = btf__name_by_offset(btf, t->name_off);
+  

[PATCH bpf-next v3 05/13] bpf: get better bpf_prog ksyms based on btf func type_id

2018-10-17 Thread Yonghong Song
This patch added interface to load a program with the following
additional information:
   . prog_btf_fd
   . func_info and func_info_len
where func_info will provides function range and type_id
corresponding to each function.

If verifier agrees with function range provided by the user,
the bpf_prog ksym for each function will use the func name
provided in the type_id, which is supposed to provide better
encoding as it is not limited by 16 bytes program name
limitation and this is better for bpf program which contains
multiple subprograms.

The bpf_prog_info interface is also extended to
return btf_id and jited_func_types, so user spaces can
print out the function prototype for each jited function.

Signed-off-by: Yonghong Song 
---
 include/linux/bpf.h  |  2 +
 include/linux/bpf_verifier.h |  1 +
 include/linux/btf.h  |  2 +
 include/uapi/linux/bpf.h | 11 +
 kernel/bpf/btf.c |  4 +-
 kernel/bpf/core.c| 13 ++
 kernel/bpf/syscall.c | 86 +++-
 kernel/bpf/verifier.c| 46 +++
 8 files changed, 162 insertions(+), 3 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e60fff48288b..a99e038ce9c4 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -308,6 +308,8 @@ struct bpf_prog_aux {
void *security;
 #endif
struct bpf_prog_offload *offload;
+   struct btf *btf;
+   u32 type_id; /* type id for this prog/func */
union {
struct work_struct work;
struct rcu_head rcu;
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 9e8056ec20fa..e84782ec50ac 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -201,6 +201,7 @@ static inline bool bpf_verifier_log_needed(const struct 
bpf_verifier_log *log)
 struct bpf_subprog_info {
u32 start; /* insn idx of function entry point */
u16 stack_depth; /* max. stack depth used by this function */
+   u32 type_id; /* btf type_id for this subprog */
 };
 
 /* single container for all structs
diff --git a/include/linux/btf.h b/include/linux/btf.h
index e076c4697049..7f2c0a4a45ea 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -46,5 +46,7 @@ void btf_type_seq_show(const struct btf *btf, u32 type_id, 
void *obj,
   struct seq_file *m);
 int btf_get_fd_by_id(u32 id);
 u32 btf_id(const struct btf *btf);
+const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
+const char *btf_name_by_offset(const struct btf *btf, u32 offset);
 
 #endif
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f9187b41dff6..7ebbf4f06a65 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -332,6 +332,9 @@ union bpf_attr {
 * (context accesses, allowed helpers, etc).
 */
__u32   expected_attach_type;
+   __u32   prog_btf_fd;/* fd pointing to BTF type data 
*/
+   __u32   func_info_len;  /* func_info length */
+   __aligned_u64   func_info;  /* func type info */
};
 
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -2585,6 +2588,9 @@ struct bpf_prog_info {
__u32 nr_jited_func_lens;
__aligned_u64 jited_ksyms;
__aligned_u64 jited_func_lens;
+   __u32 btf_id;
+   __u32 nr_jited_func_types;
+   __aligned_u64 jited_func_types;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
@@ -2896,4 +2902,9 @@ struct bpf_flow_keys {
};
 };
 
+struct bpf_func_info {
+   __u32   insn_offset;
+   __u32   type_id;
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 763f8e06bc91..13e83f82374b 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -471,7 +471,7 @@ static bool btf_name_valid_identifier(const struct btf 
*btf, u32 offset)
return !*src;
 }
 
-static const char *btf_name_by_offset(const struct btf *btf, u32 offset)
+const char *btf_name_by_offset(const struct btf *btf, u32 offset)
 {
if (!offset)
return "(anon)";
@@ -481,7 +481,7 @@ static const char *btf_name_by_offset(const struct btf 
*btf, u32 offset)
return "(invalid-name-offset)";
 }
 
-static const struct btf_type *btf_type_by_id(const struct btf *btf, u32 
type_id)
+const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
 {
if (type_id > btf->nr_types)
return NULL;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index defcf4df6d91..4c4d414e030a 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -21,12 +21,14 @@
  * Kris Katterjohn - Added many additional checks in bpf_check_classic()
  */
 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -387,6 +389,8 @

[PATCH bpf-next v3 09/13] tools/bpf: add support to read .BTF.ext sections

2018-10-17 Thread Yonghong Song
The .BTF section is already available to encode types.
These types can be used for map
pretty print. The whole .BTF will be passed to the
kernel as well for which kernel can verify and return
to the user space for pretty print etc.

The llvm patch at https://reviews.llvm.org/D53261
will generate .BTF section and one more section .BTF.ext.
The .BTF.ext section encodes function type
information and line information. Note that
this patch set only supports function type info.
The functionality is implemented in libbpf.

The .BTF section can be directly loaded into the
kernel, and the .BTF.ext section cannot. The loader
may need to do some relocation and merging,
similar to merging multiple code sections, before
loading into the kernel.

Signed-off-by: Yonghong Song 
---
 tools/lib/bpf/btf.c| 232 +
 tools/lib/bpf/btf.h|  48 +
 tools/lib/bpf/libbpf.c |  53 +-
 3 files changed, 329 insertions(+), 4 deletions(-)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 33095fc1860b..4748e0bacd2b 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -37,6 +37,11 @@ struct btf {
int fd;
 };
 
+struct btf_ext {
+   void *func_info;
+   __u32 func_info_len;
+};
+
 static int btf_add_type(struct btf *btf, struct btf_type *t)
 {
if (btf->types_size - btf->nr_types < 2) {
@@ -397,3 +402,230 @@ const char *btf__name_by_offset(const struct btf *btf, 
__u32 offset)
else
return NULL;
 }
+
+static int btf_ext_validate_func_info(const struct btf_sec_func_info *sinfo,
+ __u32 size, btf_print_fn_t err_log)
+{
+   int sec_hdrlen = sizeof(struct btf_sec_func_info);
+   __u32 record_size = sizeof(struct bpf_func_info);
+   __u32 size_left = size, num_records;
+   __u64 total_record_size;
+
+   while (size_left) {
+   if (size_left < sec_hdrlen) {
+   elog("BTF.ext func_info header not found");
+   return -EINVAL;
+   }
+
+   num_records = sinfo->num_func_info;
+   if (num_records == 0) {
+   elog("incorrect BTF.ext num_func_info");
+   return -EINVAL;
+   }
+
+   total_record_size = sec_hdrlen +
+   (__u64)num_records * record_size;
+   if (size_left < total_record_size) {
+   elog("incorrect BTF.ext num_func_info");
+   return -EINVAL;
+   }
+
+   size_left -= total_record_size;
+   sinfo = (void *)sinfo + total_record_size;
+   }
+
+   return 0;
+}
+static int btf_ext_parse_hdr(__u8 *data, __u32 data_size,
+btf_print_fn_t err_log)
+{
+   const struct btf_ext_header *hdr = (struct btf_ext_header *)data;
+   const struct btf_sec_func_info *sinfo;
+   __u32 meta_left, last_func_info_pos;
+
+   if (data_size < sizeof(*hdr)) {
+   elog("BTF.ext header not found");
+   return -EINVAL;
+   }
+
+   if (hdr->magic != BTF_MAGIC) {
+   elog("Invalid BTF.ext magic:%x\n", hdr->magic);
+   return -EINVAL;
+   }
+
+   if (hdr->version != BTF_VERSION) {
+   elog("Unsupported BTF.ext version:%u\n", hdr->version);
+   return -ENOTSUP;
+   }
+
+   if (hdr->flags) {
+   elog("Unsupported BTF.ext flags:%x\n", hdr->flags);
+   return -ENOTSUP;
+   }
+
+   meta_left = data_size - sizeof(*hdr);
+   if (!meta_left) {
+   elog("BTF.ext has no data\n");
+   return -EINVAL;
+   }
+
+   if (meta_left < hdr->func_info_off) {
+   elog("Invalid BTF.ext func_info section offset:%u\n",
+hdr->func_info_off);
+   return -EINVAL;
+   }
+
+   if (hdr->func_info_off & 0x02) {
+   elog("BTF.ext func_info section is not aligned to 4 bytes\n");
+   return -EINVAL;
+   }
+
+   last_func_info_pos = sizeof(*hdr) + hdr->func_info_off +
+hdr->func_info_len;
+   if (last_func_info_pos > data_size) {
+   elog("Invalid BTF.ext func_info section size:%u\n",
+hdr->func_info_len);
+   return -EINVAL;
+   }
+
+   sinfo = (const struct btf_sec_func_info *)(data + sizeof(*hdr) +
+  hdr->func_info_off);
+   return btf_ext_validate_func_info(sinfo, hdr->func_info_len,
+ err_log);
+}
+
+void btf_ext__free(struct btf_ext *btf_ext)
+{
+   if (!btf_ext)
+   return;
+
+   f

[PATCH bpf-next v3 01/13] bpf: btf: Break up btf_type_is_void()

2018-10-17 Thread Yonghong Song
This patch breaks up btf_type_is_void() into
btf_type_is_void() and btf_type_is_fwd().

It also adds btf_type_nosize() to better describe that it is
testing whether a type has no size info.

Signed-off-by: Martin KaFai Lau 
Signed-off-by: Yonghong Song 
---
 kernel/bpf/btf.c | 37 ++---
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 378cef70341c..be406d8906ce 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -306,15 +306,22 @@ static bool btf_type_is_modifier(const struct btf_type *t)
 
 static bool btf_type_is_void(const struct btf_type *t)
 {
-   /* void => no type and size info.
-* Hence, FWD is also treated as void.
-*/
-   return t == _void || BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
+   return t == _void;
+}
+
+static bool btf_type_is_fwd(const struct btf_type *t)
+{
+   return BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
+}
+
+static bool btf_type_nosize(const struct btf_type *t)
+{
+   return btf_type_is_void(t) || btf_type_is_fwd(t);
 }
 
-static bool btf_type_is_void_or_null(const struct btf_type *t)
+static bool btf_type_nosize_or_null(const struct btf_type *t)
 {
-   return !t || btf_type_is_void(t);
+   return !t || btf_type_nosize(t);
 }
 
 /* union is only a special case of struct:
@@ -826,7 +833,7 @@ const struct btf_type *btf_type_id_size(const struct btf 
*btf,
u32 size = 0;
 
size_type = btf_type_by_id(btf, size_type_id);
-   if (btf_type_is_void_or_null(size_type))
+   if (btf_type_nosize_or_null(size_type))
return NULL;
 
if (btf_type_has_size(size_type)) {
@@ -842,7 +849,7 @@ const struct btf_type *btf_type_id_size(const struct btf 
*btf,
size = btf->resolved_sizes[size_type_id];
size_type_id = btf->resolved_ids[size_type_id];
size_type = btf_type_by_id(btf, size_type_id);
-   if (btf_type_is_void(size_type))
+   if (btf_type_nosize_or_null(size_type))
return NULL;
}
 
@@ -1164,7 +1171,7 @@ static int btf_modifier_resolve(struct btf_verifier_env 
*env,
}
 
/* "typedef void new_void", "const void"...etc */
-   if (btf_type_is_void(next_type))
+   if (btf_type_is_void(next_type) || btf_type_is_fwd(next_type))
goto resolved;
 
if (!env_type_is_resolve_sink(env, next_type) &&
@@ -1178,7 +1185,7 @@ static int btf_modifier_resolve(struct btf_verifier_env 
*env,
 * pretty print).
 */
if (!btf_type_id_size(btf, _type_id, _type_size) &&
-   !btf_type_is_void(btf_type_id_resolve(btf, _type_id))) {
+   !btf_type_nosize(btf_type_id_resolve(btf, _type_id))) {
btf_verifier_log_type(env, v->t, "Invalid type_id");
return -EINVAL;
}
@@ -1205,7 +1212,7 @@ static int btf_ptr_resolve(struct btf_verifier_env *env,
}
 
/* "void *" */
-   if (btf_type_is_void(next_type))
+   if (btf_type_is_void(next_type) || btf_type_is_fwd(next_type))
goto resolved;
 
if (!env_type_is_resolve_sink(env, next_type) &&
@@ -1235,7 +1242,7 @@ static int btf_ptr_resolve(struct btf_verifier_env *env,
}
 
if (!btf_type_id_size(btf, _type_id, _type_size) &&
-   !btf_type_is_void(btf_type_id_resolve(btf, _type_id))) {
+   !btf_type_nosize(btf_type_id_resolve(btf, _type_id))) {
btf_verifier_log_type(env, v->t, "Invalid type_id");
return -EINVAL;
}
@@ -1396,7 +1403,7 @@ static int btf_array_resolve(struct btf_verifier_env *env,
/* Check array->index_type */
index_type_id = array->index_type;
index_type = btf_type_by_id(btf, index_type_id);
-   if (btf_type_is_void_or_null(index_type)) {
+   if (btf_type_nosize_or_null(index_type)) {
btf_verifier_log_type(env, v->t, "Invalid index");
return -EINVAL;
}
@@ -1415,7 +1422,7 @@ static int btf_array_resolve(struct btf_verifier_env *env,
/* Check array->type */
elem_type_id = array->type;
elem_type = btf_type_by_id(btf, elem_type_id);
-   if (btf_type_is_void_or_null(elem_type)) {
+   if (btf_type_nosize_or_null(elem_type)) {
btf_verifier_log_type(env, v->t,
  "Invalid elem");
return -EINVAL;
@@ -1615,7 +1622,7 @@ static int btf_struct_resolve(struct btf_verifier_env 
*env,
const struct btf_type *member_type = btf_type_by_id(env->btf,
member_type_id);
 
-   if (btf_type_is_void_or_null(member_type)) {
+   if (btf_type_nosize_or_null(me

[PATCH bpf-next v3 08/13] tools/bpf: extends test_btf to test load/retrieve func_type info

2018-10-17 Thread Yonghong Song
A two function bpf program is loaded with btf and func_info.
After successful prog load, the bpf_get_info syscall is called
to retrieve prog info to ensure the types returned from the
kernel match the types passed to the kernel from the
user space.

Several negative tests are also added to test loading/retrieving
of func_type info.

Signed-off-by: Yonghong Song 
---
 tools/testing/selftests/bpf/test_btf.c | 278 -
 1 file changed, 275 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_btf.c 
b/tools/testing/selftests/bpf/test_btf.c
index b6461c3c5e11..e03a8cea4bb7 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -5,6 +5,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -22,9 +23,13 @@
 #include "bpf_rlimit.h"
 #include "bpf_util.h"
 
+#define MAX_INSNS  512
+#define MAX_SUBPROGS   16
+
 static uint32_t pass_cnt;
 static uint32_t error_cnt;
 static uint32_t skip_cnt;
+static bool jit_enabled;
 
 #define CHECK(condition, format...) ({ \
int __ret = !!(condition);  \
@@ -60,6 +65,24 @@ static int __base_pr(const char *format, ...)
return err;
 }
 
+static bool is_jit_enabled(void)
+{
+   const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
+   bool enabled = false;
+   int sysctl_fd;
+
+   sysctl_fd = open(jit_sysctl, 0, O_RDONLY);
+   if (sysctl_fd != -1) {
+   char tmpc;
+
+   if (read(sysctl_fd, , sizeof(tmpc)) == 1)
+   enabled = (tmpc != '0');
+   close(sysctl_fd);
+   }
+
+   return enabled;
+}
+
 #define BTF_INFO_ENC(kind, root, vlen) \
((!!(root) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
 
@@ -103,6 +126,7 @@ static struct args {
bool get_info_test;
bool pprint_test;
bool always_log;
+   bool func_type_test;
 } args;
 
 static char btf_log_buf[BTF_LOG_BUF_SIZE];
@@ -2693,16 +2717,256 @@ static int test_pprint(void)
return err;
 }
 
+static struct btf_func_type_test {
+   const char *descr;
+   const char *str_sec;
+   __u32 raw_types[MAX_NR_RAW_TYPES];
+   __u32 str_sec_size;
+   struct bpf_insn insns[MAX_INSNS];
+   __u32 prog_type;
+   struct bpf_func_info func_info[MAX_SUBPROGS];
+   __u32 func_info_len;
+   bool expected_prog_load_failure;
+} func_type_test[] = {
+
+{
+   .descr = "func_type test #1",
+   .str_sec = "\0int\0unsigned int\0funcA\0funcB",
+   .raw_types = {
+   BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+   BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4),   /* [2] */
+   BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 2), 1),  
/* [3] */
+   1, 2,
+   BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 2), 1),  
/* [4] */
+   2, 1,
+   BTF_END_RAW,
+   },
+   .str_sec_size = sizeof("\0int\0unsigned int\0funcA\0funcB"),
+   .insns = {
+   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+   BPF_MOV64_IMM(BPF_REG_0, 1),
+   BPF_EXIT_INSN(),
+   BPF_MOV64_IMM(BPF_REG_0, 2),
+   BPF_EXIT_INSN(),
+   },
+   .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+   .func_info = { {0, 3}, {3, 4} },
+   .func_info_len = 2 * sizeof(struct bpf_func_info),
+},
+
+{
+   .descr = "func_type test #2",
+   .str_sec = "\0int\0unsigned int\0funcA\0funcB",
+   .raw_types = {
+   BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+   BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4),   /* [2] */
+   /* incorrect func type */
+   BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 2), 
1),  /* [3] */
+   1, 2,
+   BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 2), 1),  
/* [4] */
+   2, 1,
+   BTF_END_RAW,
+   },
+   .str_sec_size = sizeof("\0int\0unsigned int\0funcA\0funcB"),
+   .insns = {
+   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+   BPF_MOV64_IMM(BPF_REG_0, 1),
+   BPF_EXIT_INSN(),
+   BPF_MOV64_IMM(BPF_REG_0, 2),
+   BPF_EXIT_INSN(),
+   },
+   .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+   .func_info = { {0, 3}, {3, 4} },
+   .func_info_len = 2 * sizeof(struct bpf_func_info),
+   .expected_prog_load_failure = true,
+},
+
+{
+   .descr = "func_type test #3",
+   .str_sec = "\0int\0unsigned int\0funcA\0funcB",
+   .raw_types = {
+   BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+ 

[PATCH bpf-next v3 04/13] tools/bpf: add btf func/func_proto unit tests in selftest test_btf

2018-10-17 Thread Yonghong Song
Add several BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO
unit tests in bpf selftest test_btf.

Signed-off-by: Martin KaFai Lau 
Signed-off-by: Yonghong Song 
---
 tools/lib/bpf/btf.c|   4 +
 tools/testing/selftests/bpf/test_btf.c | 216 +
 2 files changed, 220 insertions(+)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 449591aa9900..33095fc1860b 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -165,6 +165,10 @@ static int btf_parse_type_sec(struct btf *btf, 
btf_print_fn_t err_log)
case BTF_KIND_ENUM:
next_type += vlen * sizeof(struct btf_enum);
break;
+   case BTF_KIND_FUNC:
+   case BTF_KIND_FUNC_PROTO:
+   next_type += vlen * sizeof(int);
+   break;
case BTF_KIND_TYPEDEF:
case BTF_KIND_PTR:
case BTF_KIND_FWD:
diff --git a/tools/testing/selftests/bpf/test_btf.c 
b/tools/testing/selftests/bpf/test_btf.c
index f42b3396d622..b6461c3c5e11 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -1374,6 +1374,222 @@ static struct btf_raw_test raw_tests[] = {
.map_create_err = true,
 },
 
+{
+   .descr = "func pointer #1",
+   .raw_types = {
+   BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+   BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),   /* [2] */
+   /* int (*func)(int, unsigned int) */
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 2), 1),
/* [3] */
+   1, 2,
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),   /* [4] 
*/
+   BTF_END_RAW,
+   },
+   .str_sec = "",
+   .str_sec_size = sizeof(""),
+   .map_type = BPF_MAP_TYPE_ARRAY,
+   .map_name = "func_type_check_btf",
+   .key_size = sizeof(int),
+   .value_size = sizeof(int),
+   .key_type_id = 1,
+   .value_type_id = 1,
+   .max_entries = 4,
+},
+
+{
+   .descr = "func pointer #2",
+   .raw_types = {
+   BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+   BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),   /* [2] */
+   /* void (*func)(int, unsigned int, ) */
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 3), 0),
/* [3] */
+   1, 2, 0,
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),   /* [4] 
*/
+   BTF_END_RAW,
+   },
+   .str_sec = "",
+   .str_sec_size = sizeof(""),
+   .map_type = BPF_MAP_TYPE_ARRAY,
+   .map_name = "func_type_check_btf",
+   .key_size = sizeof(int),
+   .value_size = sizeof(int),
+   .key_type_id = 1,
+   .value_type_id = 1,
+   .max_entries = 4,
+},
+
+{
+   .descr = "func pointer #3",
+   .raw_types = {
+   BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+   BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),   /* [2] */
+   /* void (*func)(void, int, unsigned int) */
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 3), 0),
/* [3] */
+   1, 0, 2,
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),   /* [4] 
*/
+   BTF_END_RAW,
+   },
+   .str_sec = "",
+   .str_sec_size = sizeof(""),
+   .map_type = BPF_MAP_TYPE_ARRAY,
+   .map_name = "func_type_check_btf",
+   .key_size = sizeof(int),
+   .value_size = sizeof(int),
+   .key_type_id = 1,
+   .value_type_id = 1,
+   .max_entries = 4,
+   .btf_load_err = true,
+   .err_str = "Invalid arg#2",
+},
+
+{
+   .descr = "func pointer #4",
+   .raw_types = {
+   BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+   BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),   /* [2] */
+   /*
+* Testing:
+* BTF_KIND_CONST => BTF_KIND_TYPEDEF => BTF_KIND_PTR =>
+* BTF_KIND_FUNC_PROTO
+*/
+   /* typedef void (*func_ptr)(int, unsigned int) */
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 5),/* [3] 
*/
+   /* const func_ptr */
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 3), /* [4] 
*/
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 6),   /* [5] 
*/
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 2), 0),
/* [6] */
+   1, 2,
+   BTF_END_RAW,
+   },
+   .str_sec = "",
+   .str_sec_size = sizeof(""),
+   .map_type = BPF_MAP_TYPE_ARRAY,
+   .map_name = "func_type_check_btf",
+   .ke

[PATCH bpf-next v3 02/13] bpf: btf: Add BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO

2018-10-17 Thread Yonghong Song
This patch adds BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO
support to the type section. BTF_KIND_FUNC_PROTO is used
to specify the type of a function pointer. With this,
BTF has a complete set of C types (except float).

BTF_KIND_FUNC is used to specify the signature of a
defined subprogram. BTF_KIND_FUNC_PROTO can be referenced
by another type, e.g., a pointer type, and BTF_KIND_FUNC
type cannot be referenced by another type.

For both BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO types,
the func return type is in t->type (where t is a
"struct btf_type" object). The func args are an array of
u32s immediately following object "t".

As a concrete example, for the C program below,
  $ cat test.c
  int foo(int (*bar)(int)) { return bar(5); }
with LLVM patch https://reviews.llvm.org/D53261
in Debug mode, we have
  $ clang -target bpf -g -O2 -mllvm -debug-only=btf -c test.c
  Type Table:
  [1] FUNC NameOff=1 Info=0x0c01 Size/Type=2
  ParamType=3
  [2] INT NameOff=11 Info=0x0100 Size/Type=4
  Desc=0x0120
  [3] PTR NameOff=0 Info=0x0200 Size/Type=4
  [4] FUNC_PROTO NameOff=0 Info=0x0d01 Size/Type=2
  ParamType=2

  String Table:
  0 :
  1 : foo
  5 : .text
  11 : int
  15 : test.c
  22 : int foo(int (*bar)(int)) { return bar(5); }

  FuncInfo Table:
  SecNameOff=5
  InsnOffset= TypeId=1

  ...

(Eventually we shall have bpftool to dump btf information
 like the above.)

Function "foo" has a FUNC type (type_id = 1).
The parameter of "foo" has type_id 3 which is PTR->FUNC_PROTO,
where FUNC_PROTO refers to function pointer "bar".

In FuncInfo Table, for section .text, the function,
with to-be-determined offset (marked as ),
has type_id=1 which refers to a FUNC type.
This way, the function signature is
available to both kernel and user space.
Here, the insn offset is not available during the dump time
as relocation is resolved pretty late in the compilation process.

Signed-off-by: Martin KaFai Lau 
Signed-off-by: Yonghong Song 
---
 include/uapi/linux/btf.h |   9 +-
 kernel/bpf/btf.c | 280 ++-
 2 files changed, 253 insertions(+), 36 deletions(-)

diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index 972265f32871..63f8500e6f34 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -40,7 +40,8 @@ struct btf_type {
/* "size" is used by INT, ENUM, STRUCT and UNION.
 * "size" tells the size of the type it is describing.
 *
-* "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
+* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
+* FUNC and FUNC_PROTO.
 * "type" is a type_id referring to another type.
 */
union {
@@ -64,8 +65,10 @@ struct btf_type {
 #define BTF_KIND_VOLATILE  9   /* Volatile */
 #define BTF_KIND_CONST 10  /* Const*/
 #define BTF_KIND_RESTRICT  11  /* Restrict */
-#define BTF_KIND_MAX   11
-#define NR_BTF_KINDS   12
+#define BTF_KIND_FUNC  12  /* Function */
+#define BTF_KIND_FUNC_PROTO13  /* Function Prototype   */
+#define BTF_KIND_MAX   13
+#define NR_BTF_KINDS   14
 
 /* For some specific BTF_KIND, "struct btf_type" is immediately
  * followed by extra data.
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index be406d8906ce..763f8e06bc91 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -5,6 +5,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -259,6 +260,8 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
[BTF_KIND_VOLATILE] = "VOLATILE",
[BTF_KIND_CONST]= "CONST",
[BTF_KIND_RESTRICT] = "RESTRICT",
+   [BTF_KIND_FUNC] = "FUNC",
+   [BTF_KIND_FUNC_PROTO]   = "FUNC_PROTO",
 };
 
 struct btf_kind_operations {
@@ -281,6 +284,9 @@ struct btf_kind_operations {
 static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS];
 static struct btf_type btf_void;
 
+static int btf_resolve(struct btf_verifier_env *env,
+  const struct btf_type *t, u32 type_id);
+
 static bool btf_type_is_modifier(const struct btf_type *t)
 {
/* Some of them is not strictly a C modifier
@@ -314,9 +320,20 @@ static bool btf_type_is_fwd(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
 }
 
+static bool btf_type_is_func(const struct btf_type *t)
+{
+   return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC;
+}
+
+static bool btf_type_is_func_proto(const struct btf_type *t)
+{
+   return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO;
+}
+
 static bool btf_type_nosize(const struct btf_type *t)
 {
-   return btf_type_is_void(t) || btf_type_is_fwd(t);
+   return btf_type_is_

[PATCH bpf-next v3 00/13] bpf: add btf func info support

2018-10-17 Thread Yonghong Song
The BTF support was added to kernel by Commit 69b693f0aefa
("bpf: btf: Introduce BPF Type Format (BTF)"), which introduced
.BTF section into ELF file and is primarily
used for map pretty print.
pahole is used to convert dwarf to BTF for ELF files.

This patch added func info support to the kernel so we can
get better ksym's for bpf function calls. Basically,
pairs of bpf function calls and their corresponding types
are passed to the kernel. Extracting function names from
the types, the kernel is able to construct a ksym for
each function call with embedded function name.

This patch set added support of FUNC and FUNC_PROTO types
in the kernel. LLVM patch https://reviews.llvm.org/D53261
can generate func info, encoded in .BTF.ext ELF section.
The following is an example to show FUNC and
FUNC_PROTO difference, compiled with the above LLVM patch
with Debug mode.

  -bash-4.2$ cat test.c
  int foo(int (*bar)(int)) { return bar(5); }
  -bash-4.2$ clang -target bpf -g -O2 -mllvm -debug-only=btf -c test.c
  Type Table:
  [1] FUNC name_off=1 info=0x0c01 size/type=2
param_type=3
  [2] INT name_off=11 info=0x0100 size/type=4
desc=0x0120
  [3] PTR name_off=0 info=0x0200 size/type=4
  [4] FUNC_PROTO name_off=0 info=0x0d01 size/type=2
param_type=2

  String Table:
  0 :
  1 : foo
  5 : .text
  11 : int
  15 : test.c
  22 : int foo(int (*bar)(int)) { return bar(5); }

  FuncInfo Table:
  sec_name_off=5
insn_offset= type_id=1
  ...
  
In the above, type and string tables are in .BTF section and
the func info in .BTF.ext. The "" is the
insn offset which is not available during the dump time but
resolved during later compilation process.
Following the format specification at Patch #9 and examine the
raw data in .BTF.ext section, we have
  FuncInfo Table:
  sec_name_off=5
insn_offset=0 type_id=1
The (insn_offset, type_id) can be passed to the kernel
so the kernel can find the func name and use it in the ksym.
Below is a demonstration from Patch #13.
  $ bpftool prog dump jited id 1
  int _dummy_tracepoint(struct dummy_tracepoint_args * ):
  bpf_prog_b07ccb89267cf242__dummy_tracepoint:
 0:   push   %rbp
 1:   mov%rsp,%rbp
..
3c:   add$0x28,%rbp
40:   leaveq
41:   retq
  
  int test_long_fname_1(struct dummy_tracepoint_args * ):
  bpf_prog_2dcecc18072623fc_test_long_fname_1:
 0:   push   %rbp
 1:   mov%rsp,%rbp
..
3a:   add$0x28,%rbp
3e:   leaveq
3f:   retq
  
  int test_long_fname_2(struct dummy_tracepoint_args * ):
  bpf_prog_89d64e4abf0f0126_test_long_fname_2:
 0:   push   %rbp
 1:   mov%rsp,%rbp
..
80:   add$0x28,%rbp
84:   leaveq
85:   retq

For the patchset,
Patch #1  refactors the code to break up btf_type_is_void().
Patch #2  introduces new BTF types BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO.
Patch #3  syncs btf.h header to tools directory.
Patch #4  adds btf func/func_proto self tests in test_btf.
Patch #5  adds kernel interface to load func_info to kernel
  and pass func_info to userspace.
Patch #6  syncs bpf.h header to tools directory.
Patch #7  adds news btf/func_info related fields in libbpf
  program load function.
Patch #8  extends selftest test_btf to test load/retrieve func_type info.
Patch #9  adds .BTF.ext func info support.
Patch #10 changes Makefile to avoid using pahole if llvm is capable of
  generating BTF sections.
Patch #11 refactors to have btf_get_from_id() in libbpf for reuse.
Patch #12 enhance test_btf file testing to test func info.
Patch #13 adds bpftool support for func signature dump.

Changelogs:
  v2 -> v3:
. Removed kernel btf extern functions btf_type_id_func()
  and btf_get_name_by_id(). Instead, exposing existing
  functions btf_type_by_id() and btf_name_by_offset().
. Added comments about ELF section .BTF.ext layout.
. Better codes in btftool as suggested by Edward Cree.
  v1 -> v2:
. Added missing sign-off.
. Limited the func_name/struct_member_name length for validity test.
. Removed/changed several verifier messages.
. Modified several commit messages to remove line_off reference.

Yonghong Song (13):
  bpf: btf: Break up btf_type_is_void()
  bpf: btf: Add BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO
  tools/bpf: sync kernel btf.h header
  tools/bpf: add btf func/func_proto unit tests in selftest test_btf
  bpf: get better bpf_prog ksyms based on btf func type_id
  tools/bpf: sync kernel uapi bpf.h header to tools directory
  tools/bpf: add new fields for program load in lib/bpf
  tools/bpf: extends test_btf to test load/retrieve func_type info
  tools/bpf: add support to read .BTF.ext sections
  tools/bpf: do not use pahole if clang/llvm can generate BTF sections
  tools/bpf: refactor to implement btf_get_from_id() in lib/bpf
  tools/bpf: enhance test_btf file testing

[PATCH bpf-next v3 11/13] tools/bpf: refactor to implement btf_get_from_id() in lib/bpf

2018-10-17 Thread Yonghong Song
The function get_btf() is implemented in tools/bpf/bpftool/map.c
to get a btf structure given a map_info. This patch
refactors that function into btf_get_from_id()
in tools/lib/bpf so that it can be reused later.

Signed-off-by: Yonghong Song 
---
 tools/bpf/bpftool/map.c | 68 ++--
 tools/lib/bpf/btf.c | 69 +
 tools/lib/bpf/btf.h | 18 ++-
 3 files changed, 81 insertions(+), 74 deletions(-)

diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 7bf38f0e152e..1b8a75fa0471 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -215,70 +215,6 @@ static int do_dump_btf(const struct btf_dumper *d,
return ret;
 }
 
-static int get_btf(struct bpf_map_info *map_info, struct btf **btf)
-{
-   struct bpf_btf_info btf_info = { 0 };
-   __u32 len = sizeof(btf_info);
-   __u32 last_size;
-   int btf_fd;
-   void *ptr;
-   int err;
-
-   err = 0;
-   *btf = NULL;
-   btf_fd = bpf_btf_get_fd_by_id(map_info->btf_id);
-   if (btf_fd < 0)
-   return 0;
-
-   /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so
-* let's start with a sane default - 4KiB here - and resize it only if
-* bpf_obj_get_info_by_fd() needs a bigger buffer.
-*/
-   btf_info.btf_size = 4096;
-   last_size = btf_info.btf_size;
-   ptr = malloc(last_size);
-   if (!ptr) {
-   err = -ENOMEM;
-   goto exit_free;
-   }
-
-   bzero(ptr, last_size);
-   btf_info.btf = ptr_to_u64(ptr);
-   err = bpf_obj_get_info_by_fd(btf_fd, _info, );
-
-   if (!err && btf_info.btf_size > last_size) {
-   void *temp_ptr;
-
-   last_size = btf_info.btf_size;
-   temp_ptr = realloc(ptr, last_size);
-   if (!temp_ptr) {
-   err = -ENOMEM;
-   goto exit_free;
-   }
-   ptr = temp_ptr;
-   bzero(ptr, last_size);
-   btf_info.btf = ptr_to_u64(ptr);
-   err = bpf_obj_get_info_by_fd(btf_fd, _info, );
-   }
-
-   if (err || btf_info.btf_size > last_size) {
-   err = errno;
-   goto exit_free;
-   }
-
-   *btf = btf__new((__u8 *)btf_info.btf, btf_info.btf_size, NULL);
-   if (IS_ERR(*btf)) {
-   err = PTR_ERR(*btf);
-   *btf = NULL;
-   }
-
-exit_free:
-   close(btf_fd);
-   free(ptr);
-
-   return err;
-}
-
 static json_writer_t *get_btf_writer(void)
 {
json_writer_t *jw = jsonw_new(stdout);
@@ -765,7 +701,7 @@ static int do_dump(int argc, char **argv)
 
prev_key = NULL;
 
-   err = get_btf(, );
+   err = btf_get_from_id(info.btf_id, );
if (err) {
p_err("failed to get btf");
goto exit_free;
@@ -909,7 +845,7 @@ static int do_lookup(int argc, char **argv)
}
 
/* here means bpf_map_lookup_elem() succeeded */
-   err = get_btf(, );
+   err = btf_get_from_id(info.btf_id, );
if (err) {
p_err("failed to get btf");
goto exit_free;
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 4748e0bacd2b..ab654628e966 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -42,6 +42,11 @@ struct btf_ext {
__u32 func_info_len;
 };
 
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+   return (__u64) (unsigned long) ptr;
+}
+
 static int btf_add_type(struct btf *btf, struct btf_type *t)
 {
if (btf->types_size - btf->nr_types < 2) {
@@ -403,6 +408,70 @@ const char *btf__name_by_offset(const struct btf *btf, 
__u32 offset)
return NULL;
 }
 
+int btf_get_from_id(__u32 id, struct btf **btf)
+{
+   struct bpf_btf_info btf_info = { 0 };
+   __u32 len = sizeof(btf_info);
+   __u32 last_size;
+   int btf_fd;
+   void *ptr;
+   int err;
+
+   err = 0;
+   *btf = NULL;
+   btf_fd = bpf_btf_get_fd_by_id(id);
+   if (btf_fd < 0)
+   return 0;
+
+   /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so
+* let's start with a sane default - 4KiB here - and resize it only if
+* bpf_obj_get_info_by_fd() needs a bigger buffer.
+*/
+   btf_info.btf_size = 4096;
+   last_size = btf_info.btf_size;
+   ptr = malloc(last_size);
+   if (!ptr) {
+   err = -ENOMEM;
+   goto exit_free;
+   }
+
+   bzero(ptr, last_size);
+   btf_info.btf = ptr_to_u64(ptr);
+   err = bpf_obj_get_info_by_fd(btf_fd, _info, );
+
+   if (!err && btf_info.btf_size > last_size) {
+   void *temp_ptr;
+
+   last_size = btf_info.btf_size;
+   temp_ptr = realloc(ptr, last_size);
+  

[PATCH bpf-next v3 03/13] tools/bpf: sync kernel btf.h header

2018-10-17 Thread Yonghong Song
The kernel uapi btf.h is synced to the tools directory.

Signed-off-by: Martin KaFai Lau 
Signed-off-by: Yonghong Song 
---
 tools/include/uapi/linux/btf.h | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index 972265f32871..63f8500e6f34 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -40,7 +40,8 @@ struct btf_type {
/* "size" is used by INT, ENUM, STRUCT and UNION.
 * "size" tells the size of the type it is describing.
 *
-* "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
+* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
+* FUNC and FUNC_PROTO.
 * "type" is a type_id referring to another type.
 */
union {
@@ -64,8 +65,10 @@ struct btf_type {
 #define BTF_KIND_VOLATILE  9   /* Volatile */
 #define BTF_KIND_CONST 10  /* Const*/
 #define BTF_KIND_RESTRICT  11  /* Restrict */
-#define BTF_KIND_MAX   11
-#define NR_BTF_KINDS   12
+#define BTF_KIND_FUNC  12  /* Function */
+#define BTF_KIND_FUNC_PROTO13  /* Function Prototype   */
+#define BTF_KIND_MAX   13
+#define NR_BTF_KINDS   14
 
 /* For some specific BTF_KIND, "struct btf_type" is immediately
  * followed by extra data.
-- 
2.17.1



[PATCH bpf-next v3 07/13] tools/bpf: add new fields for program load in lib/bpf

2018-10-17 Thread Yonghong Song
The new fields are added for program load in lib/bpf so that
an application using the API bpf_load_program_xattr() is able
to load a program with btf and func_info data.

This functionality will be used in next patch
by bpf selftest test_btf.

Signed-off-by: Yonghong Song 
---
 tools/lib/bpf/bpf.c | 3 +++
 tools/lib/bpf/bpf.h | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index d70a255cb05e..d8d48ab34220 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -196,6 +196,9 @@ int bpf_load_program_xattr(const struct 
bpf_load_program_attr *load_attr,
attr.log_level = 0;
attr.kern_version = load_attr->kern_version;
attr.prog_ifindex = load_attr->prog_ifindex;
+   attr.prog_btf_fd = load_attr->prog_btf_fd;
+   attr.func_info_len = load_attr->func_info_len;
+   attr.func_info = ptr_to_u64(load_attr->func_info);
memcpy(attr.prog_name, load_attr->name,
   min(name_len, BPF_OBJ_NAME_LEN - 1));
 
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 258c3c178333..d2bdaffd7712 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -74,6 +74,9 @@ struct bpf_load_program_attr {
const char *license;
__u32 kern_version;
__u32 prog_ifindex;
+   __u32 prog_btf_fd;
+   __u32 func_info_len;
+   const struct bpf_func_info *func_info;
 };
 
 /* Flags to direct loading requirements */
-- 
2.17.1



Re: [PATCH bpf-next 1/2] bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB

2018-10-17 Thread Song Liu



> On Oct 17, 2018, at 12:02 PM, Alexei Starovoitov  wrote:
> 
> On 10/17/18 10:26 AM, Alexei Starovoitov wrote:
>> On Tue, Oct 16, 2018 at 10:56:05PM -0700, Song Liu wrote:
>>> BPF programs of BPF_PROG_TYPE_CGROUP_SKB need to access headers in the
>>> skb. This patch enables direct access of skb for these programs.
>> 
>> The lack of direct packet access in CGROUP_SKB progs was
>> an unpleasant surprise to me, so thank you for fixing it,
>> but there are few issues with the patch. See below.
>> 
>>> In __cgroup_bpf_run_filter_skb(), bpf_compute_data_pointers() is called
>>> to compute proper data_end for the BPF program.
>>> 
>>> Signed-off-by: Song Liu 
>>> ---
>>> kernel/bpf/cgroup.c |  4 
>>> net/core/filter.c   | 26 +-
>>> 2 files changed, 29 insertions(+), 1 deletion(-)
>>> 
>>> diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
>>> index 00f6ed2e4f9a..340d496f35bd 100644
>>> --- a/kernel/bpf/cgroup.c
>>> +++ b/kernel/bpf/cgroup.c
>>> @@ -566,6 +566,10 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
>>> save_sk = skb->sk;
>>> skb->sk = sk;
>>> __skb_push(skb, offset);
>>> +
>>> +   /* compute pointers for the bpf prog */
>>> +   bpf_compute_data_pointers(skb);
>>> +
>>> ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
>>>  bpf_prog_run_save_cb);
>>> __skb_pull(skb, offset);
>>> diff --git a/net/core/filter.c b/net/core/filter.c
>>> index 1a3ac6c46873..8b5a502e241f 100644
>>> --- a/net/core/filter.c
>>> +++ b/net/core/filter.c
>>> @@ -5346,6 +5346,30 @@ static bool sk_filter_is_valid_access(int off, int 
>>> size,
>>> return bpf_skb_is_valid_access(off, size, type, prog, info);
>>> }
>>> 
>>> +static bool cg_skb_is_valid_access(int off, int size,
>>> +  enum bpf_access_type type,
>>> +  const struct bpf_prog *prog,
>>> +  struct bpf_insn_access_aux *info)
>>> +{
>>> +   if (type == BPF_WRITE)
>>> +   return false;
>> 
>> this disables writes into cb[0..4] that were allowed for cgroup_inet_* 
>> before.
>> One can argue that this may break existing progs,
>> but looking at the place where BPF_CGROUP_RUN_PROG_INET_INGRESS is called
>> it seems it's actually not correct in all cases to access cb there.
>> Just few lines down we call bpf_prog_run_save_cb() which save/restores
>> these 24 bytes.
>> So we have two option either add save/restore for INET_INGRESS only
>> or disable read and write access to cb[0..4] for CGROUP_SKB progs.
>> I prefer the former.
>> 
>>> +
>>> +   switch (off) {
>>> +   case bpf_ctx_range(struct __sk_buff, len):
>>> +   break;
>>> +   case bpf_ctx_range(struct __sk_buff, data):
>>> +   info->reg_type = PTR_TO_PACKET;
>>> +   break;
>>> +   case bpf_ctx_range(struct __sk_buff, data_end):
>>> +   info->reg_type = PTR_TO_PACKET_END;
>>> +   break;
>>> +   default:
>>> +   return false;
>>> +   }
>> 
>> this also enables access to a range of fields family..local_port.
>> It's ok to do for egress, but not for ingress unless we
>> add code similar to the bottom of sk_filter_trim_cap() that
>> inits skb->sk.
>> 
>> above change also allows access to data_meta and flow_keys
>> which is not correct.
>> 
>> Considering all that I'm proposing to fix INET_INGRESS call site
>> similar to code below it in sk_filter_trim_cap().
>> In particular to do:
>> struct sock *save_sk = skb->sk;
>> skb->sk = sk;
>> save and clear cb
>> BPF_CGROUP_RUN_PROG_INET_INGRESS
>> restore cb
>> skb->sk = save_sk;
>> 
>> all of above can probaby be inside BPF_CGROUP_RUN_PROG_INET_INGRESS macro.
>> Then in this cg_skb_is_valid_access() allow access to data/data_end
>> and family..local_port range as well.
>> while disallowing access to flow_keys and data_meta.
>> 
>> In patch 2 we gotta have tests for all these fields.
>> 
>> Thoughts?
> 
> chatted with Song offline.
> I completely misread 'return false' in the above as 'break'.
> The patch actually disables access to pkt_type, mark, queue_mapping
> and so on. Which is not correct either.
> Since tests were not failing we really need to improve this aspect
> of test coverage in test_verifier.c
> 
> Also I missed that __cgroup_bpf_run_filter_skb() already
> does save_sk = skb->sk; skb->sk = sk;
> and bpf_prog_run_save_cb()
> So no issue in the existing code. That was false alarm.
> Revising the proposal...
> I think cg_skb_is_valid_access() can be made similar to
> lwt_is_valid_access().
> Allowing writes into mark, priority, cb[0..4]
> and read of data/data_end.
> In addition it's also ok to allow family..local_port range
> (unlike lwt where sk may not be present).
> and no access to data_meta and flow_keys.

Thanks Alexei! I will send v2 shortly. 

Song



Re: [PATCH bpf-next v2 13/13] tools/bpf: bpftool: add support for jited func types

2018-10-17 Thread Yonghong Song


On 10/17/18 4:11 AM, Edward Cree wrote:
> On 17/10/18 08:24, Yonghong Song wrote:
>> This patch added support to print function signature
>> if btf func_info is available. Note that ksym
>> now uses function name instead of prog_name as
>> prog_name has a limit of 16 bytes including
>> ending '\0'.
>>
>> The following is a sample output for selftests
>> test_btf with file test_btf_haskv.o:
>>
>>$ bpftool prog dump jited id 1
>>int _dummy_tracepoint(struct dummy_tracepoint_args * ):
>>bpf_prog_b07ccb89267cf242__dummy_tracepoint:
>>   0:   push   %rbp
>>   1:   mov%rsp,%rbp
>>  ..
>>  3c:   add$0x28,%rbp
>>  40:   leaveq
>>  41:   retq
>>
>>int test_long_fname_1(struct dummy_tracepoint_args * ):
>>bpf_prog_2dcecc18072623fc_test_long_fname_1:
>>   0:   push   %rbp
>>   1:   mov%rsp,%rbp
>>  ..
>>  3a:   add$0x28,%rbp
>>  3e:   leaveq
>>  3f:   retq
>>
>>int test_long_fname_2(struct dummy_tracepoint_args * ):
>>bpf_prog_89d64e4abf0f0126_test_long_fname_2:
>>   0:   push   %rbp
>>   1:   mov%rsp,%rbp
>>  ..
>>  80:   add$0x28,%rbp
>>  84:   leaveq
>>  85:   retq
>>
>> Signed-off-by: Yonghong Song 
>> ---
>>   tools/bpf/bpftool/btf_dumper.c | 96 ++
>>   tools/bpf/bpftool/main.h   |  2 +
>>   tools/bpf/bpftool/prog.c   | 54 +++
>>   3 files changed, 152 insertions(+)
>>
>> diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
>> index 55bc512a1831..a31df4202335 100644
>> --- a/tools/bpf/bpftool/btf_dumper.c
>> +++ b/tools/bpf/bpftool/btf_dumper.c
>> @@ -249,3 +249,99 @@ int btf_dumper_type(const struct btf_dumper *d, __u32 
>> type_id,
>>   {
>>  return btf_dumper_do_type(d, type_id, 0, data);
>>   }
>> +
>> +#define BTF_PRINT_STRING(str)   
>> \
>> +{   \
>> +pos += snprintf(func_sig + pos, size - pos, str);   \
>> +if (pos >= size)\
>> +return -1;  \
>> +}
> Usual kernel practice for this sort of macro is to use
>      do { \
>      } while(0)
>   to ensure correct behaviour if the macro is used within another control
>   flow statement, e.g.
>      if (x)
>      BTF_PRINT_STRING(x);
>      else
>      do_something_else();
>   will not compile with the bare braces as the else will be detached.

Thanks for the review! Will change to use the "do while" format
as you suggested.

>> +#define BTF_PRINT_ONE_ARG(fmt, arg) \
>> +{   \
>> +pos += snprintf(func_sig + pos, size - pos, fmt, arg);  \
>> +if (pos >= size)\
>> +return -1;  \
>> +}
> Any reason for not just using a variadic macro?

No particular reason. I will try to use it in the next revision.

>> +#define BTF_PRINT_TYPE_ONLY(type)   \
>> +{   \
>> +pos = __btf_dumper_type_only(btf, type, func_sig,   \
>> + pos, size);\
>> +if (pos == -1)  \
>> +return -1;  \
>> +}
>> +
>> +static int __btf_dumper_type_only(struct btf *btf, __u32 type_id,
>> +  char *func_sig, int pos, int size)
>> +{
>> +const struct btf_type *t = btf__type_by_id(btf, type_id);
>> +const struct btf_array *array;
>> +int i, vlen;
>> +
>> +switch (BTF_INFO_KIND(t->info)) {
>> +case BTF_KIND_INT:
>> +BTF_PRINT_ONE_ARG("%s ",
>> +  btf__name_by_offset(btf, t->name_off));
>> +break;
>> +case BTF_KIND_STRUCT:
>> +BTF_PRINT_ONE_ARG("struct %s ",
>> +  btf__name_by_offset(btf, t->name_off));
>> +break;
>> +case BTF_KIND_UNION:
>> +BTF_PRINT_ONE_ARG("union %s ",

Re: [PATCH bpf-next v2 02/13] bpf: btf: Add BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO

2018-10-17 Thread Yonghong Song


On 10/17/18 9:13 AM, Edward Cree wrote:
> On 17/10/18 08:23, Yonghong Song wrote:
>> This patch adds BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO
>> support to the type section. BTF_KIND_FUNC_PROTO is used
>> to specify the type of a function pointer. With this,
>> BTF has a complete set of C types (except float).
>>
>> BTF_KIND_FUNC is used to specify the signature of a
>> defined subprogram. BTF_KIND_FUNC_PROTO can be referenced
>> by another type, e.g., a pointer type, and BTF_KIND_FUNC
>> type cannot be referenced by another type.
> Why are distinct kinds created for these?  A function body is
>   a value of function type, and since there's no way (in C) to
>   declare a variable of function type (only pointer-to-
>   function), any declaration of function type must necessarily
>   be a BTF_KIND_FUNC, whereas any other reference to a function
>   type (e.g. a declaration of type pointer to function type)
>   must, as you state above, be a BTF_KIND_FUNC_PROTO.
> In fact, you can tell the difference just from name_off, since
>   a (C-legal) BTF_KIND_FUNC_PROTO will always be anonymous (as
>   the pointee of a pointer type), while a BTF_KIND_FUNC will
>   have the name of the subprogram.

What you stated is true, BTF_KIND_FUNC_PROTO corresponds to
dwarf subroutine tag which has no name while BTF_KIND_FUNC
must have a valid name. The original design is to have both
since they are corresponding to different dwarf constructs.

Martin, what do you think?

> 
> -Ed
> 


Re: [PATCH bpf-next 2/3] bpf: emit RECORD_MMAP events for bpf prog load/unload

2018-10-17 Thread Song Liu
Hi David,

On Wed, Oct 17, 2018 at 8:09 AM David Ahern  wrote:
>
> On 10/16/18 11:43 PM, Song Liu wrote:
> > I agree that processing events while recording has significant overhead.
> > In this case, perf user space need to know details about the the jited BPF
> > program. It is impossible to pass all these details to user space through
> > the relatively stable ring_buffer API. Therefore, some processing of the
> > data is necessary (get bpf prog_id from ring buffer, and then fetch program
> > details via BPF_OBJ_GET_INFO_BY_FD.
> >
> > I have some idea on processing important data with relatively low overhead.
> > Let me try implement it.
> >
>
> As I understand it, you want this series:
>
>  kernel: add event to perf buffer on bpf prog load
>
>  userspace: perf reads the event and grabs information about the program
> from the fd
>
> Is that correct?

Yes, this is correct.

>
> Userpsace is not awakened immediately when an event is added the the
> ring. It is awakened once the number of events crosses a watermark. That
> means there is an unknown - and potentially long - time window where the
> program can be unloaded before perf reads the event.
>
> So no matter what you do expecting perf record to be able to process the
> event quickly is an unreasonable expectation.

In this case, we don't really need to gather the information immediately. We
will lost information about some short-living BPF programs. These BPF
programs are less important for the performance analysis anyway. I guess
the only missing case is when some BPF program get load/unload many
times, so each instance is short-living, but the overall perf impact is
significant. I think this case is not interesting either, as BPF programs
should not be used like this (considering the kernel need to translate and
jit the program, unloading and reloading soon doesn't make any sense).

Thanks,
Song


Re: [PATCH bpf-next v2 00/13] bpf: add btf func info support

2018-10-17 Thread Yonghong Song


On 10/17/18 4:02 AM, Edward Cree wrote:
> I think the BTF work needs to be better documented; at the moment the only way
>   to determine how BTF sections are structured is to read through the headers,
>   and cross-reference with the DWARF spec to guess at the semantics of various
>   fields.  I've been working on adding BTF support to ebpf_asm, and finding
>   very frustrating the amount of guesswork required.
> Therefore please make sure that each patch extending the BTF format includes
>   documentation patches describing both the layout and the semantics of the 
> new

Make sense. I will add some comments to describe the layout in patch #9.

>   extensions.  For example in patch #9 there is no explanation of
>   btf_ext_header.line_info_off and btf_ext_header.line_info_len (they're not
>   even used by the code, so one cannot reverse-engineer it); while it's fairly
>   clear that they indicate the bounds of the line_info subsection, there is no

The line_info field is added because it is implemented in llvm. I 
imported it to kernel tools directory to be compatible with what llvm 
generates although we did not process it yet. I will add a comment on this.

In the long term, I guess we should add description of format etc.
in Documentation/bpf directory like BTF.rst.

>   specification of what this subsection contains.
> 
> -Ed
> 


Re: [PATCH bpf-next 2/3] bpf: emit RECORD_MMAP events for bpf prog load/unload

2018-10-17 Thread Song Liu
On Wed, Oct 17, 2018 at 5:50 AM Arnaldo Carvalho de Melo
 wrote:
>
> Em Wed, Oct 17, 2018 at 09:11:40AM -0300, Arnaldo Carvalho de Melo escreveu:
> > Adding Alexey, Jiri and Namhyung as they worked/are working on
> > multithreading 'perf record'.
> >
> > Em Tue, Oct 16, 2018 at 11:43:11PM -0700, Song Liu escreveu:
> > > On Tue, Oct 16, 2018 at 4:43 PM David Ahern  wrote:
> > > > On 10/15/18 4:33 PM, Song Liu wrote:
> > > > > I am working with Alexei on the idea of fetching BPF program 
> > > > > information via
> > > > > BPF_OBJ_GET_INFO_BY_FD cmd. I added PERF_RECORD_BPF_EVENT
> > > > > to perf_event_type, and dumped these events to perf event ring buffer.
> >
> > > > > I found that perf will not process event until the end of perf-record:
> >
> > > > > root@virt-test:~# ~/perf record -ag -- sleep 10
> > > > > .. 10 seconds later
> > > > > [ perf record: Woken up 34 times to write data ]
> > > > > machine__process_bpf_event: prog_id 6 loaded
> > > > > machine__process_bpf_event: prog_id 6 unloaded
> > > > > [ perf record: Captured and wrote 9.337 MB perf.data (93178 samples) ]
> >
> > > > > In this example, the bpf program was loaded and then unloaded in
> > > > > another terminal. When machine__process_bpf_event() processes
> > > > > the load event, the bpf program is already unloaded. Therefore,
> > > > > machine__process_bpf_event() will not be able to get information
> > > > > about the program via BPF_OBJ_GET_INFO_BY_FD cmd.
> >
> > > > > To solve this problem, we will need to run BPF_OBJ_GET_INFO_BY_FD
> > > > > as soon as perf get the event from kernel. I looked around the perf
> > > > > code for a while. But I haven't found a good example where some
> > > > > events are processed before the end of perf-record. Could you
> > > > > please help me with this?
> >
> > > > perf record does not process events as they are generated. Its sole job
> > > > is pushing data from the maps to a file as fast as possible meaning in
> > > > bulk based on current read and write locations.
> >
> > > > Adding code to process events will add significant overhead to the
> > > > record command and will not really solve your race problem.
> >
> > > I agree that processing events while recording has significant overhead.
> > > In this case, perf user space need to know details about the the jited BPF
> > > program. It is impossible to pass all these details to user space through
> > > the relatively stable ring_buffer API. Therefore, some processing of the
> > > data is necessary (get bpf prog_id from ring buffer, and then fetch 
> > > program
> > > details via BPF_OBJ_GET_INFO_BY_FD.
> >
> > > I have some idea on processing important data with relatively low 
> > > overhead.
> > > Let me try implement it.
> >
> > Well, you could have a separate thread processing just those kinds of
> > events, associate it with a dummy event where you only ask for
> > PERF_RECORD_BPF_EVENTs.
> >
> > Here is how to setup the PERF_TYPE_SOFTWARE/PERF_COUNT_SW_DUMMY
> > perf_event_attr:
> >
> > [root@seventh ~]# perf record -vv -e dummy sleep 01
> > 
> > perf_event_attr:
> >   type 1
> >   size 112
> >   config   0x9
> >   { sample_period, sample_freq }   4000
> >   sample_type  IP|TID|TIME|PERIOD
> >   disabled 1
> >   inherit  1
>
> These you would have disabled, no need for
> PERF_RECORD_{MMAP*,COMM,FORK,EXIT} just PERF_RECORD_BPF_EVENT
>
> >   mmap 1
> >   comm 1
> >   task 1
> >   mmap21
> >   comm_exec1
>
>

Thanks Arnaldo! This looks better than my original idea (using POLLPRI
to highlight
special events). I will try implement the BPF_EVENT in this direction.

Song


[PATCH bpf-next v2 11/13] tools/bpf: refactor to implement btf_get_from_id() in lib/bpf

2018-10-17 Thread Yonghong Song
The function get_btf() is implemented in tools/bpf/bpftool/map.c
to get a btf structure given a map_info. This patch
refactored this function to be function btf_get_from_id()
in tools/lib/bpf so that it can be used later.

Signed-off-by: Yonghong Song 
---
 tools/bpf/bpftool/map.c | 68 ++--
 tools/lib/bpf/btf.c | 69 +
 tools/lib/bpf/btf.h | 18 ++-
 3 files changed, 81 insertions(+), 74 deletions(-)

diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 7bf38f0e152e..1b8a75fa0471 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -215,70 +215,6 @@ static int do_dump_btf(const struct btf_dumper *d,
return ret;
 }
 
-static int get_btf(struct bpf_map_info *map_info, struct btf **btf)
-{
-   struct bpf_btf_info btf_info = { 0 };
-   __u32 len = sizeof(btf_info);
-   __u32 last_size;
-   int btf_fd;
-   void *ptr;
-   int err;
-
-   err = 0;
-   *btf = NULL;
-   btf_fd = bpf_btf_get_fd_by_id(map_info->btf_id);
-   if (btf_fd < 0)
-   return 0;
-
-   /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so
-* let's start with a sane default - 4KiB here - and resize it only if
-* bpf_obj_get_info_by_fd() needs a bigger buffer.
-*/
-   btf_info.btf_size = 4096;
-   last_size = btf_info.btf_size;
-   ptr = malloc(last_size);
-   if (!ptr) {
-   err = -ENOMEM;
-   goto exit_free;
-   }
-
-   bzero(ptr, last_size);
-   btf_info.btf = ptr_to_u64(ptr);
-   err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
-
-   if (!err && btf_info.btf_size > last_size) {
-   void *temp_ptr;
-
-   last_size = btf_info.btf_size;
-   temp_ptr = realloc(ptr, last_size);
-   if (!temp_ptr) {
-   err = -ENOMEM;
-   goto exit_free;
-   }
-   ptr = temp_ptr;
-   bzero(ptr, last_size);
-   btf_info.btf = ptr_to_u64(ptr);
-   err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
-   }
-
-   if (err || btf_info.btf_size > last_size) {
-   err = errno;
-   goto exit_free;
-   }
-
-   *btf = btf__new((__u8 *)btf_info.btf, btf_info.btf_size, NULL);
-   if (IS_ERR(*btf)) {
-   err = PTR_ERR(*btf);
-   *btf = NULL;
-   }
-
-exit_free:
-   close(btf_fd);
-   free(ptr);
-
-   return err;
-}
-
 static json_writer_t *get_btf_writer(void)
 {
json_writer_t *jw = jsonw_new(stdout);
@@ -765,7 +701,7 @@ static int do_dump(int argc, char **argv)
 
prev_key = NULL;
 
-   err = get_btf(&info, &btf);
+   err = btf_get_from_id(info.btf_id, &btf);
if (err) {
p_err("failed to get btf");
goto exit_free;
@@ -909,7 +845,7 @@ static int do_lookup(int argc, char **argv)
}
 
/* here means bpf_map_lookup_elem() succeeded */
-   err = get_btf(&info, &btf);
+   err = btf_get_from_id(info.btf_id, &btf);
if (err) {
p_err("failed to get btf");
goto exit_free;
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 4748e0bacd2b..ab654628e966 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -42,6 +42,11 @@ struct btf_ext {
__u32 func_info_len;
 };
 
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+   return (__u64) (unsigned long) ptr;
+}
+
 static int btf_add_type(struct btf *btf, struct btf_type *t)
 {
if (btf->types_size - btf->nr_types < 2) {
@@ -403,6 +408,70 @@ const char *btf__name_by_offset(const struct btf *btf, 
__u32 offset)
return NULL;
 }
 
+int btf_get_from_id(__u32 id, struct btf **btf)
+{
+   struct bpf_btf_info btf_info = { 0 };
+   __u32 len = sizeof(btf_info);
+   __u32 last_size;
+   int btf_fd;
+   void *ptr;
+   int err;
+
+   err = 0;
+   *btf = NULL;
+   btf_fd = bpf_btf_get_fd_by_id(id);
+   if (btf_fd < 0)
+   return 0;
+
+   /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so
+* let's start with a sane default - 4KiB here - and resize it only if
+* bpf_obj_get_info_by_fd() needs a bigger buffer.
+*/
+   btf_info.btf_size = 4096;
+   last_size = btf_info.btf_size;
+   ptr = malloc(last_size);
+   if (!ptr) {
+   err = -ENOMEM;
+   goto exit_free;
+   }
+
+   bzero(ptr, last_size);
+   btf_info.btf = ptr_to_u64(ptr);
+   err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
+
+   if (!err && btf_info.btf_size > last_size) {
+   void *temp_ptr;
+
+   last_size = btf_info.btf_size;
+   temp_ptr = realloc(ptr, last_size);
+  

[PATCH bpf-next v2 10/13] tools/bpf: do not use pahole if clang/llvm can generate BTF sections

2018-10-17 Thread Yonghong Song
Add additional checks in tools/testing/selftests/bpf and
samples/bpf such that if clang/llvm compiler can generate
BTF sections, do not use pahole.

Signed-off-by: Yonghong Song 
---
 samples/bpf/Makefile | 8 
 tools/testing/selftests/bpf/Makefile | 8 
 2 files changed, 16 insertions(+)

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index be0a961450bc..870fe7ee2b69 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -208,12 +208,20 @@ endif
 BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
 BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
 BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 
'usage.*llvm')
+BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \
+ clang -target bpf -O2 -g -c -x c - -o 
./llvm_btf_verify.o; \
+ readelf -S ./llvm_btf_verify.o | grep BTF; \
+ /bin/rm -f ./llvm_btf_verify.o)
 
+ifneq ($(BTF_LLVM_PROBE),)
+   EXTRA_CFLAGS += -g
+else
 ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),)
EXTRA_CFLAGS += -g
LLC_FLAGS += -mattr=dwarfris
DWARF2BTF = y
 endif
+endif
 
 # Trick to allow make to be run from this directory
 all:
diff --git a/tools/testing/selftests/bpf/Makefile 
b/tools/testing/selftests/bpf/Makefile
index d99dd6fc3fbe..8d5612724db8 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -121,7 +121,14 @@ $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
 BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
 BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
 BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 
'usage.*llvm')
+BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \
+ clang -target bpf -O2 -g -c -x c - -o 
./llvm_btf_verify.o; \
+ readelf -S ./llvm_btf_verify.o | grep BTF; \
+ /bin/rm -f ./llvm_btf_verify.o)
 
+ifneq ($(BTF_LLVM_PROBE),)
+   CLANG_FLAGS += -g
+else
 ifneq ($(BTF_LLC_PROBE),)
 ifneq ($(BTF_PAHOLE_PROBE),)
 ifneq ($(BTF_OBJCOPY_PROBE),)
@@ -131,6 +138,7 @@ ifneq ($(BTF_OBJCOPY_PROBE),)
 endif
 endif
 endif
+endif
 
 $(OUTPUT)/%.o: %.c
$(CLANG) $(CLANG_FLAGS) \
-- 
2.17.1



[PATCH bpf-next v2 13/13] tools/bpf: bpftool: add support for jited func types

2018-10-17 Thread Yonghong Song
This patch added support to print function signature
if btf func_info is available. Note that ksym
now uses function name instead of prog_name as
prog_name has a limit of 16 bytes including
ending '\0'.

The following is a sample output for selftests
test_btf with file test_btf_haskv.o:

  $ bpftool prog dump jited id 1
  int _dummy_tracepoint(struct dummy_tracepoint_args * ):
  bpf_prog_b07ccb89267cf242__dummy_tracepoint:
 0:   push   %rbp
 1:   mov%rsp,%rbp
..
3c:   add$0x28,%rbp
40:   leaveq
41:   retq

  int test_long_fname_1(struct dummy_tracepoint_args * ):
  bpf_prog_2dcecc18072623fc_test_long_fname_1:
 0:   push   %rbp
 1:   mov%rsp,%rbp
..
3a:   add$0x28,%rbp
3e:   leaveq
3f:   retq

  int test_long_fname_2(struct dummy_tracepoint_args * ):
  bpf_prog_89d64e4abf0f0126_test_long_fname_2:
 0:   push   %rbp
 1:   mov%rsp,%rbp
..
80:   add$0x28,%rbp
84:   leaveq
85:   retq

Signed-off-by: Yonghong Song 
---
 tools/bpf/bpftool/btf_dumper.c | 96 ++
 tools/bpf/bpftool/main.h   |  2 +
 tools/bpf/bpftool/prog.c   | 54 +++
 3 files changed, 152 insertions(+)

diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 55bc512a1831..a31df4202335 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -249,3 +249,99 @@ int btf_dumper_type(const struct btf_dumper *d, __u32 
type_id,
 {
return btf_dumper_do_type(d, type_id, 0, data);
 }
+
+#define BTF_PRINT_STRING(str)  \
+   {   \
+   pos += snprintf(func_sig + pos, size - pos, str);   \
+   if (pos >= size)\
+   return -1;  \
+   }
+#define BTF_PRINT_ONE_ARG(fmt, arg)\
+   {   \
+   pos += snprintf(func_sig + pos, size - pos, fmt, arg);  \
+   if (pos >= size)\
+   return -1;  \
+   }
+#define BTF_PRINT_TYPE_ONLY(type)  \
+   {   \
+   pos = __btf_dumper_type_only(btf, type, func_sig,   \
+pos, size);\
+   if (pos == -1)  \
+   return -1;  \
+   }
+
+static int __btf_dumper_type_only(struct btf *btf, __u32 type_id,
+ char *func_sig, int pos, int size)
+{
+   const struct btf_type *t = btf__type_by_id(btf, type_id);
+   const struct btf_array *array;
+   int i, vlen;
+
+   switch (BTF_INFO_KIND(t->info)) {
+   case BTF_KIND_INT:
+   BTF_PRINT_ONE_ARG("%s ",
+ btf__name_by_offset(btf, t->name_off));
+   break;
+   case BTF_KIND_STRUCT:
+   BTF_PRINT_ONE_ARG("struct %s ",
+ btf__name_by_offset(btf, t->name_off));
+   break;
+   case BTF_KIND_UNION:
+   BTF_PRINT_ONE_ARG("union %s ",
+ btf__name_by_offset(btf, t->name_off));
+   break;
+   case BTF_KIND_ENUM:
+   BTF_PRINT_ONE_ARG("enum %s ",
+ btf__name_by_offset(btf, t->name_off));
+   break;
+   case BTF_KIND_ARRAY:
+   array = (struct btf_array *)(t + 1);
+   BTF_PRINT_TYPE_ONLY(array->type);
+   BTF_PRINT_ONE_ARG("[%d]", array->nelems);
+   break;
+   case BTF_KIND_PTR:
+   BTF_PRINT_TYPE_ONLY(t->type);
+   BTF_PRINT_STRING("* ");
+   break;
+   case BTF_KIND_UNKN:
+   case BTF_KIND_FWD:
+   case BTF_KIND_TYPEDEF:
+   return -1;
+   case BTF_KIND_VOLATILE:
+   BTF_PRINT_STRING("volatile ");
+   BTF_PRINT_TYPE_ONLY(t->type);
+   break;
+   case BTF_KIND_CONST:
+   BTF_PRINT_STRING("const ");
+   BTF_PRINT_TYPE_ONLY(t->type);
+   break;
+   case BTF_KIND_RESTRICT:
+   BTF_PRINT_STRING("restrict ");
+   BTF_PRINT_TYPE_ONLY(t->type);
+   break;
+   case BTF_KIND_FUNC:
+   case BTF_KIND_FUNC_PROTO:
+   BTF_PRINT_TYPE_ONLY(t->type);
+   BTF_PRINT_ONE_ARG("%s(", btf__name_by_offset

[PATCH bpf-next v2 12/13] tools/bpf: enhance test_btf file testing to test func info

2018-10-17 Thread Yonghong Song
Change the bpf programs test_btf_haskv.c and test_btf_nokv.c to
have two sections, and enhance test_btf.c test_file feature
to test btf func_info returned by the kernel.

Signed-off-by: Yonghong Song 
---
 tools/testing/selftests/bpf/test_btf.c   | 72 +++-
 tools/testing/selftests/bpf/test_btf_haskv.c | 16 -
 tools/testing/selftests/bpf/test_btf_nokv.c  | 16 -
 3 files changed, 99 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_btf.c 
b/tools/testing/selftests/bpf/test_btf.c
index e03a8cea4bb7..0bbefb571426 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -2235,10 +2235,16 @@ static int file_has_btf_elf(const char *fn)
 static int do_test_file(unsigned int test_num)
 {
const struct btf_file_test *test = &file_tests[test_num - 1];
+   const char *expected_fnames[] = {"_dummy_tracepoint",
+"test_long_fname_1",
+"test_long_fname_2"};
+   __u32 func_lens[10], func_types[10], info_len;
+   struct bpf_prog_info info = {};
struct bpf_object *obj = NULL;
struct bpf_program *prog;
+   struct btf *btf = NULL;
struct bpf_map *map;
-   int err;
+   int i, err, prog_fd;
 
fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num,
test->file);
@@ -2271,6 +2277,7 @@ static int do_test_file(unsigned int test_num)
err = bpf_object__load(obj);
if (CHECK(err < 0, "bpf_object__load: %d", err))
goto done;
+   prog_fd = bpf_program__fd(prog);
 
map = bpf_object__find_map_by_name(obj, "btf_map");
if (CHECK(!map, "btf_map not found")) {
@@ -2285,6 +2292,69 @@ static int do_test_file(unsigned int test_num)
  test->btf_kv_notfound))
goto done;
 
+   if (!jit_enabled)
+   goto skip_jit;
+
+   info_len = sizeof(struct bpf_prog_info);
+   info.nr_jited_func_types = ARRAY_SIZE(func_types);
+   info.nr_jited_func_lens = ARRAY_SIZE(func_lens);
+   info.jited_func_types = ptr_to_u64(&func_types[0]);
+   info.jited_func_lens = ptr_to_u64(&func_lens[0]);
+
+   err = bpf_obj_get_info_by_fd(prog_fd, , _len);
+
+   if (CHECK(err == -1, "invalid get info errno:%d", errno)) {
+   fprintf(stderr, "%s\n", btf_log_buf);
+   err = -1;
+   goto done;
+   }
+   if (CHECK(info.nr_jited_func_lens != 3,
+ "incorrect info.nr_jited_func_lens %d",
+ info.nr_jited_func_lens)) {
+   err = -1;
+   goto done;
+   }
+   if (CHECK(info.nr_jited_func_types != 3,
+ "incorrect info.nr_jited_func_types %d",
+ info.nr_jited_func_types)) {
+   err = -1;
+   goto done;
+   }
+   if (CHECK(info.btf_id == 0, "incorrect btf_id = 0")) {
+   err = -1;
+   goto done;
+   }
+
+   err = btf_get_from_id(info.btf_id, &btf);
+   if (CHECK(err, "cannot get btf from kernel, err: %d", err))
+   goto done;
+
+   /* check three functions */
+   for (i = 0; i < 3; i++) {
+   const struct btf_type *t;
+   const char *fname;
+
+   t = btf__type_by_id(btf, func_types[i]);
+   if (CHECK(!t, "btf__type_by_id failure: id %u",
+ func_types[i])) {
+   err = -1;
+   goto done;
+   }
+
+   fname = btf__name_by_offset(btf, t->name_off);
+   err = strcmp(fname, expected_fnames[i]);
+   /* for the second and third functions in .text section,
+* the compiler may order them either way.
+*/
+   if (i && err)
+   err = strcmp(fname, expected_fnames[3 - i]);
+   if (CHECK(err, "incorrect fname %s", fname ? : "")) {
+   err = -1;
+   goto done;
+   }
+   }
+
+skip_jit:
fprintf(stderr, "OK");
 
 done:
diff --git a/tools/testing/selftests/bpf/test_btf_haskv.c 
b/tools/testing/selftests/bpf/test_btf_haskv.c
index b21b876f475d..e5c79fe0ffdb 100644
--- a/tools/testing/selftests/bpf/test_btf_haskv.c
+++ b/tools/testing/selftests/bpf/test_btf_haskv.c
@@ -24,8 +24,8 @@ struct dummy_tracepoint_args {
struct sock *sock;
 };
 
-SEC("dummy_tracepoint")
-int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+__attribute__((noinline))
+static int test_long_fname_2(struct dummy_tracepoint_args *arg)
 {
struct ipv_counts *counts;
int key = 0;
@@ -42,4 +42,16 @@ int _dummy_tracepoint(struct dummy_tracepoint

[PATCH bpf-next v2 06/13] tools/bpf: sync kernel uapi bpf.h header to tools directory

2018-10-17 Thread Yonghong Song
The kernel uapi bpf.h is synced to tools directory.

Signed-off-by: Yonghong Song 
---
 tools/include/uapi/linux/bpf.h | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f9187b41dff6..7ebbf4f06a65 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -332,6 +332,9 @@ union bpf_attr {
 * (context accesses, allowed helpers, etc).
 */
__u32   expected_attach_type;
+   __u32   prog_btf_fd;/* fd pointing to BTF type data 
*/
+   __u32   func_info_len;  /* func_info length */
+   __aligned_u64   func_info;  /* func type info */
};
 
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -2585,6 +2588,9 @@ struct bpf_prog_info {
__u32 nr_jited_func_lens;
__aligned_u64 jited_ksyms;
__aligned_u64 jited_func_lens;
+   __u32 btf_id;
+   __u32 nr_jited_func_types;
+   __aligned_u64 jited_func_types;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
@@ -2896,4 +2902,9 @@ struct bpf_flow_keys {
};
 };
 
+struct bpf_func_info {
+   __u32   insn_offset;
+   __u32   type_id;
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
-- 
2.17.1



[PATCH bpf-next v2 09/13] tools/bpf: add support to read .BTF.ext sections

2018-10-17 Thread Yonghong Song
The .BTF section is already available to encode types.
These types can be used for map
pretty print. The whole .BTF section will also be passed to
the kernel, which can verify it and return it to
user space for pretty printing etc.

The llvm patch at https://reviews.llvm.org/D53261
will generate .BTF section and one more section
.BTF.ext. The .BTF.ext section encodes function type
information and line information. For line information,
the actual source code is encoded in the section, which
makes the compiler itself an ideal place for section
generation.

The .BTF section does not depend on any other section,
and .BTF.ext has dependency on .BTF for strings and types.

The .BTF section can be directly loaded into the
kernel, and the .BTF.ext section cannot. The loader
may need to do some relocation and merging,
similar to merging multiple code sections, before
loading into the kernel.

In this patch, only func type info is processed.
The functionality is implemented in libbpf.

Signed-off-by: Yonghong Song 
---
 tools/lib/bpf/btf.c| 232 +
 tools/lib/bpf/btf.h|  31 ++
 tools/lib/bpf/libbpf.c |  53 +-
 3 files changed, 312 insertions(+), 4 deletions(-)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 33095fc1860b..4748e0bacd2b 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -37,6 +37,11 @@ struct btf {
int fd;
 };
 
+struct btf_ext {
+   void *func_info;
+   __u32 func_info_len;
+};
+
 static int btf_add_type(struct btf *btf, struct btf_type *t)
 {
if (btf->types_size - btf->nr_types < 2) {
@@ -397,3 +402,230 @@ const char *btf__name_by_offset(const struct btf *btf, 
__u32 offset)
else
return NULL;
 }
+
+static int btf_ext_validate_func_info(const struct btf_sec_func_info *sinfo,
+ __u32 size, btf_print_fn_t err_log)
+{
+   int sec_hdrlen = sizeof(struct btf_sec_func_info);
+   __u32 record_size = sizeof(struct bpf_func_info);
+   __u32 size_left = size, num_records;
+   __u64 total_record_size;
+
+   while (size_left) {
+   if (size_left < sec_hdrlen) {
+   elog("BTF.ext func_info header not found");
+   return -EINVAL;
+   }
+
+   num_records = sinfo->num_func_info;
+   if (num_records == 0) {
+   elog("incorrect BTF.ext num_func_info");
+   return -EINVAL;
+   }
+
+   total_record_size = sec_hdrlen +
+   (__u64)num_records * record_size;
+   if (size_left < total_record_size) {
+   elog("incorrect BTF.ext num_func_info");
+   return -EINVAL;
+   }
+
+   size_left -= total_record_size;
+   sinfo = (void *)sinfo + total_record_size;
+   }
+
+   return 0;
+}
+static int btf_ext_parse_hdr(__u8 *data, __u32 data_size,
+btf_print_fn_t err_log)
+{
+   const struct btf_ext_header *hdr = (struct btf_ext_header *)data;
+   const struct btf_sec_func_info *sinfo;
+   __u32 meta_left, last_func_info_pos;
+
+   if (data_size < sizeof(*hdr)) {
+   elog("BTF.ext header not found");
+   return -EINVAL;
+   }
+
+   if (hdr->magic != BTF_MAGIC) {
+   elog("Invalid BTF.ext magic:%x\n", hdr->magic);
+   return -EINVAL;
+   }
+
+   if (hdr->version != BTF_VERSION) {
+   elog("Unsupported BTF.ext version:%u\n", hdr->version);
+   return -ENOTSUP;
+   }
+
+   if (hdr->flags) {
+   elog("Unsupported BTF.ext flags:%x\n", hdr->flags);
+   return -ENOTSUP;
+   }
+
+   meta_left = data_size - sizeof(*hdr);
+   if (!meta_left) {
+   elog("BTF.ext has no data\n");
+   return -EINVAL;
+   }
+
+   if (meta_left < hdr->func_info_off) {
+   elog("Invalid BTF.ext func_info section offset:%u\n",
+hdr->func_info_off);
+   return -EINVAL;
+   }
+
+   if (hdr->func_info_off & 0x02) {
+   elog("BTF.ext func_info section is not aligned to 4 bytes\n");
+   return -EINVAL;
+   }
+
+   last_func_info_pos = sizeof(*hdr) + hdr->func_info_off +
+hdr->func_info_len;
+   if (last_func_info_pos > data_size) {
+   elog("Invalid BTF.ext func_info section size:%u\n",
+hdr->func_info_len);
+   return -EINVAL;
+   }
+
+   sinfo = (const struct btf_sec_func_info *)(data + sizeof(*hdr) +
+  hdr->func_

[PATCH bpf-next v2 07/13] tools/bpf: add new fields for program load in lib/bpf

2018-10-17 Thread Yonghong Song
The new fields are added for program load in lib/bpf so
that an application using the API bpf_load_program_xattr() is able
to load a program with btf and func_info data.

This functionality will be used in next patch
by bpf selftest test_btf.

Signed-off-by: Yonghong Song 
---
 tools/lib/bpf/bpf.c | 3 +++
 tools/lib/bpf/bpf.h | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index d70a255cb05e..d8d48ab34220 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -196,6 +196,9 @@ int bpf_load_program_xattr(const struct 
bpf_load_program_attr *load_attr,
attr.log_level = 0;
attr.kern_version = load_attr->kern_version;
attr.prog_ifindex = load_attr->prog_ifindex;
+   attr.prog_btf_fd = load_attr->prog_btf_fd;
+   attr.func_info_len = load_attr->func_info_len;
+   attr.func_info = ptr_to_u64(load_attr->func_info);
memcpy(attr.prog_name, load_attr->name,
   min(name_len, BPF_OBJ_NAME_LEN - 1));
 
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 258c3c178333..d2bdaffd7712 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -74,6 +74,9 @@ struct bpf_load_program_attr {
const char *license;
__u32 kern_version;
__u32 prog_ifindex;
+   __u32 prog_btf_fd;
+   __u32 func_info_len;
+   const struct bpf_func_info *func_info;
 };
 
 /* Flags to direct loading requirements */
-- 
2.17.1



[PATCH bpf-next v2 00/13] bpf: add btf func info support

2018-10-17 Thread Yonghong Song
The BTF support was added to the kernel by commit 69b693f0aefa
("bpf: btf: Introduce BPF Type Format (BTF)"), which introduced
.BTF section into ELF file and is primarily
used for map pretty print.
pahole is used to convert dwarf to BTF for ELF files.

This patch added func info support to the kernel so we can
get better ksym's for bpf function calls. Basically,
pairs of bpf function calls and their corresponding types
are passed to the kernel. Extracting function names from
the types, the kernel is able to construct a ksym for
each function call with embedded function name.

LLVM patch https://reviews.llvm.org/D53261
can generate func info, encoded in .BTF.ext ELF section.
In addition, it also added support for FUNC and FUNC_PROTO
types, which are also supported through this patch set.
The following is an example to show FUNC and
FUNC_PROTO difference, compiled with the above LLVM patch
with Debug mode.

  -bash-4.2$ cat test.c
  int foo(int (*bar)(int)) { return bar(5); }
  -bash-4.2$ clang -target bpf -g -O2 -mllvm -debug-only=btf -c test.c
  Type Table:
  [1] FUNC name_off=1 info=0x0c01 size/type=2
param_type=3
  [2] INT name_off=11 info=0x0100 size/type=4
desc=0x0120
  [3] PTR name_off=0 info=0x0200 size/type=4
  [4] FUNC_PROTO name_off=0 info=0x0d01 size/type=2
param_type=2
  
  String Table:
  0 :
  1 : foo
  5 : .text
  11 : int
  15 : test.c
  22 : int foo(int (*bar)(int)) { return bar(5); }
  
  FuncInfo Table:
  sec_name_off=5
insn_offset= type_id=1
  ...

In the above, type and string tables are in .BTF section and
the func info in .BTF.ext. The placeholder shown for the
insn offset is not available at dump time but is
resolved later in the compilation process.
Following the format specification at Patch #9 and examine the
raw data in .BTF.ext section, we have
  FuncInfo Table:
  sec_name_off=5
insn_offset=0 type_id=1
The (insn_offset, type_id) can be passed to the kernel
so the kernel can find the func name and use it in the ksym.
Below is a demonstration from Patch #13.
  $ bpftool prog dump jited id 1
  int _dummy_tracepoint(struct dummy_tracepoint_args * ):
  bpf_prog_b07ccb89267cf242__dummy_tracepoint:
 0:   push   %rbp
 1:   mov%rsp,%rbp
..
3c:   add$0x28,%rbp
40:   leaveq
41:   retq
  
  int test_long_fname_1(struct dummy_tracepoint_args * ):
  bpf_prog_2dcecc18072623fc_test_long_fname_1:
 0:   push   %rbp
 1:   mov%rsp,%rbp
..
3a:   add$0x28,%rbp
3e:   leaveq
3f:   retq
  
  int test_long_fname_2(struct dummy_tracepoint_args * ):
  bpf_prog_89d64e4abf0f0126_test_long_fname_2:
 0:   push   %rbp
 1:   mov%rsp,%rbp
..
80:   add$0x28,%rbp
84:   leaveq
85:   retq

For the patchset,
Patch #1  refactors the code to break up btf_type_is_void().
Patch #2  introduces new BTF types BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO.
Patch #3  syncs btf.h header to tools directory.
Patch #4  adds btf func/func_proto self tests in test_btf.
Patch #5  adds kernel interface to load func_info to kernel
  and pass func_info to userspace.
Patch #6  syncs bpf.h header to tools directory.
Patch #7  adds news btf/func_info related fields in libbpf
  program load function.
Patch #8  extends selftest test_btf to test load/retrieve func_type info.
Patch #9  adds .BTF.ext func info support.
Patch #10 changes Makefile to avoid using pahole if llvm is capable of
  generating BTF sections.
Patch #11 refactors to have btf_get_from_id() in libbpf for reuse.
Patch #12 enhance test_btf file testing to test func info.
Patch #13 adds bpftool support for func signature dump.

Yonghong Song (13):
  bpf: btf: Break up btf_type_is_void()
  bpf: btf: Add BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO
  tools/bpf: sync kernel btf.h header
  tools/bpf: add btf func/func_proto unit tests in selftest test_btf
  bpf: get better bpf_prog ksyms based on btf func type_id
  tools/bpf: sync kernel uapi bpf.h header to tools directory
  tools/bpf: add new fields for program load in lib/bpf
  tools/bpf: extends test_btf to test load/retrieve func_type info
  tools/bpf: add support to read .BTF.ext sections
  tools/bpf: do not use pahole if clang/llvm can generate BTF sections
  tools/bpf: refactor to implement btf_get_from_id() in lib/bpf
  tools/bpf: enhance test_btf file testing to test func info
  tools/bpf: bpftool: add support for jited func types

Changelogs:
  v1 -> v2:
. Added missing sign-off
. Limited the func_name/struct_member_name length for validity test
. Removed/changed several verifier messages
. Modified several commit messages to remove line_off reference

 include/linux/bpf.h  |   2 +
 include/linux/bpf_verifier.h |   1 +
 include/linux/btf.h  |   2 +
 include/uapi/linux/bpf.h   

[PATCH bpf-next v2 01/13] bpf: btf: Break up btf_type_is_void()

2018-10-17 Thread Yonghong Song
This patch breaks up btf_type_is_void() into
btf_type_is_void() and btf_type_is_fwd().

It also adds btf_type_nosize() to better describe that it is
testing whether a type has no size info.

Signed-off-by: Martin KaFai Lau 
Signed-off-by: Yonghong Song 
---
 kernel/bpf/btf.c | 37 ++---
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 378cef70341c..be406d8906ce 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -306,15 +306,22 @@ static bool btf_type_is_modifier(const struct btf_type *t)
 
 static bool btf_type_is_void(const struct btf_type *t)
 {
-   /* void => no type and size info.
-* Hence, FWD is also treated as void.
-*/
-   return t == &btf_void || BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
+   return t == &btf_void;
+}
+
+static bool btf_type_is_fwd(const struct btf_type *t)
+{
+   return BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
+}
+
+static bool btf_type_nosize(const struct btf_type *t)
+{
+   return btf_type_is_void(t) || btf_type_is_fwd(t);
 }
 
-static bool btf_type_is_void_or_null(const struct btf_type *t)
+static bool btf_type_nosize_or_null(const struct btf_type *t)
 {
-   return !t || btf_type_is_void(t);
+   return !t || btf_type_nosize(t);
 }
 
 /* union is only a special case of struct:
@@ -826,7 +833,7 @@ const struct btf_type *btf_type_id_size(const struct btf 
*btf,
u32 size = 0;
 
size_type = btf_type_by_id(btf, size_type_id);
-   if (btf_type_is_void_or_null(size_type))
+   if (btf_type_nosize_or_null(size_type))
return NULL;
 
if (btf_type_has_size(size_type)) {
@@ -842,7 +849,7 @@ const struct btf_type *btf_type_id_size(const struct btf 
*btf,
size = btf->resolved_sizes[size_type_id];
size_type_id = btf->resolved_ids[size_type_id];
size_type = btf_type_by_id(btf, size_type_id);
-   if (btf_type_is_void(size_type))
+   if (btf_type_nosize_or_null(size_type))
return NULL;
}
 
@@ -1164,7 +1171,7 @@ static int btf_modifier_resolve(struct btf_verifier_env 
*env,
}
 
/* "typedef void new_void", "const void"...etc */
-   if (btf_type_is_void(next_type))
+   if (btf_type_is_void(next_type) || btf_type_is_fwd(next_type))
goto resolved;
 
if (!env_type_is_resolve_sink(env, next_type) &&
@@ -1178,7 +1185,7 @@ static int btf_modifier_resolve(struct btf_verifier_env 
*env,
 * pretty print).
 */
if (!btf_type_id_size(btf, _type_id, _type_size) &&
-   !btf_type_is_void(btf_type_id_resolve(btf, _type_id))) {
+   !btf_type_nosize(btf_type_id_resolve(btf, _type_id))) {
btf_verifier_log_type(env, v->t, "Invalid type_id");
return -EINVAL;
}
@@ -1205,7 +1212,7 @@ static int btf_ptr_resolve(struct btf_verifier_env *env,
}
 
/* "void *" */
-   if (btf_type_is_void(next_type))
+   if (btf_type_is_void(next_type) || btf_type_is_fwd(next_type))
goto resolved;
 
if (!env_type_is_resolve_sink(env, next_type) &&
@@ -1235,7 +1242,7 @@ static int btf_ptr_resolve(struct btf_verifier_env *env,
}
 
if (!btf_type_id_size(btf, _type_id, _type_size) &&
-   !btf_type_is_void(btf_type_id_resolve(btf, _type_id))) {
+   !btf_type_nosize(btf_type_id_resolve(btf, _type_id))) {
btf_verifier_log_type(env, v->t, "Invalid type_id");
return -EINVAL;
}
@@ -1396,7 +1403,7 @@ static int btf_array_resolve(struct btf_verifier_env *env,
/* Check array->index_type */
index_type_id = array->index_type;
index_type = btf_type_by_id(btf, index_type_id);
-   if (btf_type_is_void_or_null(index_type)) {
+   if (btf_type_nosize_or_null(index_type)) {
btf_verifier_log_type(env, v->t, "Invalid index");
return -EINVAL;
}
@@ -1415,7 +1422,7 @@ static int btf_array_resolve(struct btf_verifier_env *env,
/* Check array->type */
elem_type_id = array->type;
elem_type = btf_type_by_id(btf, elem_type_id);
-   if (btf_type_is_void_or_null(elem_type)) {
+   if (btf_type_nosize_or_null(elem_type)) {
btf_verifier_log_type(env, v->t,
  "Invalid elem");
return -EINVAL;
@@ -1615,7 +1622,7 @@ static int btf_struct_resolve(struct btf_verifier_env 
*env,
const struct btf_type *member_type = btf_type_by_id(env->btf,
member_type_id);
 
-   if (btf_type_is_void_or_null(member_type)) {
+   if (btf_type_nosize_or_null(me

[PATCH bpf-next v2 03/13] tools/bpf: sync kernel btf.h header

2018-10-17 Thread Yonghong Song
The kernel uapi btf.h is synced to the tools directory.

Signed-off-by: Martin KaFai Lau 
Signed-off-by: Yonghong Song 
---
 tools/include/uapi/linux/btf.h | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index 972265f32871..63f8500e6f34 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -40,7 +40,8 @@ struct btf_type {
/* "size" is used by INT, ENUM, STRUCT and UNION.
 * "size" tells the size of the type it is describing.
 *
-* "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
+* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
+* FUNC and FUNC_PROTO.
 * "type" is a type_id referring to another type.
 */
union {
@@ -64,8 +65,10 @@ struct btf_type {
 #define BTF_KIND_VOLATILE  9   /* Volatile */
 #define BTF_KIND_CONST 10  /* Const*/
 #define BTF_KIND_RESTRICT  11  /* Restrict */
-#define BTF_KIND_MAX   11
-#define NR_BTF_KINDS   12
+#define BTF_KIND_FUNC  12  /* Function */
+#define BTF_KIND_FUNC_PROTO13  /* Function Prototype   */
+#define BTF_KIND_MAX   13
+#define NR_BTF_KINDS   14
 
 /* For some specific BTF_KIND, "struct btf_type" is immediately
  * followed by extra data.
-- 
2.17.1



[PATCH bpf-next v2 08/13] tools/bpf: extends test_btf to test load/retrieve func_type info

2018-10-17 Thread Yonghong Song
A two-function bpf program is loaded with btf and func_info.
After successful prog load, the bpf_get_info syscall is called
to retrieve prog info to ensure the types returned from the
kernel matches the types passed to the kernel from the
user space.

Several negative tests are also added to test loading/retrieving
of func_type info.

Signed-off-by: Yonghong Song 
---
 tools/testing/selftests/bpf/test_btf.c | 278 -
 1 file changed, 275 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_btf.c 
b/tools/testing/selftests/bpf/test_btf.c
index b6461c3c5e11..e03a8cea4bb7 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -5,6 +5,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -22,9 +23,13 @@
 #include "bpf_rlimit.h"
 #include "bpf_util.h"
 
+#define MAX_INSNS  512
+#define MAX_SUBPROGS   16
+
 static uint32_t pass_cnt;
 static uint32_t error_cnt;
 static uint32_t skip_cnt;
+static bool jit_enabled;
 
 #define CHECK(condition, format...) ({ \
int __ret = !!(condition);  \
@@ -60,6 +65,24 @@ static int __base_pr(const char *format, ...)
return err;
 }
 
+static bool is_jit_enabled(void)
+{
+   const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
+   bool enabled = false;
+   int sysctl_fd;
+
+   sysctl_fd = open(jit_sysctl, 0, O_RDONLY);
+   if (sysctl_fd != -1) {
+   char tmpc;
+
+   if (read(sysctl_fd, , sizeof(tmpc)) == 1)
+   enabled = (tmpc != '0');
+   close(sysctl_fd);
+   }
+
+   return enabled;
+}
+
 #define BTF_INFO_ENC(kind, root, vlen) \
((!!(root) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
 
@@ -103,6 +126,7 @@ static struct args {
bool get_info_test;
bool pprint_test;
bool always_log;
+   bool func_type_test;
 } args;
 
 static char btf_log_buf[BTF_LOG_BUF_SIZE];
@@ -2693,16 +2717,256 @@ static int test_pprint(void)
return err;
 }
 
+static struct btf_func_type_test {
+   const char *descr;
+   const char *str_sec;
+   __u32 raw_types[MAX_NR_RAW_TYPES];
+   __u32 str_sec_size;
+   struct bpf_insn insns[MAX_INSNS];
+   __u32 prog_type;
+   struct bpf_func_info func_info[MAX_SUBPROGS];
+   __u32 func_info_len;
+   bool expected_prog_load_failure;
+} func_type_test[] = {
+
+{
+   .descr = "func_type test #1",
+   .str_sec = "\0int\0unsigned int\0funcA\0funcB",
+   .raw_types = {
+   BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+   BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4),   /* [2] */
+   BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 2), 1),  
/* [3] */
+   1, 2,
+   BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 2), 1),  
/* [4] */
+   2, 1,
+   BTF_END_RAW,
+   },
+   .str_sec_size = sizeof("\0int\0unsigned int\0funcA\0funcB"),
+   .insns = {
+   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+   BPF_MOV64_IMM(BPF_REG_0, 1),
+   BPF_EXIT_INSN(),
+   BPF_MOV64_IMM(BPF_REG_0, 2),
+   BPF_EXIT_INSN(),
+   },
+   .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+   .func_info = { {0, 3}, {3, 4} },
+   .func_info_len = 2 * sizeof(struct bpf_func_info),
+},
+
+{
+   .descr = "func_type test #2",
+   .str_sec = "\0int\0unsigned int\0funcA\0funcB",
+   .raw_types = {
+   BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+   BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4),   /* [2] */
+   /* incorrect func type */
+   BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 2), 
1),  /* [3] */
+   1, 2,
+   BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 2), 1),  
/* [4] */
+   2, 1,
+   BTF_END_RAW,
+   },
+   .str_sec_size = sizeof("\0int\0unsigned int\0funcA\0funcB"),
+   .insns = {
+   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+   BPF_MOV64_IMM(BPF_REG_0, 1),
+   BPF_EXIT_INSN(),
+   BPF_MOV64_IMM(BPF_REG_0, 2),
+   BPF_EXIT_INSN(),
+   },
+   .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+   .func_info = { {0, 3}, {3, 4} },
+   .func_info_len = 2 * sizeof(struct bpf_func_info),
+   .expected_prog_load_failure = true,
+},
+
+{
+   .descr = "func_type test #3",
+   .str_sec = "\0int\0unsigned int\0funcA\0funcB",
+   .raw_types = {
+   BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+ 

[PATCH bpf-next v2 05/13] bpf: get better bpf_prog ksyms based on btf func type_id

2018-10-17 Thread Yonghong Song
This patch added interface to load a program with the following
additional information:
   . prog_btf_fd
   . func_info and func_info_len
where func_info provides the function range and type_id
corresponding to each function.

If the verifier agrees with the function range provided by the user,
the bpf_prog ksym for each function will use the func name
provided in the type_id. This is supposed to provide better
encoding, as it is not limited by the 16-byte program name
limitation, and is especially useful for a bpf program which
contains multiple subprograms.

The bpf_prog_info interface is also extended to
return btf_id and jited_func_types, so user space can
print out the function prototype for each jited function.

Signed-off-by: Yonghong Song 
---
 include/linux/bpf.h  |  2 +
 include/linux/bpf_verifier.h |  1 +
 include/linux/btf.h  |  2 +
 include/uapi/linux/bpf.h | 11 +
 kernel/bpf/btf.c | 16 +++
 kernel/bpf/core.c| 10 +
 kernel/bpf/syscall.c | 83 +++-
 kernel/bpf/verifier.c| 46 
 8 files changed, 170 insertions(+), 1 deletion(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e60fff48288b..a99e038ce9c4 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -308,6 +308,8 @@ struct bpf_prog_aux {
void *security;
 #endif
struct bpf_prog_offload *offload;
+   struct btf *btf;
+   u32 type_id; /* type id for this prog/func */
union {
struct work_struct work;
struct rcu_head rcu;
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 9e8056ec20fa..e84782ec50ac 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -201,6 +201,7 @@ static inline bool bpf_verifier_log_needed(const struct 
bpf_verifier_log *log)
 struct bpf_subprog_info {
u32 start; /* insn idx of function entry point */
u16 stack_depth; /* max. stack depth used by this function */
+   u32 type_id; /* btf type_id for this subprog */
 };
 
 /* single container for all structs
diff --git a/include/linux/btf.h b/include/linux/btf.h
index e076c4697049..4611a53b5dd7 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -46,5 +46,7 @@ void btf_type_seq_show(const struct btf *btf, u32 type_id, 
void *obj,
   struct seq_file *m);
 int btf_get_fd_by_id(u32 id);
 u32 btf_id(const struct btf *btf);
+bool btf_type_id_func(const struct btf *btf, u32 type_id);
+const char *btf_get_name_by_id(const struct btf *btf, u32 type_id);
 
 #endif
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f9187b41dff6..7ebbf4f06a65 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -332,6 +332,9 @@ union bpf_attr {
 * (context accesses, allowed helpers, etc).
 */
__u32   expected_attach_type;
+   __u32   prog_btf_fd;/* fd pointing to BTF type data 
*/
+   __u32   func_info_len;  /* func_info length */
+   __aligned_u64   func_info;  /* func type info */
};
 
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -2585,6 +2588,9 @@ struct bpf_prog_info {
__u32 nr_jited_func_lens;
__aligned_u64 jited_ksyms;
__aligned_u64 jited_func_lens;
+   __u32 btf_id;
+   __u32 nr_jited_func_types;
+   __aligned_u64 jited_func_types;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
@@ -2896,4 +2902,9 @@ struct bpf_flow_keys {
};
 };
 
+struct bpf_func_info {
+   __u32   insn_offset;
+   __u32   type_id;
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 763f8e06bc91..9e97bbbafe9b 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -489,6 +489,15 @@ static const struct btf_type *btf_type_by_id(const struct 
btf *btf, u32 type_id)
return btf->types[type_id];
 }
 
+bool btf_type_id_func(const struct btf *btf, u32 type_id)
+{
+   const struct btf_type *type = btf_type_by_id(btf, type_id);
+
+   if (!type || !btf_type_is_func(type))
+   return false;
+   return true;
+}
+
 /*
  * Regular int is not a bit field and it must be either
  * u8/u16/u32/u64.
@@ -2582,3 +2591,10 @@ u32 btf_id(const struct btf *btf)
 {
return btf->id;
 }
+
+const char *btf_get_name_by_id(const struct btf *btf, u32 type_id)
+{
+   const struct btf_type *t = btf_type_by_id(btf, type_id);
+
+   return btf_name_by_offset(btf, t->name_off);
+}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index defcf4df6d91..f5ba5d4cb259 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -387,6 +388,7 @@ bpf_get_prog_addr_region(const struct bpf_prog *prog,
 static void bpf_get_prog_name(const struct bpf_p

[PATCH bpf-next v2 02/13] bpf: btf: Add BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO

2018-10-17 Thread Yonghong Song
This patch adds BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO
support to the type section. BTF_KIND_FUNC_PROTO is used
to specify the type of a function pointer. With this,
BTF has a complete set of C types (except float).

BTF_KIND_FUNC is used to specify the signature of a
defined subprogram. BTF_KIND_FUNC_PROTO can be referenced
by another type, e.g., a pointer type, and BTF_KIND_FUNC
type cannot be referenced by another type.

For both BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO types,
the func return type is in t->type (where t is a
"struct btf_type" object). The func args are an array of
u32s immediately following object "t".

As a concrete example, for the C program below,
  $ cat test.c
  int foo(int (*bar)(int)) { return bar(5); }
with LLVM patch https://reviews.llvm.org/D53261
in Debug mode, we have
  $ clang -target bpf -g -O2 -mllvm -debug-only=btf -c test.c
  Type Table:
  [1] FUNC NameOff=1 Info=0x0c01 Size/Type=2
  ParamType=3
  [2] INT NameOff=11 Info=0x0100 Size/Type=4
  Desc=0x0120
  [3] PTR NameOff=0 Info=0x0200 Size/Type=4
  [4] FUNC_PROTO NameOff=0 Info=0x0d01 Size/Type=2
  ParamType=2

  String Table:
  0 :
  1 : foo
  5 : .text
  11 : int
  15 : test.c
  22 : int foo(int (*bar)(int)) { return bar(5); }

  FuncInfo Table:
  SecNameOff=5
  InsnOffset= TypeId=1

  ...

(Eventually we shall have bpftool to dump btf information
 like the above.)

Function "foo" has a FUNC type (type_id = 1).
The parameter of "foo" has type_id 3 which is PTR->FUNC_PROTO,
where FUNC_PROTO refers to function pointer "bar".

In the FuncInfo Table, for section .text, the function,
whose instruction offset is still to be determined at this stage,
has type_id=1 which refers to a FUNC type.
This way, the function signature is
available to both kernel and user space.
Here, the insn offset is not available at dump time
as relocation is resolved quite late in the compilation process.

Signed-off-by: Martin KaFai Lau 
Signed-off-by: Yonghong Song 
---
 include/uapi/linux/btf.h |   9 +-
 kernel/bpf/btf.c | 280 ++-
 2 files changed, 253 insertions(+), 36 deletions(-)

diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index 972265f32871..63f8500e6f34 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -40,7 +40,8 @@ struct btf_type {
/* "size" is used by INT, ENUM, STRUCT and UNION.
 * "size" tells the size of the type it is describing.
 *
-* "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
+* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
+* FUNC and FUNC_PROTO.
 * "type" is a type_id referring to another type.
 */
union {
@@ -64,8 +65,10 @@ struct btf_type {
 #define BTF_KIND_VOLATILE  9   /* Volatile */
 #define BTF_KIND_CONST 10  /* Const*/
 #define BTF_KIND_RESTRICT  11  /* Restrict */
-#define BTF_KIND_MAX   11
-#define NR_BTF_KINDS   12
+#define BTF_KIND_FUNC  12  /* Function */
+#define BTF_KIND_FUNC_PROTO13  /* Function Prototype   */
+#define BTF_KIND_MAX   13
+#define NR_BTF_KINDS   14
 
 /* For some specific BTF_KIND, "struct btf_type" is immediately
  * followed by extra data.
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index be406d8906ce..763f8e06bc91 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -5,6 +5,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -259,6 +260,8 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
[BTF_KIND_VOLATILE] = "VOLATILE",
[BTF_KIND_CONST]= "CONST",
[BTF_KIND_RESTRICT] = "RESTRICT",
+   [BTF_KIND_FUNC] = "FUNC",
+   [BTF_KIND_FUNC_PROTO]   = "FUNC_PROTO",
 };
 
 struct btf_kind_operations {
@@ -281,6 +284,9 @@ struct btf_kind_operations {
 static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS];
 static struct btf_type btf_void;
 
+static int btf_resolve(struct btf_verifier_env *env,
+  const struct btf_type *t, u32 type_id);
+
 static bool btf_type_is_modifier(const struct btf_type *t)
 {
/* Some of them is not strictly a C modifier
@@ -314,9 +320,20 @@ static bool btf_type_is_fwd(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
 }
 
+static bool btf_type_is_func(const struct btf_type *t)
+{
+   return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC;
+}
+
+static bool btf_type_is_func_proto(const struct btf_type *t)
+{
+   return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO;
+}
+
 static bool btf_type_nosize(const struct btf_type *t)
 {
-   return btf_type_is_void(t) || btf_type_is_fwd(t);
+   return btf_type_is_

[PATCH bpf-next v2 04/13] tools/bpf: add btf func/func_proto unit tests in selftest test_btf

2018-10-17 Thread Yonghong Song
Add several BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO
unit tests in bpf selftest test_btf.

Signed-off-by: Martin KaFai Lau 
Signed-off-by: Yonghong Song 
---
 tools/lib/bpf/btf.c|   4 +
 tools/testing/selftests/bpf/test_btf.c | 216 +
 2 files changed, 220 insertions(+)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 449591aa9900..33095fc1860b 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -165,6 +165,10 @@ static int btf_parse_type_sec(struct btf *btf, 
btf_print_fn_t err_log)
case BTF_KIND_ENUM:
next_type += vlen * sizeof(struct btf_enum);
break;
+   case BTF_KIND_FUNC:
+   case BTF_KIND_FUNC_PROTO:
+   next_type += vlen * sizeof(int);
+   break;
case BTF_KIND_TYPEDEF:
case BTF_KIND_PTR:
case BTF_KIND_FWD:
diff --git a/tools/testing/selftests/bpf/test_btf.c 
b/tools/testing/selftests/bpf/test_btf.c
index f42b3396d622..b6461c3c5e11 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -1374,6 +1374,222 @@ static struct btf_raw_test raw_tests[] = {
.map_create_err = true,
 },
 
+{
+   .descr = "func pointer #1",
+   .raw_types = {
+   BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+   BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),   /* [2] */
+   /* int (*func)(int, unsigned int) */
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 2), 1),
/* [3] */
+   1, 2,
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),   /* [4] 
*/
+   BTF_END_RAW,
+   },
+   .str_sec = "",
+   .str_sec_size = sizeof(""),
+   .map_type = BPF_MAP_TYPE_ARRAY,
+   .map_name = "func_type_check_btf",
+   .key_size = sizeof(int),
+   .value_size = sizeof(int),
+   .key_type_id = 1,
+   .value_type_id = 1,
+   .max_entries = 4,
+},
+
+{
+   .descr = "func pointer #2",
+   .raw_types = {
+   BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+   BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),   /* [2] */
+   /* void (*func)(int, unsigned int, ...) */
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 3), 0),
/* [3] */
+   1, 2, 0,
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),   /* [4] 
*/
+   BTF_END_RAW,
+   },
+   .str_sec = "",
+   .str_sec_size = sizeof(""),
+   .map_type = BPF_MAP_TYPE_ARRAY,
+   .map_name = "func_type_check_btf",
+   .key_size = sizeof(int),
+   .value_size = sizeof(int),
+   .key_type_id = 1,
+   .value_type_id = 1,
+   .max_entries = 4,
+},
+
+{
+   .descr = "func pointer #3",
+   .raw_types = {
+   BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+   BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),   /* [2] */
+   /* void (*func)(void, int, unsigned int) */
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 3), 0),
/* [3] */
+   1, 0, 2,
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),   /* [4] 
*/
+   BTF_END_RAW,
+   },
+   .str_sec = "",
+   .str_sec_size = sizeof(""),
+   .map_type = BPF_MAP_TYPE_ARRAY,
+   .map_name = "func_type_check_btf",
+   .key_size = sizeof(int),
+   .value_size = sizeof(int),
+   .key_type_id = 1,
+   .value_type_id = 1,
+   .max_entries = 4,
+   .btf_load_err = true,
+   .err_str = "Invalid arg#2",
+},
+
+{
+   .descr = "func pointer #4",
+   .raw_types = {
+   BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+   BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),   /* [2] */
+   /*
+* Testing:
+* BTF_KIND_CONST => BTF_KIND_TYPEDEF => BTF_KIND_PTR =>
+* BTF_KIND_FUNC_PROTO
+*/
+   /* typedef void (*func_ptr)(int, unsigned int) */
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 5),/* [3] 
*/
+   /* const func_ptr */
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 3), /* [4] 
*/
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 6),   /* [5] 
*/
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 2), 0),
/* [6] */
+   1, 2,
+   BTF_END_RAW,
+   },
+   .str_sec = "",
+   .str_sec_size = sizeof(""),
+   .map_type = BPF_MAP_TYPE_ARRAY,
+   .map_name = "func_type_check_btf",
+   .ke

Re: [PATCH bpf-next 2/3] bpf: emit RECORD_MMAP events for bpf prog load/unload

2018-10-17 Thread Song Liu
Hi David,

On Tue, Oct 16, 2018 at 4:43 PM David Ahern  wrote:
>
> On 10/15/18 4:33 PM, Song Liu wrote:
> > I am working with Alexei on the idea of fetching BPF program information via
> > BPF_OBJ_GET_INFO_BY_FD cmd. I added PERF_RECORD_BPF_EVENT
> > to perf_event_type, and dumped these events to perf event ring buffer.
> >
> > I found that perf will not process event until the end of perf-record:
> >
> > root@virt-test:~# ~/perf record -ag -- sleep 10
> > .. 10 seconds later
> > [ perf record: Woken up 34 times to write data ]
> > machine__process_bpf_event: prog_id 6 loaded
> > machine__process_bpf_event: prog_id 6 unloaded
> > [ perf record: Captured and wrote 9.337 MB perf.data (93178 samples) ]
> >
> > In this example, the bpf program was loaded and then unloaded in
> > another terminal. When machine__process_bpf_event() processes
> > the load event, the bpf program is already unloaded. Therefore,
> > machine__process_bpf_event() will not be able to get information
> > about the program via BPF_OBJ_GET_INFO_BY_FD cmd.
> >
> > To solve this problem, we will need to run BPF_OBJ_GET_INFO_BY_FD
> > as soon as perf get the event from kernel. I looked around the perf
> > code for a while. But I haven't found a good example where some
> > events are processed before the end of perf-record. Could you
> > please help me with this?
>
> perf record does not process events as they are generated. Its sole job
> is pushing data from the maps to a file as fast as possible meaning in
> bulk based on current read and write locations.
>
> Adding code to process events will add significant overhead to the
> record command and will not really solve your race problem.

Thanks for the comment.

I agree that processing events while recording has significant overhead.
In this case, perf user space needs to know details about the jited BPF
program. It is impossible to pass all these details to user space through
the relatively stable ring_buffer API. Therefore, some processing of the
data is necessary (get bpf prog_id from ring buffer, and then fetch program
details via BPF_OBJ_GET_INFO_BY_FD).

I have some idea on processing important data with relatively low overhead.
Let me try implement it.

Thanks again,
Song


[PATCH bpf-next 1/2] bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB

2018-10-16 Thread Song Liu
BPF programs of BPF_PROG_TYPE_CGROUP_SKB need to access headers in the
skb. This patch enables direct access of skb for these programs.

In __cgroup_bpf_run_filter_skb(), bpf_compute_data_pointers() is called
to compute proper data_end for the BPF program.

Signed-off-by: Song Liu 
---
 kernel/bpf/cgroup.c |  4 
 net/core/filter.c   | 26 +-
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 00f6ed2e4f9a..340d496f35bd 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -566,6 +566,10 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
save_sk = skb->sk;
skb->sk = sk;
__skb_push(skb, offset);
+
+   /* compute pointers for the bpf prog */
+   bpf_compute_data_pointers(skb);
+
ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
 bpf_prog_run_save_cb);
__skb_pull(skb, offset);
diff --git a/net/core/filter.c b/net/core/filter.c
index 1a3ac6c46873..8b5a502e241f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5346,6 +5346,30 @@ static bool sk_filter_is_valid_access(int off, int size,
return bpf_skb_is_valid_access(off, size, type, prog, info);
 }
 
+static bool cg_skb_is_valid_access(int off, int size,
+  enum bpf_access_type type,
+  const struct bpf_prog *prog,
+  struct bpf_insn_access_aux *info)
+{
+   if (type == BPF_WRITE)
+   return false;
+
+   switch (off) {
+   case bpf_ctx_range(struct __sk_buff, len):
+   break;
+   case bpf_ctx_range(struct __sk_buff, data):
+   info->reg_type = PTR_TO_PACKET;
+   break;
+   case bpf_ctx_range(struct __sk_buff, data_end):
+   info->reg_type = PTR_TO_PACKET_END;
+   break;
+   default:
+   return false;
+   }
+
+   return bpf_skb_is_valid_access(off, size, type, prog, info);
+}
+
 static bool lwt_is_valid_access(int off, int size,
enum bpf_access_type type,
const struct bpf_prog *prog,
@@ -7038,7 +7062,7 @@ const struct bpf_prog_ops xdp_prog_ops = {
 
 const struct bpf_verifier_ops cg_skb_verifier_ops = {
.get_func_proto = cg_skb_func_proto,
-   .is_valid_access= sk_filter_is_valid_access,
+   .is_valid_access= cg_skb_is_valid_access,
.convert_ctx_access = bpf_convert_ctx_access,
 };
 
-- 
2.17.1



[PATCH bpf-next 2/2] bpf: add tests for direct packet access from CGROUP_SKB

2018-10-16 Thread Song Liu
Tests are added to make sure CGROUP_SKB can directly access len, data,
and data_end in __sk_buff, but not other fields.

Signed-off-by: Song Liu 
---
 tools/testing/selftests/bpf/test_verifier.c | 30 +
 1 file changed, 30 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_verifier.c 
b/tools/testing/selftests/bpf/test_verifier.c
index cf4cd32b6772..aaf2ceba83dd 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -4862,6 +4862,36 @@ static struct bpf_test tests[] = {
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
+   {
+   "direct packet read for CGROUP_SKB",
+   .insns = {
+   BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+   offsetof(struct __sk_buff, data)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+   offsetof(struct __sk_buff, data_end)),
+   BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+   offsetof(struct __sk_buff, len)),
+   BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+   BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+   BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+   BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+   BPF_MOV64_IMM(BPF_REG_0, 0),
+   BPF_EXIT_INSN(),
+   },
+   .result = ACCEPT,
+   .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+   },
+   {
+   "invalid access of tc_classid for CGROUP_SKB",
+   .insns = {
+   BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+   offsetof(struct __sk_buff, tc_classid)),
+   BPF_EXIT_INSN(),
+   },
+   .result = REJECT,
+   .errstr = "invalid bpf_context access",
+   .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+   },
{
"valid cgroup storage access",
.insns = {
-- 
2.17.1



[PATCH bpf-next 0/2] bpf: add cg_skb_is_valid_access

2018-10-16 Thread Song Liu
This set enables BPF program of type BPF_PROG_TYPE_CGROUP_SKB to access
__skb_buff->len/data/data_end directly.

Song Liu (2):
  bpf: add cg_skb_is_valid_access for BPF_PROG_TYPE_CGROUP_SKB
  bpf: add tests for direct packet access from CGROUP_SKB

 kernel/bpf/cgroup.c |  4 +++
 net/core/filter.c   | 26 +-
 tools/testing/selftests/bpf/test_verifier.c | 30 +
 3 files changed, 59 insertions(+), 1 deletion(-)

--
2.17.1


Re: [PATCH bpf-next 00/13] bpf: add btf func info support

2018-10-16 Thread Yonghong Song


On 10/16/18 11:27 AM, Alexei Starovoitov wrote:
> On Fri, Oct 12, 2018 at 11:54:20AM -0700, Yonghong Song wrote:
>> The BTF support was added to kernel by Commit 69b693f0aefa
>> ("bpf: btf: Introduce BPF Type Format (BTF)"), which introduced
>> .BTF section into ELF file and is primarily
>> used for map pretty print.
>> pahole is used to convert dwarf to BTF for ELF files.
>>
>> The next step would be add func type info and debug line info
>> into BTF. For debug line info, it is desirable to encode
>> source code directly in the BTF to ease deployment and
>> introspection.
> 
> it's kinda confusing that cover letter talks about line info next step,
> but these kernel side patches are only for full prog name from btf.
> It certainly makes sense for llvm to do both at the same time.
> Please make the cover letter more clear.

Make sense. Will remove line_info stuff from the cover letter.


Re: [PATCH bpf-next 05/13] bpf: get better bpf_prog ksyms based on btf func type_id

2018-10-16 Thread Yonghong Song


On 10/16/18 10:59 AM, Alexei Starovoitov wrote:
> On Fri, Oct 12, 2018 at 11:54:42AM -0700, Yonghong Song wrote:
>> This patch added interface to load a program with the following
>> additional information:
>> . prog_btf_fd
>> . func_info and func_info_len
>> where func_info will provides function range and type_id
>> corresponding to each function.
>>
>> If verifier agrees with function range provided by the user,
>> the bpf_prog ksym for each function will use the func name
>> provided in the type_id, which is supposed to provide better
>> encoding as it is not limited by 16 bytes program name
>> limitation and this is better for bpf program which contains
>> multiple subprograms.
>>
>> The bpf_prog_info interface is also extended to
>> return btf_id and jited_func_types, so user spaces can
>> print out the function prototype for each jited function.
>>
>> Signed-off-by: Yonghong Song 
> ...
>>  BUILD_BUG_ON(sizeof("bpf_prog_") +
>>   sizeof(prog->tag) * 2 +
>> @@ -401,6 +403,13 @@ static void bpf_get_prog_name(const struct bpf_prog 
>> *prog, char *sym)
>>   
>>  sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_");
>>  sym  = bin2hex(sym, prog->tag, sizeof(prog->tag));
>> +
>> +if (prog->aux->btf) {
>> +func_name = btf_get_name_by_id(prog->aux->btf, 
>> prog->aux->type_id);
>> +snprintf(sym, (size_t)(end - sym), "_%s", func_name);
>> +return;
> 
> Would it make sense to add a comment here that prog->aux->name is ignored
> when full btf name is available? (otherwise the same name will appear twice 
> in ksym)

Will add a comment.

> 
>> +}
>> +
>>  if (prog->aux->name[0])
>>  snprintf(sym, (size_t)(end - sym), "_%s", prog->aux->name);
> ...
>> +static int check_btf_func(struct bpf_prog *prog, struct bpf_verifier_env 
>> *env,
>> +  union bpf_attr *attr)
>> +{
>> +struct bpf_func_info *data;
>> +int i, nfuncs, ret = 0;
>> +
>> +if (!attr->func_info_len)
>> +return 0;
>> +
>> +nfuncs = attr->func_info_len / sizeof(struct bpf_func_info);
>> +if (env->subprog_cnt != nfuncs) {
>> +verbose(env, "number of funcs in func_info does not match 
>> verifier\n");
> 
> 'does not match verifier' is hard to make sense of.
> How about 'number of funcs in func_info doesn't match number of subprogs' ?

Sounds good to me.

> 
>> +return -EINVAL;
>> +}
>> +
>> +data = kvmalloc(attr->func_info_len, GFP_KERNEL | __GFP_NOWARN);
>> +if (!data) {
>> +verbose(env, "no memory to allocate attr func_info\n");
> 
> I don't think we ever print such warnings for memory allocations.
> imo this can be removed, since enomem is enough.

Okay.

> 
>> +return -ENOMEM;
>> +}
>> +
>> +if (copy_from_user(data, u64_to_user_ptr(attr->func_info),
>> +   attr->func_info_len)) {
>> +verbose(env, "memory copy error for attr func_info\n");
> 
> similar thing. kernel never warns about copy_from_user errors.

Okay.

> 
>> +ret = -EFAULT;
>> +goto cleanup;
>> +}
>> +
>> +for (i = 0; i < nfuncs; i++) {
>> +if (env->subprog_info[i].start != data[i].insn_offset) {
>> +verbose(env, "func_info subprog start (%d) does not 
>> match verifier (%d)\n",
>> +env->subprog_info[i].start, 
>> data[i].insn_offset);
> 
> I think printing exact insn offset isn't going to be much help
> for regular user to debug it. If this happens, it's likely llvm issue.
> How about 'func_info BTF section doesn't match subprog layout in BPF program' 
> ?

Okay.

> 


Re: [PATCH bpf-next 02/13] bpf: btf: Add BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO

2018-10-16 Thread Yonghong Song


On 10/15/18 3:36 PM, Daniel Borkmann wrote:
> On 10/12/2018 08:54 PM, Yonghong Song wrote:
> [...]
>> +static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
>> +{
>> +/* offset must be valid */
>> +const char *src = &btf->strings[offset];
>> +
>> +if (!isalpha(*src) && *src != '_')
>> +return false;
>> +
>> +src++;
>> +while (*src) {
>> +if (!isalnum(*src) && *src != '_')
>> +return false;
>> +src++;
>> +}
>> +
>> +return true;
>> +}
> 
> Should there be an upper name length limit like KSYM_NAME_LEN? (Is it implied
> by the kvmalloc() limit?)

KSYM_NAME_LEN is good choice. Here, we check function names and 
struct/union member names. In C, based on
https://stackoverflow.com/questions/2352209/max-identifier-length,
the identifier max length is 63. Some compiler implementation may vary.
KSYM_NAME_LEN is 128.

> 
>>   static const char *btf_name_by_offset(const struct btf *btf, u32 offset)
>>   {
>>  if (!offset)
>> @@ -747,7 +782,9 @@ static bool env_type_is_resolve_sink(const struct 
>> btf_verifier_env *env,
>>  /* int, enum or void is a sink */
>>  return !btf_type_needs_resolve(next_type);
>>  case RESOLVE_PTR:
>> -/* int, enum, void, struct or array is a sink for ptr */
>> +/* int, enum, void, struct, array or func_ptoto is a sink
>> + * for ptr
>> + */
>>  return !btf_type_is_modifier(next_type) &&
>>  !btf_type_is_ptr(next_type);
>>  case RESOLVE_STRUCT_OR_ARRAY:


Re: [PATCH bpf-next 02/13] bpf: btf: Add BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO

2018-10-16 Thread Yonghong Song


On 10/15/18 3:30 PM, Daniel Borkmann wrote:
> On 10/12/2018 08:54 PM, Yonghong Song wrote:
>> This patch adds BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO
>> support to the type section. BTF_KIND_FUNC_PROTO is used
>> to specify the type of a function pointer. With this,
>> BTF has a complete set of C types (except float).
>>
>> BTF_KIND_FUNC is used to specify the signature of a
>> defined subprogram. BTF_KIND_FUNC_PROTO can be referenced
>> by another type, e.g., a pointer type, and BTF_KIND_FUNC
>> type cannot be referenced by another type.
>>
>> For both BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO types,
>> the func return type is in t->type (where t is a
>> "struct btf_type" object). The func args are an array of
>> u32s immediately following object "t".
>>
>> As a concrete example, for the C program below,
>>$ cat test.c
>>int foo(int (*bar)(int)) { return bar(5); }
>> with latest llvm trunk built with Debug mode, we have
>>$ clang -target bpf -g -O2 -mllvm -debug-only=btf -c test.c
>>Type Table:
>>[1] FUNC name_off=1 info=0x0c01 size/type=2
>>param_type=3
>>[2] INT name_off=11 info=0x0100 size/type=4
>>desc=0x0120
>>[3] PTR name_off=0 info=0x0200 size/type=4
>>[4] FUNC_PROTO name_off=0 info=0x0d01 size/type=2
>>param_type=2
>>
>>String Table:
>>0 :
>>1 : foo
>>5 : .text
>>11 : int
>>15 : test.c
>>22 : int foo(int (*bar)(int)) { return bar(5); }
>>
>>FuncInfo Table:
>>sec_name_off=5
>>insn_offset= type_id=1
>>
>>...
>>
>> (Eventually we shall have bpftool to dump btf information
>>   like the above.)
>>
>> Function "foo" has a FUNC type (type_id = 1).
>> The parameter of "foo" has type_id 3 which is PTR->FUNC_PROTO,
>> where FUNC_PROTO refers to function pointer "bar".
> 
> Should also "bar" be part of the string table (at least at some point in 
> future)?

Yes, we can do it. The dwarf for the abovee example looks like

0x0043: DW_TAG_formal_parameter
   DW_AT_location(
  [0x0000000000000000, 0x0000000000000008):
DW_OP_reg1 W1
  [0x0000000000000008, 0x0000000000000018):
DW_OP_reg2 W2)
   DW_AT_name("bar")
   DW_AT_decl_file   ("/home/yhs/tmp/t.c")
   DW_AT_decl_line   (1)
   DW_AT_type(0x005a "subroutine int*")

0x005a:   DW_TAG_pointer_type
 DW_AT_type  (0x005f "subroutine int")

0x005f:   DW_TAG_subroutine_type
 DW_AT_type  (0x0053 "int")
 DW_AT_prototyped(true)

0x0064: DW_TAG_formal_parameter
   DW_AT_type(0x0053 "int")

0x0069: NULL

0x006a:   NULL

The current llvm implementation does not record func
parameter name, so "bar" got lost. We could associate
"bar" with pointer type in the future implementation.

> Iow, if verifier hints to an issue in the program when it would for example 
> walk
> pointers and rewrite ctx access, then it could dump the var name along with 
> it.
> It might be useful as well in combination with 22 from str table, when 
> annotating
> the source. We might need support for variadic functions, though. How is LLVM
> handling the latter with the recent BTF support?

The LLVM implementation does support variadic functions.
The last type id 0 indicates a variadic function.

> 
>> In FuncInfo Table, for section .text, the function,
>> with to-be-determined offset (marked as <offset>),
>> has type_id=1 which refers to a FUNC type.
>> This way, the function signature is
>> available to both kernel and user space.
>> Here, the insn offset is not available during the dump time
>> as relocation is resolved pretty late in the compilation process.
>>
>> Signed-off-by: Martin KaFai Lau 
>> Signed-off-by: Yonghong Song 


Re: [PATCH bpf-next 05/13] bpf: get better bpf_prog ksyms based on btf func type_id

2018-10-16 Thread Yonghong Song


On 10/15/18 4:12 PM, Martin Lau wrote:
> On Fri, Oct 12, 2018 at 11:54:42AM -0700, Yonghong Song wrote:
>> This patch added interface to load a program with the following
>> additional information:
>> . prog_btf_fd
>> . func_info and func_info_len
>> where func_info will provides function range and type_id
>> corresponding to each function.
>>
>> If verifier agrees with function range provided by the user,
>> the bpf_prog ksym for each function will use the func name
>> provided in the type_id, which is supposed to provide better
>> encoding as it is not limited by 16 bytes program name
>> limitation and this is better for bpf program which contains
>> multiple subprograms.
>>
>> The bpf_prog_info interface is also extended to
>> return btf_id and jited_func_types, so user spaces can
>> print out the function prototype for each jited function.
> Some nits.
> 
>>
>> Signed-off-by: Yonghong Song 
>> ---
>>   include/linux/bpf.h  |  2 +
>>   include/linux/bpf_verifier.h |  1 +
>>   include/linux/btf.h  |  2 +
>>   include/uapi/linux/bpf.h | 11 +
>>   kernel/bpf/btf.c | 16 +++
>>   kernel/bpf/core.c|  9 
>>   kernel/bpf/syscall.c | 86 +++-
>>   kernel/bpf/verifier.c| 50 +
>>   8 files changed, 176 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index 9b558713447f..e9c63ffa01af 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -308,6 +308,8 @@ struct bpf_prog_aux {
>>  void *security;
>>   #endif
>>  struct bpf_prog_offload *offload;
>> +struct btf *btf;
>> +u32 type_id; /* type id for this prog/func */
>>  union {
>>  struct work_struct work;
>>  struct rcu_head rcu;
>> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
>> index 9e8056ec20fa..e84782ec50ac 100644
>> --- a/include/linux/bpf_verifier.h
>> +++ b/include/linux/bpf_verifier.h
>> @@ -201,6 +201,7 @@ static inline bool bpf_verifier_log_needed(const struct 
>> bpf_verifier_log *log)
>>   struct bpf_subprog_info {
>>  u32 start; /* insn idx of function entry point */
>>  u16 stack_depth; /* max. stack depth used by this function */
>> +u32 type_id; /* btf type_id for this subprog */
>>   };
>>   
>>   /* single container for all structs
>> diff --git a/include/linux/btf.h b/include/linux/btf.h
>> index e076c4697049..90e91b52aa90 100644
>> --- a/include/linux/btf.h
>> +++ b/include/linux/btf.h
>> @@ -46,5 +46,7 @@ void btf_type_seq_show(const struct btf *btf, u32 type_id, 
>> void *obj,
>> struct seq_file *m);
>>   int btf_get_fd_by_id(u32 id);
>>   u32 btf_id(const struct btf *btf);
>> +bool is_btf_func_type(const struct btf *btf, u32 type_id);
>> +const char *btf_get_name_by_id(const struct btf *btf, u32 type_id);
>>   
>>   #endif
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index f9187b41dff6..7ebbf4f06a65 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -332,6 +332,9 @@ union bpf_attr {
>>   * (context accesses, allowed helpers, etc).
>>   */
>>  __u32   expected_attach_type;
>> +__u32   prog_btf_fd;/* fd pointing to BTF type data 
>> */
>> +__u32   func_info_len;  /* func_info length */
>> +__aligned_u64   func_info;  /* func type info */
>>  };
>>   
>>  struct { /* anonymous struct used by BPF_OBJ_* commands */
>> @@ -2585,6 +2588,9 @@ struct bpf_prog_info {
>>  __u32 nr_jited_func_lens;
>>  __aligned_u64 jited_ksyms;
>>  __aligned_u64 jited_func_lens;
>> +__u32 btf_id;
>> +__u32 nr_jited_func_types;
>> +__aligned_u64 jited_func_types;
>>   } __attribute__((aligned(8)));
>>   
>>   struct bpf_map_info {
>> @@ -2896,4 +2902,9 @@ struct bpf_flow_keys {
>>  };
>>   };
>>   
>> +struct bpf_func_info {
>> +__u32   insn_offset;
>> +__u32   type_id;
>> +};
>> +
>>   #endif /* _UAPI__LINUX_BPF_H__ */
>> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
>> index 794a185f11bf..85b8eeccddbd 100644
>> --- a/kernel/bpf/btf.c
>> +++ b/kernel/bpf/btf.c
>> @@ -486,6 +486,15 @@ static const struct btf_type *btf_typ

Re: [PATCH bpf-next 2/3] bpf: emit RECORD_MMAP events for bpf prog load/unload

2018-10-15 Thread Song Liu
m, so users looking at kernel stack traces
> with bpf_prog_TAG can find the source.
> It's similar to build-id, but not going to help perf to annotate
> actual x86 instructions inside JITed image and show src code.
> Since JIT runs in the kernel this problem cannot be solved by user space only.
> It's a difficult problem and we have a plan to tackle that,
> but it's step 2. A bunch of infra is needed on bpf side to
> preserve the association during src_in_C -> original bpf insns ->
> translated bpf insns -> JITed asm.
> Then report it back to user space and then teach perf to properly annotate 
> progs.
>
> > Will the JITed code from some BPF bytecode be different if the same
> > bytecode is JITed in several machines, all having the exact same
> > hardware?
>
> Yes. JITed code will be different depending on sysctl knobs (like const 
> blinding)
> So the same original bpf byte stream loaded at different times
> may have different JITed image.
>
> Even without security features like blinding the JIT can be different.
> the bpf maps are separate from bpf progs. The bpf map is created first.
> Then the same bpf instruction stream (depending on map type that it uses)
> may be optimized by the verifier differently causing difference in JIT.
>
> > > (instead of passing kallsyms's bpf prog name in event->mmap.filename)
> > > but bpf functions don't have their own prog_id. Multiple bpf funcs
> > > with different JITed blobs are considered to be a part of single prog_id.
> > > So as a step one I'm only extending RECORD_MMAP with addr and kallsym
> > > name of bpf function/prog.
> > > As a step two the plan is to add notification mechanism for prog 
> > > load/unload
> > > that will include prog_id and design new synthetic RECORD_* events in
> > > perf user space that will contain JITed code, line info, BTF, etc.
> >
> > So, will the kernel JIT a bytecode, load it somewhere and run it, then,
> > when unloading it, keep it somewhere (a filesystem with some limits,
> > etc) so that at some later time (with some timeouts) tooling can, using
> > its id/buildid cookie get the contents and symbol table to have a better
> > annotation experience?
>
> yes. The plan is to let perf fetch JITed image via BPF_OBJ_GET_INFO_BY_FD cmd
> during perf record run and store it inside perf.data in synthetic records.
> Then perf report can annotate it later.

Hi Peter and Arnaldo,

I am working with Alexei on the idea of fetching BPF program information via
BPF_OBJ_GET_INFO_BY_FD cmd. I added PERF_RECORD_BPF_EVENT
to perf_event_type, and dumped these events to perf event ring buffer.

I found that perf will not process event until the end of perf-record:

root@virt-test:~# ~/perf record -ag -- sleep 10
.. 10 seconds later
[ perf record: Woken up 34 times to write data ]
machine__process_bpf_event: prog_id 6 loaded
machine__process_bpf_event: prog_id 6 unloaded
[ perf record: Captured and wrote 9.337 MB perf.data (93178 samples) ]

In this example, the bpf program was loaded and then unloaded in
another terminal. When machine__process_bpf_event() processes
the load event, the bpf program is already unloaded. Therefore,
machine__process_bpf_event() will not be able to get information
about the program via BPF_OBJ_GET_INFO_BY_FD cmd.

To solve this problem, we will need to run BPF_OBJ_GET_INFO_BY_FD
as soon as perf get the event from kernel. I looked around the perf
code for a while. But I haven't found a good example where some
events are processed before the end of perf-record. Could you
please help me with this?

Thanks,
Song


[PATCH bpf-next 11/13] tools/bpf: refactor to implement btf_get_from_id() in lib/bpf

2018-10-12 Thread Yonghong Song
The function get_btf() is implemented in tools/bpf/bpftool/map.c
to get a btf structure given a map_info. This patch
refactored this function to be function btf_get_from_id()
in tools/lib/bpf so that it can be used later.

Signed-off-by: Yonghong Song 
---
 tools/bpf/bpftool/map.c | 68 ++--
 tools/lib/bpf/btf.c | 69 +
 tools/lib/bpf/btf.h |  1 +
 3 files changed, 72 insertions(+), 66 deletions(-)

diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 9f5de48f8a99..33c7dc8ddd86 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -203,70 +203,6 @@ static int do_dump_btf(const struct btf_dumper *d,
return ret;
 }
 
-static int get_btf(struct bpf_map_info *map_info, struct btf **btf)
-{
-   struct bpf_btf_info btf_info = { 0 };
-   __u32 len = sizeof(btf_info);
-   __u32 last_size;
-   int btf_fd;
-   void *ptr;
-   int err;
-
-   err = 0;
-   *btf = NULL;
-   btf_fd = bpf_btf_get_fd_by_id(map_info->btf_id);
-   if (btf_fd < 0)
-   return 0;
-
-   /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so
-* let's start with a sane default - 4KiB here - and resize it only if
-* bpf_obj_get_info_by_fd() needs a bigger buffer.
-*/
-   btf_info.btf_size = 4096;
-   last_size = btf_info.btf_size;
-   ptr = malloc(last_size);
-   if (!ptr) {
-   err = -ENOMEM;
-   goto exit_free;
-   }
-
-   bzero(ptr, last_size);
-   btf_info.btf = ptr_to_u64(ptr);
-   err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
-
-   if (!err && btf_info.btf_size > last_size) {
-   void *temp_ptr;
-
-   last_size = btf_info.btf_size;
-   temp_ptr = realloc(ptr, last_size);
-   if (!temp_ptr) {
-   err = -ENOMEM;
-   goto exit_free;
-   }
-   ptr = temp_ptr;
-   bzero(ptr, last_size);
-   btf_info.btf = ptr_to_u64(ptr);
-   err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
-   }
-
-   if (err || btf_info.btf_size > last_size) {
-   err = errno;
-   goto exit_free;
-   }
-
-   *btf = btf__new((__u8 *)btf_info.btf, btf_info.btf_size, NULL);
-   if (IS_ERR(*btf)) {
-   err = PTR_ERR(*btf);
-   *btf = NULL;
-   }
-
-exit_free:
-   close(btf_fd);
-   free(ptr);
-
-   return err;
-}
-
 static json_writer_t *get_btf_writer(void)
 {
json_writer_t *jw = jsonw_new(stdout);
@@ -753,7 +689,7 @@ static int do_dump(int argc, char **argv)
 
prev_key = NULL;
 
-   err = get_btf(&info, &btf);
+   err = btf_get_from_id(info.btf_id, );
if (err) {
p_err("failed to get btf");
goto exit_free;
@@ -897,7 +833,7 @@ static int do_lookup(int argc, char **argv)
}
 
/* here means bpf_map_lookup_elem() succeeded */
-   err = get_btf(&info, &btf);
+   err = btf_get_from_id(info.btf_id, );
if (err) {
p_err("failed to get btf");
goto exit_free;
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 4748e0bacd2b..ab654628e966 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -42,6 +42,11 @@ struct btf_ext {
__u32 func_info_len;
 };
 
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+   return (__u64) (unsigned long) ptr;
+}
+
 static int btf_add_type(struct btf *btf, struct btf_type *t)
 {
if (btf->types_size - btf->nr_types < 2) {
@@ -403,6 +408,70 @@ const char *btf__name_by_offset(const struct btf *btf, 
__u32 offset)
return NULL;
 }
 
+int btf_get_from_id(__u32 id, struct btf **btf)
+{
+   struct bpf_btf_info btf_info = { 0 };
+   __u32 len = sizeof(btf_info);
+   __u32 last_size;
+   int btf_fd;
+   void *ptr;
+   int err;
+
+   err = 0;
+   *btf = NULL;
+   btf_fd = bpf_btf_get_fd_by_id(id);
+   if (btf_fd < 0)
+   return 0;
+
+   /* we won't know btf_size until we call bpf_obj_get_info_by_fd(). so
+* let's start with a sane default - 4KiB here - and resize it only if
+* bpf_obj_get_info_by_fd() needs a bigger buffer.
+*/
+   btf_info.btf_size = 4096;
+   last_size = btf_info.btf_size;
+   ptr = malloc(last_size);
+   if (!ptr) {
+   err = -ENOMEM;
+   goto exit_free;
+   }
+
+   bzero(ptr, last_size);
+   btf_info.btf = ptr_to_u64(ptr);
+   err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
+
+   if (!err && btf_info.btf_size > last_size) {
+   void *temp_ptr;
+
+   last_size = btf_info.btf_size;
+   temp_ptr = realloc(ptr, last_size);
+   if (!temp_ptr) {
+   

[PATCH bpf-next 12/13] tools/bpf: enhance test_btf file testing to test func info

2018-10-12 Thread Yonghong Song
Change the bpf programs test_btf_haskv.c and test_btf_nokv.c to
have two sections, and enhance test_btf.c test_file feature
to test btf func_info returned by the kernel.

Signed-off-by: Yonghong Song 
---
 tools/testing/selftests/bpf/test_btf.c   | 72 +++-
 tools/testing/selftests/bpf/test_btf_haskv.c | 16 -
 tools/testing/selftests/bpf/test_btf_nokv.c  | 16 -
 3 files changed, 99 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_btf.c 
b/tools/testing/selftests/bpf/test_btf.c
index e03a8cea4bb7..0bbefb571426 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -2235,10 +2235,16 @@ static int file_has_btf_elf(const char *fn)
 static int do_test_file(unsigned int test_num)
 {
	const struct btf_file_test *test = &file_tests[test_num - 1];
+   const char *expected_fnames[] = {"_dummy_tracepoint",
+"test_long_fname_1",
+"test_long_fname_2"};
+   __u32 func_lens[10], func_types[10], info_len;
+   struct bpf_prog_info info = {};
struct bpf_object *obj = NULL;
struct bpf_program *prog;
+   struct btf *btf = NULL;
struct bpf_map *map;
-   int err;
+   int i, err, prog_fd;
 
fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num,
test->file);
@@ -2271,6 +2277,7 @@ static int do_test_file(unsigned int test_num)
err = bpf_object__load(obj);
if (CHECK(err < 0, "bpf_object__load: %d", err))
goto done;
+   prog_fd = bpf_program__fd(prog);
 
map = bpf_object__find_map_by_name(obj, "btf_map");
if (CHECK(!map, "btf_map not found")) {
@@ -2285,6 +2292,69 @@ static int do_test_file(unsigned int test_num)
  test->btf_kv_notfound))
goto done;
 
+   if (!jit_enabled)
+   goto skip_jit;
+
+   info_len = sizeof(struct bpf_prog_info);
+   info.nr_jited_func_types = ARRAY_SIZE(func_types);
+   info.nr_jited_func_lens = ARRAY_SIZE(func_lens);
+   info.jited_func_types = ptr_to_u64(&func_types[0]);
+   info.jited_func_lens = ptr_to_u64(&func_lens[0]);
+
+   err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+
+   if (CHECK(err == -1, "invalid get info errno:%d", errno)) {
+   fprintf(stderr, "%s\n", btf_log_buf);
+   err = -1;
+   goto done;
+   }
+   if (CHECK(info.nr_jited_func_lens != 3,
+ "incorrect info.nr_jited_func_lens %d",
+ info.nr_jited_func_lens)) {
+   err = -1;
+   goto done;
+   }
+   if (CHECK(info.nr_jited_func_types != 3,
+ "incorrect info.nr_jited_func_types %d",
+ info.nr_jited_func_types)) {
+   err = -1;
+   goto done;
+   }
+   if (CHECK(info.btf_id == 0, "incorrect btf_id = 0")) {
+   err = -1;
+   goto done;
+   }
+
+   err = btf_get_from_id(info.btf_id, &btf);
+   if (CHECK(err, "cannot get btf from kernel, err: %d", err))
+   goto done;
+
+   /* check three functions */
+   for (i = 0; i < 3; i++) {
+   const struct btf_type *t;
+   const char *fname;
+
+   t = btf__type_by_id(btf, func_types[i]);
+   if (CHECK(!t, "btf__type_by_id failure: id %u",
+ func_types[i])) {
+   err = -1;
+   goto done;
+   }
+
+   fname = btf__name_by_offset(btf, t->name_off);
+   err = strcmp(fname, expected_fnames[i]);
+   /* for the second and third functions in .text section,
+* the compiler may order them either way.
+*/
+   if (i && err)
+   err = strcmp(fname, expected_fnames[3 - i]);
+   if (CHECK(err, "incorrect fname %s", fname ? : "")) {
+   err = -1;
+   goto done;
+   }
+   }
+
+skip_jit:
fprintf(stderr, "OK");
 
 done:
diff --git a/tools/testing/selftests/bpf/test_btf_haskv.c 
b/tools/testing/selftests/bpf/test_btf_haskv.c
index b21b876f475d..e5c79fe0ffdb 100644
--- a/tools/testing/selftests/bpf/test_btf_haskv.c
+++ b/tools/testing/selftests/bpf/test_btf_haskv.c
@@ -24,8 +24,8 @@ struct dummy_tracepoint_args {
struct sock *sock;
 };
 
-SEC("dummy_tracepoint")
-int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+__attribute__((noinline))
+static int test_long_fname_2(struct dummy_tracepoint_args *arg)
 {
struct ipv_counts *counts;
int key = 0;
@@ -42,4 +42,16 @@ int _dummy_tracepoint(struct dummy_tracepoint

[PATCH bpf-next 13/13] tools/bpf: bpftool: add support for jited func types

2018-10-12 Thread Yonghong Song
This patch added support to print function signature
if btf func_info is available. Note that ksym
now uses function name instead of prog_name as
prog_name has a limit of 16 bytes including
ending '\0'.

The following is a sample output for selftests
test_btf with file test_btf_haskv.o:

  $ bpftool prog dump jited id 1
  int _dummy_tracepoint(struct dummy_tracepoint_args * ):
  bpf_prog_b07ccb89267cf242__dummy_tracepoint:
 0:   push   %rbp
 1:   mov%rsp,%rbp
..
3c:   add$0x28,%rbp
40:   leaveq
41:   retq

  int test_long_fname_1(struct dummy_tracepoint_args * ):
  bpf_prog_2dcecc18072623fc_test_long_fname_1:
 0:   push   %rbp
 1:   mov%rsp,%rbp
..
3a:   add$0x28,%rbp
3e:   leaveq
3f:   retq

  int test_long_fname_2(struct dummy_tracepoint_args * ):
  bpf_prog_89d64e4abf0f0126_test_long_fname_2:
 0:   push   %rbp
 1:   mov%rsp,%rbp
..
80:   add$0x28,%rbp
84:   leaveq
85:   retq

Signed-off-by: Yonghong Song 
---
 tools/bpf/bpftool/btf_dumper.c | 96 ++
 tools/bpf/bpftool/main.h   |  2 +
 tools/bpf/bpftool/prog.c   | 54 +++
 3 files changed, 152 insertions(+)

diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 55bc512a1831..a31df4202335 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -249,3 +249,99 @@ int btf_dumper_type(const struct btf_dumper *d, __u32 
type_id,
 {
return btf_dumper_do_type(d, type_id, 0, data);
 }
+
+#define BTF_PRINT_STRING(str)  \
+   {   \
+   pos += snprintf(func_sig + pos, size - pos, str);   \
+   if (pos >= size)\
+   return -1;  \
+   }
+#define BTF_PRINT_ONE_ARG(fmt, arg)\
+   {   \
+   pos += snprintf(func_sig + pos, size - pos, fmt, arg);  \
+   if (pos >= size)\
+   return -1;  \
+   }
+#define BTF_PRINT_TYPE_ONLY(type)  \
+   {   \
+   pos = __btf_dumper_type_only(btf, type, func_sig,   \
+pos, size);\
+   if (pos == -1)  \
+   return -1;  \
+   }
+
+static int __btf_dumper_type_only(struct btf *btf, __u32 type_id,
+ char *func_sig, int pos, int size)
+{
+   const struct btf_type *t = btf__type_by_id(btf, type_id);
+   const struct btf_array *array;
+   int i, vlen;
+
+   switch (BTF_INFO_KIND(t->info)) {
+   case BTF_KIND_INT:
+   BTF_PRINT_ONE_ARG("%s ",
+ btf__name_by_offset(btf, t->name_off));
+   break;
+   case BTF_KIND_STRUCT:
+   BTF_PRINT_ONE_ARG("struct %s ",
+ btf__name_by_offset(btf, t->name_off));
+   break;
+   case BTF_KIND_UNION:
+   BTF_PRINT_ONE_ARG("union %s ",
+ btf__name_by_offset(btf, t->name_off));
+   break;
+   case BTF_KIND_ENUM:
+   BTF_PRINT_ONE_ARG("enum %s ",
+ btf__name_by_offset(btf, t->name_off));
+   break;
+   case BTF_KIND_ARRAY:
+   array = (struct btf_array *)(t + 1);
+   BTF_PRINT_TYPE_ONLY(array->type);
+   BTF_PRINT_ONE_ARG("[%d]", array->nelems);
+   break;
+   case BTF_KIND_PTR:
+   BTF_PRINT_TYPE_ONLY(t->type);
+   BTF_PRINT_STRING("* ");
+   break;
+   case BTF_KIND_UNKN:
+   case BTF_KIND_FWD:
+   case BTF_KIND_TYPEDEF:
+   return -1;
+   case BTF_KIND_VOLATILE:
+   BTF_PRINT_STRING("volatile ");
+   BTF_PRINT_TYPE_ONLY(t->type);
+   break;
+   case BTF_KIND_CONST:
+   BTF_PRINT_STRING("const ");
+   BTF_PRINT_TYPE_ONLY(t->type);
+   break;
+   case BTF_KIND_RESTRICT:
+   BTF_PRINT_STRING("restrict ");
+   BTF_PRINT_TYPE_ONLY(t->type);
+   break;
+   case BTF_KIND_FUNC:
+   case BTF_KIND_FUNC_PROTO:
+   BTF_PRINT_TYPE_ONLY(t->type);
+   BTF_PRINT_ONE_ARG("%s(", btf__name_by_offset

[PATCH bpf-next 10/13] tools/bpf: do not use pahole if clang/llvm can generate BTF sections

2018-10-12 Thread Yonghong Song
Add additional checks in tools/testing/selftests/bpf and
samples/bpf such that if clang/llvm compiler can generate
BTF sections, do not use pahole.

Signed-off-by: Yonghong Song 
---
 samples/bpf/Makefile | 8 
 tools/testing/selftests/bpf/Makefile | 8 
 2 files changed, 16 insertions(+)

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index be0a961450bc..870fe7ee2b69 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -208,12 +208,20 @@ endif
 BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
 BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
 BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 
'usage.*llvm')
+BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \
+ clang -target bpf -O2 -g -c -x c - -o 
./llvm_btf_verify.o; \
+ readelf -S ./llvm_btf_verify.o | grep BTF; \
+ /bin/rm -f ./llvm_btf_verify.o)
 
+ifneq ($(BTF_LLVM_PROBE),)
+   EXTRA_CFLAGS += -g
+else
 ifneq ($(and $(BTF_LLC_PROBE),$(BTF_PAHOLE_PROBE),$(BTF_OBJCOPY_PROBE)),)
EXTRA_CFLAGS += -g
LLC_FLAGS += -mattr=dwarfris
DWARF2BTF = y
 endif
+endif
 
 # Trick to allow make to be run from this directory
 all:
diff --git a/tools/testing/selftests/bpf/Makefile 
b/tools/testing/selftests/bpf/Makefile
index d24afe8b821d..83240e54c6a8 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -119,7 +119,14 @@ $(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
 BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
 BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
 BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 
'usage.*llvm')
+BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \
+ clang -target bpf -O2 -g -c -x c - -o 
./llvm_btf_verify.o; \
+ readelf -S ./llvm_btf_verify.o | grep BTF; \
+ /bin/rm -f ./llvm_btf_verify.o)
 
+ifneq ($(BTF_LLVM_PROBE),)
+   CLANG_FLAGS += -g
+else
 ifneq ($(BTF_LLC_PROBE),)
 ifneq ($(BTF_PAHOLE_PROBE),)
 ifneq ($(BTF_OBJCOPY_PROBE),)
@@ -129,6 +136,7 @@ ifneq ($(BTF_OBJCOPY_PROBE),)
 endif
 endif
 endif
+endif
 
 $(OUTPUT)/%.o: %.c
$(CLANG) $(CLANG_FLAGS) \
-- 
2.17.1



[PATCH bpf-next 05/13] bpf: get better bpf_prog ksyms based on btf func type_id

2018-10-12 Thread Yonghong Song
This patch added interface to load a program with the following
additional information:
   . prog_btf_fd
   . func_info and func_info_len
where func_info will provide the function range and type_id
corresponding to each function.

If verifier agrees with function range provided by the user,
the bpf_prog ksym for each function will use the func name
provided in the type_id, which is supposed to provide better
encoding as it is not limited by 16 bytes program name
limitation and this is better for bpf program which contains
multiple subprograms.

The bpf_prog_info interface is also extended to
return btf_id and jited_func_types, so user spaces can
print out the function prototype for each jited function.

Signed-off-by: Yonghong Song 
---
 include/linux/bpf.h  |  2 +
 include/linux/bpf_verifier.h |  1 +
 include/linux/btf.h  |  2 +
 include/uapi/linux/bpf.h | 11 +
 kernel/bpf/btf.c | 16 +++
 kernel/bpf/core.c|  9 
 kernel/bpf/syscall.c | 86 +++-
 kernel/bpf/verifier.c| 50 +
 8 files changed, 176 insertions(+), 1 deletion(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9b558713447f..e9c63ffa01af 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -308,6 +308,8 @@ struct bpf_prog_aux {
void *security;
 #endif
struct bpf_prog_offload *offload;
+   struct btf *btf;
+   u32 type_id; /* type id for this prog/func */
union {
struct work_struct work;
struct rcu_head rcu;
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 9e8056ec20fa..e84782ec50ac 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -201,6 +201,7 @@ static inline bool bpf_verifier_log_needed(const struct 
bpf_verifier_log *log)
 struct bpf_subprog_info {
u32 start; /* insn idx of function entry point */
u16 stack_depth; /* max. stack depth used by this function */
+   u32 type_id; /* btf type_id for this subprog */
 };
 
 /* single container for all structs
diff --git a/include/linux/btf.h b/include/linux/btf.h
index e076c4697049..90e91b52aa90 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -46,5 +46,7 @@ void btf_type_seq_show(const struct btf *btf, u32 type_id, 
void *obj,
   struct seq_file *m);
 int btf_get_fd_by_id(u32 id);
 u32 btf_id(const struct btf *btf);
+bool is_btf_func_type(const struct btf *btf, u32 type_id);
+const char *btf_get_name_by_id(const struct btf *btf, u32 type_id);
 
 #endif
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f9187b41dff6..7ebbf4f06a65 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -332,6 +332,9 @@ union bpf_attr {
 * (context accesses, allowed helpers, etc).
 */
__u32   expected_attach_type;
+   __u32   prog_btf_fd;/* fd pointing to BTF type data 
*/
+   __u32   func_info_len;  /* func_info length */
+   __aligned_u64   func_info;  /* func type info */
};
 
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -2585,6 +2588,9 @@ struct bpf_prog_info {
__u32 nr_jited_func_lens;
__aligned_u64 jited_ksyms;
__aligned_u64 jited_func_lens;
+   __u32 btf_id;
+   __u32 nr_jited_func_types;
+   __aligned_u64 jited_func_types;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
@@ -2896,4 +2902,9 @@ struct bpf_flow_keys {
};
 };
 
+struct bpf_func_info {
+   __u32   insn_offset;
+   __u32   type_id;
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 794a185f11bf..85b8eeccddbd 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -486,6 +486,15 @@ static const struct btf_type *btf_type_by_id(const struct 
btf *btf, u32 type_id)
return btf->types[type_id];
 }
 
+bool is_btf_func_type(const struct btf *btf, u32 type_id)
+{
+   const struct btf_type *type = btf_type_by_id(btf, type_id);
+
+   if (!type || BTF_INFO_KIND(type->info) != BTF_KIND_FUNC)
+   return false;
+   return true;
+}
+
 /*
  * Regular int is not a bit field and it must be either
  * u8/u16/u32/u64.
@@ -2579,3 +2588,10 @@ u32 btf_id(const struct btf *btf)
 {
return btf->id;
 }
+
+const char *btf_get_name_by_id(const struct btf *btf, u32 type_id)
+{
+   const struct btf_type *t = btf_type_by_id(btf, type_id);
+
+   return btf_name_by_offset(btf, t->name_off);
+}
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 3f5bf1af0826..add3866a120e 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -27,6 +27,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -387,6 +388,7 @@ bpf_get_prog_addr_region(const struct bpf_prog *prog,
 static void bpf_get_prog_name(

[PATCH bpf-next 06/13] tools/bpf: sync kernel uapi bpf.h header to tools directory

2018-10-12 Thread Yonghong Song
The kernel uapi bpf.h is synced to tools directory.

Signed-off-by: Yonghong Song 
---
 tools/include/uapi/linux/bpf.h | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f9187b41dff6..7ebbf4f06a65 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -332,6 +332,9 @@ union bpf_attr {
 * (context accesses, allowed helpers, etc).
 */
__u32   expected_attach_type;
+   __u32   prog_btf_fd;/* fd pointing to BTF type data 
*/
+   __u32   func_info_len;  /* func_info length */
+   __aligned_u64   func_info;  /* func type info */
};
 
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -2585,6 +2588,9 @@ struct bpf_prog_info {
__u32 nr_jited_func_lens;
__aligned_u64 jited_ksyms;
__aligned_u64 jited_func_lens;
+   __u32 btf_id;
+   __u32 nr_jited_func_types;
+   __aligned_u64 jited_func_types;
 } __attribute__((aligned(8)));
 
 struct bpf_map_info {
@@ -2896,4 +2902,9 @@ struct bpf_flow_keys {
};
 };
 
+struct bpf_func_info {
+   __u32   insn_offset;
+   __u32   type_id;
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
-- 
2.17.1



[PATCH bpf-next 07/13] tools/bpf: add new fields for program load in lib/bpf

2018-10-12 Thread Yonghong Song
The new fields are added for program load in lib/bpf so that
an application using the API bpf_load_program_xattr() is able
to load a program with btf and func_info data.

This functionality will be used in next patch
by bpf selftest test_btf.

Signed-off-by: Yonghong Song 
---
 tools/lib/bpf/bpf.c | 3 +++
 tools/lib/bpf/bpf.h | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index d70a255cb05e..d8d48ab34220 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -196,6 +196,9 @@ int bpf_load_program_xattr(const struct 
bpf_load_program_attr *load_attr,
attr.log_level = 0;
attr.kern_version = load_attr->kern_version;
attr.prog_ifindex = load_attr->prog_ifindex;
+   attr.prog_btf_fd = load_attr->prog_btf_fd;
+   attr.func_info_len = load_attr->func_info_len;
+   attr.func_info = ptr_to_u64(load_attr->func_info);
memcpy(attr.prog_name, load_attr->name,
   min(name_len, BPF_OBJ_NAME_LEN - 1));
 
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 87520a87a75f..d8fe72b22168 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -67,6 +67,9 @@ struct bpf_load_program_attr {
const char *license;
__u32 kern_version;
__u32 prog_ifindex;
+   __u32 prog_btf_fd;
+   __u32 func_info_len;
+   const struct bpf_func_info *func_info;
 };
 
 /* Recommend log buffer size */
-- 
2.17.1



[PATCH bpf-next 03/13] tools/bpf: sync kernel btf.h header

2018-10-12 Thread Yonghong Song
The kernel uapi btf.h is synced to the tools directory.

Signed-off-by: Martin KaFai Lau 
Signed-off-by: Yonghong Song 
---
 tools/include/uapi/linux/btf.h | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index 972265f32871..63f8500e6f34 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -40,7 +40,8 @@ struct btf_type {
/* "size" is used by INT, ENUM, STRUCT and UNION.
 * "size" tells the size of the type it is describing.
 *
-* "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
+* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
+* FUNC and FUNC_PROTO.
 * "type" is a type_id referring to another type.
 */
union {
@@ -64,8 +65,10 @@ struct btf_type {
 #define BTF_KIND_VOLATILE  9   /* Volatile */
 #define BTF_KIND_CONST 10  /* Const*/
 #define BTF_KIND_RESTRICT  11  /* Restrict */
-#define BTF_KIND_MAX   11
-#define NR_BTF_KINDS   12
+#define BTF_KIND_FUNC  12  /* Function */
+#define BTF_KIND_FUNC_PROTO13  /* Function Prototype   */
+#define BTF_KIND_MAX   13
+#define NR_BTF_KINDS   14
 
 /* For some specific BTF_KIND, "struct btf_type" is immediately
  * followed by extra data.
-- 
2.17.1



[PATCH bpf-next 08/13] tools/bpf: extends test_btf to test load/retrieve func_type info

2018-10-12 Thread Yonghong Song
A two function bpf program is loaded with btf and func_info.
After successful prog load, the bpf_get_info syscall is called
to retrieve prog info to ensure the types returned from the
kernel matches the types passed to the kernel from the
user space.

Several negative tests are also added to test loading/retrieving
of func_type info.

Signed-off-by: Yonghong Song 
---
 tools/testing/selftests/bpf/test_btf.c | 278 -
 1 file changed, 275 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_btf.c 
b/tools/testing/selftests/bpf/test_btf.c
index b6461c3c5e11..e03a8cea4bb7 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -5,6 +5,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -22,9 +23,13 @@
 #include "bpf_rlimit.h"
 #include "bpf_util.h"
 
+#define MAX_INSNS  512
+#define MAX_SUBPROGS   16
+
 static uint32_t pass_cnt;
 static uint32_t error_cnt;
 static uint32_t skip_cnt;
+static bool jit_enabled;
 
 #define CHECK(condition, format...) ({ \
int __ret = !!(condition);  \
@@ -60,6 +65,24 @@ static int __base_pr(const char *format, ...)
return err;
 }
 
+static bool is_jit_enabled(void)
+{
+   const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
+   bool enabled = false;
+   int sysctl_fd;
+
+   sysctl_fd = open(jit_sysctl, 0, O_RDONLY);
+   if (sysctl_fd != -1) {
+   char tmpc;
+
+   if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1)
+   enabled = (tmpc != '0');
+   close(sysctl_fd);
+   }
+
+   return enabled;
+}
+
 #define BTF_INFO_ENC(kind, root, vlen) \
((!!(root) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
 
@@ -103,6 +126,7 @@ static struct args {
bool get_info_test;
bool pprint_test;
bool always_log;
+   bool func_type_test;
 } args;
 
 static char btf_log_buf[BTF_LOG_BUF_SIZE];
@@ -2693,16 +2717,256 @@ static int test_pprint(void)
return err;
 }
 
+static struct btf_func_type_test {
+   const char *descr;
+   const char *str_sec;
+   __u32 raw_types[MAX_NR_RAW_TYPES];
+   __u32 str_sec_size;
+   struct bpf_insn insns[MAX_INSNS];
+   __u32 prog_type;
+   struct bpf_func_info func_info[MAX_SUBPROGS];
+   __u32 func_info_len;
+   bool expected_prog_load_failure;
+} func_type_test[] = {
+
+{
+   .descr = "func_type test #1",
+   .str_sec = "\0int\0unsigned int\0funcA\0funcB",
+   .raw_types = {
+   BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+   BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4),   /* [2] */
+   BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 2), 1),  
/* [3] */
+   1, 2,
+   BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 2), 1),  
/* [4] */
+   2, 1,
+   BTF_END_RAW,
+   },
+   .str_sec_size = sizeof("\0int\0unsigned int\0funcA\0funcB"),
+   .insns = {
+   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+   BPF_MOV64_IMM(BPF_REG_0, 1),
+   BPF_EXIT_INSN(),
+   BPF_MOV64_IMM(BPF_REG_0, 2),
+   BPF_EXIT_INSN(),
+   },
+   .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+   .func_info = { {0, 3}, {3, 4} },
+   .func_info_len = 2 * sizeof(struct bpf_func_info),
+},
+
+{
+   .descr = "func_type test #2",
+   .str_sec = "\0int\0unsigned int\0funcA\0funcB",
+   .raw_types = {
+   BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+   BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4),   /* [2] */
+   /* incorrect func type */
+   BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 2), 
1),  /* [3] */
+   1, 2,
+   BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 2), 1),  
/* [4] */
+   2, 1,
+   BTF_END_RAW,
+   },
+   .str_sec_size = sizeof("\0int\0unsigned int\0funcA\0funcB"),
+   .insns = {
+   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+   BPF_MOV64_IMM(BPF_REG_0, 1),
+   BPF_EXIT_INSN(),
+   BPF_MOV64_IMM(BPF_REG_0, 2),
+   BPF_EXIT_INSN(),
+   },
+   .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+   .func_info = { {0, 3}, {3, 4} },
+   .func_info_len = 2 * sizeof(struct bpf_func_info),
+   .expected_prog_load_failure = true,
+},
+
+{
+   .descr = "func_type test #3",
+   .str_sec = "\0int\0unsigned int\0funcA\0funcB",
+   .raw_types = {
+   BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+ 

[PATCH bpf-next 02/13] bpf: btf: Add BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO

2018-10-12 Thread Yonghong Song
This patch adds BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO
support to the type section. BTF_KIND_FUNC_PROTO is used
to specify the type of a function pointer. With this,
BTF has a complete set of C types (except float).

BTF_KIND_FUNC is used to specify the signature of a
defined subprogram. BTF_KIND_FUNC_PROTO can be referenced
by another type, e.g., a pointer type, and BTF_KIND_FUNC
type cannot be referenced by another type.

For both BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO types,
the func return type is in t->type (where t is a
"struct btf_type" object). The func args are an array of
u32s immediately following object "t".

As a concrete example, for the C program below,
  $ cat test.c
  int foo(int (*bar)(int)) { return bar(5); }
with latest llvm trunk built with Debug mode, we have
  $ clang -target bpf -g -O2 -mllvm -debug-only=btf -c test.c
  Type Table:
  [1] FUNC name_off=1 info=0x0c01 size/type=2
  param_type=3
  [2] INT name_off=11 info=0x0100 size/type=4
  desc=0x0120
  [3] PTR name_off=0 info=0x0200 size/type=4
  [4] FUNC_PROTO name_off=0 info=0x0d01 size/type=2
  param_type=2

  String Table:
  0 :
  1 : foo
  5 : .text
  11 : int
  15 : test.c
  22 : int foo(int (*bar)(int)) { return bar(5); }

  FuncInfo Table:
  sec_name_off=5
  insn_offset= type_id=1

  ...

(Eventually we shall have bpftool to dump btf information
 like the above.)

Function "foo" has a FUNC type (type_id = 1).
The parameter of "foo" has type_id 3 which is PTR->FUNC_PROTO,
where FUNC_PROTO refers to function pointer "bar".

In FuncInfo Table, for section .text, the function,
with to-be-determined offset (marked as ),
has type_id=1 which refers to a FUNC type.
This way, the function signature is
available to both kernel and user space.
Here, the insn offset is not available during the dump time
as relocation is resolved pretty late in the compilation process.

Signed-off-by: Martin KaFai Lau 
Signed-off-by: Yonghong Song 
---
 include/uapi/linux/btf.h |   9 +-
 kernel/bpf/btf.c | 277 ++-
 2 files changed, 250 insertions(+), 36 deletions(-)

diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index 972265f32871..63f8500e6f34 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -40,7 +40,8 @@ struct btf_type {
/* "size" is used by INT, ENUM, STRUCT and UNION.
 * "size" tells the size of the type it is describing.
 *
-* "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
+* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
+* FUNC and FUNC_PROTO.
 * "type" is a type_id referring to another type.
 */
union {
@@ -64,8 +65,10 @@ struct btf_type {
 #define BTF_KIND_VOLATILE  9   /* Volatile */
 #define BTF_KIND_CONST 10  /* Const*/
 #define BTF_KIND_RESTRICT  11  /* Restrict */
-#define BTF_KIND_MAX   11
-#define NR_BTF_KINDS   12
+#define BTF_KIND_FUNC  12  /* Function */
+#define BTF_KIND_FUNC_PROTO13  /* Function Prototype   */
+#define BTF_KIND_MAX   13
+#define NR_BTF_KINDS   14
 
 /* For some specific BTF_KIND, "struct btf_type" is immediately
  * followed by extra data.
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index be406d8906ce..794a185f11bf 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -5,6 +5,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -259,6 +260,8 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
[BTF_KIND_VOLATILE] = "VOLATILE",
[BTF_KIND_CONST]= "CONST",
[BTF_KIND_RESTRICT] = "RESTRICT",
+   [BTF_KIND_FUNC] = "FUNC",
+   [BTF_KIND_FUNC_PROTO]   = "FUNC_PROTO",
 };
 
 struct btf_kind_operations {
@@ -281,6 +284,9 @@ struct btf_kind_operations {
 static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS];
 static struct btf_type btf_void;
 
+static int btf_resolve(struct btf_verifier_env *env,
+  const struct btf_type *t, u32 type_id);
+
 static bool btf_type_is_modifier(const struct btf_type *t)
 {
/* Some of them is not strictly a C modifier
@@ -314,9 +320,20 @@ static bool btf_type_is_fwd(const struct btf_type *t)
return BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
 }
 
+static bool btf_type_is_func(const struct btf_type *t)
+{
+   return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC;
+}
+
+static bool btf_type_is_func_proto(const struct btf_type *t)
+{
+   return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO;
+}
+
 static bool btf_type_nosize(const struct btf_type *t)
 {
-   return btf_type_is_void(t) || btf_type_is_fwd(t);
+   return btf_type_is_void(t) 

[PATCH bpf-next 01/13] bpf: btf: Break up btf_type_is_void()

2018-10-12 Thread Yonghong Song
This patch breaks up btf_type_is_void() into
btf_type_is_void() and btf_type_is_fwd().

It also adds btf_type_nosize() to better describe it is
testing a type has nosize info.

Signed-off-by: Martin KaFai Lau 
---
 kernel/bpf/btf.c | 37 ++---
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 378cef70341c..be406d8906ce 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -306,15 +306,22 @@ static bool btf_type_is_modifier(const struct btf_type *t)
 
 static bool btf_type_is_void(const struct btf_type *t)
 {
-   /* void => no type and size info.
-* Hence, FWD is also treated as void.
-*/
-   return t == _void || BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
+   return t == _void;
+}
+
+static bool btf_type_is_fwd(const struct btf_type *t)
+{
+   return BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
+}
+
+static bool btf_type_nosize(const struct btf_type *t)
+{
+   return btf_type_is_void(t) || btf_type_is_fwd(t);
 }
 
-static bool btf_type_is_void_or_null(const struct btf_type *t)
+static bool btf_type_nosize_or_null(const struct btf_type *t)
 {
-   return !t || btf_type_is_void(t);
+   return !t || btf_type_nosize(t);
 }
 
 /* union is only a special case of struct:
@@ -826,7 +833,7 @@ const struct btf_type *btf_type_id_size(const struct btf 
*btf,
u32 size = 0;
 
size_type = btf_type_by_id(btf, size_type_id);
-   if (btf_type_is_void_or_null(size_type))
+   if (btf_type_nosize_or_null(size_type))
return NULL;
 
if (btf_type_has_size(size_type)) {
@@ -842,7 +849,7 @@ const struct btf_type *btf_type_id_size(const struct btf 
*btf,
size = btf->resolved_sizes[size_type_id];
size_type_id = btf->resolved_ids[size_type_id];
size_type = btf_type_by_id(btf, size_type_id);
-   if (btf_type_is_void(size_type))
+   if (btf_type_nosize_or_null(size_type))
return NULL;
}
 
@@ -1164,7 +1171,7 @@ static int btf_modifier_resolve(struct btf_verifier_env 
*env,
}
 
/* "typedef void new_void", "const void"...etc */
-   if (btf_type_is_void(next_type))
+   if (btf_type_is_void(next_type) || btf_type_is_fwd(next_type))
goto resolved;
 
if (!env_type_is_resolve_sink(env, next_type) &&
@@ -1178,7 +1185,7 @@ static int btf_modifier_resolve(struct btf_verifier_env 
*env,
 * pretty print).
 */
if (!btf_type_id_size(btf, _type_id, _type_size) &&
-   !btf_type_is_void(btf_type_id_resolve(btf, _type_id))) {
+   !btf_type_nosize(btf_type_id_resolve(btf, _type_id))) {
btf_verifier_log_type(env, v->t, "Invalid type_id");
return -EINVAL;
}
@@ -1205,7 +1212,7 @@ static int btf_ptr_resolve(struct btf_verifier_env *env,
}
 
/* "void *" */
-   if (btf_type_is_void(next_type))
+   if (btf_type_is_void(next_type) || btf_type_is_fwd(next_type))
goto resolved;
 
if (!env_type_is_resolve_sink(env, next_type) &&
@@ -1235,7 +1242,7 @@ static int btf_ptr_resolve(struct btf_verifier_env *env,
}
 
if (!btf_type_id_size(btf, _type_id, _type_size) &&
-   !btf_type_is_void(btf_type_id_resolve(btf, _type_id))) {
+   !btf_type_nosize(btf_type_id_resolve(btf, _type_id))) {
btf_verifier_log_type(env, v->t, "Invalid type_id");
return -EINVAL;
}
@@ -1396,7 +1403,7 @@ static int btf_array_resolve(struct btf_verifier_env *env,
/* Check array->index_type */
index_type_id = array->index_type;
index_type = btf_type_by_id(btf, index_type_id);
-   if (btf_type_is_void_or_null(index_type)) {
+   if (btf_type_nosize_or_null(index_type)) {
btf_verifier_log_type(env, v->t, "Invalid index");
return -EINVAL;
}
@@ -1415,7 +1422,7 @@ static int btf_array_resolve(struct btf_verifier_env *env,
/* Check array->type */
elem_type_id = array->type;
elem_type = btf_type_by_id(btf, elem_type_id);
-   if (btf_type_is_void_or_null(elem_type)) {
+   if (btf_type_nosize_or_null(elem_type)) {
btf_verifier_log_type(env, v->t,
  "Invalid elem");
return -EINVAL;
@@ -1615,7 +1622,7 @@ static int btf_struct_resolve(struct btf_verifier_env 
*env,
const struct btf_type *member_type = btf_type_by_id(env->btf,
member_type_id);
 
-   if (btf_type_is_void_or_null(member_type)) {
+   if (btf_type_nosize_or_null(member_type)) {
btf_verifier_log_member(env, v->t, member,
"Invalid member");
return -EINVAL;
-- 
2.17.1



[PATCH bpf-next 04/13] tools/bpf: add btf func/func_proto unit tests in selftest test_btf

2018-10-12 Thread Yonghong Song
Add several BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO
unit tests in bpf selftest test_btf.

Signed-off-by: Martin KaFai Lau 
Signed-off-by: Yonghong Song 
---
 tools/lib/bpf/btf.c|   4 +
 tools/testing/selftests/bpf/test_btf.c | 216 +
 2 files changed, 220 insertions(+)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 449591aa9900..33095fc1860b 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -165,6 +165,10 @@ static int btf_parse_type_sec(struct btf *btf, 
btf_print_fn_t err_log)
case BTF_KIND_ENUM:
next_type += vlen * sizeof(struct btf_enum);
break;
+   case BTF_KIND_FUNC:
+   case BTF_KIND_FUNC_PROTO:
+   next_type += vlen * sizeof(int);
+   break;
case BTF_KIND_TYPEDEF:
case BTF_KIND_PTR:
case BTF_KIND_FWD:
diff --git a/tools/testing/selftests/bpf/test_btf.c 
b/tools/testing/selftests/bpf/test_btf.c
index f42b3396d622..b6461c3c5e11 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -1374,6 +1374,222 @@ static struct btf_raw_test raw_tests[] = {
.map_create_err = true,
 },
 
+{
+   .descr = "func pointer #1",
+   .raw_types = {
+   BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+   BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),   /* [2] */
+   /* int (*func)(int, unsigned int) */
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 2), 1),
/* [3] */
+   1, 2,
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),   /* [4] 
*/
+   BTF_END_RAW,
+   },
+   .str_sec = "",
+   .str_sec_size = sizeof(""),
+   .map_type = BPF_MAP_TYPE_ARRAY,
+   .map_name = "func_type_check_btf",
+   .key_size = sizeof(int),
+   .value_size = sizeof(int),
+   .key_type_id = 1,
+   .value_type_id = 1,
+   .max_entries = 4,
+},
+
+{
+   .descr = "func pointer #2",
+   .raw_types = {
+   BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+   BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),   /* [2] */
+   /* void (*func)(int, unsigned int, ) */
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 3), 0),
/* [3] */
+   1, 2, 0,
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),   /* [4] 
*/
+   BTF_END_RAW,
+   },
+   .str_sec = "",
+   .str_sec_size = sizeof(""),
+   .map_type = BPF_MAP_TYPE_ARRAY,
+   .map_name = "func_type_check_btf",
+   .key_size = sizeof(int),
+   .value_size = sizeof(int),
+   .key_type_id = 1,
+   .value_type_id = 1,
+   .max_entries = 4,
+},
+
+{
+   .descr = "func pointer #3",
+   .raw_types = {
+   BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+   BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),   /* [2] */
+   /* void (*func)(void, int, unsigned int) */
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 3), 0),
/* [3] */
+   1, 0, 2,
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),   /* [4] 
*/
+   BTF_END_RAW,
+   },
+   .str_sec = "",
+   .str_sec_size = sizeof(""),
+   .map_type = BPF_MAP_TYPE_ARRAY,
+   .map_name = "func_type_check_btf",
+   .key_size = sizeof(int),
+   .value_size = sizeof(int),
+   .key_type_id = 1,
+   .value_type_id = 1,
+   .max_entries = 4,
+   .btf_load_err = true,
+   .err_str = "Invalid arg#2",
+},
+
+{
+   .descr = "func pointer #4",
+   .raw_types = {
+   BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+   BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),   /* [2] */
+   /*
+* Testing:
+* BTF_KIND_CONST => BTF_KIND_TYPEDEF => BTF_KIND_PTR =>
+* BTF_KIND_FUNC_PROTO
+*/
+   /* typedef void (*func_ptr)(int, unsigned int) */
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 5),/* [3] 
*/
+   /* const func_ptr */
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 3), /* [4] 
*/
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 6),   /* [5] 
*/
+   BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 2), 0),
/* [6] */
+   1, 2,
+   BTF_END_RAW,
+   },
+   .str_sec = "",
+   .str_sec_size = sizeof(""),
+   .map_type = BPF_MAP_TYPE_ARRAY,
+   .map_name = "func_type_check_btf",
+   .ke

[PATCH bpf-next 09/13] tools/bpf: add support to read .BTF.ext sections

2018-10-12 Thread Yonghong Song
The .BTF section is already available to encode types.
These types can be used for map
pretty print. The whole .BTF will be passed to the
kernel as well for which kernel can verify and return
to the user space for pretty print etc.

Recently landed llvm patch "[BPF] Add BTF generation
for BPF target" (https://reviews.llvm.org/rL344366)
will generate .BTF section and one more section
.BTF.ext. The .BTF.ext section encodes function type
information and line information. For line information,
the actual source code is encoded in the section, which
makes the compiler itself an ideal place for section
generation.

The .BTF section does not depend on any other section,
and .BTF.ext has dependency on .BTF for strings and types.

The .BTF section can be directly loaded into the
kernel, and the .BTF.ext section cannot. The loader
may need to do some relocation and merging,
similar to merging multiple code sections, before
loading into the kernel.

In this patch, only func type info is processed.
The functionality is implemented in libbpf.
In this patch, the header for .BTF.ext is the same
as the one in LLVM
(https://github.com/llvm-mirror/llvm/blob/master/include/llvm/MC/MCBTFContext.h).

Signed-off-by: Yonghong Song 
---
 tools/lib/bpf/btf.c| 232 +
 tools/lib/bpf/btf.h|  31 ++
 tools/lib/bpf/libbpf.c |  53 +-
 3 files changed, 312 insertions(+), 4 deletions(-)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 33095fc1860b..4748e0bacd2b 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -37,6 +37,11 @@ struct btf {
int fd;
 };
 
+struct btf_ext {
+   void *func_info;
+   __u32 func_info_len;
+};
+
 static int btf_add_type(struct btf *btf, struct btf_type *t)
 {
if (btf->types_size - btf->nr_types < 2) {
@@ -397,3 +402,230 @@ const char *btf__name_by_offset(const struct btf *btf, 
__u32 offset)
else
return NULL;
 }
+
+static int btf_ext_validate_func_info(const struct btf_sec_func_info *sinfo,
+ __u32 size, btf_print_fn_t err_log)
+{
+   int sec_hdrlen = sizeof(struct btf_sec_func_info);
+   __u32 record_size = sizeof(struct bpf_func_info);
+   __u32 size_left = size, num_records;
+   __u64 total_record_size;
+
+   while (size_left) {
+   if (size_left < sec_hdrlen) {
+   elog("BTF.ext func_info header not found");
+   return -EINVAL;
+   }
+
+   num_records = sinfo->num_func_info;
+   if (num_records == 0) {
+   elog("incorrect BTF.ext num_func_info");
+   return -EINVAL;
+   }
+
+   total_record_size = sec_hdrlen +
+   (__u64)num_records * record_size;
+   if (size_left < total_record_size) {
+   elog("incorrect BTF.ext num_func_info");
+   return -EINVAL;
+   }
+
+   size_left -= total_record_size;
+   sinfo = (void *)sinfo + total_record_size;
+   }
+
+   return 0;
+}
+static int btf_ext_parse_hdr(__u8 *data, __u32 data_size,
+btf_print_fn_t err_log)
+{
+   const struct btf_ext_header *hdr = (struct btf_ext_header *)data;
+   const struct btf_sec_func_info *sinfo;
+   __u32 meta_left, last_func_info_pos;
+
+   if (data_size < sizeof(*hdr)) {
+   elog("BTF.ext header not found");
+   return -EINVAL;
+   }
+
+   if (hdr->magic != BTF_MAGIC) {
+   elog("Invalid BTF.ext magic:%x\n", hdr->magic);
+   return -EINVAL;
+   }
+
+   if (hdr->version != BTF_VERSION) {
+   elog("Unsupported BTF.ext version:%u\n", hdr->version);
+   return -ENOTSUP;
+   }
+
+   if (hdr->flags) {
+   elog("Unsupported BTF.ext flags:%x\n", hdr->flags);
+   return -ENOTSUP;
+   }
+
+   meta_left = data_size - sizeof(*hdr);
+   if (!meta_left) {
+   elog("BTF.ext has no data\n");
+   return -EINVAL;
+   }
+
+   if (meta_left < hdr->func_info_off) {
+   elog("Invalid BTF.ext func_info section offset:%u\n",
+hdr->func_info_off);
+   return -EINVAL;
+   }
+
+   if (hdr->func_info_off & 0x02) {
+   elog("BTF.ext func_info section is not aligned to 4 bytes\n");
+   return -EINVAL;
+   }
+
+   last_func_info_pos = sizeof(*hdr) + hdr->func_info_off +
+hdr->func_info_len;
+   if (last_func_info_pos > data_size) {
+   elog("Invalid BTF.ext func_i

[PATCH bpf-next 00/13] bpf: add btf func info support

2018-10-12 Thread Yonghong Song
The BTF support was added to kernel by Commit 69b693f0aefa
("bpf: btf: Introduce BPF Type Format (BTF)"), which introduced
.BTF section into ELF file and is primarily
used for map pretty print.
pahole is used to convert dwarf to BTF for ELF files.

The next step would be to add func type info and debug line info
into BTF. For debug line info, it is desirable to encode
source code directly in the BTF to ease deployment and
introspection.

The func type and debug line info are relative to byte code offset.
Also since byte codes may need to be relocated by the loader,
func info and line info are placed in a different section,
.BTF.ext, so the loader could manipulate it according to how
byte codes are placed, before loading into the kernel.

LLVM commit https://reviews.llvm.org/rL344366 (in llvm trunk)
now can generate type/func/line info.
For the below example, with a llvm compiler built with Debug mode,
the following is generated:

  -bash-4.2$ cat test.c
  int foo(int (*bar)(int)) { return bar(5); }
  -bash-4.2$ clang -target bpf -g -O2 -mllvm -debug-only=btf -c test.c
  Type Table:
  [1] FUNC name_off=1 info=0x0c01 size/type=2
param_type=3
  [2] INT name_off=11 info=0x0100 size/type=4
desc=0x0120
  [3] PTR name_off=0 info=0x0200 size/type=4
  [4] FUNC_PROTO name_off=0 info=0x0d01 size/type=2
param_type=2

  String Table:
  0 : 
  1 : foo
  5 : .text
  11 : int
  15 : test.c
  22 : int foo(int (*bar)(int)) { return bar(5); }

  FuncInfo Table:
  sec_name_off=5
insn_offset= type_id=1

  LineInfo Table:
  sec_name_off=5
insn_offset= file_name_off=15 line_off=22 line_num=1 
column_num=0
insn_offset= file_name_off=15 line_off=22 line_num=1 
column_num=35
insn_offset= file_name_off=15 line_off=22 line_num=1 
column_num=28

In the above, type and string tables are in .BTF section, and
func and line info tables in .BTF.ext. The "" is the
insn offset which is not available during the dump time but
resolved during later compilation process.
Following the format specification at Patch #9 and examine the
raw data in .BTF.ext section, we have
  FuncInfo Table:
  sec_name_off=5
insn_offset=0 type_id=1

  LineInfo Table:
  sec_name_off=5
insn_offset=0  file_name_off=15 line_off=22 line_num=1 column_num=0
insn_offset=8  file_name_off=15 line_off=22 line_num=1 column_num=35
insn_offset=24 file_name_off=15 line_off=22 line_num=1 column_num=28
In the above insn_offset is the byte offset.

With this support, better ksym for bpf programs and functions can be
generated. Below is a demonstration from Patch #13.
  $ bpftool prog dump jited id 1
  int _dummy_tracepoint(struct dummy_tracepoint_args * ):
  bpf_prog_b07ccb89267cf242__dummy_tracepoint:
 0:   push   %rbp
 1:   mov%rsp,%rbp
..
3c:   add$0x28,%rbp
40:   leaveq
41:   retq

  int test_long_fname_1(struct dummy_tracepoint_args * ):
  bpf_prog_2dcecc18072623fc_test_long_fname_1:
 0:   push   %rbp
 1:   mov%rsp,%rbp
..
3a:   add$0x28,%rbp
3e:   leaveq
3f:   retq

  int test_long_fname_2(struct dummy_tracepoint_args * ):
  bpf_prog_89d64e4abf0f0126_test_long_fname_2:
 0:   push   %rbp
 1:   mov%rsp,%rbp
..
80:   add$0x28,%rbp
84:   leaveq
85:   retq

For the patchset,
Patch #1  refactors the code to break up btf_type_is_void().
Patch #2  introduces new BTF types BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO.
Patch #3  syncs btf.h header to tools directory.
Patch #4  adds btf func/func_proto self tests in test_btf.
Patch #5  adds kernel interface to load func_info to kernel
  and pass func_info to userspace.
Patch #6  syncs bpf.h header to tools directory.
Patch #7  adds news btf/func_info related fields in libbpf
  program load function.
Patch #8  extends selftest test_btf to test load/retrieve func_type info.
Patch #9  adds .BTF.ext func info support.
Patch #10 changes Makefile to avoid using pahole if llvm is capable of
  generating BTF sections.
Patch #11 refactors to have btf_get_from_id() in libbpf for reuse.
Patch #12 enhance test_btf file testing to test func info.
Patch #13 adds bpftool support for func signature dump.

Yonghong Song (13):
  bpf: btf: Break up btf_type_is_void()
  bpf: btf: Add BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO
  tools/bpf: sync kernel btf.h header
  tools/bpf: add btf func/func_proto unit tests in selftest test_btf
  bpf: get better bpf_prog ksyms based on btf func type_id
  tools/bpf: sync kernel uapi bpf.h header to tools directory
  tools/bpf: add new fields for program load in lib/bpf
  tools/bpf: extends test_btf to test load/retrieve func_type info
  tools/bpf: add support to read .BTF.ext sections
  tools/bpf: do not use pahole if clang/llvm can generate BTF sections
  tools/bpf: refactor to implement btf_get_

Re: [PATCH bpf-next v2 3/7] bpf: add MAP_LOOKUP_AND_DELETE_ELEM syscall

2018-10-10 Thread Song Liu
On Wed, Oct 10, 2018 at 10:48 AM Mauricio Vasquez
 wrote:
>
>
>
> On 10/10/2018 11:48 AM, Song Liu wrote:
> > On Wed, Oct 10, 2018 at 7:06 AM Mauricio Vasquez B
> >  wrote:
> >> The following patch implements a bpf queue/stack maps that
> >> provides the peek/pop/push functions.  There is not a direct
> >> relationship between those functions and the current maps
> >> syscalls, hence a new MAP_LOOKUP_AND_DELETE_ELEM syscall is added,
> >> this is mapped to the pop operation in the queue/stack maps
> >> and it is still to implement in other kind of maps.
> >>
> >> Signed-off-by: Mauricio Vasquez B 
> >> ---
> >>   include/linux/bpf.h  |1 +
> >>   include/uapi/linux/bpf.h |1 +
> >>   kernel/bpf/syscall.c |   82 
> >> ++
> >>   3 files changed, 84 insertions(+)
> >>
> >> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> >> index 9b558713447f..5793f0c7fbb5 100644
> >> --- a/include/linux/bpf.h
> >> +++ b/include/linux/bpf.h
> >> @@ -39,6 +39,7 @@ struct bpf_map_ops {
> >>  void *(*map_lookup_elem)(struct bpf_map *map, void *key);
> >>  int (*map_update_elem)(struct bpf_map *map, void *key, void 
> >> *value, u64 flags);
> >>  int (*map_delete_elem)(struct bpf_map *map, void *key);
> >> +   void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void 
> >> *key);
> >>
> >>  /* funcs called by prog_array and perf_event_array map */
> >>  void *(*map_fd_get_ptr)(struct bpf_map *map, struct file 
> >> *map_file,
> >> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> >> index f9187b41dff6..3bb94aa2d408 100644
> >> --- a/include/uapi/linux/bpf.h
> >> +++ b/include/uapi/linux/bpf.h
> >> @@ -103,6 +103,7 @@ enum bpf_cmd {
> >>  BPF_BTF_LOAD,
> >>  BPF_BTF_GET_FD_BY_ID,
> >>  BPF_TASK_FD_QUERY,
> >> +   BPF_MAP_LOOKUP_AND_DELETE_ELEM,
> >>   };
> >>
> >>   enum bpf_map_type {
> >> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> >> index f36c080ad356..6907d661dea5 100644
> >> --- a/kernel/bpf/syscall.c
> >> +++ b/kernel/bpf/syscall.c
> >> @@ -980,6 +980,85 @@ static int map_get_next_key(union bpf_attr *attr)
> >>  return err;
> >>   }
> >>
> >> +#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
> >> +
> >> +static int map_lookup_and_delete_elem(union bpf_attr *attr)
> >> +{
> >> +   void __user *ukey = u64_to_user_ptr(attr->key);
> >> +   void __user *uvalue = u64_to_user_ptr(attr->value);
> >> +   int ufd = attr->map_fd;
> >> +   struct bpf_map *map;
> >> +   void *key, *value, *ptr;
> >> +   u32 value_size;
> >> +   struct fd f;
> >> +   int err;
> >> +
> >> +   if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
> >> +   return -EINVAL;
> >> +
> >> +   f = fdget(ufd);
> >> +   map = __bpf_map_get(f);
> >> +   if (IS_ERR(map))
> >> +   return PTR_ERR(map);
> >> +
> >> +   if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
> >> +   err = -EPERM;
> >> +   goto err_put;
> >> +   }
> >> +
> >> +   key = __bpf_copy_key(ukey, map->key_size);
> >> +   if (IS_ERR(key)) {
> >> +   err = PTR_ERR(key);
> >> +   goto err_put;
> >> +   }
> >> +
> >> +   value_size = map->value_size;
> >> +
> >> +   err = -ENOMEM;
> >> +   value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
> >> +   if (!value)
> >> +   goto free_key;
> >> +
> >> +   err = -EFAULT;
> >> +   if (copy_from_user(value, uvalue, value_size) != 0)
> >> +   goto free_value;
> >> +
> >> +   /* must increment bpf_prog_active to avoid kprobe+bpf triggering 
> >> from
> >> +* inside bpf map update or delete otherwise deadlocks are possible
> >> +*/
> >> +   preempt_disable();
> >> +   __this_cpu_inc(bpf_prog_active);
> >> +   if (map->ops->map_lookup_and_delete_elem) {
> >> +   rcu_read_lock();
> >> +   ptr = ma

Re: [PATCH bpf-next v2 3/7] bpf: add MAP_LOOKUP_AND_DELETE_ELEM syscall

2018-10-10 Thread Song Liu
On Wed, Oct 10, 2018 at 7:06 AM Mauricio Vasquez B
 wrote:
>
> The following patch implements a bpf queue/stack maps that
> provides the peek/pop/push functions.  There is not a direct
> relationship between those functions and the current maps
> syscalls, hence a new MAP_LOOKUP_AND_DELETE_ELEM syscall is added,
> this is mapped to the pop operation in the queue/stack maps
> and it is still to implement in other kind of maps.
>
> Signed-off-by: Mauricio Vasquez B 
> ---
>  include/linux/bpf.h  |1 +
>  include/uapi/linux/bpf.h |1 +
>  kernel/bpf/syscall.c |   82 
> ++
>  3 files changed, 84 insertions(+)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 9b558713447f..5793f0c7fbb5 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -39,6 +39,7 @@ struct bpf_map_ops {
> void *(*map_lookup_elem)(struct bpf_map *map, void *key);
> int (*map_update_elem)(struct bpf_map *map, void *key, void *value, 
> u64 flags);
> int (*map_delete_elem)(struct bpf_map *map, void *key);
> +   void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void *key);
>
> /* funcs called by prog_array and perf_event_array map */
> void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index f9187b41dff6..3bb94aa2d408 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -103,6 +103,7 @@ enum bpf_cmd {
> BPF_BTF_LOAD,
> BPF_BTF_GET_FD_BY_ID,
> BPF_TASK_FD_QUERY,
> +   BPF_MAP_LOOKUP_AND_DELETE_ELEM,
>  };
>
>  enum bpf_map_type {
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index f36c080ad356..6907d661dea5 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -980,6 +980,85 @@ static int map_get_next_key(union bpf_attr *attr)
> return err;
>  }
>
> +#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
> +
> +static int map_lookup_and_delete_elem(union bpf_attr *attr)
> +{
> +   void __user *ukey = u64_to_user_ptr(attr->key);
> +   void __user *uvalue = u64_to_user_ptr(attr->value);
> +   int ufd = attr->map_fd;
> +   struct bpf_map *map;
> +   void *key, *value, *ptr;
> +   u32 value_size;
> +   struct fd f;
> +   int err;
> +
> +   if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
> +   return -EINVAL;
> +
> +   f = fdget(ufd);
> +   map = __bpf_map_get(f);
> +   if (IS_ERR(map))
> +   return PTR_ERR(map);
> +
> +   if (!(f.file->f_mode & FMODE_CAN_WRITE)) {
> +   err = -EPERM;
> +   goto err_put;
> +   }
> +
> +   key = __bpf_copy_key(ukey, map->key_size);
> +   if (IS_ERR(key)) {
> +   err = PTR_ERR(key);
> +   goto err_put;
> +   }
> +
> +   value_size = map->value_size;
> +
> +   err = -ENOMEM;
> +   value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
> +   if (!value)
> +   goto free_key;
> +
> +   err = -EFAULT;
> +   if (copy_from_user(value, uvalue, value_size) != 0)
> +   goto free_value;
> +
> +   /* must increment bpf_prog_active to avoid kprobe+bpf triggering from
> +* inside bpf map update or delete otherwise deadlocks are possible
> +*/
> +   preempt_disable();
> +   __this_cpu_inc(bpf_prog_active);
> +   if (map->ops->map_lookup_and_delete_elem) {
> +   rcu_read_lock();
> +   ptr = map->ops->map_lookup_and_delete_elem(map, key);
> +   if (ptr)
> +   memcpy(value, ptr, value_size);
I think we are exposed to race condition with push and pop in parallel.
map_lookup_and_delete_elem() only updates the head/tail, so it gives
no protection for the buffer pointed by ptr.

Thanks,
Song

> +   rcu_read_unlock();
> +   err = ptr ? 0 : -ENOENT;
> +   } else {
> +   err = -ENOTSUPP;
> +   }
> +
> +   __this_cpu_dec(bpf_prog_active);
> +   preempt_enable();
> +
> +   if (err)
> +   goto free_value;
> +
> +   if (copy_to_user(uvalue, value, value_size) != 0)
> +   goto free_value;
> +
> +   err = 0;
> +
> +free_value:
> +   kfree(value);
> +free_key:
> +   kfree(key);
> +err_put:
> +   fdput(f);
> +   return err;
> +}
> +
>  static const struct bpf_prog_ops * const bpf_prog_types[] = {
>  #define BPF_PROG_TYPE(_id, _name) \
>   

Re: [PATCH bpf-next v2 1/7] bpf: rename stack trace map operations

2018-10-10 Thread Song Liu
On Wed, Oct 10, 2018 at 7:05 AM Mauricio Vasquez B
 wrote:
>
> In the following patches queue and stack maps (FIFO and LIFO
> data structures) will be implemented.  In order to avoid confusion and
> a possible name clash rename stack_map_ops to stack_trace_map_ops
>
> Signed-off-by: Mauricio Vasquez B 

Acked-by: Song Liu 

> ---
>  include/linux/bpf_types.h |2 +-
>  kernel/bpf/stackmap.c |2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
> index 5432f4c9f50e..658509daacd4 100644
> --- a/include/linux/bpf_types.h
> +++ b/include/linux/bpf_types.h
> @@ -51,7 +51,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops)
>  BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, htab_lru_percpu_map_ops)
>  BPF_MAP_TYPE(BPF_MAP_TYPE_LPM_TRIE, trie_map_ops)
>  #ifdef CONFIG_PERF_EVENTS
> -BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops)
> +BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops)
>  #endif
>  BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops)
>  BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops)
> diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
> index b2ade10f7ec3..90daf285de03 100644
> --- a/kernel/bpf/stackmap.c
> +++ b/kernel/bpf/stackmap.c
> @@ -600,7 +600,7 @@ static void stack_map_free(struct bpf_map *map)
> put_callchain_buffers();
>  }
>
> -const struct bpf_map_ops stack_map_ops = {
> +const struct bpf_map_ops stack_trace_map_ops = {
> .map_alloc = stack_map_alloc,
> .map_free = stack_map_free,
> .map_get_next_key = stack_map_get_next_key,
>


Re: [PATCH bpf-next v2 6/7] Sync uapi/bpf.h to tools/include

2018-10-10 Thread Song Liu
On Wed, Oct 10, 2018 at 7:06 AM Mauricio Vasquez B
 wrote:
>
> Sync both files.
>
> Signed-off-by: Mauricio Vasquez B 
Acked-by: Song Liu 

> ---
>  tools/include/uapi/linux/bpf.h |   30 +-
>  1 file changed, 29 insertions(+), 1 deletion(-)
>
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index f9187b41dff6..c8824d5364ff 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -103,6 +103,7 @@ enum bpf_cmd {
> BPF_BTF_LOAD,
> BPF_BTF_GET_FD_BY_ID,
> BPF_TASK_FD_QUERY,
> +   BPF_MAP_LOOKUP_AND_DELETE_ELEM,
>  };
>
>  enum bpf_map_type {
> @@ -128,6 +129,8 @@ enum bpf_map_type {
> BPF_MAP_TYPE_CGROUP_STORAGE,
> BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
> BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
> +   BPF_MAP_TYPE_QUEUE,
> +   BPF_MAP_TYPE_STACK,
>  };
>
>  enum bpf_prog_type {
> @@ -462,6 +465,28 @@ union bpf_attr {
>   * Return
>   * 0 on success, or a negative error in case of failure.
>   *
> + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
> + * Description
> + * Push an element *value* in *map*. *flags* is one of:
> + *
> + * **BPF_EXIST**
> + * If the queue/stack is full, the oldest element is removed to
> + * make room for this.
> + * Return
> + * 0 on success, or a negative error in case of failure.
> + *
> + * int bpf_map_pop_elem(struct bpf_map *map, void *value)
> + * Description
> + * Pop an element from *map*.
> + * Return
> + * 0 on success, or a negative error in case of failure.
> + *
> + * int bpf_map_peek_elem(struct bpf_map *map, void *value)
> + * Description
> + * Get an element from *map* without removing it.
> + * Return
> + * 0 on success, or a negative error in case of failure.
> + *
>   * int bpf_probe_read(void *dst, u32 size, const void *src)
>   * Description
>   * For tracing programs, safely attempt to read *size* bytes from
> @@ -2303,7 +2328,10 @@ union bpf_attr {
> FN(skb_ancestor_cgroup_id), \
> FN(sk_lookup_tcp),  \
> FN(sk_lookup_udp),  \
> -   FN(sk_release),
> +   FN(sk_release), \
> +   FN(map_push_elem),  \
> +   FN(map_pop_elem),   \
> +   FN(map_peek_elem),
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
>   * function eBPF program intends to call
>


Re: [PATCH bpf-next v2 4/7] bpf/verifier: add ARG_PTR_TO_UNINIT_MAP_VALUE

2018-10-10 Thread Song Liu
On Wed, Oct 10, 2018 at 7:06 AM Mauricio Vasquez B
 wrote:
>
> ARG_PTR_TO_UNINIT_MAP_VALUE argument is a pointer to a memory zone
> used to save the value of a map.  Basically the same as
> ARG_PTR_TO_UNINIT_MEM, but the size does not have to be passed as an extra
> argument.
>
> This will be used in the following patch that implements some new
> helpers that receive a pointer to be filled with a map value.
>
> Signed-off-by: Mauricio Vasquez B 
Acked-by: Song Liu 
> ---
>  include/linux/bpf.h   |1 +
>  kernel/bpf/verifier.c |9 ++---
>  2 files changed, 7 insertions(+), 3 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 5793f0c7fbb5..e37b4986bb45 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -139,6 +139,7 @@ enum bpf_arg_type {
> ARG_CONST_MAP_PTR,  /* const argument used as pointer to bpf_map 
> */
> ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */
> ARG_PTR_TO_MAP_VALUE,   /* pointer to stack used as map value */
> +   ARG_PTR_TO_UNINIT_MAP_VALUE,/* pointer to valid memory used to 
> store a map value */
>
> /* the following constraints used to prototype bpf_memcmp() and other
>  * functions that access data on eBPF program stack
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 3f93a548a642..d84c91ac3b70 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -2117,7 +2117,8 @@ static int check_func_arg(struct bpf_verifier_env *env, 
> u32 regno,
> }
>
> if (arg_type == ARG_PTR_TO_MAP_KEY ||
> -   arg_type == ARG_PTR_TO_MAP_VALUE) {
> +   arg_type == ARG_PTR_TO_MAP_VALUE ||
> +   arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
> expected_type = PTR_TO_STACK;
> if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE &&
> type != expected_type)
> @@ -2187,7 +2188,8 @@ static int check_func_arg(struct bpf_verifier_env *env, 
> u32 regno,
> err = check_helper_mem_access(env, regno,
>   meta->map_ptr->key_size, false,
>   NULL);
> -   } else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
> +   } else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
> +  arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
> /* bpf_map_xxx(..., map_ptr, ..., value) call:
>  * check [value, value + map->value_size) validity
>  */
> @@ -2196,9 +2198,10 @@ static int check_func_arg(struct bpf_verifier_env 
> *env, u32 regno,
> verbose(env, "invalid map_ptr to access 
> map->value\n");
> return -EACCES;
> }
> +   meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
> err = check_helper_mem_access(env, regno,
>   meta->map_ptr->value_size, 
> false,
> - NULL);
> + meta);
> } else if (arg_type_is_mem_size(arg_type)) {
> bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
>
>


Re: [PATCH bpf-next v2 2/7] bpf/syscall: allow key to be null in map functions

2018-10-10 Thread Song Liu
On Wed, Oct 10, 2018 at 7:06 AM Mauricio Vasquez B
 wrote:
>
> This commit adds the required logic to allow key being NULL
> in case the key_size of the map is 0.
>
> A new __bpf_copy_key function helper only copies the key from
> userpsace when key_size != 0, otherwise it enforces that key must be
> null.
>
> Signed-off-by: Mauricio Vasquez B 
Acked-by: Song Liu 
> ---
>  kernel/bpf/syscall.c |   19 +++
>  1 file changed, 15 insertions(+), 4 deletions(-)
>
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 4f416234251f..f36c080ad356 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -651,6 +651,17 @@ int __weak bpf_stackmap_copy(struct bpf_map *map, void 
> *key, void *value)
> return -ENOTSUPP;
>  }
>
> +static void *__bpf_copy_key(void __user *ukey, u64 key_size)
> +{
> +   if (key_size)
> +   return memdup_user(ukey, key_size);
> +
> +   if (ukey)
> +   return ERR_PTR(-EINVAL);
> +
> +   return NULL;
> +}
> +
>  /* last field in 'union bpf_attr' used by this command */
>  #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
>
> @@ -678,7 +689,7 @@ static int map_lookup_elem(union bpf_attr *attr)
> goto err_put;
> }
>
> -   key = memdup_user(ukey, map->key_size);
> +   key = __bpf_copy_key(ukey, map->key_size);
> if (IS_ERR(key)) {
> err = PTR_ERR(key);
> goto err_put;
> @@ -774,7 +785,7 @@ static int map_update_elem(union bpf_attr *attr)
> goto err_put;
> }
>
> -   key = memdup_user(ukey, map->key_size);
> +   key = __bpf_copy_key(ukey, map->key_size);
> if (IS_ERR(key)) {
> err = PTR_ERR(key);
> goto err_put;
> @@ -876,7 +887,7 @@ static int map_delete_elem(union bpf_attr *attr)
> goto err_put;
> }
>
> -   key = memdup_user(ukey, map->key_size);
> +   key = __bpf_copy_key(ukey, map->key_size);
> if (IS_ERR(key)) {
> err = PTR_ERR(key);
> goto err_put;
> @@ -928,7 +939,7 @@ static int map_get_next_key(union bpf_attr *attr)
> }
>
> if (ukey) {
> -   key = memdup_user(ukey, map->key_size);
> +   key = __bpf_copy_key(ukey, map->key_size);
> if (IS_ERR(key)) {
> err = PTR_ERR(key);
> goto err_put;
>


Re: [PATCH bpf-next] tools/bpf: use proper type and uapi perf_event.h header for libbpf

2018-10-09 Thread Song Liu



> On Oct 9, 2018, at 4:14 PM, Yonghong Song  wrote:
> 
> Use __u32 instead u32 in libbpf.c and also use
> uapi perf_event.h instead of tools/perf/perf-sys.h.
> 
> Signed-off-by: Yonghong Song 

Acked-by: Song Liu 

> ---
> tools/lib/bpf/Makefile | 2 +-
> tools/lib/bpf/libbpf.c | 8 
> 2 files changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
> index 6ad27257fd67..79d84413ddf2 100644
> --- a/tools/lib/bpf/Makefile
> +++ b/tools/lib/bpf/Makefile
> @@ -69,7 +69,7 @@ FEATURE_USER = .libbpf
> FEATURE_TESTS = libelf libelf-mmap bpf reallocarray
> FEATURE_DISPLAY = libelf bpf
> 
> -INCLUDES = -I. -I$(srctree)/tools/include 
> -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi 
> -I$(srctree)/tools/perf
> +INCLUDES = -I. -I$(srctree)/tools/include 
> -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
> FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES)
> 
> check_feat := 1
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index ceb918c14d80..176cf5523728 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -19,7 +19,6 @@
> #include 
> #include 
> #include 
> -#include 
> #include 
> #include 
> #include 
> @@ -27,6 +26,7 @@
> #include 
> #include 
> #include 
> +#include 
> #include 
> #include 
> #include 
> @@ -169,7 +169,7 @@ static LIST_HEAD(bpf_objects_list);
> 
> struct bpf_object {
>   char license[64];
> - u32 kern_version;
> + __u32 kern_version;
> 
>   struct bpf_program *programs;
>   size_t nr_programs;
> @@ -540,7 +540,7 @@ static int
> bpf_object__init_kversion(struct bpf_object *obj,
> void *data, size_t size)
> {
> - u32 kver;
> + __u32 kver;
> 
>   if (size != sizeof(kver)) {
>   pr_warning("invalid kver section in %s\n", obj->path);
> @@ -1295,7 +1295,7 @@ static int bpf_object__collect_reloc(struct bpf_object 
> *obj)
> static int
> load_program(enum bpf_prog_type type, enum bpf_attach_type 
> expected_attach_type,
>const char *name, struct bpf_insn *insns, int insns_cnt,
> -  char *license, u32 kern_version, int *pfd, int prog_ifindex)
> +  char *license, __u32 kern_version, int *pfd, int prog_ifindex)
> {
>   struct bpf_load_program_attr load_attr;
>   char *cp, errmsg[STRERR_BUFSIZE];
> -- 
> 2.17.1
> 



[PATCH bpf-next] tools/bpf: use proper type and uapi perf_event.h header for libbpf

2018-10-09 Thread Yonghong Song
Use __u32 instead u32 in libbpf.c and also use
uapi perf_event.h instead of tools/perf/perf-sys.h.

Signed-off-by: Yonghong Song 
---
 tools/lib/bpf/Makefile | 2 +-
 tools/lib/bpf/libbpf.c | 8 
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 6ad27257fd67..79d84413ddf2 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -69,7 +69,7 @@ FEATURE_USER = .libbpf
 FEATURE_TESTS = libelf libelf-mmap bpf reallocarray
 FEATURE_DISPLAY = libelf bpf
 
-INCLUDES = -I. -I$(srctree)/tools/include 
-I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi 
-I$(srctree)/tools/perf
+INCLUDES = -I. -I$(srctree)/tools/include 
-I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
 FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES)
 
 check_feat := 1
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index ceb918c14d80..176cf5523728 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -19,7 +19,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -27,6 +26,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -169,7 +169,7 @@ static LIST_HEAD(bpf_objects_list);
 
 struct bpf_object {
char license[64];
-   u32 kern_version;
+   __u32 kern_version;
 
struct bpf_program *programs;
size_t nr_programs;
@@ -540,7 +540,7 @@ static int
 bpf_object__init_kversion(struct bpf_object *obj,
  void *data, size_t size)
 {
-   u32 kver;
+   __u32 kver;
 
if (size != sizeof(kver)) {
pr_warning("invalid kver section in %s\n", obj->path);
@@ -1295,7 +1295,7 @@ static int bpf_object__collect_reloc(struct bpf_object 
*obj)
 static int
 load_program(enum bpf_prog_type type, enum bpf_attach_type 
expected_attach_type,
 const char *name, struct bpf_insn *insns, int insns_cnt,
-char *license, u32 kern_version, int *pfd, int prog_ifindex)
+char *license, __u32 kern_version, int *pfd, int prog_ifindex)
 {
struct bpf_load_program_attr load_attr;
char *cp, errmsg[STRERR_BUFSIZE];
-- 
2.17.1



Re: [bpf-next V2 PATCH 0/3] bpf/xdp: fix generic-XDP and demonstrate VLAN manipulation

2018-10-09 Thread Song Liu
For the series:

Acked-by: Song Liu 


On Tue, Oct 9, 2018 at 3:04 AM Jesper Dangaard Brouer  wrote:
>
> While implementing PoC building blocks for eBPF code XDP+TC that can
> manipulate VLANs headers, I discovered a bug in generic-XDP.
>
> The fix should be backported to stable kernels.  Even-though
> generic-XDP was introduced in v4.12, I think the bug is not exposed
> until v4.14 in the mentioned fixes commit.
>
> ---
>
> Jesper Dangaard Brouer (3):
>   net: fix generic XDP to handle if eth header was mangled
>   bpf: make TC vlan bpf_helpers avail to selftests
>   selftests/bpf: add XDP selftests for modifying and popping VLAN headers
>
>
>  net/core/dev.c   |   14 +
>  tools/testing/selftests/bpf/Makefile |6 -
>  tools/testing/selftests/bpf/bpf_helpers.h|4
>  tools/testing/selftests/bpf/test_xdp_vlan.c  |  292 
> ++
>  tools/testing/selftests/bpf/test_xdp_vlan.sh |  195 +
>  5 files changed, 509 insertions(+), 2 deletions(-)
>  create mode 100644 tools/testing/selftests/bpf/test_xdp_vlan.c
>  create mode 100755 tools/testing/selftests/bpf/test_xdp_vlan.sh
>
> --


Re: [PATCH bpf-next 4/6] bpf: add queue and stack maps

2018-10-09 Thread Song Liu
On Tue, Oct 9, 2018 at 6:05 AM Mauricio Vasquez
 wrote:
>
>
>
> On 10/08/2018 08:36 PM, Song Liu wrote:
> > On Mon, Oct 8, 2018 at 12:12 PM Mauricio Vasquez B
> >  wrote:
> >> Queue/stack maps implement a FIFO/LIFO data storage for ebpf programs.
> >> These maps support peek, pop and push operations that are exposed to eBPF
> >> programs through the new bpf_map[peek/pop/push] helpers.  Those operations
> >> are exposed to userspace applications through the already existing
> >> syscalls in the following way:
> >>
> >> BPF_MAP_LOOKUP_ELEM-> peek
> >> BPF_MAP_LOOKUP_AND_DELETE_ELEM -> pop
> >> BPF_MAP_UPDATE_ELEM-> push
> >>
> >> Queue/stack maps are implemented using a buffer, tail and head indexes,
> >> hence BPF_F_NO_PREALLOC is not supported.
> >>
> >> As opposite to other maps, queue and stack do not use RCU for protecting
> >> maps values, the bpf_map[peek/pop] have a ARG_PTR_TO_UNINIT_MAP_VALUE
> >> argument that is a pointer to a memory zone where to save the value of a
> >> map.  Basically the same as ARG_PTR_TO_UNINIT_MEM, but the size has not
> >> be passed as an extra argument.
> >>
> >> Our main motivation for implementing queue/stack maps was to keep track
> >> of a pool of elements, like network ports in a SNAT, however we foresee
> >> other use cases, like for example saving last N kernel events in a map
> >> and then analysing from userspace.
> >>
> >> Signed-off-by: Mauricio Vasquez B 
> >> ---
> >>   include/linux/bpf.h   |7 +
> >>   include/linux/bpf_types.h |2
> >>   include/uapi/linux/bpf.h  |   35 -
> >>   kernel/bpf/Makefile   |2
> >>   kernel/bpf/core.c |3
> >>   kernel/bpf/helpers.c  |   43 ++
> >>   kernel/bpf/queue_stack_maps.c |  288 
> >> +
> >>   kernel/bpf/syscall.c  |   30 +++-
> >>   kernel/bpf/verifier.c |   28 +++-
> >>   net/core/filter.c |6 +
> >>   10 files changed, 426 insertions(+), 18 deletions(-)
> >>   create mode 100644 kernel/bpf/queue_stack_maps.c
> >>
> >> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> >> index 98c7eeb6d138..cad3bc5cffd1 100644
> >> --- a/include/linux/bpf.h
> >> +++ b/include/linux/bpf.h
> >> @@ -40,6 +40,9 @@ struct bpf_map_ops {
> >>  int (*map_update_elem)(struct bpf_map *map, void *key, void 
> >> *value, u64 flags);
> >>  int (*map_delete_elem)(struct bpf_map *map, void *key);
> >>  void *(*map_lookup_and_delete_elem)(struct bpf_map *map, void 
> >> *key);
> >> +   int (*map_push_elem)(struct bpf_map *map, void *value, u64 flags);
> >> +   int (*map_pop_elem)(struct bpf_map *map, void *value);
> >> +   int (*map_peek_elem)(struct bpf_map *map, void *value);
> >>
> >>  /* funcs called by prog_array and perf_event_array map */
> >>  void *(*map_fd_get_ptr)(struct bpf_map *map, struct file 
> >> *map_file,
> >> @@ -139,6 +142,7 @@ enum bpf_arg_type {
> >>  ARG_CONST_MAP_PTR,  /* const argument used as pointer to 
> >> bpf_map */
> >>  ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */
> >>  ARG_PTR_TO_MAP_VALUE,   /* pointer to stack used as map value */
> >> +   ARG_PTR_TO_UNINIT_MAP_VALUE,/* pointer to valid memory used to 
> >> store a map value */
> > How about we put ARG_PTR_TO_UNINIT_MAP_VALUE and related logic to a
> > separate patch?
>
> I thought it too, but this is a really small change (6 additions, 3
> deletions). Does it worth a separated patch?

I think a separate patch is better. You can also put small changes in
uapi header
in a separate patch.

Thanks,
Song


> >
> >>  /* the following constraints used to prototype bpf_memcmp() and 
> >> other
> >>   * functions that access data on eBPF program stack
> >> @@ -825,6 +829,9 @@ static inline int 
> >> bpf_fd_reuseport_array_update_elem(struct bpf_map *map,
> >>   extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
> >>   extern const struct bpf_func_proto bpf_map_update_elem_proto;
> >>   extern const struct bpf_func_proto bpf_map_delete_elem_proto;
> >> +extern const struct bpf_func_proto bpf_map_push_elem_proto;
> >> +extern const struct bpf_func_proto bpf_map_pop_el

Re: [PATCH bpf-next 6/6] selftests/bpf: test_verifier, check bpf_map_lookup_elem access in bpf prog

2018-10-09 Thread Song Liu
On Mon, Oct 8, 2018 at 6:07 PM Prashant Bhole
 wrote:
>
> map_lookup_elem isn't supported by certain map types like:
> - BPF_MAP_TYPE_PROG_ARRAY
> - BPF_MAP_TYPE_STACK_TRACE
> - BPF_MAP_TYPE_XSKMAP
> - BPF_MAP_TYPE_SOCKMAP/BPF_MAP_TYPE_SOCKHASH
> Let's add verifier tests to check whether verifier prevents
> bpf_map_lookup_elem call on above programs from bpf program.
>
> Signed-off-by: Prashant Bhole 
> Acked-by: Alexei Starovoitov 
Acked-by: Song Liu 

> ---
>  tools/testing/selftests/bpf/test_verifier.c | 121 +++-
>  1 file changed, 120 insertions(+), 1 deletion(-)
>
> diff --git a/tools/testing/selftests/bpf/test_verifier.c 
> b/tools/testing/selftests/bpf/test_verifier.c
> index 65ae44c85d27..cf4cd32b6772 100644
> --- a/tools/testing/selftests/bpf/test_verifier.c
> +++ b/tools/testing/selftests/bpf/test_verifier.c
> @@ -48,7 +48,7 @@
>
>  #define MAX_INSNS  BPF_MAXINSNS
>  #define MAX_FIXUPS 8
> -#define MAX_NR_MAPS8
> +#define MAX_NR_MAPS13
>  #define POINTER_VALUE  0xcafe4all
>  #define TEST_DATA_LEN  64
>
> @@ -65,6 +65,10 @@ struct bpf_test {
> int fixup_map_hash_48b[MAX_FIXUPS];
> int fixup_map_hash_16b[MAX_FIXUPS];
> int fixup_map_array_48b[MAX_FIXUPS];
> +   int fixup_map_sockmap[MAX_FIXUPS];
> +   int fixup_map_sockhash[MAX_FIXUPS];
> +   int fixup_map_xskmap[MAX_FIXUPS];
> +   int fixup_map_stacktrace[MAX_FIXUPS];
> int fixup_prog1[MAX_FIXUPS];
> int fixup_prog2[MAX_FIXUPS];
> int fixup_map_in_map[MAX_FIXUPS];
> @@ -4541,6 +4545,85 @@ static struct bpf_test tests[] = {
> .errstr = "invalid access to packet",
> .prog_type = BPF_PROG_TYPE_SCHED_CLS,
> },
> +   {
> +   "prevent map lookup in sockmap",
> +   .insns = {
> +   BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
> +   BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
> +   BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
> +   BPF_LD_MAP_FD(BPF_REG_1, 0),
> +   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
> +BPF_FUNC_map_lookup_elem),
> +   BPF_EXIT_INSN(),
> +   },
> +   .fixup_map_sockmap = { 3 },
> +   .result = REJECT,
> +   .errstr = "cannot pass map_type 15 into func 
> bpf_map_lookup_elem",
> +   .prog_type = BPF_PROG_TYPE_SOCK_OPS,
> +   },
> +   {
> +   "prevent map lookup in sockhash",
> +   .insns = {
> +   BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
> +   BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
> +   BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
> +   BPF_LD_MAP_FD(BPF_REG_1, 0),
> +   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
> +BPF_FUNC_map_lookup_elem),
> +   BPF_EXIT_INSN(),
> +   },
> +   .fixup_map_sockhash = { 3 },
> +   .result = REJECT,
> +   .errstr = "cannot pass map_type 18 into func 
> bpf_map_lookup_elem",
> +   .prog_type = BPF_PROG_TYPE_SOCK_OPS,
> +   },
> +   {
> +   "prevent map lookup in xskmap",
> +   .insns = {
> +   BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
> +   BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
> +   BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
> +   BPF_LD_MAP_FD(BPF_REG_1, 0),
> +   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
> +BPF_FUNC_map_lookup_elem),
> +   BPF_EXIT_INSN(),
> +   },
> +   .fixup_map_xskmap = { 3 },
> +   .result = REJECT,
> +   .errstr = "cannot pass map_type 17 into func 
> bpf_map_lookup_elem",
> +   .prog_type = BPF_PROG_TYPE_XDP,
> +   },
> +   {
> +   "prevent map lookup in stack trace",
> +   .insns = {
> +   BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
> +   BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
> +   BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
> +   BPF_LD_MAP_FD(BPF_REG_1, 0),
> +   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
> +BPF_FUNC_map_lookup_elem),
> +   BPF_EXIT_INSN(),
> +   },
&

Re: [PATCH bpf-next 5/6] selftests/bpf: test_verifier, change names of fixup maps

2018-10-09 Thread Song Liu
On Mon, Oct 8, 2018 at 6:07 PM Prashant Bhole
 wrote:
>
> Currently fixup map are named like fixup_map1, fixup_map2, and so on.
> As suggested by Alexei let's change map names such that we can
> identify map type by looking at the name.
>
> This patch is basically a find and replace change:
> fixup_map1  ->  fixup_map_hash_8b
> fixup_map2  ->  fixup_map_hash_48b
> fixup_map3  ->  fixup_map_hash_16b
> fixup_map4  ->  fixup_map_array_48b
>
> Suggested-by: Alexei Starovoitov 
> Signed-off-by: Prashant Bhole 
> Acked-by: Alexei Starovoitov 
Acked-by: Song Liu 


> ---
>  tools/testing/selftests/bpf/test_verifier.c | 380 ++--
>  1 file changed, 190 insertions(+), 190 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/test_verifier.c 
> b/tools/testing/selftests/bpf/test_verifier.c
> index bc9cd8537467..65ae44c85d27 100644
> --- a/tools/testing/selftests/bpf/test_verifier.c
> +++ b/tools/testing/selftests/bpf/test_verifier.c
> @@ -61,10 +61,10 @@ static bool unpriv_disabled = false;
>  struct bpf_test {
> const char *descr;
> struct bpf_insn insns[MAX_INSNS];
> -   int fixup_map1[MAX_FIXUPS];
> -   int fixup_map2[MAX_FIXUPS];
> -   int fixup_map3[MAX_FIXUPS];
> -   int fixup_map4[MAX_FIXUPS];
> +   int fixup_map_hash_8b[MAX_FIXUPS];
> +   int fixup_map_hash_48b[MAX_FIXUPS];
> +   int fixup_map_hash_16b[MAX_FIXUPS];
> +   int fixup_map_array_48b[MAX_FIXUPS];
> int fixup_prog1[MAX_FIXUPS];
> int fixup_prog2[MAX_FIXUPS];
> int fixup_map_in_map[MAX_FIXUPS];
> @@ -876,7 +876,7 @@ static struct bpf_test tests[] = {
>  BPF_FUNC_map_lookup_elem),
> BPF_EXIT_INSN(),
> },
> -   .fixup_map1 = { 2 },
> +   .fixup_map_hash_8b = { 2 },
> .errstr = "invalid indirect read from stack",
> .result = REJECT,
> },
> @@ -1110,7 +1110,7 @@ static struct bpf_test tests[] = {
> BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
> BPF_EXIT_INSN(),
> },
> -   .fixup_map1 = { 3 },
> +   .fixup_map_hash_8b = { 3 },
> .errstr = "R0 invalid mem access 'map_value_or_null'",
> .result = REJECT,
> },
> @@ -1127,7 +1127,7 @@ static struct bpf_test tests[] = {
> BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
> BPF_EXIT_INSN(),
> },
> -   .fixup_map1 = { 3 },
> +   .fixup_map_hash_8b = { 3 },
> .errstr = "misaligned value access",
> .result = REJECT,
> .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
> @@ -1147,7 +1147,7 @@ static struct bpf_test tests[] = {
> BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
> BPF_EXIT_INSN(),
> },
> -   .fixup_map1 = { 3 },
> +   .fixup_map_hash_8b = { 3 },
> .errstr = "R0 invalid mem access",
> .errstr_unpriv = "R0 leaks addr",
> .result = REJECT,
> @@ -1237,7 +1237,7 @@ static struct bpf_test tests[] = {
>  BPF_FUNC_map_delete_elem),
> BPF_EXIT_INSN(),
> },
> -   .fixup_map1 = { 24 },
> +   .fixup_map_hash_8b = { 24 },
> .errstr_unpriv = "R1 pointer comparison",
> .result_unpriv = REJECT,
> .result = ACCEPT,
> @@ -1391,7 +1391,7 @@ static struct bpf_test tests[] = {
> offsetof(struct __sk_buff, pkt_type)),
> BPF_EXIT_INSN(),
> },
> -   .fixup_map1 = { 4 },
> +   .fixup_map_hash_8b = { 4 },
> .errstr = "different pointers",
> .errstr_unpriv = "R1 pointer comparison",
> .result = REJECT,
> @@ -1414,7 +1414,7 @@ static struct bpf_test tests[] = {
> BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
> BPF_JMP_IMM(BPF_JA, 0, 0, -12),
> },
> -   .fixup_map1 = { 6 },
> +   .fixup_map_hash_8b = { 6 },
> .errstr = "different pointers",
> .errstr_unpriv = "R1 pointer comparison",
> .result = REJECT,
> @@ -1438,7 +1438,7 @@ static struct bpf_test tests[] = {
> BPF_MOV64_REG(BPF_REG_1,

Re: [PATCH bpf-next 4/6] tools/bpf: bpftool, print strerror when map lookup error occurs

2018-10-09 Thread Song Liu
On Mon, Oct 8, 2018 at 6:06 PM Prashant Bhole
 wrote:
>
> Since map lookup error can be ENOENT or EOPNOTSUPP, let's print
> strerror() as error message in normal and JSON output.
>
> This patch adds helper function print_entry_error() to print
> entry from lookup error occurs
>
> Example: Following example dumps a map which does not support lookup.
>
> Output before:
> root# bpftool map -jp dump id 40
> [
> "key": ["0x0a","0x00","0x00","0x00"
> ],
> "value": {
> "error": "can\'t lookup element"
> },
> "key": ["0x0b","0x00","0x00","0x00"
> ],
> "value": {
> "error": "can\'t lookup element"
> }
> ]
>
> root# bpftool map dump id 40
> can't lookup element with key:
> 0a 00 00 00
> can't lookup element with key:
> 0b 00 00 00
> Found 0 elements
>
> Output after changes:
> root# bpftool map dump -jp  id 45
> [
> "key": ["0x0a","0x00","0x00","0x00"
> ],
> "value": {
> "error": "Operation not supported"
> },
> "key": ["0x0b","0x00","0x00","0x00"
> ],
> "value": {
> "error": "Operation not supported"
> }
> ]
>
> root# bpftool map dump id 45
> key:
> 0a 00 00 00
> value:
> Operation not supported
> key:
> 0b 00 00 00
> value:
> Operation not supported
> Found 0 elements
>
> Signed-off-by: Prashant Bhole 
> Acked-by: Jakub Kicinski 
> Acked-by: Alexei Starovoitov 

Acked-by: Song Liu 

> ---
>  tools/bpf/bpftool/map.c | 29 -
>  1 file changed, 24 insertions(+), 5 deletions(-)
>
> diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
> index 28d365435fea..9f5de48f8a99 100644
> --- a/tools/bpf/bpftool/map.c
> +++ b/tools/bpf/bpftool/map.c
> @@ -336,6 +336,25 @@ static void print_entry_json(struct bpf_map_info *info, 
> unsigned char *key,
> jsonw_end_object(json_wtr);
>  }
>
> +static void print_entry_error(struct bpf_map_info *info, unsigned char *key,
> + const char *value)
> +{
> +   int value_size = strlen(value);
> +   bool single_line, break_names;
> +
> +   break_names = info->key_size > 16 || value_size > 16;
> +   single_line = info->key_size + value_size <= 24 && !break_names;
> +
> +   printf("key:%c", break_names ? '\n' : ' ');
> +   fprint_hex(stdout, key, info->key_size, " ");
> +
> +   printf(single_line ? "  " : "\n");
> +
> +   printf("value:%c%s", break_names ? '\n' : ' ', value);
> +
> +   printf("\n");
> +}
> +
>  static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
>   unsigned char *value)
>  {
> @@ -663,6 +682,7 @@ static int dump_map_elem(int fd, void *key, void *value,
>  json_writer_t *btf_wtr)
>  {
> int num_elems = 0;
> +   int lookup_errno;
>
> if (!bpf_map_lookup_elem(fd, key, value)) {
> if (json_output) {
> @@ -685,6 +705,8 @@ static int dump_map_elem(int fd, void *key, void *value,
> }
>
> /* lookup error handling */
> +   lookup_errno = errno;
> +
> if (map_is_map_of_maps(map_info->type) ||
> map_is_map_of_progs(map_info->type))
> return 0;
> @@ -694,13 +716,10 @@ static int dump_map_elem(int fd, void *key, void *value,
> print_hex_data_json(key, map_info->key_size);
> jsonw_name(json_wtr, "value");
> jsonw_start_object(json_wtr);
> -   jsonw_string_field(json_wtr, "error",
> -  "can't lookup element");
> +   jsonw_string_field(json_wtr, "error", strerror(lookup_errno));
> jsonw_end_object(json_wtr);
> } else {
> -   p_info("can't lookup element with key: ");
> -   fprint_hex(stderr, key, map_info->key_size, " ");
> -   fprintf(stderr, "\n");
> +   print_entry_error(map_info, key, strerror(lookup_errno));
> }
>
> return 0;
> --
> 2.17.1
>
>


Re: [PATCH bpf-next 3/6] tools/bpf: bpftool, split the function do_dump()

2018-10-09 Thread Song Liu
On Mon, Oct 8, 2018 at 6:06 PM Prashant Bhole
 wrote:
>
> do_dump() function in bpftool/map.c has deep indentations. In order
> to reduce deep indent, let's move element printing code out of
> do_dump() into dump_map_elem() function.
>
> Signed-off-by: Prashant Bhole 
> Acked-by: Jakub Kicinski 
> Acked-by: Alexei Starovoitov 

Acked-by: Song Liu 

> ---
>  tools/bpf/bpftool/map.c | 83 -
>  1 file changed, 49 insertions(+), 34 deletions(-)
>
> diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
> index 6003e9598973..28d365435fea 100644
> --- a/tools/bpf/bpftool/map.c
> +++ b/tools/bpf/bpftool/map.c
> @@ -658,6 +658,54 @@ static int do_show(int argc, char **argv)
> return errno == ENOENT ? 0 : -1;
>  }
>
> +static int dump_map_elem(int fd, void *key, void *value,
> +struct bpf_map_info *map_info, struct btf *btf,
> +json_writer_t *btf_wtr)
> +{
> +   int num_elems = 0;
> +
> +   if (!bpf_map_lookup_elem(fd, key, value)) {
> +   if (json_output) {
> +   print_entry_json(map_info, key, value, btf);
> +   } else {
> +   if (btf) {
> +   struct btf_dumper d = {
> +   .btf = btf,
> +   .jw = btf_wtr,
> +   .is_plain_text = true,
> +   };
> +
> +   do_dump_btf(, map_info, key, value);
> +   } else {
> +   print_entry_plain(map_info, key, value);
> +   }
> +   num_elems++;
> +   }
> +   return num_elems;
> +   }
> +
> +   /* lookup error handling */
> +   if (map_is_map_of_maps(map_info->type) ||
> +   map_is_map_of_progs(map_info->type))
> +   return 0;
> +
> +   if (json_output) {
> +   jsonw_name(json_wtr, "key");
> +   print_hex_data_json(key, map_info->key_size);
> +   jsonw_name(json_wtr, "value");
> +   jsonw_start_object(json_wtr);
> +   jsonw_string_field(json_wtr, "error",
> +  "can't lookup element");
> +   jsonw_end_object(json_wtr);
> +   } else {
> +   p_info("can't lookup element with key: ");
> +   fprint_hex(stderr, key, map_info->key_size, " ");
> +   fprintf(stderr, "\n");
> +   }
> +
> +   return 0;
> +}
> +
>  static int do_dump(int argc, char **argv)
>  {
> struct bpf_map_info info = {};
> @@ -713,40 +761,7 @@ static int do_dump(int argc, char **argv)
> err = 0;
> break;
> }
> -
> -   if (!bpf_map_lookup_elem(fd, key, value)) {
> -   if (json_output)
> -   print_entry_json(, key, value, btf);
> -   else
> -   if (btf) {
> -   struct btf_dumper d = {
> -   .btf = btf,
> -   .jw = btf_wtr,
> -   .is_plain_text = true,
> -   };
> -
> -   do_dump_btf(, , key, value);
> -   } else {
> -   print_entry_plain(, key, value);
> -   }
> -   num_elems++;
> -   } else if (!map_is_map_of_maps(info.type) &&
> -  !map_is_map_of_progs(info.type)) {
> -   if (json_output) {
> -   jsonw_name(json_wtr, "key");
> -   print_hex_data_json(key, info.key_size);
> -   jsonw_name(json_wtr, "value");
> -   jsonw_start_object(json_wtr);
> -   jsonw_string_field(json_wtr, "error",
> -  "can't lookup element");
> -   jsonw_end_object(json_wtr);
> -   } else {
> -   p_info("can't lookup element with key: ");
> -   fprint_hex(stderr, key, info.key_size, " ");
> -   fprintf(stderr, "\n");
> -   }
> -   }
> -
> +   num_elems += dump_map_elem(fd, key, value, , btf, 
> btf_wtr);
> prev_key = key;
> }
>
> --
> 2.17.1
>
>


Re: [PATCH bpf-next 2/6] bpf: return EOPNOTSUPP when map lookup isn't supported

2018-10-09 Thread Song Liu
On Mon, Oct 8, 2018 at 6:06 PM Prashant Bhole
 wrote:
>
> Return ERR_PTR(-EOPNOTSUPP) from map_lookup_elem() methods of below
> map types:
> - BPF_MAP_TYPE_PROG_ARRAY
> - BPF_MAP_TYPE_STACK_TRACE
> - BPF_MAP_TYPE_XSKMAP
> - BPF_MAP_TYPE_SOCKMAP/BPF_MAP_TYPE_SOCKHASH
>
> Signed-off-by: Prashant Bhole 
> Acked-by: Alexei Starovoitov 
Acked-by: Song Liu 
> ---
>  kernel/bpf/arraymap.c | 2 +-
>  kernel/bpf/sockmap.c  | 2 +-
>  kernel/bpf/stackmap.c | 2 +-
>  kernel/bpf/xskmap.c   | 2 +-
>  4 files changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> index dded84cbe814..24583da9ffd1 100644
> --- a/kernel/bpf/arraymap.c
> +++ b/kernel/bpf/arraymap.c
> @@ -449,7 +449,7 @@ static void fd_array_map_free(struct bpf_map *map)
>
>  static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
>  {
> -   return NULL;
> +   return ERR_PTR(-EOPNOTSUPP);
>  }
>
>  /* only called from syscall */
> diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
> index d37a1a0a6e1e..5d0677d808ae 100644
> --- a/kernel/bpf/sockmap.c
> +++ b/kernel/bpf/sockmap.c
> @@ -2096,7 +2096,7 @@ int sockmap_get_from_fd(const union bpf_attr *attr, int 
> type,
>
>  static void *sock_map_lookup(struct bpf_map *map, void *key)
>  {
> -   return NULL;
> +   return ERR_PTR(-EOPNOTSUPP);
>  }
>
>  static int sock_map_update_elem(struct bpf_map *map,
> diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
> index 8061a439ef18..b2ade10f7ec3 100644
> --- a/kernel/bpf/stackmap.c
> +++ b/kernel/bpf/stackmap.c
> @@ -505,7 +505,7 @@ const struct bpf_func_proto bpf_get_stack_proto = {
>  /* Called from eBPF program */
>  static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
>  {
> -   return NULL;
> +   return ERR_PTR(-EOPNOTSUPP);
>  }
>
>  /* Called from syscall */
> diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
> index 9f8463afda9c..ef0b7b6ef8a5 100644
> --- a/kernel/bpf/xskmap.c
> +++ b/kernel/bpf/xskmap.c
> @@ -154,7 +154,7 @@ void __xsk_map_flush(struct bpf_map *map)
>
>  static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
>  {
> -   return NULL;
> +   return ERR_PTR(-EOPNOTSUPP);
>  }
>
>  static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
> --
> 2.17.1
>
>


Re: [PATCH bpf-next 1/6] bpf: error handling when map_lookup_elem isn't supported

2018-10-09 Thread Song Liu
On Mon, Oct 8, 2018 at 6:06 PM Prashant Bhole
 wrote:
>
> The error value returned by map_lookup_elem doesn't differentiate
> whether lookup was failed because of invalid key or lookup is not
> supported.
>
> Lets add handling for -EOPNOTSUPP return value of map_lookup_elem()
> method of map, with expectation from map's implementation that it
> should return -EOPNOTSUPP if lookup is not supported.
>
> The errno for bpf syscall for BPF_MAP_LOOKUP_ELEM command will be set
> to EOPNOTSUPP if map lookup is not supported.
>
> Signed-off-by: Prashant Bhole 
> Acked-by: Alexei Starovoitov 

Acked-by: Song Liu 

> ---
>  kernel/bpf/syscall.c | 9 +++--
>  1 file changed, 7 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index 5742df21598c..4f416234251f 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -719,10 +719,15 @@ static int map_lookup_elem(union bpf_attr *attr)
> } else {
> rcu_read_lock();
> ptr = map->ops->map_lookup_elem(map, key);
> -   if (ptr)
> +   if (IS_ERR(ptr)) {
> +   err = PTR_ERR(ptr);
> +   } else if (!ptr) {
> +   err = -ENOENT;
> +   } else {
> +   err = 0;
> memcpy(value, ptr, value_size);
> +   }
> rcu_read_unlock();
> -   err = ptr ? 0 : -ENOENT;
> }
>
> if (err)
> --
> 2.17.1
>
>


<    1   2   3   4   5   6   7   8   9   10   >