On Fri, Aug 10, 2018 at 08:41:04AM -0400, Carlos Neira wrote: > This helper obtains the active namespace from current and returns pid, tgid, > device and namespace id as seen from that namespace, allowing to instrument > a process inside a container. > Device is read from /proc/self/ns/pid, as in the future it's possible that > different pid_ns files may belong to different devices, according > to the discussion between Eric Biederman and Yonghong in 2017 linux plumbers > conference. > > Currently bpf_get_current_pid_tgid(), is used to do pid filtering in bcc's > scripts but this helper returns the pid as seen by the root namespace which is > fine when a bcc script is not executed inside a container. > When the process of interest is inside a container, pid filtering will not > work > if bpf_get_current_pid_tgid() is used. This helper addresses this limitation > returning the pid as it's seen by the current namespace where the script is > executing. > > This helper has the same use cases as bpf_get_current_pid_tgid() as it can be > used to do pid filtering even inside a container. 
> > For example a bcc script using bpf_get_current_pid_tgid() > (tools/funccount.py): > > u32 pid = bpf_get_current_pid_tgid() >> 32; > if (pid != <pid_arg_passed_in>) > return 0; > > Could be modified to use bpf_get_current_pidns_info() as follows: > > struct bpf_pidns pidns; > bpf_get_current_pidns_info(&pidns, sizeof(struct bpf_pidns)); > u32 pid = pidns.tgid; > u32 nsid = pidns.nsid; > if ((pid != <pid_arg_passed_in>) && (nsid != <nsid_arg_passed_in>)) > return 0; > > To find out the name PID namespace id of a process, you could use this > command: > > $ ps -h -o pidns -p <pid_of_interest> > > Or this other command: > > $ ls -Li /proc/<pid_of_interest>/ns/pid > > Signed-off-by: Carlos Antonio Neira Bustos <cneirabus...@gmail.com> > --- > include/linux/bpf.h | 1 + > include/uapi/linux/bpf.h | 24 +++++++++++- > kernel/bpf/core.c | 1 + > kernel/bpf/helpers.c | 64 > +++++++++++++++++++++++++++++++ > kernel/trace/bpf_trace.c | 2 + > samples/bpf/Makefile | 3 ++ > samples/bpf/trace_ns_info_user.c | 35 +++++++++++++++++ > samples/bpf/trace_ns_info_user_kern.c | 45 ++++++++++++++++++++++ > tools/include/uapi/linux/bpf.h | 24 +++++++++++- > tools/testing/selftests/bpf/bpf_helpers.h | 3 ++ > 10 files changed, 200 insertions(+), 2 deletions(-) > create mode 100644 samples/bpf/trace_ns_info_user.c > create mode 100644 samples/bpf/trace_ns_info_user_kern.c > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > index cd8790d2c6ed..3f4b999f7c99 100644 > --- a/include/linux/bpf.h > +++ b/include/linux/bpf.h > @@ -787,6 +787,7 @@ extern const struct bpf_func_proto bpf_get_stack_proto; > extern const struct bpf_func_proto bpf_sock_map_update_proto; > extern const struct bpf_func_proto bpf_sock_hash_update_proto; > extern const struct bpf_func_proto bpf_get_current_cgroup_id_proto; > +extern const struct bpf_func_proto bpf_get_current_pidns_info_proto; > > extern const struct bpf_func_proto bpf_get_local_storage_proto; > > diff --git a/include/uapi/linux/bpf.h 
b/include/uapi/linux/bpf.h > index dd5758dc35d3..8462f9881465 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -2113,6 +2113,18 @@ union bpf_attr { > * the shared data. > * Return > * Pointer to the local storage area. > + * > + * int bpf_get_current_pidns_info(struct bpf_pidns_info *pidns, u32 > size_of_pidns) > + * Description > + * Copies into *pidns* pid, namespace id and tgid as seen by the > + * current namespace and also device from /proc/self/ns/pid. > + * *size_of_pidns* must be the size of *pidns* > + * > + * This helper is used when pid filtering is needed inside a > + * container as bpf_get_current_tgid() helper returns always the > + * pid id as seen by the root namespace. > + * Return > + * 0 on success -EINVAL on error. > */ > #define __BPF_FUNC_MAPPER(FN) \ > FN(unspec), \ > @@ -2196,7 +2208,8 @@ union bpf_attr { > FN(rc_keydown), \ > FN(skb_cgroup_id), \ > FN(get_current_cgroup_id), \ > - FN(get_local_storage), > + FN(get_local_storage), \ > + FN(get_current_pidns_info), > > /* integer value in 'imm' field of BPF_CALL instruction selects which helper > * function eBPF program intends to call > @@ -2724,4 +2737,13 @@ enum bpf_task_fd_type { > BPF_FD_TYPE_URETPROBE, /* filename + offset */ > }; > > +/* helper bpf_get_current_pidns_info will store the following > + * data, dev will contain major/minor from /proc/self/ns/pid. 
> + */ > +struct bpf_pidns_info { > + __u32 dev; > + __u32 nsid; > + __u32 tgid; > + __u32 pid; > +}; > #endif /* _UAPI__LINUX_BPF_H__ */ > diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c > index 4d09e610777f..98ce53ce2ea6 100644 > --- a/kernel/bpf/core.c > +++ b/kernel/bpf/core.c > @@ -1796,6 +1796,7 @@ const struct bpf_func_proto bpf_sock_map_update_proto > __weak; > const struct bpf_func_proto bpf_sock_hash_update_proto __weak; > const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak; > const struct bpf_func_proto bpf_get_local_storage_proto __weak; > +const struct bpf_func_proto bpf_get_current_pidns_info __weak; > > const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) > { > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c > index 1991466b8327..d06d723b9cff 100644 > --- a/kernel/bpf/helpers.c > +++ b/kernel/bpf/helpers.c > @@ -18,6 +18,9 @@ > #include <linux/sched.h> > #include <linux/uidgid.h> > #include <linux/filter.h> > +#include <linux/pid_namespace.h> > +#include <linux/major.h> > +#include <linux/stat.h> > > /* If kernel subsystem is allowing eBPF programs to call this function, > * inside its own verifier_ops->get_func_proto() callback it should return > @@ -214,3 +217,64 @@ const struct bpf_func_proto bpf_get_local_storage_proto > = { > .arg2_type = ARG_ANYTHING, > }; > #endif > + > +BPF_CALL_2(bpf_get_current_pidns_info, struct bpf_pidns_info *, pidns_info, > u32, > + size) > +{ > + const char *ppath = "/proc/self/ns/pid"; > + struct pid_namespace *pidns = NULL; > + mm_segment_t oldsegfs; > + struct kstat stat; > + pid_t tgid = 0; > + pid_t pid = 0; > + int res = 0; > + > + if (unlikely(size != sizeof(struct bpf_pidns_info))) > + goto clear; > + > + pidns = task_active_pid_ns(current); > + > + if (unlikely(!pidns)) > + goto clear; > + > + pidns_info->nsid = pidns->ns.inum; > + pid = task_pid_nr_ns(current, pidns); > + > + if (unlikely(!pid)) > + goto clear; > + > + tgid = task_tgid_nr_ns(current, pidns); > + > + 
if (unlikely(!tgid)) > + goto clear; > + > + pidns_info->tgid = (u32) tgid; > + pidns_info->pid = (u32) pid; > + > + oldsegfs = get_fs(); > + set_fs(KERNEL_DS); > + res = vfs_stat((const char __user *)ppath, &stat); > + set_fs(oldsegfs);
Such fs magic cannot be done from the helper. Please find a way to retrieve the device differently. > + > + if (unlikely(res)) > + goto clear; > + > + pidns_info->dev = stat.dev; > + > + return 0; > + > +clear: > + if (pidns_info) > + memset((void *)pidns, 0, (size_t) size); > + > + return -EINVAL; > +} > + > +const struct bpf_func_proto bpf_get_current_pidns_info_proto = { > + .func = bpf_get_current_pidns_info, > + .gpl_only = false, > + .ret_type = RET_INTEGER, > + .arg1_type = ARG_PTR_TO_UNINIT_MEM, > + .arg2_type = ARG_CONST_SIZE, > +}; > + > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c > index 0ae6829804bc..f70be29e49ab 100644 > --- a/kernel/trace/bpf_trace.c > +++ b/kernel/trace/bpf_trace.c > @@ -568,6 +568,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct > bpf_prog *prog) > case BPF_FUNC_get_current_cgroup_id: > return &bpf_get_current_cgroup_id_proto; > #endif > + case BPF_FUNC_get_current_pidns_info: > + return &bpf_get_current_pidns_info_proto; > default: > return NULL; > } > diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile > index f88d5683d6ee..fdcde00554ce 100644 > --- a/samples/bpf/Makefile > +++ b/samples/bpf/Makefile > @@ -53,6 +53,7 @@ hostprogs-y += xdpsock > hostprogs-y += xdp_fwd > hostprogs-y += task_fd_query > hostprogs-y += xdp_sample_pkts > +hostprogs-y += trace_ns_info > > # Libbpf dependencies > LIBBPF = $(TOOLS_PATH)/lib/bpf/libbpf.a > @@ -109,6 +110,7 @@ xdpsock-objs := xdpsock_user.o > xdp_fwd-objs := xdp_fwd_user.o > task_fd_query-objs := bpf_load.o task_fd_query_user.o $(TRACE_HELPERS) > xdp_sample_pkts-objs := xdp_sample_pkts_user.o $(TRACE_HELPERS) > +trace_ns_info-objs := bpf_load.o trace_ns_info_user.o > > # Tell kbuild to always build the programs > always := $(hostprogs-y) > @@ -166,6 +168,7 @@ always += xdpsock_kern.o > always += xdp_fwd_kern.o > always += task_fd_query_kern.o > always += xdp_sample_pkts_kern.o > +always += trace_ns_info_user_kern.o > > HOSTCFLAGS += 
-I$(objtree)/usr/include > HOSTCFLAGS += -I$(srctree)/tools/lib/ > diff --git a/samples/bpf/trace_ns_info_user.c > b/samples/bpf/trace_ns_info_user.c > new file mode 100644 > index 000000000000..e06d08db6f30 > --- /dev/null > +++ b/samples/bpf/trace_ns_info_user.c Please split the addition of sample code into a separate patch and convert it into selftests. > @@ -0,0 +1,35 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* Copyright (c) 2018 Carlos Neira cneirabus...@gmail.com > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of version 2 of the GNU General Public > + * License as published by the Free Software Foundation. > + */ > + > +#include <stdio.h> > +#include <linux/bpf.h> > +#include <unistd.h> > +#include "bpf/libbpf.h" > +#include "bpf_load.h" > + > +/* This code was taken verbatim from tracex1_user.c, it's used > + * to exercize bpf_get_current_pidns_info() helper call. > + */ > +int main(int ac, char **argv) > +{ > + FILE *f; > + char filename[256]; > + > + snprintf(filename, sizeof(filename), "%s_user_kern.o", argv[0]); > + printf("loading %s\n", filename); > + > + if (load_bpf_file(filename)) { > + printf("%s", bpf_log_buf); > + return 1; > + } > + > + f = popen("taskset 1 ping localhost", "r"); > + (void) f; > + read_trace_pipe(); > + return 0; > +} > diff --git a/samples/bpf/trace_ns_info_user_kern.c > b/samples/bpf/trace_ns_info_user_kern.c > new file mode 100644 > index 000000000000..ceaf3e83c8e7 > --- /dev/null > +++ b/samples/bpf/trace_ns_info_user_kern.c > @@ -0,0 +1,45 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* Copyright (c) 2018 Carlos Neira cneirabus...@gmail.com > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of version 2 of the GNU General Public > + * License as published by the Free Software Foundation. 
> + */ > +#include <linux/skbuff.h> > +#include <linux/netdevice.h> > +#include <linux/version.h> > +#include <uapi/linux/bpf.h> > +#include "bpf_helpers.h" > + > +typedef __u64 u64; > +typedef __u32 u32; > + > + > +/* kprobe is NOT a stable ABI > + * kernel functions can be removed, renamed or completely change semantics. > + * Number of arguments and their positions can change, etc. > + * In such case this bpf+kprobe example will no longer be meaningful > + */ > + > +/* This will call bpf_get_current_pidns_info() to display pid and ns values > + * as seen by the current namespace, on the far left you will see the pid as > + * seen as by the root namespace. > + */ > + > +SEC("kprobe/__netif_receive_skb_core") > +int bpf_prog1(struct pt_regs *ctx) > +{ > + char fmt[] = "nsid:%u, dev: %u, pid:%u\n"; > + struct bpf_pidns_info nsinfo; > + int ok = 0; > + > + ok = bpf_get_current_pidns_info(&nsinfo, sizeof(nsinfo)); > + if (ok == 0) > + bpf_trace_printk(fmt, sizeof(fmt), (u32)nsinfo.nsid, > + (u32) nsinfo.dev, (u32)nsinfo.pid); > + > + return 0; > +} > + > +char _license[] SEC("license") = "GPL"; > +u32 _version SEC("version") = LINUX_VERSION_CODE; > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h > index dd5758dc35d3..8462f9881465 100644 > --- a/tools/include/uapi/linux/bpf.h > +++ b/tools/include/uapi/linux/bpf.h The update to tools/.../bpf.h should be a separate patch as well. In the end, the series should be: p1 - feature introduction; p2 - update tools/.../bpf.h; p3 - selftest.