Re: [PATCH v3 1/3] perf: add PERF_RECORD_NAMESPACES to include namespaces related info
Hari Bathini writes: > Hi Eric, > > > On Monday 12 December 2016 11:57 PM, Eric W. Biederman wrote: >> Hari Bathini writes: >> >>> With the advent of container technologies like docker, that depend >>> on namespaces for isolation, there is a need for tracing support for >>> namespaces. This patch introduces new PERF_RECORD_NAMESPACES event >>> for tracing based on namespaces related info. >>> diff --git a/include/uapi/linux/perf_event.h >>> b/include/uapi/linux/perf_event.h >>> index c66a485..2a48fc6 100644 >>> --- a/include/uapi/linux/perf_event.h >>> +++ b/include/uapi/linux/perf_event.h >>> @@ -344,7 +344,8 @@ struct perf_event_attr { >>> use_clockid: 1, /* use @clockid for time >>> fields */ >>> context_switch : 1, /* context switch data */ >>> write_backward : 1, /* Write ring buffer from >>> end to beginning */ >>> - __reserved_1 : 36; >>> + namespaces : 1, /* include namespaces data >>> */ >>> + __reserved_1 : 35; >>> union { >>> __u32 wakeup_events;/* wakeup every n events */ >>> @@ -610,6 +611,18 @@ struct perf_event_header { >>> __u16 size; >>> }; >>> +enum { >>> + NET_NS_INDEX= 0, >>> + UTS_NS_INDEX= 1, >>> + IPC_NS_INDEX= 2, >>> + PID_NS_INDEX= 3, >>> + USER_NS_INDEX = 4, >>> + MNT_NS_INDEX= 5, >>> + CGROUP_NS_INDEX = 6, >>> + >>> + NAMESPACES_MAX, /* maximum available namespaces */ >>> +}; >>> + >>> enum perf_event_type { >>> /* >>> @@ -862,6 +875,18 @@ enum perf_event_type { >>> */ >>> PERF_RECORD_SWITCH_CPU_WIDE = 15, >>> + /* >>> +* struct { >>> +* struct perf_event_header header; >>> +* >>> +* u32 pid, tid; >>> +* u64 dev_num; >>> +* u64 inode_num[NAMESPACES_MAX]; >> There needs to be one device number per inode. While it is true that >> today the device number is always the same, that is not necessarily so. >> I reserve the right to have the device number vary per namespace >> so that I don't need to implement a namespace of namespaces. >> >> These are st_dev and st_ino of the inode for the namespace. > > Do you mean.. 
> > st_dev = encode_dev(inode->i_sb->s_dev); ? > st_ino = inode->i_ino; ? Yes. I believe that is how those values make it to user space during a stat system call. Eric
Re: [PATCH v3 1/3] perf: add PERF_RECORD_NAMESPACES to include namespaces related info
Hari Bathini writes: > Hi Eric, > > > On Monday 12 December 2016 11:57 PM, Eric W. Biederman wrote: >> Hari Bathini writes: >> >>> With the advent of container technologies like docker, that depend >>> on namespaces for isolation, there is a need for tracing support for >>> namespaces. This patch introduces new PERF_RECORD_NAMESPACES event >>> for tracing based on namespaces related info. >>> diff --git a/include/uapi/linux/perf_event.h >>> b/include/uapi/linux/perf_event.h >>> index c66a485..2a48fc6 100644 >>> --- a/include/uapi/linux/perf_event.h >>> +++ b/include/uapi/linux/perf_event.h >>> @@ -344,7 +344,8 @@ struct perf_event_attr { >>> use_clockid: 1, /* use @clockid for time >>> fields */ >>> context_switch : 1, /* context switch data */ >>> write_backward : 1, /* Write ring buffer from >>> end to beginning */ >>> - __reserved_1 : 36; >>> + namespaces : 1, /* include namespaces data >>> */ >>> + __reserved_1 : 35; >>> union { >>> __u32 wakeup_events;/* wakeup every n events */ >>> @@ -610,6 +611,18 @@ struct perf_event_header { >>> __u16 size; >>> }; >>> +enum { >>> + NET_NS_INDEX= 0, >>> + UTS_NS_INDEX= 1, >>> + IPC_NS_INDEX= 2, >>> + PID_NS_INDEX= 3, >>> + USER_NS_INDEX = 4, >>> + MNT_NS_INDEX= 5, >>> + CGROUP_NS_INDEX = 6, >>> + >>> + NAMESPACES_MAX, /* maximum available namespaces */ >>> +}; >>> + >>> enum perf_event_type { >>> /* >>> @@ -862,6 +875,18 @@ enum perf_event_type { >>> */ >>> PERF_RECORD_SWITCH_CPU_WIDE = 15, >>> + /* >>> +* struct { >>> +* struct perf_event_header header; >>> +* >>> +* u32 pid, tid; >>> +* u64 dev_num; >>> +* u64 inode_num[NAMESPACES_MAX]; >> There needs to be one device number per inode. While it is true that >> today the device number is always the same, that is not necessarily so. >> I reserve the right to have the device number vary per namespace >> so that I don't need to implement a namespace of namespaces. >> >> These are st_dev and st_ino of the inode for the namespace. > > Do you mean.. 
> > st_dev = encode_dev(inode->i_sb->s_dev); ? > st_ino = inode->i_ino; ? Yes. I believe that is how those values make it to user space during a stat system call. Eric
Re: [PATCH v3 1/3] perf: add PERF_RECORD_NAMESPACES to include namespaces related info
Hi Eric, On Monday 12 December 2016 11:57 PM, Eric W. Biederman wrote: Hari Bathini writes: With the advent of container technologies like docker, that depend on namespaces for isolation, there is a need for tracing support for namespaces. This patch introduces new PERF_RECORD_NAMESPACES event for tracing based on namespaces related info. diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index c66a485..2a48fc6 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -344,7 +344,8 @@ struct perf_event_attr { use_clockid: 1, /* use @clockid for time fields */ context_switch : 1, /* context switch data */ write_backward : 1, /* Write ring buffer from end to beginning */ - __reserved_1 : 36; + namespaces : 1, /* include namespaces data */ + __reserved_1 : 35; union { __u32 wakeup_events;/* wakeup every n events */ @@ -610,6 +611,18 @@ struct perf_event_header { __u16 size; }; +enum { + NET_NS_INDEX= 0, + UTS_NS_INDEX= 1, + IPC_NS_INDEX= 2, + PID_NS_INDEX= 3, + USER_NS_INDEX = 4, + MNT_NS_INDEX= 5, + CGROUP_NS_INDEX = 6, + + NAMESPACES_MAX, /* maximum available namespaces */ +}; + enum perf_event_type { /* @@ -862,6 +875,18 @@ enum perf_event_type { */ PERF_RECORD_SWITCH_CPU_WIDE = 15, + /* +* struct { +* struct perf_event_header header; +* +* u32 pid, tid; +* u64 dev_num; +* u64 inode_num[NAMESPACES_MAX]; There needs to be one device number per inode. While it is true that today the device number is always the same, that is not necessarily so. I reserve the right to have the device number vary per namespace so that I don't need to implement a namespace of namespaces. These are st_dev and st_ino of the inode for the namespace. Do you mean.. st_dev = encode_dev(inode->i_sb->s_dev); ? st_ino = inode->i_ino; ? Thanks Hari
Re: [PATCH v3 1/3] perf: add PERF_RECORD_NAMESPACES to include namespaces related info
Hi Eric, On Monday 12 December 2016 11:57 PM, Eric W. Biederman wrote: Hari Bathini writes: With the advent of container technologies like docker, that depend on namespaces for isolation, there is a need for tracing support for namespaces. This patch introduces new PERF_RECORD_NAMESPACES event for tracing based on namespaces related info. diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index c66a485..2a48fc6 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -344,7 +344,8 @@ struct perf_event_attr { use_clockid: 1, /* use @clockid for time fields */ context_switch : 1, /* context switch data */ write_backward : 1, /* Write ring buffer from end to beginning */ - __reserved_1 : 36; + namespaces : 1, /* include namespaces data */ + __reserved_1 : 35; union { __u32 wakeup_events;/* wakeup every n events */ @@ -610,6 +611,18 @@ struct perf_event_header { __u16 size; }; +enum { + NET_NS_INDEX= 0, + UTS_NS_INDEX= 1, + IPC_NS_INDEX= 2, + PID_NS_INDEX= 3, + USER_NS_INDEX = 4, + MNT_NS_INDEX= 5, + CGROUP_NS_INDEX = 6, + + NAMESPACES_MAX, /* maximum available namespaces */ +}; + enum perf_event_type { /* @@ -862,6 +875,18 @@ enum perf_event_type { */ PERF_RECORD_SWITCH_CPU_WIDE = 15, + /* +* struct { +* struct perf_event_header header; +* +* u32 pid, tid; +* u64 dev_num; +* u64 inode_num[NAMESPACES_MAX]; There needs to be one device number per inode. While it is true that today the device number is always the same, that is not necessarily so. I reserve the right to have the device number vary per namespace so that I don't need to implement a namespace of namespaces. These are st_dev and st_ino of the inode for the namespace. Do you mean.. st_dev = encode_dev(inode->i_sb->s_dev); ? st_ino = inode->i_ino; ? Thanks Hari
Re: [PATCH v3 1/3] perf: add PERF_RECORD_NAMESPACES to include namespaces related info
Hari Bathini writes: > With the advent of container technologies like docker, that depend > on namespaces for isolation, there is a need for tracing support for > namespaces. This patch introduces new PERF_RECORD_NAMESPACES event > for tracing based on namespaces related info. > diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h > index c66a485..2a48fc6 100644 > --- a/include/uapi/linux/perf_event.h > +++ b/include/uapi/linux/perf_event.h > @@ -344,7 +344,8 @@ struct perf_event_attr { > use_clockid: 1, /* use @clockid for time > fields */ > context_switch : 1, /* context switch data */ > write_backward : 1, /* Write ring buffer from > end to beginning */ > - __reserved_1 : 36; > + namespaces : 1, /* include namespaces data > */ > + __reserved_1 : 35; > > union { > __u32 wakeup_events;/* wakeup every n events */ > @@ -610,6 +611,18 @@ struct perf_event_header { > __u16 size; > }; > > +enum { > + NET_NS_INDEX= 0, > + UTS_NS_INDEX= 1, > + IPC_NS_INDEX= 2, > + PID_NS_INDEX= 3, > + USER_NS_INDEX = 4, > + MNT_NS_INDEX= 5, > + CGROUP_NS_INDEX = 6, > + > + NAMESPACES_MAX, /* maximum available namespaces */ > +}; > + > enum perf_event_type { > > /* > @@ -862,6 +875,18 @@ enum perf_event_type { >*/ > PERF_RECORD_SWITCH_CPU_WIDE = 15, > > + /* > + * struct { > + * struct perf_event_header header; > + * > + * u32 pid, tid; > + * u64 dev_num; > + * u64 inode_num[NAMESPACES_MAX]; There needs to be one device number per inode. While it is true that today the device number is always the same, that is not necessarily so. I reserve the right to have the device number vary per namespace so that I don't need to implement a namespace of namespaces. These are st_dev and st_ino of the inode for the namespace. > + * struct sample_id sample_id; > + * }; > + */ > + PERF_RECORD_NAMESPACES = 16, > + > PERF_RECORD_MAX,/* non-ABI */ > };
Re: [PATCH v3 1/3] perf: add PERF_RECORD_NAMESPACES to include namespaces related info
Hari Bathini writes: > With the advent of container technologies like docker, that depend > on namespaces for isolation, there is a need for tracing support for > namespaces. This patch introduces new PERF_RECORD_NAMESPACES event > for tracing based on namespaces related info. > diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h > index c66a485..2a48fc6 100644 > --- a/include/uapi/linux/perf_event.h > +++ b/include/uapi/linux/perf_event.h > @@ -344,7 +344,8 @@ struct perf_event_attr { > use_clockid: 1, /* use @clockid for time > fields */ > context_switch : 1, /* context switch data */ > write_backward : 1, /* Write ring buffer from > end to beginning */ > - __reserved_1 : 36; > + namespaces : 1, /* include namespaces data > */ > + __reserved_1 : 35; > > union { > __u32 wakeup_events;/* wakeup every n events */ > @@ -610,6 +611,18 @@ struct perf_event_header { > __u16 size; > }; > > +enum { > + NET_NS_INDEX= 0, > + UTS_NS_INDEX= 1, > + IPC_NS_INDEX= 2, > + PID_NS_INDEX= 3, > + USER_NS_INDEX = 4, > + MNT_NS_INDEX= 5, > + CGROUP_NS_INDEX = 6, > + > + NAMESPACES_MAX, /* maximum available namespaces */ > +}; > + > enum perf_event_type { > > /* > @@ -862,6 +875,18 @@ enum perf_event_type { >*/ > PERF_RECORD_SWITCH_CPU_WIDE = 15, > > + /* > + * struct { > + * struct perf_event_header header; > + * > + * u32 pid, tid; > + * u64 dev_num; > + * u64 inode_num[NAMESPACES_MAX]; There needs to be one device number per inode. While it is true that today the device number is always the same, that is not necessarily so. I reserve the right to have the device number vary per namespace so that I don't need to implement a namespace of namespaces. These are st_dev and st_ino of the inode for the namespace. > + * struct sample_id sample_id; > + * }; > + */ > + PERF_RECORD_NAMESPACES = 16, > + > PERF_RECORD_MAX,/* non-ABI */ > };
[PATCH v3 1/3] perf: add PERF_RECORD_NAMESPACES to include namespaces related info
With the advent of container technologies like docker, that depend on namespaces for isolation, there is a need for tracing support for namespaces. This patch introduces new PERF_RECORD_NAMESPACES event for tracing based on namespaces related info. Signed-off-by: Hari Bathini --- Changes from v2: * Use time value from sample_id. include/linux/perf_event.h |2 + include/uapi/linux/perf_event.h | 27 kernel/events/core.c| 134 +++ kernel/fork.c |3 + kernel/nsproxy.c|5 + 5 files changed, 170 insertions(+), 1 deletion(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4741ecd..42d8aa6 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1110,6 +1110,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks extern void perf_event_exec(void); extern void perf_event_comm(struct task_struct *tsk, bool exec); +extern void perf_event_namespaces(struct task_struct *tsk); extern void perf_event_fork(struct task_struct *tsk); /* Callchains */ @@ -1312,6 +1313,7 @@ static inline int perf_unregister_guest_info_callbacks static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_exec(void) { } static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } +static inline void perf_event_namespaces(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk){ } static inline void perf_event_init(void) { } static inline int perf_swevent_get_recursion_context(void){ return -1; } diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index c66a485..2a48fc6 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -344,7 +344,8 @@ struct perf_event_attr { use_clockid: 1, /* use @clockid for time fields */ context_switch : 1, /* context switch data */ write_backward : 1, /* Write ring buffer from end to beginning */ - __reserved_1 : 36; + namespaces : 1, /* include namespaces data */ + 
__reserved_1 : 35; union { __u32 wakeup_events;/* wakeup every n events */ @@ -610,6 +611,18 @@ struct perf_event_header { __u16 size; }; +enum { + NET_NS_INDEX= 0, + UTS_NS_INDEX= 1, + IPC_NS_INDEX= 2, + PID_NS_INDEX= 3, + USER_NS_INDEX = 4, + MNT_NS_INDEX= 5, + CGROUP_NS_INDEX = 6, + + NAMESPACES_MAX, /* maximum available namespaces */ +}; + enum perf_event_type { /* @@ -862,6 +875,18 @@ enum perf_event_type { */ PERF_RECORD_SWITCH_CPU_WIDE = 15, + /* +* struct { +* struct perf_event_header header; +* +* u32 pid, tid; +* u64 dev_num; +* u64 inode_num[NAMESPACES_MAX]; +* struct sample_id sample_id; +* }; +*/ + PERF_RECORD_NAMESPACES = 16, + PERF_RECORD_MAX,/* non-ABI */ }; diff --git a/kernel/events/core.c b/kernel/events/core.c index 02c8421..eb9c812 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -46,6 +46,10 @@ #include #include #include +#include +#include +#include +#include #include "internal.h" @@ -375,6 +379,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events); static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; +static atomic_t nr_namespaces_events __read_mostly; static atomic_t nr_task_events __read_mostly; static atomic_t nr_freq_events __read_mostly; static atomic_t nr_switch_events __read_mostly; @@ -3882,6 +3887,8 @@ static void unaccount_event(struct perf_event *event) atomic_dec(&nr_mmap_events); if (event->attr.comm) atomic_dec(&nr_comm_events); + if (event->attr.namespaces) + atomic_dec(&nr_namespaces_events); if (event->attr.task) atomic_dec(&nr_task_events); if (event->attr.freq) @@ -6382,6 +6389,7 @@ static void perf_event_task(struct task_struct *task, void perf_event_fork(struct task_struct *task) { perf_event_task(task, NULL, 1); + perf_event_namespaces(task); } /* @@ -6484,6 +6492,125 @@ void perf_event_comm(struct task_struct *task, bool exec) } /* + * namespaces tracking + */ +
[PATCH v3 1/3] perf: add PERF_RECORD_NAMESPACES to include namespaces related info
With the advent of container technologies like docker, that depend on namespaces for isolation, there is a need for tracing support for namespaces. This patch introduces new PERF_RECORD_NAMESPACES event for tracing based on namespaces related info. Signed-off-by: Hari Bathini --- Changes from v2: * Use time value from sample_id. include/linux/perf_event.h |2 + include/uapi/linux/perf_event.h | 27 kernel/events/core.c| 134 +++ kernel/fork.c |3 + kernel/nsproxy.c|5 + 5 files changed, 170 insertions(+), 1 deletion(-) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 4741ecd..42d8aa6 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1110,6 +1110,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks extern void perf_event_exec(void); extern void perf_event_comm(struct task_struct *tsk, bool exec); +extern void perf_event_namespaces(struct task_struct *tsk); extern void perf_event_fork(struct task_struct *tsk); /* Callchains */ @@ -1312,6 +1313,7 @@ static inline int perf_unregister_guest_info_callbacks static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_exec(void) { } static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } +static inline void perf_event_namespaces(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk){ } static inline void perf_event_init(void) { } static inline int perf_swevent_get_recursion_context(void){ return -1; } diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index c66a485..2a48fc6 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -344,7 +344,8 @@ struct perf_event_attr { use_clockid: 1, /* use @clockid for time fields */ context_switch : 1, /* context switch data */ write_backward : 1, /* Write ring buffer from end to beginning */ - __reserved_1 : 36; + namespaces : 1, /* include namespaces data */ + 
__reserved_1 : 35; union { __u32 wakeup_events;/* wakeup every n events */ @@ -610,6 +611,18 @@ struct perf_event_header { __u16 size; }; +enum { + NET_NS_INDEX= 0, + UTS_NS_INDEX= 1, + IPC_NS_INDEX= 2, + PID_NS_INDEX= 3, + USER_NS_INDEX = 4, + MNT_NS_INDEX= 5, + CGROUP_NS_INDEX = 6, + + NAMESPACES_MAX, /* maximum available namespaces */ +}; + enum perf_event_type { /* @@ -862,6 +875,18 @@ enum perf_event_type { */ PERF_RECORD_SWITCH_CPU_WIDE = 15, + /* +* struct { +* struct perf_event_header header; +* +* u32 pid, tid; +* u64 dev_num; +* u64 inode_num[NAMESPACES_MAX]; +* struct sample_id sample_id; +* }; +*/ + PERF_RECORD_NAMESPACES = 16, + PERF_RECORD_MAX,/* non-ABI */ }; diff --git a/kernel/events/core.c b/kernel/events/core.c index 02c8421..eb9c812 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -46,6 +46,10 @@ #include #include #include +#include +#include +#include +#include #include "internal.h" @@ -375,6 +379,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events); static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; +static atomic_t nr_namespaces_events __read_mostly; static atomic_t nr_task_events __read_mostly; static atomic_t nr_freq_events __read_mostly; static atomic_t nr_switch_events __read_mostly; @@ -3882,6 +3887,8 @@ static void unaccount_event(struct perf_event *event) atomic_dec(&nr_mmap_events); if (event->attr.comm) atomic_dec(&nr_comm_events); + if (event->attr.namespaces) + atomic_dec(&nr_namespaces_events); if (event->attr.task) atomic_dec(&nr_task_events); if (event->attr.freq) @@ -6382,6 +6389,7 @@ static void perf_event_task(struct task_struct *task, void perf_event_fork(struct task_struct *task) { perf_event_task(task, NULL, 1); + perf_event_namespaces(task); } /* @@ -6484,6 +6492,125 @@ void perf_event_comm(struct task_struct *task, bool exec) } /* + * namespaces tracking + */ +struct namespaces_event_id