Re: [PATCH v3 6/7] tracing: Add snapshot action
On Fri, 2018-08-10 at 16:04 +0900, Namhyung Kim wrote: > On Thu, Aug 09, 2018 at 09:34:16AM -0500, Tom Zanussi wrote: > > From: Tom Zanussi > > > > Add support for hist:handlerXXX($var).snapshot(), which will take a > > snapshot of the current trace buffer whenever handlerXXX is hit. > > > > As a first user, this also adds snapshot() action support for the > > onmax() handler i.e. hist:onmax($var).snapshot(). > > > > Signed-off-by: Tom Zanussi > > --- > > [SNIP] > > +static struct track_data *track_data_alloc(unsigned int key_len, > > + struct action_data > > *action_data, > > + struct > > hist_trigger_data *hist_data) > > +{ > > + struct track_data *data = kzalloc(sizeof(*data), > > GFP_KERNEL); > > + unsigned int size = TASK_COMM_LEN; > > + struct hist_elt_data *elt_data; > > + > > + if (!data) > > + return ERR_PTR(-ENOMEM); > > + > > + data->key = kzalloc(key_len, GFP_KERNEL); > > + if (!data->key) { > > + track_data_free(data); > > + return ERR_PTR(-ENOMEM); > > + } > > + > > + data->key_len = key_len; > > + data->action_data = action_data; > > + data->hist_data = hist_data; > > + > > + elt_data = kzalloc(sizeof(*elt_data), GFP_KERNEL); > > + if (!elt_data) { > > + track_data_free(data); > > + return ERR_PTR(-ENOMEM); > > + } > > + elt_data->comm = kzalloc(size, GFP_KERNEL); > > + if (!elt_data->comm) { > > + track_data_free(data); > > It seems to leak 'elf_data' here. > Yep, it does - thanks for pointing that out. Tom > Thanks, > Namhyung > > > > + return ERR_PTR(-ENOMEM); > > + } > > + > > + data->elt.private_data = elt_data; > > + > > + return data; > > +}
Re: [PATCH v3 6/7] tracing: Add snapshot action
On Fri, 2018-08-10 at 16:04 +0900, Namhyung Kim wrote: > On Thu, Aug 09, 2018 at 09:34:16AM -0500, Tom Zanussi wrote: > > From: Tom Zanussi > > > > Add support for hist:handlerXXX($var).snapshot(), which will take a > > snapshot of the current trace buffer whenever handlerXXX is hit. > > > > As a first user, this also adds snapshot() action support for the > > onmax() handler i.e. hist:onmax($var).snapshot(). > > > > Signed-off-by: Tom Zanussi > > --- > > [SNIP] > > +static struct track_data *track_data_alloc(unsigned int key_len, > > + struct action_data > > *action_data, > > + struct > > hist_trigger_data *hist_data) > > +{ > > + struct track_data *data = kzalloc(sizeof(*data), > > GFP_KERNEL); > > + unsigned int size = TASK_COMM_LEN; > > + struct hist_elt_data *elt_data; > > + > > + if (!data) > > + return ERR_PTR(-ENOMEM); > > + > > + data->key = kzalloc(key_len, GFP_KERNEL); > > + if (!data->key) { > > + track_data_free(data); > > + return ERR_PTR(-ENOMEM); > > + } > > + > > + data->key_len = key_len; > > + data->action_data = action_data; > > + data->hist_data = hist_data; > > + > > + elt_data = kzalloc(sizeof(*elt_data), GFP_KERNEL); > > + if (!elt_data) { > > + track_data_free(data); > > + return ERR_PTR(-ENOMEM); > > + } > > + elt_data->comm = kzalloc(size, GFP_KERNEL); > > + if (!elt_data->comm) { > > + track_data_free(data); > > It seems to leak 'elf_data' here. > Yep, it does - thanks for pointing that out. Tom > Thanks, > Namhyung > > > > + return ERR_PTR(-ENOMEM); > > + } > > + > > + data->elt.private_data = elt_data; > > + > > + return data; > > +}
Re: [PATCH v3 6/7] tracing: Add snapshot action
On Thu, Aug 09, 2018 at 09:34:16AM -0500, Tom Zanussi wrote: > From: Tom Zanussi > > Add support for hist:handlerXXX($var).snapshot(), which will take a > snapshot of the current trace buffer whenever handlerXXX is hit. > > As a first user, this also adds snapshot() action support for the > onmax() handler i.e. hist:onmax($var).snapshot(). > > Signed-off-by: Tom Zanussi > --- [SNIP] > +static struct track_data *track_data_alloc(unsigned int key_len, > +struct action_data *action_data, > +struct hist_trigger_data *hist_data) > +{ > + struct track_data *data = kzalloc(sizeof(*data), GFP_KERNEL); > + unsigned int size = TASK_COMM_LEN; > + struct hist_elt_data *elt_data; > + > + if (!data) > + return ERR_PTR(-ENOMEM); > + > + data->key = kzalloc(key_len, GFP_KERNEL); > + if (!data->key) { > + track_data_free(data); > + return ERR_PTR(-ENOMEM); > + } > + > + data->key_len = key_len; > + data->action_data = action_data; > + data->hist_data = hist_data; > + > + elt_data = kzalloc(sizeof(*elt_data), GFP_KERNEL); > + if (!elt_data) { > + track_data_free(data); > + return ERR_PTR(-ENOMEM); > + } > + elt_data->comm = kzalloc(size, GFP_KERNEL); > + if (!elt_data->comm) { > + track_data_free(data); It seems to leak 'elf_data' here. Thanks, Namhyung > + return ERR_PTR(-ENOMEM); > + } > + > + data->elt.private_data = elt_data; > + > + return data; > +}
Re: [PATCH v3 6/7] tracing: Add snapshot action
On Thu, Aug 09, 2018 at 09:34:16AM -0500, Tom Zanussi wrote: > From: Tom Zanussi > > Add support for hist:handlerXXX($var).snapshot(), which will take a > snapshot of the current trace buffer whenever handlerXXX is hit. > > As a first user, this also adds snapshot() action support for the > onmax() handler i.e. hist:onmax($var).snapshot(). > > Signed-off-by: Tom Zanussi > --- [SNIP] > +static struct track_data *track_data_alloc(unsigned int key_len, > +struct action_data *action_data, > +struct hist_trigger_data *hist_data) > +{ > + struct track_data *data = kzalloc(sizeof(*data), GFP_KERNEL); > + unsigned int size = TASK_COMM_LEN; > + struct hist_elt_data *elt_data; > + > + if (!data) > + return ERR_PTR(-ENOMEM); > + > + data->key = kzalloc(key_len, GFP_KERNEL); > + if (!data->key) { > + track_data_free(data); > + return ERR_PTR(-ENOMEM); > + } > + > + data->key_len = key_len; > + data->action_data = action_data; > + data->hist_data = hist_data; > + > + elt_data = kzalloc(sizeof(*elt_data), GFP_KERNEL); > + if (!elt_data) { > + track_data_free(data); > + return ERR_PTR(-ENOMEM); > + } > + elt_data->comm = kzalloc(size, GFP_KERNEL); > + if (!elt_data->comm) { > + track_data_free(data); It seems to leak 'elf_data' here. Thanks, Namhyung > + return ERR_PTR(-ENOMEM); > + } > + > + data->elt.private_data = elt_data; > + > + return data; > +}
[PATCH v3 6/7] tracing: Add snapshot action
From: Tom Zanussi Add support for hist:handlerXXX($var).snapshot(), which will take a snapshot of the current trace buffer whenever handlerXXX is hit. As a first user, this also adds snapshot() action support for the onmax() handler i.e. hist:onmax($var).snapshot(). Signed-off-by: Tom Zanussi --- Documentation/trace/histogram.txt | 109 ++ kernel/trace/trace_events_hist.c | 289 -- 2 files changed, 390 insertions(+), 8 deletions(-) diff --git a/Documentation/trace/histogram.txt b/Documentation/trace/histogram.txt index 7ffea6aa22e3..125bf360e9be 100644 --- a/Documentation/trace/histogram.txt +++ b/Documentation/trace/histogram.txt @@ -1994,6 +1994,115 @@ hist trigger specification. Entries: 2 Dropped: 0 + - onmax(var).snapshot() + +The 'onmax(var).snapshot()' hist trigger action is invoked +whenever the value of 'var' associated with a histogram entry +exceeds the current maximum contained in that variable. + +The end result is that a global snapshot of the trace buffer will +be saved in the tracing/snapshot file if 'var' exceeds the current +maximum for any hist trigger entry. + +Note that in this case the maximum is a global maximum for the +current trace instance, which is the maximum across all buckets of +the histogram. The key of the specific trace event that caused +the global maximum and the global maximum itself are displayed, +along with a message stating that a snapshot has been taken and +where to find it. The user can use the key information displayed +to locate the corresponding bucket in the histogram for even more +detail. + +As an example the below defines a couple of hist triggers, one for +sched_waking and another for sched_switch, keyed on pid. Whenever +a sched_waking event occurs, the timestamp is saved in the entry +corresponding to the current pid, and when the scheduler switches +back to that pid, the timestamp difference is calculated. If the +resulting latency, stored in wakeup_lat, exceeds the current +maximum latency, a snapshot is taken. As part of the setup, all +the scheduler events are also enabled, which are the events that +will show up in the snapshot when it is taken at some point: + +# echo 1 > /sys/kernel/debug/tracing/events/sched/enable + +# echo 'hist:keys=pid:ts0=common_timestamp.usecs \ +if comm=="cyclictest"' >> \ +/sys/kernel/debug/tracing/events/sched/sched_waking/trigger + +# echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0: \ +onmax($wakeup_lat).save(next_prio,next_comm,prev_pid,prev_prio, \ + prev_comm):onmax($wakeup_lat).snapshot() \ + if next_comm=="cyclictest"' >> \ + /sys/kernel/debug/tracing/events/sched/sched_switch/trigger + +When the histogram is displayed, for each bucket the max value +and the saved values corresponding to the max are displayed +following the rest of the fields. + +If a snaphot was taken, there is also a message indicating that, +along with the value and event that triggered the global maximum: + +# cat /sys/kernel/debug/tracing/events/sched/sched_switch/hist + { next_pid: 2101 } hitcount:200 + max: 52 next_prio:120 next_comm: cyclictest \ +prev_pid: 0 prev_prio:120 prev_comm: swapper/6 + + { next_pid: 2103 } hitcount: 1326 + max:572 next_prio: 19 next_comm: cyclictest \ +prev_pid: 0 prev_prio:120 prev_comm: swapper/1 + + { next_pid: 2102 } hitcount: 1982 \ + max: 74 next_prio: 19 next_comm: cyclictest \ +prev_pid: 0 prev_prio:120 prev_comm: swapper/5 + +Snapshot taken (see tracing/snapshot). Details: + triggering value { onmax($wakeup_lat) }:572 \ + triggered by event with key: { next_pid: 2103 } + +Totals: +Hits: 3508 +Entries: 3 +Dropped: 0 + +In the above case, the event that triggered the global maximum has +the key with next_pid == 2103. If you look at the bucket that has +2103 as the key, you'll find the additional values save()'d along +with the local maximum for that bucket, which should be the same +as the global maximum (since that was the same value that +triggered the global snapshot). + +And finally, looking at the snapshot data should show at or near +the end the event that triggered the snapshot (in this case you +can verify the timestamps between the sched_waking and +sched_switch events, which should match the time displayed in the +global maximum): + +# cat /sys/kernel/debug/tracing/snapshot + + <...>-2103 [005] d..3 309.873125: sched_switch: prev_comm=cyclictest prev_pid=2103 prev_prio=19
[PATCH v3 6/7] tracing: Add snapshot action
From: Tom Zanussi Add support for hist:handlerXXX($var).snapshot(), which will take a snapshot of the current trace buffer whenever handlerXXX is hit. As a first user, this also adds snapshot() action support for the onmax() handler i.e. hist:onmax($var).snapshot(). Signed-off-by: Tom Zanussi --- Documentation/trace/histogram.txt | 109 ++ kernel/trace/trace_events_hist.c | 289 -- 2 files changed, 390 insertions(+), 8 deletions(-) diff --git a/Documentation/trace/histogram.txt b/Documentation/trace/histogram.txt index 7ffea6aa22e3..125bf360e9be 100644 --- a/Documentation/trace/histogram.txt +++ b/Documentation/trace/histogram.txt @@ -1994,6 +1994,115 @@ hist trigger specification. Entries: 2 Dropped: 0 + - onmax(var).snapshot() + +The 'onmax(var).snapshot()' hist trigger action is invoked +whenever the value of 'var' associated with a histogram entry +exceeds the current maximum contained in that variable. + +The end result is that a global snapshot of the trace buffer will +be saved in the tracing/snapshot file if 'var' exceeds the current +maximum for any hist trigger entry. + +Note that in this case the maximum is a global maximum for the +current trace instance, which is the maximum across all buckets of +the histogram. The key of the specific trace event that caused +the global maximum and the global maximum itself are displayed, +along with a message stating that a snapshot has been taken and +where to find it. The user can use the key information displayed +to locate the corresponding bucket in the histogram for even more +detail. + +As an example the below defines a couple of hist triggers, one for +sched_waking and another for sched_switch, keyed on pid. Whenever +a sched_waking event occurs, the timestamp is saved in the entry +corresponding to the current pid, and when the scheduler switches +back to that pid, the timestamp difference is calculated. If the +resulting latency, stored in wakeup_lat, exceeds the current +maximum latency, a snapshot is taken. As part of the setup, all +the scheduler events are also enabled, which are the events that +will show up in the snapshot when it is taken at some point: + +# echo 1 > /sys/kernel/debug/tracing/events/sched/enable + +# echo 'hist:keys=pid:ts0=common_timestamp.usecs \ +if comm=="cyclictest"' >> \ +/sys/kernel/debug/tracing/events/sched/sched_waking/trigger + +# echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0: \ +onmax($wakeup_lat).save(next_prio,next_comm,prev_pid,prev_prio, \ + prev_comm):onmax($wakeup_lat).snapshot() \ + if next_comm=="cyclictest"' >> \ + /sys/kernel/debug/tracing/events/sched/sched_switch/trigger + +When the histogram is displayed, for each bucket the max value +and the saved values corresponding to the max are displayed +following the rest of the fields. + +If a snaphot was taken, there is also a message indicating that, +along with the value and event that triggered the global maximum: + +# cat /sys/kernel/debug/tracing/events/sched/sched_switch/hist + { next_pid: 2101 } hitcount:200 + max: 52 next_prio:120 next_comm: cyclictest \ +prev_pid: 0 prev_prio:120 prev_comm: swapper/6 + + { next_pid: 2103 } hitcount: 1326 + max:572 next_prio: 19 next_comm: cyclictest \ +prev_pid: 0 prev_prio:120 prev_comm: swapper/1 + + { next_pid: 2102 } hitcount: 1982 \ + max: 74 next_prio: 19 next_comm: cyclictest \ +prev_pid: 0 prev_prio:120 prev_comm: swapper/5 + +Snapshot taken (see tracing/snapshot). Details: + triggering value { onmax($wakeup_lat) }:572 \ + triggered by event with key: { next_pid: 2103 } + +Totals: +Hits: 3508 +Entries: 3 +Dropped: 0 + +In the above case, the event that triggered the global maximum has +the key with next_pid == 2103. If you look at the bucket that has +2103 as the key, you'll find the additional values save()'d along +with the local maximum for that bucket, which should be the same +as the global maximum (since that was the same value that +triggered the global snapshot). + +And finally, looking at the snapshot data should show at or near +the end the event that triggered the snapshot (in this case you +can verify the timestamps between the sched_waking and +sched_switch events, which should match the time displayed in the +global maximum): + +# cat /sys/kernel/debug/tracing/snapshot + + <...>-2103 [005] d..3 309.873125: sched_switch: prev_comm=cyclictest prev_pid=2103 prev_prio=19