Store the preempting context-switch-out event in the perf trace as part
of the PERF_RECORD_SWITCH[_CPU_WIDE] record.

The percentage of preempting versus non-preempting context switches
helps in understanding the nature of the workloads (CPU bound or IO
bound) that are running on a machine.

The event is treated as a preemption when the task->state value of the
thread being switched out is TASK_RUNNING. The event type is encoded
using the PERF_RECORD_MISC_SWITCH_OUT_PREEMPT bit.
        
Signed-off-by: Alexey Budankov <[email protected]>
---
 include/uapi/linux/perf_event.h       | 4 ++++
 kernel/events/core.c                  | 4 ++++
 tools/include/uapi/linux/perf_event.h | 4 ++++
 3 files changed, 12 insertions(+)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 912b85b52344..cd6ad7e13824 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -655,6 +655,10 @@ struct perf_event_mmap_page {
  * perf_event_attr::precise_ip.
  */
 #define PERF_RECORD_MISC_EXACT_IP              (1 << 14)
+/*
+ * Indicates that thread was preempted in TASK_RUNNING state
+ */
+#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT    (1 << 14)
 /*
  * Reserve the last bit to indicate some extended misc field
  */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 7517b4fb3ef4..6b760e785116 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7584,6 +7584,10 @@ static void perf_event_switch(struct task_struct *task,
                },
        };
 
+       if (!sched_in && task->state == TASK_RUNNING)
+               switch_event.event_id.header.misc |=
+                               PERF_RECORD_MISC_SWITCH_OUT_PREEMPT;
+
        perf_iterate_sb(perf_event_switch_output,
                       &switch_event,
                       NULL);
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 912b85b52344..cd6ad7e13824 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -655,6 +655,10 @@ struct perf_event_mmap_page {
  * perf_event_attr::precise_ip.
  */
 #define PERF_RECORD_MISC_EXACT_IP              (1 << 14)
+/*
+ * Indicates that thread was preempted in TASK_RUNNING state
+ */
+#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT    (1 << 14)
 /*
  * Reserve the last bit to indicate some extended misc field
  */

Reply via email to