The RU/TASK_RUNNING stat means the task is runnable.
It is either currently running or on a run queue waiting to run.

Currently, the crash tool uses the "rq_clock - sched_info->last_arrival" 
formula to
calculate the duration of task in RU state. This is for the scenario of a task 
running on a CPU.

But for the scenario of a task waiting in the  CPU run queue (due to some reason
for example cfs/rt queue throttled), this formula could cause misunderstanding.

For example:
[ 220 10:36:38.026] [RU]  PID: 12345   TASK: ffff8d674ab6b180  CPU: 1   
COMMAND: "task"

Looking closer:

crash> rq.clock ffff8de438a5acc0
  clock = 87029229985307234,

crash> task -R sched_info,se.exec_start
PID: 12345   TASK: ffff8d674ab6b180  CPU: 1     COMMAND: "task"
  sched_info = {
        pcount = 33,
        run_delay = 0,
        last_arrival = 67983031958439673,
        last_queued = 87029224561119369
  },
  se.exec_start = 67983031958476937,

67983031         67983031                 87029224              87029229
|<-   running on CPU  ->| <-      IN    ->|<-    waiting in queue    ->|

For this scenario, the "task" was waiting in the run queue of the CPU only for 
5 seconds,
we should use the "rq_clock - sched_info->last_queued" formula.

We can trust sched_info->last_queued as it is only set when the task enters the 
CPU run queue.
Furthermore, when the task hits/runs on a CPU or dequeues the CPU run queue, it 
will be reset to 0.

Therefore, my idea is simple:

If a task in RU stat and sched_info->last_queued has value (!= 0),
it means this task is waiting in the run queue, use "rq_clock - 
sched_info->last_queued".

Otherwise, if a task in RU stat and sched_info->last_queued = 0
and sched_info->last_arrival has value (it must be), it means this task is 
running on the CPU,
use "rq_clock - sched_info->last_arrival".

Signed-off-by: Kenneth Yin <k...@redhat.com>
---
 defs.h    |  1 +
 symbols.c |  2 ++
 task.c    | 21 +++++++++++++++------
 3 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/defs.h b/defs.h
index 4cf169c..66f5ce4 100644
--- a/defs.h
+++ b/defs.h
@@ -1787,6 +1787,7 @@ struct offset_table {                    /* stash of 
commonly-used offsets */
        long vcpu_struct_rq;
        long task_struct_sched_info;
        long sched_info_last_arrival;
+       long sched_info_last_queued;
        long page_objects;
        long kmem_cache_oo;
        long char_device_struct_cdev;
diff --git a/symbols.c b/symbols.c
index e30fafe..fb5035f 100644
--- a/symbols.c
+++ b/symbols.c
@@ -9930,6 +9930,8 @@ dump_offset_table(char *spec, ulong makestruct)
                 OFFSET(sched_rt_entity_run_list));
        fprintf(fp, "       sched_info_last_arrival: %ld\n",
                 OFFSET(sched_info_last_arrival));
+       fprintf(fp, "       sched_info_last_queued: %ld\n",
+               OFFSET(sched_info_last_queued));
         fprintf(fp, "       task_struct_thread_info: %ld\n",
                 OFFSET(task_struct_thread_info));
         fprintf(fp, "             task_struct_stack: %ld\n",
diff --git a/task.c b/task.c
index 3bafe79..f5386ac 100644
--- a/task.c
+++ b/task.c
@@ -332,9 +332,12 @@ task_init(void)
         MEMBER_OFFSET_INIT(task_struct_last_run, "task_struct", "last_run");
         MEMBER_OFFSET_INIT(task_struct_timestamp, "task_struct", "timestamp");
         MEMBER_OFFSET_INIT(task_struct_sched_info, "task_struct", 
"sched_info");
-       if (VALID_MEMBER(task_struct_sched_info))
+       if (VALID_MEMBER(task_struct_sched_info)) {
                MEMBER_OFFSET_INIT(sched_info_last_arrival, 
                        "sched_info", "last_arrival");
+               MEMBER_OFFSET_INIT(sched_info_last_queued,
+                       "sched_info", "last_queued");
+       }
        if (VALID_MEMBER(task_struct_last_run) || 
            VALID_MEMBER(task_struct_timestamp) ||
            VALID_MEMBER(sched_info_last_arrival)) {
@@ -6035,7 +6038,7 @@ ulonglong
 task_last_run(ulong task)
 {
         ulong last_run;
-       ulonglong timestamp;
+       ulonglong timestamp,last_queued;
 
        timestamp = 0;
         fill_task_struct(task);
@@ -6047,10 +6050,16 @@ task_last_run(ulong task)
        } else if (VALID_MEMBER(task_struct_timestamp))
                timestamp = tt->last_task_read ?  ULONGLONG(tt->task_struct + 
                        OFFSET(task_struct_timestamp)) : 0;
-       else if (VALID_MEMBER(sched_info_last_arrival))
-               timestamp = tt->last_task_read ?  ULONGLONG(tt->task_struct + 
-                       OFFSET(task_struct_sched_info) + 
-                       OFFSET(sched_info_last_arrival)) : 0;
+       else if (VALID_MEMBER(sched_info_last_queued)) 
+               last_queued = ULONGLONG(tt->task_struct +
+                       OFFSET(task_struct_sched_info) +
+                       OFFSET(sched_info_last_queued));
+               if (last_queued != 0) {
+                       timestamp = tt->last_task_read ? last_queued : 0;
+               } else if (VALID_MEMBER(sched_info_last_arrival))
+                       timestamp = tt->last_task_read ?  
ULONGLONG(tt->task_struct + 
+                               OFFSET(task_struct_sched_info) + 
+                               OFFSET(sched_info_last_arrival)) : 0;
        
         return timestamp;
 }
-- 
2.31.1
--
Crash-utility mailing list -- devel@lists.crash-utility.osci.io
To unsubscribe send an email to devel-le...@lists.crash-utility.osci.io
https://${domain_name}/admin/lists/devel.lists.crash-utility.osci.io/
Contribution Guidelines: https://github.com/crash-utility/crash/wiki

Reply via email to