This addition uses the existing syscall probes to record statistics
related to the OVS 'Unreasonably long ... ms poll interval' message.
Basically, it records the min/max/average time between system poll
calls. This can be used to determine if a long poll event has
occurred during the capture.

Signed-off-by: Eelco Chaudron <echau...@redhat.com>
---
The long line warning can be ignored. I'm keeping the original
output in the documentation.

v2: - Calculate the average on display, not per sample.
    - Enhanced the documentation with additional log details.
---
 utilities/usdt-scripts/kernel_delay.py  | 70 ++++++++++++++++++++++++-
 utilities/usdt-scripts/kernel_delay.rst | 17 +++++-
 2 files changed, 83 insertions(+), 4 deletions(-)

diff --git a/utilities/usdt-scripts/kernel_delay.py 
b/utilities/usdt-scripts/kernel_delay.py
index 367d27e34..cbca7ef8b 100755
--- a/utilities/usdt-scripts/kernel_delay.py
+++ b/utilities/usdt-scripts/kernel_delay.py
@@ -182,11 +182,27 @@ struct syscall_data_key_t {
     u32 syscall;
 };
 
+struct long_poll_data_key_t {
+    u32 pid;
+    u32 tid;
+};
+
+struct long_poll_data_t {
+    u64 count;
+    u64 total_ns;
+    u64 min_ns;
+    u64 max_ns;
+};
+
 BPF_HASH(syscall_start, u64, u64);
 BPF_HASH(syscall_data, struct syscall_data_key_t, struct syscall_data_t);
+BPF_HASH(long_poll_start, u64, u64);
+BPF_HASH(long_poll_data, struct long_poll_data_key_t, struct long_poll_data_t);
 
 TRACEPOINT_PROBE(raw_syscalls, sys_enter) {
     u64 pid_tgid = bpf_get_current_pid_tgid();
+    struct long_poll_data_t *val, init_val = {.min_ns = U64_MAX};
+    struct long_poll_data_key_t key;
 
     if (!capture_enabled(pid_tgid))
        return 0;
@@ -194,6 +210,29 @@ TRACEPOINT_PROBE(raw_syscalls, sys_enter) {
     u64 t = bpf_ktime_get_ns();
     syscall_start.update(&pid_tgid, &t);
 
+    /* Do long poll handling from here on. */
+    if (args->id != <SYSCALL_POLL_ID>)
+        return 0;
+
+    u64 *start_ns = long_poll_start.lookup(&pid_tgid);
+
+    if (!start_ns || *start_ns == 0)
+        return 0;
+
+    key.pid = pid_tgid >> 32;
+    key.tid = (u32)pid_tgid;
+
+    val = long_poll_data.lookup_or_try_init(&key, &init_val);
+    if (val) {
+        u64 delta = t - *start_ns;
+        val->count++;
+        val->total_ns += delta;
+        if (delta > val->max_ns)
+            val->max_ns = delta;
+        else if (delta < val->min_ns)
+            val->min_ns = delta;
+    }
+
     return 0;
 }
 
@@ -206,6 +245,12 @@ TRACEPOINT_PROBE(raw_syscalls, sys_exit) {
     if (!capture_enabled(pid_tgid))
        return 0;
 
+    u64 t = bpf_ktime_get_ns();
+
+    if (args->id == <SYSCALL_POLL_ID>) {
+        long_poll_start.update(&pid_tgid, &t);
+    }
+
     key.pid = pid_tgid >> 32;
     key.tid = (u32)pid_tgid;
     key.syscall = args->id;
@@ -217,7 +262,7 @@ TRACEPOINT_PROBE(raw_syscalls, sys_exit) {
 
     val = syscall_data.lookup_or_try_init(&key, &zero);
     if (val) {
-        u64 delta = bpf_ktime_get_ns() - *start_ns;
+        u64 delta = t - *start_ns;
         val->count++;
         val->total_ns += delta;
         if (delta > val->worst_ns)
@@ -1039,6 +1084,9 @@ def process_results(syscall_events=None, 
trigger_delta=None):
     threads_syscall = {k.tid for k, _ in bpf["syscall_data"].items()
                        if k.syscall != 0xffffffff}
 
+    threads_long_poll = {k.tid for k, _ in bpf["long_poll_data"].items()
+                         if k.pid != 0xffffffff}
+
     threads_run = {k.tid for k, _ in bpf["run_data"].items()
                    if k.pid != 0xffffffff}
 
@@ -1055,7 +1103,8 @@ def process_results(syscall_events=None, 
trigger_delta=None):
                        if k.pid != 0xffffffff}
 
     threads = sorted(threads_syscall | threads_run | threads_ready |
-                     threads_stopped | threads_hardirq | threads_softirq,
+                     threads_stopped | threads_hardirq | threads_softirq |
+                     threads_long_poll,
                      key=lambda x: get_thread_name(options.pid, x))
 
     #
@@ -1099,6 +1148,21 @@ def process_results(syscall_events=None, 
trigger_delta=None):
             print("{}{:20.20} {:6}  {:10}  {:16,}".format(
                 indent, "TOTAL( - poll):", "", total_count, total_ns))
 
+        #
+        # LONG POLL STATISTICS
+        #
+        for k, v in filter(lambda t: t[0].tid == thread,
+                           bpf["long_poll_data"].items()):
+
+            print("\n{:10} {:16} {}\n{}{:10}  {:>16}  {:>16}  {:>16}".format(
+                "", "", "[LONG POLL STATISTICS]", indent,
+                "COUNT", "AVERAGE ns", "MIN ns", "MAX ns"))
+
+            print("{}{:10}  {:16,}  {:16,}  {:16,}".format(
+                indent, v.count, int(v.total_ns / v.count),
+                v.min_ns, v.max_ns))
+            break
+
         #
         # THREAD RUN STATISTICS
         #
@@ -1429,6 +1493,8 @@ def main():
     source = source.replace("<STACK_TRACE_ENABLED>", "true"
                             if options.stack_trace_size > 0 else "false")
 
+    source = source.replace("<SYSCALL_POLL_ID>", str(poll_id))
+
     source = source.replace("<INSTALL_OVS_DP_UPCALL_PROBE>", "0"
                             if options.skip_upcall_stats else "1")
 
diff --git a/utilities/usdt-scripts/kernel_delay.rst 
b/utilities/usdt-scripts/kernel_delay.rst
index 41620d760..0a2a430a5 100644
--- a/utilities/usdt-scripts/kernel_delay.rst
+++ b/utilities/usdt-scripts/kernel_delay.rst
@@ -67,13 +67,17 @@ with the ``--pid`` option.
                   read                      0           1             1,292    
         1,292
                   TOTAL( - poll):                     519       144,405,334
 
+                  [LONG POLL STATISTICS]
+                  COUNT             AVERAGE ns            MIN ns            
MAX ns
+                          58            76,773             7,388           
234,129
+
                   [THREAD RUN STATISTICS]
                   SCHED_CNT           TOTAL ns            MIN ns            
MAX ns
-                       6       136,764,071             1,480       115,146,424
+                           6       136,764,071             1,480       
115,146,424
 
                   [THREAD READY STATISTICS]
                   SCHED_CNT           TOTAL ns            MAX ns
-                       7            11,334             6,636
+                           7            11,334             6,636
 
                   [THREAD STOPPED STATISTICS]
                   STOP_CNT            TOTAL ns            MAX ns
@@ -104,6 +108,7 @@ For this, it displays the thread's id (``TID``) and name 
(``THREAD``),
 followed by resource-specific data. Which are:
 
 - ``SYSCALL STATISTICS``
+- ``LONG POLL STATISTICS``
 - ``THREAD RUN STATISTICS``
 - ``THREAD READY STATISTICS``
 - ``THREAD STOPPED STATISTICS``
@@ -129,6 +134,14 @@ Note that it only counts calls that started and stopped 
during the
 measurement interval!
 
 
+``LONG POLL STATISTICS``
+~~~~~~~~~~~~~~~~~~~~~~
+``LONG POLL STATISTICS`` tell you how long the thread was running between two
+poll system calls. This relates to the 'Unreasonably long ... ms poll interval'
+message reported by ovs-vswitchd. More details about this message can be found
+in the example section.
+
+
 ``THREAD RUN STATISTICS``
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 ``THREAD RUN STATISTICS`` tell you how long the thread was running on a CPU
-- 
2.43.5

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to