On 19 Dec 2024, at 14:42, Adrián Moreno wrote:
> On Tue, Dec 17, 2024 at 04:21:52PM +0100, Eelco Chaudron wrote: >> This patch installs a kernel return probe on ovs_dp_upcall() to record >> all successful and failed calls per CPU. The statistics are reported >> when the script exits, providing insights into the upcall behavior. >> >> This is an example output: >> >> # UPCALL STATISTICS (TOTAL [CPU_ID: UPCALL_PER_CPU, ...]): >> Total upcalls : 183247 [0: 8937, 2: 14464, 4: 10372, 6: 4254, ...] >> Successfull upcalls : 120195 [0: 5945, 2: 6379, 4: 5966, 6: 4254, ...] >> Failed upcalls : 63052 [0: 2992, 2: 8085, 4: 4406, 8: 2275, ...] >> 11, EAGAIN : 63052 [0: 2992, 2: 8085, 4: 4406, 8: 2275, ...] >> >> Signed-off-by: Eelco Chaudron <[email protected]> >> >> --- >> v3: - Fixed flake8 error. >> v2: - Cleanup and use per-CPU hash vs per-CPU array for counters. >> --- >> utilities/automake.mk | 1 >> utilities/usdt-scripts/kernel_delay.py | 132 >> +++++++++++++++++++++++++++++++ >> utilities/usdt-scripts/kernel_delay.rst | 9 ++ >> 3 files changed, 141 insertions(+), 1 deletion(-) >> >> diff --git a/utilities/automake.mk b/utilities/automake.mk >> index acc1af4e0..22260b254 100644 >> --- a/utilities/automake.mk >> +++ b/utilities/automake.mk >> @@ -149,6 +149,7 @@ FLAKE8_PYFILES += utilities/ovs-pcap.in \ >> utilities/ovs-pipegen.py \ >> utilities/usdt-scripts/dpif_op_nl_monitor.py \ >> utilities/usdt-scripts/flow_reval_monitor.py \ >> + utilities/usdt-scripts/kernel_delay.py \ >> utilities/usdt-scripts/upcall_monitor.py \ >> utilities/usdt-scripts/upcall_cost.py >> >> diff --git a/utilities/usdt-scripts/kernel_delay.py >> b/utilities/usdt-scripts/kernel_delay.py >> index de6b0c9de..9caf4bd62 100755 >> --- a/utilities/usdt-scripts/kernel_delay.py >> +++ b/utilities/usdt-scripts/kernel_delay.py >> @@ -39,6 +39,7 @@ >> # >> import argparse >> import datetime >> +import errno >> import os >> import pytz >> import psutil >> @@ -556,6 +557,36 @@ TRACEPOINT_PROBE(irq, softirq_exit) >> data->start_ns = 0; >> return 0; >> } >> + >> + >> +/* >> + * For measuring upcall statistics (per CPU). >> + */ >> +BPF_PERCPU_HASH(upcall_count); >> + >> +#if <INSTALL_OVS_DP_UPCALL_PROBE> >> +int kretprobe__ovs_dp_upcall(struct pt_regs *ctx) >> +{ >> + int ret = PT_REGS_RC(ctx); >> + u64 zero = 0; >> + u64 *entry; >> + u64 key; >> + >> + if (!capture_enabled__()) >> + return 0; >> + >> + if (ret >= 0) >> + key = 0; >> + else >> + key = -ret; >> + >> + entry = upcall_count.lookup_or_try_init(&key, &zero); >> + if (entry) >> + *entry += 1; >> + >> + return 0; >> +} >> +#endif /* For measuring upcall statistics/errors. */ >> """ >> >> >> @@ -887,6 +918,7 @@ def reset_capture(): >> bpf["stack_traces"].clear() >> bpf["stop_start"].clear() >> bpf["stop_data"].clear() >> + bpf["upcall_count"].clear() >> >> >> # >> @@ -900,6 +932,93 @@ def print_timestamp(msg): >> print(time_string) >> >> >> +# >> +# Get errno short string >> +# >> +def get_errno_short(err): >> + try: >> + return errno.errorcode[err] >> + except KeyError: >> + return "_unknown_" >> + >> + >> +# >> +# Format a eBPF per-cpu array entry (if the count is > 0) >> +# >> +def format_per_cpu_array(cpu_array, key=None, skip_key=None): > > nit: it's no longer an array. Good catch, changed it to hash. >> + >> + if key is not None: >> + total = cpu_array.sum(key).value >> + if total > 0: >> + for cpu, value in enumerate(cpu_array.getvalue(key)): >> + if value == 0: >> + continue >> + >> + per_cpu += " {}: {},".format(cpu, value) >> + else: >> + total = 0 >> + total_cpu = None >> + >> + for key in cpu_array.keys(): >> + if skip_key is not None and skip_key.value == key.value: >> + continue >> + >> + if total_cpu is None: >> + total_cpu = [0] * len(cpu_array.getvalue(key)) >> + >> + for cpu, value in enumerate(cpu_array.getvalue(key)): >> + total_cpu[cpu] += value >> + total += value >> + >> + if total >= 0: >> + for cpu, value in enumerate(total_cpu): > > Doing some further testing I got this: > > Traceback (most recent call last): > File "/home/amorenoz/src/ovs/utilities/usdt-scripts/./kernel_delay.py", > line 1612, in <module> > main() > File "/home/amorenoz/src/ovs/utilities/usdt-scripts/./kernel_delay.py", > line 1571, in main > process_results(syscall_events=syscall_events, > File "/home/amorenoz/src/ovs/utilities/usdt-scripts/./kernel_delay.py", > line 1198, in process_results > display_upcall_results() > File "/home/amorenoz/src/ovs/utilities/usdt-scripts/./kernel_delay.py", > line 1014, in display_upcall_results > total, per_cpu = format_per_cpu_array(upcalls, skip_key=k) > ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ > File "/home/amorenoz/src/ovs/utilities/usdt-scripts/./kernel_delay.py", > line 975, in format_per_cpu_array > for cpu, value in enumerate(total_cpu): > ^^^^^^^^^^^^^^^^^^^^ > TypeError: 'NoneType' object is not iterable > > In my test all upcalls where successful, so when trying to list Failed > upcalls, "skip_key" is 0, there are no more keys in "cpu_array.keys()" > and "total_cpu" is never set. Forgot to test v3 with no errors, did test it with random errors ;) This is fixed in v4, also cleaned up the output, only displaying the total in case of no errors. >> + if value == 0: >> + continue >> + >> + per_cpu += " {}: {},".format(cpu, value) >> + >> + return total, per_cpu.strip(", ") >> + >> + >> +# >> +# Display kernel upcall statistics >> +# >> +def display_upcall_results(): >> + upcalls = bpf["upcall_count"] >> + have_upcalls = False >> + >> + for k in upcalls: >> + if upcalls.sum(k).value == 0: >> + continue >> + have_upcalls = True >> + break >> + >> + if not have_upcalls: >> + return >> + >> + print("\n\n# UPCALL STATISTICS (TOTAL [CPU_ID: UPCALL_PER_CPU, ...]):\n" > > nit (only because I guess a new version is required and acknowledging > this was my proposal): > s/UPCALL_PER_CPU/N_UPCALLS_PER_CPU/? Done. A v4 was sent out. >> + " Total upcalls : {} [{}]".format( >> + *format_per_cpu_array(upcalls))) >> + >> + for k in sorted(upcalls, key=lambda x: int(x.value)): >> + error = k.value >> + total, per_cpu = format_per_cpu_array(upcalls, key=k) >> + >> + if error != 0 and total == 0: >> + continue >> + >> + if error == 0: >> + print(" Successfull upcalls : {} [{}]".format(total, per_cpu)) >> + >> + total, per_cpu = format_per_cpu_array(upcalls, skip_key=k) >> + print(" Failed upcalls : {} [{}]".format(total, per_cpu)) >> + else: >> + print(" {:3}, {:13}: {} [{}]".format(error, >> + get_errno_short(error), >> + total, per_cpu)) >> + >> + >> # >> # process_results() >> # >> @@ -1074,7 +1193,12 @@ def process_results(syscall_events=None, >> trigger_delta=None): >> indent, "TOTAL:", "", total_count, total_ns)) >> >> # >> - # Print events >> + # Print upcall statistics >> + # >> + display_upcall_results() >> + >> + # >> + # Print syscall events >> # >> lost_stack_traces = 0 >> if syscall_events: >> @@ -1194,6 +1318,9 @@ def main(): >> parser.add_argument("--skip-syscall-poll-events", >> help="Skip poll() syscalls with --syscall-events", >> action="store_true") >> + parser.add_argument("--skip-upcall-stats", >> + help="Skip the collection of upcall statistics", >> + action="store_true") >> parser.add_argument("--stack-trace-size", >> help="Number of unique stack traces that can be " >> "recorded, default 4096. 0 to disable", >> @@ -1298,6 +1425,9 @@ def main(): >> source = source.replace("<STACK_TRACE_ENABLED>", "true" >> if options.stack_trace_size > 0 else "false") >> >> + source = source.replace("<INSTALL_OVS_DP_UPCALL_PROBE>", "0" >> + if options.skip_upcall_stats else "1") >> + >> # >> # Handle start/stop probes >> # >> diff --git a/utilities/usdt-scripts/kernel_delay.rst >> b/utilities/usdt-scripts/kernel_delay.rst >> index 0f6f916a7..41620d760 100644 >> --- a/utilities/usdt-scripts/kernel_delay.rst >> +++ b/utilities/usdt-scripts/kernel_delay.rst >> @@ -109,6 +109,7 @@ followed by resource-specific data. Which are: >> - ``THREAD STOPPED STATISTICS`` >> - ``HARD IRQ STATISTICS`` >> - ``SOFT IRQ STATISTICS`` >> +- ``UPCALL STATISTICS`` >> >> The following sections will describe in detail what statistics they report. >> >> @@ -187,6 +188,14 @@ number of interrupts (``COUNT``), the total time spent >> in the interrupt >> handler (``TOTAL ns``), and the worst-case duration (``MAX ns``). >> >> >> +``UPCALL STATISTICS`` >> +~~~~~~~~~~~~~~~~~~~~~ >> +The ``UPCALL STATISTICS`` section provides information on the number of >> +upcalls sent by the kernel to userspace. If any upcalls fail to be sent, >> +the specific return codes are recorded. Statistics are presented both as >> +a total and on a per-CPU basis. >> + >> + >> The ``--syscall-events`` option >> ------------------------------- >> In addition to reporting global syscall statistics in >> ``SYSCALL_STATISTICS``, >> _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
