On 16 Dec 2024, at 15:21, Aaron Conole wrote:
> Eelco Chaudron <[email protected]> writes: > >> This patch installs a kernel return probe on ovs_dp_upcall() to record >> all successful and failed calls per CPU. The statistics are reported >> when the script exits, providing insights into the upcall behavior. >> >> This is an example output: >> >> # UPCALL STATISTICS: >> Total upcalls : 183247 [0: 8937, 2: 14464, 4: 10372, 6: 4254, ...] >> Successfull upcalls : 120195 [0: 5945, 2: 6379, 4: 5966, 6: 4254, ...] >> Failed upcalls : 63052 [0: 2992, 2: 8085, 4: 4406, 8: 2275, ...] >> 11, EAGAIN : 63052 [0: 2992, 2: 8085, 4: 4406, 8: 2275, ...] >> >> Signed-off-by: Eelco Chaudron <[email protected]> >> --- > > Some nits below, but I don't think anything that would prevent it from > being accepted. > >> utilities/automake.mk | 1 >> utilities/usdt-scripts/kernel_delay.py | 127 >> +++++++++++++++++++++++++++++++ >> utilities/usdt-scripts/kernel_delay.rst | 9 ++ >> 3 files changed, 136 insertions(+), 1 deletion(-) >> >> diff --git a/utilities/automake.mk b/utilities/automake.mk >> index acc1af4e0..22260b254 100644 >> --- a/utilities/automake.mk >> +++ b/utilities/automake.mk >> @@ -149,6 +149,7 @@ FLAKE8_PYFILES += utilities/ovs-pcap.in \ >> utilities/ovs-pipegen.py \ >> utilities/usdt-scripts/dpif_op_nl_monitor.py \ >> utilities/usdt-scripts/flow_reval_monitor.py \ >> + utilities/usdt-scripts/kernel_delay.py \ > > Should this be a separate patch so we can backport it? I sneaked this in, as I don’t see this as a must for backporting. Changes to the script will probably only happen on the main branch, and if backported, they should have passed flake8 :) >> utilities/usdt-scripts/upcall_monitor.py \ >> utilities/usdt-scripts/upcall_cost.py >> >> diff --git a/utilities/usdt-scripts/kernel_delay.py >> b/utilities/usdt-scripts/kernel_delay.py >> index de6b0c9de..a5aa1a3d4 100755 >> --- a/utilities/usdt-scripts/kernel_delay.py >> +++ b/utilities/usdt-scripts/kernel_delay.py >> @@ -39,6 +39,7 @@ >> # >> import argparse >> import datetime >> +import errno >> import os >> import pytz >> import psutil >> @@ -556,6 +557,30 @@ TRACEPOINT_PROBE(irq, softirq_exit) >> data->start_ns = 0; >> return 0; >> } >> + >> + >> +/* >> + * For measuring upcall statistics (per CPU). >> + */ >> +#define MAX_ERROR 255 >> +BPF_TABLE("percpu_array", uint32_t, uint64_t, upcall_count, MAX_ERROR + 1); >> + >> +#if <INSTALL_OVS_DP_UPCALL_PROBE> >> +int kretprobe__ovs_dp_upcall(struct pt_regs *ctx) >> +{ >> + int ret = PT_REGS_RC(ctx); >> + >> + if (ret > 0) >> + ret = 0; >> + >> + ret = -ret; >> + if (ret > MAX_ERROR) >> + ret = MAX_ERROR; >> + >> + upcall_count.increment(ret); >> + return 0; >> +} >> +#endif /* For measuring upcall statistics/errors. */ >> """ >> >> >> @@ -900,6 +925,95 @@ def print_timestamp(msg): >> print(time_string) >> >> >> +# >> +# Get errno short string >> +# >> +def get_errno_short(err): >> + try: >> + return errno.errorcode[err] >> + except KeyError: >> + return "_unknown_" >> + >> + >> +# >> +# Format a bfd per cpu array entry per cpu (if the count is > 0) >> +# >> +def format_per_cpu_array(cpu_array, key=None, skip_key=None): >> + per_cpu = "" >> + >> + if key is not None: >> + total = cpu_array.sum(key).value >> + if total > 0: >> + for cpu, value in enumerate(cpu_array.getvalue(key)): >> + if value == 0: >> + continue >> + >> + per_cpu += " {}: {},".format(cpu, value) >> + else: >> + total = 0 >> + total_cpu = None >> + >> + for key in cpu_array.keys(): >> + # I've seen extrem odd hight values, to avoid those, exclude any >> + # index higher thant 1024 for now. >> + if (not (0 <= key.value <= 1024)) or (skip_key is not None and >> + skip_key.value == >> key.value): >> + continue >> + >> + if total_cpu is None: >> + total_cpu = [0] * len(cpu_array.getvalue(key)) >> + >> + for cpu, value in enumerate(cpu_array.getvalue(key)): >> + total_cpu[cpu] += value >> + total += value >> + >> + if total >= 0: >> + for cpu, value in enumerate(total_cpu): >> + if value == 0: >> + continue >> + >> + per_cpu += " {}: {},".format(cpu, value) >> + >> + return str(total), per_cpu.strip(", ") >> + >> + >> +# >> +# Display kernel upcall statistics >> +# >> +def display_upcall_results(): >> + upcalls = bpf.get_table("upcall_count") >> + have_upcalls = False >> + >> + for k in upcalls.keys(): >> + if not (0 <= k.value <= 255) or upcalls.sum(k).value == 0: >> + continue >> + have_upcalls = True >> + break >> + >> + if not have_upcalls: >> + return >> + >> + print("\n\n# UPCALL STATISTICS:\n Total upcalls : {} >> [{}]".format( >> + *format_per_cpu_array(upcalls))) >> + >> + for k in upcalls.keys(): >> + error = k.value >> + total, per_cpu = format_per_cpu_array(upcalls, key=k) >> + >> + if error != 0 and total == "0": >> + continue >> + >> + if error == 0: >> + print(" Successfull upcalls : {} [{}]".format(total, per_cpu)) >> + >> + total, per_cpu = format_per_cpu_array(upcalls, skip_key=k) >> + print(" Failed upcalls : {} [{}]".format(total, per_cpu)) >> + else: >> + print(" {:3}, {:13}: {} [{}]".format(error, >> + get_errno_short(error), >> + total, per_cpu)) >> + >> + >> # >> # process_results() >> # >> @@ -1074,7 +1188,12 @@ def process_results(syscall_events=None, >> trigger_delta=None): >> indent, "TOTAL:", "", total_count, total_ns)) >> >> # >> - # Print events >> + # Print upcall statistics >> + # >> + display_upcall_results() >> + >> + # >> + # Print syscall events > > This text changed from the original. It's okay to not separate it here > I think, but that's why the diff looks the way it does. Yes this looked odd, could not convince diff to show if differently. >> # >> lost_stack_traces = 0 >> if syscall_events: >> @@ -1194,6 +1313,9 @@ def main(): >> parser.add_argument("--skip-syscall-poll-events", >> help="Skip poll() syscalls with --syscall-events", >> action="store_true") >> + parser.add_argument("--skip-upcall-stats", >> + help="Skip the collection of upcall statistics", >> + action="store_true") >> parser.add_argument("--stack-trace-size", >> help="Number of unique stack traces that can be " >> "recorded, default 4096. 0 to disable", >> @@ -1298,6 +1420,9 @@ def main(): >> source = source.replace("<STACK_TRACE_ENABLED>", "true" >> if options.stack_trace_size > 0 else "false") >> >> + source = source.replace("<INSTALL_OVS_DP_UPCALL_PROBE>", "0" >> + if options.skip_upcall_stats else "1") >> + >> # >> # Handle start/stop probes >> # >> diff --git a/utilities/usdt-scripts/kernel_delay.rst >> b/utilities/usdt-scripts/kernel_delay.rst >> index 0f6f916a7..41620d760 100644 >> --- a/utilities/usdt-scripts/kernel_delay.rst >> +++ b/utilities/usdt-scripts/kernel_delay.rst >> @@ -109,6 +109,7 @@ followed by resource-specific data. Which are: >> - ``THREAD STOPPED STATISTICS`` >> - ``HARD IRQ STATISTICS`` >> - ``SOFT IRQ STATISTICS`` >> +- ``UPCALL STATISTICS`` >> >> The following sections will describe in detail what statistics they report. >> >> @@ -187,6 +188,14 @@ number of interrupts (``COUNT``), the total time spent >> in the interrupt >> handler (``TOTAL ns``), and the worst-case duration (``MAX ns``). >> >> >> +``UPCALL STATISTICS`` >> +~~~~~~~~~~~~~~~~~~~~~ >> +The ``UPCALL STATISTICS`` section provides information on the number of >> +upcalls sent by the kernel to userspace. If any upcalls fail to be sent, >> +the specific return codes are recorded. Statistics are presented both as >> +a total and on a per-CPU basis. >> + >> + >> The ``--syscall-events`` option >> ------------------------------- >> In addition to reporting global syscall statistics in >> ``SYSCALL_STATISTICS``, >> >> _______________________________________________ >> dev mailing list >> [email protected] >> https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
