Eelco Chaudron <[email protected]> writes:

> This patch installs a kernel return probe on ovs_dp_upcall() to record
> all successful and failed calls per CPU. The statistics are reported
> when the script exits, providing insights into the upcall behavior.
>
> This is an example output:
>
>   # UPCALL STATISTICS:
>     Total upcalls       : 183247 [0: 8937, 2: 14464, 4: 10372, 6: 4254, ...]
>     Successfull upcalls : 120195 [0: 5945, 2: 6379, 4: 5966, 6: 4254, ...]
>     Failed upcalls      : 63052 [0: 2992, 2: 8085, 4: 4406, 8: 2275, ...]
>        11, EAGAIN       : 63052 [0: 2992, 2: 8085, 4: 4406, 8: 2275, ...]
>
> Signed-off-by: Eelco Chaudron <[email protected]>
> ---

Some nits below, but I don't think anything that would prevent it from
being accepted.

>  utilities/automake.mk                   |    1 
>  utilities/usdt-scripts/kernel_delay.py  |  127 
> +++++++++++++++++++++++++++++++
>  utilities/usdt-scripts/kernel_delay.rst |    9 ++
>  3 files changed, 136 insertions(+), 1 deletion(-)
>
> diff --git a/utilities/automake.mk b/utilities/automake.mk
> index acc1af4e0..22260b254 100644
> --- a/utilities/automake.mk
> +++ b/utilities/automake.mk
> @@ -149,6 +149,7 @@ FLAKE8_PYFILES += utilities/ovs-pcap.in \
>       utilities/ovs-pipegen.py \
>       utilities/usdt-scripts/dpif_op_nl_monitor.py \
>       utilities/usdt-scripts/flow_reval_monitor.py \
> +     utilities/usdt-scripts/kernel_delay.py \

Should this be a separate patch so we can backport it?

>       utilities/usdt-scripts/upcall_monitor.py \
>       utilities/usdt-scripts/upcall_cost.py
>  
> diff --git a/utilities/usdt-scripts/kernel_delay.py 
> b/utilities/usdt-scripts/kernel_delay.py
> index de6b0c9de..a5aa1a3d4 100755
> --- a/utilities/usdt-scripts/kernel_delay.py
> +++ b/utilities/usdt-scripts/kernel_delay.py
> @@ -39,6 +39,7 @@
>  #
>  import argparse
>  import datetime
> +import errno
>  import os
>  import pytz
>  import psutil
> @@ -556,6 +557,30 @@ TRACEPOINT_PROBE(irq, softirq_exit)
>      data->start_ns = 0;
>      return 0;
>  }
> +
> +
> +/*
> + * For measuring upcall statistics (per CPU).
> + */
> +#define MAX_ERROR 255
> +BPF_TABLE("percpu_array", uint32_t, uint64_t, upcall_count, MAX_ERROR + 1);
> +
> +#if <INSTALL_OVS_DP_UPCALL_PROBE>
> +int kretprobe__ovs_dp_upcall(struct pt_regs *ctx)
> +{
> +    int ret = PT_REGS_RC(ctx);
> +
> +    if (ret > 0)
> +        ret = 0;
> +
> +    ret = -ret;
> +    if (ret > MAX_ERROR)
> +        ret = MAX_ERROR;
> +
> +    upcall_count.increment(ret);
> +    return 0;
> +}
> +#endif /* For measuring upcall statistics/errors. */
>  """
>  
>  
> @@ -900,6 +925,95 @@ def print_timestamp(msg):
>      print(time_string)
>  
>  
> +#
> +# Get errno short string
> +#
> +def get_errno_short(err):
> +    try:
> +        return errno.errorcode[err]
> +    except KeyError:
> +        return "_unknown_"
> +
> +
> +#
> +# Format a bfd per cpu array entry per cpu (if the count is > 0)
> +#
> +def format_per_cpu_array(cpu_array, key=None, skip_key=None):
> +    per_cpu = ""
> +
> +    if key is not None:
> +        total = cpu_array.sum(key).value
> +        if total > 0:
> +            for cpu, value in enumerate(cpu_array.getvalue(key)):
> +                if value == 0:
> +                    continue
> +
> +                per_cpu += " {}: {},".format(cpu, value)
> +    else:
> +        total = 0
> +        total_cpu = None
> +
> +        for key in cpu_array.keys():
> +            # I've seen extrem odd hight values, to avoid those, exclude any
> +            # index higher thant 1024 for now.
> +            if (not (0 <= key.value <= 1024)) or (skip_key is not None and
> +                                                  skip_key.value == 
> key.value):
> +                continue
> +
> +            if total_cpu is None:
> +                total_cpu = [0] * len(cpu_array.getvalue(key))
> +
> +            for cpu, value in enumerate(cpu_array.getvalue(key)):
> +                total_cpu[cpu] += value
> +                total += value
> +
> +        if total >= 0:
> +            for cpu, value in enumerate(total_cpu):
> +                if value == 0:
> +                    continue
> +
> +                per_cpu += " {}: {},".format(cpu, value)
> +
> +    return str(total), per_cpu.strip(", ")
> +
> +
> +#
> +# Display kernel upcall statistics
> +#
> +def display_upcall_results():
> +    upcalls = bpf.get_table("upcall_count")
> +    have_upcalls = False
> +
> +    for k in upcalls.keys():
> +        if not (0 <= k.value <= 255) or upcalls.sum(k).value == 0:
> +            continue
> +        have_upcalls = True
> +        break
> +
> +    if not have_upcalls:
> +        return
> +
> +    print("\n\n# UPCALL STATISTICS:\n  Total upcalls       : {} [{}]".format(
> +        *format_per_cpu_array(upcalls)))
> +
> +    for k in upcalls.keys():
> +        error = k.value
> +        total, per_cpu = format_per_cpu_array(upcalls, key=k)
> +
> +        if error != 0 and total == "0":
> +            continue
> +
> +        if error == 0:
> +            print("  Successfull upcalls : {} [{}]".format(total, per_cpu))
> +
> +            total, per_cpu = format_per_cpu_array(upcalls, skip_key=k)
> +            print("  Failed upcalls      : {} [{}]".format(total, per_cpu))
> +        else:
> +            print("    {:3}, {:13}: {} [{}]".format(error,
> +                                                    get_errno_short(error),
> +                                                    total, per_cpu))
> +
> +
>  #
>  # process_results()
>  #
> @@ -1074,7 +1188,12 @@ def process_results(syscall_events=None, 
> trigger_delta=None):
>                  indent, "TOTAL:", "", total_count, total_ns))
>  
>      #
> -    # Print events
> +    # Print upcall statistics
> +    #
> +    display_upcall_results()
> +
> +    #
> +    # Print syscall events

This text changed from the original.  It's okay to not separate it here
I think, but that's why the diff looks the way it does.

>      #
>      lost_stack_traces = 0
>      if syscall_events:
> @@ -1194,6 +1313,9 @@ def main():
>      parser.add_argument("--skip-syscall-poll-events",
>                          help="Skip poll() syscalls with --syscall-events",
>                          action="store_true")
> +    parser.add_argument("--skip-upcall-stats",
> +                        help="Skip the collection of upcall statistics",
> +                        action="store_true")
>      parser.add_argument("--stack-trace-size",
>                          help="Number of unique stack traces that can be "
>                          "recorded, default 4096. 0 to disable",
> @@ -1298,6 +1420,9 @@ def main():
>      source = source.replace("<STACK_TRACE_ENABLED>", "true"
>                              if options.stack_trace_size > 0 else "false")
>  
> +    source = source.replace("<INSTALL_OVS_DP_UPCALL_PROBE>", "0"
> +                            if options.skip_upcall_stats else "1")
> +
>      #
>      # Handle start/stop probes
>      #
> diff --git a/utilities/usdt-scripts/kernel_delay.rst 
> b/utilities/usdt-scripts/kernel_delay.rst
> index 0f6f916a7..41620d760 100644
> --- a/utilities/usdt-scripts/kernel_delay.rst
> +++ b/utilities/usdt-scripts/kernel_delay.rst
> @@ -109,6 +109,7 @@ followed by resource-specific data. Which are:
>  - ``THREAD STOPPED STATISTICS``
>  - ``HARD IRQ STATISTICS``
>  - ``SOFT IRQ STATISTICS``
> +- ``UPCALL STATISTICS``
>  
>  The following sections will describe in detail what statistics they report.
>  
> @@ -187,6 +188,14 @@ number of interrupts (``COUNT``), the total time spent 
> in the interrupt
>  handler (``TOTAL ns``), and the worst-case duration (``MAX ns``).
>  
>  
> +``UPCALL STATISTICS``
> +~~~~~~~~~~~~~~~~~~~~~
> +The ``UPCALL STATISTICS`` section provides information on the number of
> +upcalls sent by the kernel to userspace. If any upcalls fail to be sent,
> +the specific return codes are recorded. Statistics are presented both as
> +a total and on a per-CPU basis.
> +
> +
>  The ``--syscall-events`` option
>  -------------------------------
>  In addition to reporting global syscall statistics in ``SYSCALL_STATISTICS``,
>
> _______________________________________________
> dev mailing list
> [email protected]
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to