Eelco Chaudron <[email protected]> writes: > This patch adds an utility that can be used to determine if > an issue is related to a lack of Linux kernel resources. > > This tool is also featured in a Red Hat developers blog article: > > > https://developers.redhat.com/articles/2023/07/24/troubleshooting-open-vswitch-kernel-blame > > Signed-off-by: Eelco Chaudron <[email protected]> > ---
Hi Eelco, Thanks for the writeup and utilities. > utilities/automake.mk | 4 > utilities/usdt-scripts/kernel_delay.py | 1397 > +++++++++++++++++++++++++++++++ > utilities/usdt-scripts/kernel_delay.rst | 594 +++++++++++++ > 3 files changed, 1995 insertions(+) > create mode 100755 utilities/usdt-scripts/kernel_delay.py > create mode 100644 utilities/usdt-scripts/kernel_delay.rst > > diff --git a/utilities/automake.mk b/utilities/automake.mk > index 37d679f82..9a2114df4 100644 > --- a/utilities/automake.mk > +++ b/utilities/automake.mk > @@ -23,6 +23,8 @@ scripts_DATA += utilities/ovs-lib > usdt_SCRIPTS += \ > utilities/usdt-scripts/bridge_loop.bt \ > utilities/usdt-scripts/dpif_nl_exec_monitor.py \ > + utilities/usdt-scripts/kernel_delay.py \ > + utilities/usdt-scripts/kernel_delay.rst \ > utilities/usdt-scripts/reval_monitor.py \ > utilities/usdt-scripts/upcall_cost.py \ > utilities/usdt-scripts/upcall_monitor.py > @@ -70,6 +72,8 @@ EXTRA_DIST += \ > utilities/docker/debian/build-kernel-modules.sh \ > utilities/usdt-scripts/bridge_loop.bt \ > utilities/usdt-scripts/dpif_nl_exec_monitor.py \ > + utilities/usdt-scripts/kernel_delay.py \ > + utilities/usdt-scripts/kernel_delay.rst \ > utilities/usdt-scripts/reval_monitor.py \ > utilities/usdt-scripts/upcall_cost.py \ > utilities/usdt-scripts/upcall_monitor.py > diff --git a/utilities/usdt-scripts/kernel_delay.py > b/utilities/usdt-scripts/kernel_delay.py > new file mode 100755 > index 000000000..bba7bc044 > --- /dev/null > +++ b/utilities/usdt-scripts/kernel_delay.py > @@ -0,0 +1,1397 @@ > +#!/usr/bin/env python3 > +# > +# Copyright (c) 2022 Red Hat, Inc. Maybe this should be 2022,2023 ? > +# > +# Licensed under the Apache License, Version 2.0 (the "License"); > +# you may not use this file except in compliance with the License. > +# You may obtain a copy of the License at: > +# > +# http://www.apache.org/licenses/LICENSE-2.0 > +# > +# Unless required by applicable law or agreed to in writing, software > +# distributed under the License is distributed on an "AS IS" BASIS, > +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > +# See the License for the specific language governing permissions and > +# limitations under the License. > +# > +# Script information: > +# ------------------- > +# This script allows a developer to quickly identify if the issue at hand > +# might be related to the kernel running out of resources or if it really is > +# an Open vSwitch issue. > +# > +# For documentation see the kernel_delay.rst file. > +# > +try: > + from bcc import BPF, USDT, USDTException > + from bcc.syscall import syscalls, syscall_name > +except ModuleNotFoundError: > + print("ERROR: Can't find the BPF Compiler Collection (BCC) tools!") I think this needs to exit in this exception block > + > +from enum import IntEnum > + > +import argparse > +import datetime > +import os > +import pytz > +import psutil > +import re > +import sys > +import time > + > +import ctypes as ct > + > +try: > + from time import time_ns > +except ImportError: > + # For compatibility with Python <= v3.6. 
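(Coming back to the BCC import block a bit further up: to make that failure
mode explicit, something along these lines is roughly what I had in mind --
only a sketch, and note that sys is only imported further down, so either that
import needs to move up or we can raise SystemExit directly:

    except ModuleNotFoundError:
        print("ERROR: Can't find the BPF Compiler Collection (BCC) tools!")
        raise SystemExit(1)

Without it the script keeps going and later dies with a NameError on BPF/USDT,
which hides the real problem.)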
> + def time_ns(): > + now = datetime.datetime.now() > + return int(now.timestamp() * 1e9) > + > + > +# > +# Actual eBPF source code > +# > +EBPF_SOURCE = """ > +#include <linux/irq.h> > +#include <linux/sched.h> > + > +#define MONITOR_PID <MONITOR_PID> > + > +enum { > +<EVENT_ENUM> > +}; > + > +struct event_t { > + u64 ts; > + u32 tid; > + u32 id; > + > + int user_stack_id; > + int kernel_stack_id; > + > + u32 syscall; > + u64 entry_ts; > + > +}; > + > +BPF_RINGBUF_OUTPUT(events, <BUFFER_PAGE_CNT>); > +BPF_STACK_TRACE(stack_traces, <STACK_TRACE_SIZE>); > +BPF_TABLE("percpu_array", uint32_t, uint64_t, dropcnt, 1); > +BPF_TABLE("percpu_array", uint32_t, uint64_t, trigger_miss, 1); > + > +BPF_ARRAY(capture_on, u64, 1); > +static bool capture_enabled(u64 pid_tgid) { > + int key = 0; > + u64 *ret; > + > + if ((pid_tgid >> 32) != MONITOR_PID) > + return false; > + > + ret = capture_on.lookup(&key); > + return ret && *ret == 1; > +} > + > +static inline bool capture_enabled__() { > + int key = 0; > + u64 *ret; > + > + ret = capture_on.lookup(&key); > + return ret && *ret == 1; > +} > + > + > +static struct event_t *get_event(uint32_t id) { > + struct event_t *event = events.ringbuf_reserve(sizeof(struct event_t)); > + > + if (!event) { > + dropcnt.increment(0); > + return NULL; > + } > + > + event->id = id; > + event->ts = bpf_ktime_get_ns(); > + event->tid = bpf_get_current_pid_tgid(); > + > + return event; > +} > + > +static int start_trigger() { > + int key = 0; > + u64 *val = capture_on.lookup(&key); > + > + /* If the value is -1 we can't start as we are still processing the > + * results in userspace. */ > + if (!val || *val != 0) { > + trigger_miss.increment(0); > + return 0; > + } > + > + struct event_t *event = get_event(EVENT_START_TRIGGER); > + if (event) { > + events.ringbuf_submit(event, 0); > + *val = 1; > + } else { > + trigger_miss.increment(0); > + } > + return 0; > +} > + > +static int stop_trigger() { > + int key = 0; > + u64 *val = capture_on.lookup(&key); > + > + if (!val || *val != 1) > + return 0; > + > + struct event_t *event = get_event(EVENT_STOP_TRIGGER); > + > + if (event) > + events.ringbuf_submit(event, 0); > + > + if (val) > + *val = -1; > + > + return 0; > +} > + > +<START_TRIGGER> > +<STOP_TRIGGER> > + > + > +/* > + * For the syscall monitor the following probes get installed. 
> + */ > +struct syscall_data_t { > + u64 count; > + u64 total_ns; > + u64 worst_ns; > +}; > + > +struct syscall_data_key_t { > + u32 pid; > + u32 tid; > + u32 syscall; > +}; > + > +BPF_HASH(syscall_start, u64, u64); > +BPF_HASH(syscall_data, struct syscall_data_key_t, struct syscall_data_t); > + > +TRACEPOINT_PROBE(raw_syscalls, sys_enter) { > + u64 pid_tgid = bpf_get_current_pid_tgid(); > + > + if (!capture_enabled(pid_tgid)) > + return 0; > + > + u64 t = bpf_ktime_get_ns(); > + syscall_start.update(&pid_tgid, &t); > + > + return 0; > +} > + > +TRACEPOINT_PROBE(raw_syscalls, sys_exit) { > + struct syscall_data_t *val, zero = {}; > + struct syscall_data_key_t key; > + > + u64 pid_tgid = bpf_get_current_pid_tgid(); > + > + if (!capture_enabled(pid_tgid)) > + return 0; > + > + key.pid = pid_tgid >> 32; > + key.tid = (u32)pid_tgid; > + key.syscall = args->id; > + > + u64 *start_ns = syscall_start.lookup(&pid_tgid); > + > + if (!start_ns) > + return 0; > + > + val = syscall_data.lookup_or_try_init(&key, &zero); > + if (val) { > + u64 delta = bpf_ktime_get_ns() - *start_ns; > + val->count++; > + val->total_ns += delta; > + if (val->worst_ns == 0 || delta > val->worst_ns) > + val->worst_ns = delta; > + > + if (<SYSCAL_TRACE_EVENTS>) { > + struct event_t *event = get_event(EVENT_SYSCALL); > + if (event) { > + event->syscall = args->id; > + event->entry_ts = *start_ns; > + if (<STACK_TRACE_ENABLED>) { > + event->user_stack_id = stack_traces.get_stackid( > + args, BPF_F_USER_STACK); > + event->kernel_stack_id = stack_traces.get_stackid( > + args, 0); > + } > + events.ringbuf_submit(event, 0); > + } > + } > + } > + return 0; > +} > + > + > + > +/* > + * For measuring the thread run time, we need the following. > + */ > +struct run_time_data_t { > + u64 count; > + u64 total_ns; > + u64 max_ns; > + u64 min_ns; > +}; > + > +struct pid_tid_key_t { > + u32 pid; > + u32 tid; > +}; > + > +BPF_HASH(run_start, u64, u64); > +BPF_HASH(run_data, struct pid_tid_key_t, struct run_time_data_t); > + > +static inline void thread_start_run(u64 pid_tgid, u64 ktime) > +{ > + run_start.update(&pid_tgid, &ktime); > +} > + > +static inline void thread_stop_run(u32 pid, u32 tgid, u64 ktime) > +{ > + u64 pid_tgid = (u64) tgid << 32 | pid; > + u64 *start_ns = run_start.lookup(&pid_tgid); > + > + if (!start_ns || *start_ns == 0) > + return; > + > + struct run_time_data_t *val, zero = {}; > + struct pid_tid_key_t key = { .pid = tgid, > + .tid = pid }; > + > + val = run_data.lookup_or_try_init(&key, &zero); > + if (val) { > + u64 delta = ktime - *start_ns; > + val->count++; > + val->total_ns += delta; > + if (val->max_ns == 0 || delta > val->max_ns) > + val->max_ns = delta; > + if (val->min_ns == 0 || delta < val->min_ns) > + val->min_ns = delta; > + } > + *start_ns = 0; > +} > + > + > +/* > + * For measuring the thread-ready delay, we need the following. 
> + */ > +struct ready_data_t { > + u64 count; > + u64 total_ns; > + u64 worst_ns; > +}; > + > +BPF_HASH(ready_start, u64, u64); > +BPF_HASH(ready_data, struct pid_tid_key_t, struct ready_data_t); > + > +static inline int sched_wakeup__(u32 pid, u32 tgid) > +{ > + u64 pid_tgid = (u64) tgid << 32 | pid; > + > + if (!capture_enabled(pid_tgid)) > + return 0; > + > + u64 t = bpf_ktime_get_ns(); > + ready_start.update(&pid_tgid, &t); > + return 0; > +} > + > +RAW_TRACEPOINT_PROBE(sched_wakeup) > +{ > + struct task_struct *t = (struct task_struct *)ctx->args[0]; > + return sched_wakeup__(t->pid, t->tgid); > +} > + > +RAW_TRACEPOINT_PROBE(sched_wakeup_new) > +{ > + struct task_struct *t = (struct task_struct *)ctx->args[0]; > + return sched_wakeup__(t->pid, t->tgid); > +} > + > +RAW_TRACEPOINT_PROBE(sched_switch) > +{ > + struct task_struct *prev = (struct task_struct *)ctx->args[1]; > + struct task_struct *next= (struct task_struct *)ctx->args[2]; > + u64 ktime = 0; > + > + if (!capture_enabled__()) > + return 0; > + > + if (prev-><STATE_FIELD> == TASK_RUNNING && prev->tgid == MONITOR_PID) > + sched_wakeup__(prev->pid, prev->tgid); > + > + if (prev->tgid == MONITOR_PID) { > + ktime = bpf_ktime_get_ns(); > + thread_stop_run(prev->pid, prev->tgid, ktime); > + } > + > + u64 pid_tgid = (u64)next->tgid << 32 | next->pid; > + > + if (next->tgid != MONITOR_PID) > + return 0; > + > + if (ktime == 0) > + ktime = bpf_ktime_get_ns(); > + > + u64 *start_ns = ready_start.lookup(&pid_tgid); > + > + if (start_ns && *start_ns != 0) { > + > + struct ready_data_t *val, zero = {}; > + struct pid_tid_key_t key = { .pid = next->tgid, > + .tid = next->pid }; > + > + val = ready_data.lookup_or_try_init(&key, &zero); > + if (val) { > + u64 delta = ktime - *start_ns; > + val->count++; > + val->total_ns += delta; > + if (val->worst_ns == 0 || delta > val->worst_ns) > + val->worst_ns = delta; > + } > + *start_ns = 0; > + } > + > + thread_start_run(pid_tgid, ktime); > + return 0; > +} > + > +/* > + * For measuring the hard irq time, we need the following. 
> + */ > +struct hardirq_start_data_t { > + u64 start_ns; > + char irq_name[32]; > +}; > + > +struct hardirq_data_t { > + u64 count; > + u64 total_ns; > + u64 worst_ns; > +}; > + > +struct hardirq_data_key_t { > + u32 pid; > + u32 tid; > + char irq_name[32]; > +}; > + > +BPF_HASH(hardirq_start, u64, struct hardirq_start_data_t); > +BPF_HASH(hardirq_data, struct hardirq_data_key_t, struct hardirq_data_t); > + > +TRACEPOINT_PROBE(irq, irq_handler_entry) > +{ > + u64 pid_tgid = bpf_get_current_pid_tgid(); > + > + if (!capture_enabled(pid_tgid)) > + return 0; > + > + struct hardirq_start_data_t data = {}; > + > + data.start_ns = bpf_ktime_get_ns(); > + TP_DATA_LOC_READ_STR(&data.irq_name, name, sizeof(data.irq_name)); > + hardirq_start.update(&pid_tgid, &data); > + return 0; > +} > + > +TRACEPOINT_PROBE(irq, irq_handler_exit) > +{ > + u64 pid_tgid = bpf_get_current_pid_tgid(); > + > + if (!capture_enabled(pid_tgid)) > + return 0; > + > + struct hardirq_start_data_t *data; > + data = hardirq_start.lookup(&pid_tgid); > + if (!data || data->start_ns == 0) > + return 0; > + > + if (args->ret != IRQ_NONE) { > + struct hardirq_data_t *val, zero = {}; > + struct hardirq_data_key_t key = { .pid = pid_tgid >> 32, > + .tid = (u32)pid_tgid }; > + > + bpf_probe_read_kernel(&key.irq_name, sizeof(key.irq_name), > + data->irq_name); > + val = hardirq_data.lookup_or_try_init(&key, &zero); > + if (val) { > + u64 delta = bpf_ktime_get_ns() - data->start_ns; > + val->count++; > + val->total_ns += delta; > + if (val->worst_ns == 0 || delta > val->worst_ns) > + val->worst_ns = delta; > + } > + } > + > + data->start_ns = 0; > + return 0; > +} > + > +/* > + * For measuring the soft irq time, we need the following. > + */ > +struct softirq_start_data_t { > + u64 start_ns; > + u32 vec_nr; > +}; > + > +struct softirq_data_t { > + u64 count; > + u64 total_ns; > + u64 worst_ns; > +}; > + > +struct softirq_data_key_t { > + u32 pid; > + u32 tid; > + u32 vec_nr; > +}; > + > +BPF_HASH(softirq_start, u64, struct softirq_start_data_t); > +BPF_HASH(softirq_data, struct softirq_data_key_t, struct softirq_data_t); > + > +TRACEPOINT_PROBE(irq, softirq_entry) > +{ > + u64 pid_tgid = bpf_get_current_pid_tgid(); > + > + if (!capture_enabled(pid_tgid)) > + return 0; > + > + struct softirq_start_data_t data = {}; > + > + data.start_ns = bpf_ktime_get_ns(); > + data.vec_nr = args->vec; > + softirq_start.update(&pid_tgid, &data); > + return 0; > +} > + > +TRACEPOINT_PROBE(irq, softirq_exit) > +{ > + u64 pid_tgid = bpf_get_current_pid_tgid(); > + > + if (!capture_enabled(pid_tgid)) > + return 0; > + > + struct softirq_start_data_t *data; > + data = softirq_start.lookup(&pid_tgid); > + if (!data || data->start_ns == 0) > + return 0; > + > + struct softirq_data_t *val, zero = {}; > + struct softirq_data_key_t key = { .pid = pid_tgid >> 32, > + .tid = (u32)pid_tgid, > + .vec_nr = data->vec_nr}; > + > + val = softirq_data.lookup_or_try_init(&key, &zero); > + if (val) { > + u64 delta = bpf_ktime_get_ns() - data->start_ns; > + val->count++; > + val->total_ns += delta; > + if (val->worst_ns == 0 || delta > val->worst_ns) > + val->worst_ns = delta; > + } > + > + data->start_ns = 0; > + return 0; > +} > +""" > + > + > +# > +# Probe class to use for the start/stop triggers > +# > +class Probe(object): > + ''' > + The goal for this object is to support as many as possible > + probe/events as supported by BCC. 
See > + > https://github.com/iovisor/bcc/blob/master/docs/reference_guide.md#events--arguments > + ''' > + def __init__(self, probe, pid=None): > + self.pid = pid > + self.text_probe = probe > + self._parse_text_probe() > + > + def __str__(self): > + if self.probe_type == "usdt": > + return "[{}]; {}:{}:{}".format(self.text_probe, self.probe_type, > + self.usdt_provider, > self.usdt_probe) > + elif self.probe_type == "trace": > + return "[{}]; {}:{}:{}".format(self.text_probe, self.probe_type, > + self.trace_system, > self.trace_event) > + elif self.probe_type == "kprobe" or self.probe_type == "kretprobe": > + return "[{}]; {}:{}".format(self.text_probe, self.probe_type, > + self.kprobe_function) > + elif self.probe_type == "uprobe" or self.probe_type == "uretprobe": > + return "[{}]; {}:{}".format(self.text_probe, self.probe_type, > + self.uprobe_function) > + else: > + return "[{}] <{}:unknown probe>".format(self.text_probe, > + self.probe_type) > + > + def _raise(self, error): > + raise ValueError("[{}]; {}".format(self.text_probe, error)) > + > + def _verify_kprobe_probe(self): > + # Nothing to verify for now, just return. > + return > + > + def _verify_trace_probe(self): > + # Nothing to verify for now, just return. > + return > + > + def _verify_uprobe_probe(self): > + # Nothing to verify for now, just return. > + return > + > + def _verify_usdt_probe(self): > + if not self.pid: > + self._raise("USDT probes need a valid PID.") > + > + usdt = USDT(pid=self.pid) > + > + for probe in usdt.enumerate_probes(): > + if probe.provider.decode('utf-8') == self.usdt_provider and \ > + probe.name.decode('utf-8') == self.usdt_probe: > + return > + > + self._raise("Can't find UDST probe > '{}:{}'".format(self.usdt_provider, > + self.usdt_probe)) > + > + def _parse_text_probe(self): > + ''' > + The text probe format is defined as follows: > + <probe_type>:<probe_specific> > + > + Types: > + USDT: u|usdt:<provider>:<probe> > + TRACE: t|trace:<system>:<event> > + KPROBE: k|kprobe:<kernel_function> > + KRETPROBE: kr|kretprobe:<kernel_function> > + UPROBE: up|uprobe:<function> > + URETPROBE: ur|uretprobe:<function> > + ''' > + args = self.text_probe.split(":") > + if len(args) <= 1: > + self._raise("Can't extract probe type.") > + > + if args[0] not in ["k", "kprobe", "kr", "kretprobe", "t", "trace", > + "u", "usdt", "up", "uprobe", "ur", "uretprobe"]: > + self._raise("Invalid probe type '{}'".format(args[0])) > + > + self.probe_type = "kprobe" if args[0] == "k" else args[0] > + self.probe_type = "kretprobe" if args[0] == "kr" else self.probe_type > + self.probe_type = "trace" if args[0] == "t" else self.probe_type > + self.probe_type = "usdt" if args[0] == "u" else self.probe_type > + self.probe_type = "uprobe" if args[0] == "up" else self.probe_type > + self.probe_type = "uretprobe" if args[0] == "ur" else self.probe_type > + > + if self.probe_type == "usdt": > + if len(args) != 3: > + self._raise("Invalid number of arguments for USDT") > + > + self.usdt_provider = args[1] > + self.usdt_probe = args[2] > + self._verify_usdt_probe() > + > + elif self.probe_type == "trace": > + if len(args) != 3: > + self._raise("Invalid number of arguments for TRACE") > + > + self.trace_system = args[1] > + self.trace_event = args[2] > + self._verify_trace_probe() > + > + elif self.probe_type == "kprobe" or self.probe_type == "kretprobe": > + if len(args) != 2: > + self._raise("Invalid number of arguments for K(RET)PROBE") > + self.kprobe_function = args[1] > + self._verify_kprobe_probe() > + > + elif self.probe_type 
== "uprobe" or self.probe_type == "uretprobe": > + if len(args) != 2: > + self._raise("Invalid number of arguments for U(RET)PROBE") > + self.uprobe_function = args[1] > + self._verify_uprobe_probe() > + > + def _get_kprobe_c_code(self, function_name, function_content): > + # > + # The kprobe__* do not require a function name, so it's > + # ignored in the code generation. > + # > + return """ > +int {}__{}(struct pt_regs *ctx) {{ > + {} > +}} > +""".format(self.probe_type, self.kprobe_function, function_content) > + > + def _get_trace_c_code(self, function_name, function_content): > + # > + # The TRACEPOINT_PROBE() do not require a function name, so it's > + # ignored in the code generation. > + # > + return """ > +TRACEPOINT_PROBE({},{}) {{ > + {} > +}} > +""".format(self.trace_system, self.trace_event, function_content) > + > + def _get_uprobe_c_code(self, function_name, function_content): > + return """ > +int {}(struct pt_regs *ctx) {{ > + {} > +}} > +""".format(function_name, function_content) > + > + def _get_usdt_c_code(self, function_name, function_content): > + return """ > +int {}(struct pt_regs *ctx) {{ > + {} > +}} > +""".format(function_name, function_content) > + > + def get_c_code(self, function_name, function_content): > + if self.probe_type == 'kprobe' or self.probe_type == 'kretprobe': > + return self._get_kprobe_c_code(function_name, function_content) > + elif self.probe_type == 'trace': > + return self._get_trace_c_code(function_name, function_content) > + elif self.probe_type == 'uprobe' or self.probe_type == 'uretprobe': > + return self._get_uprobe_c_code(function_name, function_content) > + elif self.probe_type == 'usdt': > + return self._get_usdt_c_code(function_name, function_content) > + > + return "" > + > + def probe_name(self): > + if self.probe_type == 'kprobe' or self.probe_type == 'kretprobe': > + return "{}".format(self.kprobe_function) > + elif self.probe_type == 'trace': > + return "{}:{}".format(self.trace_system, > + self.trace_event) > + elif self.probe_type == 'uprobe' or self.probe_type == 'uretprobe': > + return "{}".format(self.uprobe_function) > + elif self.probe_type == 'usdt': > + return "{}:{}".format(self.usdt_provider, > + self.usdt_probe) > + > + return "" > + > + > +# > +# event_to_dict() > +# > +def event_to_dict(event): > + return dict([(field, getattr(event, field)) > + for (field, _) in event._fields_ > + if isinstance(getattr(event, field), (int, bytes))]) > + > + > +# > +# Event enum > +# > +Event = IntEnum("Event", ["SYSCALL", "START_TRIGGER", "STOP_TRIGGER", > + ], start=0) > + > + > +# > +# process_event() > +# > +def process_event(ctx, data, size): > + global start_trigger_ts > + global stop_trigger_ts > + > + event = bpf['events'].event(data) > + if event.id == Event.SYSCALL: > + syscall_events.append({"tid": event.tid, > + "ts_entry": event.entry_ts, > + "ts_exit": event.ts, > + "syscall": event.syscall, > + "user_stack_id": event.user_stack_id, > + "kernel_stack_id": event.kernel_stack_id}) > + elif event.id == Event.START_TRIGGER: > + # > + # This event would have start the trigger already, so all we need to > + # do is record the start timestamp. > + # > + start_trigger_ts = event.ts > + > + elif event.id == Event.STOP_TRIGGER: > + # > + # This event would have stopped the trigger already, so all we need > to > + # do is record the start timestamp. 
> + stop_trigger_ts = event.ts > + > + > +# > +# next_power_of_two() > +# > +def next_power_of_two(val): > + np = 1 > + while np < val: > + np *= 2 > + return np > + > + > +# > +# unsigned_int() > +# > +def unsigned_int(value): > + try: > + value = int(value) > + except ValueError: > + raise argparse.ArgumentTypeError("must be an integer") > + > + if value < 0: > + raise argparse.ArgumentTypeError("must be positive") > + return value > + > + > +# > +# unsigned_nonzero_int() > +# > +def unsigned_nonzero_int(value): > + value = unsigned_int(value) > + if value == 0: > + raise argparse.ArgumentTypeError("must be nonzero") > + return value > + > + > +# > +# get_thread_name() > +# > +def get_thread_name(pid, tid): > + try: > + with open(f"/proc/{pid}/task/{tid}/comm", encoding="utf8") as f: > + return f.readline().strip("\n") > + except FileNotFoundError: > + pass > + > + return f"<unknown:{pid}/{tid}>" > + > + > +# > +# get_vec_nr_name() > +# > +def get_vec_nr_name(vec_nr): > + known_vec_nr = ["hi", "timer", "net_tx", "net_rx", "block", "irq_poll", > + "tasklet", "sched", "hrtimer", "rcu"] > + > + if vec_nr < 0 or vec_nr > len(known_vec_nr): > + return f"<unknown:{vec_nr}>" > + > + return known_vec_nr[vec_nr] > + > + > +# > +# start/stop/reset capture > +# > +def start_capture(): > + bpf["capture_on"][ct.c_int(0)] = ct.c_int(1) > + > + > +def stop_capture(force=False): > + if force: > + bpf["capture_on"][ct.c_int(0)] = ct.c_int(0xffff) > + else: > + bpf["capture_on"][ct.c_int(0)] = ct.c_int(0) > + > + > +def capture_running(): > + return True if bpf["capture_on"][ct.c_int(0)].value == 1 else False > + > + > +def reset_capture(): > + bpf["syscall_start"].clear() > + bpf["syscall_data"].clear() > + bpf["run_start"].clear() > + bpf["run_data"].clear() > + bpf["ready_start"].clear() > + bpf["ready_data"].clear() > + bpf["hardirq_start"].clear() > + bpf["hardirq_data"].clear() > + bpf["softirq_start"].clear() > + bpf["softirq_data"].clear() > + bpf["stack_traces"].clear() > + > + > +# > +# Display timestamp > +# > +def print_timestamp(msg): > + ltz = datetime.datetime.now() > + utc = ltz.astimezone(pytz.utc) > + time_string = "{} @{} ({} UTC)".format( > + msg, ltz.isoformat(), utc.strftime("%H:%M:%S")) > + print(time_string) > + > + > +# > +# process_results() > +# > +def process_results(syscal_events=None, trigger_delta=None): > + if trigger_delta: > + print_timestamp("# Triggered sample dump, stop-start delta {:,} ns". > + format(trigger_delta)) > + else: > + print_timestamp("# Sample dump") > + > + # > + # First get a list of all threads we need to report on. > + # > + threads_syscall = {k.tid for k, _ in bpf["syscall_data"].items() > + if k.syscall != 0xffffffff} > + > + threads_run = {k.tid for k, _ in bpf["run_data"].items() > + if k.pid != 0xffffffff} > + > + threads_ready = {k.tid for k, _ in bpf["ready_data"].items() > + if k.pid != 0xffffffff} > + > + threads_hardirq = {k.tid for k, _ in bpf["hardirq_data"].items() > + if k.pid != 0xffffffff} > + > + threads_softirq = {k.tid for k, _ in bpf["softirq_data"].items() > + if k.pid != 0xffffffff} > + > + threads = sorted(threads_syscall | threads_run | threads_ready | > + threads_hardirq | threads_softirq, > + key=lambda x: get_thread_name(options.pid, x)) > + > + # > + # Print header... > + # > + print("{:10} {:16} {}".format("TID", "THREAD", "<RESOURCE SPECIFIC>")) > + print("{:10} {:16} {}".format("-" * 10, "-" * 16, "-" * 76)) > + indent = 28 * " " > + > + # > + # Print all events/statistics per threads. 
> + # > + poll_id = [k for k, v in syscalls.items() if v == b'poll'][0] > + for thread in threads: > + > + if thread != threads[0]: > + print("") > + > + # > + # SYSCALL_STATISTICS > + # > + print("{:10} {:16} {}\n{}{:20} {:>6} {:>10} {:>16} {:>16}".format( > + thread, get_thread_name(options.pid, thread), > + "[SYSCALL STATISTICS]", indent, > + "NAME", "NUMBER", "COUNT", "TOTAL ns", "MAX ns")) > + > + total_count = 0 > + total_ns = 0 > + for k, v in sorted(bpf["syscall_data"].items(), > + key=lambda kv: -kv[1].total_ns): > + if k.tid != thread: > + continue > + > + print("{}{:20.20} {:6} {:10} {:16,} {:16,}".format( > + indent, syscall_name(k.syscall).decode('utf-8'), k.syscall, > + v.count, v.total_ns, v.worst_ns)) > + if k.syscall != poll_id: > + total_count += v.count > + total_ns += v.total_ns > + > + if total_count > 0: > + print("{}{:20.20} {:6} {:10} {:16,}".format( > + indent, "TOTAL( - poll):", "", total_count, total_ns)) > + > + # > + # THREAD RUN STATISTICS > + # > + print("\n{:10} {:16} {}\n{}{:10} {:>16} {:>16} {:>16}".format( > + "", "", "[THREAD RUN STATISTICS]", indent, > + "SCHED_CNT", "TOTAL ns", "MIN ns", "MAX ns")) > + > + for k, v in bpf["run_data"].items(): > + if k.tid != thread: > + continue > + > + print("{}{:10} {:16,} {:16,} {:16,}".format( > + indent, v.count, v.total_ns, v.min_ns, v.max_ns)) > + > + # > + # THREAD READY STATISTICS > + # > + print("\n{:10} {:16} {}\n{}{:10} {:>16} {:>16}".format( > + "", "", "[THREAD READY STATISTICS]", indent, > + "SCHED_CNT", "TOTAL ns", "MAX ns")) > + > + for k, v in bpf["ready_data"].items(): > + if k.tid != thread: > + continue > + > + print("{}{:10} {:16,} {:16,}".format( > + indent, v.count, v.total_ns, v.worst_ns)) > + > + # > + # HARD IRQ STATISTICS > + # > + total_ns = 0 > + total_count = 0 > + header_printed = False > + for k, v in sorted(bpf["hardirq_data"].items(), > + key=lambda kv: -kv[1].total_ns): > + if k.tid != thread: > + continue > + > + if not header_printed: > + print("\n{:10} {:16} {}\n{}{:20} {:>10} {:>16} {:>16}". > + format("", "", "[HARD IRQ STATISTICS]", indent, > + "NAME", "COUNT", "TOTAL ns", "MAX ns")) > + header_printed = True > + > + print("{}{:20.20} {:10} {:16,} {:16,}".format( > + indent, k.irq_name.decode('utf-8'), > + v.count, v.total_ns, v.worst_ns)) > + > + total_count += v.count > + total_ns += v.total_ns > + > + if total_count > 0: > + print("{}{:20.20} {:10} {:16,}".format( > + indent, "TOTAL:", total_count, total_ns)) > + > + # > + # SOFT IRQ STATISTICS > + # > + total_ns = 0 > + total_count = 0 > + header_printed = False > + for k, v in sorted(bpf["softirq_data"].items(), > + key=lambda kv: -kv[1].total_ns): > + if k.tid != thread: > + continue > + > + if not header_printed: > + print("\n{:10} {:16} {}\n" > + "{}{:20} {:>7} {:>10} {:>16} {:>16}". 
> + format("", "", "[SOFT IRQ STATISTICS]", indent, > + "NAME", "VECT_NR", "COUNT", "TOTAL ns", "MAX > ns")) > + header_printed = True > + > + print("{}{:20.20} {:>7} {:10} {:16,} {:16,}".format( > + indent, get_vec_nr_name(k.vec_nr), k.vec_nr, > + v.count, v.total_ns, v.worst_ns)) > + > + total_count += v.count > + total_ns += v.total_ns > + > + if total_count > 0: > + print("{}{:20.20} {:7} {:10} {:16,}".format( > + indent, "TOTAL:", "", total_count, total_ns)) > + > + # > + # Print events > + # > + lost_stack_traces = 0 > + if syscall_events is not None and len(syscall_events) > 0: > + stack_traces = bpf.get_table("stack_traces") > + > + print("\n\n# SYSCALL EVENTS:" > + "\n{}{:>19} {:>19} {:>10} {:16} {:>10} {}".format( > + 2 * " ", "ENTRY (ns)", "EXIT (ns)", "TID", "COMM", > + "DELTA (us)", "SYSCALL")) > + print("{}{:19} {:19} {:10} {:16} {:10} {}".format( > + 2 * " ", "-" * 19, "-" * 19, "-" * 10, "-" * 16, > + "-" * 10, "-" * 16)) > + for event in syscall_events: > + print("{}{:19} {:19} {:10} {:16} {:10,} {}".format( > + " " * 2, > + event["ts_entry"], event["ts_exit"], event["tid"], > + get_thread_name(options.pid, event["tid"]), > + int((event["ts_exit"] - event["ts_entry"]) / 1000), > + syscall_name(event["syscall"]).decode('utf-8'))) > + # > + # Not sure where to put this, but I'll add some info on stack > + # traces here... Userspace stack traces are very limited due to > + # the fact that bcc does not support dwarf backtraces. As OVS > + # gets compiled without frame pointers we will not see much. > + # If however, OVS does get built with frame pointers, we should > not > + # use the BPF_STACK_TRACE_BUILDID as it does not seem to handle > + # the debug symbols correctly. Also, note that for kernel > + # traces you should not use BPF_STACK_TRACE_BUILDID, so two > + # buffers are needed. > + # > + # Some info on manual dwarf walk support: > + # https://github.com/iovisor/bcc/issues/3515 > + # https://github.com/iovisor/bcc/pull/4463 > + # > + if options.stack_trace_size == 0: > + continue > + > + if event['kernel_stack_id'] < 0 or event['user_stack_id'] < 0: > + lost_stack_traces += 1 > + > + kernel_stack = stack_traces.walk(event['kernel_stack_id']) \ > + if event['kernel_stack_id'] >= 0 else [] > + user_stack = stack_traces.walk(event['user_stack_id']) \ > + if event['user_stack_id'] >= 0 else [] > + > + for addr in kernel_stack: > + print("{}{}".format( > + " " * 10, > + bpf.ksym(addr, show_module=True, > + show_offset=True).decode('utf-8', 'replace'))) > + > + for addr in user_stack: > + addr_str = bpf.sym(addr, options.pid, show_module=True, > + show_offset=True).decode('utf-8', > 'replace') > + > + if addr_str == "[unknown]": > + addr_str += " 0x{:x}".format(addr) > + > + print("{}{}".format(" " * 10, addr_str)) > + > + # > + # Print any footer messages. 
> + # > + if lost_stack_traces > 0: > + print("\n#WARNING: We where not able to display {} stack traces!\n" > + "# Consider increasing the stack trace size using\n" > + "# the '--stack-trace-size' option.\n" > + "# Note that this can also happen due to a stack id\n" > + "# collision.".format(lost_stack_traces)) > + > + > +# > +# main() > +# > +def main(): > + # > + # Don't like these globals, but ctx passing does not seem to work with > the > + # existing open_ring_buffer() API :( > + # > + global bpf > + global options > + global syscall_events > + global start_trigger_ts > + global stop_trigger_ts > + > + start_trigger_ts = 0 > + stop_trigger_ts = 0 > + > + # > + # Argument parsing > + # > + parser = argparse.ArgumentParser() > + > + parser.add_argument("-D", "--debug", > + help="Enable eBPF debugging", > + type=int, const=0x3f, default=0, nargs='?') > + parser.add_argument("-p", "--pid", metavar="VSWITCHD_PID", > + help="ovs-vswitch's PID", > + type=unsigned_int, default=None) > + parser.add_argument("-s", "--syscall-events", metavar="DURATION_NS", > + help="Record syscall events, default disabled", > + type=unsigned_int, const=0, default=None, nargs='?') > + parser.add_argument("--buffer-page-count", > + help="Number of BPF ring buffer pages, default 1024", > + type=unsigned_int, default=1024, metavar="NUMBER") > + parser.add_argument("--sample-count", > + help="Number of sample runs, default 1", > + type=unsigned_nonzero_int, default=1, metavar="RUNS") > + parser.add_argument("--sample-interval", > + help="Delay between sample runs, default 0", > + type=float, default=0, metavar="SECONDS") > + parser.add_argument("--sample-time", > + help="Sample time, default 0.5 seconds", > + type=float, default=0.5, metavar="SECONDS") > + parser.add_argument("--skip-syscall-poll-events", > + help="Skip poll() syscalls with --syscall-events", > + action="store_true") > + parser.add_argument("--stack-trace-size", > + help="Number of unique stack traces that can be " > + "recorded, default 4096. 0 to disable", > + type=unsigned_int, default=4096) > + parser.add_argument("--start-trigger", metavar="TRIGGER", > + help="Start trigger, see documentation for details", > + type=str, default=None) > + parser.add_argument("--stop-trigger", metavar="TRIGGER", > + help="Stop trigger, see documentation for details", > + type=str, default=None) > + parser.add_argument("--trigger-delta", metavar="DURATION_NS", > + help="Only report event when the trigger duration > " > + "DURATION_NS, default 0 (all events)", > + type=unsigned_int, const=0, default=0, nargs='?') > + > + options = parser.parse_args() > + > + # > + # Find the PID of the ovs-vswitchd daemon if not specified. > + # > + if options.pid is None: > + for proc in psutil.process_iter(): > + if 'ovs-vswitchd' in proc.name(): > + if options.pid is not None: > + print("ERROR: Multiple ovs-vswitchd daemons running, " > + "use the -p option!") > + sys.exit(-1) We have some inconsistencies here because some utilities we send "sys.exit(1)" and some we send "sys.exit(-1)" I think we switched to using '1' because it is a valid return value on other OSes. I guess for now there isn't any need to normalize it since these are eBPF programs intended to run on linux. That said, maybe we could consider a normalized value anyway just for review purposes. > + > + options.pid = proc.pid > + > + # > + # Error checking on input parameters. 
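(Related to my exit-code remark above, and since the block right below also
uses sys.exit(-1): if we ever do normalize, a module-level constant shared by
the usdt scripts would be enough -- purely illustrative, the name is made up:

    EXIT_FAILURE = 1
    ...
    sys.exit(EXIT_FAILURE)

Not a blocker either way.)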
> + # > + if options.pid is None: > + print("ERROR: Failed to find ovs-vswitchd's PID!") > + sys.exit(-1) > + > + options.buffer_page_count = next_power_of_two(options.buffer_page_count) > + > + # > + # Make sure we are running as root, or else we can not attach the probes. > + # > + if os.geteuid() != 0: > + print("ERROR: We need to run as root to attached probes!") > + sys.exit(-1) > + > + # > + # Setup any of the start stop triggers > + # > + if options.start_trigger is not None: > + try: > + start_trigger = Probe(options.start_trigger, pid=options.pid) > + except ValueError as e: > + print(f"ERROR: Invalid start trigger {str(e)}") > + sys.exit(-1) > + else: > + start_trigger = None > + > + if options.stop_trigger is not None: > + try: > + stop_trigger = Probe(options.stop_trigger, pid=options.pid) > + except ValueError as e: > + print(f"ERROR: Invalid stop trigger {str(e)}") > + sys.exit(-1) > + else: > + stop_trigger = None > + > + # > + # Attach probe to running process. > + # > + source = EBPF_SOURCE.replace("<EVENT_ENUM>", "\n".join( > + [" EVENT_{} = {},".format( > + event.name, event.value) for event in Event])) > + source = source.replace("<BUFFER_PAGE_CNT>", > + str(options.buffer_page_count)) > + source = source.replace("<MONITOR_PID>", str(options.pid)) > + > + if BPF.kernel_struct_has_field(b'task_struct', b'state') == 1: > + source = source.replace('<STATE_FIELD>', 'state') > + else: > + source = source.replace('<STATE_FIELD>', '__state') > + > + poll_id = [k for k, v in syscalls.items() if v == b'poll'][0] > + if options.syscall_events is None: > + syscall_trace_events = "false" > + elif options.syscall_events == 0: > + if not options.skip_syscall_poll_events: > + syscall_trace_events = "true" > + else: > + syscall_trace_events = f"args->id != {poll_id}" > + else: > + syscall_trace_events = "delta > {}".format(options.syscall_events) > + if options.skip_syscall_poll_events: > + syscall_trace_events += f" && args->id != {poll_id}" > + > + source = source.replace("<SYSCAL_TRACE_EVENTS>", > + syscall_trace_events) > + > + source = source.replace("<STACK_TRACE_SIZE>", > + str(options.stack_trace_size)) > + > + source = source.replace("<STACK_TRACE_ENABLED>", "true" > + if options.stack_trace_size > 0 else "false") > + > + # > + # Handle start/stop probes > + # > + if start_trigger: > + source = source.replace("<START_TRIGGER>", > + start_trigger.get_c_code( > + "start_trigger_probe", > + "return start_trigger();")) > + else: > + source = source.replace("<START_TRIGGER>", "") > + > + if stop_trigger: > + source = source.replace("<STOP_TRIGGER>", > + stop_trigger.get_c_code( > + "stop_trigger_probe", > + "return stop_trigger();")) > + else: > + source = source.replace("<STOP_TRIGGER>", "") > + > + # > + # Setup usdt or other probes that need handling trough the BFP class. > + # > + usdt = USDT(pid=int(options.pid)) > + try: > + if start_trigger and start_trigger.probe_type == 'usdt': > + usdt.enable_probe(probe=start_trigger.probe_name(), > + fn_name="start_trigger_probe") > + if stop_trigger and stop_trigger.probe_type == 'usdt': > + usdt.enable_probe(probe=stop_trigger.probe_name(), > + fn_name="stop_trigger_probe") > + > + except USDTException as e: > + print("ERROR: {}".format( > + (re.sub('^', ' ' * 7, str(e), flags=re.MULTILINE)).strip(). 
> + replace("--with-dtrace or --enable-dtrace", > + "--enable-usdt-probes"))) > + sys.exit(-1) > + > + bpf = BPF(text=source, usdt_contexts=[usdt], debug=options.debug) > + > + if start_trigger: > + try: > + if start_trigger.probe_type == "uprobe": > + bpf.attach_uprobe(name=f"/proc/{options.pid}/exe", > + sym=start_trigger.probe_name(), > + fn_name="start_trigger_probe", > + pid=options.pid) > + > + if start_trigger.probe_type == "uretprobe": > + bpf.attach_uretprobe(name=f"/proc/{options.pid}/exe", > + sym=start_trigger.probe_name(), > + fn_name="start_trigger_probe", > + pid=options.pid) > + except Exception as e: > + print("ERROR: Failed attaching uprobe start trigger " > + f"'{start_trigger.probe_name()}';\n {str(e)}") > + sys.exit(-1) > + > + if stop_trigger: > + try: > + if stop_trigger.probe_type == "uprobe": > + bpf.attach_uprobe(name=f"/proc/{options.pid}/exe", > + sym=stop_trigger.probe_name(), > + fn_name="stop_trigger_probe", > + pid=options.pid) > + > + if stop_trigger.probe_type == "uretprobe": > + bpf.attach_uretprobe(name=f"/proc/{options.pid}/exe", > + sym=stop_trigger.probe_name(), > + fn_name="stop_trigger_probe", > + pid=options.pid) > + except Exception as e: > + print("ERROR: Failed attaching uprobe stop trigger" > + f"'{stop_trigger.probe_name()}';\n {str(e)}") > + sys.exit(-1) > + > + # > + # If no triggers are configured use the delay configuration > + # > + bpf['events'].open_ring_buffer(process_event) > + > + sample_count = 0 > + while sample_count < options.sample_count: > + sample_count += 1 > + syscall_events = [] > + > + if options.start_trigger is None: > + print_timestamp("# Start sampling") > + start_capture() > + stop_time = -1 if options.stop_trigger else \ > + time_ns() + options.sample_time * 1000000000 > + else: > + # For start triggers the stop time depends on the start trigger > + # time, or depends on the stop trigger if configured. > + stop_time = -1 if options.stop_trigger else 0 > + > + while True: > + try: > + last_start_ts = start_trigger_ts > + last_stop_ts = stop_trigger_ts > + > + if stop_time > 0: > + delay = int((stop_time - time_ns()) / 1000000) > + if delay <= 0: > + break > + else: > + delay = -1 > + > + bpf.ring_buffer_poll(timeout=delay) > + > + if stop_time <= 0 and last_start_ts != start_trigger_ts: > + print_timestamp( > + "# Start sampling (trigger@{})".format( > + start_trigger_ts)) > + > + if not options.stop_trigger: > + stop_time = time_ns() + \ > + options.sample_time * 1000000000 > + > + if last_stop_ts != stop_trigger_ts: > + break > + > + except KeyboardInterrupt: > + break > + > + if options.stop_trigger and not capture_running(): > + print_timestamp("# Stop sampling (trigger@{})".format( > + stop_trigger_ts)) > + else: > + print_timestamp("# Stop sampling") > + > + if stop_trigger_ts != 0 and start_trigger_ts != 0: > + trigger_delta = stop_trigger_ts - start_trigger_ts > + else: > + trigger_delta = None > + > + if not trigger_delta or trigger_delta >= options.trigger_delta: > + stop_capture(force=True) # Prevent a new trigger to start. > + process_results(syscal_events=syscall_events, > + trigger_delta=trigger_delta) > + elif trigger_delta: > + sample_count -= 1 > + print_timestamp("# Sample dump skipped, delta {:,} ns".format( > + trigger_delta)) > + > + reset_capture() > + stop_capture() > + if options.sample_interval > 0: > + time.sleep(options.sample_interval) > + > + # > + # Report lost events. 
> + # > + dropcnt = bpf.get_table("dropcnt") > + for k in dropcnt.keys(): > + count = dropcnt.sum(k).value > + if k.value == 0 and count > 0: > + print("\n# WARNING: Not all events were captured, {} were " > + "dropped!\n# Increase the BPF ring buffer size " > + "with the --buffer-page-count option.".format(count)) > + > + if (options.sample_count > 1): > + trigger_miss = bpf.get_table("trigger_miss") > + for k in trigger_miss.keys(): > + count = trigger_miss.sum(k).value > + if k.value == 0 and count > 0: > + print("\n# WARNING: Not all start triggers were successful. " > + "{} were missed due to\n# slow userspace " > + "processing!".format(count)) > + > + > +# > +# Start main() as the default entry point... > +# > +if __name__ == '__main__': > + main() > diff --git a/utilities/usdt-scripts/kernel_delay.rst > b/utilities/usdt-scripts/kernel_delay.rst > new file mode 100644 > index 000000000..95e98db34 > --- /dev/null > +++ b/utilities/usdt-scripts/kernel_delay.rst > @@ -0,0 +1,594 @@ > +Troubleshooting Open vSwitch: Is the kernel to blame? > +===================================================== > +Often, when troubleshooting Open vSwitch (OVS) in the field, you might be > left > +wondering if the issue is really OVS-related, or if it's a problem with the > +kernel being overloaded. The kernel_delay.py tool can help you quickly > +identify if the focus of your investigation should be OVS or the Linux > kernel. We might want to give additional context here. For example, hints that the system could be overloaded due to the WARN level log "Unreasonably long poll..." or uname/uptime output. I know you do reference it later in the document, but perhaps a section on when the reader can infer that there could be a problem. > + > + > +Introduction > +------------ > +``kernel_delay.py`` consists of a Python script that uses the BCC [#BCC]_ > +framework to install eBPF probes. The data the eBPF probes collect will be > +analyzed and presented to the user by the Python script. Some of the > presented > +data can also be captured by the individual scripts included in the BBC > [#BCC]_ > +framework. > + > +kernel_delay.py has two modes of operation: > + > +- In **time mode**, the tool runs for a specific time and collects the > + information. > +- In **trigger mode**, event collection can be started and/or stopped based > on > + a specific eBPF probe. Currently, we support the following probes: > + - USDT probes > + - Kernel tracepoints > + - kprobe > + - kretprobe > + - uprobe > + - uretprobe > + > + > +In addition, the option, ``--sample-count``, exists to specify how many > +iterations you would like to do. When using triggers, you can also ignore > +samples if they are less than a number of nanoseconds with the > +``--trigger-delta`` option. The latter might be useful when debugging Linux > +syscalls which take a long time to complete. More on this later. Finally, you > +can configure the delay between two sample runs with the > ``--sample-interval`` > +option. > + > +Before getting into more details, let's just run the tool without any options > +to see what the output looks like. Notice that it will try to automatically > +get the process ID of the running ``ovs-vsdwitchd``. You can overwrite this > +with the ``--pid`` option. > + > +.. 
code-block:: console > + > + $ sudo ./kernel_delay.py > + # Start sampling @2023-06-08T12:17:22.725127 (10:17:22 UTC) > + # Stop sampling @2023-06-08T12:17:23.224781 (10:17:23 UTC) > + # Sample dump @2023-06-08T12:17:23.224855 (10:17:23 UTC) > + TID THREAD <RESOURCE SPECIFIC> > + ---------- ---------------- > ---------------------------------------------------------------------------- > + 27090 ovs-vswitchd [SYSCALL STATISTICS] > + <EDIT: REMOVED DATA FOR ovs-vswitchd THREAD> > + > + 31741 revalidator122 [SYSCALL STATISTICS] > + NAME NUMBER COUNT TOTAL ns > MAX ns > + poll 7 5 184,193,176 > 184,191,520 > + recvmsg 47 494 125,208,756 > 310,331 > + futex 202 8 18,768,758 > 4,023,039 > + sendto 44 10 375,861 > 266,867 > + sendmsg 46 4 43,294 > 11,213 > + write 1 1 5,949 > 5,949 > + getrusage 98 1 1,424 > 1,424 > + read 0 1 1,292 > 1,292 > + TOTAL( - poll): 519 144,405,334 > + > + [THREAD RUN STATISTICS] > + SCHED_CNT TOTAL ns MIN ns > MAX ns > + 6 136,764,071 1,480 > 115,146,424 > + > + [THREAD READY STATISTICS] > + SCHED_CNT TOTAL ns MAX ns > + 7 11,334 6,636 > + > + [HARD IRQ STATISTICS] > + NAME COUNT TOTAL ns > MAX ns > + eno8303-rx-1 1 3,586 > 3,586 > + TOTAL: 1 3,586 > + > + [SOFT IRQ STATISTICS] > + NAME VECT_NR COUNT TOTAL ns > MAX ns > + net_rx 3 1 17,699 > 17,699 > + sched 7 6 13,820 > 3,226 > + rcu 9 16 13,586 > 1,554 > + timer 1 3 10,259 > 3,815 > + TOTAL: 26 55,364 > + > + > +By default, the tool will run for half a second in `time mode`. To extend > this > +you can use the ``--sample-time`` option. > + > + > +What will it report > +------------------- > +The above sample output separates the captured data on a per-thread basis. > +For this, it displays the thread's id (``TID``) and name (``THREAD``), > +followed by resource-specific data. Which are: > + > +- ``SYSCALL STATISTICS`` > +- ``THREAD RUN STATISTICS`` > +- ``THREAD READY STATISTICS`` > +- ``HARD IRQ STATISTICS`` > +- ``SOFT IRQ STATISTICS`` > + > +The following sections will describe in detail what statistics they report. > + > + > +``SYSCALL STATISTICS`` > +~~~~~~~~~~~~~~~~~~~~~~ > +``SYSCALL STATISTICS`` tell you which Linux system calls got executed during > +the measurement interval. This includes the number of times the syscall was > +called (``COUNT``), the total time spent in the system calls (``TOTAL ns``), > +and the worst-case duration of a single call (``MAX ns``). > + > +It also shows the total of all system calls, but it excludes the poll system > +call, as the purpose of this call is to wait for activity on a set of > sockets, > +and usually, the thread gets swapped out. > + > +Note that it only counts calls that started and stopped during the > +measurement interval! > + > + > +``THREAD RUN STATISTICS`` > +~~~~~~~~~~~~~~~~~~~~~~~~~ > +``THREAD RUN STATISTICS`` tell you how long the thread was running on a CPU > +during the measurement interval. > + > +Note that these statistics only count events where the thread started and > +stopped running on a CPU during the measurement interval. For example, if > +this was a PMD thread, you should see zero ``SCHED_CNT`` and ``TOTAL_ns``. > +If not, there might be a misconfiguration. > + > + > +``THREAD READY STATISTICS`` > +~~~~~~~~~~~~~~~~~~~~~~~~~~~ > +``THREAD READY STATISTICS`` tell you the time between the thread being ready > +to run and it actually running on the CPU. > + > +Note that these statistics only count events where the thread was getting > +ready to run and started running during the measurement interval. 
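Small aside on the two sections above: the introduction already mentions that
some of this data can also be captured with the individual BCC scripts, and
the ready-to-run delay is essentially what runqlat/runqslower report
system-wide. It might be worth naming them, since they make a nice cross-check
when these numbers look suspicious, e.g. (flags from memory, may need
double-checking):

    $ /usr/share/bcc/tools/runqslower -p $(pidof ovs-vswitchd) 10000

i.e. print every scheduling delay longer than 10 ms for the vswitchd process.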
> + > + > +``HARD IRQ STATISTICS`` > +~~~~~~~~~~~~~~~~~~~~~~~ > +``HARD IRQ STATISTICS`` tell you how much time was spent servicing hard > +interrupts during the threads run time. > + > +It shows the interrupt name (``NAME``), the number of interrupts (``COUNT``), > +the total time spent in the interrupt handler (``TOTAL ns``), and the > +worst-case duration (``MAX ns``). > + > + > +``SOFT IRQ STATISTICS`` > +~~~~~~~~~~~~~~~~~~~~~~~ > +``SOFT IRQ STATISTICS`` tell you how much time was spent servicing soft > +interrupts during the threads run time. > + > +It shows the interrupt name (``NAME``), vector number (``VECT_NR``), the > +number of interrupts (``COUNT``), the total time spent in the interrupt > +handler (``TOTAL ns``), and the worst-case duration (``MAX ns``). > + > + > +The ``--syscall-events`` option > +------------------------------- > +In addition to reporting global syscall statistics in ``SYSCALL_STATISTICS``, > +the tool can also report each individual syscall. This can be a usefull > +second step if the ``SYSCALL_STATISTICS`` show high latency numbers. > + > +All you need to do is add the ``--syscall-events`` option, with or without > +the additional ``DURATION_NS`` parameter. The ``DUTATION_NS`` parameter > +allows you to exclude events that take less than the supplied time. > + > +The ``--skip-syscall-poll-events`` option allows you to exclude poll > +syscalls from the report. > + > +Below is an example run, note that I have removed the resource-specific data > +to highlight the syscall events: > + > +.. code-block:: console > + > + $ sudo ./kernel_delay.py --syscall-events 50000 --skip-syscall-poll-events > + # Start sampling @2023-06-13T17:10:46.460874 (15:10:46 UTC) > + # Stop sampling @2023-06-13T17:10:46.960727 (15:10:46 UTC) > + # Sample dump @2023-06-13T17:10:46.961033 (15:10:46 UTC) > + TID THREAD <RESOURCE SPECIFIC> > + ---------- ---------------- > ---------------------------------------------------------------------------- > + 3359686 ipf_clean2 [SYSCALL STATISTICS] > + ... > + 3359635 ovs-vswitchd [SYSCALL STATISTICS] > + ... > + 3359697 revalidator12 [SYSCALL STATISTICS] > + ... > + 3359698 revalidator13 [SYSCALL STATISTICS] > + ... > + 3359699 revalidator14 [SYSCALL STATISTICS] > + ... > + 3359700 revalidator15 [SYSCALL STATISTICS] > + ... 
> + > + # SYSCALL EVENTS: > + ENTRY (ns) EXIT (ns) TID COMM DELTA > (us) SYSCALL > + ------------------- ------------------- ---------- ---------------- > ---------- ---------------- > + 2161821694935486 2161821695031201 3359699 revalidator14 > 95 futex > + syscall_exit_to_user_mode_prepare+0x161 [kernel] > + syscall_exit_to_user_mode_prepare+0x161 [kernel] > + syscall_exit_to_user_mode+0x9 [kernel] > + do_syscall_64+0x68 [kernel] > + entry_SYSCALL_64_after_hwframe+0x72 [kernel] > + __GI___lll_lock_wait+0x30 [libc.so.6] > + ovs_mutex_lock_at+0x18 [ovs-vswitchd] > + [unknown] 0x696c003936313a63 > + 2161821695276882 2161821695333687 3359698 revalidator13 > 56 futex > + syscall_exit_to_user_mode_prepare+0x161 [kernel] > + syscall_exit_to_user_mode_prepare+0x161 [kernel] > + syscall_exit_to_user_mode+0x9 [kernel] > + do_syscall_64+0x68 [kernel] > + entry_SYSCALL_64_after_hwframe+0x72 [kernel] > + __GI___lll_lock_wait+0x30 [libc.so.6] > + ovs_mutex_lock_at+0x18 [ovs-vswitchd] > + [unknown] 0x696c003134313a63 > + 2161821695275820 2161821695405733 3359700 revalidator15 > 129 futex > + syscall_exit_to_user_mode_prepare+0x161 [kernel] > + syscall_exit_to_user_mode_prepare+0x161 [kernel] > + syscall_exit_to_user_mode+0x9 [kernel] > + do_syscall_64+0x68 [kernel] > + entry_SYSCALL_64_after_hwframe+0x72 [kernel] > + __GI___lll_lock_wait+0x30 [libc.so.6] > + ovs_mutex_lock_at+0x18 [ovs-vswitchd] > + [unknown] 0x696c003936313a63 > + 2161821695964969 2161821696052021 3359635 ovs-vswitchd > 87 accept > + syscall_exit_to_user_mode_prepare+0x161 [kernel] > + syscall_exit_to_user_mode_prepare+0x161 [kernel] > + syscall_exit_to_user_mode+0x9 [kernel] > + do_syscall_64+0x68 [kernel] > + entry_SYSCALL_64_after_hwframe+0x72 [kernel] > + __GI_accept+0x4d [libc.so.6] > + pfd_accept+0x3a [ovs-vswitchd] > + [unknown] 0x7fff19f2bd00 > + [unknown] 0xe4b8001f0f > + > +As you can see above, the output also shows the stackback trace. You can > +disable this using the ``--stack-trace-size 0`` option. > + > +As you can see above, the backtrace does not show a lot of useful information > +due to the BCC [#BCC]_ toolkit not supporting dwarf decoding. So to further > +analyze system call backtraces, you could use perf. The following perf > +script can do this for you (refer to the embedded instructions): > + > +https://github.com/chaudron/perf_scripts/blob/master/analyze_perf_pmd_syscall.py Maybe we should consider putting something like that in-tree for use. > + > + > +Using triggers > +-------------- > +The tool supports start and, or stop triggers. This will allow you to capture > +statistics triggered by a specific event. First, let's look at what > +combinations of stop-and-start triggers we can use. > + > +If you only use ``--start-trigger``, the inspection start when the trigger > +happens and runs until the ``--sample-time`` number of seconds has passed. > +The example below shows all the supported options in this scenario. > + > +.. code-block:: console > + > + $ sudo ./kernel_delay.py --start-trigger up:bridge_run --sample-time 4 \ > + --sample-count 4 --sample-interval 1 > + > + > +If you only use ``--stop-trigger``, the inspection starts immediately and > +stops when the trigger happens. The example below shows all the supported > +options in this scenario. > + > +.. 
code-block:: console > + > + $ sudo ./kernel_delay.py --stop-trigger upr:bridge_run \ > + --sample-count 4 --sample-interval 1 > + > + > +If you use both ``--start-trigger`` and ``--stop-trigger`` triggers, the > +statistics are captured between the two first occurrences of these events. > +The example below shows all the supported options in this scenario. > + > +.. code-block:: console > + > + $ sudo ./kernel_delay.py --start-trigger up:bridge_run \ > + --stop-trigger upr:bridge_run \ > + --sample-count 4 --sample-interval 1 \ > + --trigger-delta 50000 > + > + > +Now that we know how these triggers can be used, let's investigate what > +triggers are supported. What we call triggers, BCC [#BCC]_, calls events; > +these are eBPF tracepoints you can attach to. For more details on the > +supported tracepoints, check out the BCC documentation [#BCC_EVENT]_. > + > +The list below shows the supported triggers and their argument format: > + > +**USDT probes:** > + [u|usdt]:{provider}:{probe} > +**Kernel tracepoint:** > + [t:trace]:{system}:{event} > +**kprobe:** > + [k:kprobe]:{kernel_function} > +**kretprobe:** > + [kr:kretprobe]:{kernel_function} > +**uprobe:** > + [up:uprobe]:{function} > +**uretprobe:** > + [upr:uretprobe]:{function} > + > +Here are a couple of trigger examples, more use-case-specific examples can be > +found in the *Examples* section. > + > +.. code-block:: console > + > + --start|stop-trigger u:udpif_revalidator:start_dump > + --start|stop-trigger t:openvswitch:ovs_dp_upcall > + --start|stop-trigger k:ovs_dp_process_packet > + --start|stop-trigger kr:ovs_dp_process_packet > + --start|stop-trigger up:bridge_run > + --start|stop-trigger upr:bridge_run > + > + > +Examples > +-------- > +This section will give some examples of how to use this tool in real-world > +scenarios. Let's start with the issue where Open vSwitch reports > +``Unreasonably long XXXXms poll interval`` on your revalidator threads. Note > +that there is a blog available explaining how the revalidator process works > +in OVS [#REVAL_BLOG]_. > + > +First, let me explain this log message. It gets logged if the time delta > +between two ``poll_block()`` calls is more than 1 second. In other words, > +the process was spending a lot of time processing stuff that was made > +available by the return of the ``poll_block()`` function. > + > +Do a run with the tool using the existing USDT revalidator probes as a start > +and stop trigger (Note that I removed the resource-specific data from the > none > +revalidator threads): > + > +.. 
code-block:: console > + > + $ sudo ./kernel_delay.py --start-trigger u:udpif_revalidator:start_dump > --stop-trigger u:udpif_revalidator:sweep_done > + # Start sampling (trigger@791777093512008) @2023-06-14T14:52:00.110303 > (12:52:00 UTC) > + # Stop sampling (trigger@791778281498462) @2023-06-14T14:52:01.297975 > (12:52:01 UTC) > + # Triggered sample dump, stop-start delta 1,187,986,454 ns > @2023-06-14T14:52:01.298021 (12:52:01 UTC) > + TID THREAD <RESOURCE SPECIFIC> > + ---------- ---------------- > ---------------------------------------------------------------------------- > + 1457761 handler24 [SYSCALL STATISTICS] > + NAME NUMBER COUNT > TOTAL ns MAX ns > + sendmsg 46 6110 > 123,274,761 41,776 > + recvmsg 47 136299 > 99,397,508 49,896 > + futex 202 51 > 7,655,832 7,536,776 > + poll 7 4068 > 1,202,883 2,907 > + getrusage 98 2034 > 586,602 1,398 > + sendto 44 9 > 213,682 27,417 > + TOTAL( - poll): 144503 > 231,128,385 > + > + [THREAD RUN STATISTICS] > + SCHED_CNT TOTAL ns MIN ns > MAX ns > + > + [THREAD READY STATISTICS] > + SCHED_CNT TOTAL ns MAX ns > + 1 1,438 1,438 > + > + [SOFT IRQ STATISTICS] > + NAME VECT_NR COUNT > TOTAL ns MAX ns > + sched 7 21 > 59,145 3,769 > + rcu 9 50 > 42,917 2,234 > + TOTAL: 71 > 102,062 > + 1457733 ovs-vswitchd [SYSCALL STATISTICS] > + ... > + 1457792 revalidator55 [SYSCALL STATISTICS] > + NAME NUMBER COUNT > TOTAL ns MAX ns > + futex 202 73 > 572,576,329 19,621,600 > + recvmsg 47 815 > 296,697,618 405,338 > + sendto 44 3 > 78,302 26,837 > + sendmsg 46 3 > 38,712 13,250 > + write 1 1 > 5,073 5,073 > + TOTAL( - poll): 895 > 869,396,034 > + > + [THREAD RUN STATISTICS] > + SCHED_CNT TOTAL ns MIN ns > MAX ns > + 48 394,350,393 1,729 > 140,455,796 > + > + [THREAD READY STATISTICS] > + SCHED_CNT TOTAL ns MAX ns > + 49 23,650 1,559 > + > + [SOFT IRQ STATISTICS] > + NAME VECT_NR COUNT > TOTAL ns MAX ns > + sched 7 14 > 26,889 3,041 > + rcu 9 28 > 23,024 1,600 > + TOTAL: 42 > 49,913 > + > + > +You can see from the start of the output that the trigger took more than a > +second (1,187,986,454 ns), which we would already know, by looking at the > +output of the ``ovs-vsctl upcall/show`` command. > + > +From the *revalidator55*'s ``SYSCALL STATISTICS`` statistics we can see it > +spent almost 870ms handling syscalls, and there were no poll() calls being > +executed. The ``THREAD RUN STATISTICS`` statistics here are a bit misleading, > +as it looks like we only spent 394ms on the CPU. But earlier, we learned that > +this time does not include the time being on the CPU at the start or stop of > +an event. What is exactly the case here, because we are using USDT probes. > + > +From the above data and maybe some ``top`` output, we can determine that the > +*revalidator55* thread is taking a lot of CPU time, probably because it has > +to do a lot of revalidator work by itself. The solution here is to increase > +the number of revalidator threads, so more work could be done in parallel. > + > +Let's do another run of the same command in another scenario: > + > +.. 
> +Let's do another run of the same command in another scenario:
> +
> +.. code-block:: console
> +
> +  $ sudo ./kernel_delay.py --start-trigger u:udpif_revalidator:start_dump --stop-trigger u:udpif_revalidator:sweep_done
> +  # Start sampling (trigger@795160501758971) @2023-06-14T15:48:23.518512 (13:48:23 UTC)
> +  # Stop sampling (trigger@795160764940201) @2023-06-14T15:48:23.781381 (13:48:23 UTC)
> +  # Triggered sample dump, stop-start delta 263,181,230 ns @2023-06-14T15:48:23.781414 (13:48:23 UTC)
> +  TID        THREAD            <RESOURCE SPECIFIC>
> +  ---------- ----------------  ----------------------------------------------------------------------------
> +  1457733    ovs-vswitchd      [SYSCALL STATISTICS]
> +                               ...
> +  1457792    revalidator55     [SYSCALL STATISTICS]
> +                               NAME                 NUMBER       COUNT         TOTAL ns            MAX ns
> +                               recvmsg                  47         284      193,422,110        46,248,418
> +                               sendto                   44           2           46,685            23,665
> +                               sendmsg                  46           2           24,916            12,703
> +                               write                     1           1            6,534             6,534
> +                               TOTAL( - poll):                     289      193,500,245
> +
> +                               [THREAD RUN STATISTICS]
> +                               SCHED_CNT           TOTAL ns            MIN ns            MAX ns
> +                               2                 47,333,558           331,516        47,002,042
> +
> +                               [THREAD READY STATISTICS]
> +                               SCHED_CNT           TOTAL ns            MAX ns
> +                               3                 87,000,403        45,999,712
> +
> +                               [SOFT IRQ STATISTICS]
> +                               NAME                 VECT_NR       COUNT         TOTAL ns            MAX ns
> +                               sched                      7           2            9,504             5,109
> +                               TOTAL:                                 2            9,504
> +
> +
> +Here you can see that the revalidator run took about 263ms, which does not
> +look odd. However, the ``THREAD READY STATISTICS`` show that we spent 87ms
> +waiting for a CPU to run on, which means the revalidator run could have
> +finished 87ms earlier. Looking at the ``MAX ns`` value, we see a worst-case
> +scheduling delay of almost 46ms, which hints at an overloaded system.
> +
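
Maybe also tell the reader how to confirm the "overloaded system" hint
independently? Not something this patch has to solve, but on kernels with
PSI enabled (CONFIG_PSI=y) a quick look at the CPU pressure numbers already
shows whether tasks are stalling while waiting for a CPU. Purely
illustrative, not part of kernel_delay.py:

    #!/usr/bin/env python3
    # Print the CPU pressure-stall information (PSI), which corroborates
    # large THREAD READY (run-queue wait) numbers on a busy host.
    with open("/proc/pressure/cpu") as f:
        print(f.read().strip())
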
> +One final example uses ``uprobe`` and ``uretprobe`` triggers to get
> +statistics on a ``bridge_run()`` execution that takes more than 1ms:
> +
> +.. code-block:: console
> +
> +  $ sudo ./kernel_delay.py --start-trigger up:bridge_run --stop-trigger ur:bridge_run --trigger-delta 1000000
> +  # Start sampling (trigger@2245245432101270) @2023-06-14T16:21:10.467919 (14:21:10 UTC)
> +  # Stop sampling (trigger@2245245432414656) @2023-06-14T16:21:10.468296 (14:21:10 UTC)
> +  # Sample dump skipped, delta 313,386 ns @2023-06-14T16:21:10.468419 (14:21:10 UTC)
> +  # Start sampling (trigger@2245245505301745) @2023-06-14T16:21:10.540970 (14:21:10 UTC)
> +  # Stop sampling (trigger@2245245506911119) @2023-06-14T16:21:10.542499 (14:21:10 UTC)
> +  # Triggered sample dump, stop-start delta 1,609,374 ns @2023-06-14T16:21:10.542565 (14:21:10 UTC)
> +  TID        THREAD            <RESOURCE SPECIFIC>
> +  ---------- ----------------  ----------------------------------------------------------------------------
> +  3371035    <unknown:3366258/3371035> [SYSCALL STATISTICS]
> +                               ... <REMOVED 7 MORE unknown THREADS>
> +  3371102    handler66         [SYSCALL STATISTICS]
> +                               ... <REMOVED 7 MORE HANDLER THREADS>
> +  3366258    ovs-vswitchd      [SYSCALL STATISTICS]
> +                               NAME                 NUMBER       COUNT         TOTAL ns            MAX ns
> +                               futex                   202          43          403,469           199,312
> +                               clone3                  435          13          174,394            30,731
> +                               munmap                   11           8          115,774            21,861
> +                               poll                      7           5           92,969            38,307
> +                               unlink                   87           2           49,918            35,741
> +                               mprotect                 10           8           47,618            13,201
> +                               accept                   43          10           31,360             6,976
> +                               mmap                      9           8           30,279             5,776
> +                               write                     1           6           27,720            11,774
> +                               rt_sigprocmask           14          28           12,281               970
> +                               read                      0           6            9,478             2,318
> +                               recvfrom                 45           3            7,024             4,024
> +                               sendto                   44           1            4,684             4,684
> +                               getrusage                98           5            4,594             1,342
> +                               close                     3           2            2,918             1,627
> +                               recvmsg                  47           1            2,722             2,722
> +                               TOTAL( - poll):                     144          924,233
> +
> +                               [THREAD RUN STATISTICS]
> +                               SCHED_CNT           TOTAL ns            MIN ns            MAX ns
> +                               13                   817,605             5,433           524,376
> +
> +                               [THREAD READY STATISTICS]
> +                               SCHED_CNT           TOTAL ns            MAX ns
> +                               14                    28,646            11,566
> +
> +                               [SOFT IRQ STATISTICS]
> +                               NAME                 VECT_NR       COUNT         TOTAL ns            MAX ns
> +                               rcu                        9           1            2,838             2,838
> +                               TOTAL:                                 1            2,838
> +
> +  3371110    revalidator74     [SYSCALL STATISTICS]
> +                               ... <REMOVED 7 MORE NEW revalidator THREADS>
> +  3366311    urcu3             [SYSCALL STATISTICS]
> +                               ...
> +
> +
> +We removed some of the threads and their resource-specific data, but based
> +on the ``<unknown:3366258/3371035>`` thread name, you can see that some
> +threads no longer exist. In the ``ovs-vswitchd`` thread, you can see some
> +``clone3`` syscalls, indicating that threads were created. In this example,
> +this was due to the deletion of a bridge, which resulted in the recreation
> +of the revalidator and handler threads.
> +
> +
> +Use with OpenShift
> +------------------
> +This section describes how you would use the tool on a node in an OpenShift
> +cluster. It assumes you have console access to the node, either directly or
> +through a debug container.
> +
> +We will use a base Fedora 38 container through podman, as this allows us to
> +install the additional tools and packages we need.
> +
> +The first thing we need to do is to start the container:
> +
> +.. code-block:: console
> +
> +  [core@sno-master ~]$ sudo podman run -it --rm \
> +       -e PS1='[(DEBUG)\u@\h \W]\$ ' \
> +       --privileged --network=host --pid=host \
> +       -v /lib/modules:/lib/modules:ro \
> +       -v /sys/kernel/debug:/sys/kernel/debug \
> +       -v /proc:/proc \
> +       -v /:/mnt/rootdir \
> +       quay.io/fedora/fedora:38-x86_64
> +
> +  [(DEBUG)root@sno-master /]#
> +
> +
> +Next, add the ``kernel_delay.py`` dependencies:
> +
> +.. code-block:: console
> +
> +  [(DEBUG)root@sno-master /]# dnf install -y bcc-tools perl-interpreter \
> +      python3-pytz python3-psutil
> +
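
Since these instructions have the user install python3-pytz and
python3-psutil by hand, it might be nice if the script itself printed a
clear error when one of them is missing instead of a raw traceback. A
minimal sketch of what I mean (not insisting on it for this patch):

    import sys

    try:
        import psutil
        import pytz
    except ModuleNotFoundError as e:
        print("ERROR: Missing python module '{}', please install it!".format(e.name))
        sys.exit(-1)
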
> +You need to install the debuginfo, debugsource, and kernel devel RPMs
> +matching your OVS and kernel versions:
> +
> +.. code-block:: console
> +
> +  [(DEBUG)root@sno-master home]# rpm -i \
> +      openvswitch2.17-debuginfo-2.17.0-67.el8fdp.x86_64.rpm \
> +      openvswitch2.17-debugsource-2.17.0-67.el8fdp.x86_64.rpm \
> +      kernel-devel-4.18.0-372.41.1.el8_6.x86_64.rpm
> +
> +
> +Now we can run the tool. Here we use the ``bridge_run()`` example from
> +above:
> +
> +.. code-block:: console
> +
> +  [(DEBUG)root@sno-master home]# ./kernel_delay.py --start-trigger up:bridge_run --stop-trigger ur:bridge_run
> +  # Start sampling (trigger@75279117343513) @2023-06-15T11:44:07.628372 (11:44:07 UTC)
> +  # Stop sampling (trigger@75279117443980) @2023-06-15T11:44:07.628529 (11:44:07 UTC)
> +  # Triggered sample dump, stop-start delta 100,467 ns @2023-06-15T11:44:07.628569 (11:44:07 UTC)
> +  TID        THREAD            <RESOURCE SPECIFIC>
> +  ---------- ----------------  ----------------------------------------------------------------------------
> +  1246       ovs-vswitchd      [SYSCALL STATISTICS]
> +                               NAME                 NUMBER       COUNT         TOTAL ns            MAX ns
> +                               getdents64              217           2            8,560             8,162
> +                               openat                  257           1            6,951             6,951
> +                               accept                   43           4            6,942             3,763
> +                               recvfrom                 45           1            3,726             3,726
> +                               recvmsg                  47           2            2,880             2,188
> +                               stat                      4           2            1,946             1,384
> +                               close                     3           1            1,393             1,393
> +                               fstat                     5           1            1,324             1,324
> +                               TOTAL( - poll):                      14           33,722
> +
> +                               [THREAD RUN STATISTICS]
> +                               SCHED_CNT           TOTAL ns            MIN ns            MAX ns
> +
> +                               [THREAD READY STATISTICS]
> +                               SCHED_CNT           TOTAL ns            MAX ns
> +
> +
> +.. rubric:: Footnotes
> +
> +.. [#BCC] https://github.com/iovisor/bcc
> +.. [#BCC_EVENT] https://github.com/iovisor/bcc/blob/master/docs/reference_guide.md#events--arguments
> +.. [#REVAL_BLOG] https://developers.redhat.com/articles/2022/10/19/open-vswitch-revalidator-process-explained

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev
