Eelco Chaudron <[email protected]> writes:
> Updated the dpif_nl_exec_monitor.py utility to also optionally monitor
> the other DPIF netlink operations, i.e., DPIF_OP_FLOW_PUT,
> DPIF_OP_FLOW_DEL and DPIF_OP_FLOW_GET
>
> Signed-off-by: Eelco Chaudron <[email protected]>
> ---
> Documentation/topics/usdt-probes.rst | 7 +-
> utilities/automake.mk | 6 +-
> ..._exec_monitor.py => dpif_op_nl_monitor.py} | 279 +++++++++++++-----
> 3 files changed, 207 insertions(+), 85 deletions(-)
> rename utilities/usdt-scripts/{dpif_nl_exec_monitor.py =>
> dpif_op_nl_monitor.py} (72%)
>
> diff --git a/Documentation/topics/usdt-probes.rst
> b/Documentation/topics/usdt-probes.rst
> index b9a6c54b2..93bfaec12 100644
> --- a/Documentation/topics/usdt-probes.rst
> +++ b/Documentation/topics/usdt-probes.rst
> @@ -239,7 +239,7 @@ DPIF_OP_FLOW_DEL operation as part of the dpif
> ``operate()`` callback.
>
> **Script references**:
>
> -- *None*
>
> +- ``utilities/usdt-scripts/dpif_op_nl_monitor.py``
>
>
> dpif_netlink_operate\_\_:op_flow_execute
> @@ -260,7 +260,7 @@ DPIF_OP_FLOW_EXECUTE operation as part of the dpif
> ``operate()`` callback.
>
> **Script references**:
>
> -- ``utilities/usdt-scripts/dpif_nl_exec_monitor.py``
> +- ``utilities/usdt-scripts/dpif_op_nl_monitor.py``
> - ``utilities/usdt-scripts/upcall_cost.py``
>
>
> @@ -281,7 +281,7 @@ DPIF_OP_FLOW_GET operation as part of the dpif
> ``operate()`` callback.
>
> **Script references**:
>
> -- *None*
> +- ``utilities/usdt-scripts/dpif_op_nl_monitor.py``
>
>
> dpif_netlink_operate\_\_:op_flow_put
> @@ -301,6 +301,7 @@ DPIF_OP_FLOW_PUT operation as part of the dpif
> ``operate()`` callback.
>
> **Script references**:
>
> +- ``utilities/usdt-scripts/dpif_op_nl_monitor.py``
> - ``utilities/usdt-scripts/upcall_cost.py``
>
>
>
> diff --git a/utilities/automake.mk b/utilities/automake.mk
> index 146b8c37f..acc1af4e0 100644
> --- a/utilities/automake.mk
> +++ b/utilities/automake.mk
> @@ -22,7 +22,7 @@ scripts_SCRIPTS += \
> scripts_DATA += utilities/ovs-lib
> usdt_SCRIPTS += \
> utilities/usdt-scripts/bridge_loop.bt \
> - utilities/usdt-scripts/dpif_nl_exec_monitor.py \
> + utilities/usdt-scripts/dpif_op_nl_monitor.py \
> utilities/usdt-scripts/flow_reval_monitor.py \
> utilities/usdt-scripts/kernel_delay.py \
> utilities/usdt-scripts/kernel_delay.rst \
> @@ -72,7 +72,7 @@ EXTRA_DIST += \
> utilities/docker/debian/Dockerfile \
> utilities/docker/debian/build-kernel-modules.sh \
> utilities/usdt-scripts/bridge_loop.bt \
> - utilities/usdt-scripts/dpif_nl_exec_monitor.py \
> + utilities/usdt-scripts/dpif_op_nl_monitor.py \
> utilities/usdt-scripts/flow_reval_monitor.py \
> utilities/usdt-scripts/kernel_delay.py \
> utilities/usdt-scripts/kernel_delay.rst \
> @@ -147,7 +147,7 @@ FLAKE8_PYFILES += utilities/ovs-pcap.in \
> utilities/ovs-check-dead-ifs.in \
> utilities/ovs-tcpdump.in \
> utilities/ovs-pipegen.py \
> - utilities/usdt-scripts/dpif_nl_exec_monitor.py \
> + utilities/usdt-scripts/dpif_op_nl_monitor.py \
> utilities/usdt-scripts/flow_reval_monitor.py \
> utilities/usdt-scripts/upcall_monitor.py \
> utilities/usdt-scripts/upcall_cost.py
> diff --git a/utilities/usdt-scripts/dpif_nl_exec_monitor.py
> b/utilities/usdt-scripts/dpif_op_nl_monitor.py
> similarity index 72%
> rename from utilities/usdt-scripts/dpif_nl_exec_monitor.py
> rename to utilities/usdt-scripts/dpif_op_nl_monitor.py
> index a5cf1a35a..c0e0a9fae 100755
> --- a/utilities/usdt-scripts/dpif_nl_exec_monitor.py
> +++ b/utilities/usdt-scripts/dpif_op_nl_monitor.py
> @@ -19,7 +19,8 @@
> # dpif_nl_exec_monitor.py uses the dpif_netlink_operate__:op_flow_execute
> USDT
> # probe to receive all DPIF_OP_EXECUTE operations that are queued for
> # transmission over the netlink socket. It will do some basic decoding, and
> if
> -# requested a packet dump.
> +# requested a packet dump. Note that there are also options to obtain
> +# additional information for the DPIF_OP_FLOW_* operations.
> #
> # Here is an example:
> #
> @@ -75,31 +76,9 @@
> # seq = 0xc
> #
> # The example above dumps the full netlink and packet decode. However options
> -# exist to disable this. Here is the full list of supported options:
> -#
> -# usage: dpif_nl_exec_monitor.py [-h] [--buffer-page-count NUMBER] [-D
> [DEBUG]]
> -# [-d {none,hex,decode}] [-n {none,hex,nlraw}]
> -# [-p VSWITCHD_PID] [-s [64-2048]]
> -# [-w PCAP_FILE]
> -#
> -# optional arguments:
> -# -h, --help show this help message and exit
> -# --buffer-page-count NUMBER
> -# Number of BPF ring buffer pages, default 1024
> -# -D [DEBUG], --debug [DEBUG]
> -# Enable eBPF debugging
> -# -d {none,hex,decode}, --packet-decode {none,hex,decode}
> -# Display packet content in selected mode, default
> none
> -# -n {none,hex,nlraw}, --nlmsg-decode {none,hex,nlraw}
> -# Display netlink message content in selected mode,
> -# default nlraw
> -# -p VSWITCHD_PID, --pid VSWITCHD_PID
> -# ovs-vswitch's PID
> -# -s [64-2048], --nlmsg-size [64-2048]
> -# Set maximum netlink message size to capture,
> default
> -# 512
> -# -w PCAP_FILE, --pcap PCAP_FILE
> -# Write upcall packets to specified pcap file
> +# exist to disable this. For a complete list of options, please use the
> +# '--help' or '-h' argument.
> +#
>
> from bcc import BPF, USDT, USDTException
> from os.path import exists
> @@ -142,12 +121,10 @@ struct ofpbuf {
> BPF_RINGBUF_OUTPUT(events, <BUFFER_PAGE_CNT>);
> BPF_TABLE("percpu_array", uint32_t, uint64_t, dropcnt, 1);
>
> -int trace__op_flow_execute(struct pt_regs *ctx) {
> - struct ofpbuf nlbuf;
> +static int trace_event(struct ofpbuf *nlbuf)
> +{
> uint32_t size;
>
> - bpf_usdt_readarg_p(5, ctx, &nlbuf, sizeof(nlbuf));
> -
> struct event_t *event = events.ringbuf_reserve(sizeof(struct event_t));
> if (!event) {
> uint32_t type = 0;
> @@ -163,17 +140,51 @@ int trace__op_flow_execute(struct pt_regs *ctx) {
> event->pid = bpf_get_current_pid_tgid();
> bpf_get_current_comm(&event->comm, sizeof(event->comm));
>
> - event->nl_size = nlbuf.size;
> + event->nl_size = nlbuf->size;
> if (event->nl_size > MAX_NLMSG)
> size = MAX_NLMSG;
> else
> size = event->nl_size;
>
> - bpf_probe_read(&event->nl_msg, size, nlbuf.data);
> + bpf_probe_read(&event->nl_msg, size, nlbuf->data);
>
> events.ringbuf_submit(event, 0);
> return 0;
> +}
> +
> +int trace__op_execute(struct pt_regs *ctx) {
> + struct ofpbuf nlbuf;
> +
> + bpf_usdt_readarg_p(5, ctx, &nlbuf, sizeof(nlbuf));
> + return trace_event(&nlbuf);
> +};
> +
> +#if <ENABLE_OP_FLOW_PUT>
> +int trace__op_flow_put(struct pt_regs *ctx) {
> + struct ofpbuf nlbuf;
> +
> + bpf_usdt_readarg_p(4, ctx, &nlbuf, sizeof(nlbuf));
> + return trace_event(&nlbuf);
> +};
> +#endif
> +
> +#if <ENABLE_OP_FLOW_DEL>
> +int trace__op_flow_del(struct pt_regs *ctx) {
> + struct ofpbuf nlbuf;
> +
> + bpf_usdt_readarg_p(4, ctx, &nlbuf, sizeof(nlbuf));
> + return trace_event(&nlbuf);
> };
> +#endif
> +
> +#if <ENABLE_OP_FLOW_GET>
> +int trace__op_flow_get(struct pt_regs *ctx) {
> + struct ofpbuf nlbuf;
> +
> + bpf_usdt_readarg_p(4, ctx, &nlbuf, sizeof(nlbuf));
> + return trace_event(&nlbuf);
> +};
> +#endif
> """
>
>
> @@ -182,21 +193,24 @@ int trace__op_flow_execute(struct pt_regs *ctx) {
> #
> def print_event(ctx, data, size):
> event = b["events"].event(data)
> - print("{:<18.9f} {:<4} {:<16} {:<10} {:<10}".
> +
> + if event.nl_size < options.nlmsg_size:
> + nl_size = event.nl_size
> + else:
> + nl_size = options.nlmsg_size
> +
> + print("{:<18.9f} {:<4} {:<16} {:<10} {:<10} {}".
> format(event.ts / 1000000000,
> event.cpu,
> event.comm.decode("utf-8"),
> event.pid,
> - event.nl_size))
> + event.nl_size,
> + get_ovs_dpif_op_str(get_cmd_type_from_nlm(
> + bytes(event.nl_msg)[:nl_size]))))
>
> #
> # Dumping the netlink message data if requested.
> #
> - if event.nl_size < options.nlmsg_size:
> - nl_size = event.nl_size
> - else:
> - nl_size = options.nlmsg_size
> -
> if options.nlmsg_decode == "hex":
> #
> # Abuse scapy's hex dump to dump flow key
> @@ -306,6 +320,19 @@ def decode_nlm_tlvs(tlvs, header=None, indent=4,
> dump=True,
> return result
>
>
> +#
> +# get_cmd_type_from_nlm()
> +#
> +def get_cmd_type_from_nlm(nlm):
> + # The netlink message consists of at least a 'struct nlmsghdr' (16-bytes)
> + # followed by a 'struct genlmsghdr'. The first byte of the genlmsghdr
> + # structure contains the command. Which is what we will extract here.
> + if len(nlm) < 17:
> + return -1
> +
> + return nlm[16]
> +
> +
> #
> # decode_nlm()
> #
> @@ -325,9 +352,11 @@ def decode_nlm(msg, indent=4, dump=True):
> #
> # Decode 'struct genlmsghdr'
> #
> + cmd, version, reserved = struct.unpack("=BBH", msg[:4])
> +
> if dump:
> - print("{}genlmsghdr: cmd = {}, version = {}, reserver = {}".format(
> - " " * indent, *struct.unpack("=BBH", msg[:4])))
> + print("{}genlmsghdr: cmd = {}, version = {}, reserved = {}".format(
> + " " * indent, get_ovs_dpif_op_str(cmd), version, reserved))
>
> msg = msg[4:]
>
> @@ -343,42 +372,94 @@ def decode_nlm(msg, indent=4, dump=True):
> #
> # Decode TLVs
> #
> - nl_attr_tree = {
> - "OVS_PACKET_ATTR_KEY": {
> - "header": "> Decode OVS_KEY_ATTR_* TLVs:",
> - "indent": 4,
> - "attr_str_func": get_ovs_key_attr_str,
> - "decode_tree": None,
> - },
> - "OVS_PACKET_ATTR_ACTIONS": {
> - "header": "> Decode OVS_ACTION_ATTR_* TLVs:",
> - "indent": 4,
> - "attr_str_func": get_ovs_action_attr_str,
> - "decode_tree": {
> - "OVS_ACTION_ATTR_SET": {
> - "header": "> Decode OVS_KEY_ATTR_* TLVs:",
> - "indent": 4,
> - "attr_str_func": get_ovs_key_attr_str,
> - "decode_tree": {
> - "OVS_KEY_ATTR_TUNNEL": {
> - "header": "> Decode OVS_TUNNEL_KEY_ATTR_* TLVs:",
> - "indent": 4,
> - "attr_str_func": get_ovs_tunnel_key_attr_str,
> - "decode_tree": None,
> - },
> + nl_key_attr = {
> + "header": "> Decode OVS_KEY_ATTR_* TLVs:",
> + "indent": 4,
> + "attr_str_func": get_ovs_key_attr_str,
> + "decode_tree": {
> + "OVS_KEY_ATTR_ENCAP": {
> + "header": "> Decode OVS_KEY_ATTR_* TLVs:",
> + "indent": 4,
> + "attr_str_func": get_ovs_key_attr_str,
> + "decode_tree": {
> + "OVS_KEY_ATTR_ENCAP": {
> + "header": "> Decode OVS_KEY_ATTR_* TLVs:",
> + "indent": 4,
> + "attr_str_func": get_ovs_key_attr_str,
> + "decode_tree": None,
> + },
> + },
> + },
> + }
> + }
> +
> + nl_action_attr = {
> + "header": "> Decode OVS_ACTION_ATTR_* TLVs:",
> + "indent": 4,
> + "attr_str_func": get_ovs_action_attr_str,
> + "decode_tree": {
> + "OVS_ACTION_ATTR_SET": {
> + "header": "> Decode OVS_KEY_ATTR_* TLVs:",
> + "indent": 4,
> + "attr_str_func": get_ovs_key_attr_str,
> + "decode_tree": {
> + "OVS_KEY_ATTR_TUNNEL": {
> + "header": "> Decode OVS_TUNNEL_KEY_ATTR_* TLVs:",
> + "indent": 4,
> + "attr_str_func": get_ovs_tunnel_key_attr_str,
> + "decode_tree": None,
> },
> },
> },
> },
> }
>
> - result = decode_nlm_tlvs(msg, indent=indent + 2, dump=dump,
> - header="> Decode OVS_PACKET_ATTR_* TLVs:",
> - attr_to_str_func=get_ovs_pkt_attr_str,
> - decode_tree=nl_attr_tree)
> + nl_attr_tree_exec = {
> + "OVS_PACKET_ATTR_KEY": nl_key_attr,
> + "OVS_PACKET_ATTR_ACTIONS": nl_action_attr,
> + }
> +
> + nl_attr_tree_put = {
> + "OVS_FLOW_ATTR_KEY": nl_key_attr,
> + "OVS_FLOW_ATTR_MASK": nl_key_attr,
> + "OVS_FLOW_ATTR_ACTIONS": nl_action_attr,
> + }
> +
> + if get_ovs_dpif_op_str(cmd) == "DPIF_OP_EXECUTE":
> + result = decode_nlm_tlvs(msg, indent=indent + 2, dump=dump,
> + header="> Decode OVS_PACKET_ATTR_* TLVs:",
> + attr_to_str_func=get_ovs_pkt_attr_str,
> + decode_tree=nl_attr_tree_exec)
> + else:
> + result = decode_nlm_tlvs(msg, indent=indent + 2, dump=dump,
> + header="> Decode OVS_FLOW_ATTR_* TLVs:",
> + attr_to_str_func=get_ovs_flow_attr_str,
> + decode_tree=nl_attr_tree_put)
> return result
>
>
> +#
> +# get_ovs_flow_attr_str()
> +#
> +def get_ovs_flow_attr_str(attr):
> + ovs_flow_attr = ["OVS_FLOW_ATTR_UNSPEC",
> + "OVS_FLOW_ATTR_KEY",
> + "OVS_FLOW_ATTR_ACTIONS",
> + "OVS_FLOW_ATTR_STATS",
> + "OVS_FLOW_ATTR_TCP_FLAGS",
> + "OVS_FLOW_ATTR_USED",
> + "OVS_FLOW_ATTR_CLEAR",
> + "OVS_FLOW_ATTR_MASK",
> + "OVS_FLOW_ATTR_PROBE",
> + "OVS_FLOW_ATTR_UFID",
> + "OVS_FLOW_ATTR_UFID_FLAGS",
> + "OVS_FLOW_ATTR_PAD"]
> + if attr < 0 or attr >= len(ovs_flow_attr):
> + return "<UNKNOWN:{}>".format(attr)
> +
> + return ovs_flow_attr[attr]
> +
> +
> #
> # get_ovs_pkt_attr_str()
> #
> @@ -507,6 +588,22 @@ def get_ovs_tunnel_key_attr_str(attr):
> return ovs_tunnel_key_attr[attr]
>
>
> +#
> +# get_ovs_dpif_op_str()
> +#
> +def get_ovs_dpif_op_str(op):
> + ovs_dpif_ops = ["DPIF_OP_UNSPEC",
> + "DPIF_OP_FLOW_PUT",
> + "DPIF_OP_FLOW_DEL",
> + "DPIF_OP_EXECUTE",
> + "DPIF_OP_FLOW_GET"]
> +
> + if op < 0 or op >= len(ovs_dpif_ops):
> + return "<UNKNOWN:{}>".format(op)
> +
> + return ovs_dpif_ops[op]
> +
> +
> #
> # buffer_size_type()
> #
> @@ -566,8 +663,17 @@ def main():
> help="Set maximum netlink message size to capture, "
> "default 512", type=buffer_size_type, default=512,
> metavar="[64-2048]")
> + parser.add_argument("--trace-del-op",
> + help="Monitor DPIF_OP_FLOW_DEL messages",
> + action="store_true")
> + parser.add_argument("--trace-get-op",
> + help="Monitor DPIF_OP_FLOW_GET messages",
> + action="store_true")
> + parser.add_argument("--trace-put-op",
> + help="Monitor DPIF_OP_FLOW_PUT messages",
> + action="store_true")
> parser.add_argument("-w", "--pcap", metavar="PCAP_FILE",
> - help="Write upcall packets to specified pcap file",
> + help="Write execute packets to specified pcap file",
> type=str, default=None)
>
> options = parser.parse_args()
> @@ -606,13 +712,21 @@ def main():
> u = USDT(pid=int(options.pid))
> try:
> u.enable_probe(probe="dpif_netlink_operate__:op_flow_execute",
> - fn_name="trace__op_flow_execute")
> + fn_name="trace__op_execute")
This should also be a selected probe. Otherwise, it will be present for
every invocation, even if we only want to monitor the flow_del (for
example), and that can clutter up the display. I think a default-true
option with something like --no-trace-exec would be right.
> + if options.trace_put_op:
> + u.enable_probe(probe="dpif_netlink_operate__:op_flow_put",
> + fn_name="trace__op_flow_put")
> + if options.trace_del_op:
> + u.enable_probe(probe="dpif_netlink_operate__:op_flow_del",
> + fn_name="trace__op_flow_del")
> + if options.trace_get_op:
> + u.enable_probe(probe="dpif_netlink_operate__:op_flow_get",
> + fn_name="trace__op_flow_get")
> except USDTException as e:
> - print("ERROR: {}"
> - "ovs-vswitchd!".format(
> - (re.sub("^", " " * 7, str(e), flags=re.MULTILINE)).strip().
> - replace("--with-dtrace or --enable-dtrace",
> - "--enable-usdt-probes")))
> + print("ERROR: {}".format(
> + (re.sub("^", " " * 7, str(e), flags=re.MULTILINE)).strip().
> + replace("--with-dtrace or --enable-dtrace",
> + "--enable-usdt-probes")))
> sys.exit(-1)
>
> #
> @@ -627,15 +741,22 @@ def main():
> source = source.replace("<BUFFER_PAGE_CNT>",
> str(options.buffer_page_count))
>
> + source = source.replace("<ENABLE_OP_FLOW_DEL>",
> + "1" if options.trace_del_op else "0")
> + source = source.replace("<ENABLE_OP_FLOW_GET>",
> + "1" if options.trace_get_op else "0")
> + source = source.replace("<ENABLE_OP_FLOW_PUT>",
> + "1" if options.trace_put_op else "0")
> +
> b = BPF(text=source, usdt_contexts=[u], debug=options.debug)
>
> #
> # Print header
> #
> - print("Display DPIF_OP_EXECUTE operations being queued for transmission "
> - "onto the netlink socket.")
> - print("{:<18} {:<4} {:<16} {:<10} {:<10}".format(
> - "TIME", "CPU", "COMM", "PID", "NL_SIZE"))
> + print("Display DPIF operations being queued for transmission onto the "
> + "netlink socket.")
> + print("{:<18} {:<4} {:<16} {:<10} {:<10} {}".format(
> + "TIME", "CPU", "COMM", "PID", "NL_SIZE", "DPIF_OPERATION"))
>
> #
> # Dump out all events
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev