On 28 Aug 2024, at 16:01, Aaron Conole wrote:
> Eelco Chaudron <[email protected]> writes: > >> Updated the dpif_nl_exec_monitor.py utility to also optionally monitor >> the other DPIF netlink operations, i.e., DPIF_OP_FLOW_PUT, >> DPIF_OP_FLOW_DEL and DPIF_OP_FLOW_GET >> >> Signed-off-by: Eelco Chaudron <[email protected]> >> --- >> Documentation/topics/usdt-probes.rst | 7 +- >> utilities/automake.mk | 6 +- >> ..._exec_monitor.py => dpif_op_nl_monitor.py} | 279 +++++++++++++----- >> 3 files changed, 207 insertions(+), 85 deletions(-) >> rename utilities/usdt-scripts/{dpif_nl_exec_monitor.py => >> dpif_op_nl_monitor.py} (72%) >> >> diff --git a/Documentation/topics/usdt-probes.rst >> b/Documentation/topics/usdt-probes.rst >> index b9a6c54b2..93bfaec12 100644 >> --- a/Documentation/topics/usdt-probes.rst >> +++ b/Documentation/topics/usdt-probes.rst >> @@ -239,7 +239,7 @@ DPIF_OP_FLOW_DEL operation as part of the dpif >> ``operate()`` callback. >> >> **Script references**: >> >> -- *None* >> >> +- ``utilities/usdt-scripts/dpif_op_nl_monitor.py`` >> >> >> dpif_netlink_operate\_\_:op_flow_execute >> @@ -260,7 +260,7 @@ DPIF_OP_FLOW_EXECUTE operation as part of the dpif >> ``operate()`` callback. >> >> **Script references**: >> >> -- ``utilities/usdt-scripts/dpif_nl_exec_monitor.py`` >> +- ``utilities/usdt-scripts/dpif_op_nl_monitor.py`` >> - ``utilities/usdt-scripts/upcall_cost.py`` >> >> >> @@ -281,7 +281,7 @@ DPIF_OP_FLOW_GET operation as part of the dpif >> ``operate()`` callback. >> >> **Script references**: >> >> -- *None* >> +- ``utilities/usdt-scripts/dpif_op_nl_monitor.py`` >> >> >> dpif_netlink_operate\_\_:op_flow_put >> @@ -301,6 +301,7 @@ DPIF_OP_FLOW_PUT operation as part of the dpif >> ``operate()`` callback. 
>> >> **Script references**: >> >> +- ``utilities/usdt-scripts/dpif_op_nl_monitor.py`` >> - ``utilities/usdt-scripts/upcall_cost.py`` >> >> >> >> diff --git a/utilities/automake.mk b/utilities/automake.mk >> index 146b8c37f..acc1af4e0 100644 >> --- a/utilities/automake.mk >> +++ b/utilities/automake.mk >> @@ -22,7 +22,7 @@ scripts_SCRIPTS += \ >> scripts_DATA += utilities/ovs-lib >> usdt_SCRIPTS += \ >> utilities/usdt-scripts/bridge_loop.bt \ >> - utilities/usdt-scripts/dpif_nl_exec_monitor.py \ >> + utilities/usdt-scripts/dpif_op_nl_monitor.py \ >> utilities/usdt-scripts/flow_reval_monitor.py \ >> utilities/usdt-scripts/kernel_delay.py \ >> utilities/usdt-scripts/kernel_delay.rst \ >> @@ -72,7 +72,7 @@ EXTRA_DIST += \ >> utilities/docker/debian/Dockerfile \ >> utilities/docker/debian/build-kernel-modules.sh \ >> utilities/usdt-scripts/bridge_loop.bt \ >> - utilities/usdt-scripts/dpif_nl_exec_monitor.py \ >> + utilities/usdt-scripts/dpif_op_nl_monitor.py \ >> utilities/usdt-scripts/flow_reval_monitor.py \ >> utilities/usdt-scripts/kernel_delay.py \ >> utilities/usdt-scripts/kernel_delay.rst \ >> @@ -147,7 +147,7 @@ FLAKE8_PYFILES += utilities/ovs-pcap.in \ >> utilities/ovs-check-dead-ifs.in \ >> utilities/ovs-tcpdump.in \ >> utilities/ovs-pipegen.py \ >> - utilities/usdt-scripts/dpif_nl_exec_monitor.py \ >> + utilities/usdt-scripts/dpif_op_nl_monitor.py \ >> utilities/usdt-scripts/flow_reval_monitor.py \ >> utilities/usdt-scripts/upcall_monitor.py \ >> utilities/usdt-scripts/upcall_cost.py >> diff --git a/utilities/usdt-scripts/dpif_nl_exec_monitor.py >> b/utilities/usdt-scripts/dpif_op_nl_monitor.py >> similarity index 72% >> rename from utilities/usdt-scripts/dpif_nl_exec_monitor.py >> rename to utilities/usdt-scripts/dpif_op_nl_monitor.py >> index a5cf1a35a..c0e0a9fae 100755 >> --- a/utilities/usdt-scripts/dpif_nl_exec_monitor.py >> +++ b/utilities/usdt-scripts/dpif_op_nl_monitor.py >> @@ -19,7 +19,8 @@ >> # dpif_nl_exec_monitor.py uses the 
dpif_netlink_operate__:op_flow_execute >> USDT >> # probe to receive all DPIF_OP_EXECUTE operations that are queued for >> # transmission over the netlink socket. It will do some basic decoding, and >> if >> -# requested a packet dump. >> +# requested a packet dump. Note that there are also options to obtain >> +# additional information for the DPIF_OP_FLOW_* operations. >> # >> # Here is an example: >> # >> @@ -75,31 +76,9 @@ >> # seq = 0xc >> # >> # The example above dumps the full netlink and packet decode. However >> options >> -# exist to disable this. Here is the full list of supported options: >> -# >> -# usage: dpif_nl_exec_monitor.py [-h] [--buffer-page-count NUMBER] [-D >> [DEBUG]] >> -# [-d {none,hex,decode}] [-n >> {none,hex,nlraw}] >> -# [-p VSWITCHD_PID] [-s [64-2048]] >> -# [-w PCAP_FILE] >> -# >> -# optional arguments: >> -# -h, --help show this help message and exit >> -# --buffer-page-count NUMBER >> -# Number of BPF ring buffer pages, default 1024 >> -# -D [DEBUG], --debug [DEBUG] >> -# Enable eBPF debugging >> -# -d {none,hex,decode}, --packet-decode {none,hex,decode} >> -# Display packet content in selected mode, default >> none >> -# -n {none,hex,nlraw}, --nlmsg-decode {none,hex,nlraw} >> -# Display netlink message content in selected mode, >> -# default nlraw >> -# -p VSWITCHD_PID, --pid VSWITCHD_PID >> -# ovs-vswitch's PID >> -# -s [64-2048], --nlmsg-size [64-2048] >> -# Set maximum netlink message size to capture, >> default >> -# 512 >> -# -w PCAP_FILE, --pcap PCAP_FILE >> -# Write upcall packets to specified pcap file >> +# exist to disable this. For a complete list of options, please use the >> +# '--help' or '-h' argument. 
>> +# >> >> from bcc import BPF, USDT, USDTException >> from os.path import exists >> @@ -142,12 +121,10 @@ struct ofpbuf { >> BPF_RINGBUF_OUTPUT(events, <BUFFER_PAGE_CNT>); >> BPF_TABLE("percpu_array", uint32_t, uint64_t, dropcnt, 1); >> >> -int trace__op_flow_execute(struct pt_regs *ctx) { >> - struct ofpbuf nlbuf; >> +static int trace_event(struct ofpbuf *nlbuf) >> +{ >> uint32_t size; >> >> - bpf_usdt_readarg_p(5, ctx, &nlbuf, sizeof(nlbuf)); >> - >> struct event_t *event = events.ringbuf_reserve(sizeof(struct event_t)); >> if (!event) { >> uint32_t type = 0; >> @@ -163,17 +140,51 @@ int trace__op_flow_execute(struct pt_regs *ctx) { >> event->pid = bpf_get_current_pid_tgid(); >> bpf_get_current_comm(&event->comm, sizeof(event->comm)); >> >> - event->nl_size = nlbuf.size; >> + event->nl_size = nlbuf->size; >> if (event->nl_size > MAX_NLMSG) >> size = MAX_NLMSG; >> else >> size = event->nl_size; >> >> - bpf_probe_read(&event->nl_msg, size, nlbuf.data); >> + bpf_probe_read(&event->nl_msg, size, nlbuf->data); >> >> events.ringbuf_submit(event, 0); >> return 0; >> +} >> + >> +int trace__op_execute(struct pt_regs *ctx) { >> + struct ofpbuf nlbuf; >> + >> + bpf_usdt_readarg_p(5, ctx, &nlbuf, sizeof(nlbuf)); >> + return trace_event(&nlbuf); >> +}; >> + >> +#if <ENABLE_OP_FLOW_PUT> >> +int trace__op_flow_put(struct pt_regs *ctx) { >> + struct ofpbuf nlbuf; >> + >> + bpf_usdt_readarg_p(4, ctx, &nlbuf, sizeof(nlbuf)); >> + return trace_event(&nlbuf); >> +}; >> +#endif >> + >> +#if <ENABLE_OP_FLOW_DEL> >> +int trace__op_flow_del(struct pt_regs *ctx) { >> + struct ofpbuf nlbuf; >> + >> + bpf_usdt_readarg_p(4, ctx, &nlbuf, sizeof(nlbuf)); >> + return trace_event(&nlbuf); >> }; >> +#endif >> + >> +#if <ENABLE_OP_FLOW_GET> >> +int trace__op_flow_get(struct pt_regs *ctx) { >> + struct ofpbuf nlbuf; >> + >> + bpf_usdt_readarg_p(4, ctx, &nlbuf, sizeof(nlbuf)); >> + return trace_event(&nlbuf); >> +}; >> +#endif >> """ >> >> >> @@ -182,21 +193,24 @@ int 
trace__op_flow_execute(struct pt_regs *ctx) { >> # >> def print_event(ctx, data, size): >> event = b["events"].event(data) >> - print("{:<18.9f} {:<4} {:<16} {:<10} {:<10}". >> + >> + if event.nl_size < options.nlmsg_size: >> + nl_size = event.nl_size >> + else: >> + nl_size = options.nlmsg_size >> + >> + print("{:<18.9f} {:<4} {:<16} {:<10} {:<10} {}". >> format(event.ts / 1000000000, >> event.cpu, >> event.comm.decode("utf-8"), >> event.pid, >> - event.nl_size)) >> + event.nl_size, >> + get_ovs_dpif_op_str(get_cmd_type_from_nlm( >> + bytes(event.nl_msg)[:nl_size])))) >> >> # >> # Dumping the netlink message data if requested. >> # >> - if event.nl_size < options.nlmsg_size: >> - nl_size = event.nl_size >> - else: >> - nl_size = options.nlmsg_size >> - >> if options.nlmsg_decode == "hex": >> # >> # Abuse scapy's hex dump to dump flow key >> @@ -306,6 +320,19 @@ def decode_nlm_tlvs(tlvs, header=None, indent=4, >> dump=True, >> return result >> >> >> +# >> +# get_cmd_type_from_nlm() >> +# >> +def get_cmd_type_from_nlm(nlm): >> + # The netlink message consists of at least a 'struct nlmsghdr' >> (16-bytes) >> + # followed by a 'struct genlmsghdr'. The first byte of the genlmsghdr >> + # structure contains the command. Which is what we will extract here. 
>> + if len(nlm) < 17: >> + return -1 >> + >> + return nlm[16] >> + >> + >> # >> # decode_nlm() >> # >> @@ -325,9 +352,11 @@ def decode_nlm(msg, indent=4, dump=True): >> # >> # Decode 'struct genlmsghdr' >> # >> + cmd, version, reserved = struct.unpack("=BBH", msg[:4]) >> + >> if dump: >> - print("{}genlmsghdr: cmd = {}, version = {}, reserver = {}".format( >> - " " * indent, *struct.unpack("=BBH", msg[:4]))) >> + print("{}genlmsghdr: cmd = {}, version = {}, reserved = {}".format( >> + " " * indent, get_ovs_dpif_op_str(cmd), version, reserved)) >> >> msg = msg[4:] >> >> @@ -343,42 +372,94 @@ def decode_nlm(msg, indent=4, dump=True): >> # >> # Decode TLVs >> # >> - nl_attr_tree = { >> - "OVS_PACKET_ATTR_KEY": { >> - "header": "> Decode OVS_KEY_ATTR_* TLVs:", >> - "indent": 4, >> - "attr_str_func": get_ovs_key_attr_str, >> - "decode_tree": None, >> - }, >> - "OVS_PACKET_ATTR_ACTIONS": { >> - "header": "> Decode OVS_ACTION_ATTR_* TLVs:", >> - "indent": 4, >> - "attr_str_func": get_ovs_action_attr_str, >> - "decode_tree": { >> - "OVS_ACTION_ATTR_SET": { >> - "header": "> Decode OVS_KEY_ATTR_* TLVs:", >> - "indent": 4, >> - "attr_str_func": get_ovs_key_attr_str, >> - "decode_tree": { >> - "OVS_KEY_ATTR_TUNNEL": { >> - "header": "> Decode OVS_TUNNEL_KEY_ATTR_* >> TLVs:", >> - "indent": 4, >> - "attr_str_func": get_ovs_tunnel_key_attr_str, >> - "decode_tree": None, >> - }, >> + nl_key_attr = { >> + "header": "> Decode OVS_KEY_ATTR_* TLVs:", >> + "indent": 4, >> + "attr_str_func": get_ovs_key_attr_str, >> + "decode_tree": { >> + "OVS_KEY_ATTR_ENCAP": { >> + "header": "> Decode OVS_KEY_ATTR_* TLVs:", >> + "indent": 4, >> + "attr_str_func": get_ovs_key_attr_str, >> + "decode_tree": { >> + "OVS_KEY_ATTR_ENCAP": { >> + "header": "> Decode OVS_KEY_ATTR_* TLVs:", >> + "indent": 4, >> + "attr_str_func": get_ovs_key_attr_str, >> + "decode_tree": None, >> + }, >> + }, >> + }, >> + } >> + } >> + >> + nl_action_attr = { >> + "header": "> Decode OVS_ACTION_ATTR_* TLVs:", >> + 
"indent": 4, >> + "attr_str_func": get_ovs_action_attr_str, >> + "decode_tree": { >> + "OVS_ACTION_ATTR_SET": { >> + "header": "> Decode OVS_KEY_ATTR_* TLVs:", >> + "indent": 4, >> + "attr_str_func": get_ovs_key_attr_str, >> + "decode_tree": { >> + "OVS_KEY_ATTR_TUNNEL": { >> + "header": "> Decode OVS_TUNNEL_KEY_ATTR_* TLVs:", >> + "indent": 4, >> + "attr_str_func": get_ovs_tunnel_key_attr_str, >> + "decode_tree": None, >> }, >> }, >> }, >> }, >> } >> >> - result = decode_nlm_tlvs(msg, indent=indent + 2, dump=dump, >> - header="> Decode OVS_PACKET_ATTR_* TLVs:", >> - attr_to_str_func=get_ovs_pkt_attr_str, >> - decode_tree=nl_attr_tree) >> + nl_attr_tree_exec = { >> + "OVS_PACKET_ATTR_KEY": nl_key_attr, >> + "OVS_PACKET_ATTR_ACTIONS": nl_action_attr, >> + } >> + >> + nl_attr_tree_put = { >> + "OVS_FLOW_ATTR_KEY": nl_key_attr, >> + "OVS_FLOW_ATTR_MASK": nl_key_attr, >> + "OVS_FLOW_ATTR_ACTIONS": nl_action_attr, >> + } >> + >> + if get_ovs_dpif_op_str(cmd) == "DPIF_OP_EXECUTE": >> + result = decode_nlm_tlvs(msg, indent=indent + 2, dump=dump, >> + header="> Decode OVS_PACKET_ATTR_* TLVs:", >> + attr_to_str_func=get_ovs_pkt_attr_str, >> + decode_tree=nl_attr_tree_exec) >> + else: >> + result = decode_nlm_tlvs(msg, indent=indent + 2, dump=dump, >> + header="> Decode OVS_FLOW_ATTR_* TLVs:", >> + attr_to_str_func=get_ovs_flow_attr_str, >> + decode_tree=nl_attr_tree_put) >> return result >> >> >> +# >> +# get_ovs_flow_attr_str() >> +# >> +def get_ovs_flow_attr_str(attr): >> + ovs_flow_attr = ["OVS_FLOW_ATTR_UNSPEC", >> + "OVS_FLOW_ATTR_KEY", >> + "OVS_FLOW_ATTR_ACTIONS", >> + "OVS_FLOW_ATTR_STATS", >> + "OVS_FLOW_ATTR_TCP_FLAGS", >> + "OVS_FLOW_ATTR_USED", >> + "OVS_FLOW_ATTR_CLEAR", >> + "OVS_FLOW_ATTR_MASK", >> + "OVS_FLOW_ATTR_PROBE", >> + "OVS_FLOW_ATTR_UFID", >> + "OVS_FLOW_ATTR_UFID_FLAGS", >> + "OVS_FLOW_ATTR_PAD"] >> + if attr < 0 or attr >= len(ovs_flow_attr): >> + return "<UNKNOWN:{}>".format(attr) >> + >> + return ovs_flow_attr[attr] >> + >> + >> # >> # 
get_ovs_pkt_attr_str() >> # >> @@ -507,6 +588,22 @@ def get_ovs_tunnel_key_attr_str(attr): >> return ovs_tunnel_key_attr[attr] >> >> >> +# >> +# get_ovs_dpif_op_str() >> +# >> +def get_ovs_dpif_op_str(op): >> + ovs_dpif_ops = ["DPIF_OP_UNSPEC", >> + "DPIF_OP_FLOW_PUT", >> + "DPIF_OP_FLOW_DEL", >> + "DPIF_OP_EXECUTE", >> + "DPIF_OP_FLOW_GET"] >> + >> + if op < 0 or op >= len(ovs_dpif_ops): >> + return "<UNKNOWN:{}>".format(op) >> + >> + return ovs_dpif_ops[op] >> + >> + >> # >> # buffer_size_type() >> # >> @@ -566,8 +663,17 @@ def main(): >> help="Set maximum netlink message size to capture, " >> "default 512", type=buffer_size_type, default=512, >> metavar="[64-2048]") >> + parser.add_argument("--trace-del-op", >> + help="Monitor DPIF_OP_FLOW_DEL messages", >> + action="store_true") >> + parser.add_argument("--trace-get-op", >> + help="Monitor DPIF_OP_FLOW_GET messages", >> + action="store_true") >> + parser.add_argument("--trace-put-op", >> + help="Monitor DPIF_OP_FLOW_PUT messages", >> + action="store_true") >> parser.add_argument("-w", "--pcap", metavar="PCAP_FILE", >> - help="Write upcall packets to specified pcap file", >> + help="Write execute packets to specified pcap file", >> type=str, default=None) >> >> options = parser.parse_args() >> @@ -606,13 +712,21 @@ def main(): >> u = USDT(pid=int(options.pid)) >> try: >> u.enable_probe(probe="dpif_netlink_operate__:op_flow_execute", >> - fn_name="trace__op_flow_execute") >> + fn_name="trace__op_execute") > > This should also be a selected probe. Otherwise, it will be present for > every invocation, even if we only want to monitor the flow_del (for > example), and that can clutter up the display. I think a default-true > option with something like --no-trace-exec would be right. Yes, I will make the script such that it requires you to specify at least one operation you would like to trace. 
I will send a v3 soon… //Eelco >> + if options.trace_put_op: >> + u.enable_probe(probe="dpif_netlink_operate__:op_flow_put", >> + fn_name="trace__op_flow_put") >> + if options.trace_del_op: >> + u.enable_probe(probe="dpif_netlink_operate__:op_flow_del", >> + fn_name="trace__op_flow_del") >> + if options.trace_get_op: >> + u.enable_probe(probe="dpif_netlink_operate__:op_flow_get", >> + fn_name="trace__op_flow_get") >> except USDTException as e: >> - print("ERROR: {}" >> - "ovs-vswitchd!".format( >> - (re.sub("^", " " * 7, str(e), >> flags=re.MULTILINE)).strip(). >> - replace("--with-dtrace or --enable-dtrace", >> - "--enable-usdt-probes"))) >> + print("ERROR: {}".format( >> + (re.sub("^", " " * 7, str(e), flags=re.MULTILINE)).strip(). >> + replace("--with-dtrace or --enable-dtrace", >> + "--enable-usdt-probes"))) >> sys.exit(-1) >> >> # >> @@ -627,15 +741,22 @@ def main(): >> source = source.replace("<BUFFER_PAGE_CNT>", >> str(options.buffer_page_count)) >> >> + source = source.replace("<ENABLE_OP_FLOW_DEL>", >> + "1" if options.trace_del_op else "0") >> + source = source.replace("<ENABLE_OP_FLOW_GET>", >> + "1" if options.trace_get_op else "0") >> + source = source.replace("<ENABLE_OP_FLOW_PUT>", >> + "1" if options.trace_put_op else "0") >> + >> b = BPF(text=source, usdt_contexts=[u], debug=options.debug) >> >> # >> # Print header >> # >> - print("Display DPIF_OP_EXECUTE operations being queued for transmission >> " >> - "onto the netlink socket.") >> - print("{:<18} {:<4} {:<16} {:<10} {:<10}".format( >> - "TIME", "CPU", "COMM", "PID", "NL_SIZE")) >> + print("Display DPIF operations being queued for transmission onto the " >> + "netlink socket.") >> + print("{:<18} {:<4} {:<16} {:<10} {:<10} {}".format( >> + "TIME", "CPU", "COMM", "PID", "NL_SIZE", "DPIF_OPERATION")) >> >> # >> # Dump out all events _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
