On 3 Feb 2025, at 11:09, Adrián Moreno wrote:

> On Thu, Jan 30, 2025 at 02:33:24PM +0100, Eelco Chaudron wrote:
>>
>>
>> On 17 Jan 2025, at 15:25, Adrian Moreno wrote:
>>
>>> Printing just the datapath on each upcall gives little information (most
>>> often, there will only be one well-known datapath). Instead, print both
>>> the input port name (plus the datapath).
>>>
>>> In order to do this, refactor decode_nla to always generate the dump
>>> that only gets printed if needed. That way it can be called earlier on.
>>>
>>> Signed-off-by: Adrian Moreno <[email protected]>
>>
>> In general, this looks good to me. Some small comments below.
>>
>> //Eelco
>>
>>> ---
>>>  utilities/usdt-scripts/upcall_cost.py    |   4 +-
>>>  utilities/usdt-scripts/upcall_monitor.py | 110 +++++++++++++----------
>>>  utilities/usdt-scripts/usdt_lib.py       |  27 +++++-
>>>  3 files changed, 90 insertions(+), 51 deletions(-)
>>>
>>> diff --git a/utilities/usdt-scripts/upcall_cost.py 
>>> b/utilities/usdt-scripts/upcall_cost.py
>>> index 47a1e30a6..2037fe69a 100755
>>> --- a/utilities/usdt-scripts/upcall_cost.py
>>> +++ b/utilities/usdt-scripts/upcall_cost.py
>>> @@ -354,7 +354,7 @@ class DpUpcall(Event):
>>>                   pkt_frag_len):
>>>          super(DpUpcall, self).__init__(ts, pid, comm, cpu, 
>>> EventType.DP_UPCALL)
>>>          self.dpif_name = dpif_name
>>> -        self.dp_port = dp_map.get(dpif_name, port)
>>> +        self.dp_port = dp_map.get_port_num(dpif_name, port)
>>
>> get_port_number()? See below.
>>
>>>          if self.dp_port is None:
>>>              #
>>>              # As we only identify interfaces at startup, new interfaces 
>>> could
>>> @@ -448,7 +448,7 @@ class RecvUpcall(Event):
>>>
>>>      def get_system_dp_port(dpif_name):
>>>          dp_map = dp_map.get_map()
>>> -        return dp_map.get(dpif_name, {}).get("ovs-system", None)
>>> +        return dp_map.get_port_num(dpif_name, {}).get("ovs-system", None)
>>>
>>>      def decode_nlm(msg, indent=4, dump=True):
>>>          bytes_left = len(msg)
>>> diff --git a/utilities/usdt-scripts/upcall_monitor.py 
>>> b/utilities/usdt-scripts/upcall_monitor.py
>>> index 333e23d51..8943fd205 100755
>>> --- a/utilities/usdt-scripts/upcall_monitor.py
>>> +++ b/utilities/usdt-scripts/upcall_monitor.py
>>> @@ -20,10 +20,10 @@
>>>  # packets sent by the kernel to ovs-vswitchd. By default, it will show all
>>>  # upcall events, which looks something like this:
>>>  #
>>> -# TIME               CPU  COMM      PID      DPIF_NAME          TYPE 
>>> PKT_LEN...
>>> -# 5952147.003848809  2    handler4  1381158  system@ovs-system  0    98    
>>>  132
>>> -# 5952147.003879643  2    handler4  1381158  system@ovs-system  0    70    
>>>  160
>>> -# 5952147.003914924  2    handler4  1381158  system@ovs-system  0    98    
>>>  152
>>> +# TIME               CPU  COMM      PID      PORT_NAME                TYPE 
>>> ..
>>> +# 5952147.003848809  2    handler4  1381158  eth0 (system@ovs-system)  0
>>> +# 5952147.003879643  2    handler4  1381158  eth0 (system@ovs-system)  0
>>> +# 5952147.003914924  2    handler4  1381158  eth0 (system@ovs-system)  0
>>>  #
>>>  # Also, upcalls dropped by the kernel (e.g: because the netlink buffer is 
>>> full)
>>>  # are reported.
>>> @@ -71,7 +71,7 @@
>>>  #
>>>  #  $ ./upcall_monitor.py --packet-decode decode --flow-key-decode nlraw \
>>>  #      --packet-size 128 --flow-key-size 256
>>> -#  TIME               CPU  COMM             PID        DPIF_NAME          
>>> ...
>>> +#  TIME               CPU  COMM             PID        PORT_NAME          
>>> ...
>>>  #  5953013.333214231  2    handler4         1381158    system@ovs-system  
>>> ...
>>>  #    Flow key size 132 bytes, size captured 132 bytes.
>>>  #      nla_len 8, nla_type OVS_KEY_ATTR_RECIRC_ID[20], data: 00 00 00 00
>>> @@ -121,6 +121,8 @@ from os.path import exists
>>>  from scapy.all import hexdump, wrpcap
>>>  from scapy.layers.l2 import Ether
>>>
>>
>> No need for a new line here.
>
> I was trying to logically split the external dependencies from internal
> ones:
>
> https://peps.python.org/pep-0008/#imports
>
> We are not checking this style-related pep options, so we could ingore
> them.


I was not aware of this pep rule :) So let’s adhere to it.

>>> +from usdt_lib import DpPortMapping
>>> +
>>>  import argparse
>>>  import psutil
>>>  import re
>>> @@ -280,7 +282,7 @@ int kretprobe__ovs_dp_upcall(struct pt_regs *ctx)
>>>  #
>>>  # print_key()
>>>  #
>>> -def print_key(event):
>>> +def print_key(event, decode_dump):
>>>      if event.key_size < options.flow_key_size:
>>>          key_len = event.key_size
>>>      else:
>>> @@ -301,39 +303,46 @@ def print_key(event):
>>>                                             dump=True),
>>>                       flags=re.MULTILINE))
>>>
>>> -    if options.flow_key_decode == 'nlraw':
>>> -        nla = decode_nlm(bytes(event.key)[:key_len])
>>> -    else:
>>> -        nla = decode_nlm(bytes(event.key)[:key_len], dump=False)
>>> -
>>> -    if "OVS_KEY_ATTR_IN_PORT" in nla:
>>> -        port = struct.unpack("=I", nla["OVS_KEY_ATTR_IN_PORT"])[0]
>>> -    else:
>>> -        port = "Unknown"
>>> -
>>> -    return port
>>> +    if options.flow_key_decode == "nlraw":
>>> +        for line in decode_dump:
>>> +            print(line)
>>>
>>>
>>>  #
>>>  # print_event()
>>>  #
>>>  def print_event(ctx, data, size):
>>> -    event = b['events'].event(data)
>>> -    print("{:<18.9f} {:<4} {:<16} {:<10} {:<32} {:<4} {:<10} {:<12} {:<8}".
>>> -          format(event.ts / 1000000000,
>>> -                 event.cpu,
>>> -                 event.comm.decode("utf-8"),
>>> -                 event.pid,
>>> -                 event.dpif_name.decode("utf-8"),
>>> -                 event.upcall_type,
>>> -                 event.pkt_size,
>>> -                 event.key_size,
>>> -                 event.result))
>>> +    event = b["events"].event(data)
>>> +    dp = event.dpif_name.decode("utf-8")
>>> +
>>> +    nla, key_dump = decode_nlm(
>>> +        bytes(event.key)[: min(event.key_size, options.flow_key_size)]
>>> +    )
>>> +    if "OVS_KEY_ATTR_IN_PORT" in nla:
>>> +        port_no = struct.unpack("=I", nla["OVS_KEY_ATTR_IN_PORT"])[0]
>>> +        port = dp_map.get_port_name(dp.partition("@")[-1], port_no)
>>
>> What if the port can not be found? Should we re-populate the cache? or just 
>> dump the internal port number?
>>
>
> Maybe the automatic re-populating can be added inside the port cache
> itself? And if it still fails, we can just leave the port number, that's
> a good idea.

Sounds like a good abstraction. Make sure it’s not overwriting a static map 
that was put in place. Should we also add some logic to avoid constant cache 
updates?

>>> +    else:
>>> +        port = "Unknown"
>>> +
>>
>> Adding this in this way seems like a hack, of splitting up print_ukey(), but 
>> I guess it would be ok for now until we move the decode_nlm to the usdt 
>> library as a proper object :)
>>
>
> I agree.
>
>>> +    print(
>>> +        "{:<18.9f} {:<4} {:<16} {:<10} {:<40} {:<4} {:<10} {:<12} {:<8}".
>>> +        format(
>>> +            event.ts / 1000000000,
>>> +            event.cpu,
>>> +            event.comm.decode("utf-8"),
>>> +            event.pid,
>>> +            f"{port} ({dp})",
>>
>> Not sure if we should start mixing f”” style with format(), or f”” in 
>> general? If we want f”” in general, we should probably replace all format() 
>> functions.
>>
>
> I prefer f"" style because I find it easier to read, but you're right it
> makes the style inconsistent. I can use "str.format" and then suggest a
> global change.

Yes, we should do that. I like the f”” style too, and we should be safe with 
3.6 as we need 3.7 minimal according to our NEWS section.

>>> +            event.upcall_type,
>>> +            event.pkt_size,
>>> +            event.key_size,
>>> +            event.result,
>>> +        )
>>> +    )
>>>
>>>      #
>>>      # Decode packet only if there is data
>>>      #
>>> -    port = print_key(event)
>>> +    print_key(event, key_dump)
>>>
>>>      if event.pkt_size <= 0:
>>>          return
>>> @@ -342,7 +351,7 @@ def print_event(ctx, data, size):
>>>
>>>      if event.pkt_size < options.packet_size:
>>>          pkt_len = event.pkt_size
>>> -        pkt_data = bytes(event.pkt)[:event.pkt_size]
>>> +        pkt_data = bytes(event.pkt)[: event.pkt_size]
>>
>> Any reason for the extra space? I guess for slicing we do not need a space 
>> if there are no operations.
>
> No need, you're right.
>
>>
>>>      else:
>>>          pkt_len = options.packet_size
>>>          pkt_data = bytes(event.pkt)
>>> @@ -369,23 +378,26 @@ def print_event(ctx, data, size):
>>>  #
>>>  # decode_nlm()
>>>  #
>>> -def decode_nlm(msg, indent=4, dump=True):
>>> +def decode_nlm(msg, indent=4):
>>>      bytes_left = len(msg)
>>>      result = {}
>>> +    dump = []
>>>
>>>      while bytes_left:
>>>          if bytes_left < 4:
>>> -            if dump:
>>> -                print("{}WARN: decode truncated; can't read header".format(
>>> -                    ' ' * indent))
>>> +            dump.append(
>>> +                "{}WARN: decode truncated; can't read header".format(
>>> +                    " " * indent
>>> +                )
>>> +            )
>>>              break
>>>
>>>          nla_len, nla_type = struct.unpack("=HH", msg[:4])
>>>
>>>          if nla_len < 4:
>>> -            if dump:
>>> -                print("{}WARN: decode truncated; nla_len < 4".format(
>>> -                    ' ' * indent))
>>> +            dump.append(
>>> +                "{}WARN: decode truncated; nla_len < 4".format(" " * 
>>> indent)
>>> +            )
>>>              break
>>>
>>>          nla_data = msg[4:nla_len]
>>> @@ -397,16 +409,19 @@ def decode_nlm(msg, indent=4, dump=True):
>>>          else:
>>>              result[get_ovs_key_attr_str(nla_type)] = nla_data
>>>
>>> -        if dump:
>>> -            print("{}nla_len {}, nla_type {}[{}], data: {}{}".format(
>>> +        dump.append(
>>> +            "{}nla_len {}, nla_type {}[{}], data: {}{}".format(
>>>                  ' ' * indent, nla_len, get_ovs_key_attr_str(nla_type),
>>>                  nla_type,
>>> -                "".join("{:02x} ".format(b) for b in nla_data), trunc))
>>> +                "".join("{:02x} ".format(b) for b in nla_data), trunc)
>>> +        )
>>>
>>>          if trunc != "":
>>> -            if dump:
>>> -                print("{}WARN: decode truncated; nla_len > msg_len[{}] ".
>>> -                      format(' ' * indent, bytes_left))
>>> +            dump.append(
>>> +                "{}WARN: decode truncated; nla_len > msg_len[{}] ".format(
>>> +                    " " * indent, bytes_left
>>> +                )
>>> +            )
>>>              break
>>>
>>>          # update next offset, but make sure it's aligned correctly
>>> @@ -414,7 +429,7 @@ def decode_nlm(msg, indent=4, dump=True):
>>>          msg = msg[next_offset:]
>>>          bytes_left -= next_offset
>>>
>>> -    return result
>>> +    return result, dump
>>>
>>>
>>>  #
>>> @@ -499,6 +514,9 @@ def main():
>>>      #
>>>      global b
>>>      global options
>>> +    global dp_map
>>> +
>>> +    dp_map = DpPortMapping()
>>>
>>>      #
>>>      # Argument parsing
>>> @@ -607,8 +625,8 @@ def main():
>>>      #
>>>      # Print header
>>>      #
>>> -    print("{:<18} {:<4} {:<16} {:<10} {:<32} {:<4} {:<10} {:<12} 
>>> {:<8}".format(
>>> -        "TIME", "CPU", "COMM", "PID", "DPIF_NAME", "TYPE", "PKT_LEN",
>>> +    print("{:<18} {:<4} {:<16} {:<10} {:<40} {:<4} {:<10} {:<12} 
>>> {:<8}".format(
>>> +        "TIME", "CPU", "COMM", "PID", "PORT_NAME", "TYPE", "PKT_LEN",
>>>          "FLOW_KEY_LEN", "RESULT"))
>>>
>>>      #
>>> diff --git a/utilities/usdt-scripts/usdt_lib.py 
>>> b/utilities/usdt-scripts/usdt_lib.py
>>> index da3fab2bf..9ad50c540 100644
>>> --- a/utilities/usdt-scripts/usdt_lib.py
>>> +++ b/utilities/usdt-scripts/usdt_lib.py
>>> @@ -33,12 +33,33 @@ class DpPortMapping:
>>>          """Override the internal cache map."""
>>>          self.cache_map = cache_map
>>>
>>> -    def get(self, dp, port_no):
>>> -        """Get the portname from a port number."""
>>> +    def get_port_num(self, dp, port):
>>> +        """Get the port number from a port name."""
>>>          if self.cache_map is None:
>>>              self._get_mapping()
>>>
>>> -        return self.cache_map.get(dp, {}).get(port_no, None)
>>> +        return self.cache_map.get(dp, {}).get(port, None)
>>> +
>>> +    def get_port_name(self, dp, port_no):
>>> +        """Get the port name from a port number."""
>>> +        if self.cache_map is None:
>>> +            self._get_mapping()
>>> +
>>> +        if not self.cache_map.get(dp):
>>> +            return None
>>> +
>>> +        for name, num in self.cache_map[dp].items():
>>> +            if num == port_no:
>>> +                return name
>>> +
>>> +        return None
>>> +
>>> +    def get_port_number(self, dp, port):
>>
>> Now we have both get_port_num() and get_port_number(). I think we should 
>> only keep the latter one.
>>
>
> Agree.
>
>>> +        """Get the port number from a port name."""
>>> +        if self.cache_map is None:
>>> +            self._get_mapping()
>>> +
>>> +        return self.cache_map.get(dp, {}).get(port, None)
>>>
>>>      def _get_mapping(self):
>>>          """Get the datapath port mapping from the running OVS."""
>>> --
>>> 2.48.1
>>>
>>> _______________________________________________
>>> dev mailing list
>>> [email protected]
>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>>

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to