===== IMPORTANT RFC NOTE: This drops some of the previous work using netlink infra due to synchronization issues with the child process. There are still additional netlink messages to debug, so treat this patch as a stop-gap measure, rather than a proper implementation. =====
This implements the netns/inode scan using procfs and getting a little creative with /proc/<>/net/* and /sys/class/net/<>/iflink details, which are specific to Linux. This does require having root-like access (it may be possible to ignore this), and the SELinux / AppArmor permissions need to be updated to accommodate this particular sweep. This will allow pulling a netns and inode from a particular net device, ex: > $ ovs-appctl netdev/lookup-v4-sock br0 tcp 172.31.110.1:45900 > 172.31.110.2:4443 > Scanning for: ip:016E1FAC -> 026E1FAC, port:B34C -> 0000115B > Inode lookup error: No such file or directory > ovs-appctl: ovs-vswitchd: server returned an error > $ ovs-appctl netdev/lookup-v4-sock v01 tcp 172.31.110.1:45900 > 172.31.110.2:4443 > Scanning for: ip:016E1FAC -> 026E1FAC, port:B34C -> 0000115B > Inode: 7100354, netns: 4026533001 There was a conscious decision not to use the dynamic string infrastructure when manipulating paths to avoid as many system calls as possible (since the code path is already riddled with them doing the procfs scanning). The goal is actually to use the netlink diag infrastructure going forward anyway, so this is just for a proof of concept rather than how a final implementation should look. Signed-off-by: Aaron Conole <[email protected]> --- NEWS | 3 + lib/daemon-unix.c | 1 + lib/netdev-linux.c | 203 ++++++++++++++++++++++++++++++++++ lib/netdev-provider.h | 13 +++ lib/netdev.c | 97 ++++++++++++++++ lib/netdev.h | 5 + utilities/checkpatch_dict.txt | 1 + 7 files changed, 323 insertions(+) diff --git a/NEWS b/NEWS index 96bf4992c3..219c4a9161 100644 --- a/NEWS +++ b/NEWS @@ -3,6 +3,9 @@ Post-v3.6.0 - Userspace datapath: * Conntrack now supports the FTP commands EPSV and EPRT with IPv4 connections, instead of limiting these commands to IPv6 only. + - Netlink: + * Introduce a new appctl command 'netdev/lookup-v4-sock' that will + attempt to look up a socket inode and netns details. 
v3.6.0 - 18 Aug 2025 diff --git a/lib/daemon-unix.c b/lib/daemon-unix.c index 6d02aceebe..e163c33739 100644 --- a/lib/daemon-unix.c +++ b/lib/daemon-unix.c @@ -827,6 +827,7 @@ daemon_become_new_user_linux(bool access_datapath OVS_UNUSED, if (access_datapath && !ret) { ret = capng_update(CAPNG_ADD, cap_sets, CAP_NET_ADMIN) + || capng_update(CAPNG_ADD, cap_sets, CAP_SYS_ADMIN) || capng_update(CAPNG_ADD, cap_sets, CAP_NET_RAW) || capng_update(CAPNG_ADD, cap_sets, CAP_NET_BROADCAST); #ifdef DPDK_NETDEV diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 8e96041814..ec28b14e43 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -19,6 +19,8 @@ #include "netdev-linux.h" #include "netdev-linux-private.h" +#include <ctype.h> +#include <dirent.h> #include <errno.h> #include <fcntl.h> #include <sys/types.h> @@ -31,19 +33,23 @@ #include <linux/if_ether.h> #include <linux/if_packet.h> #include <linux/if_tun.h> +#include <linux/inet_diag.h> #include <linux/types.h> #include <linux/ethtool.h> #include <linux/mii.h> #include <linux/rtnetlink.h> +#include <linux/sock_diag.h> #include <linux/sockios.h> #include <linux/virtio_net.h> #include <sys/ioctl.h> #include <sys/socket.h> #include <sys/uio.h> +#include <sys/wait.h> #include <net/if.h> #include <net/if_arp.h> #include <net/route.h> #include <poll.h> +#include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> @@ -91,6 +97,7 @@ COVERAGE_DEFINE(netdev_get_ethtool); COVERAGE_DEFINE(netdev_set_ethtool); COVERAGE_DEFINE(netdev_linux_invalid_l4_csum); COVERAGE_DEFINE(netdev_linux_unknown_l4_csum); +COVERAGE_DEFINE(netdev_linux_proc_net_walked); #ifndef IFLA_IF_NETNSID @@ -3840,6 +3847,201 @@ netdev_linux_get_target_ns(const struct netdev *netdev, int *target_ns) if (!error) { *target_ns = linked_ns; } + + return error; +} + +static ovs_be16 hex_to_port(const char *hex_port) +{ + return CONSTANT_HTONS(strtoul(hex_port, NULL, 16)); +} + +static int +get_iflink_id(const char *name) +{ + char path[256], 
buf[256]; + ssize_t len; + int fd, ret; + + snprintf(path, sizeof(path), "/sys/class/net/%s/iflink", name); + fd = open(path, O_RDONLY); + if (fd < 0) { + return -errno; + } + + len = read(fd, buf, 256); + if (len < 0) { + ret = -errno; + goto out; + } + + ret = strtol(buf, NULL, 10); + +out: + close(fd); + return ret; +} + +static bool +dev_mcast_matches(int index, const char *pid) +{ + char path[512]; + snprintf(path, sizeof(path), "/proc/%s/net/dev_mcast", pid); + + FILE *f = fopen(path, "r"); + if (!f) { + return false; + } + + char line[512]; + while (fgets(line, sizeof(line), f)) { + int ifindex; + if (ovs_scan(line, "%d", &ifindex)) { + if (ifindex == index) { + fclose(f); + return true; + } + } + } + + fclose(f); + return false; +} + +static int +netdev_linux_get_socket_inode(const struct netdev *netdev, int proto, int af, + const void *src, ovs_be16 src_port, + const void *dst, ovs_be16 dst_port, + uint64_t *inode_out, uint64_t *netns_out) +{ + int error, devid; + DIR *proc_dir; + + if (proto != IPPROTO_TCP || !inode_out || !netns_out || + (af != AF_INET && af != AF_INET6)) { + error = EINVAL; + goto out; + } + + devid = get_iflink_id(netdev_get_name(netdev)); + if (devid < 0) { + error = get_ifindex(netdev, &devid); + } + + if (devid < 0) { + goto out; + } + + error = ENOENT; + proc_dir = opendir("/proc"); + if (!proc_dir) { + error = errno; + goto out; + } + + *netns_out = NETNSID_LOCAL; + + struct dirent *entry; + while ((entry = readdir(proc_dir))) { + char netns_path[512], netns_link[256], tcp_path[512], line[256]; + const char *pid = entry->d_name; + ssize_t r; + FILE *f; + + if (!isdigit(pid[0]) || !dev_mcast_matches(devid, pid)) { + continue; + } + + COVERAGE_INC(netdev_linux_proc_net_walked); + + snprintf(netns_path, sizeof(netns_path), "/proc/%s/ns/net", pid); + r = readlink(netns_path, netns_link, sizeof(netns_link) - 1); + if (r < 0) { + ovs_strlcpy(netns_link, "unknown", sizeof(netns_link)); + } else { + netns_link[r] = 0; + } + + if (af == 
AF_INET) { + snprintf(tcp_path, sizeof(tcp_path), "/proc/%s/net/tcp", pid); + } else { + snprintf(tcp_path, sizeof(tcp_path), "/proc/%s/net/tcp6", pid); + } + + f = fopen(tcp_path, "r"); + if (!f) { + continue; + } + + if (!fgets(line, sizeof(line), f)) { + fclose(f); + continue; + } + + while (fgets(line, sizeof(line), f)) { + char src_hex[64], dst_hex[64], src_port_hex[16], dst_port_hex[16]; + char local_addr[128], rem_addr[128], state[8], inode[32]; + ovs_be16 sport, dport; + bool match = false; + + if (!ovs_scan(line, + "%*d: %64s %64s %2s %*s %*s %*s %*s %*s %31s", + local_addr, rem_addr, state, inode)) { + continue; + } + + if (!ovs_scan(local_addr, "%[^:]:%s", src_hex, src_port_hex) || + !ovs_scan(rem_addr, "%[^:]:%s", dst_hex, dst_port_hex)) + { + continue; + } + + sport = hex_to_port(src_port_hex); + dport = hex_to_port(dst_port_hex); + + if (af == AF_INET) { + struct in_addr proc_src, proc_dst; + if (!ip_parse(src_hex, &proc_src.s_addr) || + !ip_parse(dst_hex, &proc_dst.s_addr)) { + continue; + } + + if (proc_src.s_addr == ((struct in_addr *) src)->s_addr && + proc_dst.s_addr == ((struct in_addr *) dst)->s_addr && + sport == src_port && + dport == dst_port) { + match = true; + } + } else if (af == AF_INET6) { + struct in6_addr proc_src6, proc_dst6; + if (!ipv6_parse(src_hex, &proc_src6) || + !ipv6_parse(dst_hex, &proc_dst6)) { + continue; + } + + if (ipv6_addr_equals(&proc_src6, (struct in6_addr *) src) && + ipv6_addr_equals(&proc_dst6, (struct in6_addr *) dst) && + sport == src_port && + dport == dst_port) { + match = true; + } + } + + if (match) { + ovs_scan(netns_link, "net:[%"PRIu64"]", netns_out); + *inode_out = strtoull(inode, NULL, 10); + fclose(f); + error = 0; + goto out; + } + } + + fclose(f); + } + + closedir(proc_dir); + +out: return error; } @@ -3957,6 +4159,7 @@ exit: .get_next_hop = netdev_linux_get_next_hop, \ .arp_lookup = netdev_linux_arp_lookup, \ .get_target_ns = netdev_linux_get_target_ns, \ + .get_socket_inode = 
netdev_linux_get_socket_inode, \ .update_flags = netdev_linux_update_flags, \ .rxq_alloc = netdev_linux_rxq_alloc, \ .rxq_dealloc = netdev_linux_rxq_dealloc, \ diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h index 589c53842f..914f0819a0 100644 --- a/lib/netdev-provider.h +++ b/lib/netdev-provider.h @@ -784,6 +784,19 @@ struct netdev_class { * anyhow. */ int (*get_target_ns)(const struct netdev *netdev, int *target_ns); + /* Retrieves a socket inode from the target netns for 'netdev'. On + * success, stores the socket inode detail in the 'inode_out' variable. + * Uses 'af' to determine 'src'/'dst' size. + * + * This function may be set to null if it would always return EOPNOTSUPP + * anyhow. */ + int (*get_socket_inode)(const struct netdev *netdev, int proto, int af, + const void *src, + ovs_be16 sport, + const void *dst, + ovs_be16 dport, + uint64_t *inode_out, uint64_t *netns_out); + /* Retrieves the current set of flags on 'netdev' into '*old_flags'. Then, * turns off the flags that are set to 1 in 'off' and turns on the flags * that are set to 1 in 'on'. (No bit will be set to 1 in both 'off' and diff --git a/lib/netdev.c b/lib/netdev.c index 501c48bb36..758354f2e7 100644 --- a/lib/netdev.c +++ b/lib/netdev.c @@ -57,6 +57,7 @@ #include "svec.h" #include "openvswitch/vlog.h" #include "flow.h" +#include "unixctl.h" #include "userspace-tso.h" #include "util.h" #ifdef __linux__ @@ -107,6 +108,7 @@ struct netdev_registered_class { * additional log messages. 
*/ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); +static void netdev_register_commands(void); static void restore_all_flags(void *aux OVS_UNUSED); void update_device_args(struct netdev *, const struct shash *args); #ifdef HAVE_AF_XDP @@ -170,6 +172,7 @@ netdev_initialize(void) netdev_register_provider(&netdev_internal_class); netdev_vport_tunnel_register(); #endif + netdev_register_commands(); ovsthread_once_done(&once); } } @@ -1671,6 +1674,22 @@ netdev_get_target_ns(const struct netdev *netdev, int *target_ns) : EOPNOTSUPP); } +/* Retrieves a socket inode from the target netns for 'netdev'. On + * success, stores the socket inode detail in the 'inode_out' variable. + * Uses 'af' to determine 'src'/'dst' size. */ +int +netdev_get_socket_inode(const struct netdev *netdev, int proto, int af, + const void *src, ovs_be16 sport, + const void *dst, ovs_be16 dport, + uint64_t *inode_out, uint64_t *netns_out) +{ + return (netdev->netdev_class->get_socket_inode + ? netdev->netdev_class->get_socket_inode(netdev, proto, af, + src, sport, dst, dport, + inode_out, netns_out) + : EOPNOTSUPP); +} + /* Returns true if carrier is active (link light is on) on 'netdev'. 
*/ bool netdev_get_carrier(const struct netdev *netdev) @@ -2439,3 +2458,81 @@ netdev_free_custom_stats_counters(struct netdev_custom_stats *custom_stats) } } } + + + +static void +netdev_v4_socket_inode_find(struct unixctl_conn *conn, int argc OVS_UNUSED, + const char *argv[], void *aux OVS_UNUSED) +{ + struct netdev *netdev = netdev_from_name(argv[1]); + struct ds s = DS_EMPTY_INITIALIZER; + ovs_be32 saddr, daddr; + ovs_be16 sport, dport; + struct in_addr sa, da; + uint64_t inode, ns; + int proto; + int error; + + if (!netdev) { + error = ESRCH; + ds_put_format(&s, "No such device: %s", argv[1]); + goto out; + } + + if (!strcmp(argv[2], "tcp")) { + proto = IPPROTO_TCP; + } else if (!strcmp(argv[2], "udp")) { + proto = IPPROTO_UDP; + } else { + error = EINVAL; + ds_put_format(&s, "Invalid proto: %s vs. tcp/udp", argv[2]); + goto out; + } + + if (ip_parse_port(argv[3], &saddr, &sport)) { + error = EINVAL; + ds_put_format(&s, "Invalid source ip:port format: %s", argv[3]); + goto out; + } + + if (ip_parse_port(argv[4], &daddr, &dport)) { + error = EINVAL; + ds_put_format(&s, "Invalid dest ip:port format: %s", argv[4]); + goto out; + } + + ds_put_format(&s, "Scanning for: ip:%08X -> %08X, port:%04X -> %08X\n", + saddr, daddr, sport, dport); + sa.s_addr = saddr; + da.s_addr = daddr; + + error = netdev_get_socket_inode(netdev, proto, AF_INET, + &sa, sport, &da, dport, + &inode, &ns); + + if (!error) { + ds_put_format(&s, "Inode: %" PRIu64 ", netns: %"PRIu64, inode, ns); + } else { + ds_put_format(&s, "Inode lookup error: %s", ovs_strerror(error)); + } + +out: + if (!error) { + unixctl_command_reply(conn, ds_cstr(&s)); + } else { + unixctl_command_reply_error(conn, ds_cstr(&s)); + } + + ds_destroy(&s); + netdev_close(netdev); +} + +static void +netdev_register_commands(void) +{ + unixctl_command_register("netdev/lookup-v4-sock", + "[netdev] [proto] [src:port] [dst:port]", + 4, 4, + netdev_v4_socket_inode_find, NULL); +} diff --git a/lib/netdev.h b/lib/netdev.h index 
e2c5630cc6..ed85889796 100644 --- a/lib/netdev.h +++ b/lib/netdev.h @@ -302,6 +302,11 @@ int netdev_arp_lookup(const struct netdev *, ovs_be32 ip, int netdev_get_target_ns(const struct netdev *, int *target_ns); +int netdev_get_socket_inode(const struct netdev *, int proto, int af, + const void *src, ovs_be16 sport, + const void *dst, ovs_be16 dport, + uint64_t *inode_out, uint64_t *netns_out); + struct netdev *netdev_find_dev_by_in4(const struct in_addr *); /* Statistics. */ diff --git a/utilities/checkpatch_dict.txt b/utilities/checkpatch_dict.txt index efe2cfdb31..5d367edbf5 100644 --- a/utilities/checkpatch_dict.txt +++ b/utilities/checkpatch_dict.txt @@ -218,6 +218,7 @@ ppid pps pre prio +procfs promisc qdisc qos -- 2.51.0 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
