Aaron, thank you so much for review, yes, I should handle error. For setns, I will check if it can work as you commented. I'll send V2 to fix your comments.
-----邮件原件----- 发件人: dev [mailto:ovs-dev-boun...@openvswitch.org] 代表 Aaron Conole 发送时间: 2020年8月25日 23:06 收件人: yang_y...@163.com 抄送: ovs-dev@openvswitch.org; i.maxim...@ovn.org; f...@sysclose.org 主题: Re: [ovs-dev] [PATCH V1 3/4] Fix tap interface statistics issue yang_y...@163.com writes: > From: Yi Yang <yangy...@inspur.com> > > After tap interface is moved to network namespace, "ovs-vsctl list > interface tapXXX" can get statistics info of tap interface, the root > cause is OVS still gets statistics info in root namespace. > > With netns option help, OVS can get statistics info in tap interface > netns. > > This patch added enter and exit netns helpers and change > statistics-related functions for those tap interfaces which have been > moved into netns and make sure "ovs-vsctl list interface tapXXX" can > get statistics info correctly. > > Here is a result sample for reference: > name : tap1 > ofport : 4 > ofport_request : [] > options : {netns=ns01} > other_config : {} > statistics : {rx_bytes=6228, rx_packets=68, tx_bytes=8310, > tx_packets=95} > status : {} > type : tap > > Signed-off-by: Yi Yang <yangy...@inspur.com> > --- > lib/dpif-netlink.c | 51 ++++++++++++++++++ > lib/dpif-netlink.h | 3 ++ > lib/netdev-linux.c | 60 ++++++++++++++++++++- > lib/netlink-socket.c | 146 > +++++++++++++++++++++++++++++++++++++++++++++++++++ > lib/netlink-socket.h | 2 + > 5 files changed, 260 insertions(+), 2 deletions(-) > > diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index > 7da4fb5..8ed37ed 100644 > --- a/lib/dpif-netlink.c > +++ b/lib/dpif-netlink.c > @@ -4282,6 +4282,43 @@ dpif_netlink_vport_transact(const struct > dpif_netlink_vport *request, > return error; > } > > +static int > +dpif_netlink_vport_transact_nopool(const struct dpif_netlink_vport *request, > + struct dpif_netlink_vport *reply, > + struct ofpbuf **bufp) { > + struct ofpbuf *request_buf; > + int error; > + > + ovs_assert((reply != NULL) == (bufp != NULL)); > + > + error = dpif_netlink_init(); > + if (error) { > + if (reply) { > + *bufp = NULL; > + dpif_netlink_vport_init(reply); > + } > + return error; > + } > + > + request_buf = ofpbuf_new(1024); > + dpif_netlink_vport_to_ofpbuf(request, request_buf); > + error = nl_transact_nopool(NETLINK_GENERIC, request_buf, bufp); > + ofpbuf_delete(request_buf); > + > + if (reply) { > + if (!error) { > + error = dpif_netlink_vport_from_ofpbuf(reply, *bufp); > + } > + if (error) { > + dpif_netlink_vport_init(reply); > + ofpbuf_delete(*bufp); > + *bufp = NULL; > + } > + } > + return error; > +} > + > /* Obtains information about the kernel vport named 'name' and stores it into > * '*reply' and '*bufp'. The caller must free '*bufp' when the reply is no > * longer needed ('reply' will contain pointers into '*bufp'). */ @@ > -4298,6 +4335,20 @@ dpif_netlink_vport_get(const char *name, struct > dpif_netlink_vport *reply, > return dpif_netlink_vport_transact(&request, reply, bufp); } > > +int > +dpif_netlink_vport_get_nopool(const char *name, > + struct dpif_netlink_vport *reply, > + struct ofpbuf **bufp) { > + struct dpif_netlink_vport request; > + > + dpif_netlink_vport_init(&request); > + request.cmd = OVS_VPORT_CMD_GET; > + request.name = name; > + > + return dpif_netlink_vport_transact_nopool(&request, reply, bufp); > +} > + > /* Parses the contents of 'buf', which contains a "struct ovs_header" > followed > * by Netlink attributes, into 'dp'. Returns 0 if successful, otherwise a > * positive errno value. > diff --git a/lib/dpif-netlink.h b/lib/dpif-netlink.h index > 24294bc..9372241 100644 > --- a/lib/dpif-netlink.h > +++ b/lib/dpif-netlink.h > @@ -55,6 +55,9 @@ int dpif_netlink_vport_transact(const struct > dpif_netlink_vport *request, > struct ofpbuf **bufp); int > dpif_netlink_vport_get(const char *name, struct dpif_netlink_vport *reply, > struct ofpbuf **bufp); > +int dpif_netlink_vport_get_nopool(const char *name, > + struct dpif_netlink_vport *reply, > + struct ofpbuf **bufp); > > bool dpif_netlink_is_internal_device(const char *name); > > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index > bb3aa9b..2a7d5ec 100644 > --- a/lib/netdev-linux.c > +++ b/lib/netdev-linux.c > @@ -520,8 +520,16 @@ static struct vlog_rate_limit rl = > VLOG_RATE_LIMIT_INIT(5, 20); > * changes in the device miimon status, so we can use atomic_count. > */ static atomic_count miimon_cnt = ATOMIC_COUNT_INIT(0); > > +struct netns_knob { > + int main_fd; > + int netns_fd; > + char nspath[128]; > +}; > + > static int netdev_linux_parse_vnet_hdr(struct dp_packet *b); static > void netdev_linux_prepend_vnet_hdr(struct dp_packet *b, int mtu); > +static int enter_netns(struct netns_knob *netns_knob, const char > +*netns); static void exit_netns(struct netns_knob *netns_knob); > static int netdev_linux_do_ethtool(const char *name, struct ethtool_cmd *, > int cmd, const char *cmd_name); > static int get_flags(const struct netdev *, unsigned int *flags); @@ > -2158,11 +2166,18 @@ netdev_stats_from_ovs_vport_stats(struct > netdev_stats *dst, static int get_stats_via_vport__(const struct > netdev *netdev, struct netdev_stats *stats) { > + struct netdev_linux *netdev_linux = netdev_linux_cast(netdev); > struct dpif_netlink_vport reply; > struct ofpbuf *buf; > int error; > > - error = dpif_netlink_vport_get(netdev_get_name(netdev), &reply, &buf); > + if (is_tap_netdev(netdev) && (netdev_linux->netns != NULL)) { > + error = dpif_netlink_vport_get_nopool(netdev_get_name(netdev), > + &reply, &buf); > + } else { > + error = dpif_netlink_vport_get(netdev_get_name(netdev), > + &reply, &buf); > + } > if (error) { > return error; > } else if (!reply.stats) { > @@ -2237,6 +2252,33 @@ netdev_linux_get_stats(const struct netdev *netdev_, > return error; > } > > +static int > +enter_netns(struct netns_knob *netns_knob, const char *netns) { > + sprintf(netns_knob->nspath, "/proc/%d/ns/net", getpid()); > + netns_knob->main_fd = open(netns_knob->nspath, O_RDONLY); > + if (netns_knob->main_fd < 0) { > + return errno; > + } > + > + sprintf(netns_knob->nspath, "/var/run/netns/%s", netns); > + netns_knob->netns_fd = open(netns_knob->nspath, O_RDONLY); > + if (netns_knob->netns_fd < 0) { > + close(netns_knob->main_fd); > + return errno; > + } > + setns(netns_knob->netns_fd, 0); I think this reassociates the entire thread with the namespace, right? So whatever thread calls this will be moved to the netns. But actually, that 0 type could also change to a mount / pid namespace as well. I think it should just be CLONE_NEWNET here and in the exit_netns call. That way we don't get some additional changes that are unexpected (like cgroup or something). We expect that the FD should refer to a netns type. > + return 0; > +} > + > +static void > +exit_netns(struct netns_knob *netns_knob) { > + setns(netns_knob->main_fd, 0); > + close(netns_knob->netns_fd); > + close(netns_knob->main_fd); > +} > + > /* Retrieves current device stats for 'netdev-tap' netdev or > * netdev-internal. */ > static int > @@ -2245,6 +2287,11 @@ netdev_tap_get_stats(const struct netdev *netdev_, > struct netdev_stats *stats) > struct netdev_linux *netdev = netdev_linux_cast(netdev_); > struct netdev_stats dev_stats; > int error; > + struct netns_knob netns_knob; > + > + if (is_tap_netdev(netdev_) && (netdev->netns != NULL)) { > + error = enter_netns(&netns_knob, netdev->netns); Shouldn't we handle this error? Otherwise why even do the assignment? > + } > > ovs_mutex_lock(&netdev->mutex); > get_stats_via_vport(netdev_, stats); @@ -2298,6 +2345,10 @@ > netdev_tap_get_stats(const struct netdev *netdev_, struct netdev_stats *stats) > stats->rx_dropped += netdev->rx_dropped; > ovs_mutex_unlock(&netdev->mutex); > > + if (is_tap_netdev(netdev_) && (netdev->netns != NULL)) { > + exit_netns(&netns_knob); Because if the above fails, this will call setns() on an invalid FD. And then I think the thread will be in a bad state. > + } > + > return error; > } > > @@ -6245,6 +6296,7 @@ netdev_stats_from_rtnl_link_stats64(struct > netdev_stats *dst, int get_stats_via_netlink(const struct netdev > *netdev_, struct netdev_stats *stats) { > + struct netdev_linux *netdev = netdev_linux_cast(netdev_); > struct ofpbuf request; > struct ofpbuf *reply; > int error; > @@ -6258,7 +6310,11 @@ get_stats_via_netlink(const struct netdev *netdev_, > struct netdev_stats *stats) > RTM_GETLINK, NLM_F_REQUEST); > ofpbuf_put_zeros(&request, sizeof(struct ifinfomsg)); > nl_msg_put_string(&request, IFLA_IFNAME, netdev_get_name(netdev_)); > - error = nl_transact(NETLINK_ROUTE, &request, &reply); > + if (is_tap_netdev(netdev_) && (netdev->netns != NULL)) { > + error = nl_transact_nopool(NETLINK_ROUTE, &request, &reply); > + } else { > + error = nl_transact(NETLINK_ROUTE, &request, &reply); > + } > ofpbuf_uninit(&request); > if (error) { > return error; > diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c index > 47077e9..2377ca7 100644 > --- a/lib/netlink-socket.c > +++ b/lib/netlink-socket.c > @@ -1807,6 +1807,152 @@ nl_transact(int protocol, const struct ofpbuf > *request, > return error; > } > > +static int > +nl_sock_create_nopool(int protocol, struct nl_sock **sockp) { > + struct nl_sock *sock; > +#ifndef _WIN32 > + struct sockaddr_nl local, remote; #endif > + socklen_t local_size; > + int rcvbuf; > + int retval = 0; > + > + *sockp = NULL; > + sock = xmalloc(sizeof *sock); > + > +#ifdef _WIN32 > + sock->overlapped.hEvent = NULL; > + sock->handle = CreateFile(OVS_DEVICE_NAME_USER, > + GENERIC_READ | GENERIC_WRITE, > + FILE_SHARE_READ | FILE_SHARE_WRITE, > + NULL, OPEN_EXISTING, > + FILE_FLAG_OVERLAPPED, NULL); > + > + if (sock->handle == INVALID_HANDLE_VALUE) { > + VLOG_ERR("fcntl: %s", ovs_lasterror_to_string()); > + goto error; > + } > + > + memset(&sock->overlapped, 0, sizeof sock->overlapped); > + sock->overlapped.hEvent = CreateEvent(NULL, FALSE, FALSE, NULL); > + if (sock->overlapped.hEvent == NULL) { > + VLOG_ERR("fcntl: %s", ovs_lasterror_to_string()); > + goto error; > + } > + /* Initialize the type/ioctl to Generic */ > + sock->read_ioctl = OVS_IOCTL_READ; #else > + sock->fd = socket(AF_NETLINK, SOCK_RAW, protocol); > + if (sock->fd < 0) { > + VLOG_ERR("fcntl: %s", ovs_strerror(errno)); > + goto error; > + } > +#endif > + > + sock->protocol = protocol; > + sock->next_seq = 1; > + > + rcvbuf = 1024 * 1024; > +#ifdef _WIN32 > + sock->rcvbuf = rcvbuf; > + retval = get_sock_pid_from_kernel(sock); > + if (retval != 0) { > + goto error; > + } > + retval = set_sock_property(sock); > + if (retval != 0) { > + goto error; > + } > +#else > + if (setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUFFORCE, > + &rcvbuf, sizeof rcvbuf)) { > + /* Only root can use SO_RCVBUFFORCE. Everyone else gets EPERM. > + * Warn only if the failure is therefore unexpected. */ > + if (errno != EPERM) { > + VLOG_WARN_RL(&rl, "setting %d-byte socket receive buffer failed " > + "(%s)", rcvbuf, ovs_strerror(errno)); > + } > + } > + > + retval = get_socket_rcvbuf(sock->fd); > + if (retval < 0) { > + retval = -retval; > + goto error; > + } > + sock->rcvbuf = retval; > + retval = 0; > + > + /* Connect to kernel (pid 0) as remote address. */ > + memset(&remote, 0, sizeof remote); > + remote.nl_family = AF_NETLINK; > + remote.nl_pid = 0; > + if (connect(sock->fd, (struct sockaddr *) &remote, sizeof remote) < 0) { > + VLOG_ERR("connect(0): %s", ovs_strerror(errno)); > + goto error; > + } > + > + /* Obtain pid assigned by kernel. */ > + local_size = sizeof local; > + if (getsockname(sock->fd, (struct sockaddr *) &local, &local_size) < 0) { > + VLOG_ERR("getsockname: %s", ovs_strerror(errno)); > + goto error; > + } > + if (local_size < sizeof local || local.nl_family != AF_NETLINK) { > + VLOG_ERR("getsockname returned bad Netlink name"); > + retval = EINVAL; > + goto error; > + } > + sock->pid = local.nl_pid; > +#endif > + > + *sockp = sock; > + return 0; > + > +error: > + if (retval == 0) { > + retval = errno; > + if (retval == 0) { > + retval = EINVAL; > + } > + } > +#ifdef _WIN32 > + if (sock->overlapped.hEvent) { > + CloseHandle(sock->overlapped.hEvent); > + } > + if (sock->handle != INVALID_HANDLE_VALUE) { > + CloseHandle(sock->handle); > + } > +#else > + if (sock->fd >= 0) { > + close(sock->fd); > + } > +#endif > + free(sock); > + return retval; > +} > + > +int > +nl_transact_nopool(int protocol, const struct ofpbuf *request, > + struct ofpbuf **replyp) > +{ > + struct nl_sock *sock; > + int error; > + > + error = nl_sock_create_nopool(protocol, &sock); > + if (error) { > + if (replyp) { > + *replyp = NULL; > + } > + return error; > + } > + > + error = nl_sock_transact(sock, request, replyp); > + nl_sock_destroy(sock); > + > + return error; > +} > + > /* Sends the 'request' member of the 'n' transactions in 'transactions' on a > * Netlink socket for the given 'protocol' (e.g. NETLINK_ROUTE or > * NETLINK_GENERIC), in order, and receives responses to all of them. > Fills in diff --git a/lib/netlink-socket.h b/lib/netlink-socket.h > index 7852ad0..08cbab8 100644 > --- a/lib/netlink-socket.h > +++ b/lib/netlink-socket.h > @@ -254,6 +254,8 @@ struct nl_transaction { int nl_transact(int > protocol, const struct ofpbuf *request, > struct ofpbuf **replyp); void > nl_transact_multiple(int protocol, struct nl_transaction **, size_t > n); > +int nl_transact_nopool(int protocol, const struct ofpbuf *request, > + struct ofpbuf **replyp); > > /* Table dumping. */ > #define NL_DUMP_BUFSIZE 4096 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev