On 30 January 2016 at 22:26, Laurent Vivier <laur...@vivier.eu> wrote: > rtnetlink is needed to use iproute package (ip addr, ip route) > and dhcp client. > > Examples: > > Without this patch: > # ip link > Cannot open netlink socket: Address family not supported by protocol > # ip addr > Cannot open netlink socket: Address family not supported by protocol > # ip route > Cannot open netlink socket: Address family not supported by protocol > # dhclient eth0 > Cannot open netlink socket: Address family not supported by protocol > Cannot open netlink socket: Address family not supported by protocol > > With this patch: > # ip link > 1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode > DEFAULT > link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00 > 51: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state > UP mode DEFAULT qlen 1000 > link/ether 00:16:3e:89:6b:d7 brd ff:ff:ff:ff:ff:ff > # ip addr show eth0 > 51: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state > UP qlen 1000 > link/ether 00:16:3e:89:6b:d7 brd ff:ff:ff:ff:ff:ff > inet 192.168.122.197/24 brd 192.168.122.255 scope global eth0 > valid_lft forever preferred_lft forever > inet6 fe80::216:3eff:fe89:6bd7/64 scope link > valid_lft forever preferred_lft forever > # ip route > default via 192.168.122.1 dev eth0 > 192.168.122.0/24 dev eth0 proto kernel scope link src 192.168.122.197 > # ip addr flush eth0 > # ip addr add 192.168.122.10 dev eth0 > # ip addr show eth0 > 51: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state > UP qlen 1000 > link/ether 00:16:3e:89:6b:d7 brd ff:ff:ff:ff:ff:ff > inet 192.168.122.10/32 scope global eth0 > valid_lft forever preferred_lft forever > # ip route add 192.168.122.0/24 via 192.168.122.10 > # ip route > 192.168.122.0/24 via 192.168.122.10 dev eth0 > > Signed-off-by: Laurent Vivier <laur...@vivier.eu> > --- > linux-user/syscall.c | 477 > ++++++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 471 insertions(+), 6 deletions(-)
Apologies for not getting to this patch earlier. Mostly it looks OK but I have a few questions/suggestions below. > diff --git a/linux-user/syscall.c b/linux-user/syscall.c > index dac5518..a1ed2f5 100644 > --- a/linux-user/syscall.c > +++ b/linux-user/syscall.c > @@ -100,6 +100,8 @@ int __clone2(int (*fn)(void *), void *child_stack_base, > #include <linux/filter.h> > #include <linux/blkpg.h> > #include "linux_loop.h" > +#include <linux/netlink.h> > +#include <linux/rtnetlink.h> > #include "uname.h" > > #include "qemu.h" > @@ -295,6 +297,14 @@ static TargetFdTrans **target_fd_trans; > > static unsigned int target_fd_max; > > +static TargetFdDataFunc fd_trans_target_to_host_data(int fd) > +{ > + if (fd >= 0 && fd < target_fd_max && target_fd_trans[fd]) { > + return target_fd_trans[fd]->target_to_host_data; > + } > + return NULL; > +} > + > static TargetFdDataFunc fd_trans_host_to_target_data(int fd) > { > if (fd >= 0 && fd < target_fd_max && target_fd_trans[fd]) { > @@ -1222,6 +1232,11 @@ static inline abi_long > host_to_target_sockaddr(abi_ulong target_addr, > return -TARGET_EFAULT; > memcpy(target_saddr, addr, len); > target_saddr->sa_family = tswap16(addr->sa_family); > + if (addr->sa_family == AF_NETLINK) { > + struct sockaddr_nl *target_nl = (struct sockaddr_nl *)target_saddr; > + target_nl->nl_pid = tswap32(target_nl->nl_pid); > + target_nl->nl_groups = tswap32(target_nl->nl_groups); > + } > unlock_user(target_saddr, target_addr, len); > > return 0; > @@ -1453,6 +1468,416 @@ static inline abi_long host_to_target_cmsg(struct > target_msghdr *target_msgh, > return 0; > } > > +static void tswap_nlmsghdr(struct nlmsghdr *nlh) > +{ > + nlh->nlmsg_len = tswap32(nlh->nlmsg_len); > + nlh->nlmsg_type = tswap16(nlh->nlmsg_type); > + nlh->nlmsg_flags = tswap16(nlh->nlmsg_flags); > + nlh->nlmsg_seq = tswap32(nlh->nlmsg_seq); > + nlh->nlmsg_pid = tswap32(nlh->nlmsg_pid); > +} > + > +static abi_long host_to_target_for_each_nlmsg(struct nlmsghdr *nlh, > + size_t len, > + abi_long > (*host_to_target_nlmsg) > + (struct nlmsghdr *)) > +{ > + uint32_t nlmsg_len; > + abi_long ret; > + > + while (len > (int)sizeof(struct nlmsghdr)) { What is this cast to int for ? > + > + nlmsg_len = nlh->nlmsg_len; > + if (nlmsg_len < sizeof(struct nlmsghdr) || > + nlmsg_len > len) { > + break; > + } > + > + if (nlh->nlmsg_type == NLMSG_DONE) { > + tswap_nlmsghdr(nlh); > + break; > + } Why not put this one inside the switch too? (you'd just need to change the 'break' to 'return 0'.) > + switch (nlh->nlmsg_type) { > + case NLMSG_NOOP: > + break; > + case NLMSG_ERROR: { Odd brace placement. > + struct nlmsgerr *e = NLMSG_DATA(nlh); > + e->error = tswap32(e->error); > + tswap_nlmsghdr(&e->msg); > + tswap_nlmsghdr(nlh); > + } > + return 0; > + default: > + ret = host_to_target_nlmsg(nlh); > + if (ret < 0) { > + tswap_nlmsghdr(nlh); > + return ret; > + } > + break; > + } > + tswap_nlmsghdr(nlh); > + len -= NLMSG_ALIGN(nlmsg_len); > + nlh = (struct nlmsghdr *)(((char*)nlh) + NLMSG_ALIGN(nlmsg_len)); > + } > + return 0; > +} > + > +static abi_long target_to_host_for_each_nlmsg(struct nlmsghdr *nlh, > + size_t len, > + abi_long > (*target_to_host_nlmsg) > + (struct nlmsghdr *)) > +{ > + int ret; > + > + while (len > (int)sizeof(struct nlmsghdr)) { > + if (tswap32(nlh->nlmsg_len) < sizeof(struct nlmsghdr) || > + tswap32(nlh->nlmsg_len) > len) { > + break; > + } > + tswap_nlmsghdr(nlh); > + if (nlh->nlmsg_type == NLMSG_DONE) { > + break; > + } > + switch (nlh->nlmsg_type) { > + case NLMSG_NOOP: > + break; > + case NLMSG_ERROR: { > + struct nlmsgerr *e = NLMSG_DATA(nlh); > + e->error = tswap32(e->error); > + tswap_nlmsghdr(&e->msg); > + } > + default: > + ret = target_to_host_nlmsg(nlh); > + if (ret < 0) { > + return ret; > + } > + } > + len -= NLMSG_ALIGN(nlh->nlmsg_len); > + nlh = (struct nlmsghdr *)(((char *)nlh) + > NLMSG_ALIGN(nlh->nlmsg_len)); You could use 'nlh = NLMSG_NEXT(nlh, len);' here rather than manually adjusting len and nlh (though maybe you prefer the parallel with the host_to_target function, and there we can't use NLMSG_NEXT). > + } > + return 0; > +} > + > +static abi_long host_to_target_for_each_rtattr(struct rtattr *rtattr, > + size_t len, > + abi_long > (*host_to_target_rtattr) > + (struct rtattr *)) > +{ > + unsigned short rta_len; > + abi_long ret; > + > + while (len > (int)sizeof(struct rtattr)) { > + rta_len = rtattr->rta_len; > + if (rta_len < sizeof(struct rtattr) || > + rta_len > len) { > + rtattr->rta_len = tswap16(rtattr->rta_len); > + rtattr->rta_type = tswap16(rtattr->rta_type); In the code above for iterating nlmsgs, we don't try to touch the fields if the length claims to be out of bounds; here we do. Why the difference ? > + break; > + } > + ret = host_to_target_rtattr(rtattr); > + rtattr->rta_len = tswap16(rtattr->rta_len); > + rtattr->rta_type = tswap16(rtattr->rta_type); > + if (ret < 0) { > + return ret; > + } > + len -= RTA_ALIGN(rta_len); > + rtattr = (struct rtattr *)(((char *)rtattr) + RTA_ALIGN(rta_len)); > + } > + return 0; > +} > + > +static abi_long host_to_target_data_link_rtattr(struct rtattr *rtattr) > +{ > + uint32_t *u32; > + > + switch (rtattr->rta_type) { > + case IFLA_ADDRESS: > + case IFLA_BROADCAST: > + case IFLA_IFNAME: Comments here about the types we are converting (or not) would be nice. > + break; > + case IFLA_MTU: > + case IFLA_LINK: > + case IFLA_WEIGHT: > + case IFLA_TXQLEN: > + case IFLA_CARRIER_CHANGES: > + case IFLA_NUM_RX_QUEUES: > + case IFLA_NUM_TX_QUEUES: > + case IFLA_PROMISCUITY: > + case IFLA_EXT_MASK: > + case IFLA_LINK_NETNSID: > + case IFLA_GROUP: > + case IFLA_MASTER: > + case IFLA_NUM_VF: > + case IFLA_STATS: > + u32 = RTA_DATA(rtattr); > + *u32 = tswap32(*u32); > + break; > + case IFLA_QDISC: > + case IFLA_COST: > + case IFLA_MAP: > + case IFLA_OPERSTATE: > + case IFLA_LINKMODE: > + case IFLA_CARRIER: > + case IFLA_STATS64: > + case IFLA_AF_SPEC: > + case IFLA_LINKINFO: What order are these cases in (ie why is this set of "do nothing" different from the ones at the top of the switch) ? > + break; > + default: Should we log something about an unsupported IFLA type here? > + break; Looking at http://lxr.free-electrons.com/source/net/core/rtnetlink.c I'm not sure all of these are right. I think IFLA_MAP means we have a pointer to a struct rtnl_link_ifmap, for instance, which would need some of its fields swapping. IFLA_STATS is an rtnl_link_stats struct, and so on. > + } > + return 0; > +} > + > +static abi_long host_to_target_data_addr_rtattr(struct rtattr *rtattr) > +{ > + uint32_t *u32; > + struct ifa_cacheinfo *ci; > + > + switch (rtattr->rta_type) { > + case IFA_FLAGS: > + u32 = RTA_DATA(rtattr); > + *u32 = tswap32(*u32); > + break; > + case IFA_LOCAL: > + case IFA_ADDRESS: > + case IFA_BROADCAST: > + case IFA_LABEL: > + break; > + case IFA_CACHEINFO: > + ci = RTA_DATA(rtattr); > + ci->ifa_prefered = tswap32(ci->ifa_prefered); > + ci->ifa_valid = tswap32(ci->ifa_valid); > + ci->cstamp = tswap32(ci->cstamp); > + ci->tstamp = tswap32(ci->tstamp); > + break; > + default: > + break; Log about unsupported values? > + } > + return 0; > +} > + > +static abi_long host_to_target_data_route_rtattr(struct rtattr *rtattr) > +{ > + uint32_t *u32; > + switch (rtattr->rta_type) { > + case RTA_GATEWAY: > + case RTA_DST: > + case RTA_PREFSRC: > + break; > + case RTA_PRIORITY: > + case RTA_TABLE: > + case RTA_OIF: > + u32 = RTA_DATA(rtattr); > + *u32 = tswap32(*u32); > + break; This doesn't seem to be the complete list of RTA_ values; what decides which ones are listed here? > + default: > + break; > + } > + return 0; > +} > + > +static abi_long host_to_target_link_rtattr(struct rtattr *rtattr, > + uint32_t rtattr_len) > +{ > + return host_to_target_for_each_rtattr(rtattr, rtattr_len, > + host_to_target_data_link_rtattr); > +} > + > +static abi_long host_to_target_addr_rtattr(struct rtattr *rtattr, > + uint32_t rtattr_len) > +{ > + return host_to_target_for_each_rtattr(rtattr, rtattr_len, > + host_to_target_data_addr_rtattr); > +} > + > +static abi_long host_to_target_route_rtattr(struct rtattr *rtattr, > + uint32_t rtattr_len) > +{ > + return host_to_target_for_each_rtattr(rtattr, rtattr_len, > + host_to_target_data_route_rtattr); > +} > + > +static abi_long host_to_target_data_route(struct nlmsghdr *nlh) > +{ > + uint32_t nlmsg_len; > + struct ifinfomsg *ifi; > + struct ifaddrmsg *ifa; > + struct rtmsg *rtm; > + > + nlmsg_len = nlh->nlmsg_len; > + switch (nlh->nlmsg_type) { > + case RTM_NEWLINK: > + case RTM_DELLINK: > + case RTM_GETLINK: > + ifi = NLMSG_DATA(nlh); > + ifi->ifi_type = tswap16(ifi->ifi_type); > + ifi->ifi_index = tswap32(ifi->ifi_index); > + ifi->ifi_flags = tswap32(ifi->ifi_flags); > + ifi->ifi_change = tswap32(ifi->ifi_change); > + host_to_target_link_rtattr(IFLA_RTA(ifi), > + nlmsg_len - NLMSG_LENGTH(sizeof(*ifi))); > + break; > + case RTM_NEWADDR: > + case RTM_DELADDR: > + case RTM_GETADDR: > + ifa = NLMSG_DATA(nlh); > + ifa->ifa_index = tswap32(ifa->ifa_index); > + host_to_target_addr_rtattr(IFA_RTA(ifa), > + nlmsg_len - NLMSG_LENGTH(sizeof(*ifa))); > + break; > + case RTM_NEWROUTE: > + case RTM_DELROUTE: > + case RTM_GETROUTE: > + rtm = NLMSG_DATA(nlh); > + rtm->rtm_flags = tswap32(rtm->rtm_flags); > + host_to_target_route_rtattr(RTM_RTA(rtm), > + nlmsg_len - NLMSG_LENGTH(sizeof(*rtm))); > + break; > + default: > + return -TARGET_EINVAL; > + } > + return 0; > +} > + > +static inline abi_long host_to_target_nlmsg_route(struct nlmsghdr *nlh, > + size_t len) > +{ > + return host_to_target_for_each_nlmsg(nlh, len, > host_to_target_data_route); > +} > + > +static abi_long target_to_host_for_each_rtattr(struct rtattr *rtattr, > + size_t len, > + abi_long > (*target_to_host_rtattr) > + (struct rtattr *)) > +{ > + abi_long ret; > + > + while (len >= (int)sizeof(struct rtattr)) { > + rtattr->rta_len = tswap16(rtattr->rta_len); > + rtattr->rta_type = tswap16(rtattr->rta_type); > + if (rtattr->rta_len < sizeof(struct rtattr) || > + rtattr->rta_len > len) { > + break; > + } Length check after swap of len/type again. > + ret = target_to_host_rtattr(rtattr); > + if (ret < 0) { > + return ret; > + } > + len -= RTA_ALIGN(rtattr->rta_len); > + rtattr = (struct rtattr *)(((char *)rtattr) + > + RTA_ALIGN(rtattr->rta_len)); > + } > + return 0; > +} > + > +static abi_long target_to_host_data_link_rtattr(struct rtattr *rtattr) > +{ > + switch (rtattr->rta_type) { > + default: > + break; > + } > + return 0; > +} > + > +static abi_long target_to_host_data_addr_rtattr(struct rtattr *rtattr) > +{ > + switch (rtattr->rta_type) { > + case IFA_LOCAL: > + case IFA_ADDRESS: > + break; > + default: > + break; > + } > + return 0; > +} > + > +static abi_long target_to_host_data_route_rtattr(struct rtattr *rtattr) > +{ > + uint32_t *u32; > + switch (rtattr->rta_type) { > + case RTA_DST: > + case RTA_SRC: > + case RTA_GATEWAY: > + /* nothing to do */ > + break; > + case RTA_OIF: > + u32 = RTA_DATA(rtattr); > + *u32 = tswap32(*u32); > + break; > + default: > + break; > + } > + return 0; > +} > + > +static void target_to_host_link_rtattr(struct rtattr *rtattr, > + uint32_t rtattr_len) > +{ > + target_to_host_for_each_rtattr(rtattr, rtattr_len, > + target_to_host_data_link_rtattr); > +} > + > +static void target_to_host_addr_rtattr(struct rtattr *rtattr, > + uint32_t rtattr_len) > +{ > + target_to_host_for_each_rtattr(rtattr, rtattr_len, > + target_to_host_data_addr_rtattr); > +} > + > +static void target_to_host_route_rtattr(struct rtattr *rtattr, > + uint32_t rtattr_len) > +{ > + target_to_host_for_each_rtattr(rtattr, rtattr_len, > + target_to_host_data_route_rtattr); > +} > + > +static abi_long target_to_host_data_route(struct nlmsghdr *nlh) > +{ > + struct ifinfomsg *ifi; > + struct ifaddrmsg *ifa; > + struct rtmsg *rtm; > + > + switch (nlh->nlmsg_type) { > + case RTM_GETLINK: > + break; > + case RTM_NEWLINK: > + case RTM_DELLINK: > + ifi = NLMSG_DATA(nlh); > + ifi->ifi_type = tswap16(ifi->ifi_type); > + ifi->ifi_index = tswap32(ifi->ifi_index); > + ifi->ifi_flags = tswap32(ifi->ifi_flags); > + ifi->ifi_change = tswap32(ifi->ifi_change); > + target_to_host_link_rtattr(IFLA_RTA(ifi), nlh->nlmsg_len - > + NLMSG_LENGTH(sizeof(*ifi))); > + break; > + case RTM_GETADDR: > + case RTM_NEWADDR: > + case RTM_DELADDR: > + ifa = NLMSG_DATA(nlh); > + ifa->ifa_index = tswap32(ifa->ifa_index); > + target_to_host_addr_rtattr(IFA_RTA(ifa), nlh->nlmsg_len - > + NLMSG_LENGTH(sizeof(*ifa))); > + break; > + case RTM_GETROUTE: > + break; > + case RTM_NEWROUTE: > + case RTM_DELROUTE: > + rtm = NLMSG_DATA(nlh); > + rtm->rtm_flags = tswap32(rtm->rtm_flags); > + target_to_host_route_rtattr(RTM_RTA(rtm), nlh->nlmsg_len - > + NLMSG_LENGTH(sizeof(*rtm))); > + break; > + default: > + return -TARGET_EOPNOTSUPP; > + } > + return 0; > +} > + > +static abi_long target_to_host_nlmsg_route(struct nlmsghdr *nlh, size_t len) > +{ > + return target_to_host_for_each_nlmsg(nlh, len, > target_to_host_data_route); > +} > + > /* do_setsockopt() Must return target values and target errnos. */ > static abi_long do_setsockopt(int sockfd, int level, int optname, > abi_ulong optval_addr, socklen_t optlen) > @@ -2103,6 +2528,21 @@ static TargetFdTrans target_packet_trans = { > .target_to_host_addr = packet_target_to_host_sockaddr, > }; > > +static abi_long netlink_route_target_to_host(void *buf, size_t len) > +{ > + return target_to_host_nlmsg_route(buf, len); > +} > + > +static abi_long netlink_route_host_to_target(void *buf, size_t len) > +{ > + return host_to_target_nlmsg_route(buf, len); > +} > + > +static TargetFdTrans target_netlink_route_trans = { > + .target_to_host_data = netlink_route_target_to_host, > + .host_to_target_data = netlink_route_host_to_target, > +}; > + > /* do_socket() Must return target values and target errnos. */ > static abi_long do_socket(int domain, int type, int protocol) > { > @@ -2114,9 +2554,6 @@ static abi_long do_socket(int domain, int type, int > protocol) > return ret; > } > > - if (domain == PF_NETLINK) > - return -TARGET_EAFNOSUPPORT; > - > if (domain == AF_PACKET || > (domain == AF_INET && type == SOCK_PACKET)) { > protocol = tswap16(protocol); > @@ -2130,6 +2567,16 @@ static abi_long do_socket(int domain, int type, int > protocol) > * if socket type is SOCK_PACKET, bind by name > */ > fd_trans_register(ret, &target_packet_trans); > + } else if (domain == PF_NETLINK) { > + switch (protocol) { > + case NETLINK_ROUTE: > + fd_trans_register(ret, &target_netlink_route_trans); > + break; > + default: > + close(ret); > + ret = -EPFNOSUPPORT; Can we handle the "PF_NETLINK but unsupported protocol" check before we try to open the host socket, rather than having to back it out here? > + break; > + } > } > } > return ret; > @@ -2214,14 +2661,25 @@ static abi_long do_sendrecvmsg_locked(int fd, struct > target_msghdr *msgp, > msg.msg_iov = vec; > > if (send) { > - ret = target_to_host_cmsg(&msg, msgp); > - if (ret == 0) > + if (fd_trans_target_to_host_data(fd)) { > + ret = fd_trans_target_to_host_data(fd)(msg.msg_iov->iov_base, > + msg.msg_iov->iov_len); > + } else { > + ret = target_to_host_cmsg(&msg, msgp); > + } > + if (ret == 0) { > ret = get_errno(sendmsg(fd, &msg, flags)); > + } > } else { > ret = get_errno(recvmsg(fd, &msg, flags)); > if (!is_error(ret)) { > len = ret; > - ret = host_to_target_cmsg(msgp, &msg); > + if (fd_trans_host_to_target_data(fd)) { > + ret = fd_trans_host_to_target_data(fd)(msg.msg_iov->iov_base, > + msg.msg_iov->iov_len); > + } else { > + ret = host_to_target_cmsg(msgp, &msg); > + } > if (!is_error(ret)) { > msgp->msg_namelen = tswap32(msg.msg_namelen); > if (msg.msg_name != NULL) { > @@ -2448,6 +2906,13 @@ static abi_long do_sendto(int fd, abi_ulong msg, > size_t len, int flags, > host_msg = lock_user(VERIFY_READ, msg, len, 1); > if (!host_msg) > return -TARGET_EFAULT; > + if (fd_trans_target_to_host_data(fd)) { > + ret = fd_trans_target_to_host_data(fd)(host_msg, len); > + if (ret < 0) { > + unlock_user(host_msg, msg, 0); > + return ret; > + } > + } > if (target_addr) { > addr = alloca(addrlen+1); > ret = target_to_host_sockaddr(fd, addr, target_addr, addrlen); > -- > 2.5.0 This felt like a pretty long patch to wade through on review; if there's an easy way to split it up that would be nice for v2, but I don't insist on it if there doesn't seem to be a nice splitting point. thanks -- PMM