Signed-off-by: Charles Myers <[email protected]>
---
Makefile | 1 +
bsd/net.cc | 5 +
bsd/sys/compat/linux/linux_netlink.cc | 904 ++++++++++++++++++++++++++++++++++
bsd/sys/compat/linux/linux_netlink.h | 175 +++++++
bsd/sys/compat/linux/linux_socket.cc | 4 +
bsd/sys/compat/linux/linux_socket.h | 1 +
bsd/sys/net/netisr.h | 1 +
7 files changed, 1091 insertions(+)
create mode 100644 bsd/sys/compat/linux/linux_netlink.cc
create mode 100644 bsd/sys/compat/linux/linux_netlink.h
diff --git a/Makefile b/Makefile
index add72f9..49e61b8 100644
--- a/Makefile
+++ b/Makefile
@@ -564,6 +564,7 @@ bsd += bsd/porting/kobj.o
bsd += bsd/sys/netinet/if_ether.o
bsd += bsd/sys/compat/linux/linux_socket.o
bsd += bsd/sys/compat/linux/linux_ioctl.o
+bsd += bsd/sys/compat/linux/linux_netlink.o
bsd += bsd/sys/net/if_ethersubr.o
bsd += bsd/sys/net/if_llatbl.o
bsd += bsd/sys/net/radix.o
diff --git a/bsd/net.cc b/bsd/net.cc
index 3e42757..f548e09 100644
--- a/bsd/net.cc
+++ b/bsd/net.cc
@@ -23,6 +23,7 @@
#include <bsd/sys/netinet/cc.h>
#include <bsd/sys/net/ethernet.h>
#include <bsd/sys/net/route.h>
+#include <bsd/sys/compat/linux/linux_netlink.h>
/* Generation of ip ids */
void ip_initid(void);
@@ -32,6 +33,8 @@ extern "C" {
extern struct domain inetdomain;
/* AF_ROUTE */
extern struct domain routedomain;
+ /* AF_NETLINK */
+ extern struct domain netlinkdomain;
}
void net_init(void)
@@ -53,9 +56,11 @@ void net_init(void)
domaininit(NULL);
OSV_DOMAIN_SET(inet);
OSV_DOMAIN_SET(route);
+ OSV_DOMAIN_SET(netlink);
rts_init();
route_init();
vnet_route_init();
+ netlink_init();
ipport_tick_init(NULL);
arp_init();
domainfinalize(NULL);
diff --git a/bsd/sys/compat/linux/linux_netlink.cc
b/bsd/sys/compat/linux/linux_netlink.cc
new file mode 100644
index 0000000..ecfa33d
--- /dev/null
+++ b/bsd/sys/compat/linux/linux_netlink.cc
@@ -0,0 +1,904 @@
+/*
+ * Linux NETLINK socket implementation.
+ *
+ * NETLINK is used to support IPv4/IPv6 LIBC getifaddrs(), if_nameindex().
+ *
+ * Warning: Tx/Rx messages are compatible with Linux not FreeBSD.
+ */
+
+#include <osv/initialize.hh>
+#include <bsd/porting/netport.h>
+
+#include <bsd/sys/sys/param.h>
+#include <bsd/sys/sys/domain.h>
+#include <bsd/sys/sys/mbuf.h>
+#include <bsd/sys/sys/priv.h>
+#include <bsd/sys/sys/protosw.h>
+#include <bsd/sys/sys/socket.h>
+#include <bsd/sys/sys/socketvar.h>
+#include <bsd/sys/sys/sysctl.h>
+
+#include <bsd/sys/net/if.h>
+#include <bsd/sys/net/if_dl.h>
+#include <bsd/sys/net/if_llatbl.h>
+#include <bsd/sys/net/if_types.h>
+#include <bsd/sys/net/netisr.h>
+#include <bsd/sys/net/raw_cb.h>
+#include <bsd/sys/net/route.h>
+#include <bsd/sys/net/vnet.h>
+
+#include <bsd/sys/netinet/in.h>
+#include <bsd/sys/netinet/if_ether.h>
+#include <bsd/sys/net/if_llatbl.h>
+
+#ifdef INET6
+#include <bsd/sys/netinet/ip6.h>
+#include <bsd/sys/netinet6/ip6_var.h>
+#include <bsd/sys/netinet6/in6_var.h>
+#include <bsd/sys/netinet6/scope6_var.h>
+#include <bsd/sys/netinet6/nd6.h>
+#endif
+
+#include <bsd/sys/compat/linux/linux.h>
+#include <bsd/sys/compat/linux/linux_netlink.h>
+#include <bsd/sys/compat/linux/linux_socket.h>
+
+#if !defined(offsetof)
+#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER)
+#endif
+
+/*
+ * Global lock for the netlink domain.  It is initialised in netlink_init()
+ * and installed as the per-socket mutex in netlink_attach().
+ */
+mutex netlink_mtx;
+
+/* Convenience wrappers around netlink_mtx. */
+#define NETLINK_LOCK() mutex_lock(&netlink_mtx)
+#define NETLINK_UNLOCK() mutex_unlock(&netlink_mtx)
+#define NETLINK_LOCK_ASSERT() assert(netlink_mtx.owned())
+
+/*
+ * Netlink socket address with a leading length byte.  netlink_bind()
+ * accepts an AF_NETLINK address only when its sa_len equals
+ * sizeof(struct bsd_sockaddr_nl).
+ */
+struct bsd_sockaddr_nl {
+ uint8_t nl_len; /* length of this struct */
+ bsd_sa_family_t nl_family; /* AF_NETLINK */
+ unsigned short nl_pad; /* Zero */
+ pid_t nl_pid; /* Port ID */
+ uint32_t nl_groups; /* Multicast groups mask */
+};
+
+MALLOC_DEFINE(M_NETLINK, "netlink", "netlink socket");
+
+/*
+ * Source address handed to raw_input_ext() by netlink_input(): the first two
+ * members are initialised to 2 and PF_NETLINK, the rest is zero.
+ */
+static struct bsd_sockaddr netlink_src = { 2, PF_NETLINK, };
+
+
+
+/*
+ * Derive a prefix length from a netmask stored as n_bytes raw bytes.
+ *
+ * The mask is scanned from its last byte towards the first.  The result is
+ * the number of bits from the start of the mask up to and including the
+ * lowest set bit of the last non-zero byte.  An all-zero (or empty) mask
+ * yields 0.
+ */
+static size_t mask_to_prefix_len(const uint8_t *bytes, size_t n_bytes)
+{
+	size_t remaining = n_bytes;
+
+	while (remaining > 0) {
+		uint8_t octet = bytes[remaining - 1];
+
+		if (octet != 0) {
+			/* Zero-based index of the least significant set bit. */
+			int lowest_bit = __builtin_ffs((long)octet) - 1;
+
+			return 8 * remaining - lowest_bit;
+		}
+		--remaining;
+	}
+	return 0;
+}
+
+/*
+ * Return the prefix length encoded by a netmask sockaddr.
+ *
+ * Only AF_INET and AF_INET6 masks are understood (each only when the
+ * corresponding INET/INET6 option is compiled in); a NULL pointer or any
+ * other family yields 0.
+ */
+static int get_sockaddr_mask_prefix_len(struct bsd_sockaddr *sa)
+{
+	if (sa == NULL)
+		return 0;
+
+#ifdef INET
+	if (sa->sa_family == AF_INET) {
+		struct bsd_sockaddr_in *sin = (struct bsd_sockaddr_in *)sa;
+
+		return mask_to_prefix_len((uint8_t *)&sin->sin_addr, sizeof(sin->sin_addr));
+	}
+#endif
+#ifdef INET6
+	if (sa->sa_family == AF_INET6) {
+		struct bsd_sockaddr_in6 *sin6 = (struct bsd_sockaddr_in6 *)sa;
+
+		return mask_to_prefix_len((uint8_t *)sin6->sin6_addr.s6_addr, sizeof(sin6->sin6_addr));
+	}
+#endif
+	return 0;
+}
+
+
+/*
+ * Reserve len contiguous bytes at the end of the mbuf chain m0 and return a
+ * pointer to them.
+ *
+ * The bytes are carved out of the last mbuf when it has enough trailing
+ * space.  Otherwise a fresh mbuf of the same type is appended; this fails
+ * when len exceeds MLEN or the allocation fails.  When m0 carries a packet
+ * header its total length is increased by len.
+ *
+ * Returns NULL on failure; the chain is left untouched in that case.
+ */
+void *nl_m_put(struct mbuf *m0, int len)
+{
+	struct mbuf *tail = m0;
+	void *area;
+	int room;
+
+	/* Find the last mbuf of the chain. */
+	while (tail->m_hdr.mh_next != NULL)
+		tail = tail->m_hdr.mh_next;
+
+	room = M_TRAILINGSPACE(tail);
+	if (len > room) {
+		struct mbuf *fresh;
+
+		/* Does not fit: chain a new mbuf dedicated to this request. */
+		if (len > MLEN)
+			return NULL;
+		fresh = m_get(M_NOWAIT, tail->m_hdr.mh_type);
+		if (fresh == NULL)
+			return NULL;
+		area = fresh->m_hdr.mh_data;
+		fresh->m_hdr.mh_len = len;
+		tail->m_hdr.mh_next = fresh;
+	} else {
+		/* Fits: extend the data area of the last mbuf. */
+		area = tail->m_hdr.mh_data + tail->m_hdr.mh_len;
+		tail->m_hdr.mh_len += len;
+	}
+
+	if (m0->m_hdr.mh_flags & M_PKTHDR) {
+		m0->M_dat.MH.MH_pkthdr.len += len;
+	}
+	return area;
+}
+
+/*
+ * Append a netlink message header followed by room for len payload bytes.
+ *
+ * The reserved area is rounded up to the netlink alignment and the padding
+ * bytes after the payload are zeroed.  The payload itself is left for the
+ * caller to fill in.  nlmsg_len is set to the unpadded header + payload size.
+ *
+ * Returns the new header, or NULL when no space could be reserved.
+ */
+struct nlmsghdr * nlmsg_put(struct mbuf *m, uint32_t pid, uint32_t seq, int type, int len, int flags)
+{
+	const int msg_len = nlmsg_msg_size(len);
+	const int padded_len = NLMSG_ALIGN(msg_len);
+	struct nlmsghdr *hdr = (struct nlmsghdr *) nl_m_put(m, padded_len);
+
+	if (hdr == NULL)
+		return NULL;
+
+	hdr->nlmsg_len = msg_len;
+	hdr->nlmsg_type = type;
+	hdr->nlmsg_flags = flags;
+	hdr->nlmsg_seq = seq;
+	hdr->nlmsg_pid = pid;
+
+	if (padded_len != msg_len) {
+		/* Clear the alignment padding that follows the payload. */
+		memset((char *)nlmsg_data(hdr) + len, 0, padded_len - msg_len);
+	}
+	return hdr;
+}
+
+/*
+ * Start a message that will be extended with attributes.  This is nlmsg_put()
+ * under another name; callers in this file finish the message with
+ * nlmsg_end() once all attributes have been appended.
+ */
+struct nlmsghdr * nlmsg_begin(struct mbuf *m, uint32_t pid, uint32_t seq, int type, int len, int flags)
+{
+	struct nlmsghdr *hdr = nlmsg_put(m, pid, seq, type, len, flags);
+
+	return hdr;
+}
+
+/*
+ * Finish a message started with nlmsg_begin(): set nlh->nlmsg_len to the
+ * number of bytes between the header and the current end of the packet.
+ *
+ * nl_m_put() may append additional mbufs, so the header is not guaranteed to
+ * live in the first mbuf.  The chain is walked to find the mbuf that holds
+ * nlh and the header's offset within the whole packet is computed from the
+ * lengths of the preceding mbufs.  If nlh is not found in the chain the
+ * length written by nlmsg_put() is left unchanged.
+ */
+void nlmsg_end(struct mbuf *m, struct nlmsghdr *nlh)
+{
+	uintptr_t target = (uintptr_t)nlh;
+	int preceding = 0;
+
+	for (struct mbuf *cur = m; cur != NULL; cur = cur->m_hdr.mh_next) {
+		uintptr_t base = (uintptr_t)cur->m_hdr.mh_data;
+
+		if (target >= base && target < base + cur->m_hdr.mh_len) {
+			nlh->nlmsg_len = m->M_dat.MH.MH_pkthdr.len -
+			    (preceding + (int)(target - base));
+			return;
+		}
+		preceding += cur->m_hdr.mh_len;
+	}
+}
+
+/*
+ * Append one netlink attribute (header + len payload bytes copied from src)
+ * to the mbuf chain.
+ *
+ * nla_len is set to the unpadded attribute size; the reserved area is rounded
+ * up to the attribute alignment and the trailing padding is zeroed.
+ *
+ * Returns 0 on success or ENOMEM when no space could be reserved.
+ */
+int nla_put(struct mbuf *m, int attrtype, int len, const void *src)
+{
+	struct nlattr *nla;
+	int size = nla_attr_size(len);
+	int align_size = NLA_ALIGN(size);
+
+	nla = (struct nlattr *)nl_m_put(m, align_size);
+	if (!nla)
+		return ENOMEM;
+	nla->nla_len = size;
+	nla->nla_type = attrtype;
+
+	char *dest = (char *)nla_data(nla);
+	if (len > 0)
+		memcpy(dest, src, len);
+	/*
+	 * The padding starts right after the payload, i.e. at dest + len.
+	 * (dest + size would be NLA_HDRLEN bytes too far and would write
+	 * beyond the area reserved above.)
+	 */
+	if (size != align_size)
+		memset(dest + len, 0, (align_size - size));
+	return 0;
+}
+
+/*
+ * Append an attribute whose payload is the raw bytes of a value of type T.
+ * Returns whatever nla_put() returns.
+ */
+template<class T>
+int nla_put_type(struct mbuf *m, int attrtype, T val)
+{
+	const T *payload = &val;
+
+	return nla_put(m, attrtype, sizeof(T), payload);
+}
+
+/*
+ * Append a NUL-terminated string as an attribute; the terminator is part of
+ * the payload.  Returns whatever nla_put() returns.
+ */
+int nla_put_string(struct mbuf *m, int attrtype, const char *str)
+{
+	size_t len_with_nul = strlen(str) + 1;
+
+	return nla_put(m, attrtype, len_with_nul, str);
+}
+
+/*
+ * Append the address carried by a sockaddr as an attribute.
+ *
+ *  - AF_INET  (with INET):  the 4-byte IPv4 address
+ *  - AF_INET6 (with INET6): the 16-byte IPv6 address
+ *  - AF_LINK:  the sdl_alen link-layer address bytes that follow the
+ *              interface name inside sdl_data
+ *  - other:    the bytes of sa_data covered by sa_len
+ *
+ * A NULL sockaddr is silently skipped (returns 0 without adding anything).
+ * Otherwise returns whatever nla_put() returns.
+ */
+int nla_put_sockaddr(struct mbuf *m, int attrtype, struct bsd_sockaddr *sa)
+{
+	void *data;
+	int data_len;
+
+	if (!sa)
+		return 0;
+
+	switch (sa->sa_family) {
+#ifdef INET
+	case AF_INET:
+		data = &((struct bsd_sockaddr_in *)sa)->sin_addr;
+		data_len = sizeof(((struct bsd_sockaddr_in *)sa)->sin_addr);
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		data = ((struct bsd_sockaddr_in6 *)sa)->sin6_addr.s6_addr;
+		data_len = sizeof(((struct bsd_sockaddr_in6 *)sa)->sin6_addr);
+		break;
+#endif
+	case AF_LINK:
+		data = ((struct bsd_sockaddr_dl *)sa)->sdl_data + ((struct bsd_sockaddr_dl *)sa)->sdl_nlen;
+		data_len = ((struct bsd_sockaddr_dl *)sa)->sdl_alen;
+		break;
+	default: {
+		/*
+		 * sa_len counts the whole sockaddr, including the bytes that
+		 * precede sa_data, so those must be subtracted to avoid
+		 * reading past the end of the structure.
+		 */
+		int hdr_len = offsetof(struct bsd_sockaddr, sa_data);
+
+		data = sa->sa_data;
+		data_len = sa->sa_len > hdr_len ? sa->sa_len - hdr_len : 0;
+		break;
+	}
+	}
+
+	return nla_put(m, attrtype, data_len, data);
+}
+
+static int netlink_output(struct mbuf *m, struct socket *so);
+
+
+/* Currently messages are always redirected back to the socket which
+ * sent the message, so an ISR dispatch handler is not needed.
+ *
+ */
+
+static void netlink_input(struct mbuf *m);
+
+/*
+ * netisr registration record for NETISR_NETLINK; it names netlink_input() as
+ * the handler.  Registered by netlink_init().
+ */
+static struct netisr_handler netlink_nh = initialize_with([] (netisr_handler&
x) {
+ x.nh_name = "netlink";
+ x.nh_handler = netlink_input;
+ x.nh_proto = NETISR_NETLINK;
+ x.nh_policy = NETISR_POLICY_SOURCE;
+});
+
+/*
+ * Per-socket filter passed to raw_input_ext() by netlink_input().
+ *
+ * Returns 0 when the message is not for the netlink family, when the control
+ * block has no socket, or when the socket's FIB number equals the FIB number
+ * of the mbuf; returns 1 otherwise.
+ */
+static int
+raw_input_netlink_cb(struct mbuf *m, struct sockproto *proto, struct bsd_sockaddr *src, struct rawcb *rp)
+{
+	int fibnum;
+
+	KASSERT(m != NULL, ("%s: m is NULL", __func__));
+	KASSERT(proto != NULL, ("%s: proto is NULL", __func__));
+	KASSERT(rp != NULL, ("%s: rp is NULL", __func__));
+
+	fibnum = M_GETFIB(m);
+
+	/* Nothing to filter on for foreign families or socket-less PCBs. */
+	if (proto->sp_family != PF_NETLINK)
+		return (0);
+	if (rp->rcb_socket == NULL)
+		return (0);
+
+	/* A netlink message matches only when the FIB numbers agree. */
+	if (rp->rcb_socket->so_fibnum == fibnum)
+		return (0);
+
+	return (1);
+}
+
+/*
+ * netisr handler for NETISR_NETLINK: hand the message to raw_input_ext()
+ * tagged with the netlink protocol family, using raw_input_netlink_cb() as
+ * the per-socket filter.
+ */
+static void
+netlink_input(struct mbuf *m)
+{
+	struct sockproto proto;
+
+	proto.sp_family = PF_NETLINK;
+	raw_input_ext(m, &proto, &netlink_src, raw_input_netlink_cb);
+}
+
+/*
+ * One-time initialisation of the netlink domain: set up the global mutex and
+ * register the netisr handler.
+ */
+void
+netlink_init(void)
+{
+ mutex_init(&netlink_mtx);
+ netisr_register(&netlink_nh);
+}
+
+SYSINIT(netlink, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, netlink_init, 0);
+
+/*
+ * It really doesn't make any sense at all for this code to share much
+ * with raw_usrreq.c, since its functionality is so restricted. XXX
+ */
+/* pru_abort: forwards to the generic raw-socket implementation. */
+static void
+netlink_abort(struct socket *so)
+{
+ raw_usrreqs.pru_abort(so);
+}
+
+/* pru_close: forwards to the generic raw-socket implementation. */
+static void
+netlink_close(struct socket *so)
+{
+ raw_usrreqs.pru_close(so);
+}
+
+/* pru_accept is EOPNOTSUPP */
+
+/*
+ * pru_attach: allocate a zeroed raw control block for the socket, make
+ * netlink_mtx the socket's mutex, attach it through raw_attach() and mark the
+ * socket connected.
+ *
+ * Returns 0 on success, ENOBUFS when the control block cannot be allocated,
+ * or the error reported by raw_attach() (the control block is freed again in
+ * that case).
+ */
+static int
+netlink_attach(struct socket *so, int proto, struct thread *td)
+{
+ struct rawcb *rp;
+ int s, error;
+
+ KASSERT(so->so_pcb == NULL, ("netlink_attach: so_pcb != NULL"));
+
+ /* XXX */
+ rp = (rawcb *)malloc(sizeof *rp);
+ if (rp == NULL)
+ return ENOBUFS;
+ bzero(rp, sizeof *rp);
+
+ /*
+ * The splnet() is necessary to block protocols from sending
+ * error notifications (like RTM_REDIRECT or RTM_LOSING) while
+ * this PCB is extant but incompletely initialized.
+ * Probably we should try to do more of this work beforehand and
+ * eliminate the spl.
+ */
+ s = splnet();
+ so->so_pcb = (caddr_t)rp;
+ so->set_mutex(&netlink_mtx);
+ so->so_fibnum = 0;
+ error = raw_attach(so, proto);
+ /* Re-read the control block from the socket before the error check. */
+ rp = sotorawcb(so);
+ if (error) {
+ splx(s);
+ so->so_pcb = NULL;
+ free(rp);
+ return error;
+ }
+ NETLINK_LOCK();
+ soisconnected(so);
+ NETLINK_UNLOCK();
+ so->so_options |= SO_USELOOPBACK;
+ splx(s);
+ return 0;
+}
+
+/*
+ * pru_bind: accept an AF_NETLINK address of exactly
+ * sizeof(struct bsd_sockaddr_nl) bytes (the address itself is not stored
+ * yet); any other family is passed to the generic raw-socket bind.
+ *
+ * Returns 0 on success, EINVAL for an AF_NETLINK address of the wrong
+ * length, or the result of the raw-socket bind.
+ */
+static int
+netlink_bind(struct socket *so, struct bsd_sockaddr *nam, struct thread *td)
+{
+	struct rawcb *rp = sotorawcb(so);
+
+	KASSERT(rp != NULL, ("netlink_bind: rp == NULL"));
+
+	if (nam->sa_family == AF_NETLINK) {
+		if (nam->sa_len != sizeof(struct bsd_sockaddr_nl)) {
+			/* Both lengths are cast to int to match the %d conversions. */
+			bsd_log(ERR, "%s(%d) %s Invalid sockaddr_nl length %d expected %d\n",
+			    __FILE__, __LINE__, __FUNCTION__, (int)nam->sa_len,
+			    (int)sizeof(struct bsd_sockaddr_nl));
+			return EINVAL;
+		}
+		// TODO: stash the nl_pid somewhere
+		return 0;
+	}
+	return (raw_usrreqs.pru_bind(so, nam, td)); /* xxx just EINVAL */
+}
+
+/* pru_connect: forwards to the generic raw-socket implementation. */
+static int
+netlink_connect(struct socket *so, struct bsd_sockaddr *nam, struct thread *td)
+{
+ return (raw_usrreqs.pru_connect(so, nam, td)); /* XXX just EINVAL */
+}
+
+/* pru_connect2 is EOPNOTSUPP */
+/* pru_control is EOPNOTSUPP */
+
+/*
+ * pru_detach: asserts that the socket still has a raw control block, then
+ * forwards to the generic raw-socket detach.
+ */
+static void
+netlink_detach(struct socket *so)
+{
+ struct rawcb *rp = sotorawcb(so);
+
+ KASSERT(rp != NULL, ("netlink_detach: rp == NULL"));
+
+ raw_usrreqs.pru_detach(so);
+}
+
+/* pru_disconnect: forwards to the generic raw-socket implementation. */
+static int
+netlink_disconnect(struct socket *so)
+{
+ return (raw_usrreqs.pru_disconnect(so));
+}
+
+/* pru_listen is EOPNOTSUPP */
+
+/* pru_peeraddr: forwards to the generic raw-socket implementation. */
+static int
+netlink_peeraddr(struct socket *so, struct bsd_sockaddr **nam)
+{
+ return (raw_usrreqs.pru_peeraddr(so, nam));
+}
+
+/* pru_rcvd is EOPNOTSUPP */
+/* pru_rcvoob is EOPNOTSUPP */
+
+/* pru_send: forwards all arguments to the generic raw-socket implementation. */
+static int
+netlink_send(struct socket *so, int flags, struct mbuf *m, struct bsd_sockaddr
*nam,
+ struct mbuf *control, struct thread *td)
+{
+ return (raw_usrreqs.pru_send(so, flags, m, nam, control, td));
+}
+
+/* pru_sense is null */
+
+/* pru_shutdown: forwards to the generic raw-socket implementation. */
+static int
+netlink_shutdown(struct socket *so)
+{
+ return (raw_usrreqs.pru_shutdown(so));
+}
+
+/* pru_sockaddr: forwards to the generic raw-socket implementation. */
+static int
+netlink_sockaddr(struct socket *so, struct bsd_sockaddr **nam)
+{
+ return (raw_usrreqs.pru_sockaddr(so, nam));
+}
+
+/*
+ * User-request dispatch table for netlink sockets, referenced from both
+ * entries of netlinksw[].  Only the operations listed here are set by this
+ * file.
+ */
+static struct pr_usrreqs netlink_usrreqs = initialize_with([] (pr_usrreqs& x) {
+ x.pru_abort = netlink_abort;
+ x.pru_attach = netlink_attach;
+ x.pru_bind = netlink_bind;
+ x.pru_connect = netlink_connect;
+ x.pru_detach = netlink_detach;
+ x.pru_disconnect = netlink_disconnect;
+ x.pru_peeraddr = netlink_peeraddr;
+ x.pru_send = netlink_send;
+ x.pru_shutdown = netlink_shutdown;
+ x.pru_sockaddr = netlink_sockaddr;
+ x.pru_close = netlink_close;
+});
+
+/*
+ * Queue a reply on the NETISR_NETLINK queue, whose registered handler is
+ * netlink_input().  The socket argument is currently unused.
+ */
+static void netlink_dispatch(struct socket *so __unused2, struct mbuf *m)
+{
+ netisr_queue(NETISR_NETLINK, m);
+}
+
+/*
+ * Build and dispatch an NLMSG_ERROR reply.
+ *
+ * so    - socket the reply is dispatched for
+ * nlm   - header of the offending request, or NULL when none could be
+ *         parsed; it is echoed in the reply (zeroed when NULL) and supplies
+ *         the pid/seq/flags of the reply header
+ * error - errno value describing the failure
+ *
+ * struct nlmsgerr carries a negative errno (0 for an ack), so a positive
+ * errno passed in is negated before it is stored.
+ *
+ * Returns 0 when the reply was queued, ENOBUFS when it could not be built.
+ */
+static int
+netlink_senderr(struct socket *so, struct nlmsghdr *nlm, int error)
+{
+	struct mbuf *m;
+	struct nlmsghdr *hdr;
+	struct nlmsgerr *err;
+
+	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+	if (!m) {
+		return ENOBUFS;
+	}
+
+	hdr = nlmsg_put(m,
+	    nlm ? nlm->nlmsg_pid : 0,
+	    nlm ? nlm->nlmsg_seq : 0,
+	    NLMSG_ERROR, sizeof(*err),
+	    nlm ? nlm->nlmsg_flags : 0);
+	if (hdr == NULL) {
+		m_freem(m);
+		return ENOBUFS;
+	}
+
+	err = (struct nlmsgerr *) nlmsg_data(hdr);
+	err->error = (error > 0) ? -error : error;
+	if (nlm) {
+		err->msg = *nlm;
+	} else {
+		memset(&err->msg, 0, sizeof(err->msg));
+	}
+
+	netlink_dispatch(so, m);
+	return 0;
+}
+
+/*
+ * Answer an RTM_GETLINK request with one RTM_NEWLINK message per interface,
+ * followed by NLMSG_DONE.
+ *
+ * Each message carries the interface name (IFLA_IFNAME), its index
+ * (IFLA_LINK) and, when the interface has an AF_LINK address, the hardware
+ * address (IFLA_ADDRESS) and broadcast address (IFLA_BROADCAST).
+ *
+ * Returns 0 when the reply was queued, ENOBUFS when it could not be built
+ * (the partial reply is discarded).
+ */
+static int
+netlink_process_getlink_msg(struct socket *so, struct nlmsghdr *nlm)
+{
+	struct ifnet *ifp = NULL;
+	struct bsd_ifaddr *ifa;
+	struct nlmsghdr *nlh;
+	struct ifinfomsg *ifm;
+	struct mbuf *m = NULL;
+	int error = 0;
+
+	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+	if (!m) {
+		return ENOBUFS;
+	}
+
+	IFNET_RLOCK();
+	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+		IF_ADDR_RLOCK(ifp);
+
+		nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_NEWLINK, sizeof(*ifm), nlm->nlmsg_flags);
+		if (!nlh) {
+			error = ENOBUFS;
+			goto done;
+		}
+
+		ifm = (struct ifinfomsg *) nlmsg_data(nlh);
+		ifm->ifi_family = AF_UNSPEC;
+		ifm->__ifi_pad = 0;
+		ifm->ifi_type = ifp->if_data.ifi_type;
+		ifm->ifi_index = ifp->if_index;
+		ifm->ifi_flags = ifp->if_flags | ifp->if_drv_flags;
+		ifm->ifi_change = 0;
+		if (nla_put_string(m, IFLA_IFNAME, ifp->if_xname) ||
+		    nla_put_type<uint32_t>(m, IFLA_LINK, ifp->if_index)) {
+			error = ENOBUFS;
+			goto done;
+		}
+		/* Add hw address info */
+		for (ifa = ifp->if_addr; ifa != NULL; ifa = TAILQ_NEXT(ifa, ifa_link)) {
+			if (ifa->ifa_addr != NULL && ifa->ifa_addr->sa_family == AF_LINK)
+				break;
+		}
+		if (ifa) {
+			if (nla_put_sockaddr(m, IFLA_ADDRESS, ifa->ifa_addr) ||
+			    nla_put_sockaddr(m, IFLA_BROADCAST, ifa->ifa_broadaddr)) {
+				error = ENOBUFS;
+				goto done;
+			}
+		}
+
+		IF_ADDR_RUNLOCK(ifp);
+		nlmsg_end(m, nlh);
+	}
+	/*
+	 * The loop finished normally, so ifp is NULL and no address lock is
+	 * held.  A dump without its terminating NLMSG_DONE must not be sent.
+	 */
+	if (nlmsg_put(m, nlm->nlmsg_pid, nlm->nlmsg_seq, NLMSG_DONE, 0, nlm->nlmsg_flags) == NULL)
+		error = ENOBUFS;
+
+done:
+	/* ifp is non-NULL only when the loop was left early with its lock held. */
+	if (ifp != NULL)
+		IF_ADDR_RUNLOCK(ifp);
+	IFNET_RUNLOCK();
+	if (m) {
+		if (!error) {
+			netlink_dispatch(so, m);
+		} else {
+			m_freem(m);
+		}
+	}
+	return (error);
+}
+
+/*
+ * Answer an RTM_GETADDR request with one RTM_NEWADDR message per IPv4/IPv6
+ * interface address, followed by NLMSG_DONE.
+ *
+ * Each message carries the interface name (IFA_LABEL), the address
+ * (IFA_ADDRESS) and, when present, the broadcast address (IFA_BROADCAST).
+ * For IPv6 link-local addresses the scope embedded in the address is copied
+ * to ifa_scope and cleared from the reported address.
+ *
+ * Returns 0 when the reply was queued, ENOBUFS when it could not be built
+ * (the partial reply is discarded).
+ */
+static int
+netlink_process_getaddr_msg(struct socket *so, struct nlmsghdr *nlm)
+{
+	struct ifnet *ifp = NULL;
+	struct bsd_ifaddr *ifa;
+	struct nlmsghdr *nlh;
+	struct ifaddrmsg *ifm;
+	struct mbuf *m = NULL;
+	int error = 0;
+
+	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+	if (!m) {
+		return ENOBUFS;
+	}
+
+	IFNET_RLOCK();
+	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
+		IF_ADDR_RLOCK(ifp);
+		for (ifa = ifp->if_addr; ifa != NULL; ifa = TAILQ_NEXT(ifa, ifa_link)) {
+			int af;
+
+			/* The address must exist before its family can be inspected. */
+			if (!ifa->ifa_addr)
+				continue;
+
+			switch (ifa->ifa_addr->sa_family) {
+#ifdef INET
+			case AF_INET:
+				af = LINUX_AF_INET;
+				break;
+#endif
+#ifdef INET6
+			case AF_INET6:
+				af = LINUX_AF_INET6;
+				break;
+#endif
+			default:
+				af = -1;
+			}
+			if (af < 0)
+				continue;
+
+			/* Replies to RTM_GETADDR are RTM_NEWADDR messages. */
+			nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_NEWADDR, sizeof(*ifm), nlm->nlmsg_flags);
+			if (!nlh) {
+				error = ENOBUFS;
+				goto done;
+			}
+			ifm = (struct ifaddrmsg *) nlmsg_data(nlh);
+			ifm->ifa_index = ifp->if_index;
+			ifm->ifa_family = af;
+			ifm->ifa_prefixlen = get_sockaddr_mask_prefix_len(ifa->ifa_netmask);
+			ifm->ifa_flags = ifp->if_flags | ifp->if_drv_flags;
+			ifm->ifa_scope = 0; // FIXME:
+			if (nla_put_string(m, IFA_LABEL, ifp->if_xname)) {
+				error = ENOBUFS;
+				goto done;
+			}
+#ifdef INET6
+			if (ifa->ifa_addr->sa_family == AF_INET6) {
+				// FreeBSD embeds the IPv6 scope ID in the IPv6 address
+				// so need to extract and clear it before returning it.
+				struct bsd_sockaddr_in6 addr, broadaddr;
+				struct bsd_sockaddr *p_addr = ifa->ifa_addr, *p_broadaddr = ifa->ifa_broadaddr;
+				if (IN6_IS_ADDR_LINKLOCAL(&((struct bsd_sockaddr_in6 *)p_addr)->sin6_addr)) {
+					addr = *(struct bsd_sockaddr_in6 *)p_addr;
+					ifm->ifa_scope = in6_getscope(&addr.sin6_addr);
+					in6_clearscope(&addr.sin6_addr);
+					p_addr = (struct bsd_sockaddr *)&addr;
+				}
+				if (p_broadaddr && IN6_IS_ADDR_LINKLOCAL(&((struct bsd_sockaddr_in6 *)p_broadaddr)->sin6_addr)) {
+					broadaddr = *(struct bsd_sockaddr_in6 *)p_broadaddr;
+					in6_clearscope(&broadaddr.sin6_addr);
+					p_broadaddr = (struct bsd_sockaddr *)&broadaddr;
+				}
+				if (nla_put_sockaddr(m, IFA_ADDRESS, p_addr) ||
+				    nla_put_sockaddr(m, IFA_BROADCAST, p_broadaddr)) {
+					error = ENOBUFS;
+					goto done;
+				}
+			}
+			else
+#endif
+			{
+				if (nla_put_sockaddr(m, IFA_ADDRESS, ifa->ifa_addr) ||
+				    nla_put_sockaddr(m, IFA_BROADCAST, ifa->ifa_broadaddr)) {
+					error = ENOBUFS;
+					goto done;
+				}
+			}
+			nlmsg_end(m, nlh);
+		}
+
+		IF_ADDR_RUNLOCK(ifp);
+	}
+	/*
+	 * The loop finished normally, so ifp is NULL and no address lock is
+	 * held.  A dump without its terminating NLMSG_DONE must not be sent.
+	 */
+	if (nlmsg_put(m, nlm->nlmsg_pid, nlm->nlmsg_seq, NLMSG_DONE, 0, nlm->nlmsg_flags) == NULL)
+		error = ENOBUFS;
+done:
+	/* ifp is non-NULL only when the loop was left early with its lock held. */
+	if (ifp != NULL)
+		IF_ADDR_RUNLOCK(ifp);
+	IFNET_RUNLOCK();
+	if (m) {
+		if (!error) {
+			netlink_dispatch(so, m);
+		} else {
+			m_freem(m);
+		}
+	}
+	return (error);
+}
+
+/*
+ * Translate a link-layer entry state into a netlink NUD_* state.
+ *
+ * IPv4 entries are always reported as NUD_REACHABLE.  IPv6 entries (with
+ * INET6) map each ND6_LLINFO_* state to its NUD_* counterpart; states with no
+ * counterpart, and any other family, yield 0.
+ */
+static uint16_t lle_state_to_ndm_state(int family, int state)
+{
+	uint16_t nud = 0;
+
+	if (family == AF_INET) {
+		nud = NUD_REACHABLE;
+	}
+#ifdef INET6
+	else if (family == AF_INET6) {
+		if (state == ND6_LLINFO_INCOMPLETE)
+			nud = NUD_INCOMPLETE;
+		else if (state == ND6_LLINFO_REACHABLE)
+			nud = NUD_REACHABLE;
+		else if (state == ND6_LLINFO_STALE)
+			nud = NUD_STALE;
+		else if (state == ND6_LLINFO_DELAY)
+			nud = NUD_DELAY;
+		else if (state == ND6_LLINFO_PROBE)
+			nud = NUD_PROBE;
+	}
+#endif
+	return nud;
+}
+
+/*
+ * Map a BSD address family to the Linux family number used on the wire.
+ * Returns -1 for families that have no mapping here.
+ */
+static int netlink_bsd_to_linux_family(int family)
+{
+	if (family == AF_INET)
+		return LINUX_AF_INET;
+#ifdef INET6
+	if (family == AF_INET6)
+		return LINUX_AF_INET6;
+#endif
+	return -1;
+}
+
+/*
+ * State shared between netlink_process_getneigh_msg() and the lltable/lle
+ * walk callbacks.
+ */
+struct netlink_getneigh_lle_cbdata {
+ struct nlmsghdr *nlm; /* request header; supplies pid/seq/flags for replies */
+ struct mbuf *m; /* reply under construction */
+ uint16_t family; /* family filter from the request, 0 matches all */
+ uint16_t state; /* state filter from the request, 0 matches all */
+};
+
+/*
+ * lltable_foreach_lle() callback: append one RTM_NEWNEIGH message describing
+ * the given link-layer entry to the reply in cbdata->m.
+ *
+ * Entries that do not match the requested family or state filter are
+ * skipped.  The message carries the neighbour's L3 address (NDA_DST, with
+ * the embedded scope cleared for IPv6 link-local addresses) and its 6-byte
+ * link-layer address (NDA_LLADDR).
+ *
+ * Returns 0 on success or skip, ENOBUFS when the reply cannot be extended.
+ */
+static int
+netlink_getneigh_lle_cb(struct lltable *llt, struct llentry *lle, void *data)
+{
+	struct netlink_getneigh_lle_cbdata *cbdata = (struct netlink_getneigh_lle_cbdata *) data;
+	int ndm_family = netlink_bsd_to_linux_family(llt->llt_af);
+	int ndm_state = lle_state_to_ndm_state(llt->llt_af, lle->ln_state);
+
+	if (cbdata->family && cbdata->family != ndm_family)
+		return 0;
+
+	if (cbdata->state && !(cbdata->state & ndm_state))
+		return 0;
+
+	struct nlmsghdr *nlm = cbdata->nlm;
+	struct mbuf *m = cbdata->m;
+	struct ndmsg *ndm;
+	/* Replies to RTM_GETNEIGH are RTM_NEWNEIGH messages. */
+	struct nlmsghdr *nlh = nlmsg_begin(m, nlm->nlmsg_pid, nlm->nlmsg_seq, LINUX_RTM_NEWNEIGH, sizeof(*ndm), nlm->nlmsg_flags);
+
+	if (!nlh) {
+		return ENOBUFS;
+	}
+
+	ndm = (struct ndmsg *) nlmsg_data(nlh);
+	ndm->ndm_family = ndm_family;
+	/* The payload area is not pre-zeroed; clear the padding explicitly. */
+	ndm->ndm_pad1 = 0;
+	ndm->ndm_pad2 = 0;
+	ndm->ndm_ifindex = llt->llt_ifp->if_index;
+	ndm->ndm_state = ndm_state;
+	ndm->ndm_flags = 0;
+	if (lle->ln_router)
+		ndm->ndm_flags |= NTF_ROUTER;
+	ndm->ndm_type = 0;
+
+	struct bsd_sockaddr *sa = L3_ADDR(lle);
+	if (sa->sa_family == AF_INET) {
+		struct bsd_sockaddr_in *sa4 = (struct bsd_sockaddr_in *) sa;
+		if (nla_put_type(m, NDA_DST, sa4->sin_addr)) {
+			return ENOBUFS;
+		}
+	}
+#ifdef INET6
+	else if (sa->sa_family == AF_INET6) {
+		struct bsd_sockaddr_in6 sa6 = *(struct bsd_sockaddr_in6 *) sa;
+		if (IN6_IS_ADDR_LINKLOCAL(&sa6.sin6_addr)) {
+			in6_clearscope(&sa6.sin6_addr);
+		}
+		if (nla_put_type(m, NDA_DST, sa6.sin6_addr)) {
+			return ENOBUFS;
+		}
+	}
+#endif
+
+	if (nla_put(m, NDA_LLADDR, 6, lle->ll_addr.mac16)) {
+		return ENOBUFS;
+	}
+
+	nlmsg_end(m, nlh);
+
+	return 0;
+}
+
+
+/*
+ * lltable_foreach() callback: walk every entry of one link-layer table with
+ * netlink_getneigh_lle_cb(), holding the interface's AF-data read lock.
+ *
+ * Tables whose family does not match the request filter, and tables that
+ * belong to loopback interfaces, are skipped.  Returns the result of the
+ * entry walk, or 0 when the table is skipped.
+ */
+static int
+netlink_getneigh_lltable_cb(struct lltable *llt, void *cbdata)
+{
+	struct netlink_getneigh_lle_cbdata *filter = (struct netlink_getneigh_lle_cbdata *) cbdata;
+	int rc;
+
+	if (filter->family) {
+		if (filter->family != netlink_bsd_to_linux_family(llt->llt_af))
+			return 0;
+	}
+	if ((llt->llt_ifp->if_flags & IFF_LOOPBACK) != 0)
+		return 0;
+
+	IF_AFDATA_RLOCK(llt->llt_ifp);
+	rc = lltable_foreach_lle(llt, netlink_getneigh_lle_cb, filter);
+	IF_AFDATA_RUNLOCK(llt->llt_ifp);
+
+	return rc;
+}
+
+/*
+ * Answer an RTM_GETNEIGH request with one message per matching neighbour
+ * entry, followed by NLMSG_DONE.
+ *
+ * The request must carry a complete struct ndmsg after the netlink header;
+ * its ndm_family and ndm_state fields act as filters (0 matches all).
+ *
+ * Returns 0 when the reply was queued, EINVAL for a request that is too
+ * short, ENOBUFS when the reply could not be built, or the error reported by
+ * the table walk.  On failure the partial reply is discarded.
+ */
+static int
+netlink_process_getneigh_msg(struct socket *so, struct nlmsghdr *nlm)
+{
+	struct mbuf *m = NULL;
+	struct netlink_getneigh_lle_cbdata cbdata;
+	int error;
+
+	/*
+	 * nlmsg_len includes the netlink header, so the minimum is header +
+	 * ndmsg; anything shorter would make the ndmsg read below run past
+	 * the end of the request.
+	 */
+	if (nlm->nlmsg_len < (uint32_t)nlmsg_msg_size(sizeof (struct ndmsg))) {
+		return EINVAL;
+	}
+
+	m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR);
+	if (!m) {
+		return ENOBUFS;
+	}
+
+	struct ndmsg *ndm = (struct ndmsg *) nlmsg_data(nlm);
+
+	cbdata.nlm = nlm;
+	cbdata.m = m;
+	cbdata.family = ndm->ndm_family;
+	cbdata.state = ndm->ndm_state;
+
+	error = lltable_foreach(netlink_getneigh_lltable_cb, &cbdata);
+
+	/* A dump without its terminating NLMSG_DONE must not be sent. */
+	if (!error &&
+	    nlmsg_put(m, nlm->nlmsg_pid, nlm->nlmsg_seq, NLMSG_DONE, 0, nlm->nlmsg_flags) == NULL) {
+		error = ENOBUFS;
+	}
+
+	if (!error) {
+		netlink_dispatch(so, m);
+	} else {
+		/* The reply may span several mbufs: free the whole chain. */
+		m_freem(m);
+	}
+
+	return error;
+}
+
+/*
+ * Validate one request held in the mbuf chain m and dispatch it by message
+ * type (RTM_GETLINK, RTM_GETADDR, RTM_GETNEIGH).
+ *
+ * The packet must be exactly one netlink message: at least a header long and
+ * with nlmsg_len equal to the packet length.  On any failure an NLMSG_ERROR
+ * reply is sent via netlink_senderr().  The request mbuf is always consumed.
+ *
+ * Returns 0 on success, otherwise the errno describing the failure
+ * (EINVAL, ENOBUFS, EOPNOTSUPP or a handler's error).
+ */
+static int
+netlink_process_msg(struct mbuf *m, struct socket *so)
+{
+	struct nlmsghdr *nlm = NULL;
+	int error = 0;
+	int len;
+
+	if (m == NULL || (m->m_hdr.mh_flags & M_PKTHDR) == 0)
+		panic("Invalid message");
+
+	len = m->M_dat.MH.MH_pkthdr.len;
+	if (len < sizeof(struct nlmsghdr)) {
+		error = EINVAL;
+	} else if ((m = m_pullup(m, len)) == NULL) {
+		/* m is NULL from here on, so it is not freed again below. */
+		error = ENOBUFS;
+	} else if (len != mtod(m, struct nlmsghdr *)->nlmsg_len) {
+		error = EINVAL;
+	} else {
+		nlm = mtod(m, struct nlmsghdr *);
+
+		switch (nlm->nlmsg_type) {
+		case LINUX_RTM_GETLINK:
+			error = netlink_process_getlink_msg(so, nlm);
+			break;
+		case LINUX_RTM_GETADDR:
+			error = netlink_process_getaddr_msg(so, nlm);
+			break;
+		case LINUX_RTM_GETNEIGH:
+			error = netlink_process_getneigh_msg(so, nlm);
+			break;
+		default:
+			error = EOPNOTSUPP;
+			break;
+		}
+	}
+
+	if (error) {
+		netlink_senderr(so, nlm, error);
+	}
+	if (m) {
+		m_freem(m);
+	}
+
+	return (error);
+}
+
+/* pr_output hook for both netlinksw[] entries: processes the request in place. */
+static int
+netlink_output(struct mbuf *m, struct socket *so)
+{
+ return netlink_process_msg(m, so);
+}
+
+/*
+ * Definitions of protocols supported in the NETLINK domain.
+ */
+
+extern struct domain netlinkdomain; /* or at least forward */
+
+/*
+ * Protocol switch for the netlink domain.  SOCK_RAW and SOCK_DGRAM sockets
+ * are set up identically: both send through netlink_output() and use the
+ * netlink_usrreqs table.
+ */
+static struct protosw netlinksw[] = {
+ initialize_with([] (protosw& x) {
+ x.pr_type = SOCK_RAW;
+ x.pr_domain = &netlinkdomain;
+ x.pr_flags = PR_ATOMIC|PR_ADDR;
+ x.pr_output = netlink_output;
+ x.pr_ctlinput = raw_ctlinput;
+ x.pr_init = raw_init;
+ x.pr_usrreqs = &netlink_usrreqs;
+ }),
+ initialize_with([] (protosw& x) {
+ x.pr_type = SOCK_DGRAM;
+ x.pr_domain = &netlinkdomain;
+ x.pr_flags = PR_ATOMIC|PR_ADDR;
+ x.pr_output = netlink_output;
+ x.pr_ctlinput = raw_ctlinput;
+ x.pr_init = raw_init;
+ x.pr_usrreqs = &netlink_usrreqs;
+ }),
+};
+
+/*
+ * Domain descriptor for PF_NETLINK; dom_protoswNPROTOSW points one past the
+ * last entry of netlinksw[].
+ */
+struct domain netlinkdomain = initialize_with([] (domain& x) {
+ x.dom_family = PF_NETLINK;
+ x.dom_name = "netlink";
+ x.dom_protosw = netlinksw;
+ x.dom_protoswNPROTOSW =
&netlinksw[sizeof(netlinksw)/sizeof(netlinksw[0])];
+});
+
+VNET_DOMAIN_SET(netlink);
diff --git a/bsd/sys/compat/linux/linux_netlink.h
b/bsd/sys/compat/linux/linux_netlink.h
new file mode 100644
index 0000000..70a2590
--- /dev/null
+++ b/bsd/sys/compat/linux/linux_netlink.h
@@ -0,0 +1,175 @@
+#ifndef _NETLINK_H_
+#define _NETLINK_H_
+
+#include <sys/cdefs.h>
+
+/* Fixed header that starts every netlink message. */
+struct nlmsghdr {
+ uint32_t nlmsg_len; /* Length of message including header */
+ uint16_t nlmsg_type; /* Type of message content */
+ uint16_t nlmsg_flags; /* Additional flags */
+ uint32_t nlmsg_seq; /* Sequence number */
+ uint32_t nlmsg_pid; /* Sender port ID */
+};
+
+/* Payload of an NLMSG_ERROR message. */
+struct nlmsgerr {
+ int error; /* Negative errno or 0 for ack */
+ struct nlmsghdr msg; /* Message that caused the error */
+};
+
+
+/*
+ * Message layout helpers.  Messages are aligned to NLMSG_ALIGNTO bytes:
+ *   NLMSG_ALIGN   - round a length up to the alignment
+ *   NLMSG_HDRLEN  - aligned size of struct nlmsghdr
+ *   NLMSG_LENGTH  - header + payload length, unpadded
+ *   NLMSG_SPACE   - header + payload length, padded
+ *   NLMSG_DATA    - pointer to the payload that follows the header
+ *   NLMSG_NEXT    - advance to the next message, decrementing len
+ *   NLMSG_OK      - check that a complete message fits in len bytes
+ *   NLMSG_PAYLOAD - nlmsg_len minus NLMSG_SPACE(len)
+ */
+#define NLMSG_ALIGNTO 4U
+#define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) )
+#define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr)))
+#define NLMSG_LENGTH(len) ((len) + NLMSG_HDRLEN)
+#define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len))
+#define NLMSG_DATA(nlh) ((void*)(((char*)nlh) + NLMSG_LENGTH(0)))
+#define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \
+ (struct nlmsghdr*)(((char*)(nlh)) +
NLMSG_ALIGN((nlh)->nlmsg_len)))
+#define NLMSG_OK(nlh,len) ((len) >= (int)sizeof(struct nlmsghdr) && \
+ (nlh)->nlmsg_len >= sizeof(struct nlmsghdr) && \
+ (nlh)->nlmsg_len <= (len))
+#define NLMSG_PAYLOAD(nlh,len) ((nlh)->nlmsg_len - NLMSG_SPACE((len)))
+
+#define NLMSG_NOOP 0x1 /* Nothing. */
+#define NLMSG_ERROR 0x2 /* Error */
+#define NLMSG_DONE 0x3 /* End of a dump */
+#define NLMSG_OVERRUN 0x4 /* Data lost */
+
+
+/* Unpadded size of a message with the given payload length (header included). */
+static inline int nlmsg_msg_size(int payload)
+{
+	int total = NLMSG_HDRLEN + payload;
+
+	return total;
+}
+
+/* Pointer to the payload that immediately follows the (aligned) message header. */
+static inline void *nlmsg_data(const struct nlmsghdr *nlh)
+{
+	unsigned char *start = (unsigned char *) nlh;
+
+	return start + NLMSG_HDRLEN;
+}
+
+
+/* Header that precedes every netlink attribute payload. */
+struct nlattr {
+ uint16_t nla_len; /* set by nla_put() to the unpadded header + payload size */
+ uint16_t nla_type; /* attribute type, e.g. IFLA_*, IFA_* or NDA_* */
+};
+
+
+#define NLA_F_NESTED (1 << 15)
+#define NLA_F_NET_BYTEORDER (1 << 14)
+#define NLA_TYPE_MASK ~(NLA_F_NESTED | NLA_F_NET_BYTEORDER)
+
+#define NLA_ALIGNTO 4
+#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
+#define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr)))
+
+/* Unpadded size of an attribute with the given payload length (header included). */
+static inline int nla_attr_size(int payload)
+{
+	int total = NLA_HDRLEN + payload;
+
+	return total;
+}
+
+/* Size of an attribute with the given payload length, rounded up to NLA_ALIGNTO. */
+static inline int nla_total_size(int payload)
+{
+	int unpadded = nla_attr_size(payload);
+
+	return NLA_ALIGN(unpadded);
+}
+
+/* Pointer to the payload that immediately follows the (aligned) attribute header. */
+static inline void *nla_data(const struct nlattr *nla)
+{
+	char *start = (char *) nla;
+
+	return start + NLA_HDRLEN;
+}
+
+#define LINUX_RTM_NEWLINK 16
+#define LINUX_RTM_DELLINK 17
+#define LINUX_RTM_GETLINK 18
+#define LINUX_RTM_SETLINK 19
+#define LINUX_RTM_NEWADDR 20
+#define LINUX_RTM_DELADDR 21
+#define LINUX_RTM_GETADDR 22
+#define LINUX_RTM_NEWNEIGH 28
+#define LINUX_RTM_DELNEIGH 29
+#define LINUX_RTM_GETNEIGH 30
+
+/* Payload header of link messages (sent as LINUX_RTM_NEWLINK by the GETLINK handler). */
+struct ifinfomsg {
+ unsigned char ifi_family;
+ unsigned char __ifi_pad;
+ unsigned short ifi_type; /* ARPHRD_* */
+ int ifi_index; /* Link index */
+ unsigned ifi_flags; /* IFF_* flags */
+ unsigned ifi_change; /* IFF_* change mask */
+};
+
+#define IFLA_UNSPEC 0
+#define IFLA_ADDRESS 1
+#define IFLA_BROADCAST 2
+#define IFLA_IFNAME 3
+#define IFLA_MTU 4
+#define IFLA_LINK 5
+
+/*
+ * Payload header of address messages.  Note that ifa_flags is only 8 bits
+ * wide, so the IFA_F_* values above 0x80 defined below do not fit in it.
+ */
+struct ifaddrmsg {
+ uint8_t ifa_family;
+ uint8_t ifa_prefixlen; /* The prefix length */
+ uint8_t ifa_flags; /* Flags */
+ uint8_t ifa_scope; /* Address scope */
+ uint32_t ifa_index; /* Link index */
+};
+
+#define IFA_UNSPEC 0
+#define IFA_ADDRESS 1
+#define IFA_LOCAL 2
+#define IFA_LABEL 3
+#define IFA_BROADCAST 4
+#define IFA_ANYCAST 5
+#define IFA_CACHEINFO 6
+#define IFA_MULTICAST 7
+#define IFA_FLAGS 8
+
+/* ifa_flags */
+#define IFA_F_SECONDARY 0x01
+#define IFA_F_TEMPORARY IFA_F_SECONDARY
+#define IFA_F_NODAD 0x02
+#define IFA_F_OPTIMISTIC 0x04
+#define IFA_F_DADFAILED 0x08
+#define IFA_F_HOMEADDRESS 0x10
+#define IFA_F_DEPRECATED 0x20
+#define IFA_F_TENTATIVE 0x40
+#define IFA_F_PERMANENT 0x80
+#define IFA_F_MANAGETEMPADDR 0x100
+#define IFA_F_NOPREFIXROUTE 0x200
+#define IFA_F_MCAUTOJOIN 0x400
+#define IFA_F_STABLE_PRIVACY 0x800
+
+/* Payload header of neighbour messages; also read from RTM_GETNEIGH requests as a filter. */
+struct ndmsg {
+ uint8_t ndm_family;
+ uint8_t ndm_pad1;
+ uint16_t ndm_pad2;
+ int32_t ndm_ifindex;
+ uint16_t ndm_state; /* NUD_* bits */
+ uint8_t ndm_flags; /* NTF_* bits */
+ uint8_t ndm_type;
+};
+
+#define NDA_UNSPEC 0x0
+#define NDA_DST 0x01
+#define NDA_LLADDR 0x02
+#define NDA_CACHEINFO 0x03
+
+#define NTF_USE 0x01
+#define NTF_SELF 0x02
+#define NTF_MASTER 0x04
+#define NTF_PROXY 0x08
+#define NTF_EXT_LEARNED 0x10
+#define NTF_OFFLOADED 0x20
+#define NTF_ROUTER 0x80
+
+#define NUD_INCOMPLETE 0x01
+#define NUD_REACHABLE 0x02
+#define NUD_STALE 0x04
+#define NUD_DELAY 0x08
+#define NUD_PROBE 0x10
+#define NUD_FAILED 0x20
+
+/* Domain ID for supporting NETLINK socket on FreeBSD (actually 16 on Linux) */
+#define AF_NETLINK AF_VENDOR00
+#define PF_NETLINK AF_NETLINK
+
+__BEGIN_DECLS
+void netlink_init(void);
+__END_DECLS
+
+#endif /* _NETLINK_H_ */
diff --git a/bsd/sys/compat/linux/linux_socket.cc
b/bsd/sys/compat/linux/linux_socket.cc
index c1bbc49..62bc709 100644
--- a/bsd/sys/compat/linux/linux_socket.cc
+++ b/bsd/sys/compat/linux/linux_socket.cc
@@ -256,6 +256,8 @@ linux_to_bsd_domain(int domain)
return (AF_IPX);
case LINUX_AF_APPLETALK:
return (AF_APPLETALK);
+ case LINUX_AF_NETLINK:
+ return (AF_NETLINK);
}
return (-1);
}
@@ -279,6 +281,8 @@ bsd_to_linux_domain(int domain)
return (LINUX_AF_IPX);
case AF_APPLETALK:
return (LINUX_AF_APPLETALK);
+ case AF_NETLINK:
+ return (LINUX_AF_NETLINK);
}
return (-1);
}
diff --git a/bsd/sys/compat/linux/linux_socket.h
b/bsd/sys/compat/linux/linux_socket.h
index 6afa53e..ff14200 100644
--- a/bsd/sys/compat/linux/linux_socket.h
+++ b/bsd/sys/compat/linux/linux_socket.h
@@ -92,6 +92,7 @@
#define LINUX_AF_IPX 4
#define LINUX_AF_APPLETALK 5
#define LINUX_AF_INET6 10
+#define LINUX_AF_NETLINK 16
/* Supported socket types */
diff --git a/bsd/sys/net/netisr.h b/bsd/sys/net/netisr.h
index 7dc4ab1..932e470 100644
--- a/bsd/sys/net/netisr.h
+++ b/bsd/sys/net/netisr.h
@@ -61,6 +61,7 @@
#define NETISR_IPV6 10
#define NETISR_NATM 11
#define NETISR_EPAIR 12 /* if_epair(4) */
+#define NETISR_NETLINK 13 /* Linux NETLINK */
/*
* Protocol ordering and affinity policy constants. See the detailed
--
2.7.4
--
You received this message because you are subscribed to the Google Groups "OSv
Development" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.