Author: melifaro
Date: Sun Apr 27 17:41:18 2014
New Revision: 265019
URL: http://svnweb.freebsd.org/changeset/base/265019

Log:
  Improve memory allocation model for rt_msg2() rtsock messages:
   * memory is now allocated as early as possible, without holding locks.
   * sysctl users are now guaranteed to get a response (M_WAITOK buffer prealloc).
   * socket users are more likely to use on-stack buffer for replies.
   * standard kernel malloc/free functions are now used instead of radix wrappers.
  rt_msg2() has been renamed to rtsock_msg_buffer().
  
  MFC after:    1 month

Modified:
  head/sys/net/rtsock.c

Modified: head/sys/net/rtsock.c
==============================================================================
--- head/sys/net/rtsock.c       Sun Apr 27 16:40:40 2014        (r265018)
+++ head/sys/net/rtsock.c       Sun Apr 27 17:41:18 2014        (r265019)
@@ -152,8 +152,8 @@ struct walkarg {
 
 static void    rts_input(struct mbuf *m);
 static struct mbuf *rt_msg1(int type, struct rt_addrinfo *rtinfo);
-static int     rt_msg2(int type, struct rt_addrinfo *rtinfo,
-                       caddr_t cp, struct walkarg *w);
+static int     rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo,
+                       struct walkarg *w, int *plen);
 static int     rt_xaddrs(caddr_t cp, caddr_t cplim,
                        struct rt_addrinfo *rtinfo);
 static int     sysctl_dumpentry(struct radix_node *rn, void *vw);
@@ -526,11 +526,13 @@ route_output(struct mbuf *m, struct sock
        struct sockaddr_in6 *sin6;
        int i, rti_need_deembed = 0;
 #endif
-       int len, error = 0, fibnum;
+       int alloc_len = 0, len, error = 0, fibnum;
        struct ifnet *ifp = NULL;
        union sockaddr_union saun;
        sa_family_t saf = AF_UNSPEC;
        struct rawcb *rp = NULL;
+       struct walkarg w;
+       char msgbuf[512];
 
        fibnum = so->so_fibnum;
 
@@ -545,15 +547,31 @@ route_output(struct mbuf *m, struct sock
            len != mtod(m, struct rt_msghdr *)->rtm_msglen)
                senderr(EINVAL);
 
-       R_Malloc(rtm, struct rt_msghdr *, len);
-       if (rtm == NULL)
-               senderr(ENOBUFS);
+       /*
+        * Most of current messages are in range 200-240 bytes,
+        * minimize possible failures by using on-stack buffer
+        * which should fit for most messages.
+        * However, use stable memory if we need to handle
+        * something large.
+        */
+       if (len < sizeof(msgbuf)) {
+               alloc_len = sizeof(msgbuf);
+               rtm = (struct rt_msghdr *)msgbuf;
+       } else {
+               alloc_len = roundup2(len, 1024);
+               rtm = malloc(alloc_len, M_TEMP, M_NOWAIT);
+               if (rtm == NULL)
+                       senderr(ENOBUFS);
+       }
+
        m_copydata(m, 0, len, (caddr_t)rtm);
        bzero(&info, sizeof(info));
+       bzero(&w, sizeof(w));
 
        if (rtm->rtm_version != RTM_VERSION) {
                /* Do not touch message since format is unknown */
-               Free(rtm);
+               if ((char *)rtm != msgbuf)
+                       free(rtm, M_TEMP);
                rtm = NULL;
                senderr(EPROTONOSUPPORT);
        }
@@ -798,18 +816,26 @@ report:
                } else if ((ifp = rt->rt_ifp) != NULL) {
                        rtm->rtm_index = ifp->if_index;
                }
-               len = rt_msg2(rtm->rtm_type, &info, NULL, NULL);
-               if (len > rtm->rtm_msglen) {
+
+               /* Check if we need to realloc storage */
+               rtsock_msg_buffer(rtm->rtm_type, &info, NULL, &len);
+               if (len > alloc_len) {
                        struct rt_msghdr *new_rtm;
-                       R_Malloc(new_rtm, struct rt_msghdr *, len);
+                       new_rtm = malloc(len, M_TEMP, M_NOWAIT);
                        if (new_rtm == NULL) {
                                RT_UNLOCK(rt);
                                senderr(ENOBUFS);
                        }
                        bcopy(rtm, new_rtm, rtm->rtm_msglen);
-                       Free(rtm); rtm = new_rtm;
+                       free(rtm, M_TEMP);
+                       rtm = new_rtm;
+                       alloc_len = len;
                }
-               (void)rt_msg2(rtm->rtm_type, &info, (caddr_t)rtm, NULL);
+
+               w.w_tmem = (caddr_t)rtm;
+               w.w_tmemsize = alloc_len;
+               rtsock_msg_buffer(rtm->rtm_type, &info, &w, &len);
+
                if (rt->rt_flags & RTF_GWFLAG_COMPAT)
                        rtm->rtm_flags = RTF_GATEWAY | 
                                (rt->rt_flags & ~RTF_GWFLAG_COMPAT);
@@ -833,8 +859,8 @@ flush:
         */
        if ((so->so_options & SO_USELOOPBACK) == 0) {
                if (V_route_cb.any_count <= 1) {
-                       if (rtm != NULL)
-                               Free(rtm);
+                       if (rtm != NULL && (char *)rtm != msgbuf)
+                               free(rtm, M_TEMP);
                        m_freem(m);
                        return (error);
                }
@@ -870,7 +896,9 @@ flush:
                        m = NULL;
                } else if (m->m_pkthdr.len > rtm->rtm_msglen)
                        m_adj(m, rtm->rtm_msglen - m->m_pkthdr.len);
-               Free(rtm);
+
+               if ((char *)rtm != msgbuf)
+                       free(rtm, M_TEMP);
        }
        if (m != NULL) {
                M_SETFIB(m, fibnum);
@@ -1041,21 +1069,26 @@ rt_msg1(int type, struct rt_addrinfo *rt
 }
 
 /*
- * Used by the sysctl code and routing socket.
+ * Writes information related to @rtinfo object to preallocated buffer.
+ * Stores needed size in @plen. If @w is NULL, calculates size without
+ * writing.
+ * Used for sysctl dumps and rtsock answers (RTM_DEL/RTM_GET) generation.
+ *
+ * Returns 0 on success.
+ *
  */
 static int
-rt_msg2(int type, struct rt_addrinfo *rtinfo, caddr_t cp, struct walkarg *w)
+rtsock_msg_buffer(int type, struct rt_addrinfo *rtinfo, struct walkarg *w, int *plen)
 {
        int i;
-       int len, dlen, second_time = 0;
-       caddr_t cp0;
+       int len, buflen = 0, dlen;
+       caddr_t cp;
+       struct rt_msghdr *rtm = NULL;
 #ifdef INET6
        struct sockaddr_storage ss;
        struct sockaddr_in6 *sin6;
 #endif
 
-       rtinfo->rti_addrs = 0;
-again:
        switch (type) {
 
        case RTM_DELADDR:
@@ -1094,9 +1127,14 @@ again:
        default:
                len = sizeof(struct rt_msghdr);
        }
-       cp0 = cp;
-       if (cp0)
-               cp += len;
+
+       if (w != NULL) {
+               rtm = (struct rt_msghdr *)w->w_tmem;
+               buflen = w->w_tmemsize - len;
+               cp = (caddr_t)w->w_tmem + len;
+       }
+
+       rtinfo->rti_addrs = 0;
        for (i = 0; i < RTAX_MAX; i++) {
                struct sockaddr *sa;
 
@@ -1104,7 +1142,7 @@ again:
                        continue;
                rtinfo->rti_addrs |= (1 << i);
                dlen = SA_SIZE(sa);
-               if (cp) {
+               if (cp != NULL && buflen >= dlen) {
 #ifdef INET6
                        if (V_deembed_scopeid && sa->sa_family == AF_INET6) {
                                sin6 = (struct sockaddr_in6 *)&ss;
@@ -1115,37 +1153,40 @@ again:
 #endif
                        bcopy((caddr_t)sa, cp, (unsigned)dlen);
                        cp += dlen;
+                       buflen -= dlen;
+               } else if (cp != NULL) {
+                       /*
+                        * Buffer too small. Count needed size
+                        * and return with error.
+                        */
+                       cp = NULL;
                }
+
                len += dlen;
        }
-       len = ALIGN(len);
-       if (cp == NULL && w != NULL && !second_time) {
-               struct walkarg *rw = w;
 
-               if (rw->w_req) {
-                       if (rw->w_tmemsize < len) {
-                               if (rw->w_tmem)
-                                       free(rw->w_tmem, M_RTABLE);
-                               rw->w_tmem = (caddr_t)
-                                       malloc(len, M_RTABLE, M_NOWAIT);
-                               if (rw->w_tmem)
-                                       rw->w_tmemsize = len;
-                       }
-                       if (rw->w_tmem) {
-                               cp = rw->w_tmem;
-                               second_time = 1;
-                               goto again;
-                       }
-               }
+       if (cp != NULL) {
+               dlen = ALIGN(len) - len;
+               if (buflen < dlen)
+                       cp = NULL;
+               else
+                       buflen -= dlen;
        }
-       if (cp) {
-               struct rt_msghdr *rtm = (struct rt_msghdr *)cp0;
+       len = ALIGN(len);
 
+       if (cp != NULL) {
+               /* fill header iff buffer is large enough */
                rtm->rtm_version = RTM_VERSION;
                rtm->rtm_type = type;
                rtm->rtm_msglen = len;
        }
-       return (len);
+
+       *plen = len;
+
+       if (w != NULL && cp == NULL)
+               return (ENOBUFS);
+
+       return (0);
 }
 
 /*
@@ -1473,7 +1514,8 @@ sysctl_dumpentry(struct radix_node *rn, 
                if (rt->rt_ifp->if_flags & IFF_POINTOPOINT)
                        info.rti_info[RTAX_BRD] = rt->rt_ifa->ifa_dstaddr;
        }
-       size = rt_msg2(RTM_GET, &info, NULL, w);
+       if ((error = rtsock_msg_buffer(RTM_GET, &info, w, &size)) != 0)
+               return (error);
        if (w->w_req && w->w_tmem) {
                struct rt_msghdr *rtm = (struct rt_msghdr *)w->w_tmem;
 
@@ -1649,7 +1691,9 @@ sysctl_iflist(int af, struct walkarg *w)
                IF_ADDR_RLOCK(ifp);
                ifa = ifp->if_addr;
                info.rti_info[RTAX_IFP] = ifa->ifa_addr;
-               len = rt_msg2(RTM_IFINFO, &info, NULL, w);
+               error = rtsock_msg_buffer(RTM_IFINFO, &info, w, &len);
+               if (error != 0)
+                       goto done;
                info.rti_info[RTAX_IFP] = NULL;
                if (w->w_req && w->w_tmem) {
                        if (w->w_op == NET_RT_IFLISTL)
@@ -1668,7 +1712,9 @@ sysctl_iflist(int af, struct walkarg *w)
                        info.rti_info[RTAX_IFA] = ifa->ifa_addr;
                        info.rti_info[RTAX_NETMASK] = ifa->ifa_netmask;
                        info.rti_info[RTAX_BRD] = ifa->ifa_dstaddr;
-                       len = rt_msg2(RTM_NEWADDR, &info, NULL, w);
+                       error = rtsock_msg_buffer(RTM_NEWADDR, &info, w, &len);
+                       if (error != 0)
+                               goto done;
                        if (w->w_req && w->w_tmem) {
                                if (w->w_op == NET_RT_IFLISTL)
                                        error = sysctl_iflist_ifaml(ifa, &info,
@@ -1718,7 +1764,9 @@ sysctl_ifmalist(int af, struct walkarg *
                        info.rti_info[RTAX_GATEWAY] =
                            (ifma->ifma_addr->sa_family != AF_LINK) ?
                            ifma->ifma_lladdr : NULL;
-                       len = rt_msg2(RTM_NEWMADDR, &info, NULL, w);
+                       error = rtsock_msg_buffer(RTM_NEWADDR, &info, w, &len);
+                       if (error != 0)
+                               goto done;
                        if (w->w_req && w->w_tmem) {
                                struct ifma_msghdr *ifmam;
 
@@ -1778,6 +1826,14 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS)
        error = sysctl_wire_old_buffer(req, 0);
        if (error)
                return (error);
+       
+       /*
+        * Allocate reply buffer in advance.
+        * All rtsock messages has maximum length of u_short.
+        */
+       w.w_tmemsize = 65536;
+       w.w_tmem = malloc(w.w_tmemsize, M_TEMP, M_WAITOK);
+
        switch (w.w_op) {
 
        case NET_RT_DUMP:
@@ -1824,8 +1880,8 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS)
                error = sysctl_ifmalist(af, &w);
                break;
        }
-       if (w.w_tmem)
-               free(w.w_tmem, M_RTABLE);
+
+       free(w.w_tmem, M_TEMP);
        return (error);
 }
 
_______________________________________________
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to