Author: smh
Date: Mon Apr 24 16:31:28 2017
New Revision: 317375
URL: https://svnweb.freebsd.org/changeset/base/317375

Log:
  Partial MFC r316676 and the required r313045
  
  MFC r316676:
  
  Use estimated RTT for receive buffer auto resizing instead of timestamps.
  This is a partial MFC as stable/10 doesn't include the TCP stack
  modularisation.
  
  MFC r313045:
  
  Add an mbuf to ipinfo_t translator to finish cleanup of mbuf passing to TCP
  probes. This is a partial MFC (missing debug__output & debug__drop changes)
  due to the massive amount of additional dtrace changes that would be
  required for a full MFC.
  
  Relnotes:     Yes
  Sponsored by: Multiplay

Modified:
  stable/10/cddl/lib/libdtrace/ip.d
  stable/10/sys/netinet/in_kdtrace.c
  stable/10/sys/netinet/in_kdtrace.h
  stable/10/sys/netinet/tcp_input.c
  stable/10/sys/netinet/tcp_output.c
  stable/10/sys/netinet/tcp_var.h
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/cddl/lib/libdtrace/ip.d
==============================================================================
--- stable/10/cddl/lib/libdtrace/ip.d   Mon Apr 24 16:07:30 2017        (r317374)
+++ stable/10/cddl/lib/libdtrace/ip.d   Mon Apr 24 16:31:28 2017        (r317375)
@@ -240,6 +240,24 @@ translator ipinfo_t < uint8_t *p > {
 #pragma D binding "1.0" IFF_LOOPBACK
 inline int IFF_LOOPBACK =      0x8;
 
+#pragma D binding "1.13" translator
+translator ipinfo_t < struct mbuf *m > {
+       ip_ver =        m == NULL ? 0 : ((struct ip *)m->m_data)->ip_v;
+       ip_plength =    m == NULL ? 0 :
+           ((struct ip *)m->m_data)->ip_v == 4 ?
+           ntohs(((struct ip *)m->m_data)->ip_len) - 
+                       (((struct ip *)m->m_data)->ip_hl << 2):
+           ntohs(((struct ip6_hdr *)m->m_data)->ip6_ctlun.ip6_un1.ip6_un1_plen);
+       ip_saddr =      m == NULL ? 0 :
+           ((struct ip *)m->m_data)->ip_v == 4 ?
+           inet_ntoa(&((struct ip *)m->m_data)->ip_src.s_addr) :
+           inet_ntoa6(&((struct ip6_hdr *)m->m_data)->ip6_src);
+       ip_daddr =      m == NULL ? 0 :
+           ((struct ip *)m->m_data)->ip_v == 4 ?
+           inet_ntoa(&((struct ip *)m->m_data)->ip_dst.s_addr) :
+           inet_ntoa6(&((struct ip6_hdr *)m->m_data)->ip6_dst);
+};
+
 #pragma D binding "1.0" translator
 translator ifinfo_t < struct ifnet *p > {
        if_name =       p->if_xname;

Modified: stable/10/sys/netinet/in_kdtrace.c
==============================================================================
--- stable/10/sys/netinet/in_kdtrace.c  Mon Apr 24 16:07:30 2017        (r317374)
+++ stable/10/sys/netinet/in_kdtrace.c  Mon Apr 24 16:31:28 2017        (r317375)
@@ -58,28 +58,28 @@ SDT_PROBE_DEFINE6_XLATE(ip, , , send,
 SDT_PROBE_DEFINE5_XLATE(tcp, , , accept__established,
     "void *", "pktinfo_t *",
     "struct tcpcb *", "csinfo_t *",
-    "uint8_t *", "ipinfo_t *",
+    "struct mbuf *", "ipinfo_t *",
     "struct tcpcb *", "tcpsinfo_t *" ,
     "struct tcphdr *", "tcpinfoh_t *");
 
 SDT_PROBE_DEFINE5_XLATE(tcp, , , accept__refused,
     "void *", "pktinfo_t *",
     "struct tcpcb *", "csinfo_t *",
-    "uint8_t *", "ipinfo_t *",
+    "struct mbuf *", "ipinfo_t *",
     "struct tcpcb *", "tcpsinfo_t *" ,
     "struct tcphdr *", "tcpinfo_t *");
 
 SDT_PROBE_DEFINE5_XLATE(tcp, , , connect__established,
     "void *", "pktinfo_t *",
     "struct tcpcb *", "csinfo_t *",
-    "uint8_t *", "ipinfo_t *",
+    "struct mbuf *", "ipinfo_t *",
     "struct tcpcb *", "tcpsinfo_t *" ,
     "struct tcphdr *", "tcpinfoh_t *");
 
 SDT_PROBE_DEFINE5_XLATE(tcp, , , connect__refused,
     "void *", "pktinfo_t *",
     "struct tcpcb *", "csinfo_t *",
-    "uint8_t *", "ipinfo_t *",
+    "struct mbuf *", "ipinfo_t *",
     "struct tcpcb *", "tcpsinfo_t *" ,
     "struct tcphdr *", "tcpinfoh_t *");
 
@@ -93,7 +93,7 @@ SDT_PROBE_DEFINE5_XLATE(tcp, , , connect
 SDT_PROBE_DEFINE5_XLATE(tcp, , , receive,
     "void *", "pktinfo_t *",
     "struct tcpcb *", "csinfo_t *",
-    "uint8_t *", "ipinfo_t *",
+    "struct mbuf *", "ipinfo_t *",
     "struct tcpcb *", "tcpsinfo_t *" ,
     "struct tcphdr *", "tcpinfoh_t *");
 
@@ -112,6 +112,14 @@ SDT_PROBE_DEFINE6_XLATE(tcp, , , state__
     "void *", "void *",
     "int", "tcplsinfo_t *");
 
+SDT_PROBE_DEFINE6_XLATE(tcp, , , receive__autoresize,
+    "void *", "void *",
+    "struct tcpcb *", "csinfo_t *",
+    "struct mbuf *", "ipinfo_t *",
+    "struct tcpcb *", "tcpsinfo_t *" ,
+    "struct tcphdr *", "tcpinfoh_t *",
+    "int", "int");
+
 SDT_PROBE_DEFINE5_XLATE(udp, , , receive,
     "void *", "pktinfo_t *",
     "struct inpcb *", "csinfo_t *",

Modified: stable/10/sys/netinet/in_kdtrace.h
==============================================================================
--- stable/10/sys/netinet/in_kdtrace.h  Mon Apr 24 16:07:30 2017        (r317374)
+++ stable/10/sys/netinet/in_kdtrace.h  Mon Apr 24 16:31:28 2017        (r317375)
@@ -52,6 +52,7 @@ SDT_PROBE_DECLARE(tcp, , , connect__requ
 SDT_PROBE_DECLARE(tcp, , , receive);
 SDT_PROBE_DECLARE(tcp, , , send);
 SDT_PROBE_DECLARE(tcp, , , state__change);
+SDT_PROBE_DECLARE(tcp, , , receive__autoresize);
 
 SDT_PROBE_DECLARE(udp, , , receive);
 SDT_PROBE_DECLARE(udp, , , send);

Modified: stable/10/sys/netinet/tcp_input.c
==============================================================================
--- stable/10/sys/netinet/tcp_input.c   Mon Apr 24 16:07:30 2017        (r317374)
+++ stable/10/sys/netinet/tcp_input.c   Mon Apr 24 16:31:28 2017        (r317375)
@@ -1469,6 +1469,68 @@ drop:
                m_freem(m);
 }
 
+/*
+ * Automatic sizing of receive socket buffer.  Often the send
+ * buffer size is not optimally adjusted to the actual network
+ * conditions at hand (delay bandwidth product).  Setting the
+ * buffer size too small limits throughput on links with high
+ * bandwidth and high delay (eg. trans-continental/oceanic links).
+ *
+ * On the receive side the socket buffer memory is only rarely
+ * used to any significant extent.  This allows us to be much
+ * more aggressive in scaling the receive socket buffer.  For
+ * the case that the buffer space is actually used to a large
+ * extent and we run out of kernel memory we can simply drop
+ * the new segments; TCP on the sender will just retransmit it
+ * later.  Setting the buffer size too big may only consume too
+ * much kernel memory if the application doesn't read() from
+ * the socket or packet loss or reordering makes use of the
+ * reassembly queue.
+ *
+ * The criteria to step up the receive buffer one notch are:
+ *  1. Application has not set receive buffer size with
+ *     SO_RCVBUF. Setting SO_RCVBUF clears SB_AUTOSIZE.
+ *  2. the number of bytes received during the time it takes
+ *     one timestamp to be reflected back to us (the RTT);
+ *  3. received bytes per RTT is within seven eighth of the
+ *     current socket buffer size;
+ *  4. receive buffer size has not hit maximal automatic size;
+ *
+ * This algorithm does one step per RTT at most and only if
+ * we receive a bulk stream w/o packet losses or reorderings.
+ * Shrinking the buffer during idle times is not necessary as
+ * it doesn't consume any memory when idle.
+ *
+ * TODO: Only step up if the application is actually serving
+ * the buffer to better manage the socket buffer resources.
+ */
+int
+tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so,
+    struct tcpcb *tp, int tlen)
+{
+       int newsize = 0;
+
+       if (V_tcp_do_autorcvbuf && (so->so_rcv.sb_flags & SB_AUTOSIZE) &&
+           tp->t_srtt != 0 && tp->rfbuf_ts != 0 &&
+           TCP_TS_TO_TICKS(tcp_ts_getticks() - tp->rfbuf_ts) >
+           (tp->t_srtt >> TCP_RTT_SHIFT)) {
+               if (tp->rfbuf_cnt > (so->so_rcv.sb_hiwat / 8 * 7) &&
+                   so->so_rcv.sb_hiwat < V_tcp_autorcvbuf_max) {
+                       newsize = min(so->so_rcv.sb_hiwat +
+                           V_tcp_autorcvbuf_inc, V_tcp_autorcvbuf_max);
+               }
+               TCP_PROBE6(receive__autoresize, NULL, tp, m, tp, th, newsize);
+
+               /* Start over with next RTT. */
+               tp->rfbuf_ts = 0;
+               tp->rfbuf_cnt = 0;
+       } else {
+               tp->rfbuf_cnt += tlen;  /* add up */
+       }
+
+       return (newsize);
+}
+
 static void
 tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
     struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
@@ -1811,60 +1873,7 @@ tcp_do_segment(struct mbuf *m, struct tc
                                tcp_trace(TA_INPUT, ostate, tp,
                                    (void *)tcp_saveipgen, &tcp_savetcp, 0);
 #endif
-               /*
-                * Automatic sizing of receive socket buffer.  Often the send
-                * buffer size is not optimally adjusted to the actual network
-                * conditions at hand (delay bandwidth product).  Setting the
-                * buffer size too small limits throughput on links with high
-                * bandwidth and high delay (eg. trans-continental/oceanic links).
-                *
-                * On the receive side the socket buffer memory is only rarely
-                * used to any significant extent.  This allows us to be much
-                * more aggressive in scaling the receive socket buffer.  For
-                * the case that the buffer space is actually used to a large
-                * extent and we run out of kernel memory we can simply drop
-                * the new segments; TCP on the sender will just retransmit it
-                * later.  Setting the buffer size too big may only consume too
-                * much kernel memory if the application doesn't read() from
-                * the socket or packet loss or reordering makes use of the
-                * reassembly queue.
-                *
-                * The criteria to step up the receive buffer one notch are:
-                *  1. the number of bytes received during the time it takes
-                *     one timestamp to be reflected back to us (the RTT);
-                *  2. received bytes per RTT is within seven eighth of the
-                *     current socket buffer size;
-                *  3. receive buffer size has not hit maximal automatic size;
-                *
-                * This algorithm does one step per RTT at most and only if
-                * we receive a bulk stream w/o packet losses or reorderings.
-                * Shrinking the buffer during idle times is not necessary as
-                * it doesn't consume any memory when idle.
-                *
-                * TODO: Only step up if the application is actually serving
-                * the buffer to better manage the socket buffer resources.
-                */
-                       if (V_tcp_do_autorcvbuf &&
-                           (to.to_flags & TOF_TS) &&
-                           to.to_tsecr &&
-                           (so->so_rcv.sb_flags & SB_AUTOSIZE)) {
-                               if (TSTMP_GT(to.to_tsecr, tp->rfbuf_ts) &&
-                                   to.to_tsecr - tp->rfbuf_ts < hz) {
-                                       if (tp->rfbuf_cnt >
-                                           (so->so_rcv.sb_hiwat / 8 * 7) &&
-                                           so->so_rcv.sb_hiwat <
-                                           V_tcp_autorcvbuf_max) {
-                                               newsize =
-                                                   min(so->so_rcv.sb_hiwat +
-                                                   V_tcp_autorcvbuf_inc,
-                                                   V_tcp_autorcvbuf_max);
-                                       }
-                                       /* Start over with next RTT. */
-                                       tp->rfbuf_ts = 0;
-                                       tp->rfbuf_cnt = 0;
-                               } else
-                                       tp->rfbuf_cnt += tlen;  /* add up */
-                       }
+                       newsize = tcp_autorcvbuf(m, th, so, tp, tlen);
 
                        /* Add data to socket buffer. */
                        SOCKBUF_LOCK(&so->so_rcv);
@@ -1905,10 +1914,6 @@ tcp_do_segment(struct mbuf *m, struct tc
                win = 0;
        tp->rcv_wnd = imax(win, (int)(tp->rcv_adv - tp->rcv_nxt));
 
-       /* Reset receive buffer auto scaling when not in bulk receive mode. */
-       tp->rfbuf_ts = 0;
-       tp->rfbuf_cnt = 0;
-
        switch (tp->t_state) {
 
        /*

Modified: stable/10/sys/netinet/tcp_output.c
==============================================================================
--- stable/10/sys/netinet/tcp_output.c  Mon Apr 24 16:07:30 2017        (r317374)
+++ stable/10/sys/netinet/tcp_output.c  Mon Apr 24 16:31:28 2017        (r317375)
@@ -790,11 +790,13 @@ send:
                        to.to_tsval = tcp_ts_getticks() + tp->ts_offset;
                        to.to_tsecr = tp->ts_recent;
                        to.to_flags |= TOF_TS;
-                       /* Set receive buffer autosizing timestamp. */
-                       if (tp->rfbuf_ts == 0 &&
-                           (so->so_rcv.sb_flags & SB_AUTOSIZE))
-                               tp->rfbuf_ts = tcp_ts_getticks();
                }
+
+               /* Set receive buffer autosizing timestamp. */
+               if (tp->rfbuf_ts == 0 &&
+                   (so->so_rcv.sb_flags & SB_AUTOSIZE))
+                       tp->rfbuf_ts = tcp_ts_getticks();
+
                /* Selective ACK's. */
                if (tp->t_flags & TF_SACK_PERMIT) {
                        if (flags & TH_SYN)

Modified: stable/10/sys/netinet/tcp_var.h
==============================================================================
--- stable/10/sys/netinet/tcp_var.h     Mon Apr 24 16:07:30 2017        (r317374)
+++ stable/10/sys/netinet/tcp_var.h     Mon Apr 24 16:31:28 2017        (r317375)
@@ -704,6 +704,8 @@ int  tcp_reass(struct tcpcb *, struct tc
 void    tcp_reass_global_init(void);
 void    tcp_reass_flush(struct tcpcb *);
 void    tcp_input(struct mbuf *, int);
+int     tcp_autorcvbuf(struct mbuf *, struct tcphdr *, struct socket *,
+           struct tcpcb *, int);
 u_long  tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *);
 u_long  tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *);
 void    tcp_mss_update(struct tcpcb *, int, int, struct hc_metrics_lite *,
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to