Re: [RFC PATCH v2 net-next 3/3] tcp: add NV congestion control

2015-07-23 Thread Lawrence Brakmo
Thank you all for your comments, I'm currently testing the changes.
Other comments inline.

On 7/21/15, 11:50 PM, Yuchung Cheng ych...@google.com wrote:

On Tue, Jul 21, 2015 at 9:21 PM, Lawrence Brakmo bra...@fb.com wrote:
 This is a request for comments.

 TCP-NV (New Vegas) is a major update to TCP-Vegas. An earlier version of
 NV was presented at 2010's LPC (slides). It is a delayed based
 congestion avoidance for the data center. This version has been tested
 within a 10G rack where the HW RTTs are 20-50us.

 A description of TCP-NV, including implementation and experimental
 results, can be found at:
 
 http://www.brakmo.org/networking/tcp-nv/TCPNV.html

 The current version includes many module parameters to support
 experimentation with the parameters.

 Signed-off-by: Lawrence Brakmo bra...@fb.com
 ---
  include/net/tcp.h  |   1 +
  net/ipv4/Kconfig   |  16 ++
  net/ipv4/Makefile  |   1 +
  net/ipv4/sysctl_net_ipv4.c |   9 +
  net/ipv4/tcp_input.c   |   2 +
  net/ipv4/tcp_nv.c  | 479
+
  6 files changed, 508 insertions(+)
  create mode 100644 net/ipv4/tcp_nv.c

 diff --git a/include/net/tcp.h b/include/net/tcp.h
 index 2e62efe..c0690ae 100644
 --- a/include/net/tcp.h
 +++ b/include/net/tcp.h
 @@ -281,6 +281,7 @@ extern unsigned int sysctl_tcp_notsent_lowat;
  extern int sysctl_tcp_min_tso_segs;
  extern int sysctl_tcp_autocorking;
  extern int sysctl_tcp_invalid_ratelimit;
 +extern int sysctl_tcp_nv_enable;

  extern atomic_long_t tcp_memory_allocated;
  extern struct percpu_counter tcp_sockets_allocated;
 diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
 index 6fb3c90..c37b374 100644
 --- a/net/ipv4/Kconfig
 +++ b/net/ipv4/Kconfig
 @@ -539,6 +539,22 @@ config TCP_CONG_VEGAS
 window. TCP Vegas should provide less packet loss, but it is
 not as aggressive as TCP Reno.

 +config TCP_CONG_NV
 +   tristate TCP NV
 +   default m
 +   ---help---
 +   TCP NV is a follow up to TCP Vegas. It has been modified to
deal with
 +   10G networks, measurement noise introduced by LRO, GRO and
interrupt
 +   coalescence. In addition, it will decrease its cwnd
multiplicative
multiplicatively

 +   instead of linearly.
 +
 +   Note that in general congestion avoidance (cwnd decreased when
# packets
 +   queued grows) cannot coexist with congestion control (cwnd
decreased only
 +   when there is packet loss) due to fairness issues. One scenario
when the
s/the/they
 +   can coexist safely is when the CA flows have RTTs << CC flows RTTs.
 +
 +   For further details see http://www.brakmo.org/networking/tcp-nv/
 +
  config TCP_CONG_SCALABLE
 tristate Scalable TCP
 default n
 diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
 index efc43f3..06f335f 100644
 --- a/net/ipv4/Makefile
 +++ b/net/ipv4/Makefile
 @@ -50,6 +50,7 @@ obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o
  obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o
  obj-$(CONFIG_TCP_CONG_HTCP) += tcp_htcp.o
  obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o
 +obj-$(CONFIG_TCP_CONG_NV) += tcp_nv.o
  obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
  obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
  obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
 diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
 index 433231c..31846d5 100644
 --- a/net/ipv4/sysctl_net_ipv4.c
 +++ b/net/ipv4/sysctl_net_ipv4.c
 @@ -730,6 +730,15 @@ static struct ctl_table ipv4_table[] = {
 .proc_handler   = proc_dointvec_ms_jiffies,
 },
 {
 +   .procname   = tcp_nv_enable,
 +   .data   = sysctl_tcp_nv_enable,
 +   .maxlen = sizeof(int),
 +   .mode   = 0644,
 +   .proc_handler   = proc_dointvec_minmax,
 +   .extra1 = zero,
 +   .extra2 = one,
 +   },
 +   {
 .procname   = icmp_msgs_per_sec,
 .data   = sysctl_icmp_msgs_per_sec,
 .maxlen = sizeof(int),
 diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
 index aca4ae5..87560d9 100644
 --- a/net/ipv4/tcp_input.c
 +++ b/net/ipv4/tcp_input.c
 @@ -101,6 +101,8 @@ int sysctl_tcp_thin_dupack __read_mostly;
  int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
  int sysctl_tcp_early_retrans __read_mostly = 3;
  int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
 +int sysctl_tcp_nv_enable __read_mostly = 1;
 

Re: [RFC PATCH v2 net-next 3/3] tcp: add NV congestion control

2015-07-22 Thread Neal Cardwell
On Wed, Jul 22, 2015 at 2:50 AM, Yuchung Cheng ych...@google.com wrote:
 On Tue, Jul 21, 2015 at 9:21 PM, Lawrence Brakmo bra...@fb.com wrote:
 This is a request for comments.

 TCP-NV (New Vegas) is a major update to TCP-Vegas. An earlier version of
 NV was presented at 2010's LPC (slides). It is a delayed based
 congestion avoidance for the data center. This version has been tested
 within a 10G rack where the HW RTTs are 20-50us.

 A description of TCP-NV, including implementation and experimental
 results, can be found at:
 http://www.brakmo.org/networking/tcp-nv/TCPNV.html

 The current version includes many module parameters to support
 experimentation with the parameters.
...
 +extern int sysctl_tcp_nv_enable;

The sysctl_tcp_nv_enable only seems to be used within the NV module.
Can it be a module parameter instead of sysctl?

neal
--
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH v2 net-next 3/3] tcp: add NV congestion control

2015-07-22 Thread Yuchung Cheng
On Tue, Jul 21, 2015 at 9:21 PM, Lawrence Brakmo bra...@fb.com wrote:
 This is a request for comments.

 TCP-NV (New Vegas) is a major update to TCP-Vegas. An earlier version of
 NV was presented at 2010's LPC (slides). It is a delayed based
 congestion avoidance for the data center. This version has been tested
 within a 10G rack where the HW RTTs are 20-50us.

 A description of TCP-NV, including implementation and experimental
 results, can be found at:
 http://www.brakmo.org/networking/tcp-nv/TCPNV.html

 The current version includes many module parameters to support
 experimentation with the parameters.

 Signed-off-by: Lawrence Brakmo bra...@fb.com
 ---
  include/net/tcp.h  |   1 +
  net/ipv4/Kconfig   |  16 ++
  net/ipv4/Makefile  |   1 +
  net/ipv4/sysctl_net_ipv4.c |   9 +
  net/ipv4/tcp_input.c   |   2 +
  net/ipv4/tcp_nv.c  | 479 
 +
  6 files changed, 508 insertions(+)
  create mode 100644 net/ipv4/tcp_nv.c

 diff --git a/include/net/tcp.h b/include/net/tcp.h
 index 2e62efe..c0690ae 100644
 --- a/include/net/tcp.h
 +++ b/include/net/tcp.h
 @@ -281,6 +281,7 @@ extern unsigned int sysctl_tcp_notsent_lowat;
  extern int sysctl_tcp_min_tso_segs;
  extern int sysctl_tcp_autocorking;
  extern int sysctl_tcp_invalid_ratelimit;
 +extern int sysctl_tcp_nv_enable;

  extern atomic_long_t tcp_memory_allocated;
  extern struct percpu_counter tcp_sockets_allocated;
 diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
 index 6fb3c90..c37b374 100644
 --- a/net/ipv4/Kconfig
 +++ b/net/ipv4/Kconfig
 @@ -539,6 +539,22 @@ config TCP_CONG_VEGAS
 window. TCP Vegas should provide less packet loss, but it is
 not as aggressive as TCP Reno.

 +config TCP_CONG_NV
 +   tristate TCP NV
 +   default m
 +   ---help---
 +   TCP NV is a follow up to TCP Vegas. It has been modified to deal with
 +   10G networks, measurement noise introduced by LRO, GRO and interrupt
 +   coalescence. In addition, it will decrease its cwnd multiplicative
multiplicatively

 +   instead of linearly.
 +
 +   Note that in general congestion avoidance (cwnd decreased when # 
 packets
 +   queued grows) cannot coexist with congestion control (cwnd decreased 
 only
 +   when there is packet loss) due to fairness issues. One scenario when 
 the
s/the/they
 +   can coexist safely is when the CA flows have RTTs << CC flows RTTs.
 +
 +   For further details see http://www.brakmo.org/networking/tcp-nv/
 +
  config TCP_CONG_SCALABLE
 tristate Scalable TCP
 default n
 diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
 index efc43f3..06f335f 100644
 --- a/net/ipv4/Makefile
 +++ b/net/ipv4/Makefile
 @@ -50,6 +50,7 @@ obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o
  obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o
  obj-$(CONFIG_TCP_CONG_HTCP) += tcp_htcp.o
  obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o
 +obj-$(CONFIG_TCP_CONG_NV) += tcp_nv.o
  obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
  obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
  obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
 diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
 index 433231c..31846d5 100644
 --- a/net/ipv4/sysctl_net_ipv4.c
 +++ b/net/ipv4/sysctl_net_ipv4.c
 @@ -730,6 +730,15 @@ static struct ctl_table ipv4_table[] = {
 .proc_handler   = proc_dointvec_ms_jiffies,
 },
 {
 +   .procname   = tcp_nv_enable,
 +   .data   = sysctl_tcp_nv_enable,
 +   .maxlen = sizeof(int),
 +   .mode   = 0644,
 +   .proc_handler   = proc_dointvec_minmax,
 +   .extra1 = zero,
 +   .extra2 = one,
 +   },
 +   {
 .procname   = icmp_msgs_per_sec,
 .data   = sysctl_icmp_msgs_per_sec,
 .maxlen = sizeof(int),
 diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
 index aca4ae5..87560d9 100644
 --- a/net/ipv4/tcp_input.c
 +++ b/net/ipv4/tcp_input.c
 @@ -101,6 +101,8 @@ int sysctl_tcp_thin_dupack __read_mostly;
  int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
  int sysctl_tcp_early_retrans __read_mostly = 3;
  int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
 +int sysctl_tcp_nv_enable __read_mostly = 1;
 +EXPORT_SYMBOL(sysctl_tcp_nv_enable);

  #define FLAG_DATA  0x01 /* Incoming frame contained data.
   */
  #define FLAG_WIN_UPDATE0x02 /* Incoming ACK was a window 
 update.   */
 diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
 new file mode 100644
 index 000..af451b6
 --- /dev/null
 +++ b/net/ipv4/tcp_nv.c
 @@ -0,0 +1,479 @@
 +/*
 + * TCP NV: TCP with Congestion Avoidance
 + *
 + * TCP-NV is a successor of TCP-Vegas that has been developed to
 + * deal with the issues that occur in modern networks.
 + * Like TCP-Vegas, TCP-NV supports true congestion avoidance,
 + * 

[RFC PATCH v2 net-next 3/3] tcp: add NV congestion control

2015-07-21 Thread Lawrence Brakmo
This is a request for comments.

TCP-NV (New Vegas) is a major update to TCP-Vegas. An earlier version of
NV was presented at 2010's LPC (slides). It is a delay-based
congestion avoidance algorithm for the data center. This version has been tested
within a 10G rack where the HW RTTs are 20-50us.

A description of TCP-NV, including implementation and experimental
results, can be found at:
http://www.brakmo.org/networking/tcp-nv/TCPNV.html

The current version includes many module parameters to support
experimentation with the parameters.

Signed-off-by: Lawrence Brakmo bra...@fb.com
---
 include/net/tcp.h  |   1 +
 net/ipv4/Kconfig   |  16 ++
 net/ipv4/Makefile  |   1 +
 net/ipv4/sysctl_net_ipv4.c |   9 +
 net/ipv4/tcp_input.c   |   2 +
 net/ipv4/tcp_nv.c  | 479 +
 6 files changed, 508 insertions(+)
 create mode 100644 net/ipv4/tcp_nv.c

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 2e62efe..c0690ae 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -281,6 +281,7 @@ extern unsigned int sysctl_tcp_notsent_lowat;
 extern int sysctl_tcp_min_tso_segs;
 extern int sysctl_tcp_autocorking;
 extern int sysctl_tcp_invalid_ratelimit;
+extern int sysctl_tcp_nv_enable;
 
 extern atomic_long_t tcp_memory_allocated;
 extern struct percpu_counter tcp_sockets_allocated;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 6fb3c90..c37b374 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -539,6 +539,22 @@ config TCP_CONG_VEGAS
window. TCP Vegas should provide less packet loss, but it is
not as aggressive as TCP Reno.
 
+config TCP_CONG_NV
+   tristate TCP NV
+   default m
+   ---help---
+   TCP NV is a follow up to TCP Vegas. It has been modified to deal with
+   10G networks, measurement noise introduced by LRO, GRO and interrupt
+   coalescence. In addition, it will decrease its cwnd multiplicative
+   instead of linearly.
+
+   Note that in general congestion avoidance (cwnd decreased when # packets
+   queued grows) cannot coexist with congestion control (cwnd decreased 
only
+   when there is packet loss) due to fairness issues. One scenario when the
+   can coexist safely is when the CA flows have RTTs << CC flows RTTs.
+
+   For further details see http://www.brakmo.org/networking/tcp-nv/
+
 config TCP_CONG_SCALABLE
tristate Scalable TCP
default n
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index efc43f3..06f335f 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -50,6 +50,7 @@ obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o
 obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o
 obj-$(CONFIG_TCP_CONG_HTCP) += tcp_htcp.o
 obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o
+obj-$(CONFIG_TCP_CONG_NV) += tcp_nv.o
 obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o
 obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
 obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 433231c..31846d5 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -730,6 +730,15 @@ static struct ctl_table ipv4_table[] = {
.proc_handler   = proc_dointvec_ms_jiffies,
},
{
+   .procname   = tcp_nv_enable,
+   .data   = sysctl_tcp_nv_enable,
+   .maxlen = sizeof(int),
+   .mode   = 0644,
+   .proc_handler   = proc_dointvec_minmax,
+   .extra1 = zero,
+   .extra2 = one,
+   },  
+   {
.procname   = icmp_msgs_per_sec,
.data   = sysctl_icmp_msgs_per_sec,
.maxlen = sizeof(int),
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index aca4ae5..87560d9 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -101,6 +101,8 @@ int sysctl_tcp_thin_dupack __read_mostly;
 int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
 int sysctl_tcp_early_retrans __read_mostly = 3;
 int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
+int sysctl_tcp_nv_enable __read_mostly = 1;
+EXPORT_SYMBOL(sysctl_tcp_nv_enable);
 
 #define FLAG_DATA  0x01 /* Incoming frame contained data.  
*/
 #define FLAG_WIN_UPDATE0x02 /* Incoming ACK was a window 
update.   */
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
new file mode 100644
index 000..af451b6
--- /dev/null
+++ b/net/ipv4/tcp_nv.c
@@ -0,0 +1,479 @@
+/*
+ * TCP NV: TCP with Congestion Avoidance
+ *
+ * TCP-NV is a successor of TCP-Vegas that has been developed to
+ * deal with the issues that occur in modern networks. 
+ * Like TCP-Vegas, TCP-NV supports true congestion avoidance,
+ * the ability to detect congestion before packet losses occur.
+ * When congestion (queue buildup) starts to occur, TCP-NV
+ * predicts what the cwnd size should be for the current
+ * throughput and it