Re: [PATCH net-next] tcp: adjust tail loss probe timeout

2017-07-19 Thread David Miller
From: Yuchung Cheng 
Date: Wed, 19 Jul 2017 15:41:26 -0700

> This patch adjusts the timeout formula used to schedule the TCP loss
> probe (TLP). The previous formula uses 2*SRTT, or
> max(2*SRTT, 1.5*SRTT + DelayACKMax) if only one packet is in flight.
> It also keeps a lower bound of 10 msec, which is too large for short
> RTT connections (e.g. within a data-center). The new formula is
> 2*SRTT + (inflight == 1 ? 200ms : 2 ticks), which performs better for
> short and fast connections.
> 
> Signed-off-by: Yuchung Cheng 
> Signed-off-by: Neal Cardwell 

Applied, thanks!


[PATCH net-next] tcp: adjust tail loss probe timeout

2017-07-19 Thread Yuchung Cheng
This patch adjusts the timeout formula used to schedule the TCP loss
probe (TLP). The previous formula uses 2*SRTT, or
max(2*SRTT, 1.5*SRTT + DelayACKMax) if only one packet is in flight.
It also keeps a lower bound of 10 msec, which is too large for short
RTT connections (e.g. within a data-center). The new formula is
2*SRTT + (inflight == 1 ? 200ms : 2 ticks), which performs better for
short and fast connections.

Signed-off-by: Yuchung Cheng 
Signed-off-by: Neal Cardwell 
---
 include/net/tcp.h   |  3 +--
 net/ipv4/tcp_output.c   | 17 ++---
 net/ipv4/tcp_recovery.c |  2 +-
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 70483296157f..4f056ea79df2 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -139,6 +139,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #endif
 #define TCP_RTO_MAX ((unsigned)(120*HZ))
 #define TCP_RTO_MIN ((unsigned)(HZ/5))
+#define TCP_TIMEOUT_MIN (2U) /* Min timeout for TCP timers in jiffies */
 #define TCP_TIMEOUT_INIT ((unsigned)(1*HZ))/* RFC6298 2.1 initial RTO value*/
 #define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ))/* RFC 1122 initial RTO value, now
 * used as a fallback RTO for the
@@ -150,8 +151,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes
 * for local resources.
 */
-#define TCP_REO_TIMEOUT_MIN (2000) /* Min RACK reordering timeout in usec */
-
 #define TCP_KEEPALIVE_TIME (120*60*HZ) /* two hours */
 #define TCP_KEEPALIVE_PROBES   9   /* Max of 9 keepalive probes */
 #define TCP_KEEPALIVE_INTVL (75*HZ)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4e985dea1dd2..886d874775df 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2377,7 +2377,6 @@ bool tcp_schedule_loss_probe(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
u32 timeout, tlp_time_stamp, rto_time_stamp;
-   u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
 
/* No consecutive loss probes. */
if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
@@ -2406,15 +2405,19 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 tcp_send_head(sk))
return false;
 
-   /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
+   /* Probe timeout is 2*rtt. Add minimum RTO to account
 * for delayed ack when there's one outstanding packet. If no RTT
 * sample is available then probe after TCP_TIMEOUT_INIT.
 */
-   timeout = rtt << 1 ? : TCP_TIMEOUT_INIT;
-   if (tp->packets_out == 1)
-   timeout = max_t(u32, timeout,
-   (rtt + (rtt >> 1) + TCP_DELACK_MAX));
-   timeout = max_t(u32, timeout, msecs_to_jiffies(10));
+   if (tp->srtt_us) {
+   timeout = usecs_to_jiffies(tp->srtt_us >> 2);
+   if (tp->packets_out == 1)
+   timeout += TCP_RTO_MIN;
+   else
+   timeout += TCP_TIMEOUT_MIN;
+   } else {
+   timeout = TCP_TIMEOUT_INIT;
+   }
 
/* If RTO is shorter, just schedule TLP in its place. */
tlp_time_stamp = tcp_jiffies32 + timeout;
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index fe9a493d0208..449cd914d58e 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -113,7 +113,7 @@ void tcp_rack_mark_lost(struct sock *sk)
tp->rack.advanced = 0;
tcp_rack_detect_loss(sk, &timeout);
if (timeout) {
-   timeout = usecs_to_jiffies(timeout + TCP_REO_TIMEOUT_MIN);
+   timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN;
inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT,
  timeout, inet_csk(sk)->icsk_rto);
}
-- 
2.14.0.rc0.284.gd933b75aa4-goog



[PATCH net-next] tcp: adjust tail loss probe timeout

2017-07-14 Thread Yuchung Cheng
This patch adjusts the timeout formula used to schedule the TCP loss
probe (TLP). The previous formula uses 2*SRTT, or
max(2*SRTT, 1.5*SRTT + DelayACKMax) if only one packet is in flight.
It also keeps a lower bound of 10 msec, which is too large for short
RTT connections (e.g. within a data-center). The new formula is
2*SRTT + (inflight == 1 ? 200ms : 2 ticks), which performs better for
short and fast connections.

Signed-off-by: Yuchung Cheng 
Signed-off-by: Neal Cardwell 
---
 include/net/tcp.h   |  3 +--
 net/ipv4/tcp_output.c   | 17 ++---
 net/ipv4/tcp_recovery.c |  2 +-
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 70483296157f..4f056ea79df2 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -139,6 +139,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #endif
 #define TCP_RTO_MAX ((unsigned)(120*HZ))
 #define TCP_RTO_MIN ((unsigned)(HZ/5))
+#define TCP_TIMEOUT_MIN (2U) /* Min timeout for TCP timers in jiffies */
 #define TCP_TIMEOUT_INIT ((unsigned)(1*HZ))/* RFC6298 2.1 initial RTO value*/
 #define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ))/* RFC 1122 initial RTO value, now
 * used as a fallback RTO for the
@@ -150,8 +151,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) /* Maximal interval between probes
 * for local resources.
 */
-#define TCP_REO_TIMEOUT_MIN (2000) /* Min RACK reordering timeout in usec */
-
 #define TCP_KEEPALIVE_TIME (120*60*HZ) /* two hours */
 #define TCP_KEEPALIVE_PROBES   9   /* Max of 9 keepalive probes */
 #define TCP_KEEPALIVE_INTVL (75*HZ)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4e985dea1dd2..886d874775df 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2377,7 +2377,6 @@ bool tcp_schedule_loss_probe(struct sock *sk)
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
u32 timeout, tlp_time_stamp, rto_time_stamp;
-   u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
 
/* No consecutive loss probes. */
if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
@@ -2406,15 +2405,19 @@ bool tcp_schedule_loss_probe(struct sock *sk)
 tcp_send_head(sk))
return false;
 
-   /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account
+   /* Probe timeout is 2*rtt. Add minimum RTO to account
 * for delayed ack when there's one outstanding packet. If no RTT
 * sample is available then probe after TCP_TIMEOUT_INIT.
 */
-   timeout = rtt << 1 ? : TCP_TIMEOUT_INIT;
-   if (tp->packets_out == 1)
-   timeout = max_t(u32, timeout,
-   (rtt + (rtt >> 1) + TCP_DELACK_MAX));
-   timeout = max_t(u32, timeout, msecs_to_jiffies(10));
+   if (tp->srtt_us) {
+   timeout = usecs_to_jiffies(tp->srtt_us >> 2);
+   if (tp->packets_out == 1)
+   timeout += TCP_RTO_MIN;
+   else
+   timeout += TCP_TIMEOUT_MIN;
+   } else {
+   timeout = TCP_TIMEOUT_INIT;
+   }
 
/* If RTO is shorter, just schedule TLP in its place. */
tlp_time_stamp = tcp_jiffies32 + timeout;
diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c
index fe9a493d0208..449cd914d58e 100644
--- a/net/ipv4/tcp_recovery.c
+++ b/net/ipv4/tcp_recovery.c
@@ -113,7 +113,7 @@ void tcp_rack_mark_lost(struct sock *sk)
tp->rack.advanced = 0;
tcp_rack_detect_loss(sk, &timeout);
if (timeout) {
-   timeout = usecs_to_jiffies(timeout + TCP_REO_TIMEOUT_MIN);
+   timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN;
inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT,
  timeout, inet_csk(sk)->icsk_rto);
}
-- 
2.13.2.932.g7449e964c-goog