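We can precompute a number of the cubic TCP scaling factors once, at
module initialization, because SRTT is a constant and most of the other
inputs come from module parameters (beta, bic_scale).
Because the cached factors are only computed in cubictcp_register(),
beta and bic_scale become read-only (0444) so they cannot be changed at
runtime and fall out of sync with the precomputed values.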
Given the history of simple math errors, this patch deserves extra
special attention.
Signed-off-by: Stephen Hemminger <[EMAIL PROTECTED]>
--- net-2.6.16.orig/net/ipv4/tcp_cubic.c
+++ net-2.6.16/net/ipv4/tcp_cubic.c
@@ -27,22 +27,28 @@
*/
#define BICTCP_HZ 10 /* BIC HZ 2^10 = 1024 */
+#define BICTCP_SRTT ((HZ << 3)/10) /* BIC is now RTT independent */
+
static int fast_convergence = 1;
static int max_increment = 16;
static int beta = 819; /* = 819/1024 (BICTCP_BETA_SCALE) */
static int initial_ssthresh = 100;
static int bic_scale = 41;
static int tcp_friendliness = 1;
+static u32 cube_rtt_scale;
+static u32 beta_scale;
+static u32 cube_scale;
+static u64 cube_factor;
module_param(fast_convergence, int, 0644);
MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence");
module_param(max_increment, int, 0644);
MODULE_PARM_DESC(max_increment, "Limit on increment allowed during binary
search");
-module_param(beta, int, 0644);
+module_param(beta, int, 0444);
MODULE_PARM_DESC(beta, "beta for multiplicative increase");
module_param(initial_ssthresh, int, 0644);
MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
-module_param(bic_scale, int, 0644);
+module_param(bic_scale, int, 0444);
MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function
(bic_scale/1024)");
module_param(tcp_friendliness, int, 0644);
MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
@@ -151,56 +157,10 @@ static u32 cubic_root(u64 x)
return (u32)end;
}
-static inline u32 bictcp_K(u32 dist, u32 srtt)
+static inline u32 bictcp_K(u32 dist)
{
- u64 d64;
- u32 d32;
- u32 count;
- u32 result;
-
- /* calculate the "K" for (wmax-cwnd) = c/rtt * K^3
- so K = cubic_root( (wmax-cwnd)*rtt/c )
- the unit of K is bictcp_HZ=2^10, not HZ
-
- c = bic_scale >> 10
- rtt = (tp->srtt >> 3 ) / HZ
-
- the following code has been designed and tested for
- cwnd < 1 million packets
- RTT < 100 seconds
- HZ < 1,000,00 (corresponding to 10 nano-second)
-
- */
-
- /* 1/c * 2^2*bictcp_HZ */
- d32 = (1 << (10+2*BICTCP_HZ)) / bic_scale;
- d64 = (__u64)d32;
-
- /* srtt * 2^count / HZ
- 1) to get a better accuracy of the following d32,
- the larger the "count", the better the accuracy
- 2) and avoid overflow of the following d64
- the larger the "count", the high possibility of overflow
- 3) so find a "count" between bictcp_hz-3 and bictcp_hz
- "count" may be less than bictcp_HZ,
- then d64 becomes 0. that is OK
- */
- d32 = srtt;
- count = 0;
- while (((d32 & 0x80000000)==0) && (count < BICTCP_HZ)){
- d32 = d32 << 1;
- count++;
- }
- d32 = d32 / HZ;
-
/* (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ) */
- d64 = (d64 * dist * d32) >> (count+3-BICTCP_HZ);
-
- /* cubic root */
- d64 = cubic_root(d64);
-
- result = (u32)d64;
- return result;
+ return cubic_root((cube_factor * dist) >> (cube_scale + 3 - BICTCP_HZ));
}
/*
@@ -209,7 +169,7 @@ static inline u32 bictcp_K(u32 dist, u32
static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
{
u64 d64;
- u32 d32, t, srtt, bic_target, min_cnt, max_cnt;
+ u32 d32, t, bic_target, min_cnt, max_cnt;
ca->ack_cnt++; /* count the number of ACKs */
@@ -220,7 +180,6 @@ static inline void bictcp_update(struct
ca->last_cwnd = cwnd;
ca->last_time = tcp_time_stamp;
- srtt = (HZ << 3)/10; /* use real time-based growth function */
if (ca->epoch_start == 0) {
ca->epoch_start = tcp_time_stamp; /* record the beginning
of an epoch */
@@ -231,7 +190,7 @@ static inline void bictcp_update(struct
ca->bic_K = 0;
ca->bic_origin_point = cwnd;
} else {
- ca->bic_K = bictcp_K(ca->last_max_cwnd-cwnd, srtt);
+ ca->bic_K = bictcp_K(ca->last_max_cwnd-cwnd);
ca->bic_origin_point = ca->last_max_cwnd;
}
}
@@ -260,8 +219,7 @@ static inline void bictcp_update(struct
d32 = t - ca->bic_K;
d64 = (u64)d32;
- d32 = (bic_scale << 3) * HZ / srtt; /* 1024*c/rtt */
- d64 = (d32 * d64 * d64 * d64) >> (10+3*BICTCP_HZ); /* c/rtt *
(t-K)^3 */
+ d64 = (cube_rtt_scale * d64 * d64 * d64) >> (10+3*BICTCP_HZ); /*
c/rtt * (t-K)^3 */
d32 = (u32)d64;
if (t < ca->bic_K) /* below
origin*/
bic_target = ca->bic_origin_point - d32;
@@ -288,8 +246,7 @@ static inline void bictcp_update(struct
/* TCP Friendly */
if (tcp_friendliness) {
- u32 scale =
8*(BICTCP_BETA_SCALE+beta)/3/(BICTCP_BETA_SCALE-beta);
- d32 = (cwnd * scale) >> 3;
+ d32 = (cwnd * beta_scale) >> 3;
while (ca->ack_cnt > d32) { /* update tcp cwnd */
ca->ack_cnt -= d32;
ca->tcp_cwnd++;
@@ -427,7 +384,51 @@ static struct tcp_congestion_ops cubictc
static int __init cubictcp_register(void)
{
+ u64 d64;
+ u32 d32;
+
BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
+
+ /* Precompute a bunch of the scaling factors that are used per-packet */
+ beta_scale = 8*(BICTCP_BETA_SCALE+beta)/3/(BICTCP_BETA_SCALE-beta);
+
+ cube_rtt_scale = (bic_scale << 3) * HZ / BICTCP_SRTT; /* 1024*c/rtt */
+
+ /* calculate the "K" for (wmax-cwnd) = c/rtt * K^3
+ so K = cubic_root( (wmax-cwnd)*rtt/c )
+ the unit of K is bictcp_HZ=2^10, not HZ
+
+ c = bic_scale >> 10
+ rtt = (tp->srtt >> 3 ) / HZ
+
+ the following code has been designed and tested for
+ cwnd < 1 million packets
+ RTT < 100 seconds
+ HZ < 1,000,00 (corresponding to 10 nano-second)
+
+ */
+
+ /* 1/c * 2^2*bictcp_HZ */
+ d64 = d32 = (1 << (10+2*BICTCP_HZ)) / bic_scale;
+
+ /* srtt * 2^count / HZ
+ 1) to get a better accuracy of the following d32,
+ the larger the "count", the better the accuracy
+ 2) and avoid overflow of the following d64
+ the larger the "count", the high possibility of overflow
+ 3) so find a "count" between bictcp_hz-3 and bictcp_hz
+ "count" may be less than bictcp_HZ,
+ then d64 becomes 0. that is OK
+ */
+ d32 = BICTCP_SRTT;
+ cube_scale = 0;
+
+ while ( !(d32 & 0x80000000) && (cube_scale < BICTCP_HZ)){
+ d32 = d32 << 1;
+ ++cube_scale;
+ }
+ cube_factor = d64 * d32 / HZ;
+
return tcp_register_congestion_control(&cubictcp);
}
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html