Simplify the TCP congestion control infrastructure: fold the separate
pkts_acked hook into the cong_avoid hook, whose last argument changes
from a good-ack flag to the number of packets acked.
Signed-off-by: Stephen Hemminger <[EMAIL PROTECTED]>
--- net-2.6.orig/include/net/tcp.h
+++ net-2.6/include/net/tcp.h
@@ -679,7 +679,7 @@ struct tcp_congestion_ops {
u32 (*min_cwnd)(struct sock *sk);
/* do new cwnd calculation (required) */
void (*cong_avoid)(struct sock *sk, u32 ack,
- u32 rtt, u32 in_flight, int good_ack);
+ u32 rtt, u32 in_flight, u32 pkts_acked);
/* round trip time sample per acked packet (optional) */
void (*rtt_sample)(struct sock *sk, u32 usrtt);
/* call before changing ca_state (optional) */
@@ -688,8 +688,6 @@ struct tcp_congestion_ops {
void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
/* new value of cwnd after loss (optional) */
u32 (*undo_cwnd)(struct sock *sk);
- /* hook for packet ack accounting (optional) */
- void (*pkts_acked)(struct sock *sk, u32 num_acked);
/* get info for inet_diag (optional) */
void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb);
@@ -709,7 +707,7 @@ extern int tcp_set_congestion_control(st
extern struct tcp_congestion_ops tcp_init_congestion_ops;
extern u32 tcp_reno_ssthresh(struct sock *sk);
extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack,
- u32 rtt, u32 in_flight, int flag);
+ u32 rtt, u32 in_flight, u32 pkts_acked);
extern u32 tcp_reno_min_cwnd(struct sock *sk);
extern struct tcp_congestion_ops tcp_reno;
--- net-2.6.orig/net/ipv4/tcp_bic.c
+++ net-2.6/net/ipv4/tcp_bic.c
@@ -156,7 +156,7 @@ static inline void bictcp_update(struct
/* Detect low utilization in congestion avoidance */
-static inline void bictcp_low_utilization(struct sock *sk, int flag)
+static inline void bictcp_low_utilization(struct sock *sk, u32 pkts_acked)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
@@ -167,7 +167,7 @@ static inline void bictcp_low_utilizatio
/* Discard delay samples right after fast recovery */
tcp_time_stamp < ca->epoch_start + HZ ||
/* this delay samples may not be accurate */
- flag == 0) {
+ pkts_acked == 0) {
ca->last_delay = 0;
goto notlow;
}
@@ -210,12 +210,18 @@ static inline void bictcp_low_utilizatio
}
static void bictcp_cong_avoid(struct sock *sk, u32 ack,
- u32 seq_rtt, u32 in_flight, int data_acked)
+ u32 seq_rtt, u32 in_flight, u32 count)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
- bictcp_low_utilization(sk, data_acked);
+ bictcp_low_utilization(sk, count);
+
+ /* Track delayed ack ratio: ratio = (15*ratio + sample) / 16 */
+ if (count && inet_csk(sk)->icsk_ca_state == TCP_CA_Open) {
+ count -= ca->delayed_ack >> ACK_RATIO_SHIFT;
+ ca->delayed_ack += count;
+ }
if (!tcp_is_cwnd_limited(sk, in_flight))
return;
@@ -291,21 +297,6 @@ static void bictcp_state(struct sock *sk
bictcp_reset(inet_csk_ca(sk));
}
-/* Track delayed acknowledgement ratio using sliding window
- * ratio = (15*ratio + sample) / 16
- */
-static void bictcp_acked(struct sock *sk, u32 cnt)
-{
- const struct inet_connection_sock *icsk = inet_csk(sk);
-
- if (cnt > 0 && icsk->icsk_ca_state == TCP_CA_Open) {
- struct bictcp *ca = inet_csk_ca(sk);
- cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT;
- ca->delayed_ack += cnt;
- }
-}
-
-
static struct tcp_congestion_ops bictcp = {
.init = bictcp_init,
.ssthresh = bictcp_recalc_ssthresh,
@@ -313,7 +304,6 @@ static struct tcp_congestion_ops bictcp
.set_state = bictcp_state,
.undo_cwnd = bictcp_undo_cwnd,
.min_cwnd = bictcp_min_cwnd,
- .pkts_acked = bictcp_acked,
.owner = THIS_MODULE,
.name = "bic",
};
--- net-2.6.orig/net/ipv4/tcp_cong.c
+++ net-2.6/net/ipv4/tcp_cong.c
@@ -182,7 +182,7 @@ int tcp_set_congestion_control(struct so
* SIGCOMM '88, p. 328.
*/
void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight,
- int flag)
+ u32 pkts_acked)
{
struct tcp_sock *tp = tcp_sk(sk);
--- net-2.6.orig/net/ipv4/tcp_htcp.c
+++ net-2.6/net/ipv4/tcp_htcp.c
@@ -202,11 +202,14 @@ static u32 htcp_recalc_ssthresh(struct s
}
static void htcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
- u32 in_flight, int data_acked)
+ u32 in_flight, u32 pkts_acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct htcp *ca = inet_csk_ca(sk);
+ if (use_bandwidth_switch)
+ measure_achieved_throughput(sk, pkts_acked);
+
if (!tcp_is_cwnd_limited(sk, in_flight))
return;
@@ -271,7 +274,6 @@ static struct tcp_congestion_ops htcp =
.cong_avoid = htcp_cong_avoid,
.set_state = htcp_state,
.undo_cwnd = htcp_cwnd_undo,
- .pkts_acked = measure_achieved_throughput,
.owner = THIS_MODULE,
.name = "htcp",
};
@@ -280,8 +282,7 @@ static int __init htcp_register(void)
{
BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE);
BUILD_BUG_ON(BETA_MIN >= BETA_MAX);
- if (!use_bandwidth_switch)
- htcp.pkts_acked = NULL;
+
return tcp_register_congestion_control(&htcp);
}
--- net-2.6.orig/net/ipv4/tcp_hybla.c
+++ net-2.6/net/ipv4/tcp_hybla.c
@@ -87,7 +87,7 @@ static inline u32 hybla_fraction(u32 odd
* o remember increments <1
*/
static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
- u32 in_flight, int flag)
+ u32 in_flight, u32 count)
{
struct tcp_sock *tp = tcp_sk(sk);
struct hybla *ca = inet_csk_ca(sk);
@@ -104,7 +104,7 @@ static void hybla_cong_avoid(struct sock
return;
if (!ca->hybla_en)
- return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, flag);
+ return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, count);
if (ca->rho == 0)
hybla_recalc_param(sk);
--- net-2.6.orig/net/ipv4/tcp_input.c
+++ net-2.6/net/ipv4/tcp_input.c
@@ -1974,10 +1974,10 @@ static inline void tcp_ack_update_rtt(st
}
static inline void tcp_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
- u32 in_flight, int good)
+ u32 in_flight, u32 count)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, good);
+ icsk->icsk_ca_ops->cong_avoid(sk, ack, rtt, in_flight, count);
tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
}
@@ -2060,7 +2060,7 @@ static inline u32 tcp_usrtt(const struct
}
/* Remove acknowledged frames from the retransmission queue. */
-static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
+static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p, u32 *pkts_acked)
{
struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2068,7 +2068,6 @@ static int tcp_clean_rtx_queue(struct so
__u32 now = tcp_time_stamp;
int acked = 0;
__s32 seq_rtt = -1;
- u32 pkts_acked = 0;
void (*rtt_sample)(struct sock *sk, u32 usrtt)
= icsk->icsk_ca_ops->rtt_sample;
@@ -2098,7 +2097,7 @@ static int tcp_clean_rtx_queue(struct so
*/
if (!(scb->flags & TCPCB_FLAG_SYN)) {
acked |= FLAG_DATA_ACKED;
- ++pkts_acked;
+ *pkts_acked += 1;
} else {
acked |= FLAG_SYN_ACKED;
tp->retrans_stamp = 0;
@@ -2138,9 +2137,6 @@ static int tcp_clean_rtx_queue(struct so
if (acked&FLAG_ACKED) {
tcp_ack_update_rtt(sk, acked, seq_rtt);
tcp_ack_packets_out(sk, tp);
-
- if (icsk->icsk_ca_ops->pkts_acked)
- icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked);
}
#if FASTRETRANS_DEBUG > 0
@@ -2148,7 +2144,6 @@ static int tcp_clean_rtx_queue(struct so
BUG_TRAP((int)tp->lost_out >= 0);
BUG_TRAP((int)tp->retrans_out >= 0);
if (!tp->packets_out && tp->rx_opt.sack_ok) {
- const struct inet_connection_sock *icsk = inet_csk(sk);
if (tp->lost_out) {
printk(KERN_DEBUG "Leak l=%u %d\n",
tp->lost_out, icsk->icsk_ca_state);
@@ -2297,7 +2292,7 @@ static int tcp_ack(struct sock *sk, stru
u32 prior_snd_una = tp->snd_una;
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
- u32 prior_in_flight;
+ u32 prior_in_flight, pkts_acked;
s32 seq_rtt;
int prior_packets;
@@ -2351,7 +2346,8 @@ static int tcp_ack(struct sock *sk, stru
prior_in_flight = tcp_packets_in_flight(tp);
/* See if we can take anything off of the retransmit queue. */
- flag |= tcp_clean_rtx_queue(sk, &seq_rtt);
+ pkts_acked = 0;
+ flag |= tcp_clean_rtx_queue(sk, &seq_rtt, &pkts_acked);
if (tp->frto_counter)
tcp_process_frto(sk, prior_snd_una);
@@ -2359,11 +2355,11 @@ static int tcp_ack(struct sock *sk, stru
if (tcp_ack_is_dubious(sk, flag)) {
/* Advanve CWND, if state allows this. */
if ((flag & FLAG_DATA_ACKED) && tcp_may_raise_cwnd(sk, flag))
- tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 0);
+ tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, pkts_acked);
tcp_fastretrans_alert(sk, prior_snd_una, prior_packets, flag);
} else {
if ((flag & FLAG_DATA_ACKED))
- tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, 1);
+ tcp_cong_avoid(sk, ack, seq_rtt, prior_in_flight, pkts_acked);
}
if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP))
--- net-2.6.orig/net/ipv4/tcp_scalable.c
+++ net-2.6/net/ipv4/tcp_scalable.c
@@ -17,7 +17,7 @@
#define TCP_SCALABLE_MD_SCALE 3
static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
- u32 in_flight, int flag)
+ u32 in_flight, u32 cnt)
{
struct tcp_sock *tp = tcp_sk(sk);
--- net-2.6.orig/net/ipv4/tcp_vegas.c
+++ net-2.6/net/ipv4/tcp_vegas.c
@@ -163,13 +163,13 @@ static void tcp_vegas_cwnd_event(struct
}
static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack,
- u32 seq_rtt, u32 in_flight, int flag)
+ u32 seq_rtt, u32 in_flight, u32 cnt)
{
struct tcp_sock *tp = tcp_sk(sk);
struct vegas *vegas = inet_csk_ca(sk);
if (!vegas->doing_vegas_now)
- return tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, flag);
+ return tcp_reno_cong_avoid(sk, ack, seq_rtt, in_flight, cnt);
/* The key players are v_beg_snd_una and v_beg_snd_nxt.
*
--- net-2.6.orig/net/ipv4/tcp_westwood.c
+++ net-2.6/net/ipv4/tcp_westwood.c
@@ -70,18 +70,6 @@ static inline void westwood_filter(struc
}
/*
- * @westwood_pkts_acked
- * Called after processing group of packets.
- * but all westwood needs is the last sample of srtt.
- */
-static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt)
-{
- struct westwood *w = inet_csk_ca(sk);
- if (cnt > 0)
- w->rtt = tcp_sk(sk)->srtt >> 3;
-}
-
-/*
* @westwood_update_window
* It updates RTT evaluation window if it is the right moment to do
* it. If so it calls filter for evaluating bandwidth.
@@ -231,14 +219,29 @@ static void tcp_westwood_info(struct soc
}
+/*
+ * @westwood_cong_avoid
+ * Called after processing a group of acked packets; all westwood
+ * needs from it is the latest srtt sample, then Reno grows cwnd.
+ */
+static void tcp_westwood_cong_avoid(struct sock *sk, u32 ack, u32 rtt,
+ u32 in_flight, u32 pkts_acked)
+{
+ if (pkts_acked > 0) {
+ struct westwood *w = inet_csk_ca(sk);
+ w->rtt = tcp_sk(sk)->srtt >> 3;
+ }
+ return tcp_reno_cong_avoid(sk, ack, rtt, in_flight, pkts_acked);
+}
+
+
static struct tcp_congestion_ops tcp_westwood = {
.init = tcp_westwood_init,
.ssthresh = tcp_reno_ssthresh,
- .cong_avoid = tcp_reno_cong_avoid,
+ .cong_avoid = tcp_westwood_cong_avoid,
.min_cwnd = tcp_westwood_cwnd_min,
.cwnd_event = tcp_westwood_event,
.get_info = tcp_westwood_info,
- .pkts_acked = tcp_westwood_pkts_acked,
.owner = THIS_MODULE,
.name = "westwood"
--
Stephen Hemminger <[EMAIL PROTECTED]>
OSDL http://developer.osdl.org/~shemminger
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html