This is not intended to go to mainline; it is provided just for those who are interested enough in the algorithm internals during a test.
Signed-off-by: Ilpo Järvinen <[EMAIL PROTECTED]> --- include/linux/snmp.h | 20 +++++++++++++++++++ net/ipv4/proc.c | 20 +++++++++++++++++++ net/ipv4/tcp_input.c | 52 +++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 89 insertions(+), 3 deletions(-) diff --git a/include/linux/snmp.h b/include/linux/snmp.h index 89f0c2b..42b8c07 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -214,6 +214,26 @@ enum LINUX_MIB_TCPDSACKIGNOREDOLD, /* TCPSACKIgnoredOld */ LINUX_MIB_TCPDSACKIGNOREDNOUNDO, /* TCPSACKIgnoredNoUndo */ LINUX_MIB_TCPSPURIOUSRTOS, /* TCPSpuriousRTOs */ + LINUX_MIB_TCP_SACKTAG, + LINUX_MIB_TCP_SACK0, + LINUX_MIB_TCP_SACK1, + LINUX_MIB_TCP_SACK2, + LINUX_MIB_TCP_SACK3, + LINUX_MIB_TCP_SACK4, + LINUX_MIB_TCP_WALKEDSKBS, + LINUX_MIB_TCP_WALKEDDSACKS, + LINUX_MIB_TCP_SKIPPEDSKBS, + LINUX_MIB_TCP_NOCACHE, + LINUX_MIB_TCP_FULLWALK, + LINUX_MIB_TCP_HEADWALK, + LINUX_MIB_TCP_TAILWALK, + LINUX_MIB_TCP_FULLSKIP, + LINUX_MIB_TCP_TAILSKIP, + LINUX_MIB_TCP_HEADSKIP, + LINUX_MIB_TCP_FULLSKIP_TOHIGH, + LINUX_MIB_TCP_TAILSKIP_TOHIGH, + LINUX_MIB_TCP_NEWSKIP, + LINUX_MIB_TCP_CACHEREMAINING, __LINUX_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index e5b05b0..9909178 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -246,6 +246,26 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPDSACKIgnoredOld", LINUX_MIB_TCPDSACKIGNOREDOLD), SNMP_MIB_ITEM("TCPDSACKIgnoredNoUndo", LINUX_MIB_TCPDSACKIGNOREDNOUNDO), SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS), + SNMP_MIB_ITEM("TCP_SACKTAG", LINUX_MIB_TCP_SACKTAG), + SNMP_MIB_ITEM("TCP_SACK0", LINUX_MIB_TCP_SACK0), + SNMP_MIB_ITEM("TCP_SACK1", LINUX_MIB_TCP_SACK1), + SNMP_MIB_ITEM("TCP_SACK2", LINUX_MIB_TCP_SACK2), + SNMP_MIB_ITEM("TCP_SACK3", LINUX_MIB_TCP_SACK3), + SNMP_MIB_ITEM("TCP_SACK4", LINUX_MIB_TCP_SACK4), + SNMP_MIB_ITEM("TCP_WALKEDSKBS", LINUX_MIB_TCP_WALKEDSKBS), + SNMP_MIB_ITEM("TCP_WALKEDDSACKS", LINUX_MIB_TCP_WALKEDDSACKS), + 
SNMP_MIB_ITEM("TCP_SKIPPEDSKBS", LINUX_MIB_TCP_SKIPPEDSKBS), + SNMP_MIB_ITEM("TCP_NOCACHE", LINUX_MIB_TCP_NOCACHE), + SNMP_MIB_ITEM("TCP_FULLWALK", LINUX_MIB_TCP_FULLWALK), + SNMP_MIB_ITEM("TCP_HEADWALK", LINUX_MIB_TCP_HEADWALK), + SNMP_MIB_ITEM("TCP_TAILWALK", LINUX_MIB_TCP_TAILWALK), + SNMP_MIB_ITEM("TCP_FULLSKIP", LINUX_MIB_TCP_FULLSKIP), + SNMP_MIB_ITEM("TCP_TAILSKIP", LINUX_MIB_TCP_TAILSKIP), + SNMP_MIB_ITEM("TCP_HEADSKIP", LINUX_MIB_TCP_HEADSKIP), + SNMP_MIB_ITEM("TCP_FULLSKIP_TOHIGH", LINUX_MIB_TCP_FULLSKIP_TOHIGH), + SNMP_MIB_ITEM("TCP_TAILSKIP_TOHIGH", LINUX_MIB_TCP_TAILSKIP_TOHIGH), + SNMP_MIB_ITEM("TCP_NEWSKIP", LINUX_MIB_TCP_NEWSKIP), + SNMP_MIB_ITEM("TCP_CACHEREMAINING", LINUX_MIB_TCP_CACHEREMAINING), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c13f1af..02b34a5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1382,6 +1382,10 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, tcp_sacktag_one(skb, sk, state, in_sack, dup_sack, state->fack_count, end_seq); + + NET_INC_STATS_BH(LINUX_MIB_TCP_WALKEDSKBS); + if (dup_sack) + NET_INC_STATS_BH(LINUX_MIB_TCP_WALKEDDSACKS); } return skb; } @@ -1405,6 +1409,7 @@ static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk, skb = tcp_sacktag_walk(skb, sk, state, state->dup_start, state->dup_end, 1); } + NET_INC_STATS_BH(LINUX_MIB_TCP_SKIPPEDSKBS); } return skb; } @@ -1535,9 +1540,22 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ cache++; } + NET_INC_STATS_BH(LINUX_MIB_TCP_SACKTAG); + switch (used_sacks) { + case 0: NET_INC_STATS_BH(LINUX_MIB_TCP_SACK0); break; + case 1: NET_INC_STATS_BH(LINUX_MIB_TCP_SACK1); break; + case 2: NET_INC_STATS_BH(LINUX_MIB_TCP_SACK2); break; + case 3: NET_INC_STATS_BH(LINUX_MIB_TCP_SACK3); break; + case 4: NET_INC_STATS_BH(LINUX_MIB_TCP_SACK4); break; + } + + if (!tcp_sack_cache_ok(tp, cache)) + NET_INC_STATS_BH(LINUX_MIB_TCP_NOCACHE); + while (i < 
used_sacks) { u32 start_seq = sp[i].start_seq; u32 end_seq = sp[i].end_seq; + int fullwalk = 0; /* Event "B" in the comment above. */ if (after(end_seq, tp->high_seq)) @@ -1550,41 +1568,69 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (tcp_sack_cache_ok(tp, cache)) { if (after(end_seq, cache->start_seq)) { + int headskip = 0; + if (before(start_seq, cache->start_seq)) { skb = tcp_sacktag_skip(skb, sk, &state, start_seq); skb = tcp_sacktag_walk(skb, sk, &state, start_seq, cache->start_seq, 0); - } + NET_INC_STATS_BH(LINUX_MIB_TCP_HEADWALK); + } else + headskip = 1; + /* Rest of the block already fully processed? */ if (!after(end_seq, cache->end_seq)) { i++; + if (headskip) + NET_INC_STATS_BH(LINUX_MIB_TCP_FULLSKIP); + else + NET_INC_STATS_BH(LINUX_MIB_TCP_TAILSKIP); continue; } + if (TCP_SKB_CB(tp->highest_sack)->end_seq != cache->end_seq) { skb = tcp_sacktag_skip(skb, sk, &state, cache->end_seq); cache++; + if (headskip) + NET_INC_STATS_BH(LINUX_MIB_TCP_HEADSKIP); continue; } skb = tcp_sacktag_skip_to_highsack(skb, sk, &state, cache); - } + if (headskip) + NET_INC_STATS_BH(LINUX_MIB_TCP_FULLSKIP_TOHIGH); + else + NET_INC_STATS_BH(LINUX_MIB_TCP_TAILSKIP_TOHIGH); + } else + fullwalk = 1; } else if (!before(start_seq, tcp_highest_sack_seq(sk)) && before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(sk))) { skb = tcp_write_queue_next(sk, tp->highest_sack); state.fack_count = tp->fackets_out; + NET_INC_STATS_BH(LINUX_MIB_TCP_NEWSKIP); + fullwalk = 1; } skb = tcp_sacktag_skip(skb, sk, &state, start_seq); skb = tcp_sacktag_walk(skb, sk, &state, start_seq, end_seq, 0); + if (fullwalk) + NET_INC_STATS_BH(LINUX_MIB_TCP_FULLWALK); + else + NET_INC_STATS_BH(LINUX_MIB_TCP_TAILWALK); i++; } + if (tcp_sack_cache_ok(tp, cache)) + NET_INC_STATS_BH(LINUX_MIB_TCP_CACHEREMAINING); + /* Clear the head of the cache sack blocks so we can skip it next time */ for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) { 
tp->recv_sack_cache[i].start_seq = 0; tp->recv_sack_cache[i].end_seq = 0; } - for (j = 0; j < used_sacks; j++) + for (j = 0; j < used_sacks; j++) { + WARN_ON(i >= ARRAY_SIZE(tp->recv_sack_cache)); tp->recv_sack_cache[i++] = sp[j]; + } if (tp->retrans_out && after(state.highest_sack_end_seq, tp->lost_retrans_low) && -- 1.5.0.6 - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html