This is not intended for mainline; it is provided just for those who
are interested in the algorithm internals during a test.
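
The counters map onto the SACK processing paths below: TCP_SACK0..4
histogram how many valid SACK blocks each ACK carried;
TCP_WALKEDSKBS/TCP_WALKEDDSACKS and TCP_SKIPPEDSKBS count the skbs
visited by tcp_sacktag_walk() and passed over by tcp_sacktag_skip();
TCP_NOCACHE fires when recv_sack_cache is not usable for the ACK; the
remaining *WALK/*SKIP/*TOHIGH counters record which cache-assisted
shortcut each block took; and TCP_CACHEREMAINING counts ACKs that left
cache entries unconsumed.

Like the other TcpExt mibs, the new counters appear on the TcpExt
lines of /proc/net/netstat. A minimal user-space dumper, as a sketch
only (the proc file name and TcpExt layout are standard; nothing in
this snippet is part of the patch):

	/* Print the TcpExt name/value lines so the new TCP_SACK* and
	 * walk/skip counters can be eyeballed during a test run.
	 */
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char line[4096];
		FILE *f = fopen("/proc/net/netstat", "r");

		if (!f)
			return 1;
		while (fgets(line, sizeof(line), f))
			if (!strncmp(line, "TcpExt:", 7))
				fputs(line, stdout);
		fclose(f);
		return 0;
	}

Sampling before and after the test and diffing the two outputs gives
the per-run deltas.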

Signed-off-by: Ilpo Järvinen <[EMAIL PROTECTED]>
---
 include/linux/snmp.h |   19 +++++++++++++++++++
 net/ipv4/proc.c      |   19 +++++++++++++++++++
 net/ipv4/tcp_input.c |   50 ++++++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 86 insertions(+), 2 deletions(-)

diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index 89f0c2b..fbcd62d 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -214,6 +214,25 @@ enum
        LINUX_MIB_TCPDSACKIGNOREDOLD,           /* TCPSACKIgnoredOld */
        LINUX_MIB_TCPDSACKIGNOREDNOUNDO,        /* TCPSACKIgnoredNoUndo */
        LINUX_MIB_TCPSPURIOUSRTOS,              /* TCPSpuriousRTOs */
+       LINUX_MIB_TCP_SACK0,
+       LINUX_MIB_TCP_SACK1,
+       LINUX_MIB_TCP_SACK2,
+       LINUX_MIB_TCP_SACK3,
+       LINUX_MIB_TCP_SACK4,
+       LINUX_MIB_TCP_WALKEDSKBS,
+       LINUX_MIB_TCP_WALKEDDSACKS,
+       LINUX_MIB_TCP_SKIPPEDSKBS,
+       LINUX_MIB_TCP_NOCACHE,
+       LINUX_MIB_TCP_HEADWALK,
+       LINUX_MIB_TCP_FULLSKIP,
+       LINUX_MIB_TCP_TAILSKIP,
+       LINUX_MIB_TCP_HEADSKIP_TOHIGH,
+       LINUX_MIB_TCP_TAIL_TOHIGH,
+       LINUX_MIB_TCP_HEADSKIP,
+       LINUX_MIB_TCP_NEWSKIP,
+       LINUX_MIB_TCP_FULLWALK,
+       LINUX_MIB_TCP_TAILWALK,
+       LINUX_MIB_TCP_CACHEREMAINING,
        __LINUX_MIB_MAX
 };
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index ce34b28..a5e842d 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -227,6 +227,25 @@ static const struct snmp_mib snmp4_net_list[] = {
        SNMP_MIB_ITEM("TCPDSACKIgnoredOld", LINUX_MIB_TCPDSACKIGNOREDOLD),
        SNMP_MIB_ITEM("TCPDSACKIgnoredNoUndo", LINUX_MIB_TCPDSACKIGNOREDNOUNDO),
        SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS),
+       SNMP_MIB_ITEM("TCP_SACK0", LINUX_MIB_TCP_SACK0),
+       SNMP_MIB_ITEM("TCP_SACK1", LINUX_MIB_TCP_SACK1),
+       SNMP_MIB_ITEM("TCP_SACK2", LINUX_MIB_TCP_SACK2),
+       SNMP_MIB_ITEM("TCP_SACK3", LINUX_MIB_TCP_SACK3),
+       SNMP_MIB_ITEM("TCP_SACK4", LINUX_MIB_TCP_SACK4),
+       SNMP_MIB_ITEM("TCP_WALKEDSKBS", LINUX_MIB_TCP_WALKEDSKBS),
+       SNMP_MIB_ITEM("TCP_WALKEDDSACKS", LINUX_MIB_TCP_WALKEDDSACKS),
+       SNMP_MIB_ITEM("TCP_SKIPPEDSKBS", LINUX_MIB_TCP_SKIPPEDSKBS),
+       SNMP_MIB_ITEM("TCP_NOCACHE", LINUX_MIB_TCP_NOCACHE),
+       SNMP_MIB_ITEM("TCP_FULLWALK", LINUX_MIB_TCP_FULLWALK),
+       SNMP_MIB_ITEM("TCP_HEADWALK", LINUX_MIB_TCP_HEADWALK),
+       SNMP_MIB_ITEM("TCP_TAILWALK", LINUX_MIB_TCP_TAILWALK),
+       SNMP_MIB_ITEM("TCP_FULLSKIP", LINUX_MIB_TCP_FULLSKIP),
+       SNMP_MIB_ITEM("TCP_TAILSKIP", LINUX_MIB_TCP_TAILSKIP),
+       SNMP_MIB_ITEM("TCP_HEADSKIP", LINUX_MIB_TCP_HEADSKIP),
+       SNMP_MIB_ITEM("TCP_HEADSKIP_TOHIGH", LINUX_MIB_TCP_HEADSKIP_TOHIGH),
+       SNMP_MIB_ITEM("TCP_TAIL_TOHIGH", LINUX_MIB_TCP_TAIL_TOHIGH),
+       SNMP_MIB_ITEM("TCP_NEWSKIP", LINUX_MIB_TCP_NEWSKIP),
+       SNMP_MIB_ITEM("TCP_CACHEREMAINING", LINUX_MIB_TCP_CACHEREMAINING),
        SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5833b01..87ab327 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1370,6 +1370,10 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
                        *flag |= tcp_sacktag_one(skb, tp, reord, dup_sack, *fack_count);
 
                *fack_count += tcp_skb_pcount(skb);
+
+               NET_INC_STATS_BH(LINUX_MIB_TCP_WALKEDSKBS);
+               if (dup_sack)
+                       NET_INC_STATS_BH(LINUX_MIB_TCP_WALKEDDSACKS);
        }
        return skb;
 }
@@ -1386,6 +1390,8 @@ static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
 
                if (!before(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
                        break;
+
+               NET_INC_STATS_BH(LINUX_MIB_TCP_SKIPPEDSKBS);
        }
        return skb;
 }
@@ -1434,6 +1440,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
        int fack_count;
        int i, j;
        int first_sack_index;
+       int fullwalk = 1;
 
        if (!tp->sacked_out) {
                if (WARN_ON(tp->fackets_out))
@@ -1523,6 +1530,17 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
                        cache++;
        }
 
+       switch (used_sacks) {
+               case 0: NET_INC_STATS_BH(LINUX_MIB_TCP_SACK0); break;
+               case 1: NET_INC_STATS_BH(LINUX_MIB_TCP_SACK1); break;
+               case 2: NET_INC_STATS_BH(LINUX_MIB_TCP_SACK2); break;
+               case 3: NET_INC_STATS_BH(LINUX_MIB_TCP_SACK3); break;
+               case 4: NET_INC_STATS_BH(LINUX_MIB_TCP_SACK4); break;
+       }
+
+       if (!tcp_sack_cache_ok(tp, cache))
+               NET_INC_STATS_BH(LINUX_MIB_TCP_NOCACHE);
+
        while (i < used_sacks) {
                u32 start_seq = sp[i].start_seq;
                u32 end_seq = sp[i].end_seq;
@@ -1544,6 +1562,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
                /* Can skip some work by looking recv_sack_cache? */
                if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
                    after(end_seq, cache->start_seq)) {
+                       int headskip = 0;
+
+                       fullwalk = 0;
 
                        /* Head todo? */
                        if (before(start_seq, cache->start_seq)) {
@@ -1551,12 +1572,18 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
                                skb = tcp_sacktag_walk(skb, sk, next_dup, start_seq,
                                                       cache->start_seq, dup_sack,
                                                       &fack_count, &reord, &flag);
-                       }
+                               NET_INC_STATS_BH(LINUX_MIB_TCP_HEADWALK);
+                       } else
+                               headskip = 1;
 
                        /* Rest of the block already fully processed? */
                        if (!after(end_seq, cache->end_seq)) {
                                skb = tcp_maybe_skipping_dsack(skb, sk, next_dup, cache->end_seq,
                                                               &fack_count, &reord, &flag);
+                               if (headskip)
+                                       NET_INC_STATS_BH(LINUX_MIB_TCP_FULLSKIP);
+                               else
+                                       NET_INC_STATS_BH(LINUX_MIB_TCP_TAILSKIP);
                                goto advance_sp;
                        }
 
@@ -1571,24 +1598,37 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
                                skb = tcp_write_queue_next(sk, tp->highest_sack);
                                fack_count = tp->fackets_out;
                                cache++;
+
+                               if (headskip)
+                                       NET_INC_STATS_BH(LINUX_MIB_TCP_HEADSKIP_TOHIGH);
+                               else
+                                       NET_INC_STATS_BH(LINUX_MIB_TCP_TAIL_TOHIGH);
                                goto walk;
                        }
 
                        skb = tcp_sacktag_skip(skb, sk, cache->end_seq);
                        /* Check overlap against next cached too (past this one already) */
                        cache++;
+
+                       if (headskip)
+                               NET_INC_STATS_BH(LINUX_MIB_TCP_HEADSKIP);
                        continue;
                }
 
                if (!before(start_seq, tcp_highest_sack_seq(tp))) {
                        skb = tcp_write_queue_next(sk, tp->highest_sack);
                        fack_count = tp->fackets_out;
+                       NET_INC_STATS_BH(LINUX_MIB_TCP_NEWSKIP);
                }
                skb = tcp_sacktag_skip(skb, sk, start_seq);
 
 walk:
                skb = tcp_sacktag_walk(skb, sk, next_dup, start_seq, end_seq,
                                       dup_sack, &fack_count, &reord, &flag);
+               if (fullwalk)
+                       NET_INC_STATS_BH(LINUX_MIB_TCP_FULLWALK);
+               else
+                       NET_INC_STATS_BH(LINUX_MIB_TCP_TAILWALK);
 
 advance_sp:
                /* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct
@@ -1598,15 +1638,21 @@ advance_sp:
                        flag &= ~FLAG_ONLY_ORIG_SACKED;
 
                i++;
+               fullwalk = 1;
        }
 
+       if (tcp_sack_cache_ok(tp, cache))
+               NET_INC_STATS_BH(LINUX_MIB_TCP_CACHEREMAINING);
+
        /* Clear the head of the cache sack blocks so we can skip it next time */
        for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
                tp->recv_sack_cache[i].start_seq = 0;
                tp->recv_sack_cache[i].end_seq = 0;
        }
-       for (j = 0; j < used_sacks; j++)
+       for (j = 0; j < used_sacks; j++) {
+               WARN_ON(i >= ARRAY_SIZE(tp->recv_sack_cache));
                tp->recv_sack_cache[i++] = sp[j];
+       }
 
        flag |= tcp_mark_lost_retrans(sk);
 
-- 
1.5.0.6
