tree e96a9edb2df5b06e8ba59f0f8f685b0c1f09f840
parent 97077c4a9868fce8ac151512cde5d24fc1144f24
author Herbert Xu <[EMAIL PROTECTED]> Thu, 18 Aug 2005 02:03:59 -0700
committer David S. Miller <[EMAIL PROTECTED]> Thu, 18 Aug 2005 02:03:59 -0700

[TCP]: Fix bug #5070: kernel BUG at net/ipv4/tcp_output.c:864

1) We send out a normal sized packet with TSO on to start off.
2) ICMP is received indicating a smaller MTU.
3) We send the current sk_send_head which needs to be fragmented
since it was created before the ICMP event.  The first fragment
is then sent out.

At this point the remaining fragment is allocated by tcp_fragment.
However, its size is padded to fit the L1 cache-line size therefore
creating tail-room up to 124 bytes long.

This fragment will also be sitting at sk_send_head.

4) tcp_sendmsg is called again and it stores data in the tail-room of
of the fragment.
5) tcp_push_one is called by tcp_sendmsg which then calls tso_fragment
since the packet as a whole exceeds the MTU.

At this point we have a packet that has data in the head area being
fed to tso_fragment which bombs out.

My take on this is that we shouldn't ever call tcp_fragment on a TSO
socket for a packet that is yet to be transmitted since this creates
a packet on sk_send_head that cannot be extended.

So here is a patch to change it so that tso_fragment is always used
in this case.

Signed-off-by: Herbert Xu <[EMAIL PROTECTED]>
Signed-off-by: David S. Miller <[EMAIL PROTECTED]>

 net/ipv4/tcp_output.c |   39 ++++++++++++++++++++-------------------
 1 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -861,7 +861,8 @@ static int tso_fragment(struct sock *sk,
        u16 flags;
 
        /* All of a TSO frame must be composed of paged data.  */
-       BUG_ON(skb->len != skb->data_len);
+       if (skb->len != skb->data_len)
+               return tcp_fragment(sk, skb, len, mss_now);
 
        buff = sk_stream_alloc_pskb(sk, 0, 0, GFP_ATOMIC);
        if (unlikely(buff == NULL))
@@ -974,6 +975,8 @@ static int tcp_write_xmit(struct sock *s
 
        sent_pkts = 0;
        while ((skb = sk->sk_send_head)) {
+               unsigned int limit;
+
                tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
                BUG_ON(!tso_segs);
 
@@ -994,9 +997,10 @@ static int tcp_write_xmit(struct sock *s
                                break;
                }
 
+               limit = mss_now;
                if (tso_segs > 1) {
-                       u32 limit = tcp_window_allows(tp, skb,
-                                                     mss_now, cwnd_quota);
+                       limit = tcp_window_allows(tp, skb,
+                                                 mss_now, cwnd_quota);
 
                        if (skb->len < limit) {
                                unsigned int trim = skb->len % mss_now;
@@ -1004,15 +1008,12 @@ static int tcp_write_xmit(struct sock *s
                                if (trim)
                                        limit = skb->len - trim;
                        }
-                       if (skb->len > limit) {
-                               if (tso_fragment(sk, skb, limit, mss_now))
-                                       break;
-                       }
-               } else if (unlikely(skb->len > mss_now)) {
-                       if (unlikely(tcp_fragment(sk, skb,  mss_now, mss_now)))
-                               break;
                }
 
+               if (skb->len > limit &&
+                   unlikely(tso_fragment(sk, skb, limit, mss_now)))
+                       break;
+
                TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
                if (unlikely(tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))))
@@ -1064,11 +1065,14 @@ void tcp_push_one(struct sock *sk, unsig
        cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH);
 
        if (likely(cwnd_quota)) {
+               unsigned int limit;
+
                BUG_ON(!tso_segs);
 
+               limit = mss_now;
                if (tso_segs > 1) {
-                       u32 limit = tcp_window_allows(tp, skb,
-                                                     mss_now, cwnd_quota);
+                       limit = tcp_window_allows(tp, skb,
+                                                 mss_now, cwnd_quota);
 
                        if (skb->len < limit) {
                                unsigned int trim = skb->len % mss_now;
@@ -1076,15 +1080,12 @@ void tcp_push_one(struct sock *sk, unsig
                                if (trim)
                                        limit = skb->len - trim;
                        }
-                       if (skb->len > limit) {
-                               if (unlikely(tso_fragment(sk, skb, limit, 
mss_now)))
-                                       return;
-                       }
-               } else if (unlikely(skb->len > mss_now)) {
-                       if (unlikely(tcp_fragment(sk, skb, mss_now, mss_now)))
-                               return;
                }
 
+               if (skb->len > limit &&
+                   unlikely(tso_fragment(sk, skb, limit, mss_now)))
+                       return;
+
                /* Send it out now. */
                TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
-
To unsubscribe from this list: send the line "unsubscribe git-commits-head" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to