I've been wating for this issue is solved.
I will try to test this implementation.
thanks.

On Tue, Dec 08, 2015 at 09:21:48PM +0000, Hiren Panchasara wrote:
> Author: hiren
> Date: Tue Dec  8 21:21:48 2015
> New Revision: 292003
> URL: https://svnweb.freebsd.org/changeset/base/292003
> 
> Log:
>   One of the ways to detect loss is to count duplicate acks coming back from 
> the
>   other end till it reaches predetermined threshold which is 3 for us right 
> now.
>   Once that happens, we trigger fast-retransmit to do loss recovery.
>   
>   Main problem with the current implementation is that we don't honor SACK
>   information well to detect whether an incoming ack is a dupack or not. 
> RFC6675
>   has latest recommendations for that. According to it, dupack is a segment 
> that
>   arrives carrying a SACK block that identifies previously unknown information
>   between snd_una and snd_max even if it carries new data, changes the 
> advertised
>   window, or moves the cumulative acknowledgment point.
>   
>   With the prevalence of Selective ACK (SACK) these days, improper handling 
> can
>   lead to delayed loss recovery.
>   
>   With the fix, new behavior looks like following:
>   
>   0) th_ack < snd_una --> ignore
>   Old acks are ignored.
>   1) th_ack == snd_una, !sack_changed --> ignore
>   Acks with SACK enabled but without any new SACK info in them are ignored.
>   2) th_ack == snd_una, window == old_window --> increment
>   Increment on a good dupack.
>   3) th_ack == snd_una, window != old_window, sack_changed --> increment
>   When SACK enabled, it's okay to have advertized window changed if the ack 
> has
>   new SACK info.
>   4) th_ack > snd_una --> reset to 0
>   Reset to 0 when left edge moves.
>   5) th_ack > snd_una, sack_changed --> increment
>   Increment if left edge moves but there is new SACK info.
>   
>   Here, sack_changed is the indicator that incoming ack has previously unknown
>   SACK info in it.
>   
>   Note: This fix is not fully compliant to RFC6675. That may require a few
>   changes to current implementation in order to keep per-sackhole dupack 
> counter
>   and change to the way we mark/handle sack holes.
>   
>   PR:                 203663
>   Reviewed by:                jtl
>   MFC after:          3 weeks
>   Sponsored by:               Limelight Networks
>   Differential Revision:      https://reviews.freebsd.org/D4225
> 
> Modified:
>   head/sys/netinet/tcp_input.c
>   head/sys/netinet/tcp_sack.c
>   head/sys/netinet/tcp_var.h
> 
> Modified: head/sys/netinet/tcp_input.c
> ==============================================================================
> --- head/sys/netinet/tcp_input.c      Tue Dec  8 20:20:40 2015        
> (r292002)
> +++ head/sys/netinet/tcp_input.c      Tue Dec  8 21:21:48 2015        
> (r292003)
> @@ -1481,7 +1481,7 @@ tcp_do_segment(struct mbuf *m, struct tc
>      struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos,
>      int ti_locked)
>  {
> -     int thflags, acked, ourfinisacked, needoutput = 0;
> +     int thflags, acked, ourfinisacked, needoutput = 0, sack_changed;
>       int rstreason, todrop, win;
>       u_long tiwin;
>       char *s;
> @@ -1501,6 +1501,7 @@ tcp_do_segment(struct mbuf *m, struct tc
>       thflags = th->th_flags;
>       inc = &tp->t_inpcb->inp_inc;
>       tp->sackhint.last_sack_ack = 0;
> +     sack_changed = 0;
>  
>       /*
>        * If this is either a state-changing packet or current state isn't
> @@ -2424,7 +2425,7 @@ tcp_do_segment(struct mbuf *m, struct tc
>               if ((tp->t_flags & TF_SACK_PERMIT) &&
>                   ((to.to_flags & TOF_SACK) ||
>                    !TAILQ_EMPTY(&tp->snd_holes)))
> -                     tcp_sack_doack(tp, &to, th->th_ack);
> +                     sack_changed = tcp_sack_doack(tp, &to, th->th_ack);
>               else
>                       /*
>                        * Reset the value so that previous (valid) value
> @@ -2436,7 +2437,9 @@ tcp_do_segment(struct mbuf *m, struct tc
>               hhook_run_tcp_est_in(tp, th, &to);
>  
>               if (SEQ_LEQ(th->th_ack, tp->snd_una)) {
> -                     if (tlen == 0 && tiwin == tp->snd_wnd) {
> +                     if (tlen == 0 &&
> +                         (tiwin == tp->snd_wnd ||
> +                         (tp->t_flags & TF_SACK_PERMIT))) {
>                               /*
>                                * If this is the first time we've seen a
>                                * FIN from the remote, this is not a
> @@ -2478,8 +2481,20 @@ tcp_do_segment(struct mbuf *m, struct tc
>                                * When using TCP ECN, notify the peer that
>                                * we reduced the cwnd.
>                                */
> -                             if (!tcp_timer_active(tp, TT_REXMT) ||
> -                                 th->th_ack != tp->snd_una)
> +                             /*
> +                              * Following 2 kinds of acks should not affect
> +                              * dupack counting:
> +                              * 1) Old acks
> +                              * 2) Acks with SACK but without any new SACK
> +                              * information in them. These could result from
> +                              * any anomaly in the network like a switch
> +                              * duplicating packets or a possible DoS attack.
> +                              */
> +                             if (th->th_ack != tp->snd_una ||
> +                                 ((tp->t_flags & TF_SACK_PERMIT) &&
> +                                 !sack_changed))
> +                                     break;
> +                             else if (!tcp_timer_active(tp, TT_REXMT))
>                                       tp->t_dupacks = 0;
>                               else if (++tp->t_dupacks > tcprexmtthresh ||
>                                    IN_FASTRECOVERY(tp->t_flags)) {
> @@ -2608,9 +2623,20 @@ tcp_do_segment(struct mbuf *m, struct tc
>                                       tp->snd_cwnd = oldcwnd;
>                                       goto drop;
>                               }
> -                     } else
> -                             tp->t_dupacks = 0;
> +                     }
>                       break;
> +             } else {
> +                     /*
> +                      * This ack is advancing the left edge, reset the
> +                      * counter.
> +                      */
> +                     tp->t_dupacks = 0;
> +                     /*
> +                      * If this ack also has new SACK info, increment the
> +                      * counter as per rfc6675.
> +                      */
> +                     if ((tp->t_flags & TF_SACK_PERMIT) && sack_changed)
> +                             tp->t_dupacks++;
>               }
>  
>               KASSERT(SEQ_GT(th->th_ack, tp->snd_una),
> @@ -2629,7 +2655,6 @@ tcp_do_segment(struct mbuf *m, struct tc
>                       } else
>                               cc_post_recovery(tp, th);
>               }
> -             tp->t_dupacks = 0;
>               /*
>                * If we reach this point, ACK is not a duplicate,
>                *     i.e., it ACKs something we sent.
> 
> Modified: head/sys/netinet/tcp_sack.c
> ==============================================================================
> --- head/sys/netinet/tcp_sack.c       Tue Dec  8 20:20:40 2015        
> (r292002)
> +++ head/sys/netinet/tcp_sack.c       Tue Dec  8 21:21:48 2015        
> (r292003)
> @@ -345,17 +345,22 @@ tcp_sackhole_remove(struct tcpcb *tp, st
>   * Process cumulative ACK and the TCP SACK option to update the scoreboard.
>   * tp->snd_holes is an ordered list of holes (oldest to newest, in terms of
>   * the sequence space).
> + * Returns 1 if incoming ACK has previously unknown SACK information,
> + * 0 otherwise. Note: We treat (snd_una, th_ack) as a sack block so any 
> changes
> + * to that (i.e. left edge moving) would also be considered a change in SACK
> + * information which is slightly different than rfc6675.
>   */
> -void
> +int
>  tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack)
>  {
>       struct sackhole *cur, *temp;
>       struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp;
> -     int i, j, num_sack_blks;
> +     int i, j, num_sack_blks, sack_changed;
>  
>       INP_WLOCK_ASSERT(tp->t_inpcb);
>  
>       num_sack_blks = 0;
> +     sack_changed = 0;
>       /*
>        * If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist,
>        * treat [SND.UNA, SEG.ACK) as if it is a SACK block.
> @@ -392,7 +397,7 @@ tcp_sack_doack(struct tcpcb *tp, struct 
>        * received.
>        */
>       if (num_sack_blks == 0)
> -             return;
> +             return (sack_changed);
>  
>       /*
>        * Sort the SACK blocks so we can update the scoreboard with just one
> @@ -443,6 +448,7 @@ tcp_sack_doack(struct tcpcb *tp, struct 
>                       tp->snd_fack = sblkp->end;
>                       /* Go to the previous sack block. */
>                       sblkp--;
> +                     sack_changed = 1;
>               } else {
>                       /* 
>                        * We failed to add a new hole based on the current 
> @@ -459,9 +465,11 @@ tcp_sack_doack(struct tcpcb *tp, struct 
>                           SEQ_LT(tp->snd_fack, sblkp->end))
>                               tp->snd_fack = sblkp->end;
>               }
> -     } else if (SEQ_LT(tp->snd_fack, sblkp->end))
> +     } else if (SEQ_LT(tp->snd_fack, sblkp->end)) {
>               /* fack is advanced. */
>               tp->snd_fack = sblkp->end;
> +             sack_changed = 1;
> +     }
>       /* We must have at least one SACK hole in scoreboard. */
>       KASSERT(!TAILQ_EMPTY(&tp->snd_holes),
>           ("SACK scoreboard must not be empty"));
> @@ -490,6 +498,7 @@ tcp_sack_doack(struct tcpcb *tp, struct 
>               tp->sackhint.sack_bytes_rexmit -= (cur->rxmit - cur->start);
>               KASSERT(tp->sackhint.sack_bytes_rexmit >= 0,
>                   ("sackhint bytes rtx >= 0"));
> +             sack_changed = 1;
>               if (SEQ_LEQ(sblkp->start, cur->start)) {
>                       /* Data acks at least the beginning of hole. */
>                       if (SEQ_GEQ(sblkp->end, cur->end)) {
> @@ -545,6 +554,7 @@ tcp_sack_doack(struct tcpcb *tp, struct 
>               else
>                       sblkp--;
>       }
> +     return (sack_changed);
>  }
>  
>  /*
> 
> Modified: head/sys/netinet/tcp_var.h
> ==============================================================================
> --- head/sys/netinet/tcp_var.h        Tue Dec  8 20:20:40 2015        
> (r292002)
> +++ head/sys/netinet/tcp_var.h        Tue Dec  8 21:21:48 2015        
> (r292003)
> @@ -741,7 +741,7 @@ void       tcp_hc_update(struct in_conninfo *
>  extern       struct pr_usrreqs tcp_usrreqs;
>  tcp_seq tcp_new_isn(struct tcpcb *);
>  
> -void  tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
> +int   tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
>  void  tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq 
> rcv_lastend);
>  void  tcp_clean_sackreport(struct tcpcb *tp);
>  void  tcp_sack_adjust(struct tcpcb *tp);
> _______________________________________________
> svn-src-h...@freebsd.org mailing list
> https://lists.freebsd.org/mailman/listinfo/svn-src-head
> To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to