Acked-by: Jon Maloy <jon.ma...@ericsson.com>

Remember to change the prefix to 'net-next' and send it when net-next re-opens.

///jon

> -----Original Message-----
> From: Tuong Lien <tuong.t.l...@dektech.com.au>
> Sent: 18-Jul-19 23:57
> To: tipc-discussion@lists.sourceforge.net; Jon Maloy
> <jon.ma...@ericsson.com>; ma...@donjonn.com; ying....@windriver.com
> Subject: [net] tipc: fix false detection of retransmit failures
> 
> This commit eliminates the use of the link 'stale_limit' & 'prev_from'
> (besides the already removed - 'stale_cnt') variables in the detection of
> repeated retransmit failures as there is no proper way to initialize them to
> avoid a false detection, i.e. it is not really a retransmission failure but 
> due to a
> garbage values in the variables.
> 
> Instead, a jiffies variable will be added to individual skbs (like the way we
> restrict the skb retransmissions) in order to mark the first skb retransmit 
> time.
> Later on, at the next retransmissions, the timestamp will be checked to see if
> the skb in the link transmq is "too stale", that is, the link tolerance time 
> has
> passed, so that a link reset will be ordered. Note, just checking on the 
> first skb
> in the queue is fine enough since it must be the oldest one.
> A counter is also added to keep track the actual skb retransmissions'
> number for later checking when the failure happens.
> 
> The downside of this approach is that the skb->cb[] buffer is about to be
> exhausted, however it is always able to allocate another memory area and
> keep a reference to it when needed.
> 
> Fixes: 77cf8edbc0e7 ("tipc: simplify stale link failure criteria")
> Reported-by: Hoang Le <hoang.h...@dektech.com.au>
> Signed-off-by: Tuong Lien <tuong.t.l...@dektech.com.au>
> ---
>  net/tipc/link.c | 92 ++++++++++++++++++++++++++++++++---------------------
> ----
>  net/tipc/msg.h  |  8 +++--
>  2 files changed, 57 insertions(+), 43 deletions(-)
> 
> diff --git a/net/tipc/link.c b/net/tipc/link.c index
> 66d3a07bc571..c2c5c53cad22 100644
> --- a/net/tipc/link.c
> +++ b/net/tipc/link.c
> @@ -106,8 +106,6 @@ struct tipc_stats {
>   * @transmitq: queue for sent, non-acked messages
>   * @backlogq: queue for messages waiting to be sent
>   * @snt_nxt: next sequence number to use for outbound messages
> - * @prev_from: sequence number of most previous retransmission request
> - * @stale_limit: time when repeated identical retransmits must force link
> reset
>   * @ackers: # of peers that needs to ack each packet before it can be 
> released
>   * @acked: # last packet acked by a certain peer. Used for broadcast.
>   * @rcv_nxt: next sequence number to expect for inbound messages @@ -
> 164,9 +162,7 @@ struct tipc_link {
>               u16 limit;
>       } backlog[5];
>       u16 snd_nxt;
> -     u16 prev_from;
>       u16 window;
> -     unsigned long stale_limit;
> 
>       /* Reception */
>       u16 rcv_nxt;
> @@ -1044,47 +1040,53 @@ static void tipc_link_advance_backlog(struct
> tipc_link *l,
>   * link_retransmit_failure() - Detect repeated retransmit failures
>   * @l: tipc link sender
>   * @r: tipc link receiver (= l in case of unicast)
> - * @from: seqno of the 1st packet in retransmit request
>   * @rc: returned code
>   *
>   * Return: true if the repeated retransmit failures happens, otherwise
>   * false
>   */
>  static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r,
> -                                 u16 from, int *rc)
> +                                 int *rc)
>  {
>       struct sk_buff *skb = skb_peek(&l->transmq);
>       struct tipc_msg *hdr;
> 
>       if (!skb)
>               return false;
> -     hdr = buf_msg(skb);
> 
> -     /* Detect repeated retransmit failures on same packet */
> -     if (r->prev_from != from) {
> -             r->prev_from = from;
> -             r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance);
> -     } else if (time_after(jiffies, r->stale_limit)) {
> -             pr_warn("Retransmission failure on link <%s>\n", l->name);
> -             link_print(l, "State of link ");
> -             pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
> -                     msg_user(hdr), msg_type(hdr), msg_size(hdr),
> -                     msg_errcode(hdr));
> -             pr_info("sqno %u, prev: %x, src: %x\n",
> -                     msg_seqno(hdr), msg_prevnode(hdr),
> msg_orignode(hdr));
> -
> -             trace_tipc_list_dump(&l->transmq, true, "retrans failure!");
> -             trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!");
> -             trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!");
> +     if (!TIPC_SKB_CB(skb)->retr_cnt)
> +             return false;
> 
> -             if (link_is_bc_sndlink(l))
> -                     *rc = TIPC_LINK_DOWN_EVT;
> +     if (!time_after(jiffies, TIPC_SKB_CB(skb)->retr_stamp +
> +                     msecs_to_jiffies(r->tolerance)))
> +             return false;
> +
> +     hdr = buf_msg(skb);
> +     if (link_is_bc_sndlink(l) && !less(r->acked, msg_seqno(hdr)))
> +             return false;
> 
> +     pr_warn("Retransmission failure on link <%s>\n", l->name);
> +     link_print(l, "State of link ");
> +     pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n",
> +             msg_user(hdr), msg_type(hdr), msg_size(hdr),
> msg_errcode(hdr));
> +     pr_info("sqno %u, prev: %x, dest: %x\n",
> +             msg_seqno(hdr), msg_prevnode(hdr), msg_destnode(hdr));
> +     pr_info("retr_stamp %d, retr_cnt %d\n",
> +             jiffies_to_msecs(TIPC_SKB_CB(skb)->retr_stamp),
> +             TIPC_SKB_CB(skb)->retr_cnt);
> +
> +     trace_tipc_list_dump(&l->transmq, true, "retrans failure!");
> +     trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!");
> +     trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!");
> +
> +     if (link_is_bc_sndlink(l)) {
> +             r->state = LINK_RESET;
> +             *rc = TIPC_LINK_DOWN_EVT;
> +     } else {
>               *rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
> -             return true;
>       }
> 
> -     return false;
> +     return true;
>  }
> 
>  /* tipc_link_bc_retrans() - retransmit zero or more packets @@ -1110,7
> +1112,7 @@ static int tipc_link_bc_retrans(struct tipc_link *l, struct 
> tipc_link
> *r,
> 
>       trace_tipc_link_retrans(r, from, to, &l->transmq);
> 
> -     if (link_retransmit_failure(l, r, from, &rc))
> +     if (link_retransmit_failure(l, r, &rc))
>               return rc;
> 
>       skb_queue_walk(&l->transmq, skb) {
> @@ -1119,11 +1121,10 @@ static int tipc_link_bc_retrans(struct tipc_link *l,
> struct tipc_link *r,
>                       continue;
>               if (more(msg_seqno(hdr), to))
>                       break;
> -             if (link_is_bc_sndlink(l)) {
> -                     if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
> -                             continue;
> -                     TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
> -             }
> +
> +             if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
> +                     continue;
> +             TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
>               _skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE,
> GFP_ATOMIC);
>               if (!_skb)
>                       return 0;
> @@ -1133,6 +1134,10 @@ static int tipc_link_bc_retrans(struct tipc_link *l,
> struct tipc_link *r,
>               _skb->priority = TC_PRIO_CONTROL;
>               __skb_queue_tail(xmitq, _skb);
>               l->stats.retransmitted++;
> +
> +             /* Increase actual retrans counter & mark first time */
> +             if (!TIPC_SKB_CB(skb)->retr_cnt++)
> +                     TIPC_SKB_CB(skb)->retr_stamp = jiffies;
>       }
>       return 0;
>  }
> @@ -1357,12 +1362,10 @@ static int tipc_link_advance_transmq(struct
> tipc_link *l, u16 acked, u16 gap,
>       struct tipc_msg *hdr;
>       u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
>       u16 ack = l->rcv_nxt - 1;
> +     bool passed = false;
>       u16 seqno, n = 0;
>       int rc = 0;
> 
> -     if (gap && link_retransmit_failure(l, l, acked + 1, &rc))
> -             return rc;
> -
>       skb_queue_walk_safe(&l->transmq, skb, tmp) {
>               seqno = buf_seqno(skb);
> 
> @@ -1372,12 +1375,17 @@ static int tipc_link_advance_transmq(struct
> tipc_link *l, u16 acked, u16 gap,
>                       __skb_unlink(skb, &l->transmq);
>                       kfree_skb(skb);
>               } else if (less_eq(seqno, acked + gap)) {
> -                     /* retransmit skb */
> +                     /* First, check if repeated retrans failures occurs? */
> +                     if (!passed && link_retransmit_failure(l, l, &rc))
> +                             return rc;
> +                     passed = true;
> +
> +                     /* retransmit skb if unrestricted*/
>                       if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
>                               continue;
>                       TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME;
> -
> -                     _skb = __pskb_copy(skb, MIN_H_SIZE, GFP_ATOMIC);
> +                     _skb = __pskb_copy(skb, LL_MAX_HEADER + MIN_H_SIZE,
> +                                        GFP_ATOMIC);
>                       if (!_skb)
>                               continue;
>                       hdr = buf_msg(_skb);
> @@ -1386,6 +1394,10 @@ static int tipc_link_advance_transmq(struct
> tipc_link *l, u16 acked, u16 gap,
>                       _skb->priority = TC_PRIO_CONTROL;
>                       __skb_queue_tail(xmitq, _skb);
>                       l->stats.retransmitted++;
> +
> +                     /* Increase actual retrans counter & mark first time */
> +                     if (!TIPC_SKB_CB(skb)->retr_cnt++)
> +                             TIPC_SKB_CB(skb)->retr_stamp = jiffies;
>               } else {
>                       /* retry with Gap ACK blocks if any */
>                       if (!ga || n >= ga->gack_cnt)
> @@ -2577,7 +2589,7 @@ int tipc_link_dump(struct tipc_link *l, u16
> dqueues, char *buf)
>       i += scnprintf(buf + i, sz - i, " %x", l->peer_caps);
>       i += scnprintf(buf + i, sz - i, " %u", l->silent_intv_cnt);
>       i += scnprintf(buf + i, sz - i, " %u", l->rst_cnt);
> -     i += scnprintf(buf + i, sz - i, " %u", l->prev_from);
> +     i += scnprintf(buf + i, sz - i, " %u", 0);
>       i += scnprintf(buf + i, sz - i, " %u", 0);
>       i += scnprintf(buf + i, sz - i, " %u", l->acked);
> 
> diff --git a/net/tipc/msg.h b/net/tipc/msg.h index
> da509f0eb9ca..d7ebc9e955f6 100644
> --- a/net/tipc/msg.h
> +++ b/net/tipc/msg.h
> @@ -102,13 +102,15 @@ struct plist;
>  #define TIPC_MEDIA_INFO_OFFSET       5
> 
>  struct tipc_skb_cb {
> -     u32 bytes_read;
> -     u32 orig_member;
>       struct sk_buff *tail;
>       unsigned long nxt_retr;
> -     bool validated;
> +     unsigned long retr_stamp;
> +     u32 bytes_read;
> +     u32 orig_member;
>       u16 chain_imp;
>       u16 ackers;
> +     u16 retr_cnt;
> +     bool validated;
>  };
> 
>  #define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0]))
> --
> 2.13.7



_______________________________________________
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion

Reply via email to