On Wed, 2010-05-19 at 11:01 -0700, Joe Eykholt wrote:
> Vasu Dev wrote:
> > From: Joe Eykholt <[email protected]>
> > 
> > This patch has several improvements to the code in
> > the fip timers.
> > 
> > The current code allows an advertisement to be used
> > even if it has been 3 times the FCF keep-alive
> > advertisement period (FKA) since one was received from
> > that FCF.  The spec. calls for 2.5 times FKA.
> > 
> > Fix this and make sure we detect missed keep-alives promptly.
> > 
> > Signed-off-by: Joe Eykholt <[email protected]>
> > 
> > --
> > Use fka_period as periodic timeouts to age out fcf if
> > keep alives are disabled due to fd_flags set and also
> > stop updating keep alive values in that case.
> > 
> > Update select fcf time only if fcf is not already selected or
> > select time is not already determined from parse adv, and then
> > have select time cleared only once after fcf is selected.
> > 
> > Changed deadline check to time_after_eq() from time_after()
> > since now next timeout will be on exact 2.5 times FKA followed
> > by first advertisement.
> > 
> > Signed-off-by: Vasu Dev <[email protected]>
> > ---
> > 
> >  drivers/scsi/fcoe/libfcoe.c |   89 
> > ++++++++++++++++++++++++-------------------
> >  1 files changed, 50 insertions(+), 39 deletions(-)
> > 
> > diff --git a/drivers/scsi/fcoe/libfcoe.c b/drivers/scsi/fcoe/libfcoe.c
> > index 1d5b949..6ac1f0e 100644
> > --- a/drivers/scsi/fcoe/libfcoe.c
> > +++ b/drivers/scsi/fcoe/libfcoe.c
> > @@ -557,38 +557,48 @@ EXPORT_SYMBOL(fcoe_ctlr_els_send);
> >   *
> >   * Called with lock held and preemption disabled.
> >   *
> > - * An FCF is considered old if we have missed three advertisements.
> > - * That is, there have been no valid advertisement from it for three
> > - * times its keep-alive period including fuzz.
> > + * An FCF is considered old if we have missed two advertisements.
> > + * That is, there have been no valid advertisement from it for 2.5
> > + * times its keep-alive period.
> >   *
> >   * In addition, determine the time when an FCF selection can occur.
> >   *
> >   * Also, increment the MissDiscAdvCount when no advertisement is received
> >   * for the corresponding FCF for 1.5 * FKA_ADV_PERIOD (FC-BB-5 LESB).
> > + *
> > + * Returns the time in jiffies for the next call.
> >   */
> > -static void fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
> > +static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
> >  {
> >     struct fcoe_fcf *fcf;
> >     struct fcoe_fcf *next;
> > +   unsigned long next_timer = jiffies + msecs_to_jiffies(FIP_DEF_FKA);
> > +   unsigned long deadline;
> >     unsigned long sel_time = 0;
> > -   unsigned long mda_time = 0;
> >     struct fcoe_dev_stats *stats;
> >  
> >     list_for_each_entry_safe(fcf, next, &fip->fcfs, list) {
> > -           mda_time = fcf->fka_period + (fcf->fka_period >> 1);
> > -           if ((fip->sel_fcf == fcf) &&
> > -               (time_after(jiffies, fcf->time + mda_time))) {
> > -                   mod_timer(&fip->timer, jiffies + mda_time);
> > -                   stats = per_cpu_ptr(fip->lp->dev_stats,
> > -                                       smp_processor_id());
> > -                   stats->MissDiscAdvCount++;
> > -                   printk(KERN_INFO "libfcoe: host%d: Missing Discovery "
> > -                          "Advertisement for fab %16.16llx count %lld\n",
> > -                          fip->lp->host->host_no, fcf->fabric_name,
> > -                          stats->MissDiscAdvCount);
> 
> 
> > +           deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2;
> > +           if (fip->sel_fcf == fcf) {
> > +                   if (fcf->fd_flags &&
> > +                       time_after(next_timer, jiffies + fcf->fka_period))
> > +                           next_timer = jiffies + fcf->fka_period;
> 
> The fd_flags should be in the outer if, since that means we shouldn't
> be looking for missing advertisements at all.  

Spec only recommends to not send KA and okay with verifying
advertisements per sec 7.8.6.3.13, therefore I think admin should know
any missed adv on at least connected fcf, therefore I added periodic
fcf->fka_period for only connected fcf when fd_flag is set but only if
this time happens to be sooner than next_timer in the loop.

> Otherwise we'll set
> next_timer to the deadline.
> 

FCF age out is needed on deadline ir-respective of fd_flag value, so
moving to as suggested below won't age out fcf with fd_flag set.
I verified fcf does get de-selected on 2.5 KA missed adv after two
missed adv prints w/ this patch while fd_flag set and I think should be
kept this way.

>               if (fip->sel_fcf == fcf && !fcf->fd_flags) {
>                       if (time_after ...
> 
> > +
> > +                   if (time_after(jiffies, deadline)) {
> > +                           stats = per_cpu_ptr(fip->lp->dev_stats,
> > +                                               smp_processor_id());
> > +                           stats->MissDiscAdvCount++;
> > +                           printk(KERN_INFO "libfcoe: host%d: "
> > +                                  "Missing Discovery Advertisement "
> > +                                  "for fab %16.16llx count %lld\n",
> > +                                  fip->lp->host->host_no, fcf->fabric_name,
> > +                                  stats->MissDiscAdvCount);
> > +                   } else if (time_after(next_timer, deadline))
> > +                           next_timer = deadline;
> >             }
> > -           if (time_after(jiffies, fcf->time + fcf->fka_period * 3 +
> > -                          msecs_to_jiffies(FIP_FCF_FUZZ * 3))) {
> > +
> > +           deadline += fcf->fka_period;
> > +           if (time_after_eq(jiffies, deadline)) {
> 
> This is a pre-existing bug, but the above also should check
> fcf->fd_flags around both the if and else so
> 
>               if (!fcf->fd_flags) {

This check will prevent aging  out fcf with fd_flag set and we don't
want that as I said above.

>                       if (time_after_eq(...)) {
>                               ...
>                               delete the fcf
> 
>                               continue; // so we don't check sel_time on this 
> one.
>                       }
>                       if (time_after(next_timer, deadline))
>                               next_timer = deadline;
>               }
>               if (fcoe_ctlr_mtu_valid(fcf) && ...
>       
> This rearranges things a bit.  I hope you see what I mean.
> Let me know if you want to split that out into a separate
> patch or want me to update this patch.
> 

I'm fine whatever approach you decide for any more changes.

> >                     if (fip->sel_fcf == fcf)
> >                             fip->sel_fcf = NULL;
> >                     list_del(&fcf->list);
> > @@ -598,19 +608,20 @@ static void fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
> >                     stats = per_cpu_ptr(fip->lp->dev_stats,
> >                                         smp_processor_id());
> >                     stats->VLinkFailureCount++;
> > -           } else if (fcoe_ctlr_mtu_valid(fcf) &&
> > -                      (!sel_time || time_before(sel_time, fcf->time))) {
> > -                   sel_time = fcf->time;
> > +           } else {
> > +                   if (time_after(next_timer, deadline))
> > +                           next_timer = deadline;
> > +                   if (fcoe_ctlr_mtu_valid(fcf) &&
> > +                       (!sel_time || time_before(sel_time, fcf->time)))
> > +                           sel_time = fcf->time;
> >             }
> >     }
> > -   if (sel_time) {
> > +   if (!(fip->sel_fcf || fip->sel_time)) {
> 
> Shouldn't this be:
>       if (sel_time && !fip->sel_fcf) {
> 

Yes, it should be this way so that initiator could switch quickly to new
fcf in case there is already other fcf is in the list, I can update
patch for this if you agree w/ rest of the changes, let me know.

> sel_time non-zero indicates that one of the FCFs in the loop
> 
> >             sel_time += msecs_to_jiffies(FCOE_CTLR_START_DELAY);
> >             fip->sel_time = sel_time;
> > -           if (time_before(sel_time, fip->timer.expires))
> > -                   mod_timer(&fip->timer, sel_time);
> > -   } else {
> > -           fip->sel_time = 0;
> >     }
> > +
> > +   return next_timer;
> >  }
> >  
> >  /**
> > @@ -767,7 +778,7 @@ static void fcoe_ctlr_recv_adv(struct fcoe_ctlr *fip, 
> > struct sk_buff *skb)
> >              * ignored after a usable solicited advertisement
> >              * has been received.
> >              */
> > -           if (fcf == fip->sel_fcf) {
> > +           if (fcf == fip->sel_fcf && !fcf->fd_flags) {
> >                     fip->ctlr_ka_time -= fcf->fka_period;
> >                     fip->ctlr_ka_time += new.fka_period;
> >                     if (time_before(fip->ctlr_ka_time, fip->timer.expires))
> > @@ -805,7 +816,7 @@ static void fcoe_ctlr_recv_adv(struct fcoe_ctlr *fip, 
> > struct sk_buff *skb)
> >      * If this is the first validated FCF, note the time and
> >      * set a timer to trigger selection.
> >      */
> > -   if (mtu_valid && !fip->sel_time && fcoe_ctlr_fcf_usable(fcf)) {
> > +   if (mtu_valid && !fip->sel_fcf && fcoe_ctlr_fcf_usable(fcf)) {
> >             fip->sel_time = jiffies +
> >                     msecs_to_jiffies(FCOE_CTLR_START_DELAY);
> >             if (!timer_pending(&fip->timer) ||
> > @@ -1148,7 +1159,7 @@ static void fcoe_ctlr_timeout(unsigned long arg)
> >     struct fcoe_ctlr *fip = (struct fcoe_ctlr *)arg;
> >     struct fcoe_fcf *sel;
> >     struct fcoe_fcf *fcf;
> > -   unsigned long next_timer = jiffies + msecs_to_jiffies(FIP_VN_KA_PERIOD);
> > +   unsigned long next_timer;
> >  
> >     spin_lock_bh(&fip->lock);
> >     if (fip->state == FIP_ST_DISABLED) {
> > @@ -1157,13 +1168,16 @@ static void fcoe_ctlr_timeout(unsigned long arg)
> >     }
> >  
> >     fcf = fip->sel_fcf;
> > -   fcoe_ctlr_age_fcfs(fip);
> > +   next_timer = fcoe_ctlr_age_fcfs(fip);
> >  
> >     sel = fip->sel_fcf;
> > -   if (!sel && fip->sel_time && time_after_eq(jiffies, fip->sel_time)) {
> > -           fcoe_ctlr_select(fip);
> > -           sel = fip->sel_fcf;
> > -           fip->sel_time = 0;
> > +   if (!sel && fip->sel_time) {
> > +           if (time_after_eq(jiffies, fip->sel_time)) {
> > +                   fcoe_ctlr_select(fip);
> > +                   sel = fip->sel_fcf;
> > +                   fip->sel_time = 0;
> > +           } else if (time_after(next_timer, fip->sel_time))
> > +                   next_timer = fip->sel_time;
> >     }
> >  
> >     if (sel != fcf) {
> > @@ -1201,12 +1215,9 @@ static void fcoe_ctlr_timeout(unsigned long arg)
> >             }
> >             if (time_after(next_timer, fip->port_ka_time))
> >                     next_timer = fip->port_ka_time;
> > -           mod_timer(&fip->timer, next_timer);
> > -   } else if (fip->sel_time) {
> > -           next_timer = fip->sel_time +
> > -                   msecs_to_jiffies(FCOE_CTLR_START_DELAY);
> > -           mod_timer(&fip->timer, next_timer);
> >     }
> > +   if (!list_empty(&fip->fcfs))
> > +           mod_timer(&fip->timer, next_timer);
> >     if (fip->send_ctlr_ka || fip->send_port_ka)
> >             schedule_work(&fip->timer_work);
> >     spin_unlock_bh(&fip->lock);
> > 
> > _______________________________________________
> > devel mailing list
> > [email protected]
> > http://www.open-fcoe.org/mailman/listinfo/devel
> 
> _______________________________________________
> devel mailing list
> [email protected]
> http://www.open-fcoe.org/mailman/listinfo/devel

_______________________________________________
devel mailing list
[email protected]
http://www.open-fcoe.org/mailman/listinfo/devel

Reply via email to