On Thu, 2010-05-20 at 12:02 -0700, Joe Eykholt wrote:
> Vasu Dev wrote:
> > On Wed, 2010-05-19 at 11:01 -0700, Joe Eykholt wrote:
> >> Vasu Dev wrote:
> >>> From: Joe Eykholt <[email protected]>
> >>>
> >>> This patch has several improvements to the code in
> >>> the fip timers.
> >>>
> >>> The current code allows an advertisement to be used
> >>> even if it has been 3 times the FCF keep-alive
> >>> advertisement period (FKA) since one was received from
> >>> that FCF.  The spec. calls for 2.5 times FKA.
> >>>
> >>> Fix this and make sure we detect missed keep-alives promptly.
> >>>
> >>> Signed-off-by: Joe Eykholt <[email protected]>
> >>>
> >>> --
> >>> Use fka_period as periodic timeouts to age out fcf if
> >>> keep alives are disabled due to fd_flags set and also
> >>> stop updating keep alive values in that case.
> >>>
> >>> Update select fcf time only if fcf is not already selected or
> >>> select time is not already determined from parse adv, and then
> >>> have select time cleared only once after fcf is selected.
> >>>
> >>> Changed deadline check to time_after_eq() from time_after()
> >>> since now next timeout will be on exact 2.5 times FKA followed
> >>> by first advertisement.
> >>>
> >>> Signed-off-by: Vasu Dev <[email protected]>
> >>> ---
> >>>
> >>>  drivers/scsi/fcoe/libfcoe.c |   89 
> >>> ++++++++++++++++++++++++-------------------
> >>>  1 files changed, 50 insertions(+), 39 deletions(-)
> >>>
> >>> diff --git a/drivers/scsi/fcoe/libfcoe.c b/drivers/scsi/fcoe/libfcoe.c
> >>> index 1d5b949..6ac1f0e 100644
> >>> --- a/drivers/scsi/fcoe/libfcoe.c
> >>> +++ b/drivers/scsi/fcoe/libfcoe.c
> >>> @@ -557,38 +557,48 @@ EXPORT_SYMBOL(fcoe_ctlr_els_send);
> >>>   *
> >>>   * Called with lock held and preemption disabled.
> >>>   *
> >>> - * An FCF is considered old if we have missed three advertisements.
> >>> - * That is, there have been no valid advertisement from it for three
> >>> - * times its keep-alive period including fuzz.
> >>> + * An FCF is considered old if we have missed two advertisements.
> >>> + * That is, there have been no valid advertisement from it for 2.5
> >>> + * times its keep-alive period.
> >>>   *
> >>>   * In addition, determine the time when an FCF selection can occur.
> >>>   *
> >>>   * Also, increment the MissDiscAdvCount when no advertisement is received
> >>>   * for the corresponding FCF for 1.5 * FKA_ADV_PERIOD (FC-BB-5 LESB).
> >>> + *
> >>> + * Returns the time in jiffies for the next call.
> >>>   */
> >>> -static void fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
> >>> +static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
> >>>  {
> >>>   struct fcoe_fcf *fcf;
> >>>   struct fcoe_fcf *next;
> >>> + unsigned long next_timer = jiffies + msecs_to_jiffies(FIP_DEF_FKA);
> >>> + unsigned long deadline;
> >>>   unsigned long sel_time = 0;
> >>> - unsigned long mda_time = 0;
> >>>   struct fcoe_dev_stats *stats;
> >>>  
> >>>   list_for_each_entry_safe(fcf, next, &fip->fcfs, list) {
> >>> -         mda_time = fcf->fka_period + (fcf->fka_period >> 1);
> >>> -         if ((fip->sel_fcf == fcf) &&
> >>> -             (time_after(jiffies, fcf->time + mda_time))) {
> >>> -                 mod_timer(&fip->timer, jiffies + mda_time);
> >>> -                 stats = per_cpu_ptr(fip->lp->dev_stats,
> >>> -                                     smp_processor_id());
> >>> -                 stats->MissDiscAdvCount++;
> >>> -                 printk(KERN_INFO "libfcoe: host%d: Missing Discovery "
> >>> -                        "Advertisement for fab %16.16llx count %lld\n",
> >>> -                        fip->lp->host->host_no, fcf->fabric_name,
> >>> -                        stats->MissDiscAdvCount);
> >>
> >>> +         deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2;
> >>> +         if (fip->sel_fcf == fcf) {
> >>> +                 if (fcf->fd_flags &&
> >>> +                     time_after(next_timer, jiffies + fcf->fka_period))
> >>> +                         next_timer = jiffies + fcf->fka_period;
> >> The fd_flags should be in the outer if, since that means we shouldn't
> >> be looking for missing advertisements at all.  
> > 
> > Spec only recommends to not send KA and okay with verifying
> > advertisements per sec 7.8.6.3.13, therefore I think admin should know
> > any missed adv on at least connected fcf, therefore I added periodic
> > fcf->fka_period for only connected fcf when fd_flag is set but only if
> > this time happens to be sooner than next_timer in the loop.
> 
> OK, I see, that's fine.  I was confused about what the flag meant.
> It means keep-alives are not needed, it doesn't say anything about
> periodic advertisements.
> 
> >> Otherwise we'll set
> >> next_timer to the deadline.
> 
> > FCF age out is needed on deadline ir-respective of fd_flag value, so
> > moving to as suggested below won't age out fcf with fd_flag set.
> > I verified fcf does get de-selected on 2.5 KA missed adv after two
> > missed adv prints w/ this patch while fd_flag set and I think should be
> > kept this way.
> > 
> >>            if (fip->sel_fcf == fcf && !fcf->fd_flags) {
> >>                    if (time_after ...
> >>
> >>> +
> >>> +                 if (time_after(jiffies, deadline)) {
> >>> +                         stats = per_cpu_ptr(fip->lp->dev_stats,
> >>> +                                             smp_processor_id());
> >>> +                         stats->MissDiscAdvCount++;
> >>> +                         printk(KERN_INFO "libfcoe: host%d: "
> >>> +                                "Missing Discovery Advertisement "
> >>> +                                "for fab %16.16llx count %lld\n",
> >>> +                                fip->lp->host->host_no, fcf->fabric_name,
> >>> +                                stats->MissDiscAdvCount);
> >>> +                 } else if (time_after(next_timer, deadline))
> >>> +                         next_timer = deadline;
> >>>           }
> >>> -         if (time_after(jiffies, fcf->time + fcf->fka_period * 3 +
> >>> -                        msecs_to_jiffies(FIP_FCF_FUZZ * 3))) {
> >>> +
> >>> +         deadline += fcf->fka_period;
> >>> +         if (time_after_eq(jiffies, deadline)) {
> >> This is a pre-existing bug, but the above also should check
> >> fcf->fd_flags around both the if and else so
> >>
> >>            if (!fcf->fd_flags) {
> > 
> > This check will prevent aging  out fcf with fd_flag set and we don't
> > want that as I said above.
> > 
> >>                    if (time_after_eq(...)) {
> >>                            ...
> >>                            delete the fcf
> >>
> >>                            continue; // so we don't check sel_time on this 
> >> one.
> >>                    }
> >>                    if (time_after(next_timer, deadline))
> >>                            next_timer = deadline;
> >>            }
> >>            if (fcoe_ctlr_mtu_valid(fcf) && ...
> >>    
> >> This rearranges things a bit.  I hope you see what I mean.
> >> Let me know if you want to split that out into a separate
> >> patch or want me to update this patch.
> >>
> > 
> > I'm fine whatever approach you decide for any more changes.
> > 
> >>>                   if (fip->sel_fcf == fcf)
> >>>                           fip->sel_fcf = NULL;
> >>>                   list_del(&fcf->list);
> >>> @@ -598,19 +608,20 @@ static void fcoe_ctlr_age_fcfs(struct fcoe_ctlr 
> >>> *fip)
> >>>                   stats = per_cpu_ptr(fip->lp->dev_stats,
> >>>                                       smp_processor_id());
> >>>                   stats->VLinkFailureCount++;
> >>> -         } else if (fcoe_ctlr_mtu_valid(fcf) &&
> >>> -                    (!sel_time || time_before(sel_time, fcf->time))) {
> >>> -                 sel_time = fcf->time;
> >>> +         } else {
> >>> +                 if (time_after(next_timer, deadline))
> >>> +                         next_timer = deadline;
> >>> +                 if (fcoe_ctlr_mtu_valid(fcf) &&
> >>> +                     (!sel_time || time_before(sel_time, fcf->time)))
> >>> +                         sel_time = fcf->time;
> >>>           }
> >>>   }
> >>> - if (sel_time) {
> >>> + if (!(fip->sel_fcf || fip->sel_time)) {
> >> Shouldn't this be:
> >>    if (sel_time && !fip->sel_fcf) {
> >>
> > 
> > Yes, it should be this way so that initiator could switch quickly to new
> > fcf in case there is already other fcf is in the list, I can update
> > patch for this if you agree w/ rest of the changes, let me know.
> > 
> >> sel_time non-zero indicates that one of the FCFs in the loop
> 
> I split our two patches and changed the above line to:
> 
>       if (sel_time && !fip->sel_fcf && !fip->sel_time)
> 

This is better of older two variants as it would also prevent
over-writing sel_time when it is already set from parse adv beside also
setting up sel_time when needed just after aging out fcf in the loop.

Splitting these into two patches is also good idea since your patch
passed testing and was doing fine for its changes and also fixed calling
TO handler on each jiffies when d flag set.

> in your patch.  I'll send them out for your review in a bit.
> 
> >>
> >>>           sel_time += msecs_to_jiffies(FCOE_CTLR_START_DELAY);
> >>>           fip->sel_time = sel_time;
> >>> -         if (time_before(sel_time, fip->timer.expires))
> >>> -                 mod_timer(&fip->timer, sel_time);
> >>> - } else {
> >>> -         fip->sel_time = 0;
> >>>   }
> >>> +
> >>> + return next_timer;
> >>>  }
> >>>  
> >>>  /**
> >>> @@ -767,7 +778,7 @@ static void fcoe_ctlr_recv_adv(struct fcoe_ctlr *fip, 
> >>> struct sk_buff *skb)
> >>>            * ignored after a usable solicited advertisement
> >>>            * has been received.
> >>>            */
> >>> -         if (fcf == fip->sel_fcf) {
> >>> +         if (fcf == fip->sel_fcf && !fcf->fd_flags) {
> >>>                   fip->ctlr_ka_time -= fcf->fka_period;
> >>>                   fip->ctlr_ka_time += new.fka_period;
> >>>                   if (time_before(fip->ctlr_ka_time, fip->timer.expires))
> 
> Why should the fd_flags keep us from adopting a change in the keep-alive 
> period?
> That affects advertisements as well, right?
> Also, the else clause (pasted below for context) should not be invoked for 
> the selected FCF
> in any case so, if we want to check fd_flags we should do it a nested if.
 
> Here's the code as it is after the patch:
> 
>                  if (fcf == fip->sel_fcf && !fcf->fd_flags) {
>                          fip->ctlr_ka_time -= fcf->fka_period;
>                          fip->ctlr_ka_time += new.fka_period;
>                          if (time_before(fip->ctlr_ka_time, 
> fip->timer.expires))
>                                  mod_timer(&fip->timer, fip->ctlr_ka_time);
>                  } else if (!fcoe_ctlr_fcf_usable(fcf))
>                          fcf->flags = new.flags;
> 
> I think it should be something like:
> 
>                  if (fcf == fip->sel_fcf) {
>                       if (!fcf->fd_flags) {
>                               fip->ctlr_ka_time -= fcf->fka_period;
>                               fip->ctlr_ka_time += new.fka_period;
>                               if (time_before(fip->ctlr_ka_time,
>                                               fip->timer.expires))
>                                       mod_timer(&fip->timer,
>                                                 fip->ctlr_ka_time);
>                       }
>                  } else if (!fcoe_ctlr_fcf_usable(fcf))
>                          fcf->flags = new.flags;
> 
> But only if there's a good reason to ignore the FKA period change when
> the D flag is set in fcf (from the first advertisment we received).

Since KAs are not send while D flag set, therefore mod_timer also should
not be called here based on KA TO values. thus not to update KA timers
also. However also not to update fcf->flags on all unsolicited adv and
your suggested code would do that.

> I noticed that D flag changes are also ignored after the first advertisment
> that's received.   Since it's an administrative setting on the switch,
> maybe we should copy it every time we receive an advertisement?

Yes that would be better as that would allow to change D flag anytime,
that would need to update fcf->flags on all unsolicited adv but for only
D bit. 

        Thanks
        Vasu

> I guess we should leave it as is unless it's causing a problem.
> 
> So, could you get back to me on the above change?  Can we just
> eliminate the fd_flags test here?
> 
>       Thanks,
>       Joe
> 
> >>> @@ -805,7 +816,7 @@ static void fcoe_ctlr_recv_adv(struct fcoe_ctlr *fip, 
> >>> struct sk_buff *skb)
> >>>    * If this is the first validated FCF, note the time and
> >>>    * set a timer to trigger selection.
> >>>    */
> >>> - if (mtu_valid && !fip->sel_time && fcoe_ctlr_fcf_usable(fcf)) {
> >>> + if (mtu_valid && !fip->sel_fcf && fcoe_ctlr_fcf_usable(fcf)) {
> >>>           fip->sel_time = jiffies +
> >>>                   msecs_to_jiffies(FCOE_CTLR_START_DELAY);
> >>>           if (!timer_pending(&fip->timer) ||
> >>> @@ -1148,7 +1159,7 @@ static void fcoe_ctlr_timeout(unsigned long arg)
> >>>   struct fcoe_ctlr *fip = (struct fcoe_ctlr *)arg;
> >>>   struct fcoe_fcf *sel;
> >>>   struct fcoe_fcf *fcf;
> >>> - unsigned long next_timer = jiffies + msecs_to_jiffies(FIP_VN_KA_PERIOD);
> >>> + unsigned long next_timer;
> >>>  
> >>>   spin_lock_bh(&fip->lock);
> >>>   if (fip->state == FIP_ST_DISABLED) {
> >>> @@ -1157,13 +1168,16 @@ static void fcoe_ctlr_timeout(unsigned long arg)
> >>>   }
> >>>  
> >>>   fcf = fip->sel_fcf;
> >>> - fcoe_ctlr_age_fcfs(fip);
> >>> + next_timer = fcoe_ctlr_age_fcfs(fip);
> >>>  
> >>>   sel = fip->sel_fcf;
> >>> - if (!sel && fip->sel_time && time_after_eq(jiffies, fip->sel_time)) {
> >>> -         fcoe_ctlr_select(fip);
> >>> -         sel = fip->sel_fcf;
> >>> -         fip->sel_time = 0;
> >>> + if (!sel && fip->sel_time) {
> >>> +         if (time_after_eq(jiffies, fip->sel_time)) {
> >>> +                 fcoe_ctlr_select(fip);
> >>> +                 sel = fip->sel_fcf;
> >>> +                 fip->sel_time = 0;
> >>> +         } else if (time_after(next_timer, fip->sel_time))
> >>> +                 next_timer = fip->sel_time;
> >>>   }
> >>>  
> >>>   if (sel != fcf) {
> >>> @@ -1201,12 +1215,9 @@ static void fcoe_ctlr_timeout(unsigned long arg)
> >>>           }
> >>>           if (time_after(next_timer, fip->port_ka_time))
> >>>                   next_timer = fip->port_ka_time;
> >>> -         mod_timer(&fip->timer, next_timer);
> >>> - } else if (fip->sel_time) {
> >>> -         next_timer = fip->sel_time +
> >>> -                 msecs_to_jiffies(FCOE_CTLR_START_DELAY);
> >>> -         mod_timer(&fip->timer, next_timer);
> >>>   }
> >>> + if (!list_empty(&fip->fcfs))
> >>> +         mod_timer(&fip->timer, next_timer);
> >>>   if (fip->send_ctlr_ka || fip->send_port_ka)
> >>>           schedule_work(&fip->timer_work);
> >>>   spin_unlock_bh(&fip->lock);
> >>>
> >>> _______________________________________________
> >>> devel mailing list
> >>> [email protected]
> >>> http://www.open-fcoe.org/mailman/listinfo/devel
> >> _______________________________________________
> >> devel mailing list
> >> [email protected]
> >> http://www.open-fcoe.org/mailman/listinfo/devel
> > 
> 

_______________________________________________
devel mailing list
[email protected]
http://www.open-fcoe.org/mailman/listinfo/devel

Reply via email to