Vasu Dev wrote:
> On Wed, 2010-05-19 at 11:01 -0700, Joe Eykholt wrote:
>> Vasu Dev wrote:
>>> From: Joe Eykholt <[email protected]>
>>>
>>> This patch has several improvements to the code in
>>> the fip timers.
>>>
>>> The current code allows an advertisement to be used
>>> even if it has been 3 times the FCF keep-alive
>>> advertisement period (FKA) since one was received from
>>> that FCF.  The spec. calls for 2.5 times FKA.
>>>
>>> Fix this and make sure we detect missed keep-alives promptly.
>>>
>>> Signed-off-by: Joe Eykholt <[email protected]>
>>>
>>> --
>>> Use fka_period as periodic timeouts to age out fcf if
>>> keep alives are disabled due to fd_flags set and also
>>> stop updating keep alive values in that case.
>>>
>>> Update select fcf time only if fcf is not already selected or
>>> select time is not already determined from parse adv, and then
>>> have select time cleared only once after fcf is selected.
>>>
>>> Changed deadline check to time_after_eq() from time_after()
>>> since now next timeout will be on exact 2.5 times FKA followed
>>> by first advertisement.
>>>
>>> Signed-off-by: Vasu Dev <[email protected]>
>>> ---
>>>
>>>  drivers/scsi/fcoe/libfcoe.c |   89 
>>> ++++++++++++++++++++++++-------------------
>>>  1 files changed, 50 insertions(+), 39 deletions(-)
>>>
>>> diff --git a/drivers/scsi/fcoe/libfcoe.c b/drivers/scsi/fcoe/libfcoe.c
>>> index 1d5b949..6ac1f0e 100644
>>> --- a/drivers/scsi/fcoe/libfcoe.c
>>> +++ b/drivers/scsi/fcoe/libfcoe.c
>>> @@ -557,38 +557,48 @@ EXPORT_SYMBOL(fcoe_ctlr_els_send);
>>>   *
>>>   * Called with lock held and preemption disabled.
>>>   *
>>> - * An FCF is considered old if we have missed three advertisements.
>>> - * That is, there have been no valid advertisement from it for three
>>> - * times its keep-alive period including fuzz.
>>> + * An FCF is considered old if we have missed two advertisements.
>>> + * That is, there have been no valid advertisement from it for 2.5
>>> + * times its keep-alive period.
>>>   *
>>>   * In addition, determine the time when an FCF selection can occur.
>>>   *
>>>   * Also, increment the MissDiscAdvCount when no advertisement is received
>>>   * for the corresponding FCF for 1.5 * FKA_ADV_PERIOD (FC-BB-5 LESB).
>>> + *
>>> + * Returns the time in jiffies for the next call.
>>>   */
>>> -static void fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
>>> +static unsigned long fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
>>>  {
>>>     struct fcoe_fcf *fcf;
>>>     struct fcoe_fcf *next;
>>> +   unsigned long next_timer = jiffies + msecs_to_jiffies(FIP_DEF_FKA);
>>> +   unsigned long deadline;
>>>     unsigned long sel_time = 0;
>>> -   unsigned long mda_time = 0;
>>>     struct fcoe_dev_stats *stats;
>>>  
>>>     list_for_each_entry_safe(fcf, next, &fip->fcfs, list) {
>>> -           mda_time = fcf->fka_period + (fcf->fka_period >> 1);
>>> -           if ((fip->sel_fcf == fcf) &&
>>> -               (time_after(jiffies, fcf->time + mda_time))) {
>>> -                   mod_timer(&fip->timer, jiffies + mda_time);
>>> -                   stats = per_cpu_ptr(fip->lp->dev_stats,
>>> -                                       smp_processor_id());
>>> -                   stats->MissDiscAdvCount++;
>>> -                   printk(KERN_INFO "libfcoe: host%d: Missing Discovery "
>>> -                          "Advertisement for fab %16.16llx count %lld\n",
>>> -                          fip->lp->host->host_no, fcf->fabric_name,
>>> -                          stats->MissDiscAdvCount);
>>
>>> +           deadline = fcf->time + fcf->fka_period + fcf->fka_period / 2;
>>> +           if (fip->sel_fcf == fcf) {
>>> +                   if (fcf->fd_flags &&
>>> +                       time_after(next_timer, jiffies + fcf->fka_period))
>>> +                           next_timer = jiffies + fcf->fka_period;
>> The fd_flags should be in the outer if, since that means we shouldn't
>> be looking for missing advertisements at all.  
> 
> Spec only recommends to not send KA and okay with verifying
> advertisements per sec 7.8.6.3.13, therefore I think admin should know
> any missed adv on at least connected fcf, therefore I added periodic
> fcf->fka_period for only connected fcf when fd_flag is set but only if
> this time happens to be sooner than next_timer in the loop.

OK, I see, that's fine.  I was confused about what the flag meant.
It means keep-alives are not needed, it doesn't say anything about
periodic advertisements.

>> Otherwise we'll set
>> next_timer to the deadline.

> FCF age out is needed on deadline ir-respective of fd_flag value, so
> moving to as suggested below won't age out fcf with fd_flag set.
> I verified fcf does get de-selected on 2.5 KA missed adv after two
> missed adv prints w/ this patch while fd_flag set and I think should be
> kept this way.
> 
>>              if (fip->sel_fcf == fcf && !fcf->fd_flags) {
>>                      if (time_after ...
>>
>>> +
>>> +                   if (time_after(jiffies, deadline)) {
>>> +                           stats = per_cpu_ptr(fip->lp->dev_stats,
>>> +                                               smp_processor_id());
>>> +                           stats->MissDiscAdvCount++;
>>> +                           printk(KERN_INFO "libfcoe: host%d: "
>>> +                                  "Missing Discovery Advertisement "
>>> +                                  "for fab %16.16llx count %lld\n",
>>> +                                  fip->lp->host->host_no, fcf->fabric_name,
>>> +                                  stats->MissDiscAdvCount);
>>> +                   } else if (time_after(next_timer, deadline))
>>> +                           next_timer = deadline;
>>>             }
>>> -           if (time_after(jiffies, fcf->time + fcf->fka_period * 3 +
>>> -                          msecs_to_jiffies(FIP_FCF_FUZZ * 3))) {
>>> +
>>> +           deadline += fcf->fka_period;
>>> +           if (time_after_eq(jiffies, deadline)) {
>> This is a pre-existing bug, but the above also should check
>> fcf->fd_flags around both the if and else so
>>
>>              if (!fcf->fd_flags) {
> 
> This check will prevent aging  out fcf with fd_flag set and we don't
> want that as I said above.
> 
>>                      if (time_after_eq(...)) {
>>                              ...
>>                              delete the fcf
>>
>>                              continue; // so we don't check sel_time on this 
>> one.
>>                      }
>>                      if (time_after(next_timer, deadline))
>>                              next_timer = deadline;
>>              }
>>              if (fcoe_ctlr_mtu_valid(fcf) && ...
>>      
>> This rearranges things a bit.  I hope you see what I mean.
>> Let me know if you want to split that out into a separate
>> patch or want me to update this patch.
>>
> 
> I'm fine whatever approach you decide for any more changes.
> 
>>>                     if (fip->sel_fcf == fcf)
>>>                             fip->sel_fcf = NULL;
>>>                     list_del(&fcf->list);
>>> @@ -598,19 +608,20 @@ static void fcoe_ctlr_age_fcfs(struct fcoe_ctlr *fip)
>>>                     stats = per_cpu_ptr(fip->lp->dev_stats,
>>>                                         smp_processor_id());
>>>                     stats->VLinkFailureCount++;
>>> -           } else if (fcoe_ctlr_mtu_valid(fcf) &&
>>> -                      (!sel_time || time_before(sel_time, fcf->time))) {
>>> -                   sel_time = fcf->time;
>>> +           } else {
>>> +                   if (time_after(next_timer, deadline))
>>> +                           next_timer = deadline;
>>> +                   if (fcoe_ctlr_mtu_valid(fcf) &&
>>> +                       (!sel_time || time_before(sel_time, fcf->time)))
>>> +                           sel_time = fcf->time;
>>>             }
>>>     }
>>> -   if (sel_time) {
>>> +   if (!(fip->sel_fcf || fip->sel_time)) {
>> Shouldn't this be:
>>      if (sel_time && !fip->sel_fcf) {
>>
> 
> Yes, it should be this way so that initiator could switch quickly to new
> fcf in case there is already other fcf is in the list, I can update
> patch for this if you agree w/ rest of the changes, let me know.
> 
>> sel_time non-zero indicates that one of the FCFs in the loop

I split our two patches and changed the above line to:

        if (sel_time && !fip->sel_fcf && !fip->sel_time)

in your patch.  I'll send them out for your review in a bit.

>>
>>>             sel_time += msecs_to_jiffies(FCOE_CTLR_START_DELAY);
>>>             fip->sel_time = sel_time;
>>> -           if (time_before(sel_time, fip->timer.expires))
>>> -                   mod_timer(&fip->timer, sel_time);
>>> -   } else {
>>> -           fip->sel_time = 0;
>>>     }
>>> +
>>> +   return next_timer;
>>>  }
>>>  
>>>  /**
>>> @@ -767,7 +778,7 @@ static void fcoe_ctlr_recv_adv(struct fcoe_ctlr *fip, 
>>> struct sk_buff *skb)
>>>              * ignored after a usable solicited advertisement
>>>              * has been received.
>>>              */
>>> -           if (fcf == fip->sel_fcf) {
>>> +           if (fcf == fip->sel_fcf && !fcf->fd_flags) {
>>>                     fip->ctlr_ka_time -= fcf->fka_period;
>>>                     fip->ctlr_ka_time += new.fka_period;
>>>                     if (time_before(fip->ctlr_ka_time, fip->timer.expires))

Why should the fd_flags keep us from adopting a change in the keep-alive period?
That affects advertisements as well, right?
Also, the else clause (pasted below for context) should not be invoked for the 
selected FCF
in any case so, if we want to check fd_flags we should do it a nested if.

Here's the code as it is after the patch:

                 if (fcf == fip->sel_fcf && !fcf->fd_flags) {
                         fip->ctlr_ka_time -= fcf->fka_period;
                         fip->ctlr_ka_time += new.fka_period;
                         if (time_before(fip->ctlr_ka_time, fip->timer.expires))
                                 mod_timer(&fip->timer, fip->ctlr_ka_time);
                 } else if (!fcoe_ctlr_fcf_usable(fcf))
                         fcf->flags = new.flags;

I think it should be something like:

                 if (fcf == fip->sel_fcf) {
                        if (!fcf->fd_flags) {
                                fip->ctlr_ka_time -= fcf->fka_period;
                                fip->ctlr_ka_time += new.fka_period;
                                if (time_before(fip->ctlr_ka_time,
                                                fip->timer.expires))
                                        mod_timer(&fip->timer,
                                                  fip->ctlr_ka_time);
                        }
                 } else if (!fcoe_ctlr_fcf_usable(fcf))
                         fcf->flags = new.flags;

But only if there's a good reason to ignore the FKA period change when
the D flag is set in fcf (from the first advertisment we received).
I noticed that D flag changes are also ignored after the first advertisment
that's received.   Since it's an administrative setting on the switch,
maybe we should copy it every time we receive an advertisement?
I guess we should leave it as is unless it's causing a problem.

So, could you get back to me on the above change?  Can we just
eliminate the fd_flags test here?

        Thanks,
        Joe

>>> @@ -805,7 +816,7 @@ static void fcoe_ctlr_recv_adv(struct fcoe_ctlr *fip, 
>>> struct sk_buff *skb)
>>>      * If this is the first validated FCF, note the time and
>>>      * set a timer to trigger selection.
>>>      */
>>> -   if (mtu_valid && !fip->sel_time && fcoe_ctlr_fcf_usable(fcf)) {
>>> +   if (mtu_valid && !fip->sel_fcf && fcoe_ctlr_fcf_usable(fcf)) {
>>>             fip->sel_time = jiffies +
>>>                     msecs_to_jiffies(FCOE_CTLR_START_DELAY);
>>>             if (!timer_pending(&fip->timer) ||
>>> @@ -1148,7 +1159,7 @@ static void fcoe_ctlr_timeout(unsigned long arg)
>>>     struct fcoe_ctlr *fip = (struct fcoe_ctlr *)arg;
>>>     struct fcoe_fcf *sel;
>>>     struct fcoe_fcf *fcf;
>>> -   unsigned long next_timer = jiffies + msecs_to_jiffies(FIP_VN_KA_PERIOD);
>>> +   unsigned long next_timer;
>>>  
>>>     spin_lock_bh(&fip->lock);
>>>     if (fip->state == FIP_ST_DISABLED) {
>>> @@ -1157,13 +1168,16 @@ static void fcoe_ctlr_timeout(unsigned long arg)
>>>     }
>>>  
>>>     fcf = fip->sel_fcf;
>>> -   fcoe_ctlr_age_fcfs(fip);
>>> +   next_timer = fcoe_ctlr_age_fcfs(fip);
>>>  
>>>     sel = fip->sel_fcf;
>>> -   if (!sel && fip->sel_time && time_after_eq(jiffies, fip->sel_time)) {
>>> -           fcoe_ctlr_select(fip);
>>> -           sel = fip->sel_fcf;
>>> -           fip->sel_time = 0;
>>> +   if (!sel && fip->sel_time) {
>>> +           if (time_after_eq(jiffies, fip->sel_time)) {
>>> +                   fcoe_ctlr_select(fip);
>>> +                   sel = fip->sel_fcf;
>>> +                   fip->sel_time = 0;
>>> +           } else if (time_after(next_timer, fip->sel_time))
>>> +                   next_timer = fip->sel_time;
>>>     }
>>>  
>>>     if (sel != fcf) {
>>> @@ -1201,12 +1215,9 @@ static void fcoe_ctlr_timeout(unsigned long arg)
>>>             }
>>>             if (time_after(next_timer, fip->port_ka_time))
>>>                     next_timer = fip->port_ka_time;
>>> -           mod_timer(&fip->timer, next_timer);
>>> -   } else if (fip->sel_time) {
>>> -           next_timer = fip->sel_time +
>>> -                   msecs_to_jiffies(FCOE_CTLR_START_DELAY);
>>> -           mod_timer(&fip->timer, next_timer);
>>>     }
>>> +   if (!list_empty(&fip->fcfs))
>>> +           mod_timer(&fip->timer, next_timer);
>>>     if (fip->send_ctlr_ka || fip->send_port_ka)
>>>             schedule_work(&fip->timer_work);
>>>     spin_unlock_bh(&fip->lock);
>>>
>>> _______________________________________________
>>> devel mailing list
>>> [email protected]
>>> http://www.open-fcoe.org/mailman/listinfo/devel
>> _______________________________________________
>> devel mailing list
>> [email protected]
>> http://www.open-fcoe.org/mailman/listinfo/devel
> 

_______________________________________________
devel mailing list
[email protected]
http://www.open-fcoe.org/mailman/listinfo/devel

Reply via email to