Joe Eykholt wrote:
> Timer crashes were caused by freeing a struct fc_rport_priv
> with a timer pending, causing the timer facility list to be
> corrupted.  This was during FC uplink flap tests with a lot
> of targets.
> 
> After discovery, we were doing an ADISC on an rdata that was
> in DELETE state but not yet removed from the lookup list.

Abhijeet Joglekar points out that this description should've
said we were doing a PLOGI in this case, not an ADISC.
The result is the same, though.

I'll update the description and subject line and resubmit.

> This moved the rdata from DELETE state to ADISC state.
> If the ADISC exchange allocation failed and needed to be
> retried, the timer scheduling could race with the free
> being done by fc_rport_work().
> 
> When fc_rport_login() is called on a rport in DELETE state,
> move it to a new state RESTART.  In fc_rport_work, when
> handling a LOGO, STOPPED or FAILED event, look for restart
> state.  In the RESTART case, don't take the rdata off the
> list and after the transport remote port is deleted and
> exchanges are reset, re-login to the remote port.
> 
> Note that the new RESTART state also corrects a problem we
> had when re-discovering a port that had moved to DELETE state.
> In that case, a new rdata was created, but the old rdata
> would do an exchange manager reset affecting the FC_ID
> for both the new rdata and old rdata.  With the new state,
> the new port isn't logged into until after any old exchanges
> are reset.
> 
> Signed-off-by: Joe Eykholt <[email protected]>
> ---
>  drivers/scsi/libfc/fc_rport.c |   69 
> ++++++++++++++++++++++++++++++-----------
>  1 files changed, 50 insertions(+), 19 deletions(-)
> 
> 
> diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
> index c6a15e3..6f36e46 100644
> --- a/drivers/scsi/libfc/fc_rport.c
> +++ b/drivers/scsi/libfc/fc_rport.c
> @@ -88,6 +88,7 @@ static const char *fc_rport_state_names[] = {
>       [RPORT_ST_LOGO] = "LOGO",
>       [RPORT_ST_ADISC] = "ADISC",
>       [RPORT_ST_DELETE] = "Delete",
> +     [RPORT_ST_RESTART] = "Restart",
>  };
>  
>  /**
> @@ -101,8 +102,7 @@ static struct fc_rport_priv *fc_rport_lookup(const struct 
> fc_lport *lport,
>       struct fc_rport_priv *rdata;
>  
>       list_for_each_entry(rdata, &lport->disc.rports, peers)
> -             if (rdata->ids.port_id == port_id &&
> -                 rdata->rp_state != RPORT_ST_DELETE)
> +             if (rdata->ids.port_id == port_id)
>                       return rdata;
>       return NULL;
>  }
> @@ -245,6 +245,7 @@ static void fc_rport_work(struct work_struct *work)
>       struct fc_rport_operations *rport_ops;
>       struct fc_rport_identifiers ids;
>       struct fc_rport *rport;
> +     int restart = 0;
>  
>       mutex_lock(&rdata->rp_mutex);
>       event = rdata->event;
> @@ -297,8 +298,19 @@ static void fc_rport_work(struct work_struct *work)
>               mutex_unlock(&rdata->rp_mutex);
>  
>               if (port_id != FC_FID_DIR_SERV) {
> +                     /*
> +                      * We must drop rp_mutex before taking disc_mutex.
> +                      * Re-evaluate state to allow for restart.
> +                      * A transition to RESTART state must only happen
> +                      * while disc_mutex is held and rdata is on the list.
> +                      */
>                       mutex_lock(&lport->disc.disc_mutex);
> -                     list_del(&rdata->peers);
> +                     mutex_lock(&rdata->rp_mutex);
> +                     if (rdata->rp_state == RPORT_ST_RESTART)
> +                             restart = 1;
> +                     else
> +                             list_del(&rdata->peers);
> +                     mutex_unlock(&rdata->rp_mutex);
>                       mutex_unlock(&lport->disc.disc_mutex);
>               }
>  
> @@ -322,7 +334,13 @@ static void fc_rport_work(struct work_struct *work)
>                       mutex_unlock(&rdata->rp_mutex);
>                       fc_remote_port_delete(rport);
>               }
> -             kref_put(&rdata->kref, lport->tt.rport_destroy);
> +             if (restart) {
> +                     mutex_lock(&rdata->rp_mutex);
> +                     FC_RPORT_DBG(rdata, "work restart\n");
> +                     fc_rport_enter_plogi(rdata);
> +                     mutex_unlock(&rdata->rp_mutex);
> +             } else
> +                     kref_put(&rdata->kref, lport->tt.rport_destroy);
>               break;
>  
>       default:
> @@ -352,6 +370,12 @@ int fc_rport_login(struct fc_rport_priv *rdata)
>               FC_RPORT_DBG(rdata, "ADISC port\n");
>               fc_rport_enter_adisc(rdata);
>               break;
> +     case RPORT_ST_RESTART:
> +             break;
> +     case RPORT_ST_DELETE:
> +             FC_RPORT_DBG(rdata, "Restart deleted port\n");
> +             fc_rport_state_enter(rdata, RPORT_ST_RESTART);
> +             break;
>       default:
>               FC_RPORT_DBG(rdata, "Login to port\n");
>               fc_rport_enter_plogi(rdata);
> @@ -407,20 +431,21 @@ int fc_rport_logoff(struct fc_rport_priv *rdata)
>  
>       if (rdata->rp_state == RPORT_ST_DELETE) {
>               FC_RPORT_DBG(rdata, "Port in Delete state, not removing\n");
> -             mutex_unlock(&rdata->rp_mutex);
>               goto out;
>       }
>  
> -     fc_rport_enter_logo(rdata);
> +     if (rdata->rp_state == RPORT_ST_RESTART)
> +             FC_RPORT_DBG(rdata, "Port in Restart state, deleting\n");
> +     else
> +             fc_rport_enter_logo(rdata);
>  
>       /*
>        * Change the state to Delete so that we discard
>        * the response.
>        */
>       fc_rport_enter_delete(rdata, RPORT_EV_STOP);
> -     mutex_unlock(&rdata->rp_mutex);
> -
>  out:
> +     mutex_unlock(&rdata->rp_mutex);
>       return 0;
>  }
>  
> @@ -476,6 +501,7 @@ static void fc_rport_timeout(struct work_struct *work)
>       case RPORT_ST_READY:
>       case RPORT_ST_INIT:
>       case RPORT_ST_DELETE:
> +     case RPORT_ST_RESTART:
>               break;
>       }
>  
> @@ -509,6 +535,7 @@ static void fc_rport_error(struct fc_rport_priv *rdata, 
> struct fc_frame *fp)
>               fc_rport_enter_logo(rdata);
>               break;
>       case RPORT_ST_DELETE:
> +     case RPORT_ST_RESTART:
>       case RPORT_ST_READY:
>       case RPORT_ST_INIT:
>               break;
> @@ -1258,6 +1285,7 @@ static void fc_rport_recv_plogi_req(struct fc_lport 
> *lport,
>               }
>               break;
>       case RPORT_ST_PRLI:
> +     case RPORT_ST_RTV:
>       case RPORT_ST_READY:
>       case RPORT_ST_ADISC:
>               FC_RPORT_DBG(rdata, "Received PLOGI in logged-in state %d "
> @@ -1265,11 +1293,14 @@ static void fc_rport_recv_plogi_req(struct fc_lport 
> *lport,
>               /* XXX TBD - should reset */
>               break;
>       case RPORT_ST_DELETE:
> -     default:
> -             FC_RPORT_DBG(rdata, "Received PLOGI in unexpected state %d\n",
> -                          rdata->rp_state);
> -             fc_frame_free(rx_fp);
> -             goto out;
> +     case RPORT_ST_LOGO:
> +     case RPORT_ST_RESTART:
> +             FC_RPORT_DBG(rdata, "Received PLOGI in state %s - send busy\n",
> +                          fc_rport_state(rdata));
> +             mutex_unlock(&rdata->rp_mutex);
> +             rjt_data.reason = ELS_RJT_BUSY;
> +             rjt_data.explan = ELS_EXPL_NONE;
> +             goto reject;
>       }
>  
>       /*
> @@ -1520,14 +1551,14 @@ static void fc_rport_recv_logo_req(struct fc_lport 
> *lport,
>               FC_RPORT_DBG(rdata, "Received LOGO request while in state %s\n",
>                            fc_rport_state(rdata));
>  
> +             fc_rport_enter_delete(rdata, RPORT_EV_LOGO);
> +
>               /*
> -              * If the remote port was created due to discovery,
> -              * log back in.  It may have seen a stale RSCN about us.
> +              * If the remote port was created due to discovery, set state
> +              * to log back in.  It may have seen a stale RSCN about us.
>                */
> -             if (rdata->rp_state != RPORT_ST_DELETE && rdata->disc_id)
> -                     fc_rport_enter_plogi(rdata);
> -             else
> -                     fc_rport_enter_delete(rdata, RPORT_EV_LOGO);
> +             if (rdata->disc_id)
> +                     fc_rport_state_enter(rdata, RPORT_ST_RESTART);
>               mutex_unlock(&rdata->rp_mutex);
>       } else
>               FC_RPORT_ID_DBG(lport, sid,
> diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
> index fd2abfa..0dea411 100644
> --- a/include/scsi/libfc.h
> +++ b/include/scsi/libfc.h
> @@ -105,6 +105,7 @@ enum fc_disc_event {
>   * @RPORT_ST_LOGO:   Remote port logout (LOGO) sent
>   * @RPORT_ST_ADISC:  Discover Address sent
>   * @RPORT_ST_DELETE: Remote port being deleted
> + * @RPORT_ST_RESTART: Remote port being deleted and will restart
>   */
>  enum fc_rport_state {
>       RPORT_ST_INIT,
> @@ -115,6 +116,7 @@ enum fc_rport_state {
>       RPORT_ST_LOGO,
>       RPORT_ST_ADISC,
>       RPORT_ST_DELETE,
> +     RPORT_ST_RESTART,
>  };
>  
>  /**
> 
> 
> _______________________________________________
> devel mailing list
> [email protected]
> http://www.open-fcoe.org/mailman/listinfo/devel
_______________________________________________
devel mailing list
[email protected]
http://www.open-fcoe.org/mailman/listinfo/devel

Reply via email to