Joe Eykholt wrote: > Timer crashes were caused by freeing a struct fc_rport_priv > with a timer pending, causing the timer facility list to be > corrupted. This was during FC uplink flap tests with a lot > of targets. > > After discovery, we were doing an ADISC on an rdata that was > in DELETE state but not yet removed from the lookup list.
Abhijeet Joglekar points out that this description should've said we were doing a PLOGI in this case, not an ADISC. The result is the same, though. I'll update the description and subject line and resubmit. > This moved the rdata from DELETE state to ADISC state. > If the ADISC exchange allocation failed and needed to be > retried, the timer scheduling could race with the free > being done by fc_rport_work(). > > When fc_rport_login() is called on a rport in DELETE state, > move it to a new state RESTART. In fc_rport_work, when > handling a LOGO, STOPPED or FAILED event, look for restart > state. In the RESTART case, don't take the rdata off the > list and after the transport remote port is deleted and > exchanges are reset, re-login to the remote port. > > Note that the new RESTART state also corrects a problem we > had when re-discovering a port that had moved to DELETE state. > In that case, a new rdata was created, but the old rdata > would do an exchange manager reset affecting the FC_ID > for both the new rdata and old rdata. With the new state, > the new port isn't logged into until after any old exchanges > are reset. > > Signed-off-by: Joe Eykholt <[email protected]> > --- > drivers/scsi/libfc/fc_rport.c | 69 > ++++++++++++++++++++++++++++++----------- > 1 files changed, 50 insertions(+), 19 deletions(-) > > > diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c > index c6a15e3..6f36e46 100644 > --- a/drivers/scsi/libfc/fc_rport.c > +++ b/drivers/scsi/libfc/fc_rport.c > @@ -88,6 +88,7 @@ static const char *fc_rport_state_names[] = { > [RPORT_ST_LOGO] = "LOGO", > [RPORT_ST_ADISC] = "ADISC", > [RPORT_ST_DELETE] = "Delete", > + [RPORT_ST_RESTART] = "Restart", > }; > > /** > @@ -101,8 +102,7 @@ static struct fc_rport_priv *fc_rport_lookup(const struct > fc_lport *lport, > struct fc_rport_priv *rdata; > > list_for_each_entry(rdata, &lport->disc.rports, peers) > - if (rdata->ids.port_id == port_id && > - rdata->rp_state != RPORT_ST_DELETE) > + if (rdata->ids.port_id == port_id) > return rdata; > return NULL; > } > @@ -245,6 +245,7 @@ static void fc_rport_work(struct work_struct *work) > struct fc_rport_operations *rport_ops; > struct fc_rport_identifiers ids; > struct fc_rport *rport; > + int restart = 0; > > mutex_lock(&rdata->rp_mutex); > event = rdata->event; > @@ -297,8 +298,19 @@ static void fc_rport_work(struct work_struct *work) > mutex_unlock(&rdata->rp_mutex); > > if (port_id != FC_FID_DIR_SERV) { > + /* > + * We must drop rp_mutex before taking disc_mutex. > + * Re-evaluate state to allow for restart. > + * A transition to RESTART state must only happen > + * while disc_mutex is held and rdata is on the list. > + */ > mutex_lock(&lport->disc.disc_mutex); > - list_del(&rdata->peers); > + mutex_lock(&rdata->rp_mutex); > + if (rdata->rp_state == RPORT_ST_RESTART) > + restart = 1; > + else > + list_del(&rdata->peers); > + mutex_unlock(&rdata->rp_mutex); > mutex_unlock(&lport->disc.disc_mutex); > } > > @@ -322,7 +334,13 @@ static void fc_rport_work(struct work_struct *work) > mutex_unlock(&rdata->rp_mutex); > fc_remote_port_delete(rport); > } > - kref_put(&rdata->kref, lport->tt.rport_destroy); > + if (restart) { > + mutex_lock(&rdata->rp_mutex); > + FC_RPORT_DBG(rdata, "work restart\n"); > + fc_rport_enter_plogi(rdata); > + mutex_unlock(&rdata->rp_mutex); > + } else > + kref_put(&rdata->kref, lport->tt.rport_destroy); > break; > > default: > @@ -352,6 +370,12 @@ int fc_rport_login(struct fc_rport_priv *rdata) > FC_RPORT_DBG(rdata, "ADISC port\n"); > fc_rport_enter_adisc(rdata); > break; > + case RPORT_ST_RESTART: > + break; > + case RPORT_ST_DELETE: > + FC_RPORT_DBG(rdata, "Restart deleted port\n"); > + fc_rport_state_enter(rdata, RPORT_ST_RESTART); > + break; > default: > FC_RPORT_DBG(rdata, "Login to port\n"); > fc_rport_enter_plogi(rdata); > @@ -407,20 +431,21 @@ int fc_rport_logoff(struct fc_rport_priv *rdata) > > if (rdata->rp_state == RPORT_ST_DELETE) { > FC_RPORT_DBG(rdata, "Port in Delete state, not removing\n"); > - mutex_unlock(&rdata->rp_mutex); > goto out; > } > > - fc_rport_enter_logo(rdata); > + if (rdata->rp_state == RPORT_ST_RESTART) > + FC_RPORT_DBG(rdata, "Port in Restart state, deleting\n"); > + else > + fc_rport_enter_logo(rdata); > > /* > * Change the state to Delete so that we discard > * the response. > */ > fc_rport_enter_delete(rdata, RPORT_EV_STOP); > - mutex_unlock(&rdata->rp_mutex); > - > out: > + mutex_unlock(&rdata->rp_mutex); > return 0; > } > > @@ -476,6 +501,7 @@ static void fc_rport_timeout(struct work_struct *work) > case RPORT_ST_READY: > case RPORT_ST_INIT: > case RPORT_ST_DELETE: > + case RPORT_ST_RESTART: > break; > } > > @@ -509,6 +535,7 @@ static void fc_rport_error(struct fc_rport_priv *rdata, > struct fc_frame *fp) > fc_rport_enter_logo(rdata); > break; > case RPORT_ST_DELETE: > + case RPORT_ST_RESTART: > case RPORT_ST_READY: > case RPORT_ST_INIT: > break; > @@ -1258,6 +1285,7 @@ static void fc_rport_recv_plogi_req(struct fc_lport > *lport, > } > break; > case RPORT_ST_PRLI: > + case RPORT_ST_RTV: > case RPORT_ST_READY: > case RPORT_ST_ADISC: > FC_RPORT_DBG(rdata, "Received PLOGI in logged-in state %d " > @@ -1265,11 +1293,14 @@ static void fc_rport_recv_plogi_req(struct fc_lport > *lport, > /* XXX TBD - should reset */ > break; > case RPORT_ST_DELETE: > - default: > - FC_RPORT_DBG(rdata, "Received PLOGI in unexpected state %d\n", > - rdata->rp_state); > - fc_frame_free(rx_fp); > - goto out; > + case RPORT_ST_LOGO: > + case RPORT_ST_RESTART: > + FC_RPORT_DBG(rdata, "Received PLOGI in state %s - send busy\n", > + fc_rport_state(rdata)); > + mutex_unlock(&rdata->rp_mutex); > + rjt_data.reason = ELS_RJT_BUSY; > + rjt_data.explan = ELS_EXPL_NONE; > + goto reject; > } > > /* > @@ -1520,14 +1551,14 @@ static void fc_rport_recv_logo_req(struct fc_lport > *lport, > FC_RPORT_DBG(rdata, "Received LOGO request while in state %s\n", > fc_rport_state(rdata)); > > + fc_rport_enter_delete(rdata, RPORT_EV_LOGO); > + > /* > - * If the remote port was created due to discovery, > - * log back in. It may have seen a stale RSCN about us. > + * If the remote port was created due to discovery, set state > + * to log back in. It may have seen a stale RSCN about us. > */ > - if (rdata->rp_state != RPORT_ST_DELETE && rdata->disc_id) > - fc_rport_enter_plogi(rdata); > - else > - fc_rport_enter_delete(rdata, RPORT_EV_LOGO); > + if (rdata->disc_id) > + fc_rport_state_enter(rdata, RPORT_ST_RESTART); > mutex_unlock(&rdata->rp_mutex); > } else > FC_RPORT_ID_DBG(lport, sid, > diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h > index fd2abfa..0dea411 100644 > --- a/include/scsi/libfc.h > +++ b/include/scsi/libfc.h > @@ -105,6 +105,7 @@ enum fc_disc_event { > * @RPORT_ST_LOGO: Remote port logout (LOGO) sent > * @RPORT_ST_ADISC: Discover Address sent > * @RPORT_ST_DELETE: Remote port being deleted > + * @RPORT_ST_RESTART: Remote port being deleted and will restart > */ > enum fc_rport_state { > RPORT_ST_INIT, > @@ -115,6 +116,7 @@ enum fc_rport_state { > RPORT_ST_LOGO, > RPORT_ST_ADISC, > RPORT_ST_DELETE, > + RPORT_ST_RESTART, > }; > > /** > > > _______________________________________________ > devel mailing list > [email protected] > http://www.open-fcoe.org/mailman/listinfo/devel _______________________________________________ devel mailing list [email protected] http://www.open-fcoe.org/mailman/listinfo/devel
