Joe, Changes look good to me, however I have a comment. Please read inline.
Thanks, Bhanu On Mon, 2010-05-17 at 16:38 -0700, Joe Eykholt wrote: > This patch somewhat combines two fixes to remote port handing in libfc. > > The first problem was that rport work could be queued on a deleted > and freed rport. This is handled by not resetting rdata->event > ton NONE if the rdata is about to be deleted. > > However, that fix led to the second problem, described by > Bhanu Gollapudi, as follows: > > Here is the sequence of events. T1 is first LOGO receive thread, T2 is > > fc_rport_work() scheduled by T1 and T3 is second LOGO receive thread and > > T4 is fc_rport_work scheduled by T3. > > > > 1. (T1)Received 1st LOGO in state Ready > > 2. (T1)Delete port & enter to RESTART state. > > 3. (T1)schdule event_work, since event is RPORT_EV_NONE. > > 4. (T1)set event = RPORT_EV_LOGO > > 5. (T1)Enter RESTART state as disc_id is set. > > 6. (T2)remember to PLOGI, and set event = RPORT_EV_NONE > > 6. (T3)Received 2nd LOGO > > 7. (T3)Delete Port & enter to RESTART state. > > 8. (T3)schedule event_work, since event is RPORT_EV_NONE. > > 9. (T3)Enter RESTART state as disc_id is set. > > 9. (T3)set event = RPORT_EV_LOGO > > 10.(T2)work restart, enter PLOGI state and issues PLOGI > > 11.(T4)Since state is not RESTART anymore, restart is not set, and the > > event is not reset to RPORT_EV_NONE. (current event is RPORT_EV_LOGO). > > 12. Now, PLOGI succeeds and fc_rport_enter_ready() will not schedule > > event_work, and hence the rport will never be created, eventually losing > > the target after dev_loss_tmo. > > So, the problem here is that we were tracking the desire for > the rport be restarted by state RESTART, which was otherwise > equivalent to DELETE. A contributing factor is that we dropped > the lock between steps 6 and 10 in thread T2, which allows the > state to change, and we didn't completely re-evaluate then. > > This is hopefully corrected by the following minor redesign: > > Simplify the rport restart logic by making the decision to > restart after deleting the transport rport. That decision > is based on a new STARTED flag that indicates fc_rport_login() > has been called and fc_rport_logoff() has not been called > since then. This replaces the need for the RESTART state. > > Only restart if the rdata is still in DELETED state > and only if it still has the STARTED flag set. > > Also now, since we clear the event code much later in the > work thread, allow for the possibility that the rport may > have become READY again via incoming PLOGI, and if so, > queue another event to handle that. > > In the problem scenario, the second LOGO received will > cause the LOGO event to occur again. > > Reported-by: Bhanu Gollapudi <[email protected]> > Signed-off-by: Joe Eykholt <[email protected]> > --- > drivers/scsi/libfc/fc_rport.c | 80 > +++++++++++++++++------------------------ > include/scsi/libfc.h | 5 +-- > 2 files changed, 35 insertions(+), 50 deletions(-) > > > diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c > index 39e440f..5a00a1a 100644 > --- a/drivers/scsi/libfc/fc_rport.c > +++ b/drivers/scsi/libfc/fc_rport.c > @@ -89,7 +89,6 @@ static const char *fc_rport_state_names[] = { > [RPORT_ST_LOGO] = "LOGO", > [RPORT_ST_ADISC] = "ADISC", > [RPORT_ST_DELETE] = "Delete", > - [RPORT_ST_RESTART] = "Restart", > }; > > /** > @@ -246,7 +245,6 @@ static void fc_rport_work(struct work_struct *work) > struct fc_rport_operations *rport_ops; > struct fc_rport_identifiers ids; > struct fc_rport *rport; > - int restart = 0; > > mutex_lock(&rdata->rp_mutex); > event = rdata->event; > @@ -296,26 +294,9 @@ static void fc_rport_work(struct work_struct *work) > case RPORT_EV_LOGO: > case RPORT_EV_STOP: > port_id = rdata->ids.port_id; > + rdata->rport = NULL; Can you please have this assignment after the callback? That way, it will help the LLDD specific callback routine to access the driver specific data structures from fc_rport_libfc_priv structure from rdata->rport. > mutex_unlock(&rdata->rp_mutex); > > - if (port_id != FC_FID_DIR_SERV) { > - /* > - * We must drop rp_mutex before taking disc_mutex. > - * Re-evaluate state to allow for restart. > - * A transition to RESTART state must only happen > - * while disc_mutex is held and rdata is on the list. > - */ > - mutex_lock(&lport->disc.disc_mutex); > - mutex_lock(&rdata->rp_mutex); > - if (rdata->rp_state == RPORT_ST_RESTART) > - restart = 1; > - else > - list_del(&rdata->peers); > - rdata->event = RPORT_EV_NONE; > - mutex_unlock(&rdata->rp_mutex); > - mutex_unlock(&lport->disc.disc_mutex); > - } > - > if (rport_ops && rport_ops->event_callback) { > FC_RPORT_DBG(rdata, "callback ev %d\n", event); > rport_ops->event_callback(lport, rdata, event); > @@ -331,18 +312,36 @@ static void fc_rport_work(struct work_struct *work) > if (rport) { I propose to have "rdata->rport = NULL" here, which ofcourse should be called with rp_mutex held. > rpriv = rport->dd_data; > rpriv->rp_state = RPORT_ST_DELETE; > - mutex_lock(&rdata->rp_mutex); > - rdata->rport = NULL; > - mutex_unlock(&rdata->rp_mutex); > fc_remote_port_delete(rport); > } > - if (restart) { > - mutex_lock(&rdata->rp_mutex); > - FC_RPORT_DBG(rdata, "work restart\n"); > - fc_rport_enter_plogi(rdata); > + > + mutex_lock(&lport->disc.disc_mutex); > + mutex_lock(&rdata->rp_mutex); > + if (rdata->rp_state == RPORT_ST_DELETE) { > + if (port_id == FC_FID_DIR_SERV) { > + rdata->event = RPORT_EV_NONE; > + mutex_unlock(&rdata->rp_mutex); > + } else if (rdata->flags & FC_RP_STARTED) { > + rdata->event = RPORT_EV_NONE; > + FC_RPORT_DBG(rdata, "work restart\n"); > + fc_rport_enter_plogi(rdata); > + mutex_unlock(&rdata->rp_mutex); > + } else { > + FC_RPORT_DBG(rdata, "work delete\n"); > + list_del(&rdata->peers); > + mutex_unlock(&rdata->rp_mutex); > + kref_put(&rdata->kref, lport->tt.rport_destroy); > + } > + } else { > + /* > + * Re-open for events. Reissue READY event if ready. > + */ > + rdata->event = RPORT_EV_NONE; > + if (rdata->rp_state == RPORT_ST_READY) > + fc_rport_enter_ready(rdata); > mutex_unlock(&rdata->rp_mutex); > - } else > - kref_put(&rdata->kref, lport->tt.rport_destroy); > + } > + mutex_unlock(&lport->disc.disc_mutex); > break; > > default: > @@ -367,16 +366,14 @@ int fc_rport_login(struct fc_rport_priv *rdata) > { > mutex_lock(&rdata->rp_mutex); > > + rdata->flags |= FC_RP_STARTED; > switch (rdata->rp_state) { > case RPORT_ST_READY: > FC_RPORT_DBG(rdata, "ADISC port\n"); > fc_rport_enter_adisc(rdata); > break; > - case RPORT_ST_RESTART: > - break; > case RPORT_ST_DELETE: > FC_RPORT_DBG(rdata, "Restart deleted port\n"); > - fc_rport_state_enter(rdata, RPORT_ST_RESTART); > break; > default: > FC_RPORT_DBG(rdata, "Login to port\n"); > @@ -431,15 +428,12 @@ int fc_rport_logoff(struct fc_rport_priv *rdata) > > FC_RPORT_DBG(rdata, "Remove port\n"); > > + rdata->flags &= ~FC_RP_STARTED; > if (rdata->rp_state == RPORT_ST_DELETE) { > FC_RPORT_DBG(rdata, "Port in Delete state, not removing\n"); > goto out; > } > - > - if (rdata->rp_state == RPORT_ST_RESTART) > - FC_RPORT_DBG(rdata, "Port in Restart state, deleting\n"); > - else > - fc_rport_enter_logo(rdata); > + fc_rport_enter_logo(rdata); > > /* > * Change the state to Delete so that we discard > @@ -503,7 +497,6 @@ static void fc_rport_timeout(struct work_struct *work) > case RPORT_ST_READY: > case RPORT_ST_INIT: > case RPORT_ST_DELETE: > - case RPORT_ST_RESTART: > break; > } > > @@ -527,6 +520,7 @@ static void fc_rport_error(struct fc_rport_priv *rdata, > struct fc_frame *fp) > switch (rdata->rp_state) { > case RPORT_ST_PLOGI: > case RPORT_ST_LOGO: > + rdata->flags &= ~FC_RP_STARTED; > fc_rport_enter_delete(rdata, RPORT_EV_FAILED); > break; > case RPORT_ST_RTV: > @@ -537,7 +531,6 @@ static void fc_rport_error(struct fc_rport_priv *rdata, > struct fc_frame *fp) > fc_rport_enter_logo(rdata); > break; > case RPORT_ST_DELETE: > - case RPORT_ST_RESTART: > case RPORT_ST_READY: > case RPORT_ST_INIT: > break; > @@ -739,6 +732,7 @@ static void fc_rport_prli_resp(struct fc_seq *sp, struct > fc_frame *fp, > > } else { > FC_RPORT_DBG(rdata, "Bad ELS response for PRLI command\n"); > + rdata->flags &= ~FC_RP_STARTED; > fc_rport_enter_delete(rdata, RPORT_EV_FAILED); > } > > @@ -1377,7 +1371,6 @@ static void fc_rport_recv_plogi_req(struct fc_lport > *lport, > break; > case RPORT_ST_DELETE: > case RPORT_ST_LOGO: > - case RPORT_ST_RESTART: > FC_RPORT_DBG(rdata, "Received PLOGI in state %s - send busy\n", > fc_rport_state(rdata)); > mutex_unlock(&rdata->rp_mutex); > @@ -1614,13 +1607,6 @@ static void fc_rport_recv_logo_req(struct fc_lport > *lport, > fc_rport_state(rdata)); > > fc_rport_enter_delete(rdata, RPORT_EV_LOGO); > - > - /* > - * If the remote port was created due to discovery, set state > - * to log back in. It may have seen a stale RSCN about us. > - */ > - if (rdata->disc_id) > - fc_rport_state_enter(rdata, RPORT_ST_RESTART); > mutex_unlock(&rdata->rp_mutex); > } else > FC_RPORT_ID_DBG(lport, sid, > diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h > index 7495c0b..db54c4a 100644 > --- a/include/scsi/libfc.h > +++ b/include/scsi/libfc.h > @@ -104,7 +104,6 @@ enum fc_disc_event { > * @RPORT_ST_LOGO: Remote port logout (LOGO) sent > * @RPORT_ST_ADISC: Discover Address sent > * @RPORT_ST_DELETE: Remote port being deleted > - * @RPORT_ST_RESTART: Remote port being deleted and will restart > */ > enum fc_rport_state { > RPORT_ST_INIT, > @@ -115,7 +114,6 @@ enum fc_rport_state { > RPORT_ST_LOGO, > RPORT_ST_ADISC, > RPORT_ST_DELETE, > - RPORT_ST_RESTART, > }; > > /** > @@ -173,6 +171,7 @@ struct fc_rport_libfc_priv { > u16 flags; > #define FC_RP_FLAGS_REC_SUPPORTED (1 << 0) > #define FC_RP_FLAGS_RETRY (1 << 1) > + #define FC_RP_STARTED (1 << 2) > unsigned int e_d_tov; > unsigned int r_a_tov; > }; > @@ -185,7 +184,7 @@ struct fc_rport_libfc_priv { > * @rp_state: Enumeration that tracks progress of PLOGI, PRLI, > * and RTV exchanges > * @ids: The remote port identifiers and roles > - * @flags: REC and RETRY supported flags > + * @flags: STARTED, REC and RETRY_SUPPORTED flags > * @max_seq: Maximum number of concurrent sequences > * @disc_id: The discovery identifier > * @maxframe_size: The maximum frame size > > > _______________________________________________ > devel mailing list > [email protected] > http://www.open-fcoe.org/mailman/listinfo/devel > _______________________________________________ devel mailing list [email protected] http://www.open-fcoe.org/mailman/listinfo/devel
