Bhanu Gollapudi wrote:
> Joe,
>
> Changes look good to me, however I have a comment. Please read inline.
>
> Thanks,
> Bhanu
>
> On Mon, 2010-05-17 at 16:38 -0700, Joe Eykholt wrote:
>> This patch somewhat combines two fixes to remote port handing in libfc.
>>
>> The first problem was that rport work could be queued on a deleted
>> and freed rport. This is handled by not resetting rdata->event
>> ton NONE if the rdata is about to be deleted.
>>
>> However, that fix led to the second problem, described by
>> Bhanu Gollapudi, as follows:
>>> Here is the sequence of events. T1 is first LOGO receive thread, T2 is
>>> fc_rport_work() scheduled by T1 and T3 is second LOGO receive thread and
>>> T4 is fc_rport_work scheduled by T3.
>>>
>>> 1. (T1)Received 1st LOGO in state Ready
>>> 2. (T1)Delete port & enter to RESTART state.
>>> 3. (T1)schdule event_work, since event is RPORT_EV_NONE.
>>> 4. (T1)set event = RPORT_EV_LOGO
>>> 5. (T1)Enter RESTART state as disc_id is set.
>>> 6. (T2)remember to PLOGI, and set event = RPORT_EV_NONE
>>> 6. (T3)Received 2nd LOGO
>>> 7. (T3)Delete Port & enter to RESTART state.
>>> 8. (T3)schedule event_work, since event is RPORT_EV_NONE.
>>> 9. (T3)Enter RESTART state as disc_id is set.
>>> 9. (T3)set event = RPORT_EV_LOGO
>>> 10.(T2)work restart, enter PLOGI state and issues PLOGI
>>> 11.(T4)Since state is not RESTART anymore, restart is not set, and the
>>> event is not reset to RPORT_EV_NONE. (current event is RPORT_EV_LOGO).
>>> 12. Now, PLOGI succeeds and fc_rport_enter_ready() will not schedule
>>> event_work, and hence the rport will never be created, eventually losing
>>> the target after dev_loss_tmo.
>> So, the problem here is that we were tracking the desire for
>> the rport be restarted by state RESTART, which was otherwise
>> equivalent to DELETE. A contributing factor is that we dropped
>> the lock between steps 6 and 10 in thread T2, which allows the
>> state to change, and we didn't completely re-evaluate then.
>>
>> This is hopefully corrected by the following minor redesign:
>>
>> Simplify the rport restart logic by making the decision to
>> restart after deleting the transport rport. That decision
>> is based on a new STARTED flag that indicates fc_rport_login()
>> has been called and fc_rport_logoff() has not been called
>> since then. This replaces the need for the RESTART state.
>>
>> Only restart if the rdata is still in DELETED state
>> and only if it still has the STARTED flag set.
>>
>> Also now, since we clear the event code much later in the
>> work thread, allow for the possibility that the rport may
>> have become READY again via incoming PLOGI, and if so,
>> queue another event to handle that.
>>
>> In the problem scenario, the second LOGO received will
>> cause the LOGO event to occur again.
>>
>> Reported-by: Bhanu Gollapudi <[email protected]>
>> Signed-off-by: Joe Eykholt <[email protected]>
>> ---
>> drivers/scsi/libfc/fc_rport.c | 80
>> +++++++++++++++++------------------------
>> include/scsi/libfc.h | 5 +--
>> 2 files changed, 35 insertions(+), 50 deletions(-)
>>
>>
>> diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
>> index 39e440f..5a00a1a 100644
>> --- a/drivers/scsi/libfc/fc_rport.c
>> +++ b/drivers/scsi/libfc/fc_rport.c
>> @@ -89,7 +89,6 @@ static const char *fc_rport_state_names[] = {
>> [RPORT_ST_LOGO] = "LOGO",
>> [RPORT_ST_ADISC] = "ADISC",
>> [RPORT_ST_DELETE] = "Delete",
>> - [RPORT_ST_RESTART] = "Restart",
>> };
>>
>> /**
>> @@ -246,7 +245,6 @@ static void fc_rport_work(struct work_struct *work)
>> struct fc_rport_operations *rport_ops;
>> struct fc_rport_identifiers ids;
>> struct fc_rport *rport;
>> - int restart = 0;
>>
>> mutex_lock(&rdata->rp_mutex);
>> event = rdata->event;
>> @@ -296,26 +294,9 @@ static void fc_rport_work(struct work_struct *work)
>> case RPORT_EV_LOGO:
>> case RPORT_EV_STOP:
>> port_id = rdata->ids.port_id;
>> + rdata->rport = NULL;
>
> Can you please have this assignment after the callback? That way, it
> will help the LLDD specific callback routine to access the driver
> specific data structures from fc_rport_libfc_priv structure from
> rdata->rport.
Yes, I'll resubmit with that change after more testing.
I'm not sure the LLDD should be going through the rdata or using
the callback, but I don't have a better solution at this point.
Thanks for the review.
>> mutex_unlock(&rdata->rp_mutex);
>>
>> - if (port_id != FC_FID_DIR_SERV) {
>> - /*
>> - * We must drop rp_mutex before taking disc_mutex.
>> - * Re-evaluate state to allow for restart.
>> - * A transition to RESTART state must only happen
>> - * while disc_mutex is held and rdata is on the list.
>> - */
>> - mutex_lock(&lport->disc.disc_mutex);
>> - mutex_lock(&rdata->rp_mutex);
>> - if (rdata->rp_state == RPORT_ST_RESTART)
>> - restart = 1;
>> - else
>> - list_del(&rdata->peers);
>> - rdata->event = RPORT_EV_NONE;
>> - mutex_unlock(&rdata->rp_mutex);
>> - mutex_unlock(&lport->disc.disc_mutex);
>> - }
>> -
>> if (rport_ops && rport_ops->event_callback) {
>> FC_RPORT_DBG(rdata, "callback ev %d\n", event);
>> rport_ops->event_callback(lport, rdata, event);
>> @@ -331,18 +312,36 @@ static void fc_rport_work(struct work_struct *work)
>> if (rport) {
>
> I propose to have "rdata->rport = NULL" here, which ofcourse should be
> called with rp_mutex held.
>
>> rpriv = rport->dd_data;
>> rpriv->rp_state = RPORT_ST_DELETE;
>> - mutex_lock(&rdata->rp_mutex);
>> - rdata->rport = NULL;
>> - mutex_unlock(&rdata->rp_mutex);
>> fc_remote_port_delete(rport);
>> }
>> - if (restart) {
>> - mutex_lock(&rdata->rp_mutex);
>> - FC_RPORT_DBG(rdata, "work restart\n");
>> - fc_rport_enter_plogi(rdata);
>> +
>> + mutex_lock(&lport->disc.disc_mutex);
>> + mutex_lock(&rdata->rp_mutex);
>> + if (rdata->rp_state == RPORT_ST_DELETE) {
>> + if (port_id == FC_FID_DIR_SERV) {
>> + rdata->event = RPORT_EV_NONE;
>> + mutex_unlock(&rdata->rp_mutex);
>> + } else if (rdata->flags & FC_RP_STARTED) {
>> + rdata->event = RPORT_EV_NONE;
>> + FC_RPORT_DBG(rdata, "work restart\n");
>> + fc_rport_enter_plogi(rdata);
>> + mutex_unlock(&rdata->rp_mutex);
>> + } else {
>> + FC_RPORT_DBG(rdata, "work delete\n");
>> + list_del(&rdata->peers);
>> + mutex_unlock(&rdata->rp_mutex);
>> + kref_put(&rdata->kref, lport->tt.rport_destroy);
>> + }
>> + } else {
>> + /*
>> + * Re-open for events. Reissue READY event if ready.
>> + */
>> + rdata->event = RPORT_EV_NONE;
>> + if (rdata->rp_state == RPORT_ST_READY)
>> + fc_rport_enter_ready(rdata);
>> mutex_unlock(&rdata->rp_mutex);
>> - } else
>> - kref_put(&rdata->kref, lport->tt.rport_destroy);
>> + }
>> + mutex_unlock(&lport->disc.disc_mutex);
>> break;
>>
>> default:
>> @@ -367,16 +366,14 @@ int fc_rport_login(struct fc_rport_priv *rdata)
>> {
>> mutex_lock(&rdata->rp_mutex);
>>
>> + rdata->flags |= FC_RP_STARTED;
>> switch (rdata->rp_state) {
>> case RPORT_ST_READY:
>> FC_RPORT_DBG(rdata, "ADISC port\n");
>> fc_rport_enter_adisc(rdata);
>> break;
>> - case RPORT_ST_RESTART:
>> - break;
>> case RPORT_ST_DELETE:
>> FC_RPORT_DBG(rdata, "Restart deleted port\n");
>> - fc_rport_state_enter(rdata, RPORT_ST_RESTART);
>> break;
>> default:
>> FC_RPORT_DBG(rdata, "Login to port\n");
>> @@ -431,15 +428,12 @@ int fc_rport_logoff(struct fc_rport_priv *rdata)
>>
>> FC_RPORT_DBG(rdata, "Remove port\n");
>>
>> + rdata->flags &= ~FC_RP_STARTED;
>> if (rdata->rp_state == RPORT_ST_DELETE) {
>> FC_RPORT_DBG(rdata, "Port in Delete state, not removing\n");
>> goto out;
>> }
>> -
>> - if (rdata->rp_state == RPORT_ST_RESTART)
>> - FC_RPORT_DBG(rdata, "Port in Restart state, deleting\n");
>> - else
>> - fc_rport_enter_logo(rdata);
>> + fc_rport_enter_logo(rdata);
>>
>> /*
>> * Change the state to Delete so that we discard
>> @@ -503,7 +497,6 @@ static void fc_rport_timeout(struct work_struct *work)
>> case RPORT_ST_READY:
>> case RPORT_ST_INIT:
>> case RPORT_ST_DELETE:
>> - case RPORT_ST_RESTART:
>> break;
>> }
>>
>> @@ -527,6 +520,7 @@ static void fc_rport_error(struct fc_rport_priv *rdata,
>> struct fc_frame *fp)
>> switch (rdata->rp_state) {
>> case RPORT_ST_PLOGI:
>> case RPORT_ST_LOGO:
>> + rdata->flags &= ~FC_RP_STARTED;
>> fc_rport_enter_delete(rdata, RPORT_EV_FAILED);
>> break;
>> case RPORT_ST_RTV:
>> @@ -537,7 +531,6 @@ static void fc_rport_error(struct fc_rport_priv *rdata,
>> struct fc_frame *fp)
>> fc_rport_enter_logo(rdata);
>> break;
>> case RPORT_ST_DELETE:
>> - case RPORT_ST_RESTART:
>> case RPORT_ST_READY:
>> case RPORT_ST_INIT:
>> break;
>> @@ -739,6 +732,7 @@ static void fc_rport_prli_resp(struct fc_seq *sp, struct
>> fc_frame *fp,
>>
>> } else {
>> FC_RPORT_DBG(rdata, "Bad ELS response for PRLI command\n");
>> + rdata->flags &= ~FC_RP_STARTED;
>> fc_rport_enter_delete(rdata, RPORT_EV_FAILED);
>> }
>>
>> @@ -1377,7 +1371,6 @@ static void fc_rport_recv_plogi_req(struct fc_lport
>> *lport,
>> break;
>> case RPORT_ST_DELETE:
>> case RPORT_ST_LOGO:
>> - case RPORT_ST_RESTART:
>> FC_RPORT_DBG(rdata, "Received PLOGI in state %s - send busy\n",
>> fc_rport_state(rdata));
>> mutex_unlock(&rdata->rp_mutex);
>> @@ -1614,13 +1607,6 @@ static void fc_rport_recv_logo_req(struct fc_lport
>> *lport,
>> fc_rport_state(rdata));
>>
>> fc_rport_enter_delete(rdata, RPORT_EV_LOGO);
>> -
>> - /*
>> - * If the remote port was created due to discovery, set state
>> - * to log back in. It may have seen a stale RSCN about us.
>> - */
>> - if (rdata->disc_id)
>> - fc_rport_state_enter(rdata, RPORT_ST_RESTART);
>> mutex_unlock(&rdata->rp_mutex);
>> } else
>> FC_RPORT_ID_DBG(lport, sid,
>> diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
>> index 7495c0b..db54c4a 100644
>> --- a/include/scsi/libfc.h
>> +++ b/include/scsi/libfc.h
>> @@ -104,7 +104,6 @@ enum fc_disc_event {
>> * @RPORT_ST_LOGO: Remote port logout (LOGO) sent
>> * @RPORT_ST_ADISC: Discover Address sent
>> * @RPORT_ST_DELETE: Remote port being deleted
>> - * @RPORT_ST_RESTART: Remote port being deleted and will restart
>> */
>> enum fc_rport_state {
>> RPORT_ST_INIT,
>> @@ -115,7 +114,6 @@ enum fc_rport_state {
>> RPORT_ST_LOGO,
>> RPORT_ST_ADISC,
>> RPORT_ST_DELETE,
>> - RPORT_ST_RESTART,
>> };
>>
>> /**
>> @@ -173,6 +171,7 @@ struct fc_rport_libfc_priv {
>> u16 flags;
>> #define FC_RP_FLAGS_REC_SUPPORTED (1 << 0)
>> #define FC_RP_FLAGS_RETRY (1 << 1)
>> + #define FC_RP_STARTED (1 << 2)
>> unsigned int e_d_tov;
>> unsigned int r_a_tov;
>> };
>> @@ -185,7 +184,7 @@ struct fc_rport_libfc_priv {
>> * @rp_state: Enumeration that tracks progress of PLOGI, PRLI,
>> * and RTV exchanges
>> * @ids: The remote port identifiers and roles
>> - * @flags: REC and RETRY supported flags
>> + * @flags: STARTED, REC and RETRY_SUPPORTED flags
>> * @max_seq: Maximum number of concurrent sequences
>> * @disc_id: The discovery identifier
>> * @maxframe_size: The maximum frame size
_______________________________________________
devel mailing list
[email protected]
http://www.open-fcoe.org/mailman/listinfo/devel