On 08/18/2009 11:42 AM, Mike Christie wrote:
>
> So my proposal is to just wait for the tasks that are queued and
> affected by the TMF to get fully sent. Note, at this time, we do not
> allow new commands that could be affected by the TMF to be started.
>
> The eh thread sits around and every second would check if the tasks that
> were affected by the tmf but still needed to send data, have sent all
> their data. Either they will eventually send all their data, or the user
> will logout the session or there will be some connection error that
> kills the session.
>
> The wait for a while can be a simple msleep, ssleep, etc. Check out
> drivers like qla2xxx or lpfc for examples if you want to steal a loop
> and wait.
>

Here is a patch that should do this. It is not tested. It was made over
Hannes's first two patches:
[PATCH 1/4] libiscsi: Check TMF state before sending PDU
[PATCH 2/4] libiscsi: Traverse all Data-Out PDUs

--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups 
"open-iscsi" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to 
[email protected]
For more options, visit this group at http://groups.google.com/group/open-iscsi
-~----------~----~----~----~------~----~------~--~---

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index a7ee4bb..e68b6d2 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -2086,6 +2174,82 @@ failed_unlocked:
 }
 EXPORT_SYMBOL_GPL(iscsi_eh_abort);
 
+/* Sleep 1s, locks dropped, if @task is hit by conn's TMF; -EAGAIN if slept. */
+static int sleep_on_aborted_task(struct iscsi_task *task, unsigned lun)
+{
+       struct iscsi_conn *conn = task->conn;
+       struct iscsi_session *session = conn->session;
+       struct iscsi_tm *tmf = &conn->tmhdr;
+
+       switch (ISCSI_TM_FUNC_VALUE(tmf)) {
+       case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET:
+               /* conn->task may be a mgmt task with no scsi_cmnd (sc == NULL) */
+               if (task->sc && lun == task->sc->device->lun) {
+                       ISCSI_DBG_SESSION(session,
+                                         "sleep for sc %p itt 0x%x state %d\n",
+                                         task->sc, task->itt, task->state);
+                       spin_unlock_bh(&session->lock);
+                       mutex_unlock(&session->eh_mutex);
+
+                       /* FIXME ssleep might be too long??? */
+                       ssleep(1);
+
+                       mutex_lock(&session->eh_mutex);
+                       spin_lock_bh(&session->lock);
+                       return -EAGAIN;
+               }
+               break;
+       case ISCSI_TM_FUNC_ABORT_TASK:
+               /* It looks like for abort task the rfc does not
+                * say if we have to send data-outs or not. There is
+                * no mention because we have to send the abort on the
+                * same conn as the task, so we do not have similar
+                * ordering issues as with lun reset and MCS.
+                * FIXME - fix this when we figure out how to
+                * handle fast_abort and abort task in Hannes's
+                * check tmf patch.
+                */
+               break;
+       }
+       return 0;
+}
+
+/**
+ * wait_for_aborted_tasks - wait for pdus affected by reset to be sent
+ * @conn: iscsi conn reset was sent on
+ * @lun: lun reset was sent to
+ *
+ * Notes: called with session lock and eh mutex held; both are dropped to sleep
+ */
+static void wait_for_aborted_tasks(struct iscsi_conn *conn, unsigned lun)
+{
+       struct iscsi_session *session = conn->session;
+       struct iscsi_task *task;
+
+       if (session->fast_abort || conn->tmf_state == TMF_INITIAL)
+               return;
+
+       /*
+        * iscsi_data_xmit could complete multiple tasks while we sleep,
+        * so we restart the whole scan with a goto after every sleep.
+        * FIXME - we should fix this, but it only happens in the eh
+        * path so perf is not that big a deal.
+        */
+restart_loop:
+       if (session->state != ISCSI_STATE_LOGGED_IN ||
+           test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx))
+               return;
+
+       if (conn->task &&
+           (sleep_on_aborted_task(conn->task, lun) == -EAGAIN))
+               goto restart_loop;
+
+       list_for_each_entry(task, &conn->requeue, running) {
+               if (sleep_on_aborted_task(task, lun) == -EAGAIN)
+                       goto restart_loop;
+       }
+}
+
 static void iscsi_prep_lun_reset_pdu(struct scsi_cmnd *sc, struct iscsi_tm *hdr)
 {
        memset(hdr, 0, sizeof(*hdr));
@@ -2146,10 +2310,15 @@ int iscsi_eh_device_reset(struct scsi_cmnd *sc)
        }
 
        rc = SUCCESS;
-       spin_unlock_bh(&session->lock);
 
+       wait_for_aborted_tasks(conn, sc->device->lun);
+       spin_unlock_bh(&session->lock);
+       /*
+        * iscsi_data_xmit uses list_for_each_safe but fail_scsi_tasks
+        * could remove multiple tasks, so for now we just suspend the tx
+        * thread to avoid the problem.
+        */
        iscsi_suspend_tx(conn);
-
        spin_lock_bh(&session->lock);
        fail_scsi_tasks(conn, sc->device->lun, DID_ERROR);
        conn->tmf_state = TMF_INITIAL;

Reply via email to