this patch adds handling of BUSY status reponse from an iSCSI target. Currently, we fail with -EIO in case of SCSI_STATUS_BUSY while the obvious reaction would be to retry the operation after some time. The retry time is randomly choosen from a range with exponential growth increasing with each retry.
This patch includes most of the changes by a an upcoming patch from Stefan Hajnoczi: iscsi: implement .bdrv_detach/attach_aio_context() because I also need the reference to the aio_context for the retry timer to work. I included the changes to maintain better mergeability. Signed-off-by: Peter Lieven <p...@kamp.de> --- block/iscsi.c | 122 ++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 91 insertions(+), 31 deletions(-) diff --git a/block/iscsi.c b/block/iscsi.c index 3892cc5..7db7ece 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -49,6 +49,7 @@ typedef struct IscsiLun { struct iscsi_context *iscsi; + AioContext *aio_context; int lun; enum scsi_inquiry_peripheral_device_type type; int block_size; @@ -73,6 +74,8 @@ typedef struct IscsiTask { struct scsi_task *task; Coroutine *co; QEMUBH *bh; + IscsiLun *iscsilun; + QEMUTimer *retry_timer; } IscsiTask; typedef struct IscsiAIOCB { @@ -95,6 +98,7 @@ typedef struct IscsiAIOCB { #define NOP_INTERVAL 5000 #define MAX_NOP_FAILURES 3 #define ISCSI_CMD_RETRIES 5 +static const unsigned iscsi_retry_times[] = {4, 16, 64, 256, 1024, 4096}; /* this threshhold is a trade-off knob to choose between * the potential additional overhead of an extra GET_LBA_STATUS request @@ -133,7 +137,7 @@ iscsi_schedule_bh(IscsiAIOCB *acb) if (acb->bh) { return; } - acb->bh = qemu_bh_new(iscsi_bh_cb, acb); + acb->bh = aio_bh_new(acb->iscsilun->aio_context, iscsi_bh_cb, acb); qemu_bh_schedule(acb->bh); } @@ -144,6 +148,18 @@ static void iscsi_co_generic_bh_cb(void *opaque) qemu_coroutine_enter(iTask->co, NULL); } +static void iscsi_retry_timer_expired(void *opaque) +{ + struct IscsiTask *iTask = opaque; + timer_del(iTask->retry_timer); + timer_free(iTask->retry_timer); + if (iTask->co) { + qemu_coroutine_enter(iTask->co, NULL); + } +} + +#define RANDRANGE(M, N) (M + rand() / (RAND_MAX / (N - M + 1) + 1)) + static void iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, void *command_data, void *opaque) @@ -156,20 +172,40 @@ iscsi_co_generic_cb(struct iscsi_context *iscsi, int status, iTask->do_retry = 0; iTask->task = task; - if (iTask->retries-- > 0 && status == SCSI_STATUS_CHECK_CONDITION - && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) { - error_report("iSCSI CheckCondition: %s", iscsi_get_error(iscsi)); - iTask->do_retry = 1; - goto out; - } - if (status != SCSI_STATUS_GOOD) { + if (iTask->retries++ < ISCSI_CMD_RETRIES) { + if (status == SCSI_STATUS_CHECK_CONDITION + && task->sense.key == SCSI_SENSE_UNIT_ATTENTION) { + error_report("iSCSI CheckCondition: %s", + iscsi_get_error(iscsi)); + iTask->do_retry = 1; + goto out; + } + if (status == SCSI_STATUS_BUSY) { + unsigned retry_time = + RANDRANGE(iscsi_retry_times[iTask->retries - 1], + iscsi_retry_times[iTask->retries]); + error_report("iSCSI Busy (retry #%u in %u ms): %s", + iTask->retries, retry_time, + iscsi_get_error(iscsi)); + iTask->retry_timer = aio_timer_new(iTask->iscsilun->aio_context, + QEMU_CLOCK_REALTIME, + SCALE_MS, + iscsi_retry_timer_expired, + iTask); + timer_mod(iTask->retry_timer, + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + retry_time); + iTask->do_retry = 1; + return; + } + } error_report("iSCSI Failure: %s", iscsi_get_error(iscsi)); } out: if (iTask->co) { - iTask->bh = qemu_bh_new(iscsi_co_generic_bh_cb, iTask); + iTask->bh = aio_bh_new(iTask->iscsilun->aio_context, + iscsi_co_generic_bh_cb, iTask); qemu_bh_schedule(iTask->bh); } } @@ -178,7 +214,7 @@ static void iscsi_co_init_iscsitask(IscsiLun *iscsilun, struct IscsiTask *iTask) { *iTask = (struct IscsiTask) { .co = qemu_coroutine_self(), - .retries = ISCSI_CMD_RETRIES, + .iscsilun = iscsilun, }; } @@ -209,7 +245,7 @@ iscsi_aio_cancel(BlockDriverAIOCB *blockacb) iscsi_abort_task_cb, acb); while (acb->status == -EINPROGRESS) { - qemu_aio_wait(); + aio_poll(iscsilun->aio_context, true); } } @@ -232,11 +268,11 @@ iscsi_set_events(IscsiLun *iscsilun) ev = POLLIN; ev |= iscsi_which_events(iscsi); if (ev != iscsilun->events) { - qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), - iscsi_process_read, - (ev & POLLOUT) ? iscsi_process_write : NULL, - iscsilun); - + aio_set_fd_handler(iscsilun->aio_context, + iscsi_get_fd(iscsi), + iscsi_process_read, + (ev & POLLOUT) ? iscsi_process_write : NULL, + iscsilun); } iscsilun->events = ev; @@ -791,7 +827,7 @@ static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status); while (status == -EINPROGRESS) { - qemu_aio_wait(); + aio_poll(iscsilun->aio_context, true); } return 0; @@ -1195,6 +1231,40 @@ fail_with_err: return NULL; } +static void iscsi_detach_aio_context(BlockDriverState *bs) +{ + IscsiLun *iscsilun = bs->opaque; + + aio_set_fd_handler(iscsilun->aio_context, + iscsi_get_fd(iscsilun->iscsi), + NULL, NULL, NULL); + iscsilun->events = 0; + + if (iscsilun->nop_timer) { + timer_del(iscsilun->nop_timer); + timer_free(iscsilun->nop_timer); + iscsilun->nop_timer = NULL; + } +} + +static void iscsi_attach_aio_context(BlockDriverState *bs, + AioContext *new_context) +{ + IscsiLun *iscsilun = bs->opaque; + + iscsilun->aio_context = new_context; + iscsi_set_events(iscsilun); + +#if defined(LIBISCSI_FEATURE_NOP_COUNTER) + /* Set up a timer for sending out iSCSI NOPs */ + iscsilun->nop_timer = aio_timer_new(iscsilun->aio_context, + QEMU_CLOCK_REALTIME, SCALE_MS, + iscsi_nop_timed_event, iscsilun); + timer_mod(iscsilun->nop_timer, + qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL); +#endif +} + /* * We support iscsi url's on the form * iscsi://[<username>%<password>@]<host>[:<port>]/<targetname>/<lun> @@ -1301,6 +1371,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, } iscsilun->iscsi = iscsi; + iscsilun->aio_context = bdrv_get_aio_context(bs); iscsilun->lun = iscsi_url->lun; iscsilun->has_write_same = true; @@ -1374,11 +1445,7 @@ static int iscsi_open(BlockDriverState *bs, QDict *options, int flags, scsi_free_scsi_task(task); task = NULL; -#if defined(LIBISCSI_FEATURE_NOP_COUNTER) - /* Set up a timer for sending out iSCSI NOPs */ - iscsilun->nop_timer = timer_new_ms(QEMU_CLOCK_REALTIME, iscsi_nop_timed_event, iscsilun); - timer_mod(iscsilun->nop_timer, qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + NOP_INTERVAL); -#endif + iscsi_attach_aio_context(bs, iscsilun->aio_context); /* Guess the internal cluster (page) size of the iscsi target by the means * of opt_unmap_gran. Transfer the unmap granularity only if it has a @@ -1422,11 +1489,7 @@ static void iscsi_close(BlockDriverState *bs) IscsiLun *iscsilun = bs->opaque; struct iscsi_context *iscsi = iscsilun->iscsi; - if (iscsilun->nop_timer) { - timer_del(iscsilun->nop_timer); - timer_free(iscsilun->nop_timer); - } - qemu_aio_set_fd_handler(iscsi_get_fd(iscsi), NULL, NULL, NULL); + iscsi_detach_aio_context(bs); iscsi_destroy_context(iscsi); g_free(iscsilun->zeroblock); g_free(iscsilun->allocationmap); @@ -1530,10 +1593,7 @@ static int iscsi_create(const char *filename, QEMUOptionParameter *options, if (ret != 0) { goto out; } - if (iscsilun->nop_timer) { - timer_del(iscsilun->nop_timer); - timer_free(iscsilun->nop_timer); - } + iscsi_detach_aio_context(bs); if (iscsilun->type != TYPE_DISK) { ret = -ENODEV; goto out; -- 1.7.9.5