On 23/03/2026 01:47, Benjamin Marzinski wrote:
static void alua_rtpg_work(struct work_struct *work) { struct alua_dh_data *h = @@ -670,56 +179,41 @@ static void alua_rtpg_work(struct work_struct *work) int err = SCSI_DH_OK; struct alua_queue_data *qdata, *tmp; unsigned long flags; + int ret;spin_lock_irqsave(&h->lock, flags);h->flags |= ALUA_PG_RUNNING; if (h->flags & ALUA_PG_RUN_RTPG) { - int state = h->state;h->flags &= ~ALUA_PG_RUN_RTPG;spin_unlock_irqrestore(&h->lock, flags); - if (state == SCSI_ACCESS_STATE_TRANSITIONING) { - if (alua_tur(sdev) == SCSI_DH_RETRY) { - spin_lock_irqsave(&h->lock, flags); - h->flags &= ~ALUA_PG_RUNNING; - h->flags |= ALUA_PG_RUN_RTPG; - if (!h->interval) - h->interval = ALUA_RTPG_RETRY_DELAY; - spin_unlock_irqrestore(&h->lock, flags); - queue_delayed_work(kaluad_wq, &h->rtpg_work, - h->interval * HZ); - return; - } - /* Send RTPG on failure or if TUR indicates SUCCESS */ - } - err = alua_rtpg(sdev); - spin_lock_irqsave(&h->lock, flags); - - if (err == SCSI_DH_RETRY || h->flags & ALUA_PG_RUN_RTPG) { + ret = scsi_alua_rtpg_run(sdev); + if (ret == -EAGAIN) {This no longer handles the case where you want to trigger a new rtpg as soon as the running one finishes. I think it should be checking (ret == -EAGAIN || h->flags & ALUA_PG_RUN_RTPG) with a spinlock held.
Yeah, this is all tricky to handle, as the code in scsi_dh_alua.c was handling the error codes with the state machine, and I want to move the error handling into the core driver.
As for your specific point, I think that ALUA_PG_RUN_RTPG can only now be set from outside this work handler, and that should also trigger the work (so the check on ALUA_PG_RUN_RTPG was not really required). But, I think that I can just have as before (with the h->flags & ALUA_PG_RUN_RTPG check)
+ spin_lock_irqsave(&h->lock, flags); h->flags &= ~ALUA_PG_RUNNING; - if (err == SCSI_DH_IMM_RETRY) - h->interval = 0; - else if (!h->interval && !(h->flags & ALUA_PG_RUN_RTPG)) - h->interval = ALUA_RTPG_RETRY_DELAY; h->flags |= ALUA_PG_RUN_RTPG; spin_unlock_irqrestore(&h->lock, flags); - goto queue_rtpg; + queue_delayed_work(kaluad_wq, &h->rtpg_work, + sdev->alua->interval * HZ); + return; } - if (err != SCSI_DH_OK) - h->flags &= ~ALUA_PG_RUN_STPG; + if (err != 0) + h->flags &= ~ALUA_PG_RUN_STPG; } + spin_lock_irqsave(&h->lock, flags);If h->flags & ALUA_PG_RUN_RTPG is false above, h->lock will already be locked.
Right, that is a bug
if (h->flags & ALUA_PG_RUN_STPG) { h->flags &= ~ALUA_PG_RUN_STPG; spin_unlock_irqrestore(&h->lock, flags); - err = alua_stpg(sdev); - spin_lock_irqsave(&h->lock, flags); - if (err == SCSI_DH_RETRY || h->flags & ALUA_PG_RUN_RTPG) { + ret = scsi_alua_stpg_run(sdev, h->flags & ALUA_OPTIMIZE_STPG); + if (err == -EAGAIN || h->flags & ALUA_PG_RUN_RTPG) {To avoid a race with resetting ALUA_PG_RUN_RTPG, this check needs to be done with the spinlock held.
Yes, Thanks, John

