Re: [RFQ] aic7xxx driver panics under heavy swap.
Justin, Your patch works for me. printk "Temporary Resource Shortage" has to go, or maybe you can make it a debug option. Here is the cleaned-up patch for 2.4.5-ac15 with TAILQ macros replaced with LIST macros. Thanks for the help. Bulent --- aic7xxx_linux.c.save Mon Jun 18 20:25:35 2001 +++ aic7xxx_linux.c Tue Jun 19 17:35:55 2001 @@ -1516,7 +1516,11 @@ } cmd->result = CAM_REQ_INPROG << 16; TAILQ_INSERT_TAIL(&dev->busyq, (struct ahc_cmd *)cmd, acmd_links.tqe); -ahc_linux_run_device_queue(ahc, dev); +if ((dev->flags & AHC_DEV_ON_RUN_LIST) == 0) { + LIST_INSERT_HEAD(&ahc->platform_data->device_runq, dev, links); + dev->flags |= AHC_DEV_ON_RUN_LIST; + ahc_linux_run_device_queues(ahc); +} ahc_unlock(ahc, &flags); return (0); } @@ -1532,6 +1536,9 @@ struct ahc_tmode_tstate *tstate; uint16_t mask; +if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0) + panic("running device on run list"); + while ((acmd = TAILQ_FIRST(&dev->busyq)) != NULL && dev->openings > 0 && dev->qfrozen == 0) { @@ -1540,8 +1547,6 @@ * running is because the whole controller Q is frozen. */ if (ahc->platform_data->qfrozen != 0) { - if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0) - return; LIST_INSERT_HEAD(&ahc->platform_data->device_runq, dev, links); @@ -1552,8 +1557,6 @@ * Get an scb to use. */ if ((scb = ahc_get_scb(ahc)) == NULL) { - if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0) - panic("running device on run list"); LIST_INSERT_HEAD(&ahc->platform_data->device_runq, dev, links); dev->flags |= AHC_DEV_ON_RUN_LIST; - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFQ] aic7xxx driver panics under heavy swap.
Justin, Your patch works for me. printk "Temporary Resource Shortage" has to go, or maybe you can make it a debug option. Here is the cleaned-up patch for 2.4.5-ac15 with TAILQ macros replaced with LIST macros. Thanks for the help. Bulent --- aic7xxx_linux.c.save Mon Jun 18 20:25:35 2001 +++ aic7xxx_linux.c Tue Jun 19 17:35:55 2001 @@ -1516,7 +1516,11 @@ } cmd->result = CAM_REQ_INPROG << 16; TAILQ_INSERT_TAIL(&dev->busyq, (struct ahc_cmd *)cmd, acmd_links.tqe); -ahc_linux_run_device_queue(ahc, dev); +if ((dev->flags & AHC_DEV_ON_RUN_LIST) == 0) { + LIST_INSERT_HEAD(&ahc->platform_data->device_runq, dev, links); + dev->flags |= AHC_DEV_ON_RUN_LIST; + ahc_linux_run_device_queues(ahc); +} ahc_unlock(ahc, &flags); return (0); } @@ -1532,6 +1536,9 @@ struct ahc_tmode_tstate *tstate; uint16_t mask; +if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0) + panic("running device on run list"); + while ((acmd = TAILQ_FIRST(&dev->busyq)) != NULL && dev->openings > 0 && dev->qfrozen == 0) { @@ -1540,8 +1547,6 @@ * running is because the whole controller Q is frozen. */ if (ahc->platform_data->qfrozen != 0) { - if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0) - return; LIST_INSERT_HEAD(&ahc->platform_data->device_runq, dev, links); @@ -1552,8 +1557,6 @@ * Get an scb to use. */ if ((scb = ahc_get_scb(ahc)) == NULL) { - if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0) - panic("running device on run list"); LIST_INSERT_HEAD(&ahc->platform_data->device_runq, dev, links); dev->flags |= AHC_DEV_ON_RUN_LIST; - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFQ] aic7xxx driver panics under heavy swap.
> >Justin, >When free memory is low, I get a series of aic7xxx messages followed by >panic. It appears to be a race condition in the code. It's actually a logic error, not a race condition. You should never enter ahc_linux_run_device_queue() while the device is still on the run queue. The real issue is that ahc_linux_queue bypasses the round-robin device scheduler by calling ahc_linux_run_device_queue() directly. The code should look like this (the LIST macro calls were switched to TAILQ calls a bit ago to ensure round-robin, but that change came just after 6.1.13). I haven't tested this yet... Thanks for the bug report. If you can verify that this works under memory pressure, the printf can go away. -- Justin //depot/src/linux/drivers/scsi/aic7xxx/aic7xxx_linux.c#67 - /usr/src/linux/drivers/scsi/aic7xxx/aic7xxx_linux.c --- /tmp/tmp.3288.0 Tue Jun 19 11:07:32 2001 +++ /usr/src/linux/drivers/scsi/aic7xxx/aic7xxx_linux.c Tue Jun 19 11:02:54 2001 @@ -1514,7 +1514,11 @@ } cmd->result = CAM_REQ_INPROG << 16; TAILQ_INSERT_TAIL(&dev->busyq, (struct ahc_cmd *)cmd, acmd_links.tqe); - ahc_linux_run_device_queue(ahc, dev); + if ((dev->flags & AHC_DEV_ON_RUN_LIST) == 0) { + TAILQ_INSERT_TAIL(&ahc->platform_data->device_runq, dev, links); + dev->flags |= AHC_DEV_ON_RUN_LIST; + ahc_linux_run_device_queues(ahc); + } ahc_unlock(ahc, &flags); return (0); } @@ -1530,6 +1534,9 @@ struct ahc_tmode_tstate *tstate; uint16_t mask; + if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0) + panic("running device on run list"); + while ((acmd = TAILQ_FIRST(&dev->busyq)) != NULL && dev->openings > 0 && dev->qfrozen == 0) { @@ -1538,8 +1545,6 @@ * running is because the whole controller Q is frozen. */ if (ahc->platform_data->qfrozen != 0) { - if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0) - return; TAILQ_INSERT_TAIL(&ahc->platform_data->device_runq, dev, links); @@ -1550,8 +1555,6 @@ * Get an scb to use. 
*/ if ((scb = ahc_get_scb(ahc)) == NULL) { - if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0) - panic("running device on run list"); TAILQ_INSERT_TAIL(&ahc->platform_data->device_runq, dev, links); dev->flags |= AHC_DEV_ON_RUN_LIST; - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RFQ] aic7xxx driver panics under heavy swap.
Justin, When free memory is low, I get a series of aic7xxx messages followed by panic. It appears to be a race condition in the code. Should you panic? I tried the following patch to not panic. But I am not sure if it is functionally correct. Bulent scsi0: Temporary Resource Shortage scsi0: Temporary Resource Shortage scsi0: Temporary Resource Shortage scsi0: Temporary Resource Shortage scsi0: Temporary Resource Shortage Kernel panic: running device on run list --- aic7xxx_linux.c.save Mon Jun 18 20:25:35 2001 +++ aic7xxx_linux.c Mon Jun 18 20:26:29 2001 @@ -1552,12 +1552,14 @@ * Get an scb to use. */ if ((scb = ahc_get_scb(ahc)) == NULL) { + ahc->flags |= AHC_RESOURCE_SHORTAGE; if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0) - panic("running device on run list"); + return; + // panic("running device on run list"); LIST_INSERT_HEAD(&ahc->platform_data->device_runq, dev, links); dev->flags |= AHC_DEV_ON_RUN_LIST; - ahc->flags |= AHC_RESOURCE_SHORTAGE; + // ahc->flags |= AHC_RESOURCE_SHORTAGE; printf("%s: Temporary Resource Shortage\n", ahc_name(ahc)); return; - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[RFQ] aic7xxx driver panics under heavy swap.
Justin, When free memory is low, I get a series of aic7xxx messages followed by panic. It appears to be a race condition in the code. Should you panic? I tried the following patch to not panic. But I am not sure if it is functionally correct. Bulent scsi0: Temporary Resource Shortage scsi0: Temporary Resource Shortage scsi0: Temporary Resource Shortage scsi0: Temporary Resource Shortage scsi0: Temporary Resource Shortage Kernel panic: running device on run list --- aic7xxx_linux.c.save Mon Jun 18 20:25:35 2001 +++ aic7xxx_linux.c Mon Jun 18 20:26:29 2001 @@ -1552,12 +1552,14 @@ * Get an scb to use. */ if ((scb = ahc_get_scb(ahc)) == NULL) { + ahc->flags |= AHC_RESOURCE_SHORTAGE; if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0) - panic("running device on run list"); + return; + // panic("running device on run list"); LIST_INSERT_HEAD(&ahc->platform_data->device_runq, dev, links); dev->flags |= AHC_DEV_ON_RUN_LIST; - ahc->flags |= AHC_RESOURCE_SHORTAGE; + // ahc->flags |= AHC_RESOURCE_SHORTAGE; printf("%s: Temporary Resource Shortage\n", ahc_name(ahc)); return; - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [RFQ] aic7xxx driver panics under heavy swap.
> >Justin, >When free memory is low, I get a series of aic7xxx messages followed by >panic. It appears to be a race condition in the code. It's actually a logic error, not a race condition. You should never enter ahc_linux_run_device_queue() while the device is still on the run queue. The real issue is that ahc_linux_queue bypasses the round-robin device scheduler by calling ahc_linux_run_device_queue() directly. The code should look like this (the LIST macro calls were switched to TAILQ calls a bit ago to ensure round-robin, but that change came just after 6.1.13). I haven't tested this yet... Thanks for the bug report. If you can verify that this works under memory pressure, the printf can go away. -- Justin //depot/src/linux/drivers/scsi/aic7xxx/aic7xxx_linux.c#67 - /usr/src/linux/drivers/scsi/aic7xxx/aic7xxx_linux.c --- /tmp/tmp.3288.0 Tue Jun 19 11:07:32 2001 +++ /usr/src/linux/drivers/scsi/aic7xxx/aic7xxx_linux.c Tue Jun 19 11:02:54 2001 @@ -1514,7 +1514,11 @@ } cmd->result = CAM_REQ_INPROG << 16; TAILQ_INSERT_TAIL(&dev->busyq, (struct ahc_cmd *)cmd, acmd_links.tqe); - ahc_linux_run_device_queue(ahc, dev); + if ((dev->flags & AHC_DEV_ON_RUN_LIST) == 0) { + TAILQ_INSERT_TAIL(&ahc->platform_data->device_runq, dev, links); + dev->flags |= AHC_DEV_ON_RUN_LIST; + ahc_linux_run_device_queues(ahc); + } ahc_unlock(ahc, &flags); return (0); } @@ -1530,6 +1534,9 @@ struct ahc_tmode_tstate *tstate; uint16_t mask; + if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0) + panic("running device on run list"); + while ((acmd = TAILQ_FIRST(&dev->busyq)) != NULL && dev->openings > 0 && dev->qfrozen == 0) { @@ -1538,8 +1545,6 @@ * running is because the whole controller Q is frozen. */ if (ahc->platform_data->qfrozen != 0) { - if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0) - return; TAILQ_INSERT_TAIL(&ahc->platform_data->device_runq, dev, links); @@ -1550,8 +1555,6 @@ * Get an scb to use. 
*/ if ((scb = ahc_get_scb(ahc)) == NULL) { - if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0) - panic("running device on run list"); TAILQ_INSERT_TAIL(&ahc->platform_data->device_runq, dev, links); dev->flags |= AHC_DEV_ON_RUN_LIST; - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/