Re: [RFQ] aic7xxx driver panics under heavy swap.

2001-06-20 Thread Bulent Abali



Justin,
Your patch works for me.  The printk "Temporary Resource Shortage"
has to go, or maybe you can make it a debug option.

Here is the cleaned up patch for 2.4.5-ac15 with TAILQ
macros replaced with LIST macros.  Thanks for the help.
Bulent



--- aic7xxx_linux.c.save Mon Jun 18 20:25:35 2001
+++ aic7xxx_linux.c Tue Jun 19 17:35:55 2001
@@ -1516,7 +1516,11 @@
 }
 cmd->result = CAM_REQ_INPROG << 16;
 TAILQ_INSERT_TAIL(&dev->busyq, (struct ahc_cmd *)cmd, acmd_links.tqe);
-ahc_linux_run_device_queue(ahc, dev);
+if ((dev->flags & AHC_DEV_ON_RUN_LIST) == 0) {
+ LIST_INSERT_HEAD(&ahc->platform_data->device_runq, dev, links);
+ dev->flags |= AHC_DEV_ON_RUN_LIST;
+ ahc_linux_run_device_queues(ahc);
+}
 ahc_unlock(ahc, &flags);
 return (0);
 }
@@ -1532,6 +1536,9 @@
 struct ahc_tmode_tstate *tstate;
 uint16_t mask;

+if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0)
+ panic("running device on run list");
+
 while ((acmd = TAILQ_FIRST(&dev->busyq)) != NULL
 && dev->openings > 0 && dev->qfrozen == 0) {

@@ -1540,8 +1547,6 @@
   * running is because the whole controller Q is frozen.
   */
  if (ahc->platform_data->qfrozen != 0) {
-  if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0)
-   return;

   LIST_INSERT_HEAD(&ahc->platform_data->device_runq,
  dev, links);
@@ -1552,8 +1557,6 @@
   * Get an scb to use.
   */
  if ((scb = ahc_get_scb(ahc)) == NULL) {
-  if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0)
-   panic("running device on run list");
   LIST_INSERT_HEAD(&ahc->platform_data->device_runq,
  dev, links);
   dev->flags |= AHC_DEV_ON_RUN_LIST;








-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: [RFQ] aic7xxx driver panics under heavy swap.

2001-06-20 Thread Bulent Abali



Justin,
Your patch works for me.  The printk "Temporary Resource Shortage"
has to go, or maybe you can make it a debug option.

Here is the cleaned up patch for 2.4.5-ac15 with TAILQ
macros replaced with LIST macros.  Thanks for the help.
Bulent



--- aic7xxx_linux.c.save Mon Jun 18 20:25:35 2001
+++ aic7xxx_linux.c Tue Jun 19 17:35:55 2001
@@ -1516,7 +1516,11 @@
 }
 cmd->result = CAM_REQ_INPROG << 16;
 TAILQ_INSERT_TAIL(&dev->busyq, (struct ahc_cmd *)cmd, acmd_links.tqe);
-ahc_linux_run_device_queue(ahc, dev);
+if ((dev->flags & AHC_DEV_ON_RUN_LIST) == 0) {
+ LIST_INSERT_HEAD(&ahc->platform_data->device_runq, dev, links);
+ dev->flags |= AHC_DEV_ON_RUN_LIST;
+ ahc_linux_run_device_queues(ahc);
+}
 ahc_unlock(ahc, &flags);
 return (0);
 }
@@ -1532,6 +1536,9 @@
 struct ahc_tmode_tstate *tstate;
 uint16_t mask;

+if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0)
+ panic("running device on run list");
+
 while ((acmd = TAILQ_FIRST(&dev->busyq)) != NULL
 && dev->openings > 0 && dev->qfrozen == 0) {

@@ -1540,8 +1547,6 @@
   * running is because the whole controller Q is frozen.
   */
  if (ahc->platform_data->qfrozen != 0) {
-  if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0)
-   return;

   LIST_INSERT_HEAD(&ahc->platform_data->device_runq,
  dev, links);
@@ -1552,8 +1557,6 @@
   * Get an scb to use.
   */
  if ((scb = ahc_get_scb(ahc)) == NULL) {
-  if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0)
-   panic("running device on run list");
   LIST_INSERT_HEAD(&ahc->platform_data->device_runq,
  dev, links);
   dev->flags |= AHC_DEV_ON_RUN_LIST;








-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: [RFQ] aic7xxx driver panics under heavy swap.

2001-06-19 Thread Justin T. Gibbs

>
>Justin,
>When free memory is low, I get a series of aic7xxx messages followed by
>panic.  It appears to be a race condition in the code.

It's actually a logic error, not a race condition.  You should never
enter ahc_linux_run_device_queue() while the device is still on the
run queue.  The real issue is that ahc_linux_queue bypasses the
round-robin device scheduler by calling ahc_linux_run_device_queue()
directly.  The code should look like this (the LIST macro calls
were switched to TAILQ calls a bit ago to ensure round-robin, but
that change came just after 6.1.13).  I haven't tested this yet...

Thanks for the bug report.  If you can verify that this works under
memory pressure, the printf can go away.

--
Justin

 //depot/src/linux/drivers/scsi/aic7xxx/aic7xxx_linux.c#67 - 
/usr/src/linux/drivers/scsi/aic7xxx/aic7xxx_linux.c 
--- /tmp/tmp.3288.0 Tue Jun 19 11:07:32 2001
+++ /usr/src/linux/drivers/scsi/aic7xxx/aic7xxx_linux.c Tue Jun 19 11:02:54 2001
@@ -1514,7 +1514,11 @@
}
cmd->result = CAM_REQ_INPROG << 16;
TAILQ_INSERT_TAIL(&dev->busyq, (struct ahc_cmd *)cmd, acmd_links.tqe);
-   ahc_linux_run_device_queue(ahc, dev);
+   if ((dev->flags & AHC_DEV_ON_RUN_LIST) == 0) {
+   TAILQ_INSERT_TAIL(&ahc->platform_data->device_runq, dev, links);
+   dev->flags |= AHC_DEV_ON_RUN_LIST;
+   ahc_linux_run_device_queues(ahc);
+   }
ahc_unlock(ahc, &flags);
return (0);
 }
@@ -1530,6 +1534,9 @@
struct   ahc_tmode_tstate *tstate;
uint16_t mask;
 
+   if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0)
+   panic("running device on run list");
+
while ((acmd = TAILQ_FIRST(&dev->busyq)) != NULL
&& dev->openings > 0 && dev->qfrozen == 0) {
 
@@ -1538,8 +1545,6 @@
 * running is because the whole controller Q is frozen.
 */
if (ahc->platform_data->qfrozen != 0) {
-   if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0)
-   return;
 
TAILQ_INSERT_TAIL(&ahc->platform_data->device_runq,
  dev, links);
@@ -1550,8 +1555,6 @@
 * Get an scb to use.
 */
if ((scb = ahc_get_scb(ahc)) == NULL) {
-   if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0)
-   panic("running device on run list");
TAILQ_INSERT_TAIL(&ahc->platform_data->device_runq,
 dev, links);
dev->flags |= AHC_DEV_ON_RUN_LIST;
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



[RFQ] aic7xxx driver panics under heavy swap.

2001-06-19 Thread Bulent Abali


Justin,
When free memory is low, I get a series of aic7xxx messages followed by
panic.
It appears to be a race condition in the code.  Should you panic?  I tried
the following
patch to not panic.  But I am not sure if it is functionally correct.
Bulent


scsi0: Temporary Resource Shortage
scsi0: Temporary Resource Shortage
scsi0: Temporary Resource Shortage
scsi0: Temporary Resource Shortage
scsi0: Temporary Resource Shortage
Kernel panic: running device on run list


--- aic7xxx_linux.c.save Mon Jun 18 20:25:35 2001
+++ aic7xxx_linux.c Mon Jun 18 20:26:29 2001
@@ -1552,12 +1552,14 @@
   * Get an scb to use.
   */
  if ((scb = ahc_get_scb(ahc)) == NULL) {
+  ahc->flags |= AHC_RESOURCE_SHORTAGE;
   if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0)
-   panic("running device on run list");
+   return;
+   // panic("running device on run list");
   LIST_INSERT_HEAD(&ahc->platform_data->device_runq,
  dev, links);
   dev->flags |= AHC_DEV_ON_RUN_LIST;
-  ahc->flags |= AHC_RESOURCE_SHORTAGE;
+  // ahc->flags |= AHC_RESOURCE_SHORTAGE;
   printf("%s: Temporary Resource Shortage\n",
  ahc_name(ahc));
   return;



-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



[RFQ] aic7xxx driver panics under heavy swap.

2001-06-19 Thread Bulent Abali


Justin,
When free memory is low, I get a series of aic7xxx messages followed by
panic.
It appears to be a race condition in the code.  Should you panic?  I tried
the following
patch to not panic.  But I am not sure if it is functionally correct.
Bulent


scsi0: Temporary Resource Shortage
scsi0: Temporary Resource Shortage
scsi0: Temporary Resource Shortage
scsi0: Temporary Resource Shortage
scsi0: Temporary Resource Shortage
Kernel panic: running device on run list


--- aic7xxx_linux.c.save Mon Jun 18 20:25:35 2001
+++ aic7xxx_linux.c Mon Jun 18 20:26:29 2001
@@ -1552,12 +1552,14 @@
   * Get an scb to use.
   */
  if ((scb = ahc_get_scb(ahc)) == NULL) {
+  ahc->flags |= AHC_RESOURCE_SHORTAGE;
   if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0)
-   panic("running device on run list");
+   return;
+   // panic("running device on run list");
   LIST_INSERT_HEAD(&ahc->platform_data->device_runq,
  dev, links);
   dev->flags |= AHC_DEV_ON_RUN_LIST;
-  ahc->flags |= AHC_RESOURCE_SHORTAGE;
+  // ahc->flags |= AHC_RESOURCE_SHORTAGE;
   printf("%s: Temporary Resource Shortage\n",
  ahc_name(ahc));
   return;



-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



Re: [RFQ] aic7xxx driver panics under heavy swap.

2001-06-19 Thread Justin T. Gibbs


>Justin,
>When free memory is low, I get a series of aic7xxx messages followed by
>panic.  It appears to be a race condition in the code.

It's actually a logic error, not a race condition.  You should never
enter ahc_linux_run_device_queue() while the device is still on the
run queue.  The real issue is that ahc_linux_queue bypasses the
round-robin device scheduler by calling ahc_linux_run_device_queue()
directly.  The code should look like this (the LIST macro calls
were switched to TAILQ calls a bit ago to ensure round-robin, but
that change came just after 6.1.13).  I haven't tested this yet...

Thanks for the bug report.  If you can verify that this works under
memory pressure, the printf can go away.

--
Justin

 //depot/src/linux/drivers/scsi/aic7xxx/aic7xxx_linux.c#67 - 
/usr/src/linux/drivers/scsi/aic7xxx/aic7xxx_linux.c 
--- /tmp/tmp.3288.0 Tue Jun 19 11:07:32 2001
+++ /usr/src/linux/drivers/scsi/aic7xxx/aic7xxx_linux.c Tue Jun 19 11:02:54 2001
@@ -1514,7 +1514,11 @@
}
cmd->result = CAM_REQ_INPROG << 16;
TAILQ_INSERT_TAIL(&dev->busyq, (struct ahc_cmd *)cmd, acmd_links.tqe);
-   ahc_linux_run_device_queue(ahc, dev);
+   if ((dev->flags & AHC_DEV_ON_RUN_LIST) == 0) {
+   TAILQ_INSERT_TAIL(&ahc->platform_data->device_runq, dev, links);
+   dev->flags |= AHC_DEV_ON_RUN_LIST;
+   ahc_linux_run_device_queues(ahc);
+   }
ahc_unlock(ahc, &flags);
return (0);
 }
@@ -1530,6 +1534,9 @@
struct   ahc_tmode_tstate *tstate;
uint16_t mask;
 
+   if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0)
+   panic("running device on run list");
+
while ((acmd = TAILQ_FIRST(&dev->busyq)) != NULL
&& dev->openings > 0 && dev->qfrozen == 0) {
 
@@ -1538,8 +1545,6 @@
 * running is because the whole controller Q is frozen.
 */
if (ahc->platform_data->qfrozen != 0) {
-   if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0)
-   return;
 
TAILQ_INSERT_TAIL(&ahc->platform_data->device_runq,
  dev, links);
@@ -1550,8 +1555,6 @@
 * Get an scb to use.
 */
if ((scb = ahc_get_scb(ahc)) == NULL) {
-   if ((dev->flags & AHC_DEV_ON_RUN_LIST) != 0)
-   panic("running device on run list");
TAILQ_INSERT_TAIL(&ahc->platform_data->device_runq,
 dev, links);
dev->flags |= AHC_DEV_ON_RUN_LIST;
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/