Re: [PATCH] scsi: megaraid_sas - intercepts cmd timeout and throttle io

2007-04-26 Thread James Bottomley
On Wed, 2007-03-28 at 10:43 -0700, Sumant Patro wrote:
> eh_timed_out call back (megasas_reset_timer) is used to throttle io to the 
> adapter 
> when it is called the first time for a scmd.
> The MEGASAS_FW_BUSY flag is set and can_queue reduced to 16. The can_queue is 
> restored 
> from completion routine in following two conditions : 5 seconds has elapsed 
> and the # of
> outstanding cmds in FW is < 17.
> 
> Signed-off-by: Sumant Patro <[EMAIL PROTECTED]>
> ---
>  drivers/scsi/megaraid/megaraid_sas.c |   65 +++--
>  drivers/scsi/megaraid/megaraid_sas.h |   13 +++--
>  2 files changed, 70 insertions(+), 8 deletions(-)
> 
> This patch requires the patch submitted by James with subject line : 
> 
> [PATCH] expose eh_timed_out to the host template
> 
> diff -uprN linux-2.6.orig/drivers/scsi/megaraid/megaraid_sas.c 
> linux-2.6.new/drivers/scsi/megaraid/megaraid_sas.c
> --- linux-2.6.orig/drivers/scsi/megaraid/megaraid_sas.c   2007-03-28 
> 08:41:49.0 -0700
> +++ linux-2.6.new/drivers/scsi/megaraid/megaraid_sas.c   2007-03-28 
> 08:36:38.0 -0700
> @@ -10,7 +10,7 @@
>   *  2 of the License, or (at your option) any later version.
>   *
>   * FILE  : megaraid_sas.c
> - * Version   : v00.00.03.10-rc1
> + * Version   : v00.00.03.10-rc3
>   *
>   * Authors:
>   *   (email-id : [EMAIL PROTECTED])
> @@ -886,6 +886,7 @@ megasas_queue_command(struct scsi_cmnd *
>   goto out_return_cmd;
>  
>   cmd->scmd = scmd;
> + scmd->SCp.ptr = (char *)cmd;
>  
>   /*
>* Issue the command to the FW
> @@ -981,8 +982,8 @@ static int megasas_generic_reset(struct 
>  
>   instance = (struct megasas_instance *)scmd->device->host->hostdata;
>  
> - scmd_printk(KERN_NOTICE, scmd, "megasas: RESET -%ld cmd=%x\n",
> -scmd->serial_number, scmd->cmnd[0]);
> + scmd_printk(KERN_NOTICE, scmd, "megasas: RESET -%ld cmd=%x 
> retries=%x\n",
> +  scmd->serial_number, scmd->cmnd[0], scmd->retries);
>  
>   if (instance->hw_crit_error) {
>   printk(KERN_ERR "megasas: cannot recover from previous reset "
> @@ -1000,6 +1001,40 @@ static int megasas_generic_reset(struct 
>  }
>  
>  /**
> + * megasas_reset_timer - quiesce the adapter if required
> + * @scmd:scsi cmnd
> + *
> + * Sets the FW busy flag and reduces the host->can_queue if the
> + * cmd has not been completed within the timeout period.
> + */
> +static enum
> +scsi_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd)
> +{
> + struct megasas_cmd *cmd = (struct megasas_cmd *)scmd->SCp.ptr;
> + struct megasas_instance *instance;
> + unsigned long flags;
> +
> + if (cmd) {

I don't believe we can ever get a command timeout with no command, can
we?

> + if (time_after(jiffies, scmd->jiffies_at_alloc + 170 * HZ))
> + return EH_NOT_HANDLED;

This 170s is a bit arbitrary ... surely you want it to be related to a
multiple of scmd->timeout_per_command?

> + instance = cmd->instance;
> + if (!(instance->flag & MEGASAS_FW_BUSY)) {
> + /* FW is busy, throttle IO */
> + spin_lock_irqsave(&instance->throttle_io_lock, flags);
> +
> + instance->host->can_queue = 16;

can_queue is protected by the host lock ... I think you need to dump the
throttle_io_lock and simply use the host lock for all of this.

> + instance->last_time = jiffies;
> + instance->flag |= MEGASAS_FW_BUSY;
> +
> + spin_unlock_irqrestore(&instance->throttle_io_lock, 
> flags);
> + }
> + return EH_RESET_TIMER;
> + }
> + return EH_HANDLED;
> +}
> +
> +/**
>   * megasas_reset_device -Device reset handler entry point
>   */
>  static int megasas_reset_device(struct scsi_cmnd *scmd)
> @@ -1112,6 +1147,7 @@ static struct scsi_host_template megasas
>   .eh_device_reset_handler = megasas_reset_device,
>   .eh_bus_reset_handler = megasas_reset_bus_host,
>   .eh_host_reset_handler = megasas_reset_bus_host,
> + .eh_timed_out = megasas_reset_timer,
>   .bios_param = megasas_bios_param,
>   .use_clustering = ENABLE_CLUSTERING,
>  };
> @@ -1215,9 +1251,8 @@ megasas_complete_cmd(struct megasas_inst
>   int exception = 0;
>   struct megasas_header *hdr = &cmd->frame->hdr;
>  
> - if (cmd->scmd) {
> + if (cmd->scmd)
>   cmd->scmd->SCp.ptr = (char *)0;

That's NULL, ordinarily ...

> - }
>  
>   switch (hdr->cmd) {
>  
> @@ -1806,6 +1841,7 @@ static void megasas_complete_cmd_dpc(uns
>   u32 context;
>   struct megasas_cmd *cmd;
>   struct megasas_instance *instance = (struct megasas_instance 
> *)instance_addr;
> + unsigned long flags;
>  
>   /* If we have already declared adapter dead, donot complete cmds */
>   if (instance->hw_crit_error)
> @@ -1828,6 +1864,22 @@ static void megasas_complete_cmd_dpc(uns
>   

Re: [PATCH] scsi: megaraid_sas - intercepts cmd timeout and throttle io

2007-04-26 Thread James Bottomley
On Wed, 2007-03-28 at 10:43 -0700, Sumant Patro wrote:
 eh_timed_out call back (megasas_reset_timer) is used to throttle io to the 
 adapter 
 when it is called the first time for a scmd.
 The MEGASAS_FW_BUSY flag is set and can_queue reduced to 16. The can_queue is 
 restored 
 from completion routine in following two conditions : 5 seconds has elapsed 
 and the # of
 outstanding cmds in FW is < 17.
 
 Signed-off-by: Sumant Patro <[EMAIL PROTECTED]>
 ---
  drivers/scsi/megaraid/megaraid_sas.c |   65 +++--
  drivers/scsi/megaraid/megaraid_sas.h |   13 +++--
  2 files changed, 70 insertions(+), 8 deletions(-)
 
 This patch requires the patch submitted by James with subject line : 
 
 [PATCH] expose eh_timed_out to the host template
 
 diff -uprN linux-2.6.orig/drivers/scsi/megaraid/megaraid_sas.c 
 linux-2.6.new/drivers/scsi/megaraid/megaraid_sas.c
 --- linux-2.6.orig/drivers/scsi/megaraid/megaraid_sas.c   2007-03-28 
 08:41:49.0 -0700
 +++ linux-2.6.new/drivers/scsi/megaraid/megaraid_sas.c2007-03-28 
 08:36:38.0 -0700
 @@ -10,7 +10,7 @@
   *  2 of the License, or (at your option) any later version.
   *
   * FILE  : megaraid_sas.c
 - * Version   : v00.00.03.10-rc1
 + * Version   : v00.00.03.10-rc3
   *
   * Authors:
   *   (email-id : [EMAIL PROTECTED])
 @@ -886,6 +886,7 @@ megasas_queue_command(struct scsi_cmnd *
   goto out_return_cmd;
  
   cmd->scmd = scmd;
 + scmd->SCp.ptr = (char *)cmd;
  
   /*
* Issue the command to the FW
 @@ -981,8 +982,8 @@ static int megasas_generic_reset(struct 
  
   instance = (struct megasas_instance *)scmd->device->host->hostdata;
  
 - scmd_printk(KERN_NOTICE, scmd, "megasas: RESET -%ld cmd=%x\n",
 -scmd->serial_number, scmd->cmnd[0]);
 + scmd_printk(KERN_NOTICE, scmd, "megasas: RESET -%ld cmd=%x 
 retries=%x\n",
 +  scmd->serial_number, scmd->cmnd[0], scmd->retries);
  
   if (instance->hw_crit_error) {
   printk(KERN_ERR "megasas: cannot recover from previous reset "
 @@ -1000,6 +1001,40 @@ static int megasas_generic_reset(struct 
  }
  
  /**
 + * megasas_reset_timer - quiesce the adapter if required
 + * @scmd:scsi cmnd
 + *
 + * Sets the FW busy flag and reduces the host->can_queue if the
 + * cmd has not been completed within the timeout period.
 + */
 +static enum
 +scsi_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd)
 +{
 + struct megasas_cmd *cmd = (struct megasas_cmd *)scmd->SCp.ptr;
 + struct megasas_instance *instance;
 + unsigned long flags;
 +
 + if (cmd) {

I don't believe we can ever get a command timeout with no command, can
we?

 + if (time_after(jiffies, scmd->jiffies_at_alloc + 170 * HZ))
 + return EH_NOT_HANDLED;

This 170s is a bit arbitrary ... surely you want it to be related to a
multiple of scmd->timeout_per_command?

 + instance = cmd->instance;
 + if (!(instance->flag & MEGASAS_FW_BUSY)) {
 + /* FW is busy, throttle IO */
 + spin_lock_irqsave(&instance->throttle_io_lock, flags);
 +
 + instance->host->can_queue = 16;

can_queue is protected by the host lock ... I think you need to dump the
throttle_io_lock and simply use the host lock for all of this.

 + instance->last_time = jiffies;
 + instance->flag |= MEGASAS_FW_BUSY;
 +
 + spin_unlock_irqrestore(&instance->throttle_io_lock, 
 flags);
 + }
 + return EH_RESET_TIMER;
 + }
 + return EH_HANDLED;
 +}
 +
 +/**
   * megasas_reset_device -Device reset handler entry point
   */
  static int megasas_reset_device(struct scsi_cmnd *scmd)
 @@ -1112,6 +1147,7 @@ static struct scsi_host_template megasas
   .eh_device_reset_handler = megasas_reset_device,
   .eh_bus_reset_handler = megasas_reset_bus_host,
   .eh_host_reset_handler = megasas_reset_bus_host,
 + .eh_timed_out = megasas_reset_timer,
   .bios_param = megasas_bios_param,
   .use_clustering = ENABLE_CLUSTERING,
  };
 @@ -1215,9 +1251,8 @@ megasas_complete_cmd(struct megasas_inst
   int exception = 0;
   struct megasas_header *hdr = &cmd->frame->hdr;
  
 - if (cmd->scmd) {
 + if (cmd->scmd)
   cmd->scmd->SCp.ptr = (char *)0;

That's NULL, ordinarily ...

 - }
  
   switch (hdr->cmd) {
  
 @@ -1806,6 +1841,7 @@ static void megasas_complete_cmd_dpc(uns
   u32 context;
   struct megasas_cmd *cmd;
   struct megasas_instance *instance = (struct megasas_instance 
 *)instance_addr;
 + unsigned long flags;
  
   /* If we have already declared adapter dead, donot complete cmds */
   if (instance->hw_crit_error)
 @@ -1828,6 +1864,22 @@ static void megasas_complete_cmd_dpc(uns
   }
  
   *instance->consumer = producer;
 +
 + /*
 +  * Check if we can restore can_queue
 +  */
 + if (instance->flag & 

[PATCH] scsi: megaraid_sas - intercepts cmd timeout and throttle io

2007-03-28 Thread Sumant Patro

The eh_timed_out callback (megasas_reset_timer) is used to throttle I/O to
the adapter the first time it is called for a scmd.
The MEGASAS_FW_BUSY flag is set and can_queue is reduced to 16. can_queue is
restored from the completion routine once both of the following conditions
hold: 5 seconds have elapsed and the number of outstanding cmds in FW is < 17.

Signed-off-by: Sumant Patro <[EMAIL PROTECTED]>
---
 drivers/scsi/megaraid/megaraid_sas.c |   65 +++--
 drivers/scsi/megaraid/megaraid_sas.h |   13 +++--
 2 files changed, 70 insertions(+), 8 deletions(-)

This patch requires the patch submitted by James with subject line : 

[PATCH] expose eh_timed_out to the host template

diff -uprN linux-2.6.orig/drivers/scsi/megaraid/megaraid_sas.c 
linux-2.6.new/drivers/scsi/megaraid/megaraid_sas.c
--- linux-2.6.orig/drivers/scsi/megaraid/megaraid_sas.c 2007-03-28 
08:41:49.0 -0700
+++ linux-2.6.new/drivers/scsi/megaraid/megaraid_sas.c  2007-03-28 
08:36:38.0 -0700
@@ -10,7 +10,7 @@
  *2 of the License, or (at your option) any later version.
  *
  * FILE: megaraid_sas.c
- * Version : v00.00.03.10-rc1
+ * Version : v00.00.03.10-rc3
  *
  * Authors:
  * (email-id : [EMAIL PROTECTED])
@@ -886,6 +886,7 @@ megasas_queue_command(struct scsi_cmnd *
goto out_return_cmd;
 
cmd->scmd = scmd;
+   scmd->SCp.ptr = (char *)cmd;
 
/*
 * Issue the command to the FW
@@ -981,8 +982,8 @@ static int megasas_generic_reset(struct 
 
instance = (struct megasas_instance *)scmd->device->host->hostdata;
 
-   scmd_printk(KERN_NOTICE, scmd, "megasas: RESET -%ld cmd=%x\n",
-  scmd->serial_number, scmd->cmnd[0]);
+   scmd_printk(KERN_NOTICE, scmd, "megasas: RESET -%ld cmd=%x 
retries=%x\n",
+scmd->serial_number, scmd->cmnd[0], scmd->retries);
 
if (instance->hw_crit_error) {
printk(KERN_ERR "megasas: cannot recover from previous reset "
@@ -1000,6 +1001,40 @@ static int megasas_generic_reset(struct 
 }
 
 /**
+ * megasas_reset_timer - quiesce the adapter if required
+ * @scmd:  scsi cmnd
+ *
+ * Sets the FW busy flag and reduces the host->can_queue if the
+ * cmd has not been completed within the timeout period.
+ */
+static enum
+scsi_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd)
+{
+   struct megasas_cmd *cmd = (struct megasas_cmd *)scmd->SCp.ptr;
+   struct megasas_instance *instance;
+   unsigned long flags;
+
+   if (cmd) {
+   if (time_after(jiffies, scmd->jiffies_at_alloc + 170 * HZ))
+   return EH_NOT_HANDLED;
+
+   instance = cmd->instance;
+   if (!(instance->flag & MEGASAS_FW_BUSY)) {
+   /* FW is busy, throttle IO */
+   spin_lock_irqsave(&instance->throttle_io_lock, flags);
+
+   instance->host->can_queue = 16;
+   instance->last_time = jiffies;
+   instance->flag |= MEGASAS_FW_BUSY;
+
+   spin_unlock_irqrestore(&instance->throttle_io_lock, 
flags);
+   }
+   return EH_RESET_TIMER;
+   }
+   return EH_HANDLED;
+}
+
+/**
  * megasas_reset_device -  Device reset handler entry point
  */
 static int megasas_reset_device(struct scsi_cmnd *scmd)
@@ -1112,6 +1147,7 @@ static struct scsi_host_template megasas
.eh_device_reset_handler = megasas_reset_device,
.eh_bus_reset_handler = megasas_reset_bus_host,
.eh_host_reset_handler = megasas_reset_bus_host,
+   .eh_timed_out = megasas_reset_timer,
.bios_param = megasas_bios_param,
.use_clustering = ENABLE_CLUSTERING,
 };
@@ -1215,9 +1251,8 @@ megasas_complete_cmd(struct megasas_inst
int exception = 0;
struct megasas_header *hdr = &cmd->frame->hdr;
 
-   if (cmd->scmd) {
+   if (cmd->scmd)
cmd->scmd->SCp.ptr = (char *)0;
-   }
 
switch (hdr->cmd) {
 
@@ -1806,6 +1841,7 @@ static void megasas_complete_cmd_dpc(uns
u32 context;
struct megasas_cmd *cmd;
struct megasas_instance *instance = (struct megasas_instance 
*)instance_addr;
+   unsigned long flags;
 
/* If we have already declared adapter dead, donot complete cmds */
if (instance->hw_crit_error)
@@ -1828,6 +1864,22 @@ static void megasas_complete_cmd_dpc(uns
}
 
*instance->consumer = producer;
+
+   /*
+* Check if we can restore can_queue
+*/
+   if (instance->flag & MEGASAS_FW_BUSY
+   && time_after(jiffies, instance->last_time + 5 * HZ)
+   && atomic_read(&instance->fw_outstanding) < 17) {
+
+   spin_lock_irqsave(&instance->throttle_io_lock, flags);
+
+   instance->flag &= ~MEGASAS_FW_BUSY;
+   instance->host->can_queue =
+   instance->max_fw_cmds - MEGASAS_INT_CMDS;
+
+   

[PATCH] scsi: megaraid_sas - intercepts cmd timeout and throttle io

2007-03-28 Thread Sumant Patro

eh_timed_out call back (megasas_reset_timer) is used to throttle io to the 
adapter 
when it is called the first time for a scmd.
The MEGASAS_FW_BUSY flag is set and can_queue reduced to 16. The can_queue is 
restored 
from completion routine in following two conditions : 5 seconds has elapsed and 
the # of
outstanding cmds in FW is < 17.

Signed-off-by: Sumant Patro <[EMAIL PROTECTED]>
---
 drivers/scsi/megaraid/megaraid_sas.c |   65 +++--
 drivers/scsi/megaraid/megaraid_sas.h |   13 +++--
 2 files changed, 70 insertions(+), 8 deletions(-)

This patch requires the patch submitted by James with subject line : 

[PATCH] expose eh_timed_out to the host template

diff -uprN linux-2.6.orig/drivers/scsi/megaraid/megaraid_sas.c 
linux-2.6.new/drivers/scsi/megaraid/megaraid_sas.c
--- linux-2.6.orig/drivers/scsi/megaraid/megaraid_sas.c 2007-03-28 
08:41:49.0 -0700
+++ linux-2.6.new/drivers/scsi/megaraid/megaraid_sas.c  2007-03-28 
08:36:38.0 -0700
@@ -10,7 +10,7 @@
  *2 of the License, or (at your option) any later version.
  *
  * FILE: megaraid_sas.c
- * Version : v00.00.03.10-rc1
+ * Version : v00.00.03.10-rc3
  *
  * Authors:
  * (email-id : [EMAIL PROTECTED])
@@ -886,6 +886,7 @@ megasas_queue_command(struct scsi_cmnd *
goto out_return_cmd;
 
cmd->scmd = scmd;
+   scmd->SCp.ptr = (char *)cmd;
 
/*
 * Issue the command to the FW
@@ -981,8 +982,8 @@ static int megasas_generic_reset(struct 
 
instance = (struct megasas_instance *)scmd->device->host->hostdata;
 
-   scmd_printk(KERN_NOTICE, scmd, "megasas: RESET -%ld cmd=%x\n",
-  scmd->serial_number, scmd->cmnd[0]);
+   scmd_printk(KERN_NOTICE, scmd, "megasas: RESET -%ld cmd=%x 
retries=%x\n",
+scmd->serial_number, scmd->cmnd[0], scmd->retries);
 
if (instance->hw_crit_error) {
printk(KERN_ERR "megasas: cannot recover from previous reset "
@@ -1000,6 +1001,40 @@ static int megasas_generic_reset(struct 
 }
 
 /**
+ * megasas_reset_timer - quiesce the adapter if required
+ * @scmd:  scsi cmnd
+ *
+ * Sets the FW busy flag and reduces the host->can_queue if the
+ * cmd has not been completed within the timeout period.
+ */
+static enum
+scsi_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd)
+{
+   struct megasas_cmd *cmd = (struct megasas_cmd *)scmd->SCp.ptr;
+   struct megasas_instance *instance;
+   unsigned long flags;
+
+   if (cmd) {
+   if (time_after(jiffies, scmd->jiffies_at_alloc + 170 * HZ))
+   return EH_NOT_HANDLED;
+
+   instance = cmd->instance;
+   if (!(instance->flag & MEGASAS_FW_BUSY)) {
+   /* FW is busy, throttle IO */
+   spin_lock_irqsave(&instance->throttle_io_lock, flags);
+
+   instance->host->can_queue = 16;
+   instance->last_time = jiffies;
+   instance->flag |= MEGASAS_FW_BUSY;
+
+   spin_unlock_irqrestore(&instance->throttle_io_lock, 
flags);
+   }
+   return EH_RESET_TIMER;
+   }
+   return EH_HANDLED;
+}
+
+/**
  * megasas_reset_device -  Device reset handler entry point
  */
 static int megasas_reset_device(struct scsi_cmnd *scmd)
@@ -1112,6 +1147,7 @@ static struct scsi_host_template megasas
.eh_device_reset_handler = megasas_reset_device,
.eh_bus_reset_handler = megasas_reset_bus_host,
.eh_host_reset_handler = megasas_reset_bus_host,
+   .eh_timed_out = megasas_reset_timer,
.bios_param = megasas_bios_param,
.use_clustering = ENABLE_CLUSTERING,
 };
@@ -1215,9 +1251,8 @@ megasas_complete_cmd(struct megasas_inst
int exception = 0;
struct megasas_header *hdr = &cmd->frame->hdr;
 
-   if (cmd->scmd) {
+   if (cmd->scmd)
cmd->scmd->SCp.ptr = (char *)0;
-   }
 
switch (hdr->cmd) {
 
@@ -1806,6 +1841,7 @@ static void megasas_complete_cmd_dpc(uns
u32 context;
struct megasas_cmd *cmd;
struct megasas_instance *instance = (struct megasas_instance 
*)instance_addr;
+   unsigned long flags;
 
/* If we have already declared adapter dead, donot complete cmds */
if (instance->hw_crit_error)
@@ -1828,6 +1864,22 @@ static void megasas_complete_cmd_dpc(uns
}
 
*instance->consumer = producer;
+
+   /*
+* Check if we can restore can_queue
+*/
+   if (instance->flag & MEGASAS_FW_BUSY
+   && time_after(jiffies, instance->last_time + 5 * HZ)
+   && atomic_read(&instance->fw_outstanding) < 17) {
+
+   spin_lock_irqsave(&instance->throttle_io_lock, flags);
+
+   instance->flag &= ~MEGASAS_FW_BUSY;
+   instance->host->can_queue =
+   instance-max_fw_cmds - MEGASAS_INT_CMDS;
+
+