Re: [PATCH] Only disable/enable LSI interrupts in EEH

2009-02-12 Thread Mike Mason

Michael Ellerman wrote:

On Tue, 2009-02-10 at 13:12 -0800, Mike Mason wrote:

I'm resubmitting this patch with a couple changes
suggested by Michael Ellerman.  1) the new functions
should be static, and 2) some people may object to
including unrelated formating changes.

=

The EEH code disables and enables interrupts during the
device recovery process.  This is unnecessary for MSI
and MSI-X interrupts because they are effectively disabled
by the DMA Stopped state when an EEH error occurs.  The
current code is also incorrect for MSI-X interrupts.  It
doesn't take into account that MSI-X interrupts are tracked
in a different way than LSI/MSI interrupts.  This patch 
ensures only LSI interrupts are disabled/enabled.


Signed-off-by: Mike Mason mm...@us.ibm.com
Acked-by: Linas Vepstas linasveps...@gmail.com



Looks good. Assuming you've tested it :)


Yes, it's been tested with network devices that use LSI, MSI and MSI-X 
interrupts.  All recovered fine.



Acked-by: Michael Ellerman mich...@ellerman.id.au




___
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev


Re: [PATCH] Only disable/enable LSI interrupts in EEH

2009-02-10 Thread Linas Vepstas
2009/2/9 Mike Mason mm...@us.ibm.com:
 The EEH code disables and enables interrupts during the
 device recovery process.  This is unnecessary for MSI
 and MSI-X interrupts because they are effectively disabled
 by the DMA Stopped state when an EEH error occurs.  The current code is also
 incorrect for MSI-X interrupts.  It
 doesn't take into account that MSI-X interrupts are tracked
 in a different way than LSI/MSI interrupts.  This patch ensures only LSI
 interrupts are disabled/enabled.

 The patch also includes a couple minor formatting fixes.


 Signed-off-by: Mike Mason mm...@us.ibm.com

Looks good to me.
Acked-by: Linas Vepstas linasveps...@gmail.com

On a somewhat-related note: there was an issue (I forget
the details) where the kernel needed to shadow some sort
of MSI state so that it could be correctly, um, kept-track-of,
after an EEH reset (it didn't need to be restored, because
firmware did this(?)).  After some digging around and
discussion, we concluded that some generic PPC MSI
code needed to be altered to track this state, and/or
the main kernel MSI code needed to be changed to
(not?) track this state.  Mike Ellerman seemed to best
grasp this area ... was this ever fixed?

Or perhaps this is an alternate fix for that bug? It may
well have been that calling the MSI disable triggered
the problem, I don't remember now.

--linas
___
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev


Re: [PATCH] Only disable/enable LSI interrupts in EEH

2009-02-10 Thread Mike Mason

I'm resubmitting this patch with a couple changes
suggested by Michael Ellerman.  1) the new functions
should be static, and 2) some people may object to
including unrelated formating changes.

=

The EEH code disables and enables interrupts during the
device recovery process.  This is unnecessary for MSI
and MSI-X interrupts because they are effectively disabled
by the DMA Stopped state when an EEH error occurs.  The
current code is also incorrect for MSI-X interrupts.  It
doesn't take into account that MSI-X interrupts are tracked
in a different way than LSI/MSI interrupts.  This patch 
ensures only LSI interrupts are disabled/enabled.


Signed-off-by: Mike Mason mm...@us.ibm.com
Acked-by: Linas Vepstas linasveps...@gmail.com


--- arch/powerpc/platforms/pseries/eeh_driver.c-orig2009-02-10 
07:12:31.0 -0800
+++ arch/powerpc/platforms/pseries/eeh_driver.c 2009-02-10 08:19:54.0 
-0800
@@ -76,6 +76,40 @@ static int irq_in_use(unsigned int irq)
return rc;
}

+/**
+ * eeh_disable_irq - disable interrupt for the recovering device
+ */
+static void eeh_disable_irq(struct pci_dev *dev)
+{
+   struct device_node *dn = pci_device_to_OF_node(dev);
+
+   /* Don't disable MSI and MSI-X interrupts. They are
+	 * effectively disabled by the DMA Stopped state 
+	 * when an EEH error occurs.

+   */
+   if (dev-msi_enabled || dev-msix_enabled)
+   return;
+
+   if (!irq_in_use(dev-irq))
+   return;
+
+   PCI_DN(dn)-eeh_mode |= EEH_MODE_IRQ_DISABLED;
+   disable_irq_nosync(dev-irq);
+}
+
+/**
+ * eeh_enable_irq - enable interrupt for the recovering device
+ */
+static void eeh_enable_irq(struct pci_dev *dev)
+{
+   struct device_node *dn = pci_device_to_OF_node(dev);
+
+   if ((PCI_DN(dn)-eeh_mode)  EEH_MODE_IRQ_DISABLED) {
+   PCI_DN(dn)-eeh_mode = ~EEH_MODE_IRQ_DISABLED;
+   enable_irq(dev-irq);
+   }
+}
+
/* --- */
/**
 * eeh_report_error - report pci error to each device driver
@@ -95,11 +129,8 @@ static void eeh_report_error(struct pci_
if (!driver)
return;

-   if (irq_in_use (dev-irq)) {
-   struct device_node *dn = pci_device_to_OF_node(dev);
-   PCI_DN(dn)-eeh_mode |= EEH_MODE_IRQ_DISABLED;
-   disable_irq_nosync(dev-irq);
-   }
+   eeh_disable_irq(dev);
+
if (!driver-err_handler ||
!driver-err_handler-error_detected)
return;
@@ -144,15 +175,12 @@ static void eeh_report_reset(struct pci_
{
enum pci_ers_result rc, *res = userdata;
struct pci_driver *driver = dev-driver;
-   struct device_node *dn = pci_device_to_OF_node(dev);

if (!driver)
return;

-   if ((PCI_DN(dn)-eeh_mode)  EEH_MODE_IRQ_DISABLED) {
-   PCI_DN(dn)-eeh_mode = ~EEH_MODE_IRQ_DISABLED;
-   enable_irq(dev-irq);
-   }
+   eeh_enable_irq(dev);
+
if (!driver-err_handler ||
!driver-err_handler-slot_reset)
return;
@@ -171,17 +199,14 @@ static void eeh_report_reset(struct pci_
static void eeh_report_resume(struct pci_dev *dev, void *userdata)
{
struct pci_driver *driver = dev-driver;
-   struct device_node *dn = pci_device_to_OF_node(dev);

dev-error_state = pci_channel_io_normal;

if (!driver)
return;

-   if ((PCI_DN(dn)-eeh_mode)  EEH_MODE_IRQ_DISABLED) {
-   PCI_DN(dn)-eeh_mode = ~EEH_MODE_IRQ_DISABLED;
-   enable_irq(dev-irq);
-   }
+   eeh_enable_irq(dev);
+
if (!driver-err_handler ||
!driver-err_handler-resume)
return;
@@ -205,15 +230,12 @@ static void eeh_report_failure(struct pc
if (!driver)
return;

-   if (irq_in_use (dev-irq)) {
-   struct device_node *dn = pci_device_to_OF_node(dev);
-   PCI_DN(dn)-eeh_mode |= EEH_MODE_IRQ_DISABLED;
-   disable_irq_nosync(dev-irq);
-   }
-   if (!driver-err_handler)
-   return;
-   if (!driver-err_handler-error_detected)
+   eeh_disable_irq(dev);
+
+   if (!driver-err_handler ||
+   !driver-err_handler-error_detected)
return;
+
driver-err_handler-error_detected(dev, pci_channel_io_perm_failure);
}


___
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev


Re: [PATCH] Only disable/enable LSI interrupts in EEH

2009-02-10 Thread Michael Ellerman
On Tue, 2009-02-10 at 11:14 -0600, Linas Vepstas wrote:
 2009/2/9 Mike Mason mm...@us.ibm.com:
  The EEH code disables and enables interrupts during the
  device recovery process.  This is unnecessary for MSI
  and MSI-X interrupts because they are effectively disabled
  by the DMA Stopped state when an EEH error occurs.  The current code is also
  incorrect for MSI-X interrupts.  It
  doesn't take into account that MSI-X interrupts are tracked
  in a different way than LSI/MSI interrupts.  This patch ensures only LSI
  interrupts are disabled/enabled.
 
  The patch also includes a couple minor formatting fixes.
 
 
  Signed-off-by: Mike Mason mm...@us.ibm.com
 
 Looks good to me.
 Acked-by: Linas Vepstas linasveps...@gmail.com
 
 On a somewhat-related note: there was an issue (I forget
 the details) where the kernel needed to shadow some sort
 of MSI state so that it could be correctly, um, kept-track-of,
 after an EEH reset (it didn't need to be restored, because
 firmware did this(?)).  After some digging around and
 discussion, we concluded that some generic PPC MSI
 code needed to be altered to track this state, and/or
 the main kernel MSI code needed to be changed to
 (not?) track this state.  Mike Ellerman seemed to best
 grasp this area ... was this ever fixed?
 
 Or perhaps this is an alternate fix for that bug? It may
 well have been that calling the MSI disable triggered
 the problem, I don't remember now.

I'm pretty sure you're referring to this patch, which you acked :)

http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=1db3e890aed3ac39cded30d6e94618bda086f7ce

I don't know of anything else that fits your description?

cheers

-- 
Michael Ellerman
OzLabs, IBM Australia Development Lab

wwweb: http://michael.ellerman.id.au
phone: +61 2 6212 1183 (tie line 70 21183)

We do not inherit the earth from our ancestors,
we borrow it from our children. - S.M.A.R.T Person


signature.asc
Description: This is a digitally signed message part
___
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev

Re: [PATCH] Only disable/enable LSI interrupts in EEH

2009-02-10 Thread Michael Ellerman
On Tue, 2009-02-10 at 13:12 -0800, Mike Mason wrote:
 I'm resubmitting this patch with a couple changes
 suggested by Michael Ellerman.  1) the new functions
 should be static, and 2) some people may object to
 including unrelated formating changes.
 
 =
 
 The EEH code disables and enables interrupts during the
 device recovery process.  This is unnecessary for MSI
 and MSI-X interrupts because they are effectively disabled
 by the DMA Stopped state when an EEH error occurs.  The
 current code is also incorrect for MSI-X interrupts.  It
 doesn't take into account that MSI-X interrupts are tracked
 in a different way than LSI/MSI interrupts.  This patch 
 ensures only LSI interrupts are disabled/enabled.
 
 Signed-off-by: Mike Mason mm...@us.ibm.com
 Acked-by: Linas Vepstas linasveps...@gmail.com


Looks good. Assuming you've tested it :)

Acked-by: Michael Ellerman mich...@ellerman.id.au


-- 
Michael Ellerman
OzLabs, IBM Australia Development Lab

wwweb: http://michael.ellerman.id.au
phone: +61 2 6212 1183 (tie line 70 21183)

We do not inherit the earth from our ancestors,
we borrow it from our children. - S.M.A.R.T Person


signature.asc
Description: This is a digitally signed message part
___
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev

Re: [PATCH] Only disable/enable LSI interrupts in EEH

2009-02-10 Thread Linas Vepstas
2009/2/10 Michael Ellerman mich...@ellerman.id.au:
 On Tue, 2009-02-10 at 11:14 -0600, Linas Vepstas wrote:
 On a somewhat-related note: there was an issue (I forget
 the details) where the kernel needed to shadow some sort
 of MSI state so that it could be correctly, um, kept-track-of,
 after an EEH reset (it didn't need to be restored, because
 firmware did this(?)).  After some digging around and
 discussion, we concluded that some generic PPC MSI
 code needed to be altered to track this state, and/or
 the main kernel MSI code needed to be changed to
 (not?) track this state.  Mike Ellerman seemed to best
 grasp this area ... was this ever fixed?

 Or perhaps this is an alternate fix for that bug? It may
 well have been that calling the MSI disable triggered
 the problem, I don't remember now.

 I'm pretty sure you're referring to this patch, which you acked :)

 http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=1db3e890aed3ac39cded30d6e94618bda086f7ce

 I don't know of anything else that fits your description?

Yes, that's the one. I wasn't sure if it ever made it in or
not, and I just wanted to make sure it wasn't what was
biting you.

--linas
___
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev


[PATCH] Only disable/enable LSI interrupts in EEH

2009-02-09 Thread Mike Mason

The EEH code disables and enables interrupts during the
device recovery process.  This is unnecessary for MSI
and MSI-X interrupts because they are effectively disabled
by the DMA Stopped state when an EEH error occurs.  The 
current code is also incorrect for MSI-X interrupts.  It

doesn't take into account that MSI-X interrupts are tracked
in a different way than LSI/MSI interrupts.  This 
patch ensures only LSI interrupts are disabled/enabled.


The patch also includes a couple minor formatting fixes.


Signed-off-by: Mike Mason mm...@us.ibm.com 


--- linux-2.6.18.ppc64-orig/arch/powerpc/platforms/pseries/eeh_driver.c 
2009-01-16 10:59:59.0 -0800
+++ linux-2.6.18.ppc64/arch/powerpc/platforms/pseries/eeh_driver.c  
2009-02-07 12:29:14.0 -0800
@@ -67,7 +67,7 @@ static int irq_in_use(unsigned int irq)
{
int rc = 0;
unsigned long flags;
-   struct irq_desc *desc = irq_desc + irq;
+   struct irq_desc *desc = irq_desc + irq;

spin_lock_irqsave(desc-lock, flags);
if (desc-action)
@@ -76,6 +76,40 @@ static int irq_in_use(unsigned int irq)
return rc;
}

+/**
+ * eeh_disable_irq - disable interrupt for the recovering device
+ */
+void eeh_disable_irq(struct pci_dev *dev)
+{
+   struct device_node *dn = pci_device_to_OF_node(dev);
+
+   /* Don't disable MSI and MSI-X interrupts. They are
+	 * effectively disabled by the DMA Stopped state 
+	 * when an EEH error occurs.

+   */
+   if (dev-msi_enabled || dev-msix_enabled)
+   return;
+
+   if (!irq_in_use(dev-irq))
+   return;
+
+   PCI_DN(dn)-eeh_mode |= EEH_MODE_IRQ_DISABLED;
+   disable_irq_nosync(dev-irq);
+}
+
+/**
+ * eeh_enable_irq - enable interrupt for the recovering device
+ */
+void eeh_enable_irq(struct pci_dev *dev)
+{
+   struct device_node *dn = pci_device_to_OF_node(dev);
+
+   if ((PCI_DN(dn)-eeh_mode)  EEH_MODE_IRQ_DISABLED) {
+   PCI_DN(dn)-eeh_mode = ~EEH_MODE_IRQ_DISABLED;
+   enable_irq(dev-irq);
+   }
+}
+
/* --- */
/**
 * eeh_report_error - report pci error to each device driver
@@ -95,11 +129,8 @@ static void eeh_report_error(struct pci_
if (!driver)
return;

-   if (irq_in_use (dev-irq)) {
-   struct device_node *dn = pci_device_to_OF_node(dev);
-   PCI_DN(dn)-eeh_mode |= EEH_MODE_IRQ_DISABLED;
-   disable_irq_nosync(dev-irq);
-   }
+   eeh_disable_irq(dev);
+
if (!driver-err_handler ||
!driver-err_handler-error_detected)
return;
@@ -144,15 +175,12 @@ static void eeh_report_reset(struct pci_
{
enum pci_ers_result rc, *res = userdata;
struct pci_driver *driver = dev-driver;
-   struct device_node *dn = pci_device_to_OF_node(dev);

if (!driver)
return;

-   if ((PCI_DN(dn)-eeh_mode)  EEH_MODE_IRQ_DISABLED) {
-   PCI_DN(dn)-eeh_mode = ~EEH_MODE_IRQ_DISABLED;
-   enable_irq(dev-irq);
-   }
+   eeh_enable_irq(dev);
+
if (!driver-err_handler ||
!driver-err_handler-slot_reset)
return;
@@ -171,17 +199,14 @@ static void eeh_report_reset(struct pci_
static void eeh_report_resume(struct pci_dev *dev, void *userdata)
{
struct pci_driver *driver = dev-driver;
-   struct device_node *dn = pci_device_to_OF_node(dev);

dev-error_state = pci_channel_io_normal;

if (!driver)
return;

-   if ((PCI_DN(dn)-eeh_mode)  EEH_MODE_IRQ_DISABLED) {
-   PCI_DN(dn)-eeh_mode = ~EEH_MODE_IRQ_DISABLED;
-   enable_irq(dev-irq);
-   }
+   eeh_enable_irq(dev);
+
if (!driver-err_handler ||
!driver-err_handler-resume)
return;
@@ -205,15 +230,12 @@ static void eeh_report_failure(struct pc
if (!driver)
return;

-   if (irq_in_use (dev-irq)) {
-   struct device_node *dn = pci_device_to_OF_node(dev);
-   PCI_DN(dn)-eeh_mode |= EEH_MODE_IRQ_DISABLED;
-   disable_irq_nosync(dev-irq);
-   }
-   if (!driver-err_handler)
-   return;
-   if (!driver-err_handler-error_detected)
+   eeh_disable_irq(dev);
+
+   if (!driver-err_handler ||
+   !driver-err_handler-error_detected)
return;
+
driver-err_handler-error_detected(dev, pci_channel_io_perm_failure);
}



___
Linuxppc-dev mailing list
Linuxppc-dev@ozlabs.org
https://ozlabs.org/mailman/listinfo/linuxppc-dev