Re: [PATCH] Only disable/enable LSI interrupts in EEH
Michael Ellerman wrote: On Tue, 2009-02-10 at 13:12 -0800, Mike Mason wrote: I'm resubmitting this patch with a couple changes suggested by Michael Ellerman. 1) the new functions should be static, and 2) some people may object to including unrelated formating changes. = The EEH code disables and enables interrupts during the device recovery process. This is unnecessary for MSI and MSI-X interrupts because they are effectively disabled by the DMA Stopped state when an EEH error occurs. The current code is also incorrect for MSI-X interrupts. It doesn't take into account that MSI-X interrupts are tracked in a different way than LSI/MSI interrupts. This patch ensures only LSI interrupts are disabled/enabled. Signed-off-by: Mike Mason mm...@us.ibm.com Acked-by: Linas Vepstas linasveps...@gmail.com Looks good. Assuming you've tested it :) Yes, it's been tested with network devices that use LSI, MSI and MSI-X interrupts. All recovered fine. Acked-by: Michael Ellerman mich...@ellerman.id.au ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: [PATCH] Only disable/enable LSI interrupts in EEH
2009/2/9 Mike Mason mm...@us.ibm.com: The EEH code disables and enables interrupts during the device recovery process. This is unnecessary for MSI and MSI-X interrupts because they are effectively disabled by the DMA Stopped state when an EEH error occurs. The current code is also incorrect for MSI-X interrupts. It doesn't take into account that MSI-X interrupts are tracked in a different way than LSI/MSI interrupts. This patch ensures only LSI interrupts are disabled/enabled. The patch also includes a couple minor formatting fixes. Signed-off-by: Mike Mason mm...@us.ibm.com Looks good to me. Acked-by: Linas Vepstas linasveps...@gmail.com On a somewhat-related note: there was an issue (I forget the details) where the kernel needed to shadow some sort of MSI state so that it could be correctly, um, kept-track-of, after an EEH reset (it didn't need to be restored, because firmware did this(?)). After some digging around and discussion, we concluded that some generic PPC MSI code needed to be altered to track this state, and/or the main kernel MSI code needed to be changed to (not?) track this state. Mike Ellerman seemed to best grasp this area ... was this ever fixed? Or perhaps this is an alternate fix for that bug? It may well have been that calling the MSI disable triggered the problem, I don't remember now. --linas ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: [PATCH] Only disable/enable LSI interrupts in EEH
I'm resubmitting this patch with a couple changes suggested by Michael Ellerman. 1) the new functions should be static, and 2) some people may object to including unrelated formating changes. = The EEH code disables and enables interrupts during the device recovery process. This is unnecessary for MSI and MSI-X interrupts because they are effectively disabled by the DMA Stopped state when an EEH error occurs. The current code is also incorrect for MSI-X interrupts. It doesn't take into account that MSI-X interrupts are tracked in a different way than LSI/MSI interrupts. This patch ensures only LSI interrupts are disabled/enabled. Signed-off-by: Mike Mason mm...@us.ibm.com Acked-by: Linas Vepstas linasveps...@gmail.com --- arch/powerpc/platforms/pseries/eeh_driver.c-orig2009-02-10 07:12:31.0 -0800 +++ arch/powerpc/platforms/pseries/eeh_driver.c 2009-02-10 08:19:54.0 -0800 @@ -76,6 +76,40 @@ static int irq_in_use(unsigned int irq) return rc; } +/** + * eeh_disable_irq - disable interrupt for the recovering device + */ +static void eeh_disable_irq(struct pci_dev *dev) +{ + struct device_node *dn = pci_device_to_OF_node(dev); + + /* Don't disable MSI and MSI-X interrupts. They are + * effectively disabled by the DMA Stopped state + * when an EEH error occurs. + */ + if (dev-msi_enabled || dev-msix_enabled) + return; + + if (!irq_in_use(dev-irq)) + return; + + PCI_DN(dn)-eeh_mode |= EEH_MODE_IRQ_DISABLED; + disable_irq_nosync(dev-irq); +} + +/** + * eeh_enable_irq - enable interrupt for the recovering device + */ +static void eeh_enable_irq(struct pci_dev *dev) +{ + struct device_node *dn = pci_device_to_OF_node(dev); + + if ((PCI_DN(dn)-eeh_mode) EEH_MODE_IRQ_DISABLED) { + PCI_DN(dn)-eeh_mode = ~EEH_MODE_IRQ_DISABLED; + enable_irq(dev-irq); + } +} + /* --- */ /** * eeh_report_error - report pci error to each device driver @@ -95,11 +129,8 @@ static void eeh_report_error(struct pci_ if (!driver) return; - if (irq_in_use (dev-irq)) { - struct device_node *dn = pci_device_to_OF_node(dev); - PCI_DN(dn)-eeh_mode |= EEH_MODE_IRQ_DISABLED; - disable_irq_nosync(dev-irq); - } + eeh_disable_irq(dev); + if (!driver-err_handler || !driver-err_handler-error_detected) return; @@ -144,15 +175,12 @@ static void eeh_report_reset(struct pci_ { enum pci_ers_result rc, *res = userdata; struct pci_driver *driver = dev-driver; - struct device_node *dn = pci_device_to_OF_node(dev); if (!driver) return; - if ((PCI_DN(dn)-eeh_mode) EEH_MODE_IRQ_DISABLED) { - PCI_DN(dn)-eeh_mode = ~EEH_MODE_IRQ_DISABLED; - enable_irq(dev-irq); - } + eeh_enable_irq(dev); + if (!driver-err_handler || !driver-err_handler-slot_reset) return; @@ -171,17 +199,14 @@ static void eeh_report_reset(struct pci_ static void eeh_report_resume(struct pci_dev *dev, void *userdata) { struct pci_driver *driver = dev-driver; - struct device_node *dn = pci_device_to_OF_node(dev); dev-error_state = pci_channel_io_normal; if (!driver) return; - if ((PCI_DN(dn)-eeh_mode) EEH_MODE_IRQ_DISABLED) { - PCI_DN(dn)-eeh_mode = ~EEH_MODE_IRQ_DISABLED; - enable_irq(dev-irq); - } + eeh_enable_irq(dev); + if (!driver-err_handler || !driver-err_handler-resume) return; @@ -205,15 +230,12 @@ static void eeh_report_failure(struct pc if (!driver) return; - if (irq_in_use (dev-irq)) { - struct device_node *dn = pci_device_to_OF_node(dev); - PCI_DN(dn)-eeh_mode |= EEH_MODE_IRQ_DISABLED; - disable_irq_nosync(dev-irq); - } - if (!driver-err_handler) - return; - if (!driver-err_handler-error_detected) + eeh_disable_irq(dev); + + if (!driver-err_handler || + !driver-err_handler-error_detected) return; + driver-err_handler-error_detected(dev, pci_channel_io_perm_failure); } ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: [PATCH] Only disable/enable LSI interrupts in EEH
On Tue, 2009-02-10 at 11:14 -0600, Linas Vepstas wrote: 2009/2/9 Mike Mason mm...@us.ibm.com: The EEH code disables and enables interrupts during the device recovery process. This is unnecessary for MSI and MSI-X interrupts because they are effectively disabled by the DMA Stopped state when an EEH error occurs. The current code is also incorrect for MSI-X interrupts. It doesn't take into account that MSI-X interrupts are tracked in a different way than LSI/MSI interrupts. This patch ensures only LSI interrupts are disabled/enabled. The patch also includes a couple minor formatting fixes. Signed-off-by: Mike Mason mm...@us.ibm.com Looks good to me. Acked-by: Linas Vepstas linasveps...@gmail.com On a somewhat-related note: there was an issue (I forget the details) where the kernel needed to shadow some sort of MSI state so that it could be correctly, um, kept-track-of, after an EEH reset (it didn't need to be restored, because firmware did this(?)). After some digging around and discussion, we concluded that some generic PPC MSI code needed to be altered to track this state, and/or the main kernel MSI code needed to be changed to (not?) track this state. Mike Ellerman seemed to best grasp this area ... was this ever fixed? Or perhaps this is an alternate fix for that bug? It may well have been that calling the MSI disable triggered the problem, I don't remember now. I'm pretty sure you're referring to this patch, which you acked :) http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=1db3e890aed3ac39cded30d6e94618bda086f7ce I don't know of anything else that fits your description? cheers -- Michael Ellerman OzLabs, IBM Australia Development Lab wwweb: http://michael.ellerman.id.au phone: +61 2 6212 1183 (tie line 70 21183) We do not inherit the earth from our ancestors, we borrow it from our children. - S.M.A.R.T Person signature.asc Description: This is a digitally signed message part ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: [PATCH] Only disable/enable LSI interrupts in EEH
On Tue, 2009-02-10 at 13:12 -0800, Mike Mason wrote: I'm resubmitting this patch with a couple changes suggested by Michael Ellerman. 1) the new functions should be static, and 2) some people may object to including unrelated formating changes. = The EEH code disables and enables interrupts during the device recovery process. This is unnecessary for MSI and MSI-X interrupts because they are effectively disabled by the DMA Stopped state when an EEH error occurs. The current code is also incorrect for MSI-X interrupts. It doesn't take into account that MSI-X interrupts are tracked in a different way than LSI/MSI interrupts. This patch ensures only LSI interrupts are disabled/enabled. Signed-off-by: Mike Mason mm...@us.ibm.com Acked-by: Linas Vepstas linasveps...@gmail.com Looks good. Assuming you've tested it :) Acked-by: Michael Ellerman mich...@ellerman.id.au -- Michael Ellerman OzLabs, IBM Australia Development Lab wwweb: http://michael.ellerman.id.au phone: +61 2 6212 1183 (tie line 70 21183) We do not inherit the earth from our ancestors, we borrow it from our children. - S.M.A.R.T Person signature.asc Description: This is a digitally signed message part ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
Re: [PATCH] Only disable/enable LSI interrupts in EEH
2009/2/10 Michael Ellerman mich...@ellerman.id.au: On Tue, 2009-02-10 at 11:14 -0600, Linas Vepstas wrote: On a somewhat-related note: there was an issue (I forget the details) where the kernel needed to shadow some sort of MSI state so that it could be correctly, um, kept-track-of, after an EEH reset (it didn't need to be restored, because firmware did this(?)). After some digging around and discussion, we concluded that some generic PPC MSI code needed to be altered to track this state, and/or the main kernel MSI code needed to be changed to (not?) track this state. Mike Ellerman seemed to best grasp this area ... was this ever fixed? Or perhaps this is an alternate fix for that bug? It may well have been that calling the MSI disable triggered the problem, I don't remember now. I'm pretty sure you're referring to this patch, which you acked :) http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=1db3e890aed3ac39cded30d6e94618bda086f7ce I don't know of anything else that fits your description? Yes, that's the one. I wasn't sure if it ever made it in or not, and I just wanted to make sure it wasn't what was biting you. --linas ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev
[PATCH] Only disable/enable LSI interrupts in EEH
The EEH code disables and enables interrupts during the device recovery process. This is unnecessary for MSI and MSI-X interrupts because they are effectively disabled by the DMA Stopped state when an EEH error occurs. The current code is also incorrect for MSI-X interrupts. It doesn't take into account that MSI-X interrupts are tracked in a different way than LSI/MSI interrupts. This patch ensures only LSI interrupts are disabled/enabled. The patch also includes a couple minor formatting fixes. Signed-off-by: Mike Mason mm...@us.ibm.com --- linux-2.6.18.ppc64-orig/arch/powerpc/platforms/pseries/eeh_driver.c 2009-01-16 10:59:59.0 -0800 +++ linux-2.6.18.ppc64/arch/powerpc/platforms/pseries/eeh_driver.c 2009-02-07 12:29:14.0 -0800 @@ -67,7 +67,7 @@ static int irq_in_use(unsigned int irq) { int rc = 0; unsigned long flags; - struct irq_desc *desc = irq_desc + irq; + struct irq_desc *desc = irq_desc + irq; spin_lock_irqsave(desc-lock, flags); if (desc-action) @@ -76,6 +76,40 @@ static int irq_in_use(unsigned int irq) return rc; } +/** + * eeh_disable_irq - disable interrupt for the recovering device + */ +void eeh_disable_irq(struct pci_dev *dev) +{ + struct device_node *dn = pci_device_to_OF_node(dev); + + /* Don't disable MSI and MSI-X interrupts. They are + * effectively disabled by the DMA Stopped state + * when an EEH error occurs. + */ + if (dev-msi_enabled || dev-msix_enabled) + return; + + if (!irq_in_use(dev-irq)) + return; + + PCI_DN(dn)-eeh_mode |= EEH_MODE_IRQ_DISABLED; + disable_irq_nosync(dev-irq); +} + +/** + * eeh_enable_irq - enable interrupt for the recovering device + */ +void eeh_enable_irq(struct pci_dev *dev) +{ + struct device_node *dn = pci_device_to_OF_node(dev); + + if ((PCI_DN(dn)-eeh_mode) EEH_MODE_IRQ_DISABLED) { + PCI_DN(dn)-eeh_mode = ~EEH_MODE_IRQ_DISABLED; + enable_irq(dev-irq); + } +} + /* --- */ /** * eeh_report_error - report pci error to each device driver @@ -95,11 +129,8 @@ static void eeh_report_error(struct pci_ if (!driver) return; - if (irq_in_use (dev-irq)) { - struct device_node *dn = pci_device_to_OF_node(dev); - PCI_DN(dn)-eeh_mode |= EEH_MODE_IRQ_DISABLED; - disable_irq_nosync(dev-irq); - } + eeh_disable_irq(dev); + if (!driver-err_handler || !driver-err_handler-error_detected) return; @@ -144,15 +175,12 @@ static void eeh_report_reset(struct pci_ { enum pci_ers_result rc, *res = userdata; struct pci_driver *driver = dev-driver; - struct device_node *dn = pci_device_to_OF_node(dev); if (!driver) return; - if ((PCI_DN(dn)-eeh_mode) EEH_MODE_IRQ_DISABLED) { - PCI_DN(dn)-eeh_mode = ~EEH_MODE_IRQ_DISABLED; - enable_irq(dev-irq); - } + eeh_enable_irq(dev); + if (!driver-err_handler || !driver-err_handler-slot_reset) return; @@ -171,17 +199,14 @@ static void eeh_report_reset(struct pci_ static void eeh_report_resume(struct pci_dev *dev, void *userdata) { struct pci_driver *driver = dev-driver; - struct device_node *dn = pci_device_to_OF_node(dev); dev-error_state = pci_channel_io_normal; if (!driver) return; - if ((PCI_DN(dn)-eeh_mode) EEH_MODE_IRQ_DISABLED) { - PCI_DN(dn)-eeh_mode = ~EEH_MODE_IRQ_DISABLED; - enable_irq(dev-irq); - } + eeh_enable_irq(dev); + if (!driver-err_handler || !driver-err_handler-resume) return; @@ -205,15 +230,12 @@ static void eeh_report_failure(struct pc if (!driver) return; - if (irq_in_use (dev-irq)) { - struct device_node *dn = pci_device_to_OF_node(dev); - PCI_DN(dn)-eeh_mode |= EEH_MODE_IRQ_DISABLED; - disable_irq_nosync(dev-irq); - } - if (!driver-err_handler) - return; - if (!driver-err_handler-error_detected) + eeh_disable_irq(dev); + + if (!driver-err_handler || + !driver-err_handler-error_detected) return; + driver-err_handler-error_detected(dev, pci_channel_io_perm_failure); } ___ Linuxppc-dev mailing list Linuxppc-dev@ozlabs.org https://ozlabs.org/mailman/listinfo/linuxppc-dev