Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-27 Thread Chen Fan


On 01/28/2016 06:32 AM, Bjorn Helgaas wrote:

On Wed, Jan 27, 2016 at 10:13:36AM +0100, Thomas Gleixner wrote:

On Tue, 26 Jan 2016, Bjorn Helgaas wrote:

On Tue, Jan 26, 2016 at 04:48:25PM +0100, Thomas Gleixner wrote:

Right. So we could certainly do something like this INVALID_IRQ thingy, but
that looks a bit weird. What would request_irq() return?

If it returns success, then drivers might make the wrong decision. If it
returns an error code, then the i801 one works, but we might have to fix
others anyway.

I was thinking request_irq() could return -EINVAL if the caller passed
INVALID_IRQ.  That should tell drivers that this interrupt won't work.

We'd be making request_irq() return -EINVAL in some cases where it
currently returns success.  But even though it returns success today,
I don't think the driver is getting interrupts, because the wire isn't
connected.

Correct. What I meant is that the i801 driver can handle the -EINVAL return
from request_irq() today, but other drivers don't. I agree that we need to fix
them anyway and a failure to request the interrupt is better than a silent 'no
interrupts delivered' issue.
  
Though instead of returning -EINVAL I prefer an explicit error code for this

case. That way a driver can distinguish between the 'not connected' case and
other failure modes. Something like the patch below should work.

This patch looks great to me, thanks for all your help!

Chen, do you want to put all this together as formal patches with
changelogs and post to the mailing lists?

With pleasure. I will test it on my environment first and send it out.
thank all of you for this problem.

Chen



Bjorn


8<--
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -387,6 +387,22 @@ static inline int acpi_isa_register_gsi(
  }
  #endif
  
+static inline bool acpi_pci_irq_valid(struc pci_dev *dev)

+{
+#ifdef CONFIG_X86
+   /*
+* On x86 irq line 0xff means "unknown" or "no connection" (PCI 3.0,
+* Section 6.2.4, footnote on page 223).
+*/
+   if (dev->irq == 0xff) {
+   dev->irq = IRQ_NOTCONNECTED;
+   dev_warn(>dev, "PCI INT not connected\n");
+   return false;
+   }
+#endif
+   return true;
+}
+
  int acpi_pci_irq_enable(struct pci_dev *dev)
  {
struct acpi_prt_entry *entry;
@@ -409,6 +425,9 @@ int acpi_pci_irq_enable(struct pci_dev *
if (pci_has_managed_irq(dev))
return 0;
  
+	if (!acpi_pci_irq_valid(dev))

+   return 0;
+
entry = acpi_pci_irq_lookup(dev, pin);
if (!entry) {
/*
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -125,6 +125,16 @@ struct irqaction {
  
  extern irqreturn_t no_action(int cpl, void *dev_id);
  
+/*

+ * If a (PCI) device interrupt is not connected we set dev->irq to
+ * IRQ_NOTCONNECTED. This causes request_irq() to fail with -ENOTCONN, so we
+ * can distingiush that case from other error returns.
+ *
+ * 0x8000 is guaranteed to be outside the available range of interrupts
+ * and easy to distinguish from other possible incorrect values.
+ */
+#define IRQ_NOTCONNECTED   (1U << 31)
+
  extern int __must_check
  request_threaded_irq(unsigned int irq, irq_handler_t handler,
 irq_handler_t thread_fn,
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1609,6 +1609,9 @@ int request_threaded_irq(unsigned int ir
struct irq_desc *desc;
int retval;
  
+	if (irq == IRQ_NOTCONNECTED)

+   return -ENOTCONN;
+
/*
 * Sanity-check: shared interrupts must pass in a real dev-ID,
 * otherwise we'll have trouble later trying to figure out
@@ -1699,9 +1702,13 @@ EXPORT_SYMBOL(request_threaded_irq);
  int request_any_context_irq(unsigned int irq, irq_handler_t handler,
unsigned long flags, const char *name, void *dev_id)
  {
-   struct irq_desc *desc = irq_to_desc(irq);
+   struct irq_desc *desc;
int ret;
  
+	if (irq == IRQ_NOTCONNECTED)

+   return -ENOTCONN;
+
+   desc = irq_to_desc(irq);
if (!desc)
return -EINVAL;
  




--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


.







Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-27 Thread Bjorn Helgaas
On Wed, Jan 27, 2016 at 10:13:36AM +0100, Thomas Gleixner wrote:
> On Tue, 26 Jan 2016, Bjorn Helgaas wrote:
> > On Tue, Jan 26, 2016 at 04:48:25PM +0100, Thomas Gleixner wrote:
> > > Right. So we could certainly do something like this INVALID_IRQ thingy, 
> > > but
> > > that looks a bit weird. What would request_irq() return?
> > > 
> > > If it returns success, then drivers might make the wrong decision. If it
> > > returns an error code, then the i801 one works, but we might have to fix
> > > others anyway.
> > 
> > I was thinking request_irq() could return -EINVAL if the caller passed
> > INVALID_IRQ.  That should tell drivers that this interrupt won't work.
> > 
> > We'd be making request_irq() return -EINVAL in some cases where it
> > currently returns success.  But even though it returns success today,
> > I don't think the driver is getting interrupts, because the wire isn't
> > connected.
> 
> Correct. What I meant is that the i801 driver can handle the -EINVAL return
> from request_irq() today, but other drivers don't. I agree that we need to fix
> them anyway and a failure to request the interrupt is better than a silent 'no
> interrupts delivered' issue.
>  
> Though instead of returning -EINVAL I prefer an explicit error code for this
> case. That way a driver can distinguish between the 'not connected' case and
> other failure modes. Something like the patch below should work.

This patch looks great to me, thanks for all your help!

Chen, do you want to put all this together as formal patches with
changelogs and post to the mailing lists?

Bjorn

> 8<--
> --- a/drivers/acpi/pci_irq.c
> +++ b/drivers/acpi/pci_irq.c
> @@ -387,6 +387,22 @@ static inline int acpi_isa_register_gsi(
>  }
>  #endif
>  
> +static inline bool acpi_pci_irq_valid(struc pci_dev *dev)
> +{
> +#ifdef CONFIG_X86
> + /*
> +  * On x86 irq line 0xff means "unknown" or "no connection" (PCI 3.0,
> +  * Section 6.2.4, footnote on page 223).
> +  */
> + if (dev->irq == 0xff) {
> + dev->irq = IRQ_NOTCONNECTED;
> + dev_warn(>dev, "PCI INT not connected\n");
> + return false;
> + }
> +#endif
> + return true;
> +}
> +
>  int acpi_pci_irq_enable(struct pci_dev *dev)
>  {
>   struct acpi_prt_entry *entry;
> @@ -409,6 +425,9 @@ int acpi_pci_irq_enable(struct pci_dev *
>   if (pci_has_managed_irq(dev))
>   return 0;
>  
> + if (!acpi_pci_irq_valid(dev))
> + return 0;
> +
>   entry = acpi_pci_irq_lookup(dev, pin);
>   if (!entry) {
>   /*
> --- a/include/linux/interrupt.h
> +++ b/include/linux/interrupt.h
> @@ -125,6 +125,16 @@ struct irqaction {
>  
>  extern irqreturn_t no_action(int cpl, void *dev_id);
>  
> +/*
> + * If a (PCI) device interrupt is not connected we set dev->irq to
> + * IRQ_NOTCONNECTED. This causes request_irq() to fail with -ENOTCONN, so we
> + * can distingiush that case from other error returns.
> + *
> + * 0x8000 is guaranteed to be outside the available range of interrupts
> + * and easy to distinguish from other possible incorrect values.
> + */
> +#define IRQ_NOTCONNECTED (1U << 31)
> +
>  extern int __must_check
>  request_threaded_irq(unsigned int irq, irq_handler_t handler,
>irq_handler_t thread_fn,
> --- a/kernel/irq/manage.c
> +++ b/kernel/irq/manage.c
> @@ -1609,6 +1609,9 @@ int request_threaded_irq(unsigned int ir
>   struct irq_desc *desc;
>   int retval;
>  
> + if (irq == IRQ_NOTCONNECTED)
> + return -ENOTCONN;
> +
>   /*
>* Sanity-check: shared interrupts must pass in a real dev-ID,
>* otherwise we'll have trouble later trying to figure out
> @@ -1699,9 +1702,13 @@ EXPORT_SYMBOL(request_threaded_irq);
>  int request_any_context_irq(unsigned int irq, irq_handler_t handler,
>   unsigned long flags, const char *name, void *dev_id)
>  {
> - struct irq_desc *desc = irq_to_desc(irq);
> + struct irq_desc *desc;
>   int ret;
>  
> + if (irq == IRQ_NOTCONNECTED)
> + return -ENOTCONN;
> +
> + desc = irq_to_desc(irq);
>   if (!desc)
>   return -EINVAL;
>  
> 
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-27 Thread Thomas Gleixner
On Tue, 26 Jan 2016, Bjorn Helgaas wrote:
> On Tue, Jan 26, 2016 at 04:48:25PM +0100, Thomas Gleixner wrote:
> > Right. So we could certainly do something like this INVALID_IRQ thingy, but
> > that looks a bit weird. What would request_irq() return?
> > 
> > If it returns success, then drivers might make the wrong decision. If it
> > returns an error code, then the i801 one works, but we might have to fix
> > others anyway.
> 
> I was thinking request_irq() could return -EINVAL if the caller passed
> INVALID_IRQ.  That should tell drivers that this interrupt won't work.
> 
> We'd be making request_irq() return -EINVAL in some cases where it
> currently returns success.  But even though it returns success today,
> I don't think the driver is getting interrupts, because the wire isn't
> connected.

Correct. What I meant is that the i801 driver can handle the -EINVAL return
from request_irq() today, but other drivers don't. I agree that we need to fix
them anyway and a failure to request the interrupt is better than a silent 'no
interrupts delivered' issue.
 
Though instead of returning -EINVAL I prefer an explicit error code for this
case. That way a driver can distinguish between the 'not connected' case and
other failure modes. Something like the patch below should work.

Thanks,

tglx

8<--
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -387,6 +387,22 @@ static inline int acpi_isa_register_gsi(
 }
 #endif
 
+static inline bool acpi_pci_irq_valid(struc pci_dev *dev)
+{
+#ifdef CONFIG_X86
+   /*
+* On x86 irq line 0xff means "unknown" or "no connection" (PCI 3.0,
+* Section 6.2.4, footnote on page 223).
+*/
+   if (dev->irq == 0xff) {
+   dev->irq = IRQ_NOTCONNECTED;
+   dev_warn(>dev, "PCI INT not connected\n");
+   return false;
+   }
+#endif
+   return true;
+}
+
 int acpi_pci_irq_enable(struct pci_dev *dev)
 {
struct acpi_prt_entry *entry;
@@ -409,6 +425,9 @@ int acpi_pci_irq_enable(struct pci_dev *
if (pci_has_managed_irq(dev))
return 0;
 
+   if (!acpi_pci_irq_valid(dev))
+   return 0;
+
entry = acpi_pci_irq_lookup(dev, pin);
if (!entry) {
/*
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -125,6 +125,16 @@ struct irqaction {
 
 extern irqreturn_t no_action(int cpl, void *dev_id);
 
+/*
+ * If a (PCI) device interrupt is not connected we set dev->irq to
+ * IRQ_NOTCONNECTED. This causes request_irq() to fail with -ENOTCONN, so we
+ * can distingiush that case from other error returns.
+ *
+ * 0x8000 is guaranteed to be outside the available range of interrupts
+ * and easy to distinguish from other possible incorrect values.
+ */
+#define IRQ_NOTCONNECTED   (1U << 31)
+
 extern int __must_check
 request_threaded_irq(unsigned int irq, irq_handler_t handler,
 irq_handler_t thread_fn,
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1609,6 +1609,9 @@ int request_threaded_irq(unsigned int ir
struct irq_desc *desc;
int retval;
 
+   if (irq == IRQ_NOTCONNECTED)
+   return -ENOTCONN;
+
/*
 * Sanity-check: shared interrupts must pass in a real dev-ID,
 * otherwise we'll have trouble later trying to figure out
@@ -1699,9 +1702,13 @@ EXPORT_SYMBOL(request_threaded_irq);
 int request_any_context_irq(unsigned int irq, irq_handler_t handler,
unsigned long flags, const char *name, void *dev_id)
 {
-   struct irq_desc *desc = irq_to_desc(irq);
+   struct irq_desc *desc;
int ret;
 
+   if (irq == IRQ_NOTCONNECTED)
+   return -ENOTCONN;
+
+   desc = irq_to_desc(irq);
if (!desc)
return -EINVAL;
 





Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-27 Thread Thomas Gleixner
On Wed, 27 Jan 2016, Cao jin wrote:
> How about using IRQ_BITMAP_BITS as that "irq_valid" flag? because it is the
> ceiling of struct irq_desc irq_desc[], and request_irq() will return -EINVAL
> in case of the ceiling.
> 
> #ifdef CONFIG_SPARSE_IRQ
> # define IRQ_BITMAP_BITS  (NR_IRQS + 8196)
> #else
> # define IRQ_BITMAP_BITS  NR_IRQS
> #endif

No. This is a core internal implementation detail.

Thanks,

tglx


Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-27 Thread Bjorn Helgaas
On Wed, Jan 27, 2016 at 10:13:36AM +0100, Thomas Gleixner wrote:
> On Tue, 26 Jan 2016, Bjorn Helgaas wrote:
> > On Tue, Jan 26, 2016 at 04:48:25PM +0100, Thomas Gleixner wrote:
> > > Right. So we could certainly do something like this INVALID_IRQ thingy, 
> > > but
> > > that looks a bit weird. What would request_irq() return?
> > > 
> > > If it returns success, then drivers might make the wrong decision. If it
> > > returns an error code, then the i801 one works, but we might have to fix
> > > others anyway.
> > 
> > I was thinking request_irq() could return -EINVAL if the caller passed
> > INVALID_IRQ.  That should tell drivers that this interrupt won't work.
> > 
> > We'd be making request_irq() return -EINVAL in some cases where it
> > currently returns success.  But even though it returns success today,
> > I don't think the driver is getting interrupts, because the wire isn't
> > connected.
> 
> Correct. What I meant is that the i801 driver can handle the -EINVAL return
> from request_irq() today, but other drivers don't. I agree that we need to fix
> them anyway and a failure to request the interrupt is better than a silent 'no
> interrupts delivered' issue.
>  
> Though instead of returning -EINVAL I prefer an explicit error code for this
> case. That way a driver can distinguish between the 'not connected' case and
> other failure modes. Something like the patch below should work.

This patch looks great to me, thanks for all your help!

Chen, do you want to put all this together as formal patches with
changelogs and post to the mailing lists?

Bjorn

> 8<--
> --- a/drivers/acpi/pci_irq.c
> +++ b/drivers/acpi/pci_irq.c
> @@ -387,6 +387,22 @@ static inline int acpi_isa_register_gsi(
>  }
>  #endif
>  
> +static inline bool acpi_pci_irq_valid(struc pci_dev *dev)
> +{
> +#ifdef CONFIG_X86
> + /*
> +  * On x86 irq line 0xff means "unknown" or "no connection" (PCI 3.0,
> +  * Section 6.2.4, footnote on page 223).
> +  */
> + if (dev->irq == 0xff) {
> + dev->irq = IRQ_NOTCONNECTED;
> + dev_warn(>dev, "PCI INT not connected\n");
> + return false;
> + }
> +#endif
> + return true;
> +}
> +
>  int acpi_pci_irq_enable(struct pci_dev *dev)
>  {
>   struct acpi_prt_entry *entry;
> @@ -409,6 +425,9 @@ int acpi_pci_irq_enable(struct pci_dev *
>   if (pci_has_managed_irq(dev))
>   return 0;
>  
> + if (!acpi_pci_irq_valid(dev))
> + return 0;
> +
>   entry = acpi_pci_irq_lookup(dev, pin);
>   if (!entry) {
>   /*
> --- a/include/linux/interrupt.h
> +++ b/include/linux/interrupt.h
> @@ -125,6 +125,16 @@ struct irqaction {
>  
>  extern irqreturn_t no_action(int cpl, void *dev_id);
>  
> +/*
> + * If a (PCI) device interrupt is not connected we set dev->irq to
> + * IRQ_NOTCONNECTED. This causes request_irq() to fail with -ENOTCONN, so we
> + * can distingiush that case from other error returns.
> + *
> + * 0x8000 is guaranteed to be outside the available range of interrupts
> + * and easy to distinguish from other possible incorrect values.
> + */
> +#define IRQ_NOTCONNECTED (1U << 31)
> +
>  extern int __must_check
>  request_threaded_irq(unsigned int irq, irq_handler_t handler,
>irq_handler_t thread_fn,
> --- a/kernel/irq/manage.c
> +++ b/kernel/irq/manage.c
> @@ -1609,6 +1609,9 @@ int request_threaded_irq(unsigned int ir
>   struct irq_desc *desc;
>   int retval;
>  
> + if (irq == IRQ_NOTCONNECTED)
> + return -ENOTCONN;
> +
>   /*
>* Sanity-check: shared interrupts must pass in a real dev-ID,
>* otherwise we'll have trouble later trying to figure out
> @@ -1699,9 +1702,13 @@ EXPORT_SYMBOL(request_threaded_irq);
>  int request_any_context_irq(unsigned int irq, irq_handler_t handler,
>   unsigned long flags, const char *name, void *dev_id)
>  {
> - struct irq_desc *desc = irq_to_desc(irq);
> + struct irq_desc *desc;
>   int ret;
>  
> + if (irq == IRQ_NOTCONNECTED)
> + return -ENOTCONN;
> +
> + desc = irq_to_desc(irq);
>   if (!desc)
>   return -EINVAL;
>  
> 
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-27 Thread Chen Fan


On 01/28/2016 06:32 AM, Bjorn Helgaas wrote:

On Wed, Jan 27, 2016 at 10:13:36AM +0100, Thomas Gleixner wrote:

On Tue, 26 Jan 2016, Bjorn Helgaas wrote:

On Tue, Jan 26, 2016 at 04:48:25PM +0100, Thomas Gleixner wrote:

Right. So we could certainly do something like this INVALID_IRQ thingy, but
that looks a bit weird. What would request_irq() return?

If it returns success, then drivers might make the wrong decision. If it
returns an error code, then the i801 one works, but we might have to fix
others anyway.

I was thinking request_irq() could return -EINVAL if the caller passed
INVALID_IRQ.  That should tell drivers that this interrupt won't work.

We'd be making request_irq() return -EINVAL in some cases where it
currently returns success.  But even though it returns success today,
I don't think the driver is getting interrupts, because the wire isn't
connected.

Correct. What I meant is that the i801 driver can handle the -EINVAL return
from request_irq() today, but other drivers don't. I agree that we need to fix
them anyway and a failure to request the interrupt is better than a silent 'no
interrupts delivered' issue.
  
Though instead of returning -EINVAL I prefer an explicit error code for this

case. That way a driver can distinguish between the 'not connected' case and
other failure modes. Something like the patch below should work.

This patch looks great to me, thanks for all your help!

Chen, do you want to put all this together as formal patches with
changelogs and post to the mailing lists?

With pleasure. I will test it on my environment first and send it out.
thank all of you for this problem.

Chen



Bjorn


8<--
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -387,6 +387,22 @@ static inline int acpi_isa_register_gsi(
  }
  #endif
  
+static inline bool acpi_pci_irq_valid(struc pci_dev *dev)

+{
+#ifdef CONFIG_X86
+   /*
+* On x86 irq line 0xff means "unknown" or "no connection" (PCI 3.0,
+* Section 6.2.4, footnote on page 223).
+*/
+   if (dev->irq == 0xff) {
+   dev->irq = IRQ_NOTCONNECTED;
+   dev_warn(>dev, "PCI INT not connected\n");
+   return false;
+   }
+#endif
+   return true;
+}
+
  int acpi_pci_irq_enable(struct pci_dev *dev)
  {
struct acpi_prt_entry *entry;
@@ -409,6 +425,9 @@ int acpi_pci_irq_enable(struct pci_dev *
if (pci_has_managed_irq(dev))
return 0;
  
+	if (!acpi_pci_irq_valid(dev))

+   return 0;
+
entry = acpi_pci_irq_lookup(dev, pin);
if (!entry) {
/*
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -125,6 +125,16 @@ struct irqaction {
  
  extern irqreturn_t no_action(int cpl, void *dev_id);
  
+/*

+ * If a (PCI) device interrupt is not connected we set dev->irq to
+ * IRQ_NOTCONNECTED. This causes request_irq() to fail with -ENOTCONN, so we
+ * can distingiush that case from other error returns.
+ *
+ * 0x8000 is guaranteed to be outside the available range of interrupts
+ * and easy to distinguish from other possible incorrect values.
+ */
+#define IRQ_NOTCONNECTED   (1U << 31)
+
  extern int __must_check
  request_threaded_irq(unsigned int irq, irq_handler_t handler,
 irq_handler_t thread_fn,
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1609,6 +1609,9 @@ int request_threaded_irq(unsigned int ir
struct irq_desc *desc;
int retval;
  
+	if (irq == IRQ_NOTCONNECTED)

+   return -ENOTCONN;
+
/*
 * Sanity-check: shared interrupts must pass in a real dev-ID,
 * otherwise we'll have trouble later trying to figure out
@@ -1699,9 +1702,13 @@ EXPORT_SYMBOL(request_threaded_irq);
  int request_any_context_irq(unsigned int irq, irq_handler_t handler,
unsigned long flags, const char *name, void *dev_id)
  {
-   struct irq_desc *desc = irq_to_desc(irq);
+   struct irq_desc *desc;
int ret;
  
+	if (irq == IRQ_NOTCONNECTED)

+   return -ENOTCONN;
+
+   desc = irq_to_desc(irq);
if (!desc)
return -EINVAL;
  




--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


.







Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-27 Thread Thomas Gleixner
On Tue, 26 Jan 2016, Bjorn Helgaas wrote:
> On Tue, Jan 26, 2016 at 04:48:25PM +0100, Thomas Gleixner wrote:
> > Right. So we could certainly do something like this INVALID_IRQ thingy, but
> > that looks a bit weird. What would request_irq() return?
> > 
> > If it returns success, then drivers might make the wrong decision. If it
> > returns an error code, then the i801 one works, but we might have to fix
> > others anyway.
> 
> I was thinking request_irq() could return -EINVAL if the caller passed
> INVALID_IRQ.  That should tell drivers that this interrupt won't work.
> 
> We'd be making request_irq() return -EINVAL in some cases where it
> currently returns success.  But even though it returns success today,
> I don't think the driver is getting interrupts, because the wire isn't
> connected.

Correct. What I meant is that the i801 driver can handle the -EINVAL return
from request_irq() today, but other drivers don't. I agree that we need to fix
them anyway and a failure to request the interrupt is better than a silent 'no
interrupts delivered' issue.
 
Though instead of returning -EINVAL I prefer an explicit error code for this
case. That way a driver can distinguish between the 'not connected' case and
other failure modes. Something like the patch below should work.

Thanks,

tglx

8<--
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -387,6 +387,22 @@ static inline int acpi_isa_register_gsi(
 }
 #endif
 
+static inline bool acpi_pci_irq_valid(struc pci_dev *dev)
+{
+#ifdef CONFIG_X86
+   /*
+* On x86 irq line 0xff means "unknown" or "no connection" (PCI 3.0,
+* Section 6.2.4, footnote on page 223).
+*/
+   if (dev->irq == 0xff) {
+   dev->irq = IRQ_NOTCONNECTED;
+   dev_warn(>dev, "PCI INT not connected\n");
+   return false;
+   }
+#endif
+   return true;
+}
+
 int acpi_pci_irq_enable(struct pci_dev *dev)
 {
struct acpi_prt_entry *entry;
@@ -409,6 +425,9 @@ int acpi_pci_irq_enable(struct pci_dev *
if (pci_has_managed_irq(dev))
return 0;
 
+   if (!acpi_pci_irq_valid(dev))
+   return 0;
+
entry = acpi_pci_irq_lookup(dev, pin);
if (!entry) {
/*
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -125,6 +125,16 @@ struct irqaction {
 
 extern irqreturn_t no_action(int cpl, void *dev_id);
 
+/*
+ * If a (PCI) device interrupt is not connected we set dev->irq to
+ * IRQ_NOTCONNECTED. This causes request_irq() to fail with -ENOTCONN, so we
+ * can distingiush that case from other error returns.
+ *
+ * 0x8000 is guaranteed to be outside the available range of interrupts
+ * and easy to distinguish from other possible incorrect values.
+ */
+#define IRQ_NOTCONNECTED   (1U << 31)
+
 extern int __must_check
 request_threaded_irq(unsigned int irq, irq_handler_t handler,
 irq_handler_t thread_fn,
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1609,6 +1609,9 @@ int request_threaded_irq(unsigned int ir
struct irq_desc *desc;
int retval;
 
+   if (irq == IRQ_NOTCONNECTED)
+   return -ENOTCONN;
+
/*
 * Sanity-check: shared interrupts must pass in a real dev-ID,
 * otherwise we'll have trouble later trying to figure out
@@ -1699,9 +1702,13 @@ EXPORT_SYMBOL(request_threaded_irq);
 int request_any_context_irq(unsigned int irq, irq_handler_t handler,
unsigned long flags, const char *name, void *dev_id)
 {
-   struct irq_desc *desc = irq_to_desc(irq);
+   struct irq_desc *desc;
int ret;
 
+   if (irq == IRQ_NOTCONNECTED)
+   return -ENOTCONN;
+
+   desc = irq_to_desc(irq);
if (!desc)
return -EINVAL;
 





Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-27 Thread Thomas Gleixner
On Wed, 27 Jan 2016, Cao jin wrote:
> How about using IRQ_BITMAP_BITS as that "irq_valid" flag? because it is the
> ceiling of struct irq_desc irq_desc[], and request_irq() will return -EINVAL
> in case of the ceiling.
> 
> #ifdef CONFIG_SPARSE_IRQ
> # define IRQ_BITMAP_BITS  (NR_IRQS + 8196)
> #else
> # define IRQ_BITMAP_BITS  NR_IRQS
> #endif

No. This is a core internal implementation detail.

Thanks,

tglx


Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-26 Thread Cao jin



On 01/27/2016 08:25 AM, Bjorn Helgaas wrote:

On Tue, Jan 26, 2016 at 04:48:25PM +0100, Thomas Gleixner wrote:

On Tue, 26 Jan 2016, Bjorn Helgaas wrote:





Right. So we could certainly do something like this INVALID_IRQ thingy, but
that looks a bit weird. What would request_irq() return?

If it returns success, then drivers might make the wrong decision. If it
returns an error code, then the i801 one works, but we might have to fix
others anyway.


I was thinking request_irq() could return -EINVAL if the caller passed
INVALID_IRQ.  That should tell drivers that this interrupt won't work.

We'd be making request_irq() return -EINVAL in some cases where it
currently returns success.  But even though it returns success today,
I don't think the driver is getting interrupts, because the wire isn't
connected.


I think it's better to have a software flag in pci_dev to indicate that there
is no irq line and fix up the (probably few) affected drivers so they avoid
calling request_irq() and take the right action.


We could add an "irq_valid" flag in struct pci_dev and make a new
rule that drivers should check dev->irq_valid before using dev->irq.
But realistically, i801 is the only place that will check irq_valid
because that's the only driver where we know about a problem, so that
seems like sort of a point solution.

Bjorn



How about using IRQ_BITMAP_BITS as that "irq_valid" flag? because it is 
the ceiling of struct irq_desc irq_desc[], and request_irq() will return 
-EINVAL in case of the ceiling.


#ifdef CONFIG_SPARSE_IRQ
# define IRQ_BITMAP_BITS(NR_IRQS + 8196)
#else
# define IRQ_BITMAP_BITSNR_IRQS
#endif


.



--
Yours Sincerely,

Cao jin




Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-26 Thread Bjorn Helgaas
On Tue, Jan 26, 2016 at 04:48:25PM +0100, Thomas Gleixner wrote:
> On Tue, 26 Jan 2016, Bjorn Helgaas wrote:
> > On Tue, Jan 26, 2016 at 09:26:29AM +0100, Thomas Gleixner wrote:
> > > The proper solution here is to flag that this device does not have an
> > > interrupt connected and act accordingly in the device driver, i.e. do not 
> > > call
> > > request_irq() in the first place.
> > 
> > This is the crux of the problem.  As far as I know, PCI doesn't have
> > a flag to indicate that dev->irq is a wire that's not connected, so
> > there's no generic way for a driver to know whether it should call
> > request_irq().
> 
> Ok.
>  
> > We could add one, of course, but that only helps in the drivers we
> > update.  It'd be nice if we could figure out a way to fix this
> > without having to touch all the drivers.
> 
> Hmm.
>  
> > I think any driver that uses line-based interrupts can potentially
> > fail if the platform uses Interrupt Line == 255 to indicate that the
> > line is not connected.  If another driver happens to be using IRQ 255,
> > request_irq() may fail as it does here.  Otherwise, I suspect
> > request_irq() will return success, but the driver won't get any
> > interrupts.
> 
> Right. So we could certainly do something like this INVALID_IRQ thingy, but
> that looks a bit weird. What would request_irq() return?
> 
> If it returns success, then drivers might make the wrong decision. If it
> returns an error code, then the i801 one works, but we might have to fix
> others anyway.

I was thinking request_irq() could return -EINVAL if the caller passed
INVALID_IRQ.  That should tell drivers that this interrupt won't work.

We'd be making request_irq() return -EINVAL in some cases where it
currently returns success.  But even though it returns success today,
I don't think the driver is getting interrupts, because the wire isn't
connected.

> I think it's better to have a software flag in pci_dev to indicate that there
> is no irq line and fix up the (probably few) affected drivers so they avoid
> calling request_irq() and take the right action.

We could add an "irq_valid" flag in struct pci_dev and make a new
rule that drivers should check dev->irq_valid before using dev->irq.
But realistically, i801 is the only place that will check irq_valid
because that's the only driver where we know about a problem, so that
seems like sort of a point solution.

Bjorn


Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-26 Thread Thomas Gleixner
On Tue, 26 Jan 2016, Bjorn Helgaas wrote:
> On Tue, Jan 26, 2016 at 09:26:29AM +0100, Thomas Gleixner wrote:
> > And why would this be x86 specific? PCI3.0 is architecture independent, 
> > right?
> 
> Yes, PCI is mostly arch-independent, but Interrupt Line is
> arch-specific, and the corner case of it being 255 is only mentioned
> in an x86-specific footnote.  We can improve the comment.

Yes please :)
 
> > The proper solution here is to flag that this device does not have an
> > interrupt connected and act accordingly in the device driver, i.e. do not 
> > call
> > request_irq() in the first place.
> 
> This is the crux of the problem.  As far as I know, PCI doesn't have
> a flag to indicate that dev->irq is a wire that's not connected, so
> there's no generic way for a driver to know whether it should call
> request_irq().

Ok.
 
> We could add one, of course, but that only helps in the drivers we
> update.  It'd be nice if we could figure out a way to fix this
> without having to touch all the drivers.

Hmm.
 
> I think any driver that uses line-based interrupts can potentially
> fail if the platform uses Interrupt Line == 255 to indicate that the
> line is not connected.  If another driver happens to be using IRQ 255,
> request_irq() may fail as it does here.  Otherwise, I suspect
> request_irq() will return success, but the driver won't get any
> interrupts.

Right. So we could certainly do something like this INVALID_IRQ thingy, but
that looks a bit weird. What would request_irq() return?

If it returns success, then drivers might make the wrong decision. If it
returns an error code, then the i801 one works, but we might have to fix
others anyway.

I think it's better to have a software flag in pci_dev to indicate that there
is no irq line and fix up the (probably few) affected drivers so they avoid
calling request_irq() and take the right action.

> > No. NR_IRQS cannot be used at all if sparse irqs are enabled. 
> 
> Ah, thanks, this is a critical piece I missed.  There *are* a few
> places where we do define or use NR_IRQS when CONFIG_SPARSE_IRQ=y.  Do
> these need updates?
> 
>   include/asm-generic/irq.h defines NR_IRQS if not defined yet
>   arch/arm/include/asm/irq.h defines NR_IRQS to NR_IRQS_LEGACY
>   arch/arm/kernel/irq.c uses NR_IRQS
>   arch/sh/include/asm/irq.h defines NR_IRQS to 8

These defines are used for preallocating interrupt descriptors in early
boot. That was invented to help migrating to sparse irq.

>   kernel/irq/internals.h uses NR_IRQS to define IRQ_BITMAP_BITS

Right, that's probably pointless, but harmless.

> > Nothing in any generic code is supposed to rely on NR_IRQS.
>  
> I guess that means these uses are suspect:
> 
>   $ git grep -w NR_IRQS | grep -v "^arch" | grep -v "^kernel" | grep -v 
> "^drivers/irqchip" | grep -v "^include"
>   drivers/input/keyboard/lpc32xx-keys.c:  if (irq < 0 || irq >= NR_IRQS) {
>   drivers/mtd/nand/lpc32xx_mlc.c: if ((host->irq < 0) || (host->irq >= 
> NR_IRQS)) {
>   drivers/net/hamradio/scc.c:static struct irqflags { unsigned char used : 1; 
> } Ivec[NR_IRQS];
>   drivers/pcmcia/pcmcia_resource.c:   if (irq > NR_IRQS)
>   drivers/tty/serial/apbuart.c:   if (ser->irq < 0 || ser->irq >= NR_IRQS)
>   drivers/tty/serial/ar933x_uart.c:   if (ser->irq < 0 || ser->irq >= NR_IRQS)
>   drivers/tty/serial/lantiq.c:if (ser->irq < 0 || ser->irq >= NR_IRQS)
>   drivers/tty/serial/m32r_sio.c:static struct irq_info irq_lists[NR_IRQS];

Indeed.

Thanks,

tglx



Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-26 Thread Bjorn Helgaas
On Tue, Jan 26, 2016 at 09:26:29AM +0100, Thomas Gleixner wrote:
> On Mon, 25 Jan 2016, Bjorn Helgaas wrote:
> > On Mon, Jan 25, 2016 at 02:59:38PM +0800, Chen Fan wrote:
> 
> > > i801_smbus :00:1f.3: PCI INT C: no GSI
> > > i801_smbus :00:1f.3: Failed to allocate irq 255: -16
> > > i801_smbus: probe of :00:1f.3 failed with error -16
> 
> The current code does not not fail when the interrupt request fails. It
> reports it and clears the IRQ feature flag.
> 
> > > @@ -436,7 +437,15 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
> > >* driver reported one, then use it. Exit in any case.
> > >*/
> > >   if (gsi < 0) {
> > > - if (acpi_isa_register_gsi(dev))
> > > +#ifdef CONFIG_X86
> > > + /*
> > > +  * The Interrupt Line value of 0xff is defined to mean "unknown"
> > > +  * or "no connection" (PCI 3.0, Section 6.2.4, footnote on page
> > > +  * 223), using ~0U as invalid IRQ.
> > > +  */
> 
> And why would this be x86 specific? PCI3.0 is architecture independent, right?

Yes, PCI is mostly arch-independent, but Interrupt Line is
arch-specific, and the corner case of it being 255 is only mentioned
in an x86-specific footnote.  We can improve the comment.

> > > + dev->irq = (dev->irq == 0xff) ? IRQ_INVALID : dev->irq;
> > 
> > It's much simpler and clearer to write:
> > 
> >   if (dev->irq == 0xff)
> > dev->irq = IRQ_INVALID;
> 
> I do not understand that IRQ_INVALID business at all.
> 
> > > +#endif
> > > + if (!irq_is_valid(dev->irq) || acpi_isa_register_gsi(dev))
> > >   dev_warn(>dev, "PCI INT %c: no GSI\n",
> > >pin_name(pin));
> > >  
> 
> The existing code already drops into this place because 
> acpi_isa_register_gsi() fails.
> 
> > > i801_smbus :00:1f.3: PCI INT C: no GSI
> 
> What extra value does that !irq_is_valid() provide?
> 
> And how does setting dev->irq to ~0U prevent that request_irq() is called in
> the i801 device driver? Not at all, AFAICT. It will just fail with a different
> error.
> 
> So the whole 'fix' relies on the fact that irq ~0U does not exist (at least
> not today) and therefor the false sharing with some other driver using irq 255
> will not happen.
> 
> Relying on undocumented behaviour is not a fix, that's voodoo programming.
> 
> The proper solution here is to flag that this device does not have an
> interrupt connected and act accordingly in the device driver, i.e. do not call
> request_irq() in the first place.

This is the crux of the problem.  As far as I know, PCI doesn't have
a flag to indicate that dev->irq is a wire that's not connected, so
there's no generic way for a driver to know whether it should call
request_irq().

We could add one, of course, but that only helps in the drivers we
update.  It'd be nice if we could figure out a way to fix this
without having to touch all the drivers.

I think any driver that uses line-based interrupts can potentially
fail if the platform uses Interrupt Line == 255 to indicate that the
line is not connected.  If another driver happens to be using IRQ 255,
request_irq() may fail as it does here.  Otherwise, I suspect
request_irq() will return success, but the driver won't get any
interrupts.

> > I don't like the x86 ifdef.  I'd prefer:
> > 
> >   static inline bool irq_valid(unsigned int irq)
> >   {
> > if (irq < NR_IRQS)
> >   return true;
> > return false;
> >   }
> >
> > This could be used in many of the places that currently use NR_IRQS.
> 
> No. NR_IRQS cannot be used at all if sparse irqs are enabled. 

Ah, thanks, this is a critical piece I missed.  There *are* a few
places where we do define or use NR_IRQS when CONFIG_SPARSE_IRQ=y.  Do
these need updates?

  include/asm-generic/irq.h defines NR_IRQS if not defined yet
  arch/arm/include/asm/irq.h defines NR_IRQS to NR_IRQS_LEGACY
  arch/arm/kernel/irq.c uses NR_IRQS
  arch/sh/include/asm/irq.h defines NR_IRQS to 8
  kernel/irq/internals.h uses NR_IRQS to define IRQ_BITMAP_BITS

> Nothing in any
> generic code is supposed to rely on NR_IRQS.

I guess that means these uses are suspect:

  $ git grep -w NR_IRQS | grep -v "^arch" | grep -v "^kernel" | grep -v 
"^drivers/irqchip" | grep -v "^include"
  drivers/input/keyboard/lpc32xx-keys.c:if (irq < 0 || irq >= NR_IRQS) {
  drivers/mtd/nand/lpc32xx_mlc.c:   if ((host->irq < 0) || (host->irq >= 
NR_IRQS)) {
  drivers/net/hamradio/scc.c:static struct irqflags { unsigned char used : 1; } 
Ivec[NR_IRQS];
  drivers/pcmcia/pcmcia_resource.c: if (irq > NR_IRQS)
  drivers/tty/serial/apbuart.c: if (ser->irq < 0 || ser->irq >= NR_IRQS)
  drivers/tty/serial/ar933x_uart.c: if (ser->irq < 0 || ser->irq >= NR_IRQS)
  drivers/tty/serial/lantiq.c:  if (ser->irq < 0 || ser->irq >= NR_IRQS)
  drivers/tty/serial/m32r_sio.c:static struct irq_info irq_lists[NR_IRQS];

Bjorn


Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-26 Thread Thomas Gleixner
On Tue, 26 Jan 2016, Chen Fan wrote:
> On 01/26/2016 04:26 PM, Thomas Gleixner wrote:
> > > > if (gsi < 0) {
> > > > -   if (acpi_isa_register_gsi(dev))
> > > > +#ifdef CONFIG_X86
> > > > +   /*
> > > > +* The Interrupt Line value of 0xff is defined to mean
> > > > "unknown"
> > > > +* or "no connection" (PCI 3.0, Section 6.2.4, footnote 
> > > > on
> > > > page
> > > > +* 223), using ~0U as invalid IRQ.
> > > > +*/
> > And why would this be x86 specific? PCI3.0 is architecture independent,
> > right?
> quoting the spec document:
> "For x86 based PCs, the values in this register correspond to IRQ numbers
> (0-15) of the standard dual
> 8259 configuration. The value 255 is defined as meaning "unknown" or "no
> connection" to the interrupt
> controller. Values between 15 and 254 are reserved."

So if that is x86 specific then it needs to be documented proper. The fact
that the comment is inside the #ifdef x86 does not tell.

Thanks,

tglx

 


Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-26 Thread Chen Fan


On 01/26/2016 04:26 PM, Thomas Gleixner wrote:

On Mon, 25 Jan 2016, Bjorn Helgaas wrote:

On Mon, Jan 25, 2016 at 02:59:38PM +0800, Chen Fan wrote:

i801_smbus :00:1f.3: PCI INT C: no GSI
i801_smbus :00:1f.3: Failed to allocate irq 255: -16
i801_smbus: probe of :00:1f.3 failed with error -16

The current code does not not fail when the interrupt request fails. It
reports it and clears the IRQ feature flag.


@@ -436,7 +437,15 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
 * driver reported one, then use it. Exit in any case.
 */
if (gsi < 0) {
-   if (acpi_isa_register_gsi(dev))
+#ifdef CONFIG_X86
+   /*
+* The Interrupt Line value of 0xff is defined to mean "unknown"
+* or "no connection" (PCI 3.0, Section 6.2.4, footnote on page
+* 223), using ~0U as invalid IRQ.
+*/

And why would this be x86 specific? PCI3.0 is architecture independent, right?

quoting the spec document:
"For x86 based PCs, the values in this register correspond to IRQ 
numbers (0-15) of the standard dual
8259 configuration. The value 255 is defined as meaning "unknown" or "no 
connection" to the interrupt

controller. Values between 15 and 254 are reserved."




+   dev->irq = (dev->irq == 0xff) ? IRQ_INVALID : dev->irq;

It's much simpler and clearer to write:

   if (dev->irq == 0xff)
 dev->irq = IRQ_INVALID;

I do not understand that IRQ_INVALID business at all.


+#endif
+   if (!irq_is_valid(dev->irq) || acpi_isa_register_gsi(dev))
dev_warn(>dev, "PCI INT %c: no GSI\n",
 pin_name(pin));
  

The existing code already drops into this place because
acpi_isa_register_gsi() fails.


i801_smbus :00:1f.3: PCI INT C: no GSI

What extra value does that !irq_is_valid() provide?

And how does setting dev->irq to ~0U prevent that request_irq() is called in
the i801 device driver? Not at all, AFAICT. It will just fail with a different
error.

So the whole 'fix' relies on the fact that irq ~0U does not exist (at least
not today) and therefor the false sharing with some other driver using irq 255
will not happen.

Relying on undocumented behaviour is not a fix, that's voodoo programming.

The proper solution here is to flag that this device does not have an
interrupt connected and act accordingly in the device driver, i.e. do not call
request_irq() in the first place.

yes, this is what I thought in previous email, I has asked that
whether we can use a broken_irq flag in pci_dev to mark the device irq 
if invalid.
and then if the device broken_irq set, we could directly skip call the 
request_irq.

maybe we can set the broken_irq in pci_read_irq if the irq is 0xff.

Thanks,
Chen




+static inline bool irq_is_valid(unsigned int irq)
+{
+#ifdef CONFIG_X86
+   if (irq == IRQ_INVALID)
+   return false;
+#endif
+   return true;
+}

I don't like the x86 ifdef.  I'd prefer:

   static inline bool irq_valid(unsigned int irq)
   {
 if (irq < NR_IRQS)
   return true;
 return false;
   }

This could be used in many of the places that currently use NR_IRQS.

No. NR_IRQS cannot be used at all if sparse irqs are enabled. Nothing in any
generic code is supposed to rely on NR_IRQS.

Thanks,

tglx



.







Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-26 Thread Thomas Gleixner
On Mon, 25 Jan 2016, Bjorn Helgaas wrote:
> On Mon, Jan 25, 2016 at 02:59:38PM +0800, Chen Fan wrote:

> > i801_smbus :00:1f.3: PCI INT C: no GSI
> > i801_smbus :00:1f.3: Failed to allocate irq 255: -16
> > i801_smbus: probe of :00:1f.3 failed with error -16

The current code does not not fail when the interrupt request fails. It
reports it and clears the IRQ feature flag.

> > @@ -436,7 +437,15 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
> >  * driver reported one, then use it. Exit in any case.
> >  */
> > if (gsi < 0) {
> > -   if (acpi_isa_register_gsi(dev))
> > +#ifdef CONFIG_X86
> > +   /*
> > +* The Interrupt Line value of 0xff is defined to mean "unknown"
> > +* or "no connection" (PCI 3.0, Section 6.2.4, footnote on page
> > +* 223), using ~0U as invalid IRQ.
> > +*/

And why would this be x86 specific? PCI3.0 is architecture independent, right?

> > +   dev->irq = (dev->irq == 0xff) ? IRQ_INVALID : dev->irq;
> 
> It's much simpler and clearer to write:
> 
>   if (dev->irq == 0xff)
> dev->irq = IRQ_INVALID;

I do not understand that IRQ_INVALID business at all.

> > +#endif
> > +   if (!irq_is_valid(dev->irq) || acpi_isa_register_gsi(dev))
> > dev_warn(>dev, "PCI INT %c: no GSI\n",
> >  pin_name(pin));
> >  

The existing code already drops into this place because 
acpi_isa_register_gsi() fails.

> > i801_smbus :00:1f.3: PCI INT C: no GSI

What extra value does that !irq_is_valid() provide?

And how does setting dev->irq to ~0U prevent that request_irq() is called in
the i801 device driver? Not at all, AFAICT. It will just fail with a different
error.

So the whole 'fix' relies on the fact that irq ~0U does not exist (at least
not today) and therefor the false sharing with some other driver using irq 255
will not happen.

Relying on undocumented behaviour is not a fix, that's voodoo programming.

The proper solution here is to flag that this device does not have an
interrupt connected and act accordingly in the device driver, i.e. do not call
request_irq() in the first place.

> > +static inline bool irq_is_valid(unsigned int irq)
> > +{
> > +#ifdef CONFIG_X86
> > +   if (irq == IRQ_INVALID)
> > +   return false;
> > +#endif
> > +   return true;
> > +}
> 
> I don't like the x86 ifdef.  I'd prefer:
> 
>   static inline bool irq_valid(unsigned int irq)
>   {
> if (irq < NR_IRQS)
>   return true;
> return false;
>   }
>
> This could be used in many of the places that currently use NR_IRQS.

No. NR_IRQS cannot be used at all if sparse irqs are enabled. Nothing in any
generic code is supposed to rely on NR_IRQS.

Thanks,

tglx



Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-26 Thread Thomas Gleixner
On Tue, 26 Jan 2016, Chen Fan wrote:
> On 01/26/2016 04:26 PM, Thomas Gleixner wrote:
> > > > if (gsi < 0) {
> > > > -   if (acpi_isa_register_gsi(dev))
> > > > +#ifdef CONFIG_X86
> > > > +   /*
> > > > +* The Interrupt Line value of 0xff is defined to mean
> > > > "unknown"
> > > > +* or "no connection" (PCI 3.0, Section 6.2.4, footnote 
> > > > on
> > > > page
> > > > +* 223), using ~0U as invalid IRQ.
> > > > +*/
> > And why would this be x86 specific? PCI3.0 is architecture independent,
> > right?
> quoting the spec document:
> "For x86 based PCs, the values in this register correspond to IRQ numbers
> (0-15) of the standard dual
> 8259 configuration. The value 255 is defined as meaning "unknown" or "no
> connection" to the interrupt
> controller. Values between 15 and 254 are reserved."

So if that is x86 specific then it needs to be documented proper. The fact
that the comment is inside the #ifdef x86 does not tell.

Thanks,

tglx

 


Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-26 Thread Chen Fan


On 01/26/2016 04:26 PM, Thomas Gleixner wrote:

On Mon, 25 Jan 2016, Bjorn Helgaas wrote:

On Mon, Jan 25, 2016 at 02:59:38PM +0800, Chen Fan wrote:

i801_smbus :00:1f.3: PCI INT C: no GSI
i801_smbus :00:1f.3: Failed to allocate irq 255: -16
i801_smbus: probe of :00:1f.3 failed with error -16

The current code does not not fail when the interrupt request fails. It
reports it and clears the IRQ feature flag.


@@ -436,7 +437,15 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
 * driver reported one, then use it. Exit in any case.
 */
if (gsi < 0) {
-   if (acpi_isa_register_gsi(dev))
+#ifdef CONFIG_X86
+   /*
+* The Interrupt Line value of 0xff is defined to mean "unknown"
+* or "no connection" (PCI 3.0, Section 6.2.4, footnote on page
+* 223), using ~0U as invalid IRQ.
+*/

And why would this be x86 specific? PCI3.0 is architecture independent, right?

quoting the spec document:
"For x86 based PCs, the values in this register correspond to IRQ 
numbers (0-15) of the standard dual
8259 configuration. The value 255 is defined as meaning "unknown" or "no 
connection" to the interrupt

controller. Values between 15 and 254 are reserved."




+   dev->irq = (dev->irq == 0xff) ? IRQ_INVALID : dev->irq;

It's much simpler and clearer to write:

   if (dev->irq == 0xff)
 dev->irq = IRQ_INVALID;

I do not understand that IRQ_INVALID business at all.


+#endif
+   if (!irq_is_valid(dev->irq) || acpi_isa_register_gsi(dev))
dev_warn(>dev, "PCI INT %c: no GSI\n",
 pin_name(pin));
  

The existing code already drops into this place because
acpi_isa_register_gsi() fails.


i801_smbus :00:1f.3: PCI INT C: no GSI

What extra value does that !irq_is_valid() provide?

And how does setting dev->irq to ~0U prevent that request_irq() is called in
the i801 device driver? Not at all, AFAICT. It will just fail with a different
error.

So the whole 'fix' relies on the fact that irq ~0U does not exist (at least
not today) and therefor the false sharing with some other driver using irq 255
will not happen.

Relying on undocumented behaviour is not a fix, that's voodoo programming.

The proper solution here is to flag that this device does not have an
interrupt connected and act accordingly in the device driver, i.e. do not call
request_irq() in the first place.

yes, this is what I thought in previous email, I has asked that
whether we can use a broken_irq flag in pci_dev to mark the device irq 
if invalid.
and then if the device broken_irq set, we could directly skip call the 
request_irq.

maybe we can set the broken_irq in pci_read_irq if the irq is 0xff.

Thanks,
Chen




+static inline bool irq_is_valid(unsigned int irq)
+{
+#ifdef CONFIG_X86
+   if (irq == IRQ_INVALID)
+   return false;
+#endif
+   return true;
+}

I don't like the x86 ifdef.  I'd prefer:

   static inline bool irq_valid(unsigned int irq)
   {
 if (irq < NR_IRQS)
   return true;
 return false;
   }

This could be used in many of the places that currently use NR_IRQS.

No. NR_IRQS cannot be used at all if sparse irqs are enabled. Nothing in any
generic code is supposed to rely on NR_IRQS.

Thanks,

tglx



.







Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-26 Thread Cao jin



On 01/27/2016 08:25 AM, Bjorn Helgaas wrote:

On Tue, Jan 26, 2016 at 04:48:25PM +0100, Thomas Gleixner wrote:

On Tue, 26 Jan 2016, Bjorn Helgaas wrote:





Right. So we could certainly do something like this INVALID_IRQ thingy, but
that looks a bit weird. What would request_irq() return?

If it returns success, then drivers might make the wrong decision. If it
returns an error code, then the i801 one works, but we might have to fix
others anyway.


I was thinking request_irq() could return -EINVAL if the caller passed
INVALID_IRQ.  That should tell drivers that this interrupt won't work.

We'd be making request_irq() return -EINVAL in some cases where it
currently returns success.  But even though it returns success today,
I don't think the driver is getting interrupts, because the wire isn't
connected.


I think it's better to have a software flag in pci_dev to indicate that there
is no irq line and fix up the (probably few) affected drivers so they avoid
calling request_irq() and take the right action.


We could add an "irq_valid" flag in struct pci_dev and make a new
rule that drivers should check dev->irq_valid before using dev->irq.
But realistically, i801 is the only place that will check irq_valid
because that's the only driver where we know about a problem, so that
seems like sort of a point solution.

Bjorn



How about using IRQ_BITMAP_BITS as that "irq_valid" flag? because it is 
the ceiling of struct irq_desc irq_desc[], and request_irq() will return 
-EINVAL in case of the ceiling.


#ifdef CONFIG_SPARSE_IRQ
# define IRQ_BITMAP_BITS(NR_IRQS + 8196)
#else
# define IRQ_BITMAP_BITSNR_IRQS
#endif


.



--
Yours Sincerely,

Cao jin




Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-26 Thread Thomas Gleixner
On Mon, 25 Jan 2016, Bjorn Helgaas wrote:
> On Mon, Jan 25, 2016 at 02:59:38PM +0800, Chen Fan wrote:

> > i801_smbus :00:1f.3: PCI INT C: no GSI
> > i801_smbus :00:1f.3: Failed to allocate irq 255: -16
> > i801_smbus: probe of :00:1f.3 failed with error -16

The current code does not not fail when the interrupt request fails. It
reports it and clears the IRQ feature flag.

> > @@ -436,7 +437,15 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
> >  * driver reported one, then use it. Exit in any case.
> >  */
> > if (gsi < 0) {
> > -   if (acpi_isa_register_gsi(dev))
> > +#ifdef CONFIG_X86
> > +   /*
> > +* The Interrupt Line value of 0xff is defined to mean "unknown"
> > +* or "no connection" (PCI 3.0, Section 6.2.4, footnote on page
> > +* 223), using ~0U as invalid IRQ.
> > +*/

And why would this be x86 specific? PCI3.0 is architecture independent, right?

> > +   dev->irq = (dev->irq == 0xff) ? IRQ_INVALID : dev->irq;
> 
> It's much simpler and clearer to write:
> 
>   if (dev->irq == 0xff)
> dev->irq = IRQ_INVALID;

I do not understand that IRQ_INVALID business at all.

> > +#endif
> > +   if (!irq_is_valid(dev->irq) || acpi_isa_register_gsi(dev))
> > dev_warn(>dev, "PCI INT %c: no GSI\n",
> >  pin_name(pin));
> >  

The existing code already drops into this place because 
acpi_isa_register_gsi() fails.

> > i801_smbus :00:1f.3: PCI INT C: no GSI

What extra value does that !irq_is_valid() provide?

And how does setting dev->irq to ~0U prevent that request_irq() is called in
the i801 device driver? Not at all, AFAICT. It will just fail with a different
error.

So the whole 'fix' relies on the fact that irq ~0U does not exist (at least
not today) and therefor the false sharing with some other driver using irq 255
will not happen.

Relying on undocumented behaviour is not a fix, that's voodoo programming.

The proper solution here is to flag that this device does not have an
interrupt connected and act accordingly in the device driver, i.e. do not call
request_irq() in the first place.

> > +static inline bool irq_is_valid(unsigned int irq)
> > +{
> > +#ifdef CONFIG_X86
> > +   if (irq == IRQ_INVALID)
> > +   return false;
> > +#endif
> > +   return true;
> > +}
> 
> I don't like the x86 ifdef.  I'd prefer:
> 
>   static inline bool irq_valid(unsigned int irq)
>   {
> if (irq < NR_IRQS)
>   return true;
> return false;
>   }
>
> This could be used in many of the places that currently use NR_IRQS.

No. NR_IRQS cannot be used at all if sparse irqs are enabled. Nothing in any
generic code is supposed to rely on NR_IRQS.

Thanks,

tglx



Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-26 Thread Bjorn Helgaas
On Tue, Jan 26, 2016 at 09:26:29AM +0100, Thomas Gleixner wrote:
> On Mon, 25 Jan 2016, Bjorn Helgaas wrote:
> > On Mon, Jan 25, 2016 at 02:59:38PM +0800, Chen Fan wrote:
> 
> > > i801_smbus :00:1f.3: PCI INT C: no GSI
> > > i801_smbus :00:1f.3: Failed to allocate irq 255: -16
> > > i801_smbus: probe of :00:1f.3 failed with error -16
> 
> The current code does not not fail when the interrupt request fails. It
> reports it and clears the IRQ feature flag.
> 
> > > @@ -436,7 +437,15 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
> > >* driver reported one, then use it. Exit in any case.
> > >*/
> > >   if (gsi < 0) {
> > > - if (acpi_isa_register_gsi(dev))
> > > +#ifdef CONFIG_X86
> > > + /*
> > > +  * The Interrupt Line value of 0xff is defined to mean "unknown"
> > > +  * or "no connection" (PCI 3.0, Section 6.2.4, footnote on page
> > > +  * 223), using ~0U as invalid IRQ.
> > > +  */
> 
> And why would this be x86 specific? PCI3.0 is architecture independent, right?

Yes, PCI is mostly arch-independent, but Interrupt Line is
arch-specific, and the corner case of it being 255 is only mentioned
in an x86-specific footnote.  We can improve the comment.

> > > + dev->irq = (dev->irq == 0xff) ? IRQ_INVALID : dev->irq;
> > 
> > It's much simpler and clearer to write:
> > 
> >   if (dev->irq == 0xff)
> > dev->irq = IRQ_INVALID;
> 
> I do not understand that IRQ_INVALID business at all.
> 
> > > +#endif
> > > + if (!irq_is_valid(dev->irq) || acpi_isa_register_gsi(dev))
> > >   dev_warn(>dev, "PCI INT %c: no GSI\n",
> > >pin_name(pin));
> > >  
> 
> The existing code already drops into this place because 
> acpi_isa_register_gsi() fails.
> 
> > > i801_smbus :00:1f.3: PCI INT C: no GSI
> 
> What extra value does that !irq_is_valid() provide?
> 
> And how does setting dev->irq to ~0U prevent that request_irq() is called in
> the i801 device driver? Not at all, AFAICT. It will just fail with a different
> error.
> 
> So the whole 'fix' relies on the fact that irq ~0U does not exist (at least
> not today) and therefor the false sharing with some other driver using irq 255
> will not happen.
> 
> Relying on undocumented behaviour is not a fix, that's voodoo programming.
> 
> The proper solution here is to flag that this device does not have an
> interrupt connected and act accordingly in the device driver, i.e. do not call
> request_irq() in the first place.

This is the crux of the problem.  As far as I know, PCI doesn't have
a flag to indicate that dev->irq is a wire that's not connected, so
there's no generic way for a driver to know whether it should call
request_irq().

We could add one, of course, but that only helps in the drivers we
update.  It'd be nice if we could figure out a way to fix this
without having to touch all the drivers.

I think any driver that uses line-based interrupts can potentially
fail if the platform uses Interrupt Line == 255 to indicate that the
line is not connected.  If another driver happens to be using IRQ 255,
request_irq() may fail as it does here.  Otherwise, I suspect
request_irq() will return success, but the driver won't get any
interrupts.

> > I don't like the x86 ifdef.  I'd prefer:
> > 
> >   static inline bool irq_valid(unsigned int irq)
> >   {
> > if (irq < NR_IRQS)
> >   return true;
> > return false;
> >   }
> >
> > This could be used in many of the places that currently use NR_IRQS.
> 
> No. NR_IRQS cannot be used at all if sparse irqs are enabled. 

Ah, thanks, this is a critical piece I missed.  There *are* a few
places where we do define or use NR_IRQS when CONFIG_SPARSE_IRQ=y.  Do
these need updates?

  include/asm-generic/irq.h defines NR_IRQS if not defined yet
  arch/arm/include/asm/irq.h defines NR_IRQS to NR_IRQS_LEGACY
  arch/arm/kernel/irq.c uses NR_IRQS
  arch/sh/include/asm/irq.h defines NR_IRQS to 8
  kernel/irq/internals.h uses NR_IRQS to define IRQ_BITMAP_BITS

> Nothing in any
> generic code is supposed to rely on NR_IRQS.

I guess that means these uses are suspect:

  $ git grep -w NR_IRQS | grep -v "^arch" | grep -v "^kernel" | grep -v 
"^drivers/irqchip" | grep -v "^include"
  drivers/input/keyboard/lpc32xx-keys.c:if (irq < 0 || irq >= NR_IRQS) {
  drivers/mtd/nand/lpc32xx_mlc.c:   if ((host->irq < 0) || (host->irq >= 
NR_IRQS)) {
  drivers/net/hamradio/scc.c:static struct irqflags { unsigned char used : 1; } 
Ivec[NR_IRQS];
  drivers/pcmcia/pcmcia_resource.c: if (irq > NR_IRQS)
  drivers/tty/serial/apbuart.c: if (ser->irq < 0 || ser->irq >= NR_IRQS)
  drivers/tty/serial/ar933x_uart.c: if (ser->irq < 0 || ser->irq >= NR_IRQS)
  drivers/tty/serial/lantiq.c:  if (ser->irq < 0 || ser->irq >= NR_IRQS)
  drivers/tty/serial/m32r_sio.c:static struct irq_info irq_lists[NR_IRQS];

Bjorn


Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-26 Thread Thomas Gleixner
On Tue, 26 Jan 2016, Bjorn Helgaas wrote:
> On Tue, Jan 26, 2016 at 09:26:29AM +0100, Thomas Gleixner wrote:
> > And why would this be x86 specific? PCI3.0 is architecture independent, 
> > right?
> 
> Yes, PCI is mostly arch-independent, but Interrupt Line is
> arch-specific, and the corner case of it being 255 is only mentioned
> in an x86-specific footnote.  We can improve the comment.

Yes please :)
 
> > The proper solution here is to flag that this device does not have an
> > interrupt connected and act accordingly in the device driver, i.e. do not 
> > call
> > request_irq() in the first place.
> 
> This is the crux of the problem.  As far as I know, PCI doesn't have
> a flag to indicate that dev->irq is a wire that's not connected, so
> there's no generic way for a driver to know whether it should call
> request_irq().

Ok.
 
> We could add one, of course, but that only helps in the drivers we
> update.  It'd be nice if we could figure out a way to fix this
> without having to touch all the drivers.

Hmm.
 
> I think any driver that uses line-based interrupts can potentially
> fail if the platform uses Interrupt Line == 255 to indicate that the
> line is not connected.  If another driver happens to be using IRQ 255,
> request_irq() may fail as it does here.  Otherwise, I suspect
> request_irq() will return success, but the driver won't get any
> interrupts.

Right. So we could certainly do something like this INVALID_IRQ thingy, but
that looks a bit weird. What would request_irq() return?

If it returns success, then drivers might make the wrong decision. If it
returns an error code, then the i801 one works, but we might have to fix
others anyway.

I think it's better to have a software flag in pci_dev to indicate that there
is no irq line and fix up the (probably few) affected drivers so they avoid
calling request_irq() and take the right action.

> > No. NR_IRQS cannot be used at all if sparse irqs are enabled. 
> 
> Ah, thanks, this is a critical piece I missed.  There *are* a few
> places where we do define or use NR_IRQS when CONFIG_SPARSE_IRQ=y.  Do
> these need updates?
> 
>   include/asm-generic/irq.h defines NR_IRQS if not defined yet
>   arch/arm/include/asm/irq.h defines NR_IRQS to NR_IRQS_LEGACY
>   arch/arm/kernel/irq.c uses NR_IRQS
>   arch/sh/include/asm/irq.h defines NR_IRQS to 8

These defines are used for preallocating interrupt descriptors in early
boot. That was invented to help migrating to sparse irq.

>   kernel/irq/internals.h uses NR_IRQS to define IRQ_BITMAP_BITS

Right, that's probably pointless, but harmless.

> > Nothing in any generic code is supposed to rely on NR_IRQS.
>  
> I guess that means these uses are suspect:
> 
>   $ git grep -w NR_IRQS | grep -v "^arch" | grep -v "^kernel" | grep -v 
> "^drivers/irqchip" | grep -v "^include"
>   drivers/input/keyboard/lpc32xx-keys.c:  if (irq < 0 || irq >= NR_IRQS) {
>   drivers/mtd/nand/lpc32xx_mlc.c: if ((host->irq < 0) || (host->irq >= 
> NR_IRQS)) {
>   drivers/net/hamradio/scc.c:static struct irqflags { unsigned char used : 1; 
> } Ivec[NR_IRQS];
>   drivers/pcmcia/pcmcia_resource.c:   if (irq > NR_IRQS)
>   drivers/tty/serial/apbuart.c:   if (ser->irq < 0 || ser->irq >= NR_IRQS)
>   drivers/tty/serial/ar933x_uart.c:   if (ser->irq < 0 || ser->irq >= NR_IRQS)
>   drivers/tty/serial/lantiq.c:if (ser->irq < 0 || ser->irq >= NR_IRQS)
>   drivers/tty/serial/m32r_sio.c:static struct irq_info irq_lists[NR_IRQS];

Indeed.

Thanks,

tglx



Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-26 Thread Bjorn Helgaas
On Tue, Jan 26, 2016 at 04:48:25PM +0100, Thomas Gleixner wrote:
> On Tue, 26 Jan 2016, Bjorn Helgaas wrote:
> > On Tue, Jan 26, 2016 at 09:26:29AM +0100, Thomas Gleixner wrote:
> > > The proper solution here is to flag that this device does not have an
> > > interrupt connected and act accordingly in the device driver, i.e. do not 
> > > call
> > > request_irq() in the first place.
> > 
> > This is the crux of the problem.  As far as I know, PCI doesn't have
> > a flag to indicate that dev->irq is a wire that's not connected, so
> > there's no generic way for a driver to know whether it should call
> > request_irq().
> 
> Ok.
>  
> > We could add one, of course, but that only helps in the drivers we
> > update.  It'd be nice if we could figure out a way to fix this
> > without having to touch all the drivers.
> 
> Hmm.
>  
> > I think any driver that uses line-based interrupts can potentially
> > fail if the platform uses Interrupt Line == 255 to indicate that the
> > line is not connected.  If another driver happens to be using IRQ 255,
> > request_irq() may fail as it does here.  Otherwise, I suspect
> > request_irq() will return success, but the driver won't get any
> > interrupts.
> 
> Right. So we could certainly do something like this INVALID_IRQ thingy, but
> that looks a bit weird. What would request_irq() return?
> 
> If it returns success, then drivers might make the wrong decision. If it
> returns an error code, then the i801 one works, but we might have to fix
> others anyway.

I was thinking request_irq() could return -EINVAL if the caller passed
INVALID_IRQ.  That should tell drivers that this interrupt won't work.

We'd be making request_irq() return -EINVAL in some cases where it
currently returns success.  But even though it returns success today,
I don't think the driver is getting interrupts, because the wire isn't
connected.

> I think it's better to have a software flag in pci_dev to indicate that there
> is no irq line and fix up the (probably few) affected drivers so they avoid
> calling request_irq() and take the right action.

We could add an "irq_valid" flag in struct pci_dev and make a new
rule that drivers should check dev->irq_valid before using dev->irq.
But realistically, i801 is the only place that will check irq_valid
because that's the only driver where we know about a problem, so that
seems like sort of a point solution.

Bjorn


Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-25 Thread Guenter Roeck
On Mon, Jan 25, 2016 at 02:58:04PM -0600, Bjorn Helgaas wrote:
> [+cc Thomas]
> 
[ ... ]

> 
> I don't like the x86 ifdef.  I'd prefer:
> 
>   static inline bool irq_valid(unsigned int irq)
>   {
> if (irq < NR_IRQS)
>   return true;
> return false;
>   }
> 

Or:

static inline bool irq_valid(unsigned int irq)
{
return irq < NR_IRQS;
}

Guenter


Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-25 Thread Chen Fan


On 01/26/2016 04:58 AM, Bjorn Helgaas wrote:

[+cc Thomas]

On Mon, Jan 25, 2016 at 02:59:38PM +0800, Chen Fan wrote:

In our environment, when enable Secure boot, we found an abnormal
phenomenon as following call trace shows. after investigation, we
found the firmware assigned an irq number 255 which means unknown
or no connection in PCI local spec for i801_smbus, meanwhile the
ACPI didn't configure the pci irq routing. and the 255 irq number
was assigned for megasa msix without IRQF_SHARED. then in this case
during i801_smbus probe, the i801_smbus driver would request irq with
bad irq number 255. but the 255 irq number was assigned for memgasa
with MSIX enable. which will cause request_irq fails as call trace
shows, here we use ~0U as invalid IRQ to identify the 0xff IRQ specified
by BIOS.

See the call trace:

Maybe you missed my suggestion that the timestamps aren't useful;
here's my suggestion again in more detail:

Changelogs are written once, but read dozens or hundreds of time, so
stripping out irrelevant details shows consideration for the readers.

Got it, thanks for your correction, I will remake it as you suggest.




  [   32.459195] ipmi device interface
  [   32.612907] shpchp: Standard Hot Plug PCI Controller Driver version: 0.4
  [   32.800459] ixgbe: Intel(R) 10 Gigabit PCI Express Network Driver - 
version 4.0.1-k-rh
  [   32.818319] ixgbe: Copyright (c) 1999-2014 Intel Corporation.
  [   32.844009] lpc_ich 0001:80:1f.0: I/O space for ACPI uninitialized

I think the lines above are completely irrelevant.


  [   32.850093] i801_smbus :00:1f.3: enabling device (0140 -> 0143)
  [   32.851134] i801_smbus :00:1f.3: can't derive routing for PCI INT C
  [   32.851136] i801_smbus :00:1f.3: PCI INT C: no GSI
  [   32.851164] genirq: Flags mismatch irq 255. 0080 (i801_smbus) vs. 
 (megasa

These are useful, but the timestamps ("[   32.850093]") are not.


  [   32.851168] CPU: 0 PID: 2487 Comm: kworker/0:1 Not tainted 
3.10.0-229.el7.x86_64 #1
  [   32.851170] Hardware name: FUJITSU PRIMEQUEST 2800E2/D3736, BIOS 
PRIMEQUEST 2000 Serie5

These are probably useful; it's nice to know what kernel and hardware
is involved.


  [   32.851178] Workqueue: events work_for_cpu_fn
  [   32.851208]  88086c330b00 e233a9df 88086d57bca0 
81603f36
  [   32.851227]  88086d57bcf8 8110d23a 88686fe02000 
0246
  [   32.851246]  88086a9a8c00 e233a9df a00ad220 
0080

I doubt these are useful.


  [   32.851247] Call Trace:
  [   32.851261]  [] dump_stack+0x19/0x1b
  [   32.851271]  [] __setup_irq+0x54a/0x570
  [   32.851282]  [] ? i801_check_pre.isra.5+0xe0/0xe0 
[i2c_i801]
  [   32.851289]  [] request_threaded_irq+0xcc/0x170
  [   32.851298]  [] i801_probe+0x32f/0x508 [i2c_i801]
  [   32.851308]  [] local_pci_probe+0x45/0xa0

The above might be useful, but the addresses ("[]")
are not, and you should go through them manually and strip out the
lines that are junk from the stack.  For example, I don't think
request_threaded_irq() really calls i801_check_pre().


  [   32.851315]  [] work_for_cpu_fn+0x14/0x20
  [   32.851323]  [] process_one_work+0x17b/0x470
  [   32.851330]  [] worker_thread+0x293/0x400
  [   32.851338]  [] ? rescuer_thread+0x400/0x400
  [   32.851346]  [] kthread+0xcf/0xe0
  [   32.851353]  [] ? kthread_create_on_node+0x140/0x140
  [   32.851362]  [] ret_from_fork+0x7c/0xb0
  [   32.851369]  [] ? kthread_create_on_node+0x140/0x140

The lines above are completely useless.


  [   32.851373] i801_smbus :00:1f.3: Failed to allocate irq 255: -16
  [   32.851435] i801_smbus: probe of :00:1f.3 failed with error -16
  [   33.180145] ixgbe :5a:00.0: Multiq[   33.240538] ixgbe :5a:00.0: 
(PCI Express:03:e0
  [   33.280826] ixgbe :5a:00.0: MAC: 3, PHY: 0, PBA No: 00-000

These ixgbe entries are irrelevant.


Signed-off-by: Chen Fan 
---
  arch/x86/include/asm/irq_vectors.h |  2 ++
  drivers/acpi/pci_irq.c | 11 ++-
  include/linux/interrupt.h  |  9 +
  3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/irq_vectors.h 
b/arch/x86/include/asm/irq_vectors.h
index 6ca9fd6..b616d69 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -146,4 +146,6 @@
  #define NR_IRQS   NR_IRQS_LEGACY
  #endif
  
+#define IRQ_INVALID			(~0U)

If this is a good idea (I cc'd Thomas, the IRQ maintainer, for his
thoughts), I'd like to see this in a more generic place so it isn't
x86-specific.


  #endif /* _ASM_X86_IRQ_VECTORS_H */
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index d30184c..819eb23 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -33,6 +33,7 @@
  #include 
  #include 
  #include 
+#include 
  
  #define PREFIX "ACPI: "
  
@@ -436,7 +437,15 @@ int acpi_pci_irq_enable(struct pci_dev *dev)

 * driver reported one, 

Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-25 Thread Bjorn Helgaas
[+cc Thomas]

On Mon, Jan 25, 2016 at 02:59:38PM +0800, Chen Fan wrote:
> In our environment, when enable Secure boot, we found an abnormal
> phenomenon as following call trace shows. after investigation, we
> found the firmware assigned an irq number 255 which means unknown
> or no connection in PCI local spec for i801_smbus, meanwhile the
> ACPI didn't configure the pci irq routing. and the 255 irq number
> was assigned for megasa msix without IRQF_SHARED. then in this case
> during i801_smbus probe, the i801_smbus driver would request irq with
> bad irq number 255. but the 255 irq number was assigned for memgasa
> with MSIX enable. which will cause request_irq fails as call trace
> shows, here we use ~0U as invalid IRQ to identify the 0xff IRQ specified
> by BIOS.
> 
> See the call trace:

Maybe you missed my suggestion that the timestamps aren't useful;
here's my suggestion again in more detail:

Changelogs are written once, but read dozens or hundreds of time, so
stripping out irrelevant details shows consideration for the readers.

>  [   32.459195] ipmi device interface
>  [   32.612907] shpchp: Standard Hot Plug PCI Controller Driver version: 0.4
>  [   32.800459] ixgbe: Intel(R) 10 Gigabit PCI Express Network Driver - 
> version 4.0.1-k-rh
>  [   32.818319] ixgbe: Copyright (c) 1999-2014 Intel Corporation.
>  [   32.844009] lpc_ich 0001:80:1f.0: I/O space for ACPI uninitialized

I think the lines above are completely irrelevant.

>  [   32.850093] i801_smbus :00:1f.3: enabling device (0140 -> 0143)
>  [   32.851134] i801_smbus :00:1f.3: can't derive routing for PCI INT C
>  [   32.851136] i801_smbus :00:1f.3: PCI INT C: no GSI
>  [   32.851164] genirq: Flags mismatch irq 255. 0080 (i801_smbus) vs. 
>  (megasa

These are useful, but the timestamps ("[   32.850093]") are not.

>  [   32.851168] CPU: 0 PID: 2487 Comm: kworker/0:1 Not tainted 
> 3.10.0-229.el7.x86_64 #1
>  [   32.851170] Hardware name: FUJITSU PRIMEQUEST 2800E2/D3736, BIOS 
> PRIMEQUEST 2000 Serie5

These are probably useful; it's nice to know what kernel and hardware
is involved.

>  [   32.851178] Workqueue: events work_for_cpu_fn
>  [   32.851208]  88086c330b00 e233a9df 88086d57bca0 
> 81603f36
>  [   32.851227]  88086d57bcf8 8110d23a 88686fe02000 
> 0246
>  [   32.851246]  88086a9a8c00 e233a9df a00ad220 
> 0080

I doubt these are useful.

>  [   32.851247] Call Trace:
>  [   32.851261]  [] dump_stack+0x19/0x1b
>  [   32.851271]  [] __setup_irq+0x54a/0x570
>  [   32.851282]  [] ? i801_check_pre.isra.5+0xe0/0xe0 
> [i2c_i801]
>  [   32.851289]  [] request_threaded_irq+0xcc/0x170
>  [   32.851298]  [] i801_probe+0x32f/0x508 [i2c_i801]
>  [   32.851308]  [] local_pci_probe+0x45/0xa0

The above might be useful, but the addresses ("[]")
are not, and you should go through them manually and strip out the
lines that are junk from the stack.  For example, I don't think
request_threaded_irq() really calls i801_check_pre().

>  [   32.851315]  [] work_for_cpu_fn+0x14/0x20
>  [   32.851323]  [] process_one_work+0x17b/0x470
>  [   32.851330]  [] worker_thread+0x293/0x400
>  [   32.851338]  [] ? rescuer_thread+0x400/0x400
>  [   32.851346]  [] kthread+0xcf/0xe0
>  [   32.851353]  [] ? kthread_create_on_node+0x140/0x140
>  [   32.851362]  [] ret_from_fork+0x7c/0xb0
>  [   32.851369]  [] ? kthread_create_on_node+0x140/0x140

The lines above are completely useless.

>  [   32.851373] i801_smbus :00:1f.3: Failed to allocate irq 255: -16
>  [   32.851435] i801_smbus: probe of :00:1f.3 failed with error -16

>  [   33.180145] ixgbe :5a:00.0: Multiq[   33.240538] ixgbe :5a:00.0: 
> (PCI Express:03:e0
>  [   33.280826] ixgbe :5a:00.0: MAC: 3, PHY: 0, PBA No: 00-000

These ixgbe entries are irrelevant.

> Signed-off-by: Chen Fan 
> ---
>  arch/x86/include/asm/irq_vectors.h |  2 ++
>  drivers/acpi/pci_irq.c | 11 ++-
>  include/linux/interrupt.h  |  9 +
>  3 files changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/include/asm/irq_vectors.h 
> b/arch/x86/include/asm/irq_vectors.h
> index 6ca9fd6..b616d69 100644
> --- a/arch/x86/include/asm/irq_vectors.h
> +++ b/arch/x86/include/asm/irq_vectors.h
> @@ -146,4 +146,6 @@
>  #define NR_IRQS  NR_IRQS_LEGACY
>  #endif
>  
> +#define IRQ_INVALID  (~0U)

If this is a good idea (I cc'd Thomas, the IRQ maintainer, for his
thoughts), I'd like to see this in a more generic place so it isn't
x86-specific.

>  #endif /* _ASM_X86_IRQ_VECTORS_H */
> diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
> index d30184c..819eb23 100644
> --- a/drivers/acpi/pci_irq.c
> +++ b/drivers/acpi/pci_irq.c
> @@ -33,6 +33,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #define PREFIX "ACPI: "
>  
> @@ -436,7 +437,15 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
>* driver 

Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-25 Thread Guenter Roeck
On Mon, Jan 25, 2016 at 02:58:04PM -0600, Bjorn Helgaas wrote:
> [+cc Thomas]
> 
[ ... ]

> 
> I don't like the x86 ifdef.  I'd prefer:
> 
>   static inline bool irq_valid(unsigned int irq)
>   {
> if (irq < NR_IRQS)
>   return true;
> return false;
>   }
> 

Or:

static inline bool irq_valid(unsigned int irq)
{
return irq < NR_IRQS;
}

Guenter


Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-25 Thread Bjorn Helgaas
[+cc Thomas]

On Mon, Jan 25, 2016 at 02:59:38PM +0800, Chen Fan wrote:
> In our environment, when enable Secure boot, we found an abnormal
> phenomenon as following call trace shows. after investigation, we
> found the firmware assigned an irq number 255 which means unknown
> or no connection in PCI local spec for i801_smbus, meanwhile the
> ACPI didn't configure the pci irq routing. and the 255 irq number
> was assigned for megasa msix without IRQF_SHARED. then in this case
> during i801_smbus probe, the i801_smbus driver would request irq with
> bad irq number 255. but the 255 irq number was assigned for memgasa
> with MSIX enable. which will cause request_irq fails as call trace
> shows, here we use ~0U as invalid IRQ to identify the 0xff IRQ specified
> by BIOS.
> 
> See the call trace:

Maybe you missed my suggestion that the timestamps aren't useful;
here's my suggestion again in more detail:

Changelogs are written once, but read dozens or hundreds of time, so
stripping out irrelevant details shows consideration for the readers.

>  [   32.459195] ipmi device interface
>  [   32.612907] shpchp: Standard Hot Plug PCI Controller Driver version: 0.4
>  [   32.800459] ixgbe: Intel(R) 10 Gigabit PCI Express Network Driver - 
> version 4.0.1-k-rh
>  [   32.818319] ixgbe: Copyright (c) 1999-2014 Intel Corporation.
>  [   32.844009] lpc_ich 0001:80:1f.0: I/O space for ACPI uninitialized

I think the lines above are completely irrelevant.

>  [   32.850093] i801_smbus :00:1f.3: enabling device (0140 -> 0143)
>  [   32.851134] i801_smbus :00:1f.3: can't derive routing for PCI INT C
>  [   32.851136] i801_smbus :00:1f.3: PCI INT C: no GSI
>  [   32.851164] genirq: Flags mismatch irq 255. 0080 (i801_smbus) vs. 
>  (megasa

These are useful, but the timestamps ("[   32.850093]") are not.

>  [   32.851168] CPU: 0 PID: 2487 Comm: kworker/0:1 Not tainted 
> 3.10.0-229.el7.x86_64 #1
>  [   32.851170] Hardware name: FUJITSU PRIMEQUEST 2800E2/D3736, BIOS 
> PRIMEQUEST 2000 Serie5

These are probably useful; it's nice to know what kernel and hardware
is involved.

>  [   32.851178] Workqueue: events work_for_cpu_fn
>  [   32.851208]  88086c330b00 e233a9df 88086d57bca0 
> 81603f36
>  [   32.851227]  88086d57bcf8 8110d23a 88686fe02000 
> 0246
>  [   32.851246]  88086a9a8c00 e233a9df a00ad220 
> 0080

I doubt these are useful.

>  [   32.851247] Call Trace:
>  [   32.851261]  [] dump_stack+0x19/0x1b
>  [   32.851271]  [] __setup_irq+0x54a/0x570
>  [   32.851282]  [] ? i801_check_pre.isra.5+0xe0/0xe0 
> [i2c_i801]
>  [   32.851289]  [] request_threaded_irq+0xcc/0x170
>  [   32.851298]  [] i801_probe+0x32f/0x508 [i2c_i801]
>  [   32.851308]  [] local_pci_probe+0x45/0xa0

The above might be useful, but the addresses ("[]")
are not, and you should go through them manually and strip out the
lines that are junk from the stack.  For example, I don't think
request_threaded_irq() really calls i801_check_pre().

>  [   32.851315]  [] work_for_cpu_fn+0x14/0x20
>  [   32.851323]  [] process_one_work+0x17b/0x470
>  [   32.851330]  [] worker_thread+0x293/0x400
>  [   32.851338]  [] ? rescuer_thread+0x400/0x400
>  [   32.851346]  [] kthread+0xcf/0xe0
>  [   32.851353]  [] ? kthread_create_on_node+0x140/0x140
>  [   32.851362]  [] ret_from_fork+0x7c/0xb0
>  [   32.851369]  [] ? kthread_create_on_node+0x140/0x140

The lines above are completely useless.

>  [   32.851373] i801_smbus :00:1f.3: Failed to allocate irq 255: -16
>  [   32.851435] i801_smbus: probe of :00:1f.3 failed with error -16

>  [   33.180145] ixgbe :5a:00.0: Multiq[   33.240538] ixgbe :5a:00.0: 
> (PCI Express:03:e0
>  [   33.280826] ixgbe :5a:00.0: MAC: 3, PHY: 0, PBA No: 00-000

These ixgbe entries are irrelevant.

> Signed-off-by: Chen Fan 
> ---
>  arch/x86/include/asm/irq_vectors.h |  2 ++
>  drivers/acpi/pci_irq.c | 11 ++-
>  include/linux/interrupt.h  |  9 +
>  3 files changed, 21 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/include/asm/irq_vectors.h 
> b/arch/x86/include/asm/irq_vectors.h
> index 6ca9fd6..b616d69 100644
> --- a/arch/x86/include/asm/irq_vectors.h
> +++ b/arch/x86/include/asm/irq_vectors.h
> @@ -146,4 +146,6 @@
>  #define NR_IRQS  NR_IRQS_LEGACY
>  #endif
>  
> +#define IRQ_INVALID  (~0U)

If this is a good idea (I cc'd Thomas, the IRQ maintainer, for his
thoughts), I'd like to see this in a more generic place so it isn't
x86-specific.

>  #endif /* _ASM_X86_IRQ_VECTORS_H */
> diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
> index d30184c..819eb23 100644
> --- a/drivers/acpi/pci_irq.c
> +++ b/drivers/acpi/pci_irq.c
> @@ -33,6 +33,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  
>  #define PREFIX "ACPI: "
>  
> @@ -436,7 +437,15 @@ int acpi_pci_irq_enable(struct pci_dev 

Re: [PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-25 Thread Chen Fan


On 01/26/2016 04:58 AM, Bjorn Helgaas wrote:

[+cc Thomas]

On Mon, Jan 25, 2016 at 02:59:38PM +0800, Chen Fan wrote:

In our environment, when enable Secure boot, we found an abnormal
phenomenon as following call trace shows. after investigation, we
found the firmware assigned an irq number 255 which means unknown
or no connection in PCI local spec for i801_smbus, meanwhile the
ACPI didn't configure the pci irq routing. and the 255 irq number
was assigned for megasa msix without IRQF_SHARED. then in this case
during i801_smbus probe, the i801_smbus driver would request irq with
bad irq number 255. but the 255 irq number was assigned for memgasa
with MSIX enable. which will cause request_irq fails as call trace
shows, here we use ~0U as invalid IRQ to identify the 0xff IRQ specified
by BIOS.

See the call trace:

Maybe you missed my suggestion that the timestamps aren't useful;
here's my suggestion again in more detail:

Changelogs are written once, but read dozens or hundreds of time, so
stripping out irrelevant details shows consideration for the readers.

Got it, thanks for your correction, I will remake it as you suggest.




  [   32.459195] ipmi device interface
  [   32.612907] shpchp: Standard Hot Plug PCI Controller Driver version: 0.4
  [   32.800459] ixgbe: Intel(R) 10 Gigabit PCI Express Network Driver - 
version 4.0.1-k-rh
  [   32.818319] ixgbe: Copyright (c) 1999-2014 Intel Corporation.
  [   32.844009] lpc_ich 0001:80:1f.0: I/O space for ACPI uninitialized

I think the lines above are completely irrelevant.


  [   32.850093] i801_smbus :00:1f.3: enabling device (0140 -> 0143)
  [   32.851134] i801_smbus :00:1f.3: can't derive routing for PCI INT C
  [   32.851136] i801_smbus :00:1f.3: PCI INT C: no GSI
  [   32.851164] genirq: Flags mismatch irq 255. 0080 (i801_smbus) vs. 
 (megasa

These are useful, but the timestamps ("[   32.850093]") are not.


  [   32.851168] CPU: 0 PID: 2487 Comm: kworker/0:1 Not tainted 
3.10.0-229.el7.x86_64 #1
  [   32.851170] Hardware name: FUJITSU PRIMEQUEST 2800E2/D3736, BIOS 
PRIMEQUEST 2000 Serie5

These are probably useful; it's nice to know what kernel and hardware
is involved.


  [   32.851178] Workqueue: events work_for_cpu_fn
  [   32.851208]  88086c330b00 e233a9df 88086d57bca0 
81603f36
  [   32.851227]  88086d57bcf8 8110d23a 88686fe02000 
0246
  [   32.851246]  88086a9a8c00 e233a9df a00ad220 
0080

I doubt these are useful.


  [   32.851247] Call Trace:
  [   32.851261]  [] dump_stack+0x19/0x1b
  [   32.851271]  [] __setup_irq+0x54a/0x570
  [   32.851282]  [] ? i801_check_pre.isra.5+0xe0/0xe0 
[i2c_i801]
  [   32.851289]  [] request_threaded_irq+0xcc/0x170
  [   32.851298]  [] i801_probe+0x32f/0x508 [i2c_i801]
  [   32.851308]  [] local_pci_probe+0x45/0xa0

The above might be useful, but the addresses ("[]")
are not, and you should go through them manually and strip out the
lines that are junk from the stack.  For example, I don't think
request_threaded_irq() really calls i801_check_pre().


  [   32.851315]  [] work_for_cpu_fn+0x14/0x20
  [   32.851323]  [] process_one_work+0x17b/0x470
  [   32.851330]  [] worker_thread+0x293/0x400
  [   32.851338]  [] ? rescuer_thread+0x400/0x400
  [   32.851346]  [] kthread+0xcf/0xe0
  [   32.851353]  [] ? kthread_create_on_node+0x140/0x140
  [   32.851362]  [] ret_from_fork+0x7c/0xb0
  [   32.851369]  [] ? kthread_create_on_node+0x140/0x140

The lines above are completely useless.


  [   32.851373] i801_smbus :00:1f.3: Failed to allocate irq 255: -16
  [   32.851435] i801_smbus: probe of :00:1f.3 failed with error -16
  [   33.180145] ixgbe :5a:00.0: Multiq[   33.240538] ixgbe :5a:00.0: 
(PCI Express:03:e0
  [   33.280826] ixgbe :5a:00.0: MAC: 3, PHY: 0, PBA No: 00-000

These ixgbe entries are irrelevant.


Signed-off-by: Chen Fan 
---
  arch/x86/include/asm/irq_vectors.h |  2 ++
  drivers/acpi/pci_irq.c | 11 ++-
  include/linux/interrupt.h  |  9 +
  3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/irq_vectors.h 
b/arch/x86/include/asm/irq_vectors.h
index 6ca9fd6..b616d69 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -146,4 +146,6 @@
  #define NR_IRQS   NR_IRQS_LEGACY
  #endif
  
+#define IRQ_INVALID			(~0U)

If this is a good idea (I cc'd Thomas, the IRQ maintainer, for his
thoughts), I'd like to see this in a more generic place so it isn't
x86-specific.


  #endif /* _ASM_X86_IRQ_VECTORS_H */
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index d30184c..819eb23 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -33,6 +33,7 @@
  #include 
  #include 
  #include 
+#include 
  
  #define PREFIX "ACPI: "
  
@@ -436,7 +437,15 @@ int acpi_pci_irq_enable(struct pci_dev *dev)

  

[PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-24 Thread Chen Fan
In our environment, when enable Secure boot, we found an abnormal
phenomenon as following call trace shows. after investigation, we
found the firmware assigned an irq number 255 which means unknown
or no connection in PCI local spec for i801_smbus, meanwhile the
ACPI didn't configure the pci irq routing. and the 255 irq number
was assigned for megasa msix without IRQF_SHARED. then in this case
during i801_smbus probe, the i801_smbus driver would request irq with
bad irq number 255. but the 255 irq number was assigned for memgasa
with MSIX enable. which will cause request_irq fails as call trace
shows, here we use ~0U as invalid IRQ to identify the 0xff IRQ specified
by BIOS.

See the call trace:

 [   32.459195] ipmi device interface
 [   32.612907] shpchp: Standard Hot Plug PCI Controller Driver version: 0.4
 [   32.800459] ixgbe: Intel(R) 10 Gigabit PCI Express Network Driver - version 
4.0.1-k-rh
 [   32.818319] ixgbe: Copyright (c) 1999-2014 Intel Corporation.
 [   32.844009] lpc_ich 0001:80:1f.0: I/O space for ACPI uninitialized
 [   32.850093] i801_smbus :00:1f.3: enabling device (0140 -> 0143)
 [   32.851134] i801_smbus :00:1f.3: can't derive routing for PCI INT C
 [   32.851136] i801_smbus :00:1f.3: PCI INT C: no GSI
 [   32.851164] genirq: Flags mismatch irq 255. 0080 (i801_smbus) vs. 
 (megasa
 [   32.851168] CPU: 0 PID: 2487 Comm: kworker/0:1 Not tainted 
3.10.0-229.el7.x86_64 #1
 [   32.851170] Hardware name: FUJITSU PRIMEQUEST 2800E2/D3736, BIOS PRIMEQUEST 
2000 Serie5
 [   32.851178] Workqueue: events work_for_cpu_fn
 [   32.851208]  88086c330b00 e233a9df 88086d57bca0 
81603f36
 [   32.851227]  88086d57bcf8 8110d23a 88686fe02000 
0246
 [   32.851246]  88086a9a8c00 e233a9df a00ad220 
0080
 [   32.851247] Call Trace:
 [   32.851261]  [] dump_stack+0x19/0x1b
 [   32.851271]  [] __setup_irq+0x54a/0x570
 [   32.851282]  [] ? i801_check_pre.isra.5+0xe0/0xe0 
[i2c_i801]
 [   32.851289]  [] request_threaded_irq+0xcc/0x170
 [   32.851298]  [] i801_probe+0x32f/0x508 [i2c_i801]
 [   32.851308]  [] local_pci_probe+0x45/0xa0
 [   32.851315]  [] work_for_cpu_fn+0x14/0x20
 [   32.851323]  [] process_one_work+0x17b/0x470
 [   32.851330]  [] worker_thread+0x293/0x400
 [   32.851338]  [] ? rescuer_thread+0x400/0x400
 [   32.851346]  [] kthread+0xcf/0xe0
 [   32.851353]  [] ? kthread_create_on_node+0x140/0x140
 [   32.851362]  [] ret_from_fork+0x7c/0xb0
 [   32.851369]  [] ? kthread_create_on_node+0x140/0x140
 [   32.851373] i801_smbus :00:1f.3: Failed to allocate irq 255: -16
 [   32.851435] i801_smbus: probe of :00:1f.3 failed with error -16
 [   33.180145] ixgbe :5a:00.0: Multiq[   33.240538] ixgbe :5a:00.0: 
(PCI Express:03:e0
 [   33.280826] ixgbe :5a:00.0: MAC: 3, PHY: 0, PBA No: 00-000

Signed-off-by: Chen Fan 
---
 arch/x86/include/asm/irq_vectors.h |  2 ++
 drivers/acpi/pci_irq.c | 11 ++-
 include/linux/interrupt.h  |  9 +
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/irq_vectors.h 
b/arch/x86/include/asm/irq_vectors.h
index 6ca9fd6..b616d69 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -146,4 +146,6 @@
 #define NR_IRQSNR_IRQS_LEGACY
 #endif
 
+#define IRQ_INVALID(~0U)
+
 #endif /* _ASM_X86_IRQ_VECTORS_H */
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index d30184c..819eb23 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define PREFIX "ACPI: "
 
@@ -436,7 +437,15 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
 * driver reported one, then use it. Exit in any case.
 */
if (gsi < 0) {
-   if (acpi_isa_register_gsi(dev))
+#ifdef CONFIG_X86
+   /*
+* The Interrupt Line value of 0xff is defined to mean "unknown"
+* or "no connection" (PCI 3.0, Section 6.2.4, footnote on page
+* 223), using ~0U as invalid IRQ.
+*/
+   dev->irq = (dev->irq == 0xff) ? IRQ_INVALID : dev->irq;
+#endif
+   if (!irq_is_valid(dev->irq) || acpi_isa_register_gsi(dev))
dev_warn(>dev, "PCI INT %c: no GSI\n",
 pin_name(pin));
 
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index cb30edb..2f0d46b 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -198,6 +198,15 @@ extern void enable_percpu_irq(unsigned int irq, unsigned 
int type);
 extern bool irq_percpu_is_enabled(unsigned int irq);
 extern void irq_wake_thread(unsigned int irq, void *dev_id);
 
+static inline bool irq_is_valid(unsigned int irq)
+{
+#ifdef CONFIG_X86
+   if (irq == IRQ_INVALID)
+   return false;
+#endif
+   return true;

[PATCH v2] pci: fix unavailable irq number 255 reported by BIOS

2016-01-24 Thread Chen Fan
In our environment, when enable Secure boot, we found an abnormal
phenomenon as following call trace shows. after investigation, we
found the firmware assigned an irq number 255 which means unknown
or no connection in PCI local spec for i801_smbus, meanwhile the
ACPI didn't configure the pci irq routing. and the 255 irq number
was assigned for megasa msix without IRQF_SHARED. then in this case
during i801_smbus probe, the i801_smbus driver would request irq with
bad irq number 255. but the 255 irq number was assigned for memgasa
with MSIX enable. which will cause request_irq fails as call trace
shows, here we use ~0U as invalid IRQ to identify the 0xff IRQ specified
by BIOS.

See the call trace:

 [   32.459195] ipmi device interface
 [   32.612907] shpchp: Standard Hot Plug PCI Controller Driver version: 0.4
 [   32.800459] ixgbe: Intel(R) 10 Gigabit PCI Express Network Driver - version 
4.0.1-k-rh
 [   32.818319] ixgbe: Copyright (c) 1999-2014 Intel Corporation.
 [   32.844009] lpc_ich 0001:80:1f.0: I/O space for ACPI uninitialized
 [   32.850093] i801_smbus :00:1f.3: enabling device (0140 -> 0143)
 [   32.851134] i801_smbus :00:1f.3: can't derive routing for PCI INT C
 [   32.851136] i801_smbus :00:1f.3: PCI INT C: no GSI
 [   32.851164] genirq: Flags mismatch irq 255. 0080 (i801_smbus) vs. 
 (megasa
 [   32.851168] CPU: 0 PID: 2487 Comm: kworker/0:1 Not tainted 
3.10.0-229.el7.x86_64 #1
 [   32.851170] Hardware name: FUJITSU PRIMEQUEST 2800E2/D3736, BIOS PRIMEQUEST 
2000 Serie5
 [   32.851178] Workqueue: events work_for_cpu_fn
 [   32.851208]  88086c330b00 e233a9df 88086d57bca0 
81603f36
 [   32.851227]  88086d57bcf8 8110d23a 88686fe02000 
0246
 [   32.851246]  88086a9a8c00 e233a9df a00ad220 
0080
 [   32.851247] Call Trace:
 [   32.851261]  [] dump_stack+0x19/0x1b
 [   32.851271]  [] __setup_irq+0x54a/0x570
 [   32.851282]  [] ? i801_check_pre.isra.5+0xe0/0xe0 
[i2c_i801]
 [   32.851289]  [] request_threaded_irq+0xcc/0x170
 [   32.851298]  [] i801_probe+0x32f/0x508 [i2c_i801]
 [   32.851308]  [] local_pci_probe+0x45/0xa0
 [   32.851315]  [] work_for_cpu_fn+0x14/0x20
 [   32.851323]  [] process_one_work+0x17b/0x470
 [   32.851330]  [] worker_thread+0x293/0x400
 [   32.851338]  [] ? rescuer_thread+0x400/0x400
 [   32.851346]  [] kthread+0xcf/0xe0
 [   32.851353]  [] ? kthread_create_on_node+0x140/0x140
 [   32.851362]  [] ret_from_fork+0x7c/0xb0
 [   32.851369]  [] ? kthread_create_on_node+0x140/0x140
 [   32.851373] i801_smbus :00:1f.3: Failed to allocate irq 255: -16
 [   32.851435] i801_smbus: probe of :00:1f.3 failed with error -16
 [   33.180145] ixgbe :5a:00.0: Multiq[   33.240538] ixgbe :5a:00.0: 
(PCI Express:03:e0
 [   33.280826] ixgbe :5a:00.0: MAC: 3, PHY: 0, PBA No: 00-000

Signed-off-by: Chen Fan 
---
 arch/x86/include/asm/irq_vectors.h |  2 ++
 drivers/acpi/pci_irq.c | 11 ++-
 include/linux/interrupt.h  |  9 +
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/irq_vectors.h 
b/arch/x86/include/asm/irq_vectors.h
index 6ca9fd6..b616d69 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -146,4 +146,6 @@
 #define NR_IRQSNR_IRQS_LEGACY
 #endif
 
+#define IRQ_INVALID(~0U)
+
 #endif /* _ASM_X86_IRQ_VECTORS_H */
diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index d30184c..819eb23 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #define PREFIX "ACPI: "
 
@@ -436,7 +437,15 @@ int acpi_pci_irq_enable(struct pci_dev *dev)
 * driver reported one, then use it. Exit in any case.
 */
if (gsi < 0) {
-   if (acpi_isa_register_gsi(dev))
+#ifdef CONFIG_X86
+   /*
+* The Interrupt Line value of 0xff is defined to mean "unknown"
+* or "no connection" (PCI 3.0, Section 6.2.4, footnote on page
+* 223), using ~0U as invalid IRQ.
+*/
+   dev->irq = (dev->irq == 0xff) ? IRQ_INVALID : dev->irq;
+#endif
+   if (!irq_is_valid(dev->irq) || acpi_isa_register_gsi(dev))
dev_warn(>dev, "PCI INT %c: no GSI\n",
 pin_name(pin));
 
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index cb30edb..2f0d46b 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -198,6 +198,15 @@ extern void enable_percpu_irq(unsigned int irq, unsigned 
int type);
 extern bool irq_percpu_is_enabled(unsigned int irq);
 extern void irq_wake_thread(unsigned int irq, void *dev_id);
 
+static inline bool irq_is_valid(unsigned int irq)
+{
+#ifdef CONFIG_X86
+   if (irq == IRQ_INVALID)
+   return false;