Re: [PATCH v2] irqchip: gicv3-its: Don't assume GICv3 hardware supports 16bit INTID

2017-06-22 Thread Marc Zyngier
On 22/06/17 18:44, Shanker Donthineni wrote:
> The current ITS driver is assuming every ITS hardware implementation
> supports minimum of 16bit INTID. But this is not true, as per GICv3
> specification, INTID field is IMPLEMENTATION DEFINED in the range of
> 14-24 bits. We might see an unpredictable system behavior on systems
> where hardware support less than 16bits and software tries to use
> 64K LPI interrupts.
> 
> On Qualcomm Datacenter Technologies QDF2400 platform, boot log shows
> confusing information about number of LPI chunks as shown below. The
> QDF2400 ITS hardware supports 24bit INTID.
> 
> This patch allocates the memory resources for PEND/PROP tables based
> on discoverable value which is specified in GITS_TYPER.IDbits. Also
> taking this opportunity to increase number of LPI/MSI(x) to 128K if
> the hardware is capable, and show log message that reflects the
> correct number of LPI chunks.
> 
> ITS@0xff7efe: allocated 524288 Devices @3c040 (indirect, esz 8, psz 
> 64K, shr 1)
> ITS@0xff7efe: allocated 8192 Interrupt Collections @3c013 (flat, esz 
> 8, psz 64K, shr 1)
> ITS@0xff7efe: allocated 8192 Virtual CPUs @3c014 (flat, esz 8, psz 
> 64K, shr 1)
> ITS: Allocated 524032 chunks for LPIs
> PCI/MSI: ITS@0xff7efe domain created
> Platform MSI: ITS@0xff7efe domain created

I seriously doubt that anyone will ever see a shortage of LPIs with 
16bit IDs ("64k interrupts should be enough for everybody"). But if you 
think otherwise, fair enough. Comments on the actual patch:

> 
> Signed-off-by: Shanker Donthineni 
> ---
> Changes since v1:
>No code changes, just rebase on tip of the Marc's branch and tested on 
> QDF2400 platform.
>
> https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/log/?h=irq/irqchip-4.13
> 
>  drivers/irqchip/irq-gic-v3-its.c | 34 --
>  1 file changed, 16 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/irqchip/irq-gic-v3-its.c 
> b/drivers/irqchip/irq-gic-v3-its.c
> index fee7d13..6000c56 100644
> --- a/drivers/irqchip/irq-gic-v3-its.c
> +++ b/drivers/irqchip/irq-gic-v3-its.c
> @@ -691,9 +691,11 @@ static void its_irq_compose_msi_msg(struct irq_data *d, 
> struct msi_msg *msg)
>   */
>  #define IRQS_PER_CHUNK_SHIFT 5
>  #define IRQS_PER_CHUNK   (1 << IRQS_PER_CHUNK_SHIFT)
> +#define ITS_MAX_LPI_NRBITS   (17) /* 128K LPIs */
>  
>  static unsigned long *lpi_bitmap;
>  static u32 lpi_chunks;
> +static u32 lpi_nrbits;
>  static DEFINE_SPINLOCK(lpi_lock);
>  
>  static int its_lpi_to_chunk(int lpi)
> @@ -789,26 +791,19 @@ static void its_lpi_free(struct event_lpi_map *map)
>  }
>  
>  /*
> - * We allocate 64kB for PROPBASE. That gives us at most 64K LPIs to
> + * We allocate memory for PROPBASE to cover 2 ^ lpi_nrbits LPIs to
>   * deal with (one configuration byte per interrupt). PENDBASE has to
>   * be 64kB aligned (one bit per LPI, plus 8192 bits for SPI/PPI/SGI).
>   */
> -#define LPI_PROPBASE_SZ  SZ_64K
> -#define LPI_PENDBASE_SZ  (LPI_PROPBASE_SZ / 8 + SZ_1K)

Why don't you keep the same macros and update them to deal with a
variable? It would make the patch so much easier to review.

Something along the lines of (untested):

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 63cd0f2b8707..a1891840c66a 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -691,8 +691,10 @@ static struct irq_chip its_irq_chip = {
  */
 #define IRQS_PER_CHUNK_SHIFT   5
 #define IRQS_PER_CHUNK (1 << IRQS_PER_CHUNK_SHIFT)
+#define ITS_MAX_LPI_NRBITS (17) /* 128K LPIs */
 
 static unsigned long *lpi_bitmap;
+static u32 lpi_nrbits;
 static u32 lpi_chunks;
 static DEFINE_SPINLOCK(lpi_lock);
 
@@ -706,9 +708,9 @@ static int its_chunk_to_lpi(int chunk)
return (chunk << IRQS_PER_CHUNK_SHIFT) + 8192;
 }
 
-static int __init its_lpi_init(u32 id_bits)
+static int __init its_lpi_init(void)
 {
-   lpi_chunks = its_lpi_to_chunk(1UL << id_bits);
+   lpi_chunks = its_lpi_to_chunk(1UL << lpi_nrbits);
 
lpi_bitmap = kzalloc(BITS_TO_LONGS(lpi_chunks) * sizeof(long),
 GFP_KERNEL);
@@ -793,13 +795,9 @@ static void its_lpi_free(struct event_lpi_map *map)
  * deal with (one configuration byte per interrupt). PENDBASE has to
  * be 64kB aligned (one bit per LPI, plus 8192 bits for SPI/PPI/SGI).
  */
-#define LPI_PROPBASE_SZSZ_64K
-#define LPI_PENDBASE_SZ(LPI_PROPBASE_SZ / 8 + SZ_1K)
-
-/*
- * This is how many bits of ID we need, including the useless ones.
- */
-#define LPI_NRBITS ilog2(LPI_PROPBASE_SZ + SZ_8K)
+#define LPI_PROPBASE_SZALIGN(BIT(lpi_nrbits), SZ_64K)
+#define LPI_PENDBASE_SZALIGN(BIT(lpi_nrbits) / 8, SZ_64K)
+#define LPI_NRBITS lpi_nrbits
 
 #define LPI_PROP_DEFAULT_PRIO  0xa0
 
@@ -807,6 +805,7 @@ static int __init 

Re: [PATCH v2] irqchip: gicv3-its: Don't assume GICv3 hardware supports 16bit INTID

2017-06-22 Thread Marc Zyngier
On 22/06/17 18:44, Shanker Donthineni wrote:
> The current ITS driver is assuming every ITS hardware implementation
> supports minimum of 16bit INTID. But this is not true, as per GICv3
> specification, INTID field is IMPLEMENTATION DEFINED in the range of
> 14-24 bits. We might see an unpredictable system behavior on systems
> where hardware support less than 16bits and software tries to use
> 64K LPI interrupts.
> 
> On Qualcomm Datacenter Technologies QDF2400 platform, boot log shows
> confusing information about number of LPI chunks as shown below. The
> QDF2400 ITS hardware supports 24bit INTID.
> 
> This patch allocates the memory resources for PEND/PROP tables based
> on discoverable value which is specified in GITS_TYPER.IDbits. Also
> taking this opportunity to increase number of LPI/MSI(x) to 128K if
> the hardware is capable, and show log message that reflects the
> correct number of LPI chunks.
> 
> ITS@0xff7efe: allocated 524288 Devices @3c040 (indirect, esz 8, psz 
> 64K, shr 1)
> ITS@0xff7efe: allocated 8192 Interrupt Collections @3c013 (flat, esz 
> 8, psz 64K, shr 1)
> ITS@0xff7efe: allocated 8192 Virtual CPUs @3c014 (flat, esz 8, psz 
> 64K, shr 1)
> ITS: Allocated 524032 chunks for LPIs
> PCI/MSI: ITS@0xff7efe domain created
> Platform MSI: ITS@0xff7efe domain created

I seriously doubt that anyone will ever see a shortage of LPIs with 
16bit IDs ("64k interrupts should be enough for everybody"). But if you 
think otherwise, fair enough. Comments on the actual patch:

> 
> Signed-off-by: Shanker Donthineni 
> ---
> Changes since v1:
>No code changes, just rebase on tip of the Marc's branch and tested on 
> QDF2400 platform.
>
> https://git.kernel.org/pub/scm/linux/kernel/git/maz/arm-platforms.git/log/?h=irq/irqchip-4.13
> 
>  drivers/irqchip/irq-gic-v3-its.c | 34 --
>  1 file changed, 16 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/irqchip/irq-gic-v3-its.c 
> b/drivers/irqchip/irq-gic-v3-its.c
> index fee7d13..6000c56 100644
> --- a/drivers/irqchip/irq-gic-v3-its.c
> +++ b/drivers/irqchip/irq-gic-v3-its.c
> @@ -691,9 +691,11 @@ static void its_irq_compose_msi_msg(struct irq_data *d, 
> struct msi_msg *msg)
>   */
>  #define IRQS_PER_CHUNK_SHIFT 5
>  #define IRQS_PER_CHUNK   (1 << IRQS_PER_CHUNK_SHIFT)
> +#define ITS_MAX_LPI_NRBITS   (17) /* 128K LPIs */
>  
>  static unsigned long *lpi_bitmap;
>  static u32 lpi_chunks;
> +static u32 lpi_nrbits;
>  static DEFINE_SPINLOCK(lpi_lock);
>  
>  static int its_lpi_to_chunk(int lpi)
> @@ -789,26 +791,19 @@ static void its_lpi_free(struct event_lpi_map *map)
>  }
>  
>  /*
> - * We allocate 64kB for PROPBASE. That gives us at most 64K LPIs to
> + * We allocate memory for PROPBASE to cover 2 ^ lpi_nrbits LPIs to
>   * deal with (one configuration byte per interrupt). PENDBASE has to
>   * be 64kB aligned (one bit per LPI, plus 8192 bits for SPI/PPI/SGI).
>   */
> -#define LPI_PROPBASE_SZ  SZ_64K
> -#define LPI_PENDBASE_SZ  (LPI_PROPBASE_SZ / 8 + SZ_1K)

Why don't you keep the same macros and update them to deal with a
variable? It would make the patch so much easier to review.

Something along the lines of (untested):

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 63cd0f2b8707..a1891840c66a 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -691,8 +691,10 @@ static struct irq_chip its_irq_chip = {
  */
 #define IRQS_PER_CHUNK_SHIFT   5
 #define IRQS_PER_CHUNK (1 << IRQS_PER_CHUNK_SHIFT)
+#define ITS_MAX_LPI_NRBITS (17) /* 128K LPIs */
 
 static unsigned long *lpi_bitmap;
+static u32 lpi_nrbits;
 static u32 lpi_chunks;
 static DEFINE_SPINLOCK(lpi_lock);
 
@@ -706,9 +708,9 @@ static int its_chunk_to_lpi(int chunk)
return (chunk << IRQS_PER_CHUNK_SHIFT) + 8192;
 }
 
-static int __init its_lpi_init(u32 id_bits)
+static int __init its_lpi_init(void)
 {
-   lpi_chunks = its_lpi_to_chunk(1UL << id_bits);
+   lpi_chunks = its_lpi_to_chunk(1UL << lpi_nrbits);
 
lpi_bitmap = kzalloc(BITS_TO_LONGS(lpi_chunks) * sizeof(long),
 GFP_KERNEL);
@@ -793,13 +795,9 @@ static void its_lpi_free(struct event_lpi_map *map)
  * deal with (one configuration byte per interrupt). PENDBASE has to
  * be 64kB aligned (one bit per LPI, plus 8192 bits for SPI/PPI/SGI).
  */
-#define LPI_PROPBASE_SZSZ_64K
-#define LPI_PENDBASE_SZ(LPI_PROPBASE_SZ / 8 + SZ_1K)
-
-/*
- * This is how many bits of ID we need, including the useless ones.
- */
-#define LPI_NRBITS ilog2(LPI_PROPBASE_SZ + SZ_8K)
+#define LPI_PROPBASE_SZALIGN(BIT(lpi_nrbits), SZ_64K)
+#define LPI_PENDBASE_SZALIGN(BIT(lpi_nrbits) / 8, SZ_64K)
+#define LPI_NRBITS lpi_nrbits
 
 #define LPI_PROP_DEFAULT_PRIO  0xa0
 
@@ -807,6 +805,7 @@ static int __init its_alloc_lpi_tables(void)