[PATCH V4 5/5] irqchip/gicv3-its: Implement two-level(indirect) device table support

2016-06-05 Thread Shanker Donthineni
Since device IDs are extremely sparse, a single-level (a.k.a. flat) table
is not sufficient, for the following two reasons.

1) According to the ARM GIC spec, the ITS hw can access a maximum of
   256 (pages) * 64K (page size) bytes, i.e. 16MB. In the best case, with
   the minimum device table entry size of 8 bytes, it supports a sparse
   DEVid range of up to 21 bits (16MB / 8 bytes = 2^21 entries).

2) The maximum memory size that can be allocated without memblock depends
   on MAX_ORDER: 4MB on a 4K page size kernel with the default MAX_ORDER,
   so it supports a DEVid range of only 19 bits (4MB / 8 bytes = 2^19).

The two-level device table feature brings us two advantages: the first
is a very high likelihood of supporting a sparse DEVid range of up to
32 bits, and the second is better utilization of allocated memory.

The feature is enabled automatically during driver probe if the memory
requirement is more than 2*ITS-pages and the hardware is capable of
two-level table walk.

Signed-off-by: Shanker Donthineni 
---
Changes since v3:
  Changed level-one table pointer type from 'u64 *' to '__le64 *'
  Addressed Marc's review comments.

Changes since v2:
  Fixed a porting bug in the device 'id' validation check in its_alloc_device_table()

Changes since v1:
  Most of this patch has been rewritten after refactoring its_alloc_tables().
  Always enable the two-level device table if the memory requirement is more than
PAGE_SIZE.
  Fixed the coding bug that breaks on the BE machine.
  Edited the commit text.

 drivers/irqchip/irq-gic-v3-its.c   | 105 +++--
 include/linux/irqchip/arm-gic-v3.h |   3 ++
 2 files changed, 92 insertions(+), 16 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 4f9e288..271c7f3 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -838,7 +838,7 @@ static void its_write_baser(struct its_node *its, struct 
its_baser *baser,
 }
 
 static int its_setup_baser(struct its_node *its, struct its_baser *baser,
-  u32 order)
+  u32 order, bool indirect)
 {
u64 val = its_read_baser(its, baser);
u64 esz = GITS_BASER_ENTRY_SIZE(val);
@@ -875,6 +875,8 @@ retry_baser:
shr  |
GITS_BASER_VALID);
 
+   val |=  indirect ? GITS_BASER_INDIRECT : 0x0;
+
switch (psz) {
case SZ_4K:
val |= GITS_BASER_PAGE_SIZE_4K;
@@ -937,28 +939,56 @@ retry_baser:
baser->base = base;
baser->psz = psz;
baser->val = val;
+   tmp = indirect ? GITS_LVL1_ENTRY_SIZE : esz;
 
-   pr_info("ITS@%pa: allocated %d %s @%lx (psz %dK, shr %d)\n",
-   >phys_base, (int)(PAGE_ORDER_TO_SIZE(order) / esz),
+   pr_info("ITS@%pa: allocated %d %s @%lx (%s, esz %d, psz %dK, shr %d)\n",
+   >phys_base, (int)(PAGE_ORDER_TO_SIZE(order) / tmp),
its_base_type_string[type],
(unsigned long)virt_to_phys(base),
+   indirect ? "indirect" : "flat", (int)esz,
psz / SZ_1K, (int)shr >> GITS_BASER_SHAREABILITY_SHIFT);
 
return 0;
 }
 
-static void its_parse_baser_device(struct its_node *its, struct its_baser 
*baser,
+static bool its_parse_baser_device(struct its_node *its, struct its_baser 
*baser,
   u32 *order)
 {
u64 esz = GITS_BASER_ENTRY_SIZE(its_read_baser(its, baser));
+   u64 val = GITS_BASER_InnerShareable | GITS_BASER_WaWb;
u32 ids = its->device_ids;
u32 new_order = *order;
+   bool indirect = false;
+
+   /* No need to enable Indirection if memory requirement < (psz*2)bytes */
+   if ((esz << ids) > (baser->psz * 2)) {
+   /*
+* Find out whether hw supports a single or two-level table by
+* table by reading bit at offset '62' after writing '1' to it.
+*/
+   its_write_baser(its, baser, val | GITS_BASER_INDIRECT);
+   val = its_read_baser(its, baser);
+   indirect = !!(val & GITS_BASER_INDIRECT);
+
+   if (indirect) {
+   /*
+* The size of the lvl2 table is equal to ITS page size
+* which is 'psz'. For computing lvl1 table size,
+* subtract ID bits that sparse lvl2 table from 'ids'
+* which is reported by ITS hardware times lvl1 table
+* entry size.
+*/
+   ids -= ilog2(baser->psz / esz);
+   esz = GITS_LVL1_ENTRY_SIZE;
+   }
+   }
 
/*
 * Allocate as many entries as required to fit the
 * range of device IDs that the ITS can grok... The ID
 * space being incredibly sparse, this results in a
-* massive waste of memory.
+* massive waste of memory if two-level device table
+* feature is not supported by hardware.
 */
 

[PATCH V4 5/5] irqchip/gicv3-its: Implement two-level(indirect) device table support

2016-06-05 Thread Shanker Donthineni
Since device IDs are extremely sparse, a single-level (a.k.a. flat) table
is not sufficient, for the following two reasons.

1) According to the ARM GIC spec, the ITS hw can access a maximum of
   256 (pages) * 64K (page size) bytes, i.e. 16MB. In the best case, with
   the minimum device table entry size of 8 bytes, it supports a sparse
   DEVid range of up to 21 bits (16MB / 8 bytes = 2^21 entries).

2) The maximum memory size that can be allocated without memblock depends
   on MAX_ORDER: 4MB on a 4K page size kernel with the default MAX_ORDER,
   so it supports a DEVid range of only 19 bits (4MB / 8 bytes = 2^19).

The two-level device table feature brings us two advantages: the first
is a very high likelihood of supporting a sparse DEVid range of up to
32 bits, and the second is better utilization of allocated memory.

The feature is enabled automatically during driver probe if the memory
requirement is more than 2*ITS-pages and the hardware is capable of
two-level table walk.

Signed-off-by: Shanker Donthineni 
---
Changes since v3:
  Changed level-one table pointer type from 'u64 *' to '__le64 *'
  Addressed Marc's review comments.

Changes since v2:
  Fixed a porting bug in the device 'id' validation check in its_alloc_device_table()

Changes since v1:
  Most of this patch has been rewritten after refactoring its_alloc_tables().
  Always enable the two-level device table if the memory requirement is more than
PAGE_SIZE.
  Fixed the coding bug that breaks on the BE machine.
  Edited the commit text.

 drivers/irqchip/irq-gic-v3-its.c   | 105 +++--
 include/linux/irqchip/arm-gic-v3.h |   3 ++
 2 files changed, 92 insertions(+), 16 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 4f9e288..271c7f3 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -838,7 +838,7 @@ static void its_write_baser(struct its_node *its, struct 
its_baser *baser,
 }
 
 static int its_setup_baser(struct its_node *its, struct its_baser *baser,
-  u32 order)
+  u32 order, bool indirect)
 {
u64 val = its_read_baser(its, baser);
u64 esz = GITS_BASER_ENTRY_SIZE(val);
@@ -875,6 +875,8 @@ retry_baser:
shr  |
GITS_BASER_VALID);
 
+   val |=  indirect ? GITS_BASER_INDIRECT : 0x0;
+
switch (psz) {
case SZ_4K:
val |= GITS_BASER_PAGE_SIZE_4K;
@@ -937,28 +939,56 @@ retry_baser:
baser->base = base;
baser->psz = psz;
baser->val = val;
+   tmp = indirect ? GITS_LVL1_ENTRY_SIZE : esz;
 
-   pr_info("ITS@%pa: allocated %d %s @%lx (psz %dK, shr %d)\n",
-   >phys_base, (int)(PAGE_ORDER_TO_SIZE(order) / esz),
+   pr_info("ITS@%pa: allocated %d %s @%lx (%s, esz %d, psz %dK, shr %d)\n",
+   >phys_base, (int)(PAGE_ORDER_TO_SIZE(order) / tmp),
its_base_type_string[type],
(unsigned long)virt_to_phys(base),
+   indirect ? "indirect" : "flat", (int)esz,
psz / SZ_1K, (int)shr >> GITS_BASER_SHAREABILITY_SHIFT);
 
return 0;
 }
 
-static void its_parse_baser_device(struct its_node *its, struct its_baser 
*baser,
+static bool its_parse_baser_device(struct its_node *its, struct its_baser 
*baser,
   u32 *order)
 {
u64 esz = GITS_BASER_ENTRY_SIZE(its_read_baser(its, baser));
+   u64 val = GITS_BASER_InnerShareable | GITS_BASER_WaWb;
u32 ids = its->device_ids;
u32 new_order = *order;
+   bool indirect = false;
+
+   /* No need to enable Indirection if memory requirement < (psz*2)bytes */
+   if ((esz << ids) > (baser->psz * 2)) {
+   /*
+* Find out whether hw supports a single or two-level table by
+* table by reading bit at offset '62' after writing '1' to it.
+*/
+   its_write_baser(its, baser, val | GITS_BASER_INDIRECT);
+   val = its_read_baser(its, baser);
+   indirect = !!(val & GITS_BASER_INDIRECT);
+
+   if (indirect) {
+   /*
+* The size of the lvl2 table is equal to ITS page size
+* which is 'psz'. For computing lvl1 table size,
+* subtract ID bits that sparse lvl2 table from 'ids'
+* which is reported by ITS hardware times lvl1 table
+* entry size.
+*/
+   ids -= ilog2(baser->psz / esz);
+   esz = GITS_LVL1_ENTRY_SIZE;
+   }
+   }
 
/*
 * Allocate as many entries as required to fit the
 * range of device IDs that the ITS can grok... The ID
 * space being incredibly sparse, this results in a
-* massive waste of memory.
+* massive waste of memory if two-level device table
+* feature is not supported by hardware.
 */
new_order =