Re: [PATCH kernel v9 20/32] powerpc/powernv/ioda2: Introduce pnv_pci_create_table/pnv_pci_free_table

2015-05-01 Thread Alexey Kardashevskiy

On 04/29/2015 02:39 PM, David Gibson wrote:

On Sat, Apr 25, 2015 at 10:14:44PM +1000, Alexey Kardashevskiy wrote:

This is a part of moving TCE table allocation into an iommu_ops
callback to support multiple IOMMU groups per one VFIO container.

This moves a table creation window to the file with common powernv-pci
helpers as it does not do anything IODA2-specific.

This adds pnv_pci_free_table() helper to release the actual TCE table.

This enforces window size to be a power of two.

This should cause no behavioural change.

Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
Reviewed-by: David Gibson da...@gibson.dropbear.id.au
---
Changes:
v9:
* moved helpers to the common powernv pci.c file from pci-ioda.c
* moved bits from pnv_pci_create_table() to pnv_alloc_tce_table_pages()
---
  arch/powerpc/platforms/powernv/pci-ioda.c | 36 ++
  arch/powerpc/platforms/powernv/pci.c  | 61 +++
  arch/powerpc/platforms/powernv/pci.h  |  4 ++
  3 files changed, 76 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index a80be34..b9b3773 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1307,8 +1307,7 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev 
*dev, struct pnv_ioda_pe
if (rc)
pe_warn(pe, OPAL error %ld release DMA window\n, rc);

-   iommu_reset_table(tbl, of_node_full_name(dev-dev.of_node));
-   free_pages(addr, get_order(TCE32_TABLE_SIZE));
+   pnv_pci_free_table(tbl);
  }

  static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
@@ -2039,10 +2038,7 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
  static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
   struct pnv_ioda_pe *pe)
  {
-   struct page *tce_mem = NULL;
-   void *addr;
struct iommu_table *tbl = pe-table_group.tables[0];
-   unsigned int tce_table_size, end;
int64_t rc;

/* We shouldn't already have a 32-bit DMA associated */
@@ -2053,29 +2049,20 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb 
*phb,

/* The PE will reserve all possible 32-bits space */
pe-tce32_seg = 0;
-   end = (1  ilog2(phb-ioda.m32_pci_base));
-   tce_table_size = (end / 0x1000) * 8;
pe_info(pe, Setting up 32-bit TCE table at 0..%08x\n,
-   end);
+   phb-ioda.m32_pci_base);

-   /* Allocate TCE table */
-   tce_mem = alloc_pages_node(phb-hose-node, GFP_KERNEL,
-  get_order(tce_table_size));
-   if (!tce_mem) {
-   pe_err(pe, Failed to allocate a 32-bit TCE memory\n);
-   goto fail;
+   rc = pnv_pci_create_table(pe-table_group, pe-phb-hose-node,
+   0, IOMMU_PAGE_SHIFT_4K, phb-ioda.m32_pci_base, tbl);
+   if (rc) {
+   pe_err(pe, Failed to create 32-bit TCE table, err %ld, rc);
+   return;
}
-   addr = page_address(tce_mem);
-   memset(addr, 0, tce_table_size);
-
-   /* Setup iommu */
-   tbl-it_table_group = pe-table_group;
-
-   /* Setup linux iommu table */
-   pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0,
-   IOMMU_PAGE_SHIFT_4K);

tbl-it_ops = pnv_ioda2_iommu_ops;
+
+   /* Setup iommu */
+   tbl-it_table_group = pe-table_group;
iommu_init_table(tbl, phb-hose-node);
  #ifdef CONFIG_IOMMU_API
pe-table_group.ops = pnv_pci_ioda2_ops;
@@ -2121,8 +2108,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb 
*phb,
  fail:
if (pe-tce32_seg = 0)
pe-tce32_seg = -1;
-   if (tce_mem)
-   __free_pages(tce_mem, get_order(tce_table_size));
+   pnv_pci_free_table(tbl);
  }

  static void pnv_ioda_setup_dma(struct pnv_phb *phb)
diff --git a/arch/powerpc/platforms/powernv/pci.c 
b/arch/powerpc/platforms/powernv/pci.c
index e8802ac..6bcfad5 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -20,7 +20,9 @@
  #include linux/io.h
  #include linux/msi.h
  #include linux/iommu.h
+#include linux/memblock.h

+#include asm/mmzone.h
  #include asm/sections.h
  #include asm/io.h
  #include asm/prom.h
@@ -645,6 +647,65 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
tbl-it_type = TCE_PCI;
  }

+static __be64 *pnv_alloc_tce_table_pages(int nid, unsigned shift,
+   unsigned long *tce_table_allocated)


I'm a bit confused by the tce_table_allocated parameter.  What's the
circumstance where more memory is requested than required, and why
does it matter to the caller?


+{
+   struct page *tce_mem = NULL;
+   __be64 *addr;
+   unsigned order = max_t(unsigned, shift, PAGE_SHIFT) - PAGE_SHIFT;
+   unsigned long local_allocated = 1UL  (order + PAGE_SHIFT);
+
+ 

Re: [PATCH kernel v9 20/32] powerpc/powernv/ioda2: Introduce pnv_pci_create_table/pnv_pci_free_table

2015-04-29 Thread Alexey Kardashevskiy

On 04/29/2015 02:39 PM, David Gibson wrote:

On Sat, Apr 25, 2015 at 10:14:44PM +1000, Alexey Kardashevskiy wrote:

This is a part of moving TCE table allocation into an iommu_ops
callback to support multiple IOMMU groups per one VFIO container.

This moves a table creation window to the file with common powernv-pci
helpers as it does not do anything IODA2-specific.

This adds pnv_pci_free_table() helper to release the actual TCE table.

This enforces window size to be a power of two.

This should cause no behavioural change.

Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
Reviewed-by: David Gibson da...@gibson.dropbear.id.au
---
Changes:
v9:
* moved helpers to the common powernv pci.c file from pci-ioda.c
* moved bits from pnv_pci_create_table() to pnv_alloc_tce_table_pages()
---
  arch/powerpc/platforms/powernv/pci-ioda.c | 36 ++
  arch/powerpc/platforms/powernv/pci.c  | 61 +++
  arch/powerpc/platforms/powernv/pci.h  |  4 ++
  3 files changed, 76 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index a80be34..b9b3773 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1307,8 +1307,7 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev 
*dev, struct pnv_ioda_pe
if (rc)
pe_warn(pe, OPAL error %ld release DMA window\n, rc);

-   iommu_reset_table(tbl, of_node_full_name(dev-dev.of_node));
-   free_pages(addr, get_order(TCE32_TABLE_SIZE));
+   pnv_pci_free_table(tbl);
  }

  static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
@@ -2039,10 +2038,7 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
  static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
   struct pnv_ioda_pe *pe)
  {
-   struct page *tce_mem = NULL;
-   void *addr;
struct iommu_table *tbl = pe-table_group.tables[0];
-   unsigned int tce_table_size, end;
int64_t rc;

/* We shouldn't already have a 32-bit DMA associated */
@@ -2053,29 +2049,20 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb 
*phb,

/* The PE will reserve all possible 32-bits space */
pe-tce32_seg = 0;
-   end = (1  ilog2(phb-ioda.m32_pci_base));
-   tce_table_size = (end / 0x1000) * 8;
pe_info(pe, Setting up 32-bit TCE table at 0..%08x\n,
-   end);
+   phb-ioda.m32_pci_base);

-   /* Allocate TCE table */
-   tce_mem = alloc_pages_node(phb-hose-node, GFP_KERNEL,
-  get_order(tce_table_size));
-   if (!tce_mem) {
-   pe_err(pe, Failed to allocate a 32-bit TCE memory\n);
-   goto fail;
+   rc = pnv_pci_create_table(pe-table_group, pe-phb-hose-node,
+   0, IOMMU_PAGE_SHIFT_4K, phb-ioda.m32_pci_base, tbl);
+   if (rc) {
+   pe_err(pe, Failed to create 32-bit TCE table, err %ld, rc);
+   return;
}
-   addr = page_address(tce_mem);
-   memset(addr, 0, tce_table_size);
-
-   /* Setup iommu */
-   tbl-it_table_group = pe-table_group;
-
-   /* Setup linux iommu table */
-   pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0,
-   IOMMU_PAGE_SHIFT_4K);

tbl-it_ops = pnv_ioda2_iommu_ops;
+
+   /* Setup iommu */
+   tbl-it_table_group = pe-table_group;
iommu_init_table(tbl, phb-hose-node);
  #ifdef CONFIG_IOMMU_API
pe-table_group.ops = pnv_pci_ioda2_ops;
@@ -2121,8 +2108,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb 
*phb,
  fail:
if (pe-tce32_seg = 0)
pe-tce32_seg = -1;
-   if (tce_mem)
-   __free_pages(tce_mem, get_order(tce_table_size));
+   pnv_pci_free_table(tbl);
  }

  static void pnv_ioda_setup_dma(struct pnv_phb *phb)
diff --git a/arch/powerpc/platforms/powernv/pci.c 
b/arch/powerpc/platforms/powernv/pci.c
index e8802ac..6bcfad5 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -20,7 +20,9 @@
  #include linux/io.h
  #include linux/msi.h
  #include linux/iommu.h
+#include linux/memblock.h

+#include asm/mmzone.h
  #include asm/sections.h
  #include asm/io.h
  #include asm/prom.h
@@ -645,6 +647,65 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
tbl-it_type = TCE_PCI;
  }

+static __be64 *pnv_alloc_tce_table_pages(int nid, unsigned shift,
+   unsigned long *tce_table_allocated)


I'm a bit confused by the tce_table_allocated parameter.  What's the
circumstance where more memory is requested than required, and why
does it matter to the caller?


It does not make much sense here but it does for powerpc/powernv: 
Implement multilevel TCE tables - I was trying to avoid changing same 
lines many times.


The idea is if multilevel table is requested, I do 

Re: [PATCH kernel v9 20/32] powerpc/powernv/ioda2: Introduce pnv_pci_create_table/pnv_pci_free_table

2015-04-29 Thread David Gibson
On Wed, Apr 29, 2015 at 07:12:37PM +1000, Alexey Kardashevskiy wrote:
 On 04/29/2015 02:39 PM, David Gibson wrote:
 On Sat, Apr 25, 2015 at 10:14:44PM +1000, Alexey Kardashevskiy wrote:
 This is a part of moving TCE table allocation into an iommu_ops
 callback to support multiple IOMMU groups per one VFIO container.
 
 This moves a table creation window to the file with common powernv-pci
 helpers as it does not do anything IODA2-specific.
 
 This adds pnv_pci_free_table() helper to release the actual TCE table.
 
 This enforces window size to be a power of two.
 
 This should cause no behavioural change.
 
 Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
 Reviewed-by: David Gibson da...@gibson.dropbear.id.au
 ---
 Changes:
 v9:
 * moved helpers to the common powernv pci.c file from pci-ioda.c
 * moved bits from pnv_pci_create_table() to pnv_alloc_tce_table_pages()
 ---
   arch/powerpc/platforms/powernv/pci-ioda.c | 36 ++
   arch/powerpc/platforms/powernv/pci.c  | 61 
  +++
   arch/powerpc/platforms/powernv/pci.h  |  4 ++
   3 files changed, 76 insertions(+), 25 deletions(-)
 
 diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
 b/arch/powerpc/platforms/powernv/pci-ioda.c
 index a80be34..b9b3773 100644
 --- a/arch/powerpc/platforms/powernv/pci-ioda.c
 +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
 @@ -1307,8 +1307,7 @@ static void pnv_pci_ioda2_release_dma_pe(struct 
 pci_dev *dev, struct pnv_ioda_pe
 if (rc)
 pe_warn(pe, OPAL error %ld release DMA window\n, rc);
 
 -   iommu_reset_table(tbl, of_node_full_name(dev-dev.of_node));
 -   free_pages(addr, get_order(TCE32_TABLE_SIZE));
 +   pnv_pci_free_table(tbl);
   }
 
   static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
 @@ -2039,10 +2038,7 @@ static struct iommu_table_group_ops 
 pnv_pci_ioda2_ops = {
   static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
struct pnv_ioda_pe *pe)
   {
 -   struct page *tce_mem = NULL;
 -   void *addr;
 struct iommu_table *tbl = pe-table_group.tables[0];
 -   unsigned int tce_table_size, end;
 int64_t rc;
 
 /* We shouldn't already have a 32-bit DMA associated */
 @@ -2053,29 +2049,20 @@ static void pnv_pci_ioda2_setup_dma_pe(struct 
 pnv_phb *phb,
 
 /* The PE will reserve all possible 32-bits space */
 pe-tce32_seg = 0;
 -   end = (1  ilog2(phb-ioda.m32_pci_base));
 -   tce_table_size = (end / 0x1000) * 8;
 pe_info(pe, Setting up 32-bit TCE table at 0..%08x\n,
 -   end);
 +   phb-ioda.m32_pci_base);
 
 -   /* Allocate TCE table */
 -   tce_mem = alloc_pages_node(phb-hose-node, GFP_KERNEL,
 -  get_order(tce_table_size));
 -   if (!tce_mem) {
 -   pe_err(pe, Failed to allocate a 32-bit TCE memory\n);
 -   goto fail;
 +   rc = pnv_pci_create_table(pe-table_group, pe-phb-hose-node,
 +   0, IOMMU_PAGE_SHIFT_4K, phb-ioda.m32_pci_base, tbl);
 +   if (rc) {
 +   pe_err(pe, Failed to create 32-bit TCE table, err %ld, rc);
 +   return;
 }
 -   addr = page_address(tce_mem);
 -   memset(addr, 0, tce_table_size);
 -
 -   /* Setup iommu */
 -   tbl-it_table_group = pe-table_group;
 -
 -   /* Setup linux iommu table */
 -   pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0,
 -   IOMMU_PAGE_SHIFT_4K);
 
 tbl-it_ops = pnv_ioda2_iommu_ops;
 +
 +   /* Setup iommu */
 +   tbl-it_table_group = pe-table_group;
 iommu_init_table(tbl, phb-hose-node);
   #ifdef CONFIG_IOMMU_API
 pe-table_group.ops = pnv_pci_ioda2_ops;
 @@ -2121,8 +2108,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb 
 *phb,
   fail:
 if (pe-tce32_seg = 0)
 pe-tce32_seg = -1;
 -   if (tce_mem)
 -   __free_pages(tce_mem, get_order(tce_table_size));
 +   pnv_pci_free_table(tbl);
   }
 
   static void pnv_ioda_setup_dma(struct pnv_phb *phb)
 diff --git a/arch/powerpc/platforms/powernv/pci.c 
 b/arch/powerpc/platforms/powernv/pci.c
 index e8802ac..6bcfad5 100644
 --- a/arch/powerpc/platforms/powernv/pci.c
 +++ b/arch/powerpc/platforms/powernv/pci.c
 @@ -20,7 +20,9 @@
   #include linux/io.h
   #include linux/msi.h
   #include linux/iommu.h
 +#include linux/memblock.h
 
 +#include asm/mmzone.h
   #include asm/sections.h
   #include asm/io.h
   #include asm/prom.h
 @@ -645,6 +647,65 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
 tbl-it_type = TCE_PCI;
   }
 
 +static __be64 *pnv_alloc_tce_table_pages(int nid, unsigned shift,
 +   unsigned long *tce_table_allocated)
 
 I'm a bit confused by the tce_table_allocated parameter.  What's the
 circumstance where more memory is requested than required, and why
 does it matter to the caller?
 
 It does not make much sense here but it does for powerpc/powernv: Implement
 multilevel TCE tables - I was trying to avoid changing same lines many
 times.
 
 The idea is if multilevel table is requested, I 

Re: [PATCH kernel v9 20/32] powerpc/powernv/ioda2: Introduce pnv_pci_create_table/pnv_pci_free_table

2015-04-28 Thread David Gibson
On Sat, Apr 25, 2015 at 10:14:44PM +1000, Alexey Kardashevskiy wrote:
 This is a part of moving TCE table allocation into an iommu_ops
 callback to support multiple IOMMU groups per one VFIO container.
 
 This moves a table creation window to the file with common powernv-pci
 helpers as it does not do anything IODA2-specific.
 
 This adds pnv_pci_free_table() helper to release the actual TCE table.
 
 This enforces window size to be a power of two.
 
 This should cause no behavioural change.
 
 Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
 Reviewed-by: David Gibson da...@gibson.dropbear.id.au
 ---
 Changes:
 v9:
 * moved helpers to the common powernv pci.c file from pci-ioda.c
 * moved bits from pnv_pci_create_table() to pnv_alloc_tce_table_pages()
 ---
  arch/powerpc/platforms/powernv/pci-ioda.c | 36 ++
  arch/powerpc/platforms/powernv/pci.c  | 61 
 +++
  arch/powerpc/platforms/powernv/pci.h  |  4 ++
  3 files changed, 76 insertions(+), 25 deletions(-)
 
 diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
 b/arch/powerpc/platforms/powernv/pci-ioda.c
 index a80be34..b9b3773 100644
 --- a/arch/powerpc/platforms/powernv/pci-ioda.c
 +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
 @@ -1307,8 +1307,7 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev 
 *dev, struct pnv_ioda_pe
   if (rc)
   pe_warn(pe, OPAL error %ld release DMA window\n, rc);
  
 - iommu_reset_table(tbl, of_node_full_name(dev-dev.of_node));
 - free_pages(addr, get_order(TCE32_TABLE_SIZE));
 + pnv_pci_free_table(tbl);
  }
  
  static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
 @@ -2039,10 +2038,7 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops 
 = {
  static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
  struct pnv_ioda_pe *pe)
  {
 - struct page *tce_mem = NULL;
 - void *addr;
   struct iommu_table *tbl = pe-table_group.tables[0];
 - unsigned int tce_table_size, end;
   int64_t rc;
  
   /* We shouldn't already have a 32-bit DMA associated */
 @@ -2053,29 +2049,20 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb 
 *phb,
  
   /* The PE will reserve all possible 32-bits space */
   pe-tce32_seg = 0;
 - end = (1  ilog2(phb-ioda.m32_pci_base));
 - tce_table_size = (end / 0x1000) * 8;
   pe_info(pe, Setting up 32-bit TCE table at 0..%08x\n,
 - end);
 + phb-ioda.m32_pci_base);
  
 - /* Allocate TCE table */
 - tce_mem = alloc_pages_node(phb-hose-node, GFP_KERNEL,
 -get_order(tce_table_size));
 - if (!tce_mem) {
 - pe_err(pe, Failed to allocate a 32-bit TCE memory\n);
 - goto fail;
 + rc = pnv_pci_create_table(pe-table_group, pe-phb-hose-node,
 + 0, IOMMU_PAGE_SHIFT_4K, phb-ioda.m32_pci_base, tbl);
 + if (rc) {
 + pe_err(pe, Failed to create 32-bit TCE table, err %ld, rc);
 + return;
   }
 - addr = page_address(tce_mem);
 - memset(addr, 0, tce_table_size);
 -
 - /* Setup iommu */
 - tbl-it_table_group = pe-table_group;
 -
 - /* Setup linux iommu table */
 - pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0,
 - IOMMU_PAGE_SHIFT_4K);
  
   tbl-it_ops = pnv_ioda2_iommu_ops;
 +
 + /* Setup iommu */
 + tbl-it_table_group = pe-table_group;
   iommu_init_table(tbl, phb-hose-node);
  #ifdef CONFIG_IOMMU_API
   pe-table_group.ops = pnv_pci_ioda2_ops;
 @@ -2121,8 +2108,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb 
 *phb,
  fail:
   if (pe-tce32_seg = 0)
   pe-tce32_seg = -1;
 - if (tce_mem)
 - __free_pages(tce_mem, get_order(tce_table_size));
 + pnv_pci_free_table(tbl);
  }
  
  static void pnv_ioda_setup_dma(struct pnv_phb *phb)
 diff --git a/arch/powerpc/platforms/powernv/pci.c 
 b/arch/powerpc/platforms/powernv/pci.c
 index e8802ac..6bcfad5 100644
 --- a/arch/powerpc/platforms/powernv/pci.c
 +++ b/arch/powerpc/platforms/powernv/pci.c
 @@ -20,7 +20,9 @@
  #include linux/io.h
  #include linux/msi.h
  #include linux/iommu.h
 +#include linux/memblock.h
  
 +#include asm/mmzone.h
  #include asm/sections.h
  #include asm/io.h
  #include asm/prom.h
 @@ -645,6 +647,65 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
   tbl-it_type = TCE_PCI;
  }
  
 +static __be64 *pnv_alloc_tce_table_pages(int nid, unsigned shift,
 + unsigned long *tce_table_allocated)

I'm a bit confused by the tce_table_allocated parameter.  What's the
circumstance where more memory is requested than required, and why
does it matter to the caller?

 +{
 + struct page *tce_mem = NULL;
 + __be64 *addr;
 + unsigned order = max_t(unsigned, shift, PAGE_SHIFT) - PAGE_SHIFT;
 + unsigned long local_allocated = 1UL  (order + PAGE_SHIFT);
 +
 + tce_mem = alloc_pages_node(nid, 

[PATCH kernel v9 20/32] powerpc/powernv/ioda2: Introduce pnv_pci_create_table/pnv_pci_free_table

2015-04-25 Thread Alexey Kardashevskiy
This is a part of moving TCE table allocation into an iommu_ops
callback to support multiple IOMMU groups per one VFIO container.

This moves a table creation window to the file with common powernv-pci
helpers as it does not do anything IODA2-specific.

This adds pnv_pci_free_table() helper to release the actual TCE table.

This enforces window size to be a power of two.

This should cause no behavioural change.

Signed-off-by: Alexey Kardashevskiy a...@ozlabs.ru
Reviewed-by: David Gibson da...@gibson.dropbear.id.au
---
Changes:
v9:
* moved helpers to the common powernv pci.c file from pci-ioda.c
* moved bits from pnv_pci_create_table() to pnv_alloc_tce_table_pages()
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 36 ++
 arch/powerpc/platforms/powernv/pci.c  | 61 +++
 arch/powerpc/platforms/powernv/pci.h  |  4 ++
 3 files changed, 76 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index a80be34..b9b3773 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1307,8 +1307,7 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev 
*dev, struct pnv_ioda_pe
if (rc)
pe_warn(pe, OPAL error %ld release DMA window\n, rc);
 
-   iommu_reset_table(tbl, of_node_full_name(dev-dev.of_node));
-   free_pages(addr, get_order(TCE32_TABLE_SIZE));
+   pnv_pci_free_table(tbl);
 }
 
 static void pnv_ioda_release_vf_PE(struct pci_dev *pdev, u16 num_vfs)
@@ -2039,10 +2038,7 @@ static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
 static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
   struct pnv_ioda_pe *pe)
 {
-   struct page *tce_mem = NULL;
-   void *addr;
struct iommu_table *tbl = pe-table_group.tables[0];
-   unsigned int tce_table_size, end;
int64_t rc;
 
/* We shouldn't already have a 32-bit DMA associated */
@@ -2053,29 +2049,20 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb 
*phb,
 
/* The PE will reserve all possible 32-bits space */
pe-tce32_seg = 0;
-   end = (1  ilog2(phb-ioda.m32_pci_base));
-   tce_table_size = (end / 0x1000) * 8;
pe_info(pe, Setting up 32-bit TCE table at 0..%08x\n,
-   end);
+   phb-ioda.m32_pci_base);
 
-   /* Allocate TCE table */
-   tce_mem = alloc_pages_node(phb-hose-node, GFP_KERNEL,
-  get_order(tce_table_size));
-   if (!tce_mem) {
-   pe_err(pe, Failed to allocate a 32-bit TCE memory\n);
-   goto fail;
+   rc = pnv_pci_create_table(pe-table_group, pe-phb-hose-node,
+   0, IOMMU_PAGE_SHIFT_4K, phb-ioda.m32_pci_base, tbl);
+   if (rc) {
+   pe_err(pe, Failed to create 32-bit TCE table, err %ld, rc);
+   return;
}
-   addr = page_address(tce_mem);
-   memset(addr, 0, tce_table_size);
-
-   /* Setup iommu */
-   tbl-it_table_group = pe-table_group;
-
-   /* Setup linux iommu table */
-   pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, 0,
-   IOMMU_PAGE_SHIFT_4K);
 
tbl-it_ops = pnv_ioda2_iommu_ops;
+
+   /* Setup iommu */
+   tbl-it_table_group = pe-table_group;
iommu_init_table(tbl, phb-hose-node);
 #ifdef CONFIG_IOMMU_API
pe-table_group.ops = pnv_pci_ioda2_ops;
@@ -2121,8 +2108,7 @@ static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb 
*phb,
 fail:
if (pe-tce32_seg = 0)
pe-tce32_seg = -1;
-   if (tce_mem)
-   __free_pages(tce_mem, get_order(tce_table_size));
+   pnv_pci_free_table(tbl);
 }
 
 static void pnv_ioda_setup_dma(struct pnv_phb *phb)
diff --git a/arch/powerpc/platforms/powernv/pci.c 
b/arch/powerpc/platforms/powernv/pci.c
index e8802ac..6bcfad5 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -20,7 +20,9 @@
 #include linux/io.h
 #include linux/msi.h
 #include linux/iommu.h
+#include linux/memblock.h
 
+#include asm/mmzone.h
 #include asm/sections.h
 #include asm/io.h
 #include asm/prom.h
@@ -645,6 +647,65 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
tbl-it_type = TCE_PCI;
 }
 
+static __be64 *pnv_alloc_tce_table_pages(int nid, unsigned shift,
+   unsigned long *tce_table_allocated)
+{
+   struct page *tce_mem = NULL;
+   __be64 *addr;
+   unsigned order = max_t(unsigned, shift, PAGE_SHIFT) - PAGE_SHIFT;
+   unsigned long local_allocated = 1UL  (order + PAGE_SHIFT);
+
+   tce_mem = alloc_pages_node(nid, GFP_KERNEL, order);
+   if (!tce_mem) {
+   pr_err(Failed to allocate a TCE memory, order=%d\n, order);
+   return NULL;
+   }
+   addr = page_address(tce_mem);
+   memset(addr, 0, local_allocated);
+