Re: [RFC PATCH 7/7 v2] ppc: add dynamic dma window support

2010-12-09 Thread Nishanth Aravamudan
On 26.10.2010 [20:35:17 -0700], Nishanth Aravamudan wrote:
> If firmware allows us to map all of a partition's memory for DMA on a
> particular bridge, create a 1:1 mapping of that memory. Add hooks for
> dealing with hotplug events. Dyanmic DMA windows can use larger than the
> default page size, and we use the largest one possible.
> 
> Not-yet-signed-off-by: Nishanth Aravamudan 
> 
> ---
> 
> I've tested this briefly on a machine with suitable firmware/hardware.
> Things seem to work well, but I want to do more exhaustive I/O testing
> before asking for upstream merging. I would really appreciate any
> feedback on the updated approach.
> 
> Specific questions:
> 
> Ben, did I hook into the dma_set_mask() platform callback as you
> expected? Anything I can do better or which perhaps might lead to
> gotchas later?
> 
> I've added a disable_ddw option, but perhaps it would be better to
> just disable the feature if iommu=force?

So for the final version, I probably should document this option in
kernel-parameters.txt w/ the patch, right?



> +static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn,
> + unsigned long num_pfn, const void *arg)
> +{
> + const struct dynamic_dma_window_prop *maprange = arg;
> + int rc;
> + u64 tce_size, num_tce, dma_offset, next;
> + u32 tce_shift;
> + long limit;
> +
> + tce_shift = be32_to_cpu(maprange->tce_shift);
> + tce_size = 1ULL << tce_shift;
> + next = start_pfn << PAGE_SHIFT;
> + num_tce = num_pfn << PAGE_SHIFT;
> +
> + /* round back to the beginning of the tce page size */
> + num_tce += next & (tce_size - 1);
> + next &= ~(tce_size - 1);
> +
> + /* covert to number of tces */
> + num_tce |= tce_size - 1;
> + num_tce >>= tce_shift;
> +
> + do {
> + /*
> +  * Set up the page with TCE data, looping through and setting
> +  * the values.
> +  */
> + limit = min_t(long, num_tce, 512);
> + dma_offset = next + be64_to_cpu(maprange->dma_base);
> +
> + rc = plpar_tce_stuff(be64_to_cpu(maprange->liobn),
> + (u64)dma_offset,
> +  0, limit);
> + num_tce -= limit;
> + } while (num_tce > 0 && !rc);
> +
> + return rc;
> +}

There is a bit of a typo here, the liobn is a 32-bit value. I've fixed
this is up locally and will update it when I send out the final version
of this patch.

I'm finding that on dlpar remove of adapters running in slots supporting
64-bit DMA, that the plpar_tce_stuff is failing. Can you think of a
reason why? It looks basically the same as the put_indirect below...

> +static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
> + unsigned long num_pfn, const void *arg)
> +{
> + const struct dynamic_dma_window_prop *maprange = arg;
> + u64 *tcep, tce_size, num_tce, dma_offset, next, proto_tce, liobn;
> + u32 tce_shift;
> + u64 rc = 0;
> + long l, limit;
> +
> + local_irq_disable();/* to protect tcep and the page behind it */
> + tcep = __get_cpu_var(tce_page);
> +
> + if (!tcep) {
> + tcep = (u64 *)__get_free_page(GFP_ATOMIC);
> + if (!tcep) {
> + local_irq_enable();
> + return -ENOMEM;
> + }
> + __get_cpu_var(tce_page) = tcep;
> + }
> +
> + proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
> +
> + liobn = (u64)be32_to_cpu(maprange->liobn);
> + tce_shift = be32_to_cpu(maprange->tce_shift);
> + tce_size = 1ULL << tce_shift;
> + next = start_pfn << PAGE_SHIFT;
> + num_tce = num_pfn << PAGE_SHIFT;
> +
> + /* round back to the beginning of the tce page size */
> + num_tce += next & (tce_size - 1);
> + next &= ~(tce_size - 1);
> +
> + /* covert to number of tces */
> + num_tce |= tce_size - 1;
> + num_tce >>= tce_shift;
> +
> + /* We can map max one pageful of TCEs at a time */
> + do {
> + /*
> +  * Set up the page with TCE data, looping through and setting
> +  * the values.
> +  */
> + limit = min_t(long, num_tce, 4096/TCE_ENTRY_SIZE);
> + dma_offset = next + be64_to_cpu(maprange->dma_base);
> +
> + for (l = 0; l < limit; l++) {
> + tcep[l] = proto_tce | next;
> + next += tce_size;
> + }
> +
> + rc = plpar_tce_put_indirect(liobn,
> + (u64)dma_offset,
> + (u64)virt_to_abs(tcep),
> + limit);
> +
> + num_tce -= limit;
> + } while (num_tce > 0 && !rc);
> +printk("plpar_tce_put_indirect for offset 0x%llx and tcep[0] 
> 0x%llx returned %llu\n",
> +  

Re: [RFC PATCH 7/7 v2] ppc: add dynamic dma window support

2010-12-09 Thread Nishanth Aravamudan
On 09.12.2010 [15:17:06 +1100], Benjamin Herrenschmidt wrote:
> On Tue, 2010-10-26 at 20:35 -0700, Nishanth Aravamudan wrote:
> 
> No much comments... I'm amazed how complex he firmware folks managed to
> make this ... 
> 
> >  static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned 
> > long action, void *node)
> >  {
> > int err = NOTIFY_OK;
> > struct device_node *np = node;
> > struct pci_dn *pci = PCI_DN(np);
> > +   struct direct_window *window;
> >  
> > switch (action) {
> > case PSERIES_RECONFIG_REMOVE:
> > if (pci && pci->iommu_table)
> > iommu_free_table(pci->iommu_table, np->full_name);
> > +
> > +   spin_lock(&direct_window_list_lock);
> > +   list_for_each_entry(window, &direct_window_list, list) {
> > +   if (window->device == np) {
> > +   list_del(&window->list);
> > +   break;
> > +   }
> > +   }
> > +   spin_unlock(&direct_window_list_lock);
> 
> Should you also kfree the window ?

Yeah, looks like I should. I have a few other questions due to testing,
but I'll reply to my original e-mail with those.

Thanks for the review!
Nish

-- 
Nishanth Aravamudan 
IBM Linux Technology Center
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Re: [RFC PATCH 7/7 v2] ppc: add dynamic dma window support

2010-12-08 Thread Benjamin Herrenschmidt
On Tue, 2010-10-26 at 20:35 -0700, Nishanth Aravamudan wrote:

No much comments... I'm amazed how complex he firmware folks managed to
make this ... 

>  static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long 
> action, void *node)
>  {
>   int err = NOTIFY_OK;
>   struct device_node *np = node;
>   struct pci_dn *pci = PCI_DN(np);
> + struct direct_window *window;
>  
>   switch (action) {
>   case PSERIES_RECONFIG_REMOVE:
>   if (pci && pci->iommu_table)
>   iommu_free_table(pci->iommu_table, np->full_name);
> +
> + spin_lock(&direct_window_list_lock);
> + list_for_each_entry(window, &direct_window_list, list) {
> + if (window->device == np) {
> + list_del(&window->list);
> + break;
> + }
> + }
> + spin_unlock(&direct_window_list_lock);

Should you also kfree the window ?


Cheers,
Ben.


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


[RFC PATCH 7/7 v2] ppc: add dynamic dma window support

2010-10-26 Thread Nishanth Aravamudan
If firmware allows us to map all of a partition's memory for DMA on a
particular bridge, create a 1:1 mapping of that memory. Add hooks for
dealing with hotplug events. Dyanmic DMA windows can use larger than the
default page size, and we use the largest one possible.

Not-yet-signed-off-by: Nishanth Aravamudan 

---

I've tested this briefly on a machine with suitable firmware/hardware.
Things seem to work well, but I want to do more exhaustive I/O testing
before asking for upstream merging. I would really appreciate any
feedback on the updated approach.

Specific questions:

Ben, did I hook into the dma_set_mask() platform callback as you
expected? Anything I can do better or which perhaps might lead to
gotchas later?

I've added a disable_ddw option, but perhaps it would be better to
just disable the feature if iommu=force?

---
 arch/powerpc/platforms/pseries/iommu.c |  577 +++-
 1 files changed, 575 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/iommu.c 
b/arch/powerpc/platforms/pseries/iommu.c
index 45c6865..8090b6b 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -33,6 +33,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -45,6 +46,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "plpar_wrappers.h"
 
@@ -270,6 +272,139 @@ static unsigned long tce_get_pSeriesLP(struct iommu_table 
*tbl, long tcenum)
return tce_ret;
 }
 
+/* this is compatable with cells for the device tree property */
+struct dynamic_dma_window_prop {
+   __be32  liobn;  /* tce table number */
+   __be64  dma_base;   /* address hi,lo */
+   __be32  tce_shift;  /* ilog2(tce_page_size) */
+   __be32  window_shift;   /* ilog2(tce_window_size) */
+};
+
+struct direct_window {
+   struct device_node *device;
+   const struct dynamic_dma_window_prop *prop;
+   struct list_head list;
+};
+static LIST_HEAD(direct_window_list);
+/* prevents races between memory on/offline and window creation */
+static DEFINE_SPINLOCK(direct_window_list_lock);
+/* protects initializing window twice for same device */
+static DEFINE_MUTEX(direct_window_init_mutex);
+#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
+
+static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn,
+   unsigned long num_pfn, const void *arg)
+{
+   const struct dynamic_dma_window_prop *maprange = arg;
+   int rc;
+   u64 tce_size, num_tce, dma_offset, next;
+   u32 tce_shift;
+   long limit;
+
+   tce_shift = be32_to_cpu(maprange->tce_shift);
+   tce_size = 1ULL << tce_shift;
+   next = start_pfn << PAGE_SHIFT;
+   num_tce = num_pfn << PAGE_SHIFT;
+
+   /* round back to the beginning of the tce page size */
+   num_tce += next & (tce_size - 1);
+   next &= ~(tce_size - 1);
+
+   /* covert to number of tces */
+   num_tce |= tce_size - 1;
+   num_tce >>= tce_shift;
+
+   do {
+   /*
+* Set up the page with TCE data, looping through and setting
+* the values.
+*/
+   limit = min_t(long, num_tce, 512);
+   dma_offset = next + be64_to_cpu(maprange->dma_base);
+
+   rc = plpar_tce_stuff(be64_to_cpu(maprange->liobn),
+   (u64)dma_offset,
+0, limit);
+   num_tce -= limit;
+   } while (num_tce > 0 && !rc);
+
+   return rc;
+}
+
+static int tce_setrange_multi_pSeriesLP(unsigned long start_pfn,
+   unsigned long num_pfn, const void *arg)
+{
+   const struct dynamic_dma_window_prop *maprange = arg;
+   u64 *tcep, tce_size, num_tce, dma_offset, next, proto_tce, liobn;
+   u32 tce_shift;
+   u64 rc = 0;
+   long l, limit;
+
+   local_irq_disable();/* to protect tcep and the page behind it */
+   tcep = __get_cpu_var(tce_page);
+
+   if (!tcep) {
+   tcep = (u64 *)__get_free_page(GFP_ATOMIC);
+   if (!tcep) {
+   local_irq_enable();
+   return -ENOMEM;
+   }
+   __get_cpu_var(tce_page) = tcep;
+   }
+
+   proto_tce = TCE_PCI_READ | TCE_PCI_WRITE;
+
+   liobn = (u64)be32_to_cpu(maprange->liobn);
+   tce_shift = be32_to_cpu(maprange->tce_shift);
+   tce_size = 1ULL << tce_shift;
+   next = start_pfn << PAGE_SHIFT;
+   num_tce = num_pfn << PAGE_SHIFT;
+
+   /* round back to the beginning of the tce page size */
+   num_tce += next & (tce_size - 1);
+   next &= ~(tce_size - 1);
+
+   /* covert to number of tces */
+   num_tce |= tce_size - 1;
+   num_tce >>= tce_shift;
+
+   /* We can map max one pageful of TCEs at a time */
+   do {
+   /*
+*