On Mon, 24 Dec 2007 10:18:50 +0530
Balbir Singh <[EMAIL PROTECTED]> wrote:

> Hi,
> 
> I've just seen this on my dmesg, this is new, never seen this before on
> this box and it happens only with this version of the kernel.
> 
> In this configuration, the page size is set to 64K and I've enabled fake
> NUMA nodes on PowerPC.
> 
> tce_buildmulti_pSeriesLP: plpar_tce_put failed. rc=-4
>         index   = 0x4000002
>         npages  = 0x0
>         tce[0] val = 0x15ad0001
> Call Trace:
> [c00000000ffe74f0] [c0000000000491a4]
> .tce_buildmulti_pSeriesLP+0x26c/0x2ac (unreliable)
> [c00000000ffe75c0] [c0000000000295e4] .iommu_map_sg+0x1d4/0x418
> [c00000000ffe76d0] [c000000000028664] .dma_iommu_map_sg+0x3c/0x50
> [c00000000ffe7750] [c0000000003b6c30] .scsi_dma_map+0x70/0x94
> [c00000000ffe77d0] [c0000000003dedbc] .ipr_queuecommand+0x300/0x500
> [c00000000ffe7880] [c0000000003ae964] .scsi_dispatch_cmd+0x21c/0x2b8
> [c00000000ffe7920] [c0000000003b67a0] .scsi_request_fn+0x310/0x460
> [c00000000ffe79d0] [c00000000024ab90] .blk_run_queue+0x94/0xec
> [c00000000ffe7a70] [c0000000003b3b08] .scsi_run_queue+0x24c/0x27c
> [c00000000ffe7b20] [c0000000003b4424] .scsi_next_command+0x48/0x70
> [c00000000ffe7bc0] [c0000000003b4b48] .scsi_end_request+0xbc/0xe4
> [c00000000ffe7c60] [c0000000003b5294] .scsi_io_completion+0x170/0x3e8
> [c00000000ffe7d40] [c0000000003ae0e4] .scsi_finish_command+0xb4/0xd4
> [c00000000ffe7dd0] [c0000000003b584c] .scsi_softirq_done+0x114/0x138
> [c00000000ffe7e60] [c00000000024af70] .blk_done_softirq+0xa0/0xd0
> [c00000000ffe7ef0] [c00000000007a2a0] .__do_softirq+0xa8/0x164
> [c00000000ffe7f90] [c000000000027edc] .call_do_softirq+0x14/0x24
> [c00000003e183950] [c00000000000bdcc] .do_softirq+0x74/0xc0
> [c00000003e1839e0] [c00000000007a450] .irq_exit+0x5c/0xac
> [c00000003e183a60] [c00000000000c414] .do_IRQ+0x17c/0x1f4
> [c00000003e183b00] [c000000000004c24] hardware_interrupt_entry+0x24/0x28
> --- Exception: 501 at .ppc64_runlatch_off+0x28/0x60
>     LR = .pseries_dedicated_idle_sleep+0xd8/0x1a4
> [c00000003e183df0] [c000000000048494]
> .pseries_dedicated_idle_sleep+0x78/0x1a4 (unreliable)
> [c00000003e183e80] [c00000000001110c] .cpu_idle+0x10c/0x1e8
> [c00000003e183f00] [c00000000002b5b0] .start_secondary+0x1b4/0x1d8
> [c00000003e183f90] [c0000000000083c4] .start_secondary_prolog+0xc/0x10

I might have broken the IOMMU code. Can you reproduce the problem easily? If so, does reverting my IOMMU patches (I've attached a patch that reverts them) fix it?

Thanks,

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index ff2a62d..59899b2 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -244,9 +244,6 @@ config IOMMU_VMERGE
 
          Most drivers don't have this problem; it is safe to say Y here.
 
-config IOMMU_HELPER
-       def_bool PPC64
-
 config HOTPLUG_CPU
        bool "Support for enabling/disabling CPUs"
        depends on SMP && HOTPLUG && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC)
diff --git a/arch/powerpc/kernel/dma_64.c b/arch/powerpc/kernel/dma_64.c
index 6fcb7cb..1806d96 100644
--- a/arch/powerpc/kernel/dma_64.c
+++ b/arch/powerpc/kernel/dma_64.c
@@ -31,8 +31,8 @@ static inline unsigned long device_to_mask(struct device *dev)
 static void *dma_iommu_alloc_coherent(struct device *dev, size_t size,
                                      dma_addr_t *dma_handle, gfp_t flag)
 {
-       return iommu_alloc_coherent(dev, dev->archdata.dma_data, size,
-                                   dma_handle, device_to_mask(dev), flag,
+       return iommu_alloc_coherent(dev->archdata.dma_data, size, dma_handle,
+                                   device_to_mask(dev), flag,
                                    dev->archdata.numa_node);
 }
 
@@ -52,7 +52,7 @@ static dma_addr_t dma_iommu_map_single(struct device *dev, void *vaddr,
                                       size_t size,
                                       enum dma_data_direction direction)
 {
-       return iommu_map_single(dev, dev->archdata.dma_data, vaddr, size,
+       return iommu_map_single(dev->archdata.dma_data, vaddr, size,
                                device_to_mask(dev), direction);
 }
 
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 18e8860..050e9ac 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -31,7 +31,6 @@
 #include <linux/string.h>
 #include <linux/dma-mapping.h>
 #include <linux/bitops.h>
-#include <linux/iommu-helper.h>
 #include <asm/io.h>
 #include <asm/prom.h>
 #include <asm/iommu.h>
@@ -82,19 +81,17 @@ static int __init setup_iommu(char *str)
 __setup("protect4gb=", setup_protect4gb);
 __setup("iommu=", setup_iommu);
 
-static unsigned long iommu_range_alloc(struct device *dev,
-                                      struct iommu_table *tbl,
+static unsigned long iommu_range_alloc(struct iommu_table *tbl,
                                        unsigned long npages,
                                        unsigned long *handle,
                                        unsigned long mask,
                                        unsigned int align_order)
 { 
-       unsigned long n, end, start;
+       unsigned long n, end, i, start;
        unsigned long limit;
        int largealloc = npages > 15;
        int pass = 0;
        unsigned long align_mask;
-       unsigned long boundary_size;
 
        align_mask = 0xffffffffffffffffl >> (64 - align_order);
 
@@ -139,17 +136,14 @@ static unsigned long iommu_range_alloc(struct device *dev,
                        start &= mask;
        }
 
-       if (dev)
-               boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
-                                     1 << IOMMU_PAGE_SHIFT);
-       else
-               boundary_size = ALIGN(1UL << 32, 1 << IOMMU_PAGE_SHIFT);
-       /* 4GB boundary for iseries_hv_alloc and iseries_hv_map */
+       n = find_next_zero_bit(tbl->it_map, limit, start);
+
+       /* Align allocation */
+       n = (n + align_mask) & ~align_mask;
+
+       end = n + npages;
 
-       n = iommu_area_alloc(tbl->it_map, limit, start, npages,
-                            tbl->it_offset, boundary_size >> IOMMU_PAGE_SHIFT,
-                            align_mask);
-       if (n == -1) {
+       if (unlikely(end >= limit)) {
                if (likely(pass < 2)) {
                        /* First failure, just rescan the half of the table.
                         * Second failure, rescan the other half of the table.
@@ -164,7 +158,14 @@ static unsigned long iommu_range_alloc(struct device *dev,
                }
        }
 
-       end = n + npages;
+       for (i = n; i < end; i++)
+               if (test_bit(i, tbl->it_map)) {
+                       start = i+1;
+                       goto again;
+               }
+
+       for (i = n; i < end; i++)
+               __set_bit(i, tbl->it_map);
 
        /* Bump the hint to a new block for small allocs. */
        if (largealloc) {
@@ -183,17 +184,16 @@ static unsigned long iommu_range_alloc(struct device *dev,
        return n;
 }
 
-static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
-                             void *page, unsigned int npages,
-                             enum dma_data_direction direction,
-                             unsigned long mask, unsigned int align_order)
+static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *page,
+                      unsigned int npages, enum dma_data_direction direction,
+                      unsigned long mask, unsigned int align_order)
 {
        unsigned long entry, flags;
        dma_addr_t ret = DMA_ERROR_CODE;
 
        spin_lock_irqsave(&(tbl->it_lock), flags);
 
-       entry = iommu_range_alloc(dev, tbl, npages, NULL, mask, align_order);
+       entry = iommu_range_alloc(tbl, npages, NULL, mask, align_order);
 
        if (unlikely(entry == DMA_ERROR_CODE)) {
                spin_unlock_irqrestore(&(tbl->it_lock), flags);
@@ -224,6 +224,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
                         unsigned int npages)
 {
        unsigned long entry, free_entry;
+       unsigned long i;
 
        entry = dma_addr >> IOMMU_PAGE_SHIFT;
        free_entry = entry - tbl->it_offset;
@@ -245,7 +246,9 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
        }
 
        ppc_md.tce_free(tbl, entry, npages);
-       iommu_area_free(tbl->it_map, free_entry, npages);
+       
+       for (i = 0; i < npages; i++)
+               __clear_bit(free_entry+i, tbl->it_map);
 }
 
 static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
@@ -309,8 +312,7 @@ int iommu_map_sg(struct device *dev, struct scatterlist *sglist,
                /* Allocate iommu entries for that segment */
                vaddr = (unsigned long) sg_virt(s);
                npages = iommu_num_pages(vaddr, slen);
-               entry = iommu_range_alloc(dev, tbl, npages, &handle,
-                                         mask >> IOMMU_PAGE_SHIFT, 0);
+               entry = iommu_range_alloc(tbl, npages, &handle, mask >> IOMMU_PAGE_SHIFT, 0);
 
                DBG("  - vaddr: %lx, size: %lx\n", vaddr, slen);
 
@@ -448,6 +450,9 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
 struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
 {
        unsigned long sz;
+       unsigned long start_index, end_index;
+       unsigned long entries_per_4g;
+       unsigned long index;
        static int welcomed = 0;
        struct page *page;
 
@@ -469,7 +474,6 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
 
 #ifdef CONFIG_CRASH_DUMP
        if (ppc_md.tce_get) {
-               unsigned long index;
                unsigned long tceval;
                unsigned long tcecount = 0;
 
@@ -500,6 +504,23 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
        ppc_md.tce_free(tbl, tbl->it_offset, tbl->it_size);
 #endif
 
+       /*
+        * DMA cannot cross 4 GB boundary.  Mark last entry of each 4
+        * GB chunk as reserved.
+        */
+       if (protect4gb) {
+               entries_per_4g = 0x100000000l >> IOMMU_PAGE_SHIFT;
+
+               /* Mark the last bit before a 4GB boundary as used */
+               start_index = tbl->it_offset | (entries_per_4g - 1);
+               start_index -= tbl->it_offset;
+
+               end_index = tbl->it_size;
+
+               for (index = start_index; index < end_index - 1; index += entries_per_4g)
+                       __set_bit(index, tbl->it_map);
+       }
+
        if (!welcomed) {
                printk(KERN_INFO "IOMMU table initialized, virtual merging %s\n",
                       novmerge ? "disabled" : "enabled");
@@ -547,9 +568,9 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
  * need not be page aligned, the dma_addr_t returned will point to the same
  * byte within the page as vaddr.
  */
-dma_addr_t iommu_map_single(struct device *dev, struct iommu_table *tbl,
-                           void *vaddr, size_t size, unsigned long mask,
-                           enum dma_data_direction direction)
+dma_addr_t iommu_map_single(struct iommu_table *tbl, void *vaddr,
+               size_t size, unsigned long mask,
+               enum dma_data_direction direction)
 {
        dma_addr_t dma_handle = DMA_ERROR_CODE;
        unsigned long uaddr;
@@ -561,7 +582,7 @@ dma_addr_t iommu_map_single(struct device *dev, struct iommu_table *tbl,
        npages = iommu_num_pages(uaddr, size);
 
        if (tbl) {
-               dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction,
+               dma_handle = iommu_alloc(tbl, vaddr, npages, direction,
                                         mask >> IOMMU_PAGE_SHIFT, 0);
                if (dma_handle == DMA_ERROR_CODE) {
                        if (printk_ratelimit())  {
@@ -593,9 +614,8 @@ void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle,
  * Returns the virtual address of the buffer and sets dma_handle
  * to the dma address (mapping) of the first page.
  */
-void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
-                          size_t size, dma_addr_t *dma_handle,
-                          unsigned long mask, gfp_t flag, int node)
+void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size,
+               dma_addr_t *dma_handle, unsigned long mask, gfp_t flag, int node)
 {
        void *ret = NULL;
        dma_addr_t mapping;
@@ -629,7 +649,7 @@ void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
        /* Set up tces to cover the allocated range */
        nio_pages = size >> IOMMU_PAGE_SHIFT;
        io_order = get_iommu_order(size);
-       mapping = iommu_alloc(dev, tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
+       mapping = iommu_alloc(tbl, ret, nio_pages, DMA_BIDIRECTIONAL,
                              mask >> IOMMU_PAGE_SHIFT, io_order);
        if (mapping == DMA_ERROR_CODE) {
                free_pages((unsigned long)ret, order);
diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c
index 11fa3c7..6a0c6f6 100644
--- a/arch/powerpc/platforms/iseries/iommu.c
+++ b/arch/powerpc/platforms/iseries/iommu.c
@@ -199,7 +199,7 @@ static struct iommu_table vio_iommu_table;
 
 void *iseries_hv_alloc(size_t size, dma_addr_t *dma_handle, gfp_t flag)
 {
-       return iommu_alloc_coherent(NULL, &vio_iommu_table, size, dma_handle,
+       return iommu_alloc_coherent(&vio_iommu_table, size, dma_handle,
                                DMA_32BIT_MASK, flag, -1);
 }
 EXPORT_SYMBOL_GPL(iseries_hv_alloc);
@@ -213,7 +213,7 @@ EXPORT_SYMBOL_GPL(iseries_hv_free);
 dma_addr_t iseries_hv_map(void *vaddr, size_t size,
                        enum dma_data_direction direction)
 {
-       return iommu_map_single(NULL, &vio_iommu_table, vaddr, size,
+       return iommu_map_single(&vio_iommu_table, vaddr, size,
                                DMA_32BIT_MASK, direction);
 }
 
diff --git a/include/asm-powerpc/iommu.h b/include/asm-powerpc/iommu.h
index 852e15f..a07a67c 100644
--- a/include/asm-powerpc/iommu.h
+++ b/include/asm-powerpc/iommu.h
@@ -85,13 +85,13 @@ extern int iommu_map_sg(struct device *dev, struct scatterlist *sglist,
 extern void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist,
                           int nelems, enum dma_data_direction direction);
 
-extern void *iommu_alloc_coherent(struct device *dev, struct iommu_table *tbl,
-                                 size_t size, dma_addr_t *dma_handle,
-                                 unsigned long mask, gfp_t flag, int node);
+extern void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size,
+                                 dma_addr_t *dma_handle, unsigned long mask,
+                                 gfp_t flag, int node);
 extern void iommu_free_coherent(struct iommu_table *tbl, size_t size,
                                void *vaddr, dma_addr_t dma_handle);
-extern dma_addr_t iommu_map_single(struct device *dev, struct iommu_table *tbl,
-                                  void *vaddr, size_t size, unsigned long mask,
+extern dma_addr_t iommu_map_single(struct iommu_table *tbl, void *vaddr,
+                                  size_t size, unsigned long mask,
                                   enum dma_data_direction direction);
 extern void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle,
                               size_t size, enum dma_data_direction direction);
-
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to