[PATCH 14/15] x86/numa: remove redundant iteration over memblock.reserved

2020-07-27 Thread Mike Rapoport
From: Mike Rapoport 

The numa_clear_kernel_node_hotplug() function first traverses numa_meminfo
regions to set the node ID in memblock.reserved, and then traverses
memblock.reserved to update reserved_nodemask with the node IDs that were
set in the first loop.

Remove redundant traversal over memblock.reserved and update
reserved_nodemask while iterating over numa_meminfo.
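
After the change, a single pass does both jobs. Restated without the diff
context below (a sketch, not the literal patch hunk):

	for (i = 0; i < numa_meminfo.nr_blks; i++) {
		struct numa_memblk *mb = numa_meminfo.blk + i;
		int ret;

		/* split memblock.reserved along node boundaries, set nids */
		ret = memblock_set_node(mb->start, mb->end - mb->start,
					&memblock.reserved, mb->nid);
		WARN_ON_ONCE(ret);

		/* record the node for the later MEMBLOCK_HOTPLUG clearing */
		node_set(mb->nid, reserved_nodemask);
	}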

Signed-off-by: Mike Rapoport 
---
 arch/x86/mm/numa.c | 26 ++
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 8ee952038c80..4078abd33938 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -498,31 +498,25 @@ static void __init numa_clear_kernel_node_hotplug(void)
 * and use those ranges to set the nid in memblock.reserved.
 * This will split up the memblock regions along node
 * boundaries and will set the node IDs as well.
+*
+* The nid will also be set in reserved_nodemask which is later
+* used to clear MEMBLOCK_HOTPLUG flag.
+*
+* [ Note, when booting with mem=nn[kMG] or in a kdump kernel,
+*   numa_meminfo might not include all memblock.reserved
+*   memory ranges, because quirks such as trim_snb_memory()
+*   reserve specific pages for Sandy Bridge graphics.
+*   These ranges will remain with nid == MAX_NUMNODES. ]
 */
for (i = 0; i < numa_meminfo.nr_blks; i++) {
struct numa_memblk *mb = numa_meminfo.blk + i;
int ret;
 
		ret = memblock_set_node(mb->start, mb->end - mb->start,
					&memblock.reserved, mb->nid);
+   node_set(mb->nid, reserved_nodemask);
WARN_ON_ONCE(ret);
}
 
-   /*
-* Now go over all reserved memblock regions, to construct a
-* node mask of all kernel reserved memory areas.
-*
-* [ Note, when booting with mem=nn[kMG] or in a kdump kernel,
-*   numa_meminfo might not include all memblock.reserved
-*   memory ranges, because quirks such as trim_snb_memory()
-*   reserve specific pages for Sandy Bridge graphics. ]
-*/
-   for_each_memblock(reserved, mb_region) {
-   int nid = memblock_get_region_node(mb_region);
-
-   if (nid != MAX_NUMNODES)
-   node_set(nid, reserved_nodemask);
-   }
-
/*
 * Finally, clear the MEMBLOCK_HOTPLUG flag for all memory
 * belonging to the reserved node mask.
-- 
2.26.2



[PATCH 13/15] arch, drivers: replace for_each_memblock() with for_each_mem_range()

2020-07-27 Thread Mike Rapoport
From: Mike Rapoport 

There are several occurrences of the following pattern:

for_each_memblock(memory, reg) {
start = __pfn_to_phys(memblock_region_memory_base_pfn(reg));
end = __pfn_to_phys(memblock_region_memory_end_pfn(reg));

/* do something with start and end */
}

Using for_each_mem_range() iterator is more appropriate in such cases and
allows simpler and cleaner code.
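
After conversion, such loops take the following shape (a sketch of the
target pattern; start and end are phys_addr_t and i is a u64 cookie used
by the iterator):

	phys_addr_t start, end;
	u64 i;

	for_each_mem_range(i, &start, &end) {
		/* do something with start and end */
	}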

Signed-off-by: Mike Rapoport 
---
 arch/arm/kernel/setup.c  | 18 +++
 arch/arm/mm/mmu.c| 39 
 arch/arm/mm/pmsa-v7.c| 20 ++--
 arch/arm/mm/pmsa-v8.c| 17 +--
 arch/arm/xen/mm.c|  7 +++--
 arch/arm64/mm/kasan_init.c   |  8 ++---
 arch/arm64/mm/mmu.c  | 11 ++-
 arch/c6x/kernel/setup.c  |  9 +++---
 arch/microblaze/mm/init.c|  9 +++---
 arch/mips/cavium-octeon/dma-octeon.c | 12 
 arch/mips/kernel/setup.c | 31 +--
 arch/openrisc/mm/init.c  |  8 +++--
 arch/powerpc/kernel/fadump.c | 27 +++-
 arch/powerpc/mm/book3s64/hash_utils.c| 16 +-
 arch/powerpc/mm/book3s64/radix_pgtable.c | 11 +++
 arch/powerpc/mm/kasan/kasan_init_32.c|  8 ++---
 arch/powerpc/mm/mem.c| 16 ++
 arch/powerpc/mm/pgtable_32.c |  8 ++---
 arch/riscv/mm/init.c | 24 ++-
 arch/riscv/mm/kasan_init.c   | 10 +++---
 arch/s390/kernel/setup.c | 27 ++--
 arch/s390/mm/vmem.c  | 16 +-
 arch/sparc/mm/init_64.c  | 12 +++-
 drivers/bus/mvebu-mbus.c | 12 
 drivers/s390/char/zcore.c|  9 +++---
 25 files changed, 187 insertions(+), 198 deletions(-)

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index d8e18cdd96d3..3f65d0ac9f63 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -843,19 +843,25 @@ early_param("mem", early_mem);
 
 static void __init request_standard_resources(const struct machine_desc *mdesc)
 {
-   struct memblock_region *region;
+   phys_addr_t start, end, res_end;
struct resource *res;
+   u64 i;
 
kernel_code.start   = virt_to_phys(_text);
kernel_code.end = virt_to_phys(__init_begin - 1);
kernel_data.start   = virt_to_phys(_sdata);
kernel_data.end = virt_to_phys(_end - 1);
 
-	for_each_memblock(memory, region) {
-		phys_addr_t start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
-		phys_addr_t end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
+	for_each_mem_range(i, &start, &end) {
unsigned long boot_alias_start;
 
+   /*
+* In memblock, end points to the first byte after the
+* range while in resources, end points to the last byte in
+* the range.
+*/
+   res_end = end - 1;
+
/*
 * Some systems have a special memory alias which is only
 * used for booting.  We need to advertise this region to
@@ -869,7 +875,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc)
  __func__, sizeof(*res));
res->name = "System RAM (boot alias)";
res->start = boot_alias_start;
-   res->end = phys_to_idmap(end);
+   res->end = phys_to_idmap(res_end);
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
request_resource(_resource, res);
}
@@ -880,7 +886,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc)
  sizeof(*res));
res->name  = "System RAM";
res->start = start;
-   res->end = end;
+   res->end = res_end;
res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
request_resource(_resource, res);
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 628028bfbb92..a149d9cb4fdb 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1155,9 +1155,8 @@ phys_addr_t arm_lowmem_limit __initdata = 0;
 
 void __init adjust_lowmem_bounds(void)
 {
-   phys_addr_t memblock_limit = 0;
-   u64 vmalloc_limit;
-   struct memblock_region *reg;
+   phys_addr_t block_start, block_end, memblock_limit = 0;
+   u64 vmalloc_limit, i;
phys_addr_t lowmem_limit = 0;
 
/*
@@ -1173,26 +1172,18 @@ void __init adjust_lowmem_bounds(void)
 * The first usable region must be PMD aligned. Mark its start
 * as MEMBLOCK_NOMAP if it isn't
   

[PATCH 15/15] memblock: remove 'type' parameter from for_each_memblock()

2020-07-27 Thread Mike Rapoport
From: Mike Rapoport 

for_each_memblock() is used exclusively to iterate over memblock.memory in
a few places that use data from memblock_region rather than the memory
ranges.

Remove the 'type' parameter from the for_each_memblock() iterator to
improve the encapsulation of memblock internals from its users.
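
For example, a call site changes like this (a sketch; do_something() is a
hypothetical stand-in for the loop body):

	struct memblock_region *region;

	/* before: the caller spells out the memblock type */
	for_each_memblock(memory, region)
		do_something(region);

	/* after: the iterator is hardwired to memblock.memory */
	for_each_memblock(region)
		do_something(region);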

Signed-off-by: Mike Rapoport 
---
 arch/arm64/kernel/setup.c  |  2 +-
 arch/arm64/mm/numa.c   |  2 +-
 arch/mips/netlogic/xlp/setup.c |  2 +-
 include/linux/memblock.h   | 10 +++---
 mm/memblock.c  |  4 ++--
 mm/page_alloc.c|  8 
 6 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 93b3844cf442..23da7908cbed 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -217,7 +217,7 @@ static void __init request_standard_resources(void)
if (!standard_resources)
panic("%s: Failed to allocate %zu bytes\n", __func__, res_size);
 
-   for_each_memblock(memory, region) {
+   for_each_memblock(region) {
		res = &standard_resources[i++];
if (memblock_is_nomap(region)) {
res->name  = "reserved";
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index 0cbdbcc885fb..08721d2c0b79 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -350,7 +350,7 @@ static int __init numa_register_nodes(void)
struct memblock_region *mblk;
 
/* Check that valid nid is set to memblks */
-   for_each_memblock(memory, mblk) {
+   for_each_memblock(mblk) {
int mblk_nid = memblock_get_region_node(mblk);
 
if (mblk_nid == NUMA_NO_NODE || mblk_nid >= MAX_NUMNODES) {
diff --git a/arch/mips/netlogic/xlp/setup.c b/arch/mips/netlogic/xlp/setup.c
index 1a0fc5b62ba4..e69d9fc468cf 100644
--- a/arch/mips/netlogic/xlp/setup.c
+++ b/arch/mips/netlogic/xlp/setup.c
@@ -70,7 +70,7 @@ static void nlm_fixup_mem(void)
const int pref_backup = 512;
struct memblock_region *mem;
 
-   for_each_memblock(memory, mem) {
+   for_each_memblock(mem) {
memblock_remove(mem->base + mem->size - pref_backup,
pref_backup);
}
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index d70c2835e913..c901cb8ecf92 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -527,9 +527,13 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo
return PFN_UP(reg->base + reg->size);
 }
 
-#define for_each_memblock(memblock_type, region)			\
-	for (region = memblock.memblock_type.regions;			\
-	     region < (memblock.memblock_type.regions + memblock.memblock_type.cnt);\
+/**
+ * for_each_memblock - iterate over registered memory regions
+ * @region: loop variable
+ */
+#define for_each_memblock(region)  \
+   for (region = memblock.memory.regions;  \
+region < (memblock.memory.regions + memblock.memory.cnt);  \
 region++)
 
 extern void *alloc_large_system_hash(const char *tablename,
diff --git a/mm/memblock.c b/mm/memblock.c
index 2ad5e6e47215..550bb72cf6cb 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1694,7 +1694,7 @@ static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit)
 * the memory memblock regions, if the @limit exceeds the total size
 * of those regions, max_addr will keep original value PHYS_ADDR_MAX
 */
-   for_each_memblock(memory, r) {
+   for_each_memblock(r) {
if (limit <= r->size) {
max_addr = r->base + limit;
break;
@@ -1864,7 +1864,7 @@ void __init_memblock memblock_trim_memory(phys_addr_t align)
phys_addr_t start, end, orig_start, orig_end;
struct memblock_region *r;
 
-   for_each_memblock(memory, r) {
+   for_each_memblock(r) {
orig_start = r->base;
orig_end = r->base + r->size;
start = round_up(orig_start, align);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 95af111d69d3..8a19f46dc86e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5927,7 +5927,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
 
if (mirrored_kernelcore && zone == ZONE_MOVABLE) {
if (!r || *pfn >= memblock_region_memory_end_pfn(r)) {
-   for_each_memblock(memory, r) {
+   for_each_memblock(r) {
if (*pfn < memblock_region_memory_end_pfn(r))
break;
}
@@ -6528,7 +6528,7 @@ static unsigned long __init zone_absent_pages_in_node(int nid,
unsigned long start_pfn, end_pfn;
struct memblock_region *r;
 
- 

[PATCH 09/15] memblock: make for_each_memblock_type() iterator private

2020-07-27 Thread Mike Rapoport
From: Mike Rapoport 

for_each_memblock_type() is not used outside mm/memblock.c, move it there
from include/linux/memblock.h

Signed-off-by: Mike Rapoport 
---
 include/linux/memblock.h | 5 -
 mm/memblock.c| 5 +
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 017fae833d4a..220b5f0dad42 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -532,11 +532,6 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo
 	     region < (memblock.memblock_type.regions + memblock.memblock_type.cnt);\
 region++)
 
-#define for_each_memblock_type(i, memblock_type, rgn)		\
-	for (i = 0, rgn = &memblock_type->regions[0];		\
-	     i < memblock_type->cnt;				\
-	     i++, rgn = &memblock_type->regions[i])
-
 extern void *alloc_large_system_hash(const char *tablename,
 unsigned long bucketsize,
 unsigned long numentries,
diff --git a/mm/memblock.c b/mm/memblock.c
index 39aceafc57f6..a5b9b3df81fc 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -129,6 +129,11 @@ struct memblock memblock __initdata_memblock = {
.current_limit  = MEMBLOCK_ALLOC_ANYWHERE,
 };
 
+#define for_each_memblock_type(i, memblock_type, rgn)		\
+	for (i = 0, rgn = &memblock_type->regions[0];		\
+	     i < memblock_type->cnt;				\
+	     i++, rgn = &memblock_type->regions[i])
+
 int memblock_debug __initdata_memblock;
 static bool system_has_some_mirror __initdata_memblock = false;
 static int memblock_can_resize __initdata_memblock;
-- 
2.26.2



[PATCH 12/15] arch, mm: replace for_each_memblock() with for_each_mem_pfn_range()

2020-07-27 Thread Mike Rapoport
From: Mike Rapoport 

There are several occurrences of the following pattern:

for_each_memblock(memory, reg) {
start_pfn = memblock_region_memory_base_pfn(reg);
end_pfn = memblock_region_memory_end_pfn(reg);

/* do something with start_pfn and end_pfn */
}

Rather than iterate over all memblock.memory regions and each time query
for their start and end PFNs, use for_each_mem_pfn_range() iterator to get
simpler and clearer code.
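
The converted sites follow this shape (a sketch; passing NUMA_NO_NODE
selects ranges on all nodes and the trailing NULL skips the nid output):

	unsigned long start_pfn, end_pfn;
	int i;

	for_each_mem_pfn_range(i, NUMA_NO_NODE, &start_pfn, &end_pfn, NULL) {
		/* do something with start_pfn and end_pfn */
	}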

Signed-off-by: Mike Rapoport 
---
 arch/arm/mm/init.c   | 11 ---
 arch/arm64/mm/init.c | 11 ---
 arch/powerpc/kernel/fadump.c | 11 ++-
 arch/powerpc/mm/mem.c| 15 ---
 arch/powerpc/mm/numa.c   |  7 ++-
 arch/s390/mm/page-states.c   |  6 ++
 arch/sh/mm/init.c|  9 +++--
 mm/memblock.c|  6 ++
 mm/sparse.c  | 10 --
 9 files changed, 35 insertions(+), 51 deletions(-)

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 626af348eb8f..bb56668b4f54 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -304,16 +304,14 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn)
  */
 static void __init free_unused_memmap(void)
 {
-   unsigned long start, prev_end = 0;
-   struct memblock_region *reg;
+   unsigned long start, end, prev_end = 0;
+   int i;
 
/*
 * This relies on each bank being in address order.
 * The banks are sorted previously in bootmem_init().
 */
-   for_each_memblock(memory, reg) {
-   start = memblock_region_memory_base_pfn(reg);
-
+	for_each_mem_pfn_range(i, NUMA_NO_NODE, &start, &end, NULL) {
 #ifdef CONFIG_SPARSEMEM
/*
 * Take care not to free memmap entries that don't exist
@@ -341,8 +339,7 @@ static void __init free_unused_memmap(void)
 * memmap entries are valid from the bank end aligned to
 * MAX_ORDER_NR_PAGES.
 */
-   prev_end = ALIGN(memblock_region_memory_end_pfn(reg),
-MAX_ORDER_NR_PAGES);
+   prev_end = ALIGN(end, MAX_ORDER_NR_PAGES);
}
 
 #ifdef CONFIG_SPARSEMEM
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 1e93cfc7c47a..271a8ea32482 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -473,12 +473,10 @@ static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn)
  */
 static void __init free_unused_memmap(void)
 {
-   unsigned long start, prev_end = 0;
-   struct memblock_region *reg;
-
-   for_each_memblock(memory, reg) {
-   start = __phys_to_pfn(reg->base);
+   unsigned long start, end, prev_end = 0;
+   int i;
 
+	for_each_mem_pfn_range(i, NUMA_NO_NODE, &start, &end, NULL) {
 #ifdef CONFIG_SPARSEMEM
/*
 * Take care not to free memmap entries that don't exist due
@@ -498,8 +496,7 @@ static void __init free_unused_memmap(void)
 * memmap entries are valid from the bank end aligned to
 * MAX_ORDER_NR_PAGES.
 */
-   prev_end = ALIGN(__phys_to_pfn(reg->base + reg->size),
-MAX_ORDER_NR_PAGES);
+   prev_end = ALIGN(end, MAX_ORDER_NR_PAGES);
}
 
 #ifdef CONFIG_SPARSEMEM
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 2446a61e3c25..fdbafe417139 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -1216,14 +1216,15 @@ static void fadump_free_reserved_memory(unsigned long start_pfn,
  */
 static void fadump_release_reserved_area(u64 start, u64 end)
 {
-   u64 tstart, tend, spfn, epfn;
-   struct memblock_region *reg;
+   u64 tstart, tend, spfn, epfn, reg_spfn, reg_epfn, i;
 
spfn = PHYS_PFN(start);
epfn = PHYS_PFN(end);
-   for_each_memblock(memory, reg) {
-   tstart = max_t(u64, spfn, memblock_region_memory_base_pfn(reg));
-   tend   = min_t(u64, epfn, memblock_region_memory_end_pfn(reg));
+
+	for_each_mem_pfn_range(i, NUMA_NO_NODE, &reg_spfn, &reg_epfn, NULL) {
+   tstart = max_t(u64, spfn, reg_spfn);
+   tend   = min_t(u64, epfn, reg_epfn);
+
if (tstart < tend) {
fadump_free_reserved_memory(tstart, tend);
 
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index c2c11eb8dcfc..38d1acd7c8ef 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -192,15 +192,16 @@ void __init initmem_init(void)
 /* mark pages that don't exist as nosave */
 static int __init mark_nonram_nosave(void)
 {
-   struct memblock_region *reg, *prev = NULL;
+   unsigned long spfn, epfn, prev = 0;
+   int i;
 
-   for_each_memblock(memory, reg) {
-   if (prev &&
-   memblock_region_memory_end_pfn(prev) < 

[PATCH 08/15] microblaze: drop unneeded NUMA and sparsemem initializations

2020-07-27 Thread Mike Rapoport
From: Mike Rapoport 

microblaze supports neither NUMA nor SPARSEMEM, so there is no point in
calling memblock_set_node() and sparse_memory_present_with_active_regions()
during microblaze memory initialization.

Remove these calls and the surrounding code.

Signed-off-by: Mike Rapoport 
---
 arch/microblaze/mm/init.c | 17 +
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index 521b59ba716c..49e0c241f9b1 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -105,9 +105,8 @@ static void __init paging_init(void)
 
 void __init setup_memory(void)
 {
-   struct memblock_region *reg;
-
 #ifndef CONFIG_MMU
+   struct memblock_region *reg;
u32 kernel_align_start, kernel_align_size;
 
/* Find main memory where is the kernel */
@@ -161,20 +160,6 @@ void __init setup_memory(void)
pr_info("%s: max_low_pfn: %#lx\n", __func__, max_low_pfn);
pr_info("%s: max_pfn: %#lx\n", __func__, max_pfn);
 
-   /* Add active regions with valid PFNs */
-   for_each_memblock(memory, reg) {
-   unsigned long start_pfn, end_pfn;
-
-   start_pfn = memblock_region_memory_base_pfn(reg);
-   end_pfn = memblock_region_memory_end_pfn(reg);
-		memblock_set_node(start_pfn << PAGE_SHIFT,
-				  (end_pfn - start_pfn) << PAGE_SHIFT,
-				  &memblock.memory, 0);
-   }
-
-   /* XXX need to clip this if using highmem? */
-   sparse_memory_present_with_active_regions(0);
-
paging_init();
 }
 
-- 
2.26.2



[PATCH 11/15] memblock: reduce number of parameters in for_each_mem_range()

2020-07-27 Thread Mike Rapoport
From: Mike Rapoport 

Currently the for_each_mem_range() iterator is the most generic way to
traverse memblock regions. As such, it has 8 parameters and is hardly
convenient to use. Most users choose one of its wrappers, and the only
user outside memblock that actually needs most of the parameters is the
s390 crash dump implementation.

To avoid yet another naming for memblock iterators, rename the existing
for_each_mem_range() to __for_each_mem_range() and add a new
for_each_mem_range() wrapper with only index, start and end parameters.

The new wrapper nicely fits into init_unavailable_mem() and will be used in
upcoming changes to simplify memblock traversals.
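
The shape of the new wrapper, inferred from the converted call sites
(a sketch; the authoritative definition is the one added to
include/linux/memblock.h by this patch):

	#define for_each_mem_range(i, p_start, p_end)			\
		__for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,	\
				     MEMBLOCK_NONE, p_start, p_end, NULL)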

Signed-off-by: Mike Rapoport 
---
 .clang-format  |  1 +
 arch/arm64/kernel/machine_kexec_file.c |  6 ++
 arch/s390/kernel/crash_dump.c  |  8 
 include/linux/memblock.h   | 18 ++
 mm/page_alloc.c|  3 +--
 5 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/.clang-format b/.clang-format
index a0a96088c74f..52ededab25ce 100644
--- a/.clang-format
+++ b/.clang-format
@@ -205,6 +205,7 @@ ForEachMacros:
   - 'for_each_memblock_type'
   - 'for_each_memcg_cache_index'
   - 'for_each_mem_pfn_range'
+  - '__for_each_mem_range'
   - 'for_each_mem_range'
   - 'for_each_mem_range_rev'
   - 'for_each_migratetype_order'
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index 361a1143e09e..5b0e67b93cdc 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -215,8 +215,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
phys_addr_t start, end;
 
nr_ranges = 1; /* for exclusion of crashkernel region */
-	for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,
-			   MEMBLOCK_NONE, &start, &end, NULL)
+	for_each_mem_range(i, &start, &end)
nr_ranges++;
 
cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL);
@@ -225,8 +224,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
 
cmem->max_nr_ranges = nr_ranges;
cmem->nr_ranges = 0;
-	for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,
-			   MEMBLOCK_NONE, &start, &end, NULL) {
+	for_each_mem_range(i, &start, &end) {
cmem->ranges[cmem->nr_ranges].start = start;
cmem->ranges[cmem->nr_ranges].end = end - 1;
cmem->nr_ranges++;
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index f96a5857bbfd..e28085c725ff 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -549,8 +549,8 @@ static int get_mem_chunk_cnt(void)
int cnt = 0;
u64 idx;
 
-	for_each_mem_range(idx, &memblock.memory, &oldmem_type, NUMA_NO_NODE,
-			   MEMBLOCK_NONE, NULL, NULL, NULL)
+	__for_each_mem_range(idx, &memblock.memory, &oldmem_type, NUMA_NO_NODE,
+			     MEMBLOCK_NONE, NULL, NULL, NULL)
cnt++;
return cnt;
 }
@@ -563,8 +563,8 @@ static void loads_init(Elf64_Phdr *phdr, u64 loads_offset)
phys_addr_t start, end;
u64 idx;
 
-	for_each_mem_range(idx, &memblock.memory, &oldmem_type, NUMA_NO_NODE,
-			   MEMBLOCK_NONE, &start, &end, NULL) {
+	__for_each_mem_range(idx, &memblock.memory, &oldmem_type, NUMA_NO_NODE,
+			     MEMBLOCK_NONE, &start, &end, NULL) {
phdr->p_filesz = end - start;
phdr->p_type = PT_LOAD;
phdr->p_offset = start;
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index e6a23b3db696..d70c2835e913 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -142,7 +142,7 @@ void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start,
 void __memblock_free_late(phys_addr_t base, phys_addr_t size);
 
 /**
- * for_each_mem_range - iterate through memblock areas from type_a and not
+ * __for_each_mem_range - iterate through memblock areas from type_a and not
  * included in type_b. Or just type_a if type_b is NULL.
  * @i: u64 used as loop variable
  * @type_a: ptr to memblock_type to iterate
@@ -153,7 +153,7 @@ void __memblock_free_late(phys_addr_t base, phys_addr_t size);
  * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @p_nid: ptr to int for nid of the range, can be %NULL
  */
-#define for_each_mem_range(i, type_a, type_b, nid, flags,	\
+#define __for_each_mem_range(i, type_a, type_b, nid, flags,	\
 			   p_start, p_end, p_nid)		\
 	for (i = 0, __next_mem_range(&i, nid, flags, type_a, type_b,	\
 				     p_start, p_end, p_nid);		\
@@ -182,6 +182,16 @@ void __memblock_free_late(phys_addr_t base, phys_addr_t size);
 		     __next_mem_range_rev(&i, nid, flags, type_a, type_b,	\
 					  p_start, p_end, p_nid))
 

[PATCH 10/15] memblock: make memblock_debug and related functionality private

2020-07-27 Thread Mike Rapoport
From: Mike Rapoport 

The only user of memblock_dbg() outside memblock was the s390 setup code,
and it is converted to use pr_debug() instead.
This allows us to stop exposing memblock_debug and memblock_dbg() to the
rest of the kernel.

Signed-off-by: Mike Rapoport 
---
 arch/s390/kernel/setup.c |  4 ++--
 include/linux/memblock.h | 12 +---
 mm/memblock.c| 13 +++--
 3 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 07aa15ba43b3..8b284cf6e199 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -776,8 +776,8 @@ static void __init memblock_add_mem_detect_info(void)
unsigned long start, end;
int i;
 
-   memblock_dbg("physmem info source: %s (%hhd)\n",
-get_mem_info_source(), mem_detect.info_source);
+   pr_debug("physmem info source: %s (%hhd)\n",
+get_mem_info_source(), mem_detect.info_source);
/* keep memblock lists close to the kernel */
memblock_set_bottom_up(true);
	for_each_mem_detect_block(i, &start, &end) {
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 220b5f0dad42..e6a23b3db696 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -90,7 +90,6 @@ struct memblock {
 };
 
 extern struct memblock memblock;
-extern int memblock_debug;
 
 #ifndef CONFIG_ARCH_KEEP_MEMBLOCK
 #define __init_memblock __meminit
@@ -102,9 +101,6 @@ void memblock_discard(void);
 static inline void memblock_discard(void) {}
 #endif
 
-#define memblock_dbg(fmt, ...) \
-   if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
-
 phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
   phys_addr_t size, phys_addr_t align);
 void memblock_allow_resize(void);
@@ -456,13 +452,7 @@ bool memblock_is_region_memory(phys_addr_t base, phys_addr_t size);
 bool memblock_is_reserved(phys_addr_t addr);
 bool memblock_is_region_reserved(phys_addr_t base, phys_addr_t size);
 
-extern void __memblock_dump_all(void);
-
-static inline void memblock_dump_all(void)
-{
-   if (memblock_debug)
-   __memblock_dump_all();
-}
+void memblock_dump_all(void);
 
 /**
  * memblock_set_current_limit - Set the current allocation limit to allow
diff --git a/mm/memblock.c b/mm/memblock.c
index a5b9b3df81fc..824938849f6d 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -134,7 +134,10 @@ struct memblock memblock __initdata_memblock = {
 i < memblock_type->cnt;\
 i++, rgn = _type->regions[i])
 
-int memblock_debug __initdata_memblock;
+#define memblock_dbg(fmt, ...) \
+   if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
+
+static int memblock_debug __initdata_memblock;
 static bool system_has_some_mirror __initdata_memblock = false;
 static int memblock_can_resize __initdata_memblock;
 static int memblock_memory_in_slab __initdata_memblock = 0;
@@ -1919,7 +1922,7 @@ static void __init_memblock memblock_dump(struct memblock_type *type)
}
 }
 
-void __init_memblock __memblock_dump_all(void)
+static void __init_memblock __memblock_dump_all(void)
 {
pr_info("MEMBLOCK configuration:\n");
pr_info(" memory size = %pa reserved size = %pa\n",
@@ -1933,6 +1936,12 @@ void __init_memblock __memblock_dump_all(void)
 #endif
 }
 
+void __init_memblock memblock_dump_all(void)
+{
+   if (memblock_debug)
+   __memblock_dump_all();
+}
+
 void __init memblock_allow_resize(void)
 {
memblock_can_resize = 1;
-- 
2.26.2



[PATCH 06/15] powerpc: fadump: simplify fadump_reserve_crash_area()

2020-07-27 Thread Mike Rapoport
From: Mike Rapoport 

fadump_reserve_crash_area() reserves memory from a specified base address
to the end of RAM.

Replace the iteration through memblock.memory with a single
memblock_reserve() call covering the whole range from the base address to
the end of RAM.

Signed-off-by: Mike Rapoport 
---
 arch/powerpc/kernel/fadump.c | 20 +---
 1 file changed, 1 insertion(+), 19 deletions(-)

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 78ab9a6ee6ac..2446a61e3c25 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -1658,25 +1658,7 @@ int __init fadump_reserve_mem(void)
 /* Preserve everything above the base address */
 static void __init fadump_reserve_crash_area(u64 base)
 {
-   struct memblock_region *reg;
-   u64 mstart, msize;
-
-   for_each_memblock(memory, reg) {
-   mstart = reg->base;
-   msize  = reg->size;
-
-   if ((mstart + msize) < base)
-   continue;
-
-   if (mstart < base) {
-   msize -= (base - mstart);
-   mstart = base;
-   }
-
-   pr_info("Reserving %lluMB of memory at %#016llx for preserving 
crash data",
-   (msize >> 20), mstart);
-   memblock_reserve(mstart, msize);
-   }
+   memblock_reserve(base, memblock_end_of_DRAM() - base);
 }
 
 unsigned long __init arch_reserved_kernel_pages(void)
-- 
2.26.2



[PATCH 07/15] riscv: drop unneeded node initialization

2020-07-27 Thread Mike Rapoport
From: Mike Rapoport 

RISC-V does not (yet) support NUMA, and for UMA architectures node 0 is
used implicitly during early memory initialization.

There is no need to call memblock_set_node(); remove this call and the
surrounding code.

Signed-off-by: Mike Rapoport 
---
 arch/riscv/mm/init.c | 9 -
 1 file changed, 9 deletions(-)

diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 79e9d55bdf1a..7440ba2cdaaa 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -191,15 +191,6 @@ void __init setup_bootmem(void)
early_init_fdt_scan_reserved_mem();
memblock_allow_resize();
memblock_dump_all();
-
-   for_each_memblock(memory, reg) {
-   unsigned long start_pfn = memblock_region_memory_base_pfn(reg);
-   unsigned long end_pfn = memblock_region_memory_end_pfn(reg);
-
-		memblock_set_node(PFN_PHYS(start_pfn),
-				  PFN_PHYS(end_pfn - start_pfn),
-				  &memblock.memory, 0);
-   }
 }
 
 #ifdef CONFIG_MMU
-- 
2.26.2



[PATCH 01/15] KVM: PPC: Book3S HV: simplify kvm_cma_reserve()

2020-07-27 Thread Mike Rapoport
From: Mike Rapoport 

The memory size calculation in kvm_cma_reserve() traverses memblock.memory
rather than simply calling memblock_phys_mem_size(). The comment in that
function suggests that at some point there should have been a call to
memblock_analyze() before memblock_phys_mem_size() could be used.
As of now, there is no memblock_analyze() at all and
memblock_phys_mem_size() can be used as soon as cold-plug memory is
registered with memblock.

Replace loop over memblock.memory with a call to memblock_phys_mem_size().
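
Restated as straight-line code (a sketch restating the diff below;
kvm_cma_resv_ratio is the existing module parameter):

	/* memblock_phys_mem_size() returns bytes; PHYS_PFN() gives pages */
	phys_addr_t selected_size = PHYS_PFN(memblock_phys_mem_size());

	/* scale by the reservation ratio, then convert pages back to bytes */
	selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT;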

Signed-off-by: Mike Rapoport 
---
 arch/powerpc/kvm/book3s_hv_builtin.c | 11 ++-
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 7cd3cf3d366b..56ab0d28de2a 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -95,22 +95,15 @@ EXPORT_SYMBOL_GPL(kvm_free_hpt_cma);
 void __init kvm_cma_reserve(void)
 {
unsigned long align_size;
-   struct memblock_region *reg;
-   phys_addr_t selected_size = 0;
+   phys_addr_t selected_size;
 
/*
 * We need CMA reservation only when we are in HV mode
 */
if (!cpu_has_feature(CPU_FTR_HVMODE))
return;
-   /*
-* We cannot use memblock_phys_mem_size() here, because
-* memblock_analyze() has not been called yet.
-*/
-   for_each_memblock(memory, reg)
-   selected_size += memblock_region_memory_end_pfn(reg) -
-memblock_region_memory_base_pfn(reg);
 
+   selected_size = PHYS_PFN(memblock_phys_mem_size());
	selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT;
if (selected_size) {
pr_debug("%s: reserving %ld MiB for global area\n", __func__,
-- 
2.26.2



[PATCH 05/15] h8300, nds32, openrisc: simplify detection of memory extents

2020-07-27 Thread Mike Rapoport
From: Mike Rapoport 

Instead of traversing memblock.memory regions to find memory_start and
memory_end, simply query memblock_{start,end}_of_DRAM().

Signed-off-by: Mike Rapoport 
---
 arch/h8300/kernel/setup.c| 8 +++-
 arch/nds32/kernel/setup.c| 8 ++--
 arch/openrisc/kernel/setup.c | 9 ++---
 3 files changed, 7 insertions(+), 18 deletions(-)

diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c
index 28ac88358a89..0281f92eea3d 100644
--- a/arch/h8300/kernel/setup.c
+++ b/arch/h8300/kernel/setup.c
@@ -74,17 +74,15 @@ static void __init bootmem_init(void)
memory_end = memory_start = 0;
 
/* Find main memory where is the kernel */
-   for_each_memblock(memory, region) {
-   memory_start = region->base;
-   memory_end = region->base + region->size;
-   }
+   memory_start = memblock_start_of_DRAM();
+   memory_end = memblock_end_of_DRAM();
 
if (!memory_end)
panic("No memory!");
 
	/* setup bootmem globals (we use no_bootmem, but mm still depends on this) */
min_low_pfn = PFN_UP(memory_start);
-   max_low_pfn = PFN_DOWN(memblock_end_of_DRAM());
+   max_low_pfn = PFN_DOWN(memory_end);
max_pfn = max_low_pfn;
 
memblock_reserve(__pa(_stext), _end - _stext);
diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c
index a066efbe53c0..c356e484dcab 100644
--- a/arch/nds32/kernel/setup.c
+++ b/arch/nds32/kernel/setup.c
@@ -249,12 +249,8 @@ static void __init setup_memory(void)
memory_end = memory_start = 0;
 
/* Find main memory where is the kernel */
-   for_each_memblock(memory, region) {
-   memory_start = region->base;
-   memory_end = region->base + region->size;
-   pr_info("%s: Memory: 0x%x-0x%x\n", __func__,
-   memory_start, memory_end);
-   }
+   memory_start = memblock_start_of_DRAM();
+   memory_end = memblock_end_of_DRAM();
 
if (!memory_end) {
panic("No memory!");
diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c
index 8aa438e1f51f..c5706153d3b6 100644
--- a/arch/openrisc/kernel/setup.c
+++ b/arch/openrisc/kernel/setup.c
@@ -48,17 +48,12 @@ static void __init setup_memory(void)
unsigned long ram_start_pfn;
unsigned long ram_end_pfn;
phys_addr_t memory_start, memory_end;
-   struct memblock_region *region;
 
memory_end = memory_start = 0;
 
/* Find main memory where is the kernel, we assume its the only one */
-   for_each_memblock(memory, region) {
-   memory_start = region->base;
-   memory_end = region->base + region->size;
-   printk(KERN_INFO "%s: Memory: 0x%x-0x%x\n", __func__,
-  memory_start, memory_end);
-   }
+   memory_start = memblock_start_of_DRAM();
+   memory_end = memblock_end_of_DRAM();
 
if (!memory_end) {
panic("No memory!");
-- 
2.26.2



[PATCH 02/15] dma-contiguous: simplify cma_early_percent_memory()

2020-07-27 Thread Mike Rapoport
From: Mike Rapoport 

The memory size calculation in cma_early_percent_memory() traverses
memblock.memory rather than simply calling memblock_phys_mem_size(). The
comment in that function suggests that at some point there should have
been a call to memblock_analyze() before memblock_phys_mem_size() could
be used.
As of now, there is no memblock_analyze() at all and
memblock_phys_mem_size() can be used as soon as cold-plug memory is
registered with memblock.

Replace loop over memblock.memory with a call to memblock_phys_mem_size().

Signed-off-by: Mike Rapoport 
---
 kernel/dma/contiguous.c | 11 +--
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index 15bc5026c485..1992afd8ca7b 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -73,16 +73,7 @@ early_param("cma", early_cma);
 
 static phys_addr_t __init __maybe_unused cma_early_percent_memory(void)
 {
-   struct memblock_region *reg;
-   unsigned long total_pages = 0;
-
-   /*
-* We cannot use memblock_phys_mem_size() here, because
-* memblock_analyze() has not been called yet.
-*/
-   for_each_memblock(memory, reg)
-   total_pages += memblock_region_memory_end_pfn(reg) -
-  memblock_region_memory_base_pfn(reg);
+   unsigned long total_pages = PHYS_PFN(memblock_phys_mem_size());
 
return (total_pages * CONFIG_CMA_SIZE_PERCENTAGE / 100) << PAGE_SHIFT;
 }
-- 
2.26.2



[PATCH 03/15] arm, xtensa: simplify initialization of high memory pages

2020-07-27 Thread Mike Rapoport
From: Mike Rapoport 

The function free_highpages() in both arm and xtensa essentially
open-codes the for_each_free_mem_range() loop to detect high memory pages
that were not reserved and that should be initialized and passed to the
buddy allocator.

Replace the open-coded implementation with for_each_free_mem_range() to
simplify the code.
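
After conversion, both architectures share this loop shape (a sketch;
for_each_free_mem_range() already skips reserved and NOMAP ranges, which
is what the removed open-coded exclusion logic did by hand):

	phys_addr_t range_start, range_end;
	u64 i;

	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
				&range_start, &range_end, NULL) {
		unsigned long start = PHYS_PFN(range_start);
		unsigned long end = PHYS_PFN(range_end);

		/* ignore complete lowmem entries */
		if (end <= max_low_pfn)
			continue;

		/* truncate partial highmem entries */
		if (start < max_low_pfn)
			start = max_low_pfn;

		for (; start < end; start++)
			free_highmem_page(pfn_to_page(start));
	}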

Signed-off-by: Mike Rapoport 
---
 arch/arm/mm/init.c| 48 +++--
 arch/xtensa/mm/init.c | 55 ---
 2 files changed, 18 insertions(+), 85 deletions(-)

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 01e18e43b174..626af348eb8f 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -352,61 +352,29 @@ static void __init free_unused_memmap(void)
 #endif
 }
 
-#ifdef CONFIG_HIGHMEM
-static inline void free_area_high(unsigned long pfn, unsigned long end)
-{
-   for (; pfn < end; pfn++)
-   free_highmem_page(pfn_to_page(pfn));
-}
-#endif
-
 static void __init free_highpages(void)
 {
 #ifdef CONFIG_HIGHMEM
unsigned long max_low = max_low_pfn;
-   struct memblock_region *mem, *res;
+   phys_addr_t range_start, range_end;
+   u64 i;
 
/* set highmem page free */
-   for_each_memblock(memory, mem) {
-   unsigned long start = memblock_region_memory_base_pfn(mem);
-   unsigned long end = memblock_region_memory_end_pfn(mem);
+	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
+				&range_start, &range_end, NULL) {
+   unsigned long start = PHYS_PFN(range_start);
+   unsigned long end = PHYS_PFN(range_end);
 
/* Ignore complete lowmem entries */
if (end <= max_low)
continue;
 
-   if (memblock_is_nomap(mem))
-   continue;
-
/* Truncate partial highmem entries */
if (start < max_low)
start = max_low;
 
-   /* Find and exclude any reserved regions */
-   for_each_memblock(reserved, res) {
-   unsigned long res_start, res_end;
-
-   res_start = memblock_region_reserved_base_pfn(res);
-   res_end = memblock_region_reserved_end_pfn(res);
-
-   if (res_end < start)
-   continue;
-   if (res_start < start)
-   res_start = start;
-   if (res_start > end)
-   res_start = end;
-   if (res_end > end)
-   res_end = end;
-   if (res_start != start)
-   free_area_high(start, res_start);
-   start = res_end;
-   if (start == end)
-   break;
-   }
-
-   /* And now free anything which remains */
-   if (start < end)
-   free_area_high(start, end);
+   for (; start < end; start++)
+   free_highmem_page(pfn_to_page(start));
}
 #endif
 }
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index a05b306cf371..ad9d59d93f39 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -79,67 +79,32 @@ void __init zones_init(void)
free_area_init(max_zone_pfn);
 }
 
-#ifdef CONFIG_HIGHMEM
-static void __init free_area_high(unsigned long pfn, unsigned long end)
-{
-   for (; pfn < end; pfn++)
-   free_highmem_page(pfn_to_page(pfn));
-}
-
 static void __init free_highpages(void)
 {
+#ifdef CONFIG_HIGHMEM
unsigned long max_low = max_low_pfn;
-   struct memblock_region *mem, *res;
+   phys_addr_t range_start, range_end;
+   u64 i;
 
-   reset_all_zones_managed_pages();
/* set highmem page free */
-   for_each_memblock(memory, mem) {
-   unsigned long start = memblock_region_memory_base_pfn(mem);
-   unsigned long end = memblock_region_memory_end_pfn(mem);
+	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
+				&range_start, &range_end, NULL) {
+   unsigned long start = PHYS_PFN(range_start);
+   unsigned long end = PHYS_PFN(range_end);
 
/* Ignore complete lowmem entries */
if (end <= max_low)
continue;
 
-   if (memblock_is_nomap(mem))
-   continue;
-
/* Truncate partial highmem entries */
if (start < max_low)
start = max_low;
 
-   /* Find and exclude any reserved regions */
-   for_each_memblock(reserved, res) {
-   unsigned long res_start, res_end;
-
-   res_start = memblock_region_reserved_base_pfn(res);
-  

[PATCH 04/15] arm64: numa: simplify dummy_numa_init()

2020-07-27 Thread Mike Rapoport
From: Mike Rapoport 

dummy_numa_init() loops over memblock.memory and passes nid=0 to
numa_add_memblk(), which essentially wraps memblock_set_node(). However,
memblock_set_node() can cope with the entire memory span itself, so the
loop over memblock.memory regions is redundant.

Replace the loop with a single numa_add_memblk() call covering the entire
memory.

Signed-off-by: Mike Rapoport 
---
 arch/arm64/mm/numa.c | 13 +
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index aafcee3e3f7e..0cbdbcc885fb 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -423,19 +423,16 @@ static int __init numa_init(int (*init_func)(void))
  */
 static int __init dummy_numa_init(void)
 {
+   phys_addr_t start = memblock_start_of_DRAM();
+   phys_addr_t end = memblock_end_of_DRAM();
int ret;
-   struct memblock_region *mblk;
 
if (numa_off)
pr_info("NUMA disabled\n"); /* Forced off on command line. */
-   pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n",
-   memblock_start_of_DRAM(), memblock_end_of_DRAM() - 1);
-
-   for_each_memblock(memory, mblk) {
-   ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size);
-   if (!ret)
-   continue;
+   pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", start, end - 1);
 
+   ret = numa_add_memblk(0, start, end);
+   if (ret) {
pr_err("NUMA init failed\n");
return ret;
}
-- 
2.26.2



[PATCH 00/15] memblock: seasonal cleaning^w cleanup

2020-07-27 Thread Mike Rapoport
From: Mike Rapoport 

Hi,

These patches simplify several uses of memblock iterators and hide some of
the memblock implementation details from the rest of the system.

The patches are on top of v5.8-rc7 + cherry-pick of "mm/sparse: cleanup the
code surrounding memory_present()" [1] from mmotm tree.

[1] http://lkml.kernel.org/r/20200712083130.22919-1-r...@kernel.org 

Mike Rapoport (15):
  KVM: PPC: Book3S HV: simplify kvm_cma_reserve()
  dma-contiguous: simplify cma_early_percent_memory()
  arm, xtensa: simplify initialization of high memory pages
  arm64: numa: simplify dummy_numa_init()
  h8300, nds32, openrisc: simplify detection of memory extents
  powerpc: fadump: simplify fadump_reserve_crash_area()
  riscv: drop unneeded node initialization
  microblaze: drop unneeded NUMA and sparsemem initializations
  memblock: make for_each_memblock_type() iterator private
  memblock: make memblock_debug and related functionality private
  memblock: reduce number of parameters in for_each_mem_range()
  arch, mm: replace for_each_memblock() with for_each_mem_pfn_range()
  arch, drivers: replace for_each_memblock() with for_each_mem_range()
  x86/numa: remove redundant iteration over memblock.reserved
  memblock: remove 'type' parameter from for_each_memblock()

 .clang-format|  1 +
 arch/arm/kernel/setup.c  | 18 +---
 arch/arm/mm/init.c   | 59 +---
 arch/arm/mm/mmu.c| 39 ++--
 arch/arm/mm/pmsa-v7.c| 20 
 arch/arm/mm/pmsa-v8.c| 17 ---
 arch/arm/xen/mm.c|  7 +--
 arch/arm64/kernel/machine_kexec_file.c   |  6 +--
 arch/arm64/kernel/setup.c|  2 +-
 arch/arm64/mm/init.c | 11 ++---
 arch/arm64/mm/kasan_init.c   |  8 ++--
 arch/arm64/mm/mmu.c  | 11 ++---
 arch/arm64/mm/numa.c | 15 +++---
 arch/c6x/kernel/setup.c  |  9 ++--
 arch/h8300/kernel/setup.c|  8 ++--
 arch/microblaze/mm/init.c| 24 ++
 arch/mips/cavium-octeon/dma-octeon.c | 12 ++---
 arch/mips/kernel/setup.c | 31 ++---
 arch/mips/netlogic/xlp/setup.c   |  2 +-
 arch/nds32/kernel/setup.c|  8 +---
 arch/openrisc/kernel/setup.c |  9 +---
 arch/openrisc/mm/init.c  |  8 ++--
 arch/powerpc/kernel/fadump.c | 58 ---
 arch/powerpc/kvm/book3s_hv_builtin.c | 11 +
 arch/powerpc/mm/book3s64/hash_utils.c| 16 +++
 arch/powerpc/mm/book3s64/radix_pgtable.c | 11 ++---
 arch/powerpc/mm/kasan/kasan_init_32.c|  8 ++--
 arch/powerpc/mm/mem.c| 33 +++--
 arch/powerpc/mm/numa.c   |  7 +--
 arch/powerpc/mm/pgtable_32.c |  8 ++--
 arch/riscv/mm/init.c | 33 -
 arch/riscv/mm/kasan_init.c   | 10 ++--
 arch/s390/kernel/crash_dump.c|  8 ++--
 arch/s390/kernel/setup.c | 31 -
 arch/s390/mm/page-states.c   |  6 +--
 arch/s390/mm/vmem.c  | 16 ---
 arch/sh/mm/init.c|  9 ++--
 arch/sparc/mm/init_64.c  | 12 ++---
 arch/x86/mm/numa.c   | 26 ---
 arch/xtensa/mm/init.c| 55 --
 drivers/bus/mvebu-mbus.c | 12 ++---
 drivers/s390/char/zcore.c|  9 ++--
 include/linux/memblock.h | 45 +-
 kernel/dma/contiguous.c  | 11 +
 mm/memblock.c| 28 +++
 mm/page_alloc.c  | 11 ++---
 mm/sparse.c  | 10 ++--
 47 files changed, 324 insertions(+), 485 deletions(-)

-- 
2.26.2



Re: [PATCH 1/4] dma-mapping: Add bounced DMA ops

2020-07-27 Thread Claire Chang
v2 that reuses SWIOTLB here: https://lore.kernel.org/patchwork/cover/1280705/

Thanks,
Claire


[RFC v2 5/5] of: Add plumbing for restricted DMA pool

2020-07-27 Thread Claire Chang
If a device is not behind an IOMMU, we look up the device node and set
up the restricted DMA when the restricted-dma property is present.
One can specify two reserved-memory nodes in the device tree: one with
shared-dma-pool to handle the coherent DMA buffer allocation, and
another one with device-swiotlb-pool for regular DMA to/from system
memory, which would be subject to bouncing.

Signed-off-by: Claire Chang 
---
 drivers/of/address.c| 39 +++
 drivers/of/device.c |  3 +++
 drivers/of/of_private.h |  6 ++
 3 files changed, 48 insertions(+)

diff --git a/drivers/of/address.c b/drivers/of/address.c
index 381dc9be7b22..1285f914481f 100644
--- a/drivers/of/address.c
+++ b/drivers/of/address.c
@@ -8,6 +8,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -1009,6 +1010,44 @@ int of_dma_get_range(struct device_node *np, u64 
*dma_addr, u64 *paddr, u64 *siz
return ret;
 }
 
+int of_dma_set_restricted_buffer(struct device *dev)
+{
+   int length, size, ret, i;
+   u32 idx[2];
+
+   if (!dev || !dev->of_node)
+   return -EINVAL;
+
+	if (!of_get_property(dev->of_node, "restricted-dma", &length))
+   return 0;
+
+   size = length / sizeof(idx[0]);
+   if (size > ARRAY_SIZE(idx)) {
+		dev_err(dev,
+			"restricted-dma expected less than or equal to %d indexes, but got %d\n",
+			ARRAY_SIZE(idx), size);
+   return -EINVAL;
+   }
+
+   ret = of_property_read_u32_array(dev->of_node, "restricted-dma", idx,
+size);
+   if (ret)
+   return ret;
+
+   for (i = 0; i < size; i++) {
+   ret = of_reserved_mem_device_init_by_idx(dev, dev->of_node,
+idx[i]);
+   if (ret) {
+			dev_err(dev,
+				"of_reserved_mem_device_init_by_idx() failed with %d\n",
+				ret);
+   return ret;
+   }
+   }
+
+   return 0;
+}
+
 /**
  * of_dma_is_coherent - Check if device is coherent
  * @np:device node
diff --git a/drivers/of/device.c b/drivers/of/device.c
index 27203bfd0b22..83d6cf8a8256 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -169,6 +169,9 @@ int of_dma_configure(struct device *dev, struct device_node *np, bool force_dma)
 
arch_setup_dma_ops(dev, dma_addr, size, iommu, coherent);
 
+   if (!iommu)
+   return of_dma_set_restricted_buffer(dev);
+
return 0;
 }
 EXPORT_SYMBOL_GPL(of_dma_configure);
diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h
index edc682249c00..f2e3adfb7d85 100644
--- a/drivers/of/of_private.h
+++ b/drivers/of/of_private.h
@@ -160,12 +160,18 @@ extern int of_bus_n_size_cells(struct device_node *np);
 #ifdef CONFIG_OF_ADDRESS
 extern int of_dma_get_range(struct device_node *np, u64 *dma_addr,
u64 *paddr, u64 *size);
+extern int of_dma_set_restricted_buffer(struct device *dev);
 #else
 static inline int of_dma_get_range(struct device_node *np, u64 *dma_addr,
   u64 *paddr, u64 *size)
 {
return -ENODEV;
 }
+
+static inline int of_dma_set_restricted_buffer(struct device *dev)
+{
+   return -ENODEV;
+}
 #endif
 
 #endif /* _LINUX_OF_PRIVATE_H */
-- 
2.28.0.rc0.142.g3c755180ce-goog



[RFC v2 4/5] dt-bindings: of: Add plumbing for restricted DMA pool

2020-07-27 Thread Claire Chang
Introduce a new compatible string, device-swiotlb-pool, for restricted
DMA. One can specify the address and length of the device swiotlb memory
region via a device-swiotlb-pool reserved-memory node in the device tree.

Signed-off-by: Claire Chang 
---
 .../reserved-memory/reserved-memory.txt   | 35 +++
 1 file changed, 35 insertions(+)

diff --git a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
index 4dd20de6977f..78850896e1d0 100644
--- a/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
+++ b/Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt
@@ -51,6 +51,24 @@ compatible (optional) - standard definition
   used as a shared pool of DMA buffers for a set of devices. It can
   be used by an operating system to instantiate the necessary pool
   management subsystem if necessary.
+- device-swiotlb-pool: This indicates a region of memory meant to be
+  used as a pool of device swiotlb buffers for a given device. When
+  using this, the no-map and reusable properties must not be set, so the
+  operating system can create a virtual mapping that will be used for
+  synchronization. Also, there must be a restricted-dma property in the
+  device node to specify the indexes of reserved-memory nodes. One can
+  specify two reserved-memory nodes in the device tree. One with
+  shared-dma-pool to handle the coherent DMA buffer allocation, and
+  another one with device-swiotlb-pool for regular DMA to/from system
+  memory, which would be subject to bouncing. The main purpose for
+  restricted DMA is to mitigate the lack of DMA access control on
+  systems without an IOMMU, which could result in the DMA accessing the
+  system memory at unexpected times and/or unexpected addresses,
+  possibly leading to data leakage or corruption. The feature on its own
+  provides a basic level of protection against the DMA overwriting buffer
+  contents at unexpected times. However, to protect against general data
+  leakage and system memory corruption, the system needs to provide a
+  way to restrict the DMA to a predefined memory region.
+- vendor specific string in the form <vendor>,[<device>-]<usage>
 no-map (optional) - empty property
 - Indicates the operating system must not create a virtual mapping
@@ -117,6 +135,16 @@ one for multimedia processing (named multimedia-memory@7700, 64MiB).
compatible = "acme,multimedia-memory";
reg = <0x7700 0x400>;
};
+
+   wifi_coherent_mem_region: wifi_coherent_mem_region {
+   compatible = "shared-dma-pool";
+   reg = <0x5000 0x40>;
+   };
+
+   wifi_device_swiotlb_region: wifi_device_swiotlb_region {
+   compatible = "device-swiotlb-pool";
+   reg = <0x5040 0x400>;
+   };
};
 
/* ... */
@@ -135,4 +163,11 @@ one for multimedia processing (named multimedia-memory@7700, 64MiB).
	memory-region = <&multimedia_reserved>;
/* ... */
};
+
+   pcie_wifi: pcie_wifi@0,0 {
+		memory-region = <&wifi_coherent_mem_region>,
+				<&wifi_device_swiotlb_region>;
+   restricted-dma = <0>, <1>;
+   /* ... */
+   };
 };
-- 
2.28.0.rc0.142.g3c755180ce-goog



[RFC v2 2/5] swiotlb: Add device swiotlb pool

2020-07-27 Thread Claire Chang
Add the initialization function to create device swiotlb pools from
matching reserved-memory nodes in the device tree.

Signed-off-by: Claire Chang 
---
 include/linux/device.h |   4 ++
 kernel/dma/swiotlb.c   | 148 +
 2 files changed, 126 insertions(+), 26 deletions(-)

diff --git a/include/linux/device.h b/include/linux/device.h
index 79ce404619e6..f40f711e43e9 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -575,6 +575,10 @@ struct device {
	struct cma *cma_area;	/* contiguous memory area for dma allocations */
 #endif
+#ifdef CONFIG_SWIOTLB
+   struct io_tlb_mem   *dma_io_tlb_mem;
+#endif
+
/* arch specific additions */
struct dev_archdata archdata;
 
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index f83911fa14ce..eaa101b3e75b 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -36,6 +36,10 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
+#include 
 #ifdef CONFIG_DEBUG_FS
 #include 
 #endif
@@ -298,20 +302,14 @@ static void swiotlb_cleanup(void)
max_segment = 0;
 }
 
-int
-swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
+static int swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
+   size_t size)
 {
-	struct io_tlb_mem *mem = &io_tlb_default_mem;
-   unsigned long i, bytes;
-
-   bytes = nslabs << IO_TLB_SHIFT;
+   unsigned long i;
 
-   mem->nslabs = nslabs;
-   mem->start = virt_to_phys(tlb);
-   mem->end = mem->start + bytes;
-
-   set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
-   memset(tlb, 0, bytes);
+   mem->nslabs = size >> IO_TLB_SHIFT;
+   mem->start = start;
+   mem->end = mem->start + size;
 
/*
 * Allocate and initialize the free list array.  This array is used
@@ -336,11 +334,6 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
}
mem->index = 0;
 
-   swiotlb_print_info();
-
-   late_alloc = 1;
-
-   swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT);
	spin_lock_init(&mem->lock);
 
return 0;
@@ -354,6 +347,38 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
return -ENOMEM;
 }
 
+int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
+{
+	struct io_tlb_mem *mem = &io_tlb_default_mem;
+   unsigned long bytes;
+   int ret;
+
+   bytes = nslabs << IO_TLB_SHIFT;
+
+   set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
+   memset(tlb, 0, bytes);
+
+   ret = swiotlb_init_io_tlb_mem(mem, virt_to_phys(tlb), bytes);
+   if (ret)
+   return ret;
+
+   swiotlb_print_info();
+
+   late_alloc = 1;
+
+   swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT);
+
+   return 0;
+}
+
+static void swiotlb_free_pages(struct io_tlb_mem *mem)
+{
+   free_pages((unsigned long)mem->orig_addr,
+  get_order(mem->nslabs * sizeof(phys_addr_t)));
+   free_pages((unsigned long)mem->list,
+  get_order(mem->nslabs * sizeof(int)));
+}
+
 void __init swiotlb_exit(void)
 {
	struct io_tlb_mem *mem = &io_tlb_default_mem;
@@ -362,10 +387,7 @@ void __init swiotlb_exit(void)
return;
 
if (late_alloc) {
-   free_pages((unsigned long)mem->orig_addr,
-  get_order(mem->nslabs * sizeof(phys_addr_t)));
-   free_pages((unsigned long)mem->list, get_order(mem->nslabs *
-  sizeof(int)));
+   swiotlb_free_pages(mem);
free_pages((unsigned long)phys_to_virt(mem->start),
   get_order(mem->nslabs << IO_TLB_SHIFT));
} else {
@@ -687,16 +709,90 @@ bool is_swiotlb_active(void)
 
 #ifdef CONFIG_DEBUG_FS
 
-static int __init swiotlb_create_debugfs(void)
+static void swiotlb_create_debugfs(struct io_tlb_mem *mem, const char *name,
+  struct dentry *node)
 {
-	struct io_tlb_mem *mem = &io_tlb_default_mem;
-
-	mem->debugfs = debugfs_create_dir("swiotlb", NULL);
+	mem->debugfs = debugfs_create_dir(name, node);
 	debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs);
 	debugfs_create_ulong("io_tlb_used", 0400, mem->debugfs, &mem->used);
+}
+
+static int __init swiotlb_create_default_debugfs(void)
+{
+	swiotlb_create_debugfs(&io_tlb_default_mem, "swiotlb", NULL);
+
return 0;
 }
 
-late_initcall(swiotlb_create_debugfs);
+late_initcall(swiotlb_create_default_debugfs);
 
 #endif
+
+static int device_swiotlb_init(struct reserved_mem *rmem,
+  struct device *dev)
+{
+   struct io_tlb_mem *mem;
+   int ret;
+
+   if (dev->dma_io_tlb_mem)
+   return 0;
+
+   mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+   

[RFC v2 1/5] swiotlb: Add io_tlb_mem struct

2020-07-27 Thread Claire Chang
Added a new struct, io_tlb_mem, as the IO TLB memory pool descriptor and
moved relevant global variables into that struct.
This will be useful later to allow for per-device swiotlb regions.

Signed-off-by: Claire Chang 
---
 drivers/iommu/intel/iommu.c |   2 +-
 drivers/xen/swiotlb-xen.c   |   4 +-
 include/linux/swiotlb.h |  38 -
 kernel/dma/swiotlb.c| 286 +---
 4 files changed, 172 insertions(+), 158 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 3f7c04cf89b3..44c9230251eb 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -3736,7 +3736,7 @@ bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size,
 */
if (!IS_ALIGNED(paddr | size, VTD_PAGE_SIZE)) {
tlb_addr = swiotlb_tbl_map_single(dev,
-   __phys_to_dma(dev, io_tlb_start),
+   __phys_to_dma(dev, io_tlb_default_mem.start),
paddr, size, aligned_size, dir, attrs);
if (tlb_addr == DMA_MAPPING_ERROR) {
goto swiotlb_error;
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index b6d27762c6f8..62452424ec8a 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -190,8 +190,8 @@ int __ref xen_swiotlb_init(int verbose, bool early)
/*
 * IO TLB memory already allocated. Just use it.
 */
-   if (io_tlb_start != 0) {
-   xen_io_tlb_start = phys_to_virt(io_tlb_start);
+   if (io_tlb_default_mem.start != 0) {
+   xen_io_tlb_start = phys_to_virt(io_tlb_default_mem.start);
goto end;
}
 
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index 046bb94bd4d6..ab0d571d0826 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -69,11 +69,45 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
 
 #ifdef CONFIG_SWIOTLB
 extern enum swiotlb_force swiotlb_force;
-extern phys_addr_t io_tlb_start, io_tlb_end;
+
+/**
+ * struct io_tlb_mem - IO TLB Memory Pool Descriptor
+ *
+ * @start: The start address of the swiotlb memory pool. Used to do a quick
+ * range check to see if the memory was in fact allocated by this
+ * API. For device private swiotlb, this is device tree adjustable.
+ * @end:   The end address of the swiotlb memory pool. Used to do a quick
+ * range check to see if the memory was in fact allocated by this
+ * API. For device private swiotlb, this is device tree adjustable.
+ * @nslabs:    The number of IO TLB blocks (in groups of 64) between @start and
+ * @end. For system swiotlb, this is command line adjustable via
+ * setup_io_tlb_npages.
+ * @used:  The number of used IO TLB blocks.
+ * @list:  The free list describing the number of free entries available
+ * from each index.
+ * @index: The index to start searching in the next round.
+ * @orig_addr: The original address corresponding to a mapped entry for the
+ * sync operations.
+ * @lock:  The lock to protect the above data structures in the map and
+ * unmap calls.
+ * @debugfs:   The dentry to debugfs.
+ */
+struct io_tlb_mem {
+   phys_addr_t start;
+   phys_addr_t end;
+   unsigned long nslabs;
+   unsigned long used;
+   unsigned int *list;
+   unsigned int index;
+   phys_addr_t *orig_addr;
+   spinlock_t lock;
+   struct dentry *debugfs;
+};
+extern struct io_tlb_mem io_tlb_default_mem;
 
 static inline bool is_swiotlb_buffer(phys_addr_t paddr)
 {
-   return paddr >= io_tlb_start && paddr < io_tlb_end;
+   return paddr >= io_tlb_default_mem.start && paddr < io_tlb_default_mem.end;
 }
 
 void __init swiotlb_exit(void);
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index c19379fabd20..f83911fa14ce 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -61,33 +61,11 @@
  * allocate a contiguous 1MB, we're probably in trouble anyway.
  */
 #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
+#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
 
 enum swiotlb_force swiotlb_force;
 
-/*
- * Used to do a quick range check in swiotlb_tbl_unmap_single and
- * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by 
this
- * API.
- */
-phys_addr_t io_tlb_start, io_tlb_end;
-
-/*
- * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
- * io_tlb_end.  This is command line adjustable via setup_io_tlb_npages.
- */
-static unsigned long io_tlb_nslabs;
-
-/*
- * The number of used IO TLB block
- */
-static unsigned long io_tlb_used;
-
-/*
- * This is a free list describing the number of free entries available from
- * each index
- */
-static unsigned int *io_tlb_list;
-static unsigned int io_tlb_index;
+struct io_tlb_mem io_tlb_default_mem;
 
 /*
  * Max segment that we can 

[RFC v2 3/5] swiotlb: Use device swiotlb pool if available

2020-07-27 Thread Claire Chang
Regardless of swiotlb setting, the device swiotlb pool is preferred if
available.

The device swiotlb pools provide a basic level of protection against
the DMA overwriting buffer contents at unexpected times. However, to
protect against general data leakage and system memory corruption, the
system needs to provide a way to restrict the DMA to a predefined memory
region.
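
The selection rule above is simple enough to state as one helper (an
illustrative sketch; the helper name is made up, and the diff below
open-codes the same ternary at each call site):

static inline struct io_tlb_mem *dev_io_tlb_mem(struct device *dev)
{
	/* Prefer the device's restricted pool, else fall back to the default */
	return dev->dma_io_tlb_mem ? dev->dma_io_tlb_mem : &io_tlb_default_mem;
}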

Signed-off-by: Claire Chang 
---
 drivers/iommu/intel/iommu.c |  6 +++---
 include/linux/dma-direct.h  |  8 
 include/linux/swiotlb.h | 13 -
 kernel/dma/direct.c |  8 
 kernel/dma/swiotlb.c| 18 +++---
 5 files changed, 30 insertions(+), 23 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 44c9230251eb..37d6583cf628 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -3684,7 +3684,7 @@ bounce_sync_single(struct device *dev, dma_addr_t addr, 
size_t size,
return;
 
tlb_addr = intel_iommu_iova_to_phys(>domain, addr);
-   if (is_swiotlb_buffer(tlb_addr))
+   if (is_swiotlb_buffer(dev, tlb_addr))
swiotlb_tbl_sync_single(dev, tlb_addr, size, dir, target);
 }
 
@@ -3768,7 +3768,7 @@ bounce_map_single(struct device *dev, phys_addr_t paddr, 
size_t size,
return (phys_addr_t)iova_pfn << PAGE_SHIFT;
 
 mapping_error:
-   if (is_swiotlb_buffer(tlb_addr))
+   if (is_swiotlb_buffer(dev, tlb_addr))
swiotlb_tbl_unmap_single(dev, tlb_addr, size,
 aligned_size, dir, attrs);
 swiotlb_error:
@@ -3796,7 +3796,7 @@ bounce_unmap_single(struct device *dev, dma_addr_t 
dev_addr, size_t size,
return;
 
intel_unmap(dev, dev_addr, size);
-   if (is_swiotlb_buffer(tlb_addr))
+   if (is_swiotlb_buffer(dev, tlb_addr))
swiotlb_tbl_unmap_single(dev, tlb_addr, size,
 aligned_size, dir, attrs);
 
diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h
index 5a3ce2a24794..1cf920ddb2f6 100644
--- a/include/linux/dma-direct.h
+++ b/include/linux/dma-direct.h
@@ -134,7 +134,7 @@ static inline void dma_direct_sync_single_for_device(struct 
device *dev,
 {
phys_addr_t paddr = dma_to_phys(dev, addr);
 
-   if (unlikely(is_swiotlb_buffer(paddr)))
+   if (unlikely(is_swiotlb_buffer(dev, paddr)))
swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);
 
if (!dev_is_dma_coherent(dev))
@@ -151,7 +151,7 @@ static inline void dma_direct_sync_single_for_cpu(struct 
device *dev,
arch_sync_dma_for_cpu_all();
}
 
-   if (unlikely(is_swiotlb_buffer(paddr)))
+   if (unlikely(is_swiotlb_buffer(dev, paddr)))
swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
 }
 
@@ -162,7 +162,7 @@ static inline dma_addr_t dma_direct_map_page(struct device 
*dev,
phys_addr_t phys = page_to_phys(page) + offset;
dma_addr_t dma_addr = phys_to_dma(dev, phys);
 
-   if (unlikely(swiotlb_force == SWIOTLB_FORCE))
+   if (unlikely(swiotlb_force == SWIOTLB_FORCE || dev->dma_io_tlb_mem))
return swiotlb_map(dev, phys, size, dir, attrs);
 
if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
@@ -188,7 +188,7 @@ static inline void dma_direct_unmap_page(struct device 
*dev, dma_addr_t addr,
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
dma_direct_sync_single_for_cpu(dev, addr, size, dir);
 
-   if (unlikely(is_swiotlb_buffer(phys)))
+   if (unlikely(is_swiotlb_buffer(dev, phys)))
swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs);
 }
 #endif /* _LINUX_DMA_DIRECT_H */
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index ab0d571d0826..8a50b3af7c3f 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -105,18 +105,21 @@ struct io_tlb_mem {
 };
 extern struct io_tlb_mem io_tlb_default_mem;
 
-static inline bool is_swiotlb_buffer(phys_addr_t paddr)
+static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
 {
-   return paddr >= io_tlb_default_mem.start && paddr < io_tlb_default_mem.end;
+   struct io_tlb_mem *mem =
+   dev->dma_io_tlb_mem ? dev->dma_io_tlb_mem : &io_tlb_default_mem;
+
+   return paddr >= mem->start && paddr < mem->end;
 }
 
 void __init swiotlb_exit(void);
 unsigned int swiotlb_max_segment(void);
 size_t swiotlb_max_mapping_size(struct device *dev);
-bool is_swiotlb_active(void);
+bool is_swiotlb_active(struct device *dev);
 #else
 #define swiotlb_force SWIOTLB_NO_FORCE
-static inline bool is_swiotlb_buffer(phys_addr_t paddr)
+static inline bool is_swiotlb_buffer(struct device *dev, phys_addr_t paddr)
 {
return false;
 }
@@ -132,7 +135,7 @@ static inline size_t swiotlb_max_mapping_size(struct device 
*dev)
return SIZE_MAX;
 }
 
-static inline bool is_swiotlb_active(void)
+static inline bool 

[RFC v2 0/5] Restricted DMA

2020-07-27 Thread Claire Chang
This series implements mitigations for lack of DMA access control on
systems without an IOMMU, which could result in the DMA accessing the
system memory at unexpected times and/or unexpected addresses, possibly
leading to data leakage or corruption.

For example, we plan to use the PCI-e bus for Wi-Fi on one MTK platform and
that PCI-e bus is not behind an IOMMU. As PCI-e, by design, gives the
device full access to system memory, a vulnerability in the Wi-Fi firmware
could easily escalate to a full system exploit (remote wifi exploits: [1a],
[1b] that shows a full chain of exploits; [2], [3]).

To mitigate the security concerns, we introduce restricted DMA. The
restricted DMA is implemented by per-device swiotlb and coherent memory
pools. The feature on its own provides a basic level of protection against
the DMA overwriting buffer contents at unexpected times. However, to
protect against general data leakage and system memory corruption, the
system needs to provide a way to restrict the DMA to a predefined memory
region (this is usually done at firmware level, e.g. in ATF on some ARM
platforms).

[1a] 
https://googleprojectzero.blogspot.com/2017/04/over-air-exploiting-broadcoms-wi-fi_4.html
[1b] 
https://googleprojectzero.blogspot.com/2017/04/over-air-exploiting-broadcoms-wi-fi_11.html
[2] https://blade.tencent.com/en/advisories/qualpwn/
[3] 
https://www.bleepingcomputer.com/news/security/vulnerabilities-found-in-highly-popular-firmware-for-wifi-chips/
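
Conceptually, the mapping rule the series establishes looks like this (a
simplified sketch under the series' assumptions: dev->dma_io_tlb_mem is
the per-device pool added by patch 1, and the real dma_direct_map_page()
also handles the !dma_capable() case and cache maintenance):

static dma_addr_t sketch_map_page(struct device *dev, struct page *page,
				  unsigned long offset, size_t size,
				  enum dma_data_direction dir,
				  unsigned long attrs)
{
	phys_addr_t phys = page_to_phys(page) + offset;

	/*
	 * A restricted pool forces every mapping to bounce through it,
	 * so the device only ever sees bus addresses inside the
	 * pre-reserved region.
	 */
	if (dev->dma_io_tlb_mem || swiotlb_force == SWIOTLB_FORCE)
		return swiotlb_map(dev, phys, size, dir, attrs);

	return phys_to_dma(dev, phys);
}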


Claire Chang (5):
  swiotlb: Add io_tlb_mem struct
  swiotlb: Add device swiotlb pool
  swiotlb: Use device swiotlb pool if available
  dt-bindings: of: Add plumbing for restricted DMA pool
  of: Add plumbing for restricted DMA pool

 .../reserved-memory/reserved-memory.txt   |  35 ++
 drivers/iommu/intel/iommu.c   |   8 +-
 drivers/of/address.c  |  39 ++
 drivers/of/device.c   |   3 +
 drivers/of/of_private.h   |   6 +
 drivers/xen/swiotlb-xen.c |   4 +-
 include/linux/device.h|   4 +
 include/linux/dma-direct.h|   8 +-
 include/linux/swiotlb.h   |  49 +-
 kernel/dma/direct.c   |   8 +-
 kernel/dma/swiotlb.c  | 418 +++---
 11 files changed, 393 insertions(+), 189 deletions(-)

--
v1: https://lore.kernel.org/patchwork/cover/1271660/
Changes in v2:
- build on top of swiotlb
 
2.28.0.rc0.142.g3c755180ce-goog

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v2 0/2] iommu: Move AMD and Intel Kconfig + Makefile bits into their directories

2020-07-27 Thread Jerry Snitselaar


Jerry Snitselaar @ 2020-06-30 13:06 MST:

> This patchset implements the suggestion from Linus to move the
> Kconfig and Makefile bits for AMD and Intel into their respective
> directories.
>
> v2: Rebase against v5.8-rc3. Dropped ---help--- changes from Kconfig as that 
> was
> dealt with in systemwide cleanup.
>
> Jerry Snitselaar (2):
>   iommu/vt-d: Move Kconfig and Makefile bits down into intel directory
>   iommu/amd: Move Kconfig and Makefile bits down into amd directory
>
>
> ___
> iommu mailing list
> iommu@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/iommu

Hi Joerg,

Looks like I forgot to cc you on this cover letter for v2.
Does this work for you now?

Regards,
Jerry

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v3 1/1] PCI/ATS: Check PRI supported on the PF device when SRIOV is enabled

2020-07-27 Thread Sasha Levin
Hi

[This is an automated email]

This commit has been processed because it contains a "Fixes:" tag
fixing commit: b16d0cb9e2fc ("iommu/vt-d: Always enable PASID/PRI PCI 
capabilities before ATS").

The bot has tested the following trees: v5.7.10, v5.4.53, v4.19.134, v4.14.189, 
v4.9.231, v4.4.231.

v5.7.10: Build OK!
v5.4.53: Failed to apply! Possible dependencies:
2b0ae7cc3bfc ("PCI/ATS: Handle sharing of PF PASID Capability with all VFs")
751035b8dc06 ("PCI/ATS: Cache PASID Capability offset")
8cbb8a9374a2 ("PCI/ATS: Move pci_prg_resp_pasid_required() to 
CONFIG_PCI_PRI")
9bf49e36d718 ("PCI/ATS: Handle sharing of PF PRI Capability with all VFs")
c065190bbcd4 ("PCI/ATS: Cache PRI Capability offset")
e5adf79a1d80 ("PCI/ATS: Cache PRI PRG Response PASID Required bit")

v4.19.134: Failed to apply! Possible dependencies:
2b0ae7cc3bfc ("PCI/ATS: Handle sharing of PF PASID Capability with all VFs")
4f802170a861 ("PCI/DPC: Save and restore config state")
6e1ffbb7c2ab ("PCI: Move ATS declarations outside of CONFIG_PCI")
751035b8dc06 ("PCI/ATS: Cache PASID Capability offset")
8c938ddc6df3 ("PCI/ATS: Add pci_ats_page_aligned() interface")
8cbb8a9374a2 ("PCI/ATS: Move pci_prg_resp_pasid_required() to 
CONFIG_PCI_PRI")
9bf49e36d718 ("PCI/ATS: Handle sharing of PF PRI Capability with all VFs")
9c2120090586 ("PCI: Provide pci_match_id() with CONFIG_PCI=n")
b92b512a435d ("PCI: Make pci_ats_init() private")
c065190bbcd4 ("PCI/ATS: Cache PRI Capability offset")
e5567f5f6762 ("PCI/ATS: Add pci_prg_resp_pasid_required() interface.")
e5adf79a1d80 ("PCI/ATS: Cache PRI PRG Response PASID Required bit")
fff42928ade5 ("PCI/ATS: Add inline to pci_prg_resp_pasid_required()")

v4.14.189: Failed to apply! Possible dependencies:
1b79c5284439 ("PCI: cadence: Add host driver for Cadence PCIe controller")
1e4511604dfa ("PCI/AER: Expose internal API for obtaining AER information")
3133e6dd07ed ("PCI: Tidy Makefiles")
37dddf14f1ae ("PCI: cadence: Add EndPoint Controller driver for Cadence 
PCIe controller")
4696b828ca37 ("PCI/AER: Hoist aerdrv.c, aer_inject.c up to 
drivers/pci/pcie/")
4f802170a861 ("PCI/DPC: Save and restore config state")
8c938ddc6df3 ("PCI/ATS: Add pci_ats_page_aligned() interface")
8cbb8a9374a2 ("PCI/ATS: Move pci_prg_resp_pasid_required() to 
CONFIG_PCI_PRI")
9bf49e36d718 ("PCI/ATS: Handle sharing of PF PRI Capability with all VFs")
9de0eec29c07 ("PCI: Regroup all PCI related entries into 
drivers/pci/Makefile")
b92b512a435d ("PCI: Make pci_ats_init() private")
c065190bbcd4 ("PCI/ATS: Cache PRI Capability offset")
d3252ace0bc6 ("PCI: Restore resized BAR state on resume")
e5567f5f6762 ("PCI/ATS: Add pci_prg_resp_pasid_required() interface.")
e5adf79a1d80 ("PCI/ATS: Cache PRI PRG Response PASID Required bit")
fff42928ade5 ("PCI/ATS: Add inline to pci_prg_resp_pasid_required()")

v4.9.231: Failed to apply! Possible dependencies:
4ebeb1ec56d4 ("PCI: Restore PRI and PASID state after Function-Level Reset")
8c938ddc6df3 ("PCI/ATS: Add pci_ats_page_aligned() interface")
8cbb8a9374a2 ("PCI/ATS: Move pci_prg_resp_pasid_required() to 
CONFIG_PCI_PRI")
9bf49e36d718 ("PCI/ATS: Handle sharing of PF PRI Capability with all VFs")
a4f4fa681add ("PCI: Cache PRI and PASID bits in pci_dev")
c065190bbcd4 ("PCI/ATS: Cache PRI Capability offset")
e5567f5f6762 ("PCI/ATS: Add pci_prg_resp_pasid_required() interface.")
e5adf79a1d80 ("PCI/ATS: Cache PRI PRG Response PASID Required bit")
fff42928ade5 ("PCI/ATS: Add inline to pci_prg_resp_pasid_required()")

v4.4.231: Failed to apply! Possible dependencies:
2a2aca316aed ("PCI: Include  for isa_dma_bridge_buggy")
4d3f13845957 ("PCI: Add pci_unmap_iospace() to unmap I/O resources")
4ebeb1ec56d4 ("PCI: Restore PRI and PASID state after Function-Level Reset")
8cbb8a9374a2 ("PCI/ATS: Move pci_prg_resp_pasid_required() to 
CONFIG_PCI_PRI")
9bf49e36d718 ("PCI/ATS: Handle sharing of PF PRI Capability with all VFs")
a4f4fa681add ("PCI: Cache PRI and PASID bits in pci_dev")
c5076cfe7689 ("PCI, of: Move PCI I/O space management to PCI core code")
e5567f5f6762 ("PCI/ATS: Add pci_prg_resp_pasid_required() interface.")


NOTE: The patch will not be queued to stable trees until it is upstream.

How should we proceed with this patch?

-- 
Thanks
Sasha
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH] iommu: amd: Add missing function prototypes to fix -Wmissing-prototypes

2020-07-27 Thread Krzysztof Kozlowski
Few exported functions from AMD IOMMU driver are missing prototypes.
They have declaration in arch/x86/events/amd/iommu.h but this file
cannot be included in the driver.  Add prototypes to fix W=1 warnings
like:

drivers/iommu/amd/init.c:3066:19: warning:
no previous prototype for 'get_amd_iommu' [-Wmissing-prototypes]
 3066 | struct amd_iommu *get_amd_iommu(unsigned int idx)

Signed-off-by: Krzysztof Kozlowski 
---
 drivers/iommu/amd/amd_iommu.h | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 57309716fd18..0781b7112467 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -41,6 +41,15 @@ extern int amd_iommu_guest_ir;
 struct iommu_domain;
 
 extern bool amd_iommu_v2_supported(void);
+extern struct amd_iommu *get_amd_iommu(unsigned int idx);
+extern u8 amd_iommu_pc_get_max_banks(unsigned int idx);
+extern bool amd_iommu_pc_supported(void);
+extern u8 amd_iommu_pc_get_max_counters(unsigned int idx);
+extern int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
+   u8 fxn, u64 *value);
+extern int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
+   u8 fxn, u64 *value);
+
 extern int amd_iommu_register_ppr_notifier(struct notifier_block *nb);
 extern int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb);
 extern void amd_iommu_domain_direct_map(struct iommu_domain *dom);
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


[PATCH] iommu: mtk: Drop of_match_ptr to fix -Wunused-const-variable

2020-07-27 Thread Krzysztof Kozlowski
The of_device_id is included unconditionally by of.h header and used
in the driver as well.  Remove of_match_ptr to fix W=1 compile test
warning with !CONFIG_OF:

drivers/iommu/mtk_iommu.c:833:34: warning: 'mtk_iommu_of_ids' defined but 
not used [-Wunused-const-variable=]
  833 | static const struct of_device_id mtk_iommu_of_ids[] = {
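
For context, of_match_ptr() compiles to NULL when CONFIG_OF is not set,
which is what leaves the table unreferenced (paraphrasing the definition
in include/linux/of.h):

#ifdef CONFIG_OF
#define of_match_ptr(_ptr)	(_ptr)
#else
#define of_match_ptr(_ptr)	NULL
#endif

Since the driver references the table unconditionally anyway, dropping
the wrapper is the fix below.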

Reported-by: kernel test robot 
Signed-off-by: Krzysztof Kozlowski 
---
 drivers/iommu/mtk_iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 59e5a62a34db..cdfd9f8be190 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -843,7 +843,7 @@ static struct platform_driver mtk_iommu_driver = {
.remove = mtk_iommu_remove,
.driver = {
.name = "mtk-iommu",
-   .of_match_table = of_match_ptr(mtk_iommu_of_ids),
+   .of_match_table = mtk_iommu_of_ids,
.pm = &mtk_iommu_pm_ops,
}
 };
-- 
2.17.1

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH] dma-pool: Do not allocate pool memory from CMA

2020-07-27 Thread Nicolas Saenz Julienne
Hi Christoph,
thanks for having a look at this!

On Fri, 2020-07-24 at 15:41 +0200, Christoph Hellwig wrote:
> Yes, the iommu is an interesting case, and the current code is
> wrong for that.

Care to expand on this? I do get that checking dma_coherent_ok() on memory
that'll later on be mapped into an iommu is kind of silly, although I think
harmless in Amir's specific case, since devices have wide enough dma-ranges. Is
there more to it?

> Can you try the patch below?  It contains a modified version of Nicolas'
> patch to try CMA again for the expansion and a new (for now hackish) way to
> not apply the addressability check for dma-iommu allocations.
> 
> diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
> index 6bc74a2d51273e..ec5e525d2b9309 100644
> --- a/kernel/dma/pool.c
> +++ b/kernel/dma/pool.c
> @@ -3,7 +3,9 @@
>   * Copyright (C) 2012 ARM Ltd.
>   * Copyright (C) 2020 Google LLC
>   */
> +#include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -55,6 +57,31 @@ static void dma_atomic_pool_size_add(gfp_t gfp, size_t
> size)
>   pool_size_kernel += size;
>  }
>  
> +static bool cma_in_zone(gfp_t gfp)
> +{
> + phys_addr_t end;
> + unsigned long size;
> + struct cma *cma;
> +
> + cma = dev_get_cma_area(NULL);
> + if (!cma)
> + return false;
> +
> + size = cma_get_size(cma);
> + if (!size)
> + return false;
> + end = cma_get_base(cma) - memblock_start_of_DRAM() + size - 1;
> +
> + /* CMA can't cross zone boundaries, see cma_activate_area() */
> + if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA) &&
> + end <= DMA_BIT_MASK(zone_dma_bits))
> + return true;
> + if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32) &&
> + end <= DMA_BIT_MASK(32))
> + return true;
> + return true;

IIUC this will always return true when a CMA area is present, which
reverts to the previous behaviour (previous as in breaking some rpi4
setups), doesn't it?
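
(For what it's worth, presumably the zone checks were meant to be
decisive instead of falling through, e.g.:

	/* CMA can't cross zone boundaries, see cma_activate_area() */
	if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA))
		return end <= DMA_BIT_MASK(zone_dma_bits);
	if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32))
		return end <= DMA_BIT_MASK(32);
	return true;

so that a GFP_DMA/GFP_DMA32 request is rejected when the CMA area ends
beyond the zone limit, while zone-less requests still succeed.)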

Regards,
Nicolas



___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu

Re: [Freedreno] [PATCH v10 06/13] iommu/arm-smmu-qcom: Get and set the pagetable config for split pagetables

2020-07-27 Thread Rob Clark
On Mon, Jul 27, 2020 at 8:03 AM Jordan Crouse  wrote:
>
> On Sun, Jul 26, 2020 at 10:03:07AM -0700, Rob Clark wrote:
> > On Mon, Jul 20, 2020 at 8:41 AM Jordan Crouse  
> > wrote:
> > >
> > > The Adreno GPU has the capability to manage its own pagetables and switch
> > > them dynamically from the hardware. To do this the GPU uses TTBR1 for
> > > "global" GPU memory and creates local pagetables for each context and
> > > switches them dynamically with the GPU.
> > >
> > > Use DOMAIN_ATTR_PGTABLE_CFG to get the current configuration for the
> > > TTBR1 pagetable from the smmu driver so the leaf driver can create
> > > compatible pagetables for use with TTBR0.
> > >
> > > Because TTBR0 is disabled by default when TTBR1 is enabled the GPU
> > > driver can pass the configuration of one of the newly created pagetables
> > > back through DOMAIN_ATTR_PGTABLE_CFG as a trigger to enable translation on
> > > TTBR0.
> > >
> > > Signed-off-by: Jordan Crouse 
> > > ---
> > >
> > >  drivers/iommu/arm-smmu-qcom.c | 47 +++
> > >  drivers/iommu/arm-smmu.c  | 32 ++--
> > >  drivers/iommu/arm-smmu.h  | 10 
> > >  3 files changed, 81 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/drivers/iommu/arm-smmu-qcom.c b/drivers/iommu/arm-smmu-qcom.c
> > > index b9a5c5369e86..9a0c64ca9cb6 100644
> > > --- a/drivers/iommu/arm-smmu-qcom.c
> > > +++ b/drivers/iommu/arm-smmu-qcom.c
> > > @@ -34,6 +34,52 @@ static bool qcom_adreno_smmu_is_gpu_device(struct 
> > > device *dev)
> > > return false;
> > >  }
> > >
> > > +/*
> > > + * Local implementation to configure TTBR0 with the specified pagetable 
> > > config.
> > > + * The GPU driver will call this to enable TTBR0 when per-instance 
> > > pagetables
> > > + * are active
> > > + */
> > > +static int qcom_adreno_smmu_set_pgtable_cfg(struct arm_smmu_domain 
> > > *smmu_domain,
> > > +   struct io_pgtable_cfg *pgtbl_cfg)
> > > +{
> > > +   struct io_pgtable *pgtable = 
> > > io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops);
> > > +   struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
> > > +   struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
> > > +
> > > +   /* The domain must have split pagetables already enabled */
> > > +   if (cb->tcr[0] & ARM_SMMU_TCR_EPD1)
> > > +   return -EINVAL;
> > > +
> > > +   /* If the pagetable config is NULL, disable TTBR0 */
> > > +   if (!pgtbl_cfg) {
> > > +   /* Do nothing if it is already disabled */
> > > +   if ((cb->tcr[0] & ARM_SMMU_TCR_EPD0))
> > > +   return -EINVAL;
> > > +
> > > +   /* Set TCR to the original configuration */
> > > +   cb->tcr[0] = arm_smmu_lpae_tcr(&pgtable->cfg);
> > > +   cb->ttbr[0] = FIELD_PREP(ARM_SMMU_TTBRn_ASID, 
> > > cb->cfg->asid);
> > > +   } else {
> > > +   u32 tcr = cb->tcr[0];
> > > +
> > > +   /* FIXME: What sort of validation do we need to do here? 
> > > */
> > > +
> > > +   /* Don't call this again if TTBR0 is already enabled */
> > > +   if (!(cb->tcr[0] & ARM_SMMU_TCR_EPD0))
> > > +   return -EINVAL;
> > > +
> > > +   tcr |= arm_smmu_lpae_tcr(pgtbl_cfg);
> > > +   tcr &= ~(ARM_SMMU_TCR_EPD0 | ARM_SMMU_TCR_EPD1);
> > > +
> > > +   cb->tcr[0] = tcr;
> > > +   cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
> > > +   cb->ttbr[0] |= FIELD_PREP(ARM_SMMU_TTBRn_ASID, 
> > > cb->cfg->asid);
> > > +   }
> > > +
> > > +   arm_smmu_write_context_bank(smmu_domain->smmu, cb->cfg->cbndx);
> > > +   return 0;
> > > +}
> > > +
> > >  static int qcom_adreno_smmu_alloc_context_bank(struct arm_smmu_domain 
> > > *smmu_domain,
> > > struct device *dev, int start, int count)
> > >  {
> > > @@ -131,6 +177,7 @@ static const struct arm_smmu_impl 
> > > qcom_adreno_smmu_impl = {
> > > .def_domain_type = qcom_smmu_def_domain_type,
> > > .reset = qcom_smmu500_reset,
> > > .alloc_context_bank = qcom_adreno_smmu_alloc_context_bank,
> > > +   .set_pgtable_cfg = qcom_adreno_smmu_set_pgtable_cfg,
> > >  };
> > >
> > >  static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device 
> > > *smmu,
> > > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> > > index fff536a44faa..e1036ae54a8d 100644
> > > --- a/drivers/iommu/arm-smmu.c
> > > +++ b/drivers/iommu/arm-smmu.c
> > > @@ -86,13 +86,6 @@ struct arm_smmu_smr {
> > > boolvalid;
> > >  };
> > >
> > > -struct arm_smmu_cb {
> > > -   u64 ttbr[2];
> > > -   u32 tcr[2];
> > > -   u32 mair[2];
> > > -   struct arm_smmu_cfg *cfg;
> > > -};
> > > -
> > >  static bool using_legacy_binding, using_generic_binding;
> > >
> > >  static inline int 

Re: [Freedreno] [PATCH v10 06/13] iommu/arm-smmu-qcom: Get and set the pagetable config for split pagetables

2020-07-27 Thread Jordan Crouse
On Sun, Jul 26, 2020 at 10:03:07AM -0700, Rob Clark wrote:
> On Mon, Jul 20, 2020 at 8:41 AM Jordan Crouse  wrote:
> >
> > The Adreno GPU has the capability to manage its own pagetables and switch
> > them dynamically from the hardware. To do this the GPU uses TTBR1 for
> > "global" GPU memory and creates local pagetables for each context and
> > switches them dynamically with the GPU.
> >
> > Use DOMAIN_ATTR_PGTABLE_CFG to get the current configuration for the
> > TTBR1 pagetable from the smmu driver so the leaf driver can create
> > compatible pagetables for use with TTBR0.
> >
> > Because TTBR0 is disabled by default when TTBR1 is enabled the GPU
> > driver can pass the configuration of one of the newly created pagetables
> > back through DOMAIN_ATTR_PGTABLE_CFG as a trigger to enable translation on
> > TTBR0.
> >
> > Signed-off-by: Jordan Crouse 
> > ---
> >
> >  drivers/iommu/arm-smmu-qcom.c | 47 +++
> >  drivers/iommu/arm-smmu.c  | 32 ++--
> >  drivers/iommu/arm-smmu.h  | 10 
> >  3 files changed, 81 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/iommu/arm-smmu-qcom.c b/drivers/iommu/arm-smmu-qcom.c
> > index b9a5c5369e86..9a0c64ca9cb6 100644
> > --- a/drivers/iommu/arm-smmu-qcom.c
> > +++ b/drivers/iommu/arm-smmu-qcom.c
> > @@ -34,6 +34,52 @@ static bool qcom_adreno_smmu_is_gpu_device(struct device 
> > *dev)
> > return false;
> >  }
> >
> > +/*
> > + * Local implementation to configure TTBR0 with the specified pagetable 
> > config.
> > + * The GPU driver will call this to enable TTBR0 when per-instance 
> > pagetables
> > + * are active
> > + */
> > +static int qcom_adreno_smmu_set_pgtable_cfg(struct arm_smmu_domain 
> > *smmu_domain,
> > +   struct io_pgtable_cfg *pgtbl_cfg)
> > +{
> > +   struct io_pgtable *pgtable = 
> > io_pgtable_ops_to_pgtable(smmu_domain->pgtbl_ops);
> > +   struct arm_smmu_cfg *cfg = _domain->cfg;
> > +   struct arm_smmu_cb *cb = _domain->smmu->cbs[cfg->cbndx];
> > +
> > +   /* The domain must have split pagetables already enabled */
> > +   if (cb->tcr[0] & ARM_SMMU_TCR_EPD1)
> > +   return -EINVAL;
> > +
> > +   /* If the pagetable config is NULL, disable TTBR0 */
> > +   if (!pgtbl_cfg) {
> > +   /* Do nothing if it is already disabled */
> > +   if ((cb->tcr[0] & ARM_SMMU_TCR_EPD0))
> > +   return -EINVAL;
> > +
> > +   /* Set TCR to the original configuration */
> > +   cb->tcr[0] = arm_smmu_lpae_tcr(&pgtable->cfg);
> > +   cb->ttbr[0] = FIELD_PREP(ARM_SMMU_TTBRn_ASID, 
> > cb->cfg->asid);
> > +   } else {
> > +   u32 tcr = cb->tcr[0];
> > +
> > +   /* FIXME: What sort of validation do we need to do here? */
> > +
> > +   /* Don't call this again if TTBR0 is already enabled */
> > +   if (!(cb->tcr[0] & ARM_SMMU_TCR_EPD0))
> > +   return -EINVAL;
> > +
> > +   tcr |= arm_smmu_lpae_tcr(pgtbl_cfg);
> > +   tcr &= ~(ARM_SMMU_TCR_EPD0 | ARM_SMMU_TCR_EPD1);
> > +
> > +   cb->tcr[0] = tcr;
> > +   cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
> > +   cb->ttbr[0] |= FIELD_PREP(ARM_SMMU_TTBRn_ASID, 
> > cb->cfg->asid);
> > +   }
> > +
> > +   arm_smmu_write_context_bank(smmu_domain->smmu, cb->cfg->cbndx);
> > +   return 0;
> > +}
> > +
> >  static int qcom_adreno_smmu_alloc_context_bank(struct arm_smmu_domain 
> > *smmu_domain,
> > struct device *dev, int start, int count)
> >  {
> > @@ -131,6 +177,7 @@ static const struct arm_smmu_impl qcom_adreno_smmu_impl 
> > = {
> > .def_domain_type = qcom_smmu_def_domain_type,
> > .reset = qcom_smmu500_reset,
> > .alloc_context_bank = qcom_adreno_smmu_alloc_context_bank,
> > +   .set_pgtable_cfg = qcom_adreno_smmu_set_pgtable_cfg,
> >  };
> >
> >  static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device 
> > *smmu,
> > diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
> > index fff536a44faa..e1036ae54a8d 100644
> > --- a/drivers/iommu/arm-smmu.c
> > +++ b/drivers/iommu/arm-smmu.c
> > @@ -86,13 +86,6 @@ struct arm_smmu_smr {
> > boolvalid;
> >  };
> >
> > -struct arm_smmu_cb {
> > -   u64 ttbr[2];
> > -   u32 tcr[2];
> > -   u32 mair[2];
> > -   struct arm_smmu_cfg *cfg;
> > -};
> > -
> >  static bool using_legacy_binding, using_generic_binding;
> >
> >  static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
> > @@ -558,7 +551,7 @@ static void arm_smmu_init_context_bank(struct 
> > arm_smmu_domain *smmu_domain,
> > }
> >  }
> >
> > -static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int 
> > idx)
> > +void 

Re: [PATCH v10 04/13] iommu/arm-smmu-qcom: Add implementation for the adreno GPU SMMU

2020-07-27 Thread Jordan Crouse
On Sun, Jul 26, 2020 at 11:27:03PM -0700, Bjorn Andersson wrote:
> On Mon 20 Jul 08:40 PDT 2020, Jordan Crouse wrote:
> > diff --git a/drivers/iommu/arm-smmu-qcom.c b/drivers/iommu/arm-smmu-qcom.c
> [..]
> > +static int qcom_adreno_smmu_alloc_context_bank(struct arm_smmu_domain 
> > *smmu_domain,
> > +   struct device *dev, int start, int count)
> > +{
> > +   struct arm_smmu_device *smmu = smmu_domain->smmu;
> > +
> > +   /*
> > +* Assign context bank 0 to the GPU device so the GPU hardware can
> > +* switch pagetables
> > +*/
> > +   if (qcom_adreno_smmu_is_gpu_device(dev)) {
> > +   if (start > 0 || test_bit(0, smmu->context_map))
> > +   return -ENOSPC;
> > +
> > +   set_bit(0, smmu->context_map);
> > +   return 0;
> > +   }
> > +
> > +   return __arm_smmu_alloc_bitmap(smmu->context_map, start, count);
> 
> If we end up here before the GPU device shows up this is going to
> steal the first context bank, causing the subsequent allocation for the
> GPU to always fail.
> 
> As such I think it would be appropriate for you to adjust "start" to
> never be 0 here. And I think it would be appropriate to write this
> function as:
> 
>   if (gpu) {
>   start = 0;
>   count = 1;
>   } else {
>   if (start == 0)
>   start = 1;
>   }
> 
>   return __arm_smmu_alloc_bitmap(smmu->context_map, start, count);

Excellent suggestions.  Thanks.

Jordan

> Regards,
> Bjorn

-- 
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 10/11] media: exynos4-is: Prevent duplicate call to media_pipeline_stop

2020-07-27 Thread Tomasz Figa
On Sat, Jul 25, 2020 at 1:46 AM Jonathan Bakker  wrote:
>
> Hi Tomasz,
>
> On 2020-07-20 6:10 a.m., Tomasz Figa wrote:
> > On Sat, Jul 11, 2020 at 8:17 PM Jonathan Bakker  wrote:
> >>
> >> Hi Tomasz,
> >>
> >> On 2020-07-07 11:44 a.m., Tomasz Figa wrote:
> >>> Hi Jonathan,
> >>>
> >>> On Sat, Apr 25, 2020 at 07:26:49PM -0700, Jonathan Bakker wrote:
>  media_pipeline_stop can be called from both release and streamoff,
>  so make sure they're both protected under the streaming flag and
>  not just one of them.
> >>>
> >>> First of all, thanks for the patch.
> >>>
> >>> Shouldn't it be that release calls streamoff, so that only streamoff
> >>> is supposed to have the call to media_pipeline_stop()?
> >>>
> >>
> >> I can't say that I understand the whole media subsystem enough to know :)
> >> Since media_pipeline_start is called in streamon, it makes sense that 
> >> streamoff
> >> should have the media_pipeline_stop call.  However, even after removing 
> >> the call
> >> in fimc_capture_release I'm still getting a backtrace such as
> >>
> >> [   73.843117] [ cut here ]
> >> [   73.843251] WARNING: CPU: 0 PID: 1575 at 
> >> drivers/media/mc/mc-entity.c:554 media_pipeline_stop+0x20/0x2c [mc]
> >> [   73.843265] Modules linked in: s5p_fimc v4l2_fwnode exynos4_is_common 
> >> videobuf2_dma_contig pvrsrvkm_s5pv210_sgx540_120 videobuf2_memops 
> >> v4l2_mem2mem brcmfmac videobuf2_v4l2 videobuf2_common hci_uart 
> >> sha256_generic libsha256 btbcm bluetooth cfg80211 brcmutil ecdh_generic 
> >> ecc ce147 libaes s5ka3dfx videodev atmel_mxt_ts mc pwm_vibra rtc_max8998
> >> [   73.843471] CPU: 0 PID: 1575 Comm: v4l2-ctl Not tainted 
> >> 5.7.0-14534-g2b33418b254e-dirty #669
> >> [   73.843487] Hardware name: Samsung S5PC110/S5PV210-based board
> >> [   73.843562] [] (unwind_backtrace) from [] 
> >> (show_stack+0x10/0x14)
> >> [   73.843613] [] (show_stack) from [] 
> >> (__warn+0xbc/0xd4)
> >> [   73.843661] [] (__warn) from [] 
> >> (warn_slowpath_fmt+0x60/0xb8)
> >> [   73.843734] [] (warn_slowpath_fmt) from [] 
> >> (media_pipeline_stop+0x20/0x2c [mc])
> >> [   73.843867] [] (media_pipeline_stop [mc]) from [] 
> >> (fimc_cap_streamoff+0x38/0x48 [s5p_fimc])
> >> [   73.844109] [] (fimc_cap_streamoff [s5p_fimc]) from 
> >> [] (__video_do_ioctl+0x220/0x448 [videodev])
> >> [   73.844308] [] (__video_do_ioctl [videodev]) from 
> >> [] (video_usercopy+0x114/0x498 [videodev])
> >> [   73.844438] [] (video_usercopy [videodev]) from [] 
> >> (ksys_ioctl+0x20c/0xa10)
> >> [   73.844484] [] (ksys_ioctl) from [] 
> >> (ret_fast_syscall+0x0/0x54)
> >> [   73.844505] Exception stack(0xe5083fa8 to 0xe5083ff0)
> >> [   73.844546] 3fa0:   0049908d bef8f8c0 0003 40045613 
> >> bef8d5ac 004c1d16
> >> [   73.844590] 3fc0: 0049908d bef8f8c0 bef8f8c0 0036 bef8d5ac  
> >> b6d6b320 bef8faf8
> >> [   73.844620] 3fe0: 004e3ed4 bef8c718 004990bb b6f00d0a
> >> [   73.844642] ---[ end trace e6a4a8b2f20addd4 ]---
> >>
> >> The command I'm using for testing is
> >>
> >> v4l2-ctl --verbose -d 1 --stream-mmap=3 --stream-skip=2 
> >> --stream-to=./test.yuv --stream-count=1
> >>
> >> Since I noticed that the streaming flag was being checked 
> >> fimc_capture_release
> >> but not in fimc_cap_streamoff, I assumed that it was simply a missed 
> >> check.  Comparing
> >> with other drivers, they seem to call media_pipeline_stop in their vb2_ops 
> >> stop_streaming
> >> callback.
> >
> > vb2 does a lot of state handling internally and makes sure that driver
> > ops are not called when unnecessary, preventing double calls for
> > example. I suppose it could be a better place to stop the pipeline
> > indeed. However, ...
> >
> >>
> >> I'm willing to test various options
> >>
> >
> > I think it could make sense to add something like WARN_ON(1) inside
> > media_pipeline_stop() and then check where the first call came from.
>
> Here's the results of that:
>
> [   69.876823] [ cut here ]
> [   69.876962] WARNING: CPU: 0 PID: 1566 at drivers/media/mc/mc-entity.c:550 
> __media_pipeline_stop+0x24/0xfc [mc]
> [   69.876976] Modules linked in: s5p_fimc v4l2_fwnode exynos4_is_common 
> videobuf2_dma_contig videobuf2_memops v4l2_mem2mem brcmfmac videobuf2_v4l2 
> pvrsrvkm_s5pv210_sgx540_120 videobuf2_common hci_uart sha256_generic btbcm 
> libsha256 bluetooth cfg80211 ce147 brcmutil s5ka3dfx ecdh_generic ecc libaes 
> videodev atmel_mxt_ts mc pwm_vibra rtc_max8998
> [   69.877182] CPU: 0 PID: 1566 Comm: v4l2-ctl Not tainted 
> 5.7.0-14540-gb1220848c797-dirty #681
> [   69.877198] Hardware name: Samsung S5PC110/S5PV210-based board
> [   69.877274] [] (unwind_backtrace) from [] 
> (show_stack+0x10/0x14)
> [   69.877326] [] (show_stack) from [] (__warn+0xbc/0xd4)
> [   69.877375] [] (__warn) from [] 
> (warn_slowpath_fmt+0x60/0xb8)
> [   69.877448] [] (warn_slowpath_fmt) from [] 
> (__media_pipeline_stop+0x24/0xfc [mc])
> [   69.877540] [] (__media_pipeline_stop [mc]) from [] 

[GIT PULL] iommu/arm-smmu: Move driver files into their own subdir

2020-07-27 Thread Will Deacon
Hi Joerg,

As requested in [1], here is a second Arm SMMU pull request for 5.9, moving
the driver files into their own subdirectory to avoid cluttering
drivers/iommu/.

Cheers,

Will

[1] https://lore.kernel.org/r/20200722133323.gg27...@8bytes.org

--->8

The following changes since commit aa7ec73297df57a86308fee78d2bf86e22ea0bae:

  iommu/arm-smmu: Add global/context fault implementation hooks (2020-07-20 
09:30:51 +0100)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/will/linux.git 
tags/arm-smmu-updates

for you to fetch changes up to e86d1aa8b60f7ea18d36f50296d7d20eb2852e7e:

  iommu/arm-smmu: Move Arm SMMU drivers into their own subdirectory (2020-07-27 
12:53:10 +0100)


More Arm SMMU updates for 5.9

- Move Arm SMMU driver files into their own subdirectory


Will Deacon (1):
  iommu/arm-smmu: Move Arm SMMU drivers into their own subdirectory

 MAINTAINERS| 4 ++--
 drivers/iommu/Makefile | 5 +
 drivers/iommu/arm/Makefile | 2 ++
 drivers/iommu/arm/arm-smmu-v3/Makefile | 2 ++
 drivers/iommu/{ => arm/arm-smmu-v3}/arm-smmu-v3.c  | 0
 drivers/iommu/arm/arm-smmu/Makefile| 4 
 drivers/iommu/{ => arm/arm-smmu}/arm-smmu-impl.c   | 0
 drivers/iommu/{ => arm/arm-smmu}/arm-smmu-nvidia.c | 0
 drivers/iommu/{ => arm/arm-smmu}/arm-smmu-qcom.c   | 0
 drivers/iommu/{ => arm/arm-smmu}/arm-smmu.c| 0
 drivers/iommu/{ => arm/arm-smmu}/arm-smmu.h| 0
 drivers/iommu/{ => arm/arm-smmu}/qcom_iommu.c  | 0
 12 files changed, 11 insertions(+), 6 deletions(-)
 create mode 100644 drivers/iommu/arm/Makefile
 create mode 100644 drivers/iommu/arm/arm-smmu-v3/Makefile
 rename drivers/iommu/{ => arm/arm-smmu-v3}/arm-smmu-v3.c (100%)
 create mode 100644 drivers/iommu/arm/arm-smmu/Makefile
 rename drivers/iommu/{ => arm/arm-smmu}/arm-smmu-impl.c (100%)
 rename drivers/iommu/{ => arm/arm-smmu}/arm-smmu-nvidia.c (100%)
 rename drivers/iommu/{ => arm/arm-smmu}/arm-smmu-qcom.c (100%)
 rename drivers/iommu/{ => arm/arm-smmu}/arm-smmu.c (100%)
 rename drivers/iommu/{ => arm/arm-smmu}/arm-smmu.h (100%)
 rename drivers/iommu/{ => arm/arm-smmu}/qcom_iommu.c (100%)
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 11/21] iommu/mediatek: Add power-domain operation

2020-07-27 Thread chao hao
On Sat, 2020-07-11 at 14:48 +0800, Yong Wu wrote:
> In the previous SoC, the M4U HW is in the EMI power domain which is
> always on. the latest M4U is in the display power domain which may be
> turned on/off, thus we have to add pm_runtime interface for it.
> 
> we should enable its power before M4U hw initial. and disable it after HW
> initialize.
> 
> When the engine work, the engine always enable the power and clocks for
> smi-larb/smi-common, then the M4U's power will always be powered on
> automatically via the device link with smi-common.
> 
> Note: we don't enable the M4U power in iommu_map/unmap for tlb flush.
> If its power already is on, of course it is ok. if the power is off,
> the main tlb will be reset while M4U power on, thus the tlb flush while
> m4u power off is unnecessary, just skip it.
> 
> Signed-off-by: Yong Wu 
> ---
>  drivers/iommu/mtk_iommu.c | 54 ++-
>  1 file changed, 47 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
> index 931fdd19c8f3..03a6d66f4bef 100644
> --- a/drivers/iommu/mtk_iommu.c
> +++ b/drivers/iommu/mtk_iommu.c
> @@ -20,6 +20,7 @@
>  #include 
>  #include 
>  #include 
> +#include <linux/pm_runtime.h>
>  #include 
>  #include 
>  #include 
> @@ -172,6 +173,19 @@ static struct mtk_iommu_domain *to_mtk_domain(struct 
> iommu_domain *dom)
>   return container_of(dom, struct mtk_iommu_domain, domain);
>  }
>  
> +static int mtk_iommu_rpm_get(struct device *dev)
> +{
> + if (pm_runtime_enabled(dev))
> + return pm_runtime_get_sync(dev);
> + return 0;
> +}
> +
> +static void mtk_iommu_rpm_put(struct device *dev)
> +{
> + if (pm_runtime_enabled(dev))
> + pm_runtime_put_autosuspend(dev);
> +}
> +
>  static void mtk_iommu_tlb_flush_all(void *cookie)
>  {
>   struct mtk_iommu_data *data = cookie;
> @@ -193,6 +207,11 @@ static void mtk_iommu_tlb_flush_range_sync(unsigned long 
> iova, size_t size,
>   u32 tmp;
>  
>   for_each_m4u(data) {
> + /* skip tlb flush when pm is not active */
> + if (pm_runtime_enabled(data->dev) &&
> + !pm_runtime_active(data->dev))
> + continue;
> +
> + spin_lock_irqsave(&data->tlb_lock, flags);
>   writel_relaxed(F_INVLD_EN1 | F_INVLD_EN0,
>  data->base + data->plat_data->inv_sel_reg);
> @@ -377,15 +396,20 @@ static int mtk_iommu_attach_device(struct iommu_domain 
> *domain,
>  {
>   struct mtk_iommu_data *data = dev_iommu_priv_get(dev);
>   struct mtk_iommu_domain *dom = to_mtk_domain(domain);
> + int ret;
>  
>   if (!data)
>   return -ENODEV;
>  
>   /* Update the pgtable base address register of the M4U HW */
>   if (!data->m4u_dom) {
> + ret = mtk_iommu_rpm_get(dev);
> + if (ret < 0)
> + return ret;
>   data->m4u_dom = dom;
>   writel(dom->cfg.arm_v7s_cfg.ttbr & MMU_PT_ADDR_MASK,
>  data->base + REG_MMU_PT_BASE_ADDR);
> + mtk_iommu_rpm_put(dev);
>   }
>  
>   mtk_iommu_config(data, dev, true);
> @@ -543,10 +567,14 @@ static int mtk_iommu_hw_init(const struct 
> mtk_iommu_data *data)
>   u32 regval;
>   int ret;
>  
> - ret = clk_prepare_enable(data->bclk);
> - if (ret) {
> - dev_err(data->dev, "Failed to enable iommu bclk(%d)\n", ret);
> - return ret;
> + /* bclk will be enabled in pm callback in power-domain case. */
> + if (!pm_runtime_enabled(data->dev)) {
> + ret = clk_prepare_enable(data->bclk);
> + if (ret) {
> + dev_err(data->dev, "Failed to enable iommu bclk(%d)\n",
> + ret);
> + return ret;
> + }
>   }
>  
>   if (data->plat_data->m4u_plat == M4U_MT8173) {
> @@ -728,7 +756,15 @@ static int mtk_iommu_probe(struct platform_device *pdev)
>  
>   platform_set_drvdata(pdev, data);
>  
> + if (dev->pm_domain)
> + pm_runtime_enable(dev);

hi yong,

If you put "pm_runtime_enable" here, the device_link with smi_common
from the previous patch may not be created, because of:
	if (i || !pm_runtime_enabled(dev))
		continue;

Should it be moved up front?

best regards,
chao

> +
> + ret = mtk_iommu_rpm_get(dev);
> + if (ret < 0)
> + return ret;
> +
>   ret = mtk_iommu_hw_init(data);
> + mtk_iommu_rpm_put(dev);
>   if (ret)
>   return ret;
>  
> @@ -801,6 +837,10 @@ static int __maybe_unused mtk_iommu_resume(struct device 
> *dev)
>   dev_err(data->dev, "Failed to enable clk(%d) in resume\n", ret);
>   return ret;
>   }
> +
> + /* Avoid the first resume affecting the default values of the registers below. */
> + if (!m4u_dom)
> + return 0;
>   writel_relaxed(reg->wr_len_ctrl, base + REG_MMU_WR_LEN_CTRL);
>   writel_relaxed(reg->misc_ctrl, base + 

RE: [PATCH v2] dma-contiguous: cleanup dma_alloc_contiguous

2020-07-27 Thread Song Bao Hua (Barry Song)



> -Original Message-
> From: iommu [mailto:iommu-boun...@lists.linux-foundation.org] On Behalf
> Of Christoph Hellwig
> Sent: Friday, July 24, 2020 12:02 AM
> To: iommu@lists.linux-foundation.org
> Cc: robin.mur...@arm.com
> Subject: [PATCH v2] dma-contiguous: cleanup dma_alloc_contiguous
> 
> Split out a cma_alloc_aligned helper to deal with the "interesting"
> calling conventions for cma_alloc, which then allows to the main function to
> be written straight forward.  This also takes advantage of the fact that NULL
> dev arguments have been gone from the DMA API for a while.
> 
> Signed-off-by: Christoph Hellwig 

Reviewed-by: Barry Song 

And I have rebased per-numa CMA patchset on top of this one.
https://lore.kernel.org/linux-arm-kernel/20200723131344.41472-1-song.bao@hisilicon.com/

> ---
> 
> Changes since v1:
>  - actually pass on the select struct cma
>  - clean up cma_alloc_aligned a bit
> 
>  kernel/dma/contiguous.c | 31 ++-
>  1 file changed, 14 insertions(+), 17 deletions(-)
> 
> diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
> index 15bc5026c485f2..cff7e60968b9e1 100644
> --- a/kernel/dma/contiguous.c
> +++ b/kernel/dma/contiguous.c
> @@ -215,6 +215,13 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages,
>   return cma_release(dev_get_cma_area(dev), pages, count);
>  }
> 
> +static struct page *cma_alloc_aligned(struct cma *cma, size_t size, gfp_t gfp)
> +{
> + unsigned int align = min(get_order(size), CONFIG_CMA_ALIGNMENT);
> +
> + return cma_alloc(cma, size >> PAGE_SHIFT, align, gfp & __GFP_NOWARN);
> +}
> +
>  /**
>   * dma_alloc_contiguous() - allocate contiguous pages
>   * @dev:   Pointer to device for which the allocation is performed.
> @@ -231,24 +238,14 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages,
>   */
>  struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp)
> {
> - size_t count = size >> PAGE_SHIFT;
> - struct page *page = NULL;
> - struct cma *cma = NULL;
> -
> - if (dev && dev->cma_area)
> - cma = dev->cma_area;
> - else if (count > 1)
> - cma = dma_contiguous_default_area;
> -
>   /* CMA can be used only in the context which permits sleeping */
> - if (cma && gfpflags_allow_blocking(gfp)) {
> - size_t align = get_order(size);
> - size_t cma_align = min_t(size_t, align, CONFIG_CMA_ALIGNMENT);
> -
> - page = cma_alloc(cma, count, cma_align, gfp & __GFP_NOWARN);
> - }
> -
> - return page;
> + if (!gfpflags_allow_blocking(gfp))
> + return NULL;
> + if (dev->cma_area)
> + return cma_alloc_aligned(dev->cma_area, size, gfp);
> + if (size <= PAGE_SIZE || !dma_contiguous_default_area)
> + return NULL;
> + return cma_alloc_aligned(dma_contiguous_default_area, size, gfp);
>  }
> 
>  /**
> --
> 2.27.0
Thanks
Barry

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH 18/21] iommu/mediatek: Add support for multi domain

2020-07-27 Thread Yong Wu
On Thu, 2020-07-23 at 14:47 -0600, Rob Herring wrote:
> On Sat, Jul 11, 2020 at 02:48:43PM +0800, Yong Wu wrote:
> > Some HW IPs (ex: CCU) require a special iova range. That means the
> > iova obtained from dma_alloc_attrs for those devices must be located
> > in their special range. In this patch, we allocate a special iova
> > range for each special requirement and create an iommu domain for
> > each iova range.
> > 
> > Meanwhile we still use one pagetable, which supports 16GB of iova.
> > 
> > After this patch, if the iova range of a master is over 4G, the master
> > should:
> > a) Declare its special dma-ranges in its dtsi node. For example, if we
> > preassign the iova 4G-8G for vcodec, then the vcodec dtsi node should:
> > dma-ranges = <0x1 0x0 0x1 0x0 0x1 0x0>;  /* 4G ~ 8G */
> 
> BTW, dma-ranges should be in the parent node of the vcodec.

But the vcodec doesn't have its own special parent node. Currently the
vcodec/display dtsi looks like this:

soc {

ovl:{  /* display */
/* No dma-ranges property; by default it is the 0-4G iova range. */
}

vcodec_dec: { /* decode */
dma-ranges = <0x1 0x0 0x1 0x0 0x1 0x0>; /* 4G ~ 8G*/
};

vcodec_enc: {  /* encode */
dma-ranges = <0x1 0x0 0x1 0x0 0x1 0x0>; /* 4G ~ 8G*/
};

camera: {
dma-ranges = <0x2 0x0 0x2 0x0 0x1 0x0>; /* 8G ~ 12G */
};

}

If we add the parent node for vcodec, the vcodec driver flow will be
changed, and it may be incompatible with the previous dtb.

Here we don't have an actual bus concept. Currently we support a 16GB
dma_addr (iova) range, and we only preassign 4G-8G for vcodec and
8G-12G for camera.

If the usage of dma-ranges here is different from the common one, then
what should I do here?

Thanks.
> 
> > b) Update the dma_mask:
> >  dma_set_mask_and_coherent(dev, DMA_BIT_MASK(33));
> 
> This should happen for you automatically. The DMA PFN offset 
> should also be 4GB here.

I may not be following you here.

If the iova starts at 0x1_0000_0000 and the phys address starts at
0x4000_0000, do you mean the dma-ranges should be
<0x1 0 0x0 0x40000000 0x1 0x0>? Then dma_pfn_offset =
PFN_DOWN(paddr - dma_addr) = 0x40000. This is also ok for us; we don't
call the macro in question, "dev->dma_pfn_offset".

The purpose of calling it here is to update the
dev->coherent_dma_mask[1], so that we can get an iova over 4GB.

[1]
https://elixir.bootlin.com/linux/v5.8-rc1/source/drivers/iommu/dma-iommu.c#L619

> 
> > 
> > Signed-off-by: Yong Wu 
> > ---
> >  drivers/iommu/mtk_iommu.c | 49 ---
> >  drivers/iommu/mtk_iommu.h |  3 ++-
> >  2 files changed, 42 insertions(+), 10 deletions(-)

___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


RE: [EXT] Re: [PATCH v2 03/12] ACPI/IORT: Make iort_msi_map_rid() PCI agnostic

2020-07-27 Thread Makarand Pawagi



> -Original Message-
> From: Bjorn Helgaas 
> Sent: Tuesday, July 21, 2020 8:29 PM
> To: Lorenzo Pieralisi 
> Cc: linux-arm-ker...@lists.infradead.org; Will Deacon ;
> Hanjun Guo ; Bjorn Helgaas
> ; Sudeep Holla ; Catalin
> Marinas ; Robin Murphy ;
> Rafael J. Wysocki ; iommu@lists.linux-foundation.org;
> linux-a...@vger.kernel.org; devicet...@vger.kernel.org; linux-
> p...@vger.kernel.org; Rob Herring ; Joerg Roedel
> ; Marc Zyngier ; Makarand Pawagi
> ; Diana Madalina Craciun (OSS)
> ; Laurentiu Tudor 
> Subject: [EXT] Re: [PATCH v2 03/12] ACPI/IORT: Make iort_msi_map_rid() PCI
> agnostic
> 
> Caution: EXT Email
> 
> On Fri, Jun 19, 2020 at 09:20:04AM +0100, Lorenzo Pieralisi wrote:
> > There is nothing PCI specific in iort_msi_map_rid().
> >
> > Rename the function using a bus protocol agnostic name,
> > iort_msi_map_id(), and convert current callers to it.
> >
> > Signed-off-by: Lorenzo Pieralisi 
> > Cc: Will Deacon 
> > Cc: Hanjun Guo 
> > Cc: Bjorn Helgaas 
> > Cc: Sudeep Holla 
> > Cc: Catalin Marinas 
> > Cc: Robin Murphy 
> > Cc: "Rafael J. Wysocki" 
> 
> Acked-by: Bjorn Helgaas 
> 
> Sorry I missed this!
[Makarand Pawagi] 
Thanks Bjorn. Hi Rafael, can you also finalize your review?
> 
> > ---
> >  drivers/acpi/arm64/iort.c | 12 ++--
> >  drivers/pci/msi.c |  2 +-
> >  include/linux/acpi_iort.h |  6 +++---
> >  3 files changed, 10 insertions(+), 10 deletions(-)
> >
> > diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
> > index 902e2aaca946..53f9ef515089 100644
> > --- a/drivers/acpi/arm64/iort.c
> > +++ b/drivers/acpi/arm64/iort.c
> > @@ -568,22 +568,22 @@ static struct acpi_iort_node
> > *iort_find_dev_node(struct device *dev)  }
> >
> >  /**
> > - * iort_msi_map_rid() - Map a MSI requester ID for a device
> > + * iort_msi_map_id() - Map a MSI input ID for a device
> >   * @dev: The device for which the mapping is to be done.
> > - * @req_id: The device requester ID.
> > + * @input_id: The device input ID.
> >   *
> > - * Returns: mapped MSI RID on success, input requester ID otherwise
> > + * Returns: mapped MSI ID on success, input ID otherwise
> >   */
> > -u32 iort_msi_map_rid(struct device *dev, u32 req_id)
> > +u32 iort_msi_map_id(struct device *dev, u32 input_id)
> >  {
> >   struct acpi_iort_node *node;
> >   u32 dev_id;
> >
> >   node = iort_find_dev_node(dev);
> >   if (!node)
> > - return req_id;
> > + return input_id;
> >
> > - iort_node_map_id(node, req_id, _id, IORT_MSI_TYPE);
> > + iort_node_map_id(node, input_id, _id, IORT_MSI_TYPE);
> >   return dev_id;
> >  }
> >
> > diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index
> > 74a91f52ecc0..77f48b95e277 100644
> > --- a/drivers/pci/msi.c
> > +++ b/drivers/pci/msi.c
> > @@ -1536,7 +1536,7 @@ u32 pci_msi_domain_get_msi_rid(struct irq_domain
> > *domain, struct pci_dev *pdev)
> >
> >   of_node = irq_domain_get_of_node(domain);
> >   rid = of_node ? of_msi_map_rid(>dev, of_node, rid) :
> > - iort_msi_map_rid(>dev, rid);
> > + iort_msi_map_id(>dev, rid);
> >
> >   return rid;
> >  }
> > diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
> > index 08ec6bd2297f..e51425e083da 100644
> > --- a/include/linux/acpi_iort.h
> > +++ b/include/linux/acpi_iort.h
> > @@ -28,7 +28,7 @@ void iort_deregister_domain_token(int trans_id);
> > struct fwnode_handle *iort_find_domain_token(int trans_id);  #ifdef
> > CONFIG_ACPI_IORT  void acpi_iort_init(void);
> > -u32 iort_msi_map_rid(struct device *dev, u32 req_id);
> > +u32 iort_msi_map_id(struct device *dev, u32 id);
> >  struct irq_domain *iort_get_device_domain(struct device *dev, u32 id,
> > enum irq_domain_bus_token
> > bus_token);  void acpi_configure_pmsi_domain(struct device *dev); @@
> > -39,8 +39,8 @@ const struct iommu_ops *iort_iommu_configure(struct
> > device *dev);  int iort_iommu_msi_get_resv_regions(struct device *dev,
> > struct list_head *head);  #else  static inline void
> > acpi_iort_init(void) { } -static inline u32 iort_msi_map_rid(struct
> > device *dev, u32 req_id) -{ return req_id; }
> > +static inline u32 iort_msi_map_id(struct device *dev, u32 id) {
> > +return id; }
> >  static inline struct irq_domain *iort_get_device_domain(
> >   struct device *dev, u32 id, enum irq_domain_bus_token bus_token)
> > { return NULL; }
> > --
> > 2.26.1
> >
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu


Re: [PATCH v10 04/13] iommu/arm-smmu-qcom: Add implementation for the adreno GPU SMMU

2020-07-27 Thread Bjorn Andersson
On Mon 20 Jul 08:40 PDT 2020, Jordan Crouse wrote:
> diff --git a/drivers/iommu/arm-smmu-qcom.c b/drivers/iommu/arm-smmu-qcom.c
[..]
> +static int qcom_adreno_smmu_alloc_context_bank(struct arm_smmu_domain 
> *smmu_domain,
> + struct device *dev, int start, int count)
> +{
> + struct arm_smmu_device *smmu = smmu_domain->smmu;
> +
> + /*
> +  * Assign context bank 0 to the GPU device so the GPU hardware can
> +  * switch pagetables
> +  */
> + if (qcom_adreno_smmu_is_gpu_device(dev)) {
> + if (start > 0 || test_bit(0, smmu->context_map))
> + return -ENOSPC;
> +
> + set_bit(0, smmu->context_map);
> + return 0;
> + }
> +
> + return __arm_smmu_alloc_bitmap(smmu->context_map, start, count);

If we end up here before the GPU device shows up this is going to
steal the first context bank, causing the subsequent allocation for the
GPU to always fail.

As such I think it would be appropriate for you to adjust "start" to
never be 0 here. And I think it would be appropriate to write this
function as:

if (gpu) {
start = 0;
count = 1;
} else {
if (start == 0)
start = 1;
}

return __arm_smmu_alloc_bitmap(smmu->context_map, start, count);
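
Fleshed out, a sketch of the allocator with that adjustment (illustrative
only; names as in the v10 patch):

static int qcom_adreno_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_domain,
					       struct device *dev, int start, int count)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (qcom_adreno_smmu_is_gpu_device(dev)) {
		/* The GPU must own context bank 0 so it can switch pagetables */
		start = 0;
		count = 1;
	} else if (start == 0) {
		/* Keep bank 0 free for the GPU even if it probes later */
		start = 1;
	}

	return __arm_smmu_alloc_bitmap(smmu->context_map, start, count);
}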

Regards,
Bjorn
___
iommu mailing list
iommu@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/iommu