Le 07/06/2019 à 08:19, Nicholas Piggin a écrit :
powerpc/64s does not use ioremap_page_range, so it does not get huge
vmap iomap mappings automatically. The radix kernel mapping function
already allows larger page mappings that work with huge vmap, so wire
that up to allow huge pages to be used for ioremap mappings.

Argh ... I was on the way to merge pgtable_64.c and pgtable_32.c. This will complicate the task ... Anyway this looks a good improvment.

Any reason to limit that to Radix ?


Signed-off-by: Nicholas Piggin <npig...@gmail.com>
---
  arch/powerpc/include/asm/book3s/64/pgtable.h |  8 +++
  arch/powerpc/mm/pgtable_64.c                 | 58 ++++++++++++++++++--
  include/linux/io.h                           |  1 +
  lib/ioremap.c                                |  2 +-
  4 files changed, 62 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index ccf00a8b98c6..d7a4f2d80598 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -274,6 +274,14 @@ extern unsigned long __vmalloc_end;
  #define VMALLOC_START __vmalloc_start
  #define VMALLOC_END   __vmalloc_end
+static inline unsigned int ioremap_max_order(void)
+{
+       if (radix_enabled())
+               return PUD_SHIFT;
+       return 7 + PAGE_SHIFT; /* default from linux/vmalloc.h */
+}
+#define IOREMAP_MAX_ORDER ({ ioremap_max_order();})

Following form doesn't work ?

#define IOREMAP_MAX_ORDER       ioremap_max_order()

+
  extern unsigned long __kernel_virt_start;
  extern unsigned long __kernel_virt_size;
  extern unsigned long __kernel_io_start;
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index d2d976ff8a0e..cf02b67eee55 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -112,7 +112,7 @@ unsigned long ioremap_bot = IOREMAP_BASE;
   * __ioremap_at - Low level function to establish the page tables
   *                for an IO mapping
   */
-void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, 
pgprot_t prot)
+static void __iomem * hash__ioremap_at(phys_addr_t pa, void *ea, unsigned long 
size, pgprot_t prot)

Is this the correct name ?

As far as I understand, this function will be used by nohash/64, looks strange to call hash__something() a function used by nohash platforms.

  {
        unsigned long i;
@@ -120,6 +120,54 @@ void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_
        if (pgprot_val(prot) & H_PAGE_4K_PFN)
                return NULL;
+ for (i = 0; i < size; i += PAGE_SIZE)
+               if (map_kernel_page((unsigned long)ea + i, pa + i, prot))
+                       return NULL;
+
+       return (void __iomem *)ea;
+}
+
+static int radix__ioremap_page_range(unsigned long addr, unsigned long end,
+                      phys_addr_t phys_addr, pgprot_t prot)
+{
+       while (addr != end) {
+               if (unlikely(ioremap_huge_disabled))
+                       goto use_small_page;

I don't like too much a goto in the middle of an if/else set inside a loop.

Couldn't we have two while() loops, one for the !ioremap_huge_disabled() and one for the ioremap_huge_disabled() case ? It would duplicate some code but that's only 3 small lines.

Or, when ioremap_huge_disabled(), couldn't it just fallback to the hash__ioremap_at() function ?

+
+               if (!(addr & ~PUD_MASK) && !(phys_addr & ~PUD_MASK) &&
+                               end - addr >= PUD_SIZE) {
+                       if (radix__map_kernel_page(addr, phys_addr, prot, 
PUD_SIZE))
+                               return -ENOMEM;
+                       addr += PUD_SIZE;
+                       phys_addr += PUD_SIZE;
+
+               } else if (!(addr & ~PMD_MASK) && !(phys_addr & ~PMD_MASK) &&
+                               end - addr >= PMD_SIZE) {
+                       if (radix__map_kernel_page(addr, phys_addr, prot, 
PMD_SIZE))
+                               return -ENOMEM;
+                       addr += PMD_SIZE;
+                       phys_addr += PMD_SIZE;
+
+               } else {
+use_small_page:
+                       if (radix__map_kernel_page(addr, phys_addr, prot, 
PAGE_SIZE))
+                               return -ENOMEM;
+                       addr += PAGE_SIZE;
+                       phys_addr += PAGE_SIZE;
+               }
+       }
+       return 0;
+}
+
+static void __iomem * radix__ioremap_at(phys_addr_t pa, void *ea, unsigned 
long size, pgprot_t prot)
+{
+       if (radix__ioremap_page_range((unsigned long)ea, (unsigned long)ea + 
size, pa, prot))
+               return NULL;
+       return ea;
+}
+
+void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, 
pgprot_t prot)
+{
        if ((ea + size) >= (void *)IOREMAP_END) {
                pr_warn("Outside the supported range\n");
                return NULL;
@@ -129,11 +177,9 @@ void __iomem *__ioremap_at(phys_addr_t pa, void *ea, 
unsigned long size, pgprot_
        WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
        WARN_ON(size & ~PAGE_MASK);
- for (i = 0; i < size; i += PAGE_SIZE)
-               if (map_kernel_page((unsigned long)ea + i, pa + i, prot))
-                       return NULL;
-
-       return (void __iomem *)ea;
+       if (radix_enabled())

What about  if (radix_enabled() && !ioremap_huge_disabled())  instead ?

+               return radix__ioremap_at(pa, ea, size, prot);
+       return hash__ioremap_at(pa, ea, size, prot);

Can't we just leave the no radix stuff here instead of making that hash__ioremap_at() function ?

Christophe


  }
/**
diff --git a/include/linux/io.h b/include/linux/io.h
index 32e30e8fb9db..423c4294aaa3 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -44,6 +44,7 @@ static inline int ioremap_page_range(unsigned long addr, 
unsigned long end,
  #endif
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+extern int ioremap_huge_disabled;
  void __init ioremap_huge_init(void);
  int arch_ioremap_pud_supported(void);
  int arch_ioremap_pmd_supported(void);
diff --git a/lib/ioremap.c b/lib/ioremap.c
index 063213685563..386ff956755f 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -18,7 +18,7 @@
  static int __read_mostly ioremap_p4d_capable;
  static int __read_mostly ioremap_pud_capable;
  static int __read_mostly ioremap_pmd_capable;
-static int __read_mostly ioremap_huge_disabled;
+int __read_mostly ioremap_huge_disabled;
static int __init set_nohugeiomap(char *str)
  {

Reply via email to