Now that we have abstracted the required functions, move them to a new home.
Later, we will generalize these functions in order to make them useful outside
of hibernation.

Signed-off-by: Pavel Tatashin <pasha.tatashin@soleen.com>
---
 arch/arm64/Kconfig                 |   4 +
 arch/arm64/include/asm/trans_pgd.h |  21 +++
 arch/arm64/kernel/hibernate.c      | 199 +-------------------------
 arch/arm64/mm/Makefile             |   1 +
 arch/arm64/mm/trans_pgd.c          | 219 +++++++++++++++++++++++++++++
 5 files changed, 246 insertions(+), 198 deletions(-)
 create mode 100644 arch/arm64/include/asm/trans_pgd.h
 create mode 100644 arch/arm64/mm/trans_pgd.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 0b30e884e088..63e0e1db6b2e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1107,6 +1107,10 @@ config CRASH_DUMP
 
          For more details see Documentation/admin-guide/kdump/kdump.rst
 
+config TRANS_TABLE
+       def_bool y
+       depends on HIBERNATION
+
 config XEN_DOM0
        def_bool y
        depends on XEN
diff --git a/arch/arm64/include/asm/trans_pgd.h b/arch/arm64/include/asm/trans_pgd.h
new file mode 100644
index 000000000000..23153c13d1ce
--- /dev/null
+++ b/arch/arm64/include/asm/trans_pgd.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/*
+ * Copyright (c) 2020, Microsoft Corporation.
+ * Pavel Tatashin <pasha.tatashin@soleen.com>
+ */
+
+#ifndef _ASM_TRANS_TABLE_H
+#define _ASM_TRANS_TABLE_H
+
+#include <linux/bits.h>
+#include <linux/types.h>
+#include <asm/pgtable-types.h>
+
+int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start,
+                         unsigned long end);
+
+int trans_pgd_map_page(pgd_t *trans_pgd, void *page, unsigned long dst_addr,
+                      pgprot_t pgprot);
+
+#endif /* _ASM_TRANS_TABLE_H */
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index 590963c9c609..3d6f0fd73591 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -16,7 +16,6 @@
 #define pr_fmt(x) "hibernate: " x
 #include <linux/cpu.h>
 #include <linux/kvm_host.h>
-#include <linux/mm.h>
 #include <linux/pm.h>
 #include <linux/sched.h>
 #include <linux/suspend.h>
@@ -31,14 +30,12 @@
 #include <asm/kexec.h>
 #include <asm/memory.h>
 #include <asm/mmu_context.h>
-#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
-#include <asm/pgtable-hwdef.h>
 #include <asm/sections.h>
 #include <asm/smp.h>
 #include <asm/smp_plat.h>
 #include <asm/suspend.h>
 #include <asm/sysreg.h>
+#include <asm/trans_pgd.h>
 #include <asm/virt.h>
 
 /*
@@ -182,45 +179,6 @@ int arch_hibernation_header_restore(void *addr)
 }
 EXPORT_SYMBOL(arch_hibernation_header_restore);
 
-static int trans_pgd_map_page(pgd_t *trans_pgd, void *page,
-                      unsigned long dst_addr,
-                      pgprot_t pgprot)
-{
-       pgd_t *pgdp;
-       pud_t *pudp;
-       pmd_t *pmdp;
-       pte_t *ptep;
-
-       pgdp = pgd_offset_raw(trans_pgd, dst_addr);
-       if (pgd_none(READ_ONCE(*pgdp))) {
-               pudp = (void *)get_safe_page(GFP_ATOMIC);
-               if (!pudp)
-                       return -ENOMEM;
-               pgd_populate(&init_mm, pgdp, pudp);
-       }
-
-       pudp = pud_offset(pgdp, dst_addr);
-       if (pud_none(READ_ONCE(*pudp))) {
-               pmdp = (void *)get_safe_page(GFP_ATOMIC);
-               if (!pmdp)
-                       return -ENOMEM;
-               pud_populate(&init_mm, pudp, pmdp);
-       }
-
-       pmdp = pmd_offset(pudp, dst_addr);
-       if (pmd_none(READ_ONCE(*pmdp))) {
-               ptep = (void *)get_safe_page(GFP_ATOMIC);
-               if (!ptep)
-                       return -ENOMEM;
-               pmd_populate_kernel(&init_mm, pmdp, ptep);
-       }
-
-       ptep = pte_offset_kernel(pmdp, dst_addr);
-       set_pte(ptep, pfn_pte(virt_to_pfn(page), PAGE_KERNEL_EXEC));
-
-       return 0;
-}
-
 /*
  * Copies length bytes, starting at src_start into an new page,
  * perform cache maintenance, then maps it at the specified address low
@@ -339,161 +297,6 @@ int swsusp_arch_suspend(void)
        return ret;
 }
 
-static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
-{
-       pte_t pte = READ_ONCE(*src_ptep);
-
-       if (pte_valid(pte)) {
-               /*
-                * Resume will overwrite areas that may be marked
-                * read only (code, rodata). Clear the RDONLY bit from
-                * the temporary mappings we use during restore.
-                */
-               set_pte(dst_ptep, pte_mkwrite(pte));
-       } else if (debug_pagealloc_enabled() && !pte_none(pte)) {
-               /*
-                * debug_pagealloc will removed the PTE_VALID bit if
-                * the page isn't in use by the resume kernel. It may have
-                * been in use by the original kernel, in which case we need
-                * to put it back in our copy to do the restore.
-                *
-                * Before marking this entry valid, check the pfn should
-                * be mapped.
-                */
-               BUG_ON(!pfn_valid(pte_pfn(pte)));
-
-               set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
-       }
-}
-
-static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start,
-                   unsigned long end)
-{
-       pte_t *src_ptep;
-       pte_t *dst_ptep;
-       unsigned long addr = start;
-
-       dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC);
-       if (!dst_ptep)
-               return -ENOMEM;
-       pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep);
-       dst_ptep = pte_offset_kernel(dst_pmdp, start);
-
-       src_ptep = pte_offset_kernel(src_pmdp, start);
-       do {
-               _copy_pte(dst_ptep, src_ptep, addr);
-       } while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);
-
-       return 0;
-}
-
-static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
-                   unsigned long end)
-{
-       pmd_t *src_pmdp;
-       pmd_t *dst_pmdp;
-       unsigned long next;
-       unsigned long addr = start;
-
-       if (pud_none(READ_ONCE(*dst_pudp))) {
-               dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC);
-               if (!dst_pmdp)
-                       return -ENOMEM;
-               pud_populate(&init_mm, dst_pudp, dst_pmdp);
-       }
-       dst_pmdp = pmd_offset(dst_pudp, start);
-
-       src_pmdp = pmd_offset(src_pudp, start);
-       do {
-               pmd_t pmd = READ_ONCE(*src_pmdp);
-
-               next = pmd_addr_end(addr, end);
-               if (pmd_none(pmd))
-                       continue;
-               if (pmd_table(pmd)) {
-                       if (copy_pte(dst_pmdp, src_pmdp, addr, next))
-                               return -ENOMEM;
-               } else {
-                       set_pmd(dst_pmdp,
-                               __pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
-               }
-       } while (dst_pmdp++, src_pmdp++, addr = next, addr != end);
-
-       return 0;
-}
-
-static int copy_pud(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
-                   unsigned long end)
-{
-       pud_t *dst_pudp;
-       pud_t *src_pudp;
-       unsigned long next;
-       unsigned long addr = start;
-
-       if (pgd_none(READ_ONCE(*dst_pgdp))) {
-               dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC);
-               if (!dst_pudp)
-                       return -ENOMEM;
-               pgd_populate(&init_mm, dst_pgdp, dst_pudp);
-       }
-       dst_pudp = pud_offset(dst_pgdp, start);
-
-       src_pudp = pud_offset(src_pgdp, start);
-       do {
-               pud_t pud = READ_ONCE(*src_pudp);
-
-               next = pud_addr_end(addr, end);
-               if (pud_none(pud))
-                       continue;
-               if (pud_table(pud)) {
-                       if (copy_pmd(dst_pudp, src_pudp, addr, next))
-                               return -ENOMEM;
-               } else {
-                       set_pud(dst_pudp,
-                               __pud(pud_val(pud) & ~PUD_SECT_RDONLY));
-               }
-       } while (dst_pudp++, src_pudp++, addr = next, addr != end);
-
-       return 0;
-}
-
-static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
-                           unsigned long end)
-{
-       unsigned long next;
-       unsigned long addr = start;
-       pgd_t *src_pgdp = pgd_offset_k(start);
-
-       dst_pgdp = pgd_offset_raw(dst_pgdp, start);
-       do {
-               next = pgd_addr_end(addr, end);
-               if (pgd_none(READ_ONCE(*src_pgdp)))
-                       continue;
-               if (copy_pud(dst_pgdp, src_pgdp, addr, next))
-                       return -ENOMEM;
-       } while (dst_pgdp++, src_pgdp++, addr = next, addr != end);
-
-       return 0;
-}
-
-static int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start,
-                         unsigned long end)
-{
-       int rc;
-       pgd_t *trans_pgd = (pgd_t *)get_safe_page(GFP_ATOMIC);
-
-       if (!trans_pgd) {
-       pr_err("Failed to allocate memory for temporary page tables.\n");
-               return -ENOMEM;
-       }
-
-       rc = copy_page_tables(trans_pgd, start, end);
-       if (!rc)
-               *dst_pgdp = trans_pgd;
-
-       return rc;
-}
-
 /*
  * Setup then Resume from the hibernate image using swsusp_arch_suspend_exit().
  *
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index d91030f0ffee..bdad6ff0d72c 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -6,6 +6,7 @@ obj-y                           := dma-mapping.o extable.o fault.o init.o \
 obj-$(CONFIG_HUGETLB_PAGE)     += hugetlbpage.o
 obj-$(CONFIG_PTDUMP_CORE)      += dump.o
 obj-$(CONFIG_PTDUMP_DEBUGFS)   += ptdump_debugfs.o
+obj-$(CONFIG_TRANS_TABLE)      += trans_pgd.o
 obj-$(CONFIG_NUMA)             += numa.o
 obj-$(CONFIG_DEBUG_VIRTUAL)    += physaddr.o
 KASAN_SANITIZE_physaddr.o      += n
diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c
new file mode 100644
index 000000000000..d20e48520cef
--- /dev/null
+++ b/arch/arm64/mm/trans_pgd.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Transitional page tables for kexec and hibernate
+ *
+ * This file derived from: arch/arm64/kernel/hibernate.c
+ *
+ * Copyright (c) 2020, Microsoft Corporation.
+ * Pavel Tatashin <pasha.tatashin@soleen.com>
+ *
+ */
+
+/*
+ * Transitional tables are used while the system is transferring from one
+ * world to another, such as during hibernate restore and kexec reboots.
+ * During these phases one cannot rely on the page tables not being
+ * overwritten, because hibernate and kexec can overwrite them mid-transition.
+ */
+
+#include <asm/trans_pgd.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <linux/suspend.h>
+#include <linux/bug.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+
+static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
+{
+       pte_t pte = READ_ONCE(*src_ptep);
+
+       if (pte_valid(pte)) {
+               /*
+                * Resume will overwrite areas that may be marked
+                * read only (code, rodata). Clear the RDONLY bit from
+                * the temporary mappings we use during restore.
+                */
+               set_pte(dst_ptep, pte_mkwrite(pte));
+       } else if (debug_pagealloc_enabled() && !pte_none(pte)) {
+               /*
+                * debug_pagealloc will have removed the PTE_VALID bit if
+                * the page isn't in use by the resume kernel. It may have
+                * been in use by the original kernel, in which case we need
+                * to put it back in our copy to do the restore.
+                *
+                * Before marking this entry valid, check that the pfn
+                * should be mapped.
+                */
+               BUG_ON(!pfn_valid(pte_pfn(pte)));
+
+               set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte)));
+       }
+}
+
+static int copy_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long start,
+                   unsigned long end)
+{
+       pte_t *src_ptep;
+       pte_t *dst_ptep;
+       unsigned long addr = start;
+
+       dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC);
+       if (!dst_ptep)
+               return -ENOMEM;
+       pmd_populate_kernel(&init_mm, dst_pmdp, dst_ptep);
+       dst_ptep = pte_offset_kernel(dst_pmdp, start);
+
+       src_ptep = pte_offset_kernel(src_pmdp, start);
+       do {
+               _copy_pte(dst_ptep, src_ptep, addr);
+       } while (dst_ptep++, src_ptep++, addr += PAGE_SIZE, addr != end);
+
+       return 0;
+}
+
+static int copy_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long start,
+                   unsigned long end)
+{
+       pmd_t *src_pmdp;
+       pmd_t *dst_pmdp;
+       unsigned long next;
+       unsigned long addr = start;
+
+       if (pud_none(READ_ONCE(*dst_pudp))) {
+               dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC);
+               if (!dst_pmdp)
+                       return -ENOMEM;
+               pud_populate(&init_mm, dst_pudp, dst_pmdp);
+       }
+       dst_pmdp = pmd_offset(dst_pudp, start);
+
+       src_pmdp = pmd_offset(src_pudp, start);
+       do {
+               pmd_t pmd = READ_ONCE(*src_pmdp);
+
+               next = pmd_addr_end(addr, end);
+               if (pmd_none(pmd))
+                       continue;
+               if (pmd_table(pmd)) {
+                       if (copy_pte(dst_pmdp, src_pmdp, addr, next))
+                               return -ENOMEM;
+               } else {
+                       set_pmd(dst_pmdp,
+                               __pmd(pmd_val(pmd) & ~PMD_SECT_RDONLY));
+               }
+       } while (dst_pmdp++, src_pmdp++, addr = next, addr != end);
+
+       return 0;
+}
+
+static int copy_pud(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long start,
+                   unsigned long end)
+{
+       pud_t *dst_pudp;
+       pud_t *src_pudp;
+       unsigned long next;
+       unsigned long addr = start;
+
+       if (pgd_none(READ_ONCE(*dst_pgdp))) {
+               dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC);
+               if (!dst_pudp)
+                       return -ENOMEM;
+               pgd_populate(&init_mm, dst_pgdp, dst_pudp);
+       }
+       dst_pudp = pud_offset(dst_pgdp, start);
+
+       src_pudp = pud_offset(src_pgdp, start);
+       do {
+               pud_t pud = READ_ONCE(*src_pudp);
+
+               next = pud_addr_end(addr, end);
+               if (pud_none(pud))
+                       continue;
+               if (pud_table(pud)) {
+                       if (copy_pmd(dst_pudp, src_pudp, addr, next))
+                               return -ENOMEM;
+               } else {
+                       set_pud(dst_pudp,
+                               __pud(pud_val(pud) & ~PUD_SECT_RDONLY));
+               }
+       } while (dst_pudp++, src_pudp++, addr = next, addr != end);
+
+       return 0;
+}
+
+static int copy_page_tables(pgd_t *dst_pgdp, unsigned long start,
+                           unsigned long end)
+{
+       unsigned long next;
+       unsigned long addr = start;
+       pgd_t *src_pgdp = pgd_offset_k(start);
+
+       dst_pgdp = pgd_offset_raw(dst_pgdp, start);
+       do {
+               next = pgd_addr_end(addr, end);
+               if (pgd_none(READ_ONCE(*src_pgdp)))
+                       continue;
+               if (copy_pud(dst_pgdp, src_pgdp, addr, next))
+                       return -ENOMEM;
+       } while (dst_pgdp++, src_pgdp++, addr = next, addr != end);
+
+       return 0;
+}
+
+int trans_pgd_create_copy(pgd_t **dst_pgdp, unsigned long start,
+                         unsigned long end)
+{
+       int rc;
+       pgd_t *trans_pgd = (pgd_t *)get_safe_page(GFP_ATOMIC);
+
+       if (!trans_pgd) {
+       pr_err("Failed to allocate memory for temporary page tables.\n");
+               return -ENOMEM;
+       }
+
+       rc = copy_page_tables(trans_pgd, start, end);
+       if (!rc)
+               *dst_pgdp = trans_pgd;
+
+       return rc;
+}
+
+int trans_pgd_map_page(pgd_t *trans_pgd, void *page, unsigned long dst_addr,
+                      pgprot_t pgprot)
+{
+       pgd_t *pgdp;
+       pud_t *pudp;
+       pmd_t *pmdp;
+       pte_t *ptep;
+
+       pgdp = pgd_offset_raw(trans_pgd, dst_addr);
+       if (pgd_none(READ_ONCE(*pgdp))) {
+               pudp = (void *)get_safe_page(GFP_ATOMIC);
+               if (!pudp)
+                       return -ENOMEM;
+               pgd_populate(&init_mm, pgdp, pudp);
+       }
+
+       pudp = pud_offset(pgdp, dst_addr);
+       if (pud_none(READ_ONCE(*pudp))) {
+               pmdp = (void *)get_safe_page(GFP_ATOMIC);
+               if (!pmdp)
+                       return -ENOMEM;
+               pud_populate(&init_mm, pudp, pmdp);
+       }
+
+       pmdp = pmd_offset(pudp, dst_addr);
+       if (pmd_none(READ_ONCE(*pmdp))) {
+               ptep = (void *)get_safe_page(GFP_ATOMIC);
+               if (!ptep)
+                       return -ENOMEM;
+               pmd_populate_kernel(&init_mm, pmdp, ptep);
+       }
+
+       ptep = pte_offset_kernel(pmdp, dst_addr);
+       set_pte(ptep, pfn_pte(virt_to_pfn(page), PAGE_KERNEL_EXEC));
+
+       return 0;
+}
-- 
2.17.1


_______________________________________________
kexec mailing list
kexec@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/kexec

Reply via email to