From: Konrad Schwarz <[email protected]>

While we need support for SV39, SV48 and SV57 in the bootstrapping
page tables, we can later downgrade to SV39.

For hypervisor paging, I chose to use SV39, as it comes with fewer levels
than the other ones.

For G-Stage (guest) paging, I chose SV39 as well: It is very unlikely
that we need a very high guest-physical address. Independent of the
G-Stage paging, the guest may use any available paging method that it
wants. This means, Linux may enable SV57 paging on top of an SV39
G-stage.

Signed-off-by: Ralf Ramsauer <[email protected]>
---
 hypervisor/arch/riscv/paging.c        | 262 +++++++++++++++++++++++++-
 hypervisor/include/jailhouse/paging.h |   2 +-
 2 files changed, 260 insertions(+), 4 deletions(-)

diff --git a/hypervisor/arch/riscv/paging.c b/hypervisor/arch/riscv/paging.c
index 294ea958..99a842ea 100644
--- a/hypervisor/arch/riscv/paging.c
+++ b/hypervisor/arch/riscv/paging.c
@@ -2,22 +2,278 @@
  * Jailhouse, a Linux-based partitioning hypervisor
  *
  * Copyright (c) Siemens AG, 2020
+ * Copyright (c) OTH Regensburg, 2022
  *
  * Authors:
- *  Jan Kiszka <[email protected]>
+ *  Konrad Schwarz <[email protected]>
+ *  Ralf Ramsauer <[email protected]>
  *
  * This work is licensed under the terms of the GNU GPL, version 2.  See
  * the COPYING file in the top-level directory.
  */
 
+#include <jailhouse/control.h>
 #include <jailhouse/paging.h>
+#include <jailhouse/percpu.h>
+#include <asm/csr64.h>
+
+#define HV_PAGING      riscv_Sv39
+#define HV_ATP_MODE    ATP_MODE_SV39
+#define CELL_PAGING    riscv_Sv39x4
+#define CELL_ATP_MODE  ATP_MODE_SV39
+
+#define        PAGE_BITS       12
+#define        WORD_BITS       3 /* 1 << WORD_BITS == sizeof (void *) */
+#define        MAX_FLAG        10
+#define FLAG_MASK      ((1 << MAX_FLAG) - 1)
+
+#define        PAGE_LEVEL_BITS         (PAGE_BITS - WORD_BITS)
+#define PAGE_LEVEL_MASK(ROOT)  ((1 << (PAGE_LEVEL_BITS + 2 * !!(ROOT))) - 1)
+
+#define        UNTRANSLATED_BITS(LEVEL) \
+                       ((LEVEL) * PAGE_LEVEL_BITS + PAGE_BITS)
+
+#define        PAGE_TERMINAL_FLAGS \
+       (RISCV_PTE_FLAG(R) | RISCV_PTE_FLAG(W) | RISCV_PTE_FLAG(X))
+
+unsigned char hv_atp_mode;
+static unsigned char cell_atp_mode;
+
+static inline unsigned long pte2phys(unsigned long pte)
+{
+       return (pte & ~FLAG_MASK) << (PAGE_BITS - MAX_FLAG);
+}
+
+static inline unsigned long phys2pte(unsigned long phys)
+{
+       return phys >> (PAGE_BITS - MAX_FLAG);
+}
+
+#define DEF_GET_ENTRY(NAME, LEVEL, ROOT)                       \
+static pt_entry_t                                              \
+sv## NAME ##_vpn## LEVEL ##_get_entry(page_table_t pt,         \
+                                       unsigned long virt)     \
+{                                                              \
+       return pt + ((virt >> UNTRANSLATED_BITS(LEVEL)) &       \
+                   PAGE_LEVEL_MASK(ROOT));                     \
+}
+
+DEF_GET_ENTRY(X, 0, false)
+DEF_GET_ENTRY(X, 1, false)
+DEF_GET_ENTRY(X, 2, false)
+DEF_GET_ENTRY(X, 3, false)
+
+static bool svX_entry_valid(pt_entry_t pte, unsigned long flags)
+{
+       /*
+        * We must not check flags, we only need to check for the V-bit. PTEs
+        * are valid, if V is set. If R/W/X is set, then it means that the PTE
+        * is a leaf.
+        */
+       return !!(*pte & RISCV_PTE_FLAG(V));
+}
+
+#define DEF_SET_TERMINAL(NAME, FLAGS)                                  \
+static void sv## NAME ##_vpnX_set_terminal(pt_entry_t pte,             \
+                                          unsigned long phys,          \
+                                          unsigned long flags)         \
+{                                                                      \
+       /*                                                              \
+        * set A and D flags pre-emptively, to avoid page-fault         \
+        * exceptions when the hardware does not set A and D by itself  \
+        */                                                             \
+       *pte = FLAGS | RISCV_PTE_FLAG(D) | RISCV_PTE_FLAG(A) | flags |  \
+               phys2pte(phys);                                         \
+}
+
+DEF_SET_TERMINAL(X, 0)
+
+#define DEF_GET_PHYS(LEVEL)                                    \
+static unsigned long                                           \
+svX_vpn## LEVEL ##_get_phys (pt_entry_t pte, unsigned long virt)\
+{                                                              \
+       unsigned long entry = *pte;                             \
+       if (!(RISCV_PTE_FLAG(V) & entry) ||                     \
+           !(PAGE_TERMINAL_FLAGS & (entry)))                   \
+               return INVALID_PHYS_ADDR;                       \
+       return pte2phys(entry) |                                \
+              (((1UL << UNTRANSLATED_BITS(LEVEL)) - 1) & virt);\
+}
+
+DEF_GET_PHYS(0)
+DEF_GET_PHYS(1)
+DEF_GET_PHYS(2)
+DEF_GET_PHYS(3)
+
+static unsigned long svX_get_flags(pt_entry_t pte)
+{
+       return *pte & FLAG_MASK;
+}
+
+#define DEF_SET_NEXT(NAME, FLAGS)                                      \
+static void                                                            \
+sv## NAME ##_vpnX_set_next_pt(pt_entry_t pte, unsigned long next_pt)   \
+{                                                                      \
+       *pte = FLAGS | RISCV_PTE_FLAG(V) | phys2pte(next_pt);           \
+}
+
+DEF_SET_NEXT(X, RISCV_PTE_FLAG(G))
+
+static unsigned long svX_vpnX_get_next_pt(pt_entry_t pte)
+{
+       return pte2phys(*pte);
+}
+
+static void svX_clear_entry(pt_entry_t pte)
+{
+       *pte = 0;
+}
+
+static inline bool _svX_page_table_empty(page_table_t page_table,
+                                        unsigned long len)
+{
+       unsigned long idx;
+
+       for (idx = 0; idx < len; idx++) /* one valid PTE => not empty */
+               if (page_table[idx] & RISCV_PTE_FLAG(V))
+                       return false;
+       return true;
+}
+
+static bool svX_page_table_empty(page_table_t page_table)
+{
+       return _svX_page_table_empty(page_table, 1 << PAGE_LEVEL_BITS);
+}
+
+#define        RISCV_SVX_PAGING_LEVEL(LEVEL)                           \
+       {                                                       \
+               .page_size = 1UL << UNTRANSLATED_BITS(LEVEL),   \
+               .get_entry = svX_vpn ## LEVEL ## _get_entry,    \
+               .entry_valid = svX_entry_valid,                 \
+               .set_terminal = svX_vpnX_set_terminal,          \
+               .get_phys = svX_vpn## LEVEL ##_get_phys,        \
+               .get_flags = svX_get_flags,                     \
+               .set_next_pt = svX_vpnX_set_next_pt,            \
+               .get_next_pt = svX_vpnX_get_next_pt,            \
+               .clear_entry = svX_clear_entry,                 \
+               .page_table_empty = svX_page_table_empty,       \
+       }
+
+/* sequence is from root to leaves */
+const struct paging riscv_Sv39[] = {
+       RISCV_SVX_PAGING_LEVEL(2),
+       RISCV_SVX_PAGING_LEVEL(1),
+       RISCV_SVX_PAGING_LEVEL(0),
+};
+
+const struct paging riscv_Sv48[] = {
+       RISCV_SVX_PAGING_LEVEL(3),
+       RISCV_SVX_PAGING_LEVEL(2),
+       RISCV_SVX_PAGING_LEVEL(1),
+       RISCV_SVX_PAGING_LEVEL(0),
+};
+
+/* 4K*4 root table: level 2 in case of SV39x4, level 3 in case of SV48x4 */
+DEF_GET_ENTRY(39x4, 2, true)
+DEF_GET_ENTRY(48x4, 3, true)
+
+/* For the rest (non-root tbls), reuse svX routines */
+#define        sv39x4_vpn0_get_entry   svX_vpn0_get_entry
+#define        sv39x4_vpn1_get_entry   svX_vpn1_get_entry
+
+#define        sv48x4_vpn0_get_entry   svX_vpn0_get_entry
+#define        sv48x4_vpn1_get_entry   svX_vpn1_get_entry
+#define        sv48x4_vpn2_get_entry   svX_vpn2_get_entry
+
+#define        sv39x4_vpn0_get_phys    svX_vpn0_get_phys
+#define        sv48x4_vpn0_get_phys    svX_vpn0_get_phys
+
+#define        sv39x4_vpn1_get_phys    svX_vpn1_get_phys
+#define        sv48x4_vpn1_get_phys    svX_vpn1_get_phys
+
+#define        sv39x4_vpn2_get_phys    svX_vpn2_get_phys
+#define        sv48x4_vpn2_get_phys    svX_vpn2_get_phys
+
+#define        sv48x4_vpn3_get_phys    svX_vpn3_get_phys
+
+DEF_SET_TERMINAL(Xx4, RISCV_PTE_FLAG(U))
+
+DEF_SET_NEXT(Xx4, 0)
+
+static bool svXx4_root_page_table_empty(page_table_t page_table)
+{      /* x4 root table: 4 * 512 PTEs, i.e. PAGE_LEVEL_MASK(true) + 1 */
+       return _svX_page_table_empty(page_table, 1 << (2 + PAGE_LEVEL_BITS));
+}
+
+#define        RISCV_SVXx4_PAGING_LEVEL(WIDTH, LEVEL, ROOT)                    \
+       {       /* ROOT selects the root-table emptiness check */       \
+               .page_size = 1UL << UNTRANSLATED_BITS(LEVEL),           \
+               .get_entry = sv ## WIDTH ## x4_vpn ## LEVEL ## _get_entry, \
+               .entry_valid = svX_entry_valid,                         \
+               .set_terminal = svXx4_vpnX_set_terminal,                \
+               .get_phys = sv ## WIDTH ## x4_vpn ## LEVEL ## _get_phys, \
+               .get_flags = svX_get_flags,                             \
+               .set_next_pt = svXx4_vpnX_set_next_pt,                  \
+               .get_next_pt = svX_vpnX_get_next_pt,                    \
+               .clear_entry = svX_clear_entry,                         \
+               .page_table_empty = (ROOT) ? svXx4_root_page_table_empty : \
+                                            svX_page_table_empty,      \
+       }
+
+/* sequence is from root to leaves */
+const struct paging riscv_Sv39x4[] = {
+       RISCV_SVXx4_PAGING_LEVEL(39, 2, true),
+       RISCV_SVXx4_PAGING_LEVEL(39, 1, false),
+       RISCV_SVXx4_PAGING_LEVEL(39, 0, false),
+};
+
+const struct paging riscv_Sv48x4[] = {
+       RISCV_SVXx4_PAGING_LEVEL(48, 3, true),
+       RISCV_SVXx4_PAGING_LEVEL(48, 2, false),
+       RISCV_SVXx4_PAGING_LEVEL(48, 1, false),
+       RISCV_SVXx4_PAGING_LEVEL(48, 0, false),
+};
 
 void arch_paging_init(void)
 {
+       /*
+        * Basically, any MMU mode can be used here. Let's choose SV39 for two
+        * reasons:
+        *  - It only requires three levels for 4K pages, whereas SV57 requires
+        *    five levels
+        *  - In Jailhouse, we typically use 1:1 mappings, and any real
+        *    hardware should be mappable with SV39. No need for SV48 or SV57.
+        *    Guests may still use whatever they want.
+        *
+        *    Same arguments apply for cell paging (G-stage paging).
+        */
+       hv_paging_structs.root_paging = HV_PAGING;
+       hv_atp_mode = HV_ATP_MODE;
+       cell_atp_mode = CELL_ATP_MODE;
 }
 
-// Might be misplaced
-unsigned long arch_paging_gphys2phys(unsigned long gphys, unsigned long flags)
+unsigned long arch_paging_gphys2phys (unsigned long gphys, unsigned long flags)
 {
+       return paging_virt2phys(&this_cell()->arch.mm, gphys, flags);
+}
+
+void riscv_paging_vcpu_init(struct paging_structures *pg_structs)
+{
+       unsigned long table = paging_hvirt2phys(pg_structs->root_table);
+
+       /* program hgatp with the mode that belongs to CELL_PAGING */
+       enable_mmu_hgatp(CELL_ATP_MODE, table);
+}
+
+int riscv_paging_cell_init(struct cell *cell)
+{
+       cell->arch.mm.hv_paging = 0;
+       cell->arch.mm.root_paging = CELL_PAGING;
+
+       cell->arch.mm.root_table =
+               page_alloc_aligned(&mem_pool, CELL_ROOT_PT_PAGES);
+       if (!cell->arch.mm.root_table)
+               return -ENOMEM;
+
        return 0;
 }
diff --git a/hypervisor/include/jailhouse/paging.h b/hypervisor/include/jailhouse/paging.h
index d592abad..8841b09e 100644
--- a/hypervisor/include/jailhouse/paging.h
+++ b/hypervisor/include/jailhouse/paging.h
@@ -96,7 +96,7 @@ typedef pt_entry_t page_table_t;
 struct paging {
        /** Page size of terminal entries in this level or 0 if none are
         * supported. */
-       unsigned int page_size;
+       unsigned long page_size;
 
        /**
         * Get entry in given table corresponding to virt address.
-- 
2.36.1

-- 
You received this message because you are subscribed to the Google Groups 
"Jailhouse" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/jailhouse-dev/20220627132905.4338-21-ralf.ramsauer%40oth-regensburg.de.

Reply via email to