We designed the Book3S port of KVM to be as modular as possible. Most
of the code can be reused on a Book3S_32 host as well.

The main difference between 32-bit and 64-bit cores is the MMU. To keep
things well separated, we treat the book3s_64 MMU as one possible compile
option.

This patch adds all the MMU helpers the rest of the code needs in
order to modify the host's MMU, like setting PTEs and segments.

Signed-off-by: Alexander Graf <ag...@suse.de>
---
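For reference, here is a rough sketch (not part of the patch) of how the
rest of the Book3S code is expected to drive these helpers when the guest
takes a page fault. handle_guest_fault() is a made-up caller and the
->xlate() hook is assumed from the guest MMU emulation; only
kvmppc_mmu_map_segment() and kvmppc_mmu_map_page() come from this file.

    static int handle_guest_fault(struct kvm_vcpu *vcpu, ulong eaddr, bool data)
    {
            struct kvmppc_pte pte;
            int err;

            /* Let the guest MMU emulation translate the faulting EA. */
            err = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte, data);
            if (err < 0)
                    return err;     /* no guest mapping, reflect the fault */

            /* Make sure the host SLB has a segment entry for this EA... */
            kvmppc_mmu_map_segment(vcpu, eaddr);

            /* ...and shadow the guest PTE in the host hash table. */
            return kvmppc_mmu_map_page(vcpu, &pte);
    }
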
 arch/powerpc/kvm/book3s_64_mmu_host.c |  439 +++++++++++++++++++++++++++++++++
 1 files changed, 439 insertions(+), 0 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_64_mmu_host.c

diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
new file mode 100644
index 0000000..6d9f3a3
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -0,0 +1,439 @@
+/*
+ * Copyright (C) 2009 SUSE Linux Products GmbH. All rights reserved.
+ *
+ * Authors:
+ *     Alexander Graf <ag...@suse.de>
+ *     Kevin Wolf <m...@kevin-wolf.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ */
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_book3s.h>
+#include <asm/mmu-hash64.h>
+#include <asm/machdep.h>
+#include <asm/mmu_context.h>
+#include <asm/hw_irq.h>
+
+#define PTE_SIZE 12
+#define VSID_ALL 0
+
+// #define DEBUG_MMU
+// #define DEBUG_SLB
+
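+/*
+ * Invalidate all cached shadow PTEs whose guest effective address
+ * matches guest_ea under ea_mask. A mask of 0 drops the whole cache.
+ */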
+void kvmppc_mmu_pte_flush(struct kvm_vcpu *vcpu, u64 guest_ea, u64 ea_mask)
+{
+       int i;
+
+#ifdef DEBUG_MMU
+       printk(KERN_INFO "KVM: Flushing %d Shadow PTEs: 0x%llx & 0x%llx\n",
+               vcpu->arch.hpte_cache_offset, guest_ea, ea_mask);
+#endif
+       BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM);
+       guest_ea &= ea_mask;
+       for (i=0; i<vcpu->arch.hpte_cache_offset; i++) {
+               struct hpte_cache *pte;
+
+               pte = &vcpu->arch.hpte_cache[i];
+               if (!pte->host_va)
+                       continue;
+
+               if ((pte->pte.eaddr & ea_mask) == guest_ea) {
+#ifdef DEBUG_MMU
+       printk(KERN_INFO "KVM: Flushing SPT %d: 0x%llx (0x%llx) -> 0x%llx\n", i, pte->pte.eaddr, pte->pte.vpage, pte->host_va);
+#endif
+                       ppc_md.hpte_invalidate(pte->slot, pte->host_va,
+                                              MMU_PAGE_4K, MMU_SEGSIZE_256M,
+                                              false);
+                       pte->host_va = 0;
+                       kvm_release_pfn_dirty(pte->pfn);
+               }
+       }
+
+       /* Doing a complete flush -> start from scratch */
+       if (!ea_mask)
+               vcpu->arch.hpte_cache_offset = 0;
+}
+
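+/*
+ * Like kvmppc_mmu_pte_flush(), but matches shadow PTEs by guest
+ * virtual page number instead of effective address.
+ */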
+void kvmppc_mmu_pte_vflush(struct kvm_vcpu *vcpu, u64 guest_vp, u64 vp_mask)
+{
+       int i;
+
+#ifdef DEBUG_MMU
+       printk(KERN_INFO "KVM: Flushing %d Shadow vPTEs: 0x%llx & 0x%llx\n",
+               vcpu->arch.hpte_cache_offset, guest_vp, vp_mask);
+#endif
+       BUG_ON(vcpu->arch.hpte_cache_offset > HPTEG_CACHE_NUM);
+       guest_vp &= vp_mask;
+       for (i=0; i<vcpu->arch.hpte_cache_offset; i++) {
+               struct hpte_cache *pte;
+
+               pte = &vcpu->arch.hpte_cache[i];
+               if (!pte->host_va)
+                       continue;
+
+               if ((pte->pte.vpage & vp_mask) == guest_vp) {
+#ifdef DEBUG_MMU
+       printk(KERN_INFO "KVM: Flushing SPT %d: 0x%llx (0x%llx) -> 0x%llx\n", i, pte->pte.eaddr, pte->pte.vpage, pte->host_va);
+#endif
+                       ppc_md.hpte_invalidate(pte->slot, pte->host_va,
+                                              MMU_PAGE_4K, MMU_SEGSIZE_256M,
+                                              false);
+                       pte->host_va = 0;
+                       kvm_release_pfn_dirty(pte->pfn);
+               }
+       }
+}
+
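+/*
+ * Look up a cached shadow PTE by guest effective address (via its
+ * virtual page number); returns NULL if nothing valid is cached.
+ */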
+struct kvmppc_pte *kvmppc_mmu_find_pte(struct kvm_vcpu *vcpu, u64 ea, bool data)
+{
+       int i;
+       u64 guest_vp;
+
+       guest_vp = vcpu->arch.mmu.ea_to_vp(vcpu, ea, false);
+       for (i=0; i<vcpu->arch.hpte_cache_offset; i++) {
+               struct hpte_cache *pte;
+
+               pte = &vcpu->arch.hpte_cache[i];
+               if (!pte->host_va)
+                       continue;
+
+               if (pte->pte.vpage == guest_vp)
+                       return &pte->pte;
+       }
+
+       return NULL;
+}
+
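+/*
+ * Grab the next free slot in the shadow PTE cache, flushing the whole
+ * cache first when it is full.
+ */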
+static int kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu)
+{
+       if (vcpu->arch.hpte_cache_offset == HPTEG_CACHE_NUM)
+               kvmppc_mmu_pte_flush(vcpu, 0, 0);
+
+       return vcpu->arch.hpte_cache_offset++;
+}
+
+/* We keep 512 gvsid->hvsid entries, mapping the guest ones to the array using
+ * a hash, so we don't waste cycles on looping */
+static u16 kvmppc_sid_hash(struct kvm_vcpu *vcpu, u64 gvsid)
+{
+       return (u16)(((gvsid >> (SID_MAP_BITS * 7)) & SID_MAP_MASK) ^
+                    ((gvsid >> (SID_MAP_BITS * 6)) & SID_MAP_MASK) ^
+                    ((gvsid >> (SID_MAP_BITS * 5)) & SID_MAP_MASK) ^
+                    ((gvsid >> (SID_MAP_BITS * 4)) & SID_MAP_MASK) ^
+                    ((gvsid >> (SID_MAP_BITS * 3)) & SID_MAP_MASK) ^
+                    ((gvsid >> (SID_MAP_BITS * 2)) & SID_MAP_MASK) ^
+                    ((gvsid >> (SID_MAP_BITS * 1)) & SID_MAP_MASK) ^
+                    ((gvsid >> (SID_MAP_BITS * 0)) & SID_MAP_MASK));
+}
+
+
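+/*
+ * Find the host VSID mapping for a guest VSID, checking both the
+ * primary hash slot and its mirrored (SID_MAP_MASK - hash) slot.
+ */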
+static struct kvmppc_sid_map *find_sid_vsid(struct kvm_vcpu *vcpu, u64 gvsid)
+{
+       struct kvmppc_sid_map *map;
+
+       map = &to_book3s(vcpu)->sid_map[kvmppc_sid_hash(vcpu, gvsid)];
+       if (map->guest_vsid == gvsid) {
+#ifdef DEBUG_SLB
+               printk(KERN_INFO "SLB: Searching 0x%llx -> 0x%llx\n", gvsid, map->host_vsid);
+#endif
+               return map;
+       }
+
+       map = &to_book3s(vcpu)->sid_map[SID_MAP_MASK - kvmppc_sid_hash(vcpu, gvsid)];
+       if (map->guest_vsid == gvsid) {
+#ifdef DEBUG_SLB
+               printk(KERN_INFO "SLB: Searching 0x%llx -> 0x%llx\n", gvsid, map->host_vsid);
+#endif
+               return map;
+       }
+
+#ifdef DEBUG_SLB
+       printk(KERN_INFO "SLB: Searching 0x%llx -> not found\n", gvsid);
+#endif
+       return NULL;
+}
+
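+/*
+ * Map a guest page into the host hash table: translate the gfn to a
+ * host pfn, find the host VSID for the segment (mapping the segment
+ * first if necessary) and insert a 4k HPTE. The new entry is
+ * remembered in the shadow PTE cache so it can be invalidated later.
+ */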
+int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte)
+{
+       pfn_t hpaddr;
+       ulong vsid, hash, hpteg, va;
+       int ret;
+       int rflags = 0x192;
+       int vflags = 0;
+       int attempt = 0;
+       struct kvmppc_sid_map *map;
+
+       kvmppc_switch_context(vcpu, CONTEXT_HOST);
+
+       /* Get host physical address for gpa */
+       down_read(&current->mm->mmap_sem);
+       hpaddr = gfn_to_pfn(vcpu->kvm, orig_pte->raddr >> PAGE_SHIFT);
+       if (kvm_is_error_hva(hpaddr)) {
+       printk(KERN_INFO "Couldn't get guest page for gfn %llx!\n", orig_pte->eaddr);
+               up_read(&current->mm->mmap_sem);
+               return -EINVAL;
+       }
+       hpaddr <<= PAGE_SHIFT;
+#if PAGE_SHIFT == 12
+#elif PAGE_SHIFT == 16
+       hpaddr |= orig_pte->raddr & 0xf000;
+#else
+#error Unknown page size
+#endif
+
+       up_read(&current->mm->mmap_sem);
+
+       /* and write the mapping ea -> hpa into the pt */
+       map = find_sid_vsid(vcpu, vcpu->arch.mmu.esid_to_vsid(vcpu,
+                           orig_pte->eaddr >> SID_SHIFT));
+       if (!map) {
+               kvmppc_mmu_map_segment(vcpu, orig_pte->eaddr);
+               map = find_sid_vsid(vcpu, vcpu->arch.mmu.esid_to_vsid(vcpu,
+                                   orig_pte->eaddr >> SID_SHIFT));
+       }
+       BUG_ON(!map);
+
+       vsid = map->host_vsid;
+       va = hpt_va(orig_pte->eaddr, vsid, MMU_SEGSIZE_256M);
+
+       if (!orig_pte->may_write)
+               rflags |= HPTE_R_PP;
+       if (!orig_pte->may_execute)
+               rflags |= HPTE_R_N;
+
+       hash = hpt_hash(va, PTE_SIZE, MMU_SEGSIZE_256M);
+
+map_again:
+       hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
+
+       /* In case we tried normal mapping already, let's nuke old entries */
+       if (attempt > 1)
+               if (ppc_md.hpte_remove(hpteg) < 0)
+                       return -1;
+
+       ret = ppc_md.hpte_insert(hpteg, va, hpaddr, rflags, vflags, MMU_PAGE_4K, MMU_SEGSIZE_256M);
+
+       if (ret < 0) {
+               /* If we couldn't map a primary PTE, try a secondary */
+#ifdef USE_SECONDARY
+               hash = ~hash;
+               attempt++;
+               if (attempt % 2)
+                       vflags = HPTE_V_SECONDARY;
+               else
+                       vflags = 0;
+#else
+               attempt = 2;
+#endif
+               goto map_again;
+       } else {
+               int hpte_id = kvmppc_mmu_hpte_cache_next(vcpu);
+               struct hpte_cache *pte = &vcpu->arch.hpte_cache[hpte_id];
+#ifdef DEBUG_MMU
+               printk(KERN_INFO "KVM: Map 0x%llx: [%lx] 0x%lx (0x%llx) -> %lx\n",
+                      orig_pte->eaddr, hpteg, va, orig_pte->vpage,
+                      hpaddr);
+#endif
+               pte->slot = hpteg + (ret & 7);
+               pte->host_va = va;
+               pte->pte = *orig_pte;
+               pte->pfn = hpaddr >> PAGE_SHIFT;
+       }
+
+       return 0;
+}
+
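+/*
+ * Create a new guest->host VSID mapping. When we run out of host
+ * VSIDs, the sid map and the shadow PTE cache are flushed and the
+ * context is reloaded.
+ */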
+static struct kvmppc_sid_map *create_sid_map(struct kvm_vcpu *vcpu, u64 gvsid)
+{
+       struct kvmppc_sid_map *map;
+       struct kvmppc_vcpu_book3s *vcpu_book3s = to_book3s(vcpu);
+       static int backwards_map = 0;
+
+       /* Two guest VSIDs can hash to the same slot, so alternate between
+          the primary and the mirrored slot when creating new entries */
+       if (backwards_map)
+               map = &to_book3s(vcpu)->sid_map[SID_MAP_MASK - kvmppc_sid_hash(vcpu, gvsid)];
+       else
+               map = &to_book3s(vcpu)->sid_map[kvmppc_sid_hash(vcpu, gvsid)];
+       backwards_map = !backwards_map;
+
+       // Uh-oh ... out of mappings. Let's flush!
+       if (vcpu_book3s->vsid_next == vcpu_book3s->vsid_max) {
+               vcpu_book3s->vsid_next = vcpu_book3s->vsid_first;
+               memset(vcpu_book3s->sid_map, 0,
+                      sizeof(struct kvmppc_sid_map) * SID_MAP_NUM);
+               kvmppc_mmu_pte_flush(vcpu, 0, 0);
+               kvmppc_reload_context(vcpu);
+       }
+       map->host_vsid = vcpu_book3s->vsid_next++;
+
+       map->guest_vsid = gvsid;
+       map->valid = true;
+
+       return map;
+}
+
+// If we're lazy, we don't create a new entry if none existed
+static int _kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr, bool lazy)
+{
+       u64 esid = eaddr >> SID_SHIFT;
+       u64 slb_esid = (eaddr & ESID_MASK) | SLB_ESID_V | get_paca()->stab_rr;
+       u64 slb_vsid = SLB_VSID_USER;
+       u64 gvsid;
+       struct kvmppc_sid_map *map;
+
+       gvsid = vcpu->arch.mmu.esid_to_vsid(vcpu, esid);
+       map = find_sid_vsid(vcpu, gvsid);
+       if (!map) {
+               if (lazy)
+                       return -ENOENT;
+               map = create_sid_map(vcpu, gvsid);
+       }
+
+       map->guest_esid = esid;
+
+       if (get_paca()->slb_cache_ptr < SLB_CACHE_ENTRIES)
+               get_paca()->slb_cache[get_paca()->slb_cache_ptr++] = esid;
+       else
+               get_paca()->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
+
+       if (++get_paca()->stab_rr >= mmu_slb_size)
+               get_paca()->stab_rr = SLB_NUM_BOLTED;
+
+       // Let's check for big segments
+       switch(eaddr & 0xf000000000000000ULL) {
+       case 0xc000000000000000ULL:
+       case 0xd000000000000000ULL:
+       case 0xe000000000000000ULL:
+       case 0xf000000000000000ULL:
+               /*
+                * We need to shove these to 0x4/0x5/0x6/0x7 so our entry/exit
+                * code can shove them back
+                */
+               slb_esid &= ~0x8000000000000000ULL;
+               esid &= ~0x8000000000000000ULL >> SID_SHIFT;
+               /* Tell Linux we want to flush all SLB entries on context switch */
+               get_paca()->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
+               break;
+       case 0:
+               // We're good on all low segments
+               break;
+       default:
+               // OMG! The guest wanted to access an unknown high segment.
+               if (lazy)
+                       return -ENOENT;
+
+               printk(KERN_ERR "KVM: Guest accessed high segment: 0x%llx at 0x%lx\n",
+                               esid, vcpu->arch.pc);
+               kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_MACHINE_CHECK);
+               get_paca()->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
+               return -ENOENT;
+       }
+
+       slb_vsid |= (map->host_vsid << 12);
+
+#ifdef DEBUG_SLB
+       printk(KERN_INFO "slbmte %#llx, %#llx\n", slb_vsid, slb_esid);
+#endif
+
+       // If we had a mapping for that ESID, let's remove it
+       asm volatile("slbie %0" : : "r" ((esid << SID_SHIFT) | SLBIE_C));
+
+       // And put in the proper new one!
+       asm volatile("slbmte  %0,%1" :
+                    : "r" (slb_vsid),
+                      "r" (slb_esid)
+                    : "memory" );
+
+       return 0;
+}
+
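+/*
+ * Map the host segment for a guest effective address, creating a new
+ * guest->host VSID mapping if we don't have one yet.
+ */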
+int kvmppc_mmu_map_segment(struct kvm_vcpu *vcpu, ulong eaddr)
+{
+       kvmppc_switch_context(vcpu, CONTEXT_GUEST);
+       return _kvmppc_mmu_map_segment(vcpu, eaddr, false);
+}
+
+static void kvmppc_mmu_prepopulate_slb(struct kvm_vcpu *vcpu)
+{
+       // Prepopulate the PC and last data access faulting segments
+
+       _kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc, true);
+       _kvmppc_mmu_map_segment(vcpu, vcpu->arch.dear, true);
+}
+
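+/*
+ * Force an SLB reload for the guest context by bouncing through the
+ * host context, e.g. after the sid map has been flushed.
+ */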
+void kvmppc_reload_context(struct kvm_vcpu *vcpu)
+{
+       if (to_book3s(vcpu)->context == CONTEXT_GUEST) {
+               kvmppc_switch_context(vcpu, CONTEXT_HOST);
+               kvmppc_switch_context(vcpu, CONTEXT_GUEST);
+       }
+}
+
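+/*
+ * Switch the host MMU between the guest and the host mm. Entering the
+ * guest context reloads the SLB and prepopulates the segments for the
+ * guest's current PC and last data fault address.
+ */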
+void kvmppc_switch_context(struct kvm_vcpu *vcpu, int context)
+{
+       struct mm_struct *mm = NULL;
+       unsigned long pc = KSTK_EIP(current);
+       unsigned long flags;
+
+       /* CONTEXT_GUEST_END means we don't care which context we're in. */
+       if (context == CONTEXT_GUEST_END) {
+#if 0
+               /* XXX remember that we don't need to switch */
+               if (to_book3s(vcpu)->context == CONTEXT_GUEST)
+                       to_book3s(vcpu)->context = context;
+#endif
+               return;
+       }
+
+       if (to_book3s(vcpu)->context == context)
+               return;
+
+       switch(context) {
+       case CONTEXT_GUEST:
+               mm = vcpu->arch.mm_guest;
+               break;
+       case CONTEXT_HOST:
+               mm = vcpu->arch.mm_host;
+               break;
+       default:
+               BUG();
+       }
+
+       local_irq_save(flags);
+
+       /* switch_slb tries to be clever and maps the task's pc,
+          but we prepopulate all SLB entries ourselves anyways. */
+       get_paca()->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
+       if ((context == CONTEXT_GUEST) && pc) {
+               current->thread.regs->nip = PAGE_OFFSET;
+               switch_slb(current, mm);
+               current->thread.regs->nip = pc;
+               kvmppc_mmu_prepopulate_slb(vcpu);
+       } else {
+               switch_slb(current, mm);
+       }
+
+       to_book3s(vcpu)->context = context;
+
+       local_irq_restore(flags);
+}
+
+void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu)
+{
+       kvmppc_mmu_pte_flush(vcpu, 0, 0);
+}
-- 
1.6.0.2
