Avi, Marcelo,

below is a patch for the System z (s390x) ballooner (as defined by the
other System z hypervisors). I want to push that patch via Martin's tree,
since the tricky part is in architecture-specific memory management code.

Can you ack/nack?


Linux on System z uses a ballooner based on diagnose 0x10 (also known as
collaborative memory management). This patch implements diagnose
0x10 on the guest address space.

Signed-off-by: Christian Borntraeger <[email protected]>
---
 arch/s390/include/asm/kvm_host.h |    1 
 arch/s390/include/asm/pgtable.h  |    1 
 arch/s390/kvm/diag.c             |   32 ++++++++++++++++++++++++-
 arch/s390/kvm/kvm-s390.c         |    1 
 arch/s390/mm/pgtable.c           |   49 ++++++++++++++++++++++++++++++++++++++-
 5 files changed, 82 insertions(+), 2 deletions(-)

Index: b/arch/s390/include/asm/kvm_host.h
===================================================================
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -145,6 +145,7 @@ struct kvm_vcpu_stat {
        u32 instruction_sigp_arch;
        u32 instruction_sigp_prefix;
        u32 instruction_sigp_restart;
+       u32 diagnose_10;
        u32 diagnose_44;
 };
 
Index: b/arch/s390/include/asm/pgtable.h
===================================================================
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -681,6 +681,7 @@ int gmap_map_segment(struct gmap *gmap, 
 int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
 unsigned long __gmap_fault(unsigned long address, struct gmap *);
 unsigned long gmap_fault(unsigned long address, struct gmap *);
+void gmap_discard(unsigned long from, unsigned long to, struct gmap *);
 
 /*
  * Certain architectures need to do special things when PTEs
Index: b/arch/s390/kvm/diag.c
===================================================================
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -1,7 +1,7 @@
 /*
  * diag.c - handling diagnose instructions
  *
- * Copyright IBM Corp. 2008
+ * Copyright IBM Corp. 2008,2011
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
@@ -15,6 +15,56 @@
 #include <linux/kvm_host.h>
 #include "kvm-s390.h"
 
+/*
+ * Handle diagnose 0x10 (release pages) by discarding a range of the
+ * guest address space.
+ *
+ * The two register numbers are taken from the nibbles of the ipa field.
+ * The first register supplies the start address; 4096 is added to the
+ * value of the second register to form the end address, which is used as
+ * an exclusive bound.
+ *
+ * A PGM_SPECIFICATION program interrupt is injected if start or end is
+ * not page aligned, if start > end, or if start is below 2 * PAGE_SIZE.
+ * The two pages starting at the vcpu's prefix are never discarded; a
+ * range overlapping them is split into the parts below and above them.
+ *
+ * Returns the result of kvm_s390_inject_program_int() for a rejected
+ * request and 0 otherwise.
+ */
+static int diag_release_pages(struct kvm_vcpu *vcpu)
+{
+       unsigned long start, end;
+       unsigned long prefix = vcpu->arch.sie_block->prefix;
+
+       start = vcpu->arch.guest_gprs[(vcpu->arch.sie_block->ipa & 0xf0) >> 4];
+       end = vcpu->arch.guest_gprs[vcpu->arch.sie_block->ipa & 0xf] + 4096;
+
+       if (start & ~PAGE_MASK || end & ~PAGE_MASK || start > end
+           || start < 2 * PAGE_SIZE)
+               return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+       VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end);
+       vcpu->stat.diagnose_10++;
+
+       /*
+        * start <= end was checked above and end is exclusive, so a range
+        * ending at or before the prefix, or starting at or after the end
+        * of the prefix pages, does not touch them.
+        */
+       if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) {
+               gmap_discard(start, end, vcpu->arch.gmap);
+       } else {
+               /* Skip the prefix pages: discard only below and above them. */
+               if (start < prefix)
+                       gmap_discard(start, prefix, vcpu->arch.gmap);
+               if (end > prefix + 2 * PAGE_SIZE)
+                       gmap_discard(prefix + 2 * PAGE_SIZE,
+                                    end, vcpu->arch.gmap);
+       }
+       return 0;
+}
+
 static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
 {
        VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
@@ -57,6 +85,8 @@ int kvm_s390_handle_diag(struct kvm_vcpu
        int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16;
 
        switch (code) {
+       case 0x10:
+               return diag_release_pages(vcpu);
        case 0x44:
                return __diag_time_slice_end(vcpu);
        case 0x308:
Index: b/arch/s390/kvm/kvm-s390.c
===================================================================
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -71,6 +71,7 @@ struct kvm_stats_debugfs_item debugfs_en
        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
+       { "diagnose_10", VCPU_STAT(diagnose_10) },
        { "diagnose_44", VCPU_STAT(diagnose_44) },
        { NULL }
 };
Index: b/arch/s390/mm/pgtable.c
===================================================================
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -1,5 +1,5 @@
 /*
- *    Copyright IBM Corp. 2007,2009
+ *    Copyright IBM Corp. 2007,2011
  *    Author(s): Martin Schwidefsky <[email protected]>
  */
 
@@ -467,6 +467,68 @@ unsigned long gmap_fault(unsigned long a
 }
 EXPORT_SYMBOL_GPL(gmap_fault);
 
+/*
+ * Discard the memory backing the guest address range [from, to).
+ *
+ * For every segment (PMD_SIZE) of the range the gmap page table is walked.
+ * Segments without a valid entry at any table level are skipped. For a
+ * valid segment entry the struct gmap_pgtable reached through page->index
+ * supplies the address in gmap->mm (mp->vmaddr), and the matching part of
+ * that mapping is zapped.
+ *
+ * gmap->mm->mmap_sem is held for reading during the whole walk.
+ */
+void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
+{
+       unsigned long *table, address, next, vmaddr, size;
+       struct vm_area_struct *vma;
+       struct gmap_pgtable *mp;
+       struct page *page;
+
+       down_read(&gmap->mm->mmap_sem);
+       for (address = from; address < to; address = next) {
+               next = (address + PMD_SIZE) & PMD_MASK;
+               /*
+                * In the topmost segment the addition wraps around to 0,
+                * which would restart the walk from the bottom; make this
+                * the last iteration instead.
+                */
+               if (next < address)
+                       next = to;
+               /* Walk the gmap address space page table */
+               table = gmap->table + ((address >> 53) & 0x7ff);
+               if (unlikely(*table & _REGION_ENTRY_INV))
+                       continue;
+               table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+               table = table + ((address >> 42) & 0x7ff);
+               if (unlikely(*table & _REGION_ENTRY_INV))
+                       continue;
+               table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+               table = table + ((address >> 31) & 0x7ff);
+               if (unlikely(*table & _REGION_ENTRY_INV))
+                       continue;
+               table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+               table = table + ((address >> 20) & 0x7ff);
+               if (unlikely(*table & _SEGMENT_ENTRY_INV))
+                       continue;
+               page = pfn_to_page(*table >> PAGE_SHIFT);
+               mp = (struct gmap_pgtable *) page->index;
+               vmaddr = mp->vmaddr | (address & ~PMD_MASK);
+               /*
+                * find_vma() returns NULL when no vma ends above the
+                * address, e.g. after the area was unmapped from gmap->mm;
+                * zap_page_range() must not be called without a vma.
+                */
+               vma = find_vma(gmap->mm, vmaddr);
+               if (!vma)
+                       continue;
+               size = min(to - address, PMD_SIZE - (address & ~PMD_MASK));
+               zap_page_range(vma, vmaddr, size, NULL);
+       }
+       up_read(&gmap->mm->mmap_sem);
+}
+EXPORT_SYMBOL_GPL(gmap_discard);
+
 void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table)
 {
        struct gmap_rmap *rmap, *next;
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to