ok, i change some things as Anthony noted me, but the most important
thing is that i changed here:
is setting the dirty bit after using the page (i forgat to do it before)
>From 9088fb6a35517bdafe7a7066f60fab56f98c9127 Mon Sep 17 00:00:00 2001
From: Izik Eidus <[EMAIL PROTECTED]>
Date: Sun, 14 Oct 2007 01:12:05 +0200
Subject: [PATCH] make the guest non shadowed memory swappable
Signed-off-by: Izik Eidus <[EMAIL PROTECTED]>
---
drivers/kvm/kvm.h | 2 +
drivers/kvm/kvm_main.c | 81 +++++++++++++++++++++++++--------------------
drivers/kvm/mmu.c | 23 ++++++++++++-
drivers/kvm/paging_tmpl.h | 27 +++++++++++++--
4 files changed, 93 insertions(+), 40 deletions(-)
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index a155c2b..74b427f 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -409,6 +409,7 @@ struct kvm_memory_slot {
unsigned long *rmap;
unsigned long *dirty_bitmap;
int user_alloc; /* user allocated memory */
+ unsigned long userspace_addr;
};
struct kvm {
@@ -570,6 +571,7 @@ extern struct page *bad_page;
int is_error_page(struct page *page);
gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
+void kvm_release_page(struct page *page);
int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
int len);
int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index bfa201c..f58d49b 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -319,19 +319,6 @@ static struct kvm *kvm_create_vm(void)
return kvm;
}
-static void kvm_free_userspace_physmem(struct kvm_memory_slot *free)
-{
- int i;
-
- for (i = 0; i < free->npages; ++i) {
- if (free->phys_mem[i]) {
- if (!PageReserved(free->phys_mem[i]))
- SetPageDirty(free->phys_mem[i]);
- page_cache_release(free->phys_mem[i]);
- }
- }
-}
-
static void kvm_free_kernel_physmem(struct kvm_memory_slot *free)
{
int i;
@@ -349,9 +336,7 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
{
if (!dont || free->phys_mem != dont->phys_mem)
if (free->phys_mem) {
- if (free->user_alloc)
- kvm_free_userspace_physmem(free);
- else
+ if (!free->user_alloc)
kvm_free_kernel_physmem(free);
vfree(free->phys_mem);
}
@@ -771,19 +756,8 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
memset(new.phys_mem, 0, npages * sizeof(struct page *));
memset(new.rmap, 0, npages * sizeof(*new.rmap));
if (user_alloc) {
- unsigned long pages_num;
-
new.user_alloc = 1;
- down_read(¤t->mm->mmap_sem);
-
- pages_num = get_user_pages(current, current->mm,
- mem->userspace_addr,
- npages, 1, 0, new.phys_mem,
- NULL);
-
- up_read(¤t->mm->mmap_sem);
- if (pages_num != npages)
- goto out_unlock;
+ new.userspace_addr = mem->userspace_addr;
} else {
for (i = 0; i < npages; ++i) {
new.phys_mem[i] = alloc_page(GFP_HIGHUSER
@@ -1058,12 +1032,39 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
gfn = unalias_gfn(kvm, gfn);
slot = __gfn_to_memslot(kvm, gfn);
- if (!slot)
+ if (!slot) {
+ get_page(bad_page);
return bad_page;
+ }
+ if (slot->user_alloc) {
+ struct page *page[1];
+ int npages;
+
+ down_read(¤t->mm->mmap_sem);
+ npages = get_user_pages(current, current->mm,
+ slot->userspace_addr
+ + (gfn - slot->base_gfn) * PAGE_SIZE, 1,
+ 1, 0, page, NULL);
+ up_read(¤t->mm->mmap_sem);
+ if (npages != 1) {
+ get_page(bad_page);
+ return bad_page;
+ }
+ return page[0];
+ }
+ get_page(slot->phys_mem[gfn - slot->base_gfn]);
return slot->phys_mem[gfn - slot->base_gfn];
}
EXPORT_SYMBOL_GPL(gfn_to_page);
+void kvm_release_page(struct page *page)
+{
+ if (!PageReserved(page))
+ SetPageDirty(page);
+ put_page(page);
+}
+EXPORT_SYMBOL_GPL(kvm_release_page);
+
static int next_segment(unsigned long len, int offset)
{
if (len > PAGE_SIZE - offset)
@@ -1079,13 +1080,16 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
struct page *page;
page = gfn_to_page(kvm, gfn);
- if (is_error_page(page))
+ if (is_error_page(page)) {
+ kvm_release_page(page);
return -EFAULT;
+ }
page_virt = kmap_atomic(page, KM_USER0);
memcpy(data, page_virt + offset, len);
kunmap_atomic(page_virt, KM_USER0);
+ kvm_release_page(page);
return 0;
}
EXPORT_SYMBOL_GPL(kvm_read_guest_page);
@@ -1117,14 +1121,17 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
struct page *page;
page = gfn_to_page(kvm, gfn);
- if (is_error_page(page))
+ if (is_error_page(page)) {
+ kvm_release_page(page);
return -EFAULT;
+ }
page_virt = kmap_atomic(page, KM_USER0);
memcpy(page_virt + offset, data, len);
kunmap_atomic(page_virt, KM_USER0);
mark_page_dirty(kvm, gfn);
+ kvm_release_page(page);
return 0;
}
EXPORT_SYMBOL_GPL(kvm_write_guest_page);
@@ -1155,13 +1162,16 @@ int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
struct page *page;
page = gfn_to_page(kvm, gfn);
- if (is_error_page(page))
+ if (is_error_page(page)) {
+ kvm_release_page(page);
return -EFAULT;
+ }
page_virt = kmap_atomic(page, KM_USER0);
memset(page_virt + offset, 0, len);
kunmap_atomic(page_virt, KM_USER0);
+ kvm_release_page(page);
return 0;
}
EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
@@ -2090,8 +2100,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
for (i = 0; i < nr_pages; ++i) {
mutex_lock(&vcpu->kvm->lock);
page = gva_to_page(vcpu, address + i * PAGE_SIZE);
- if (page)
- get_page(page);
vcpu->pio.guest_pages[i] = page;
mutex_unlock(&vcpu->kvm->lock);
if (!page) {
@@ -3081,9 +3089,10 @@ static struct page *kvm_vm_nopage(struct vm_area_struct *vma,
pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
page = gfn_to_page(kvm, pgoff);
- if (is_error_page(page))
+ if (is_error_page(page)) {
+ kvm_release_page(page);
return NOPAGE_SIGBUS;
- get_page(page);
+ }
if (type != NULL)
*type = VM_FAULT_MINOR;
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index e6a9b4a..4c91c84 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -425,6 +425,8 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
if (!is_rmap_pte(*spte))
return;
page = page_header(__pa(spte));
+ kvm_release_page(pfn_to_page((*spte & PT64_BASE_ADDR_MASK) >>
+ PAGE_SHIFT));
rmapp = gfn_to_rmap(kvm, page->gfns[spte - page->spt]);
if (!*rmapp) {
printk(KERN_ERR "rmap_remove: %p %llx 0->BUG\n", spte, *spte);
@@ -907,6 +909,13 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
PT_USER_MASK;
rmap_add(vcpu, &table[index], v >> PAGE_SHIFT);
+ if (!is_rmap_pte(table[index])) {
+ struct page *page;
+
+ page = pfn_to_page((v & PT64_BASE_ADDR_MASK)
+ >> PAGE_SHIFT);
+ kvm_release_page(page);
+ }
return 0;
}
@@ -920,7 +929,12 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, hpa_t p)
v, level - 1,
1, 3, &table[index]);
if (!new_table) {
+ struct page *page;
+
pgprintk("nonpaging_map: ENOMEM\n");
+ page = pfn_to_page((v & PT64_BASE_ADDR_MASK)
+ >> PAGE_SHIFT);
+ kvm_release_page(page);
return -ENOMEM;
}
@@ -1035,8 +1049,11 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
paddr = gpa_to_hpa(vcpu->kvm, addr & PT64_BASE_ADDR_MASK);
- if (is_error_hpa(paddr))
+ if (is_error_hpa(paddr)) {
+ kvm_release_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
+ >> PAGE_SHIFT));
return 1;
+ }
return nonpaging_map(vcpu, addr & PAGE_MASK, paddr);
}
@@ -1503,6 +1520,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
} else {
gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, va);
hpa_t hpa = gpa_to_hpa(vcpu, gpa);
+ struct page *page;
if (is_shadow_present_pte(ent)
&& (ent & PT64_BASE_ADDR_MASK) != hpa)
@@ -1515,6 +1533,9 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
&& !is_error_hpa(hpa))
printk(KERN_ERR "audit: (%s) notrap shadow,"
" valid guest gva %lx\n", audit_msg, va);
+ page = pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
+ >> PAGE_SHIFT);
+ kvm_release_page(page);
}
}
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 58fd35a..600b1cc 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -175,6 +175,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
walker->inherited_ar &= walker->table[index];
table_gfn = (*ptep & PT_BASE_ADDR_MASK) >> PAGE_SHIFT;
kunmap_atomic(walker->table, KM_USER0);
+ kvm_release_page(walker->page);
walker->page = gfn_to_page(vcpu->kvm, table_gfn);
walker->table = kmap_atomic(walker->page, KM_USER0);
--walker->level;
@@ -183,8 +184,10 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
walker->level - 1, table_gfn);
}
walker->pte = *ptep;
- if (walker->page)
+ if (walker->page) {
walker->ptep = NULL;
+ kvm_release_page(walker->page);
+ }
if (walker->table)
kunmap_atomic(walker->table, KM_USER0);
pgprintk("%s: pte %llx\n", __FUNCTION__, (u64)*ptep);
@@ -206,6 +209,8 @@ err:
walker->error_code |= PFERR_FETCH_MASK;
if (walker->table)
kunmap_atomic(walker->table, KM_USER0);
+ if (walker->page)
+ kvm_release_page(walker->page);
return 0;
}
@@ -249,6 +254,8 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
if (is_error_hpa(paddr)) {
set_shadow_pte(shadow_pte,
shadow_trap_nonpresent_pte | PT_SHADOW_IO_MARK);
+ kvm_release_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
+ >> PAGE_SHIFT));
return;
}
@@ -286,9 +293,20 @@ unshadowed:
pgprintk("%s: setting spte %llx\n", __FUNCTION__, spte);
set_shadow_pte(shadow_pte, spte);
page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
- if (!was_rmapped)
+ if (!was_rmapped) {
rmap_add(vcpu, shadow_pte, (gaddr & PT64_BASE_ADDR_MASK)
>> PAGE_SHIFT);
+ if (!is_rmap_pte(*shadow_pte)) {
+ struct page *page;
+
+ page = pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
+ >> PAGE_SHIFT);
+ kvm_release_page(page);
+ }
+ }
+ else
+ kvm_release_page(pfn_to_page((paddr & PT64_BASE_ADDR_MASK)
+ >> PAGE_SHIFT));
if (!ptwrite || !*ptwrite)
vcpu->last_pte_updated = shadow_pte;
}
@@ -512,19 +530,22 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu,
{
int i;
pt_element_t *gpt;
+ struct page *page;
if (sp->role.metaphysical || PTTYPE == 32) {
nonpaging_prefetch_page(vcpu, sp);
return;
}
- gpt = kmap_atomic(gfn_to_page(vcpu->kvm, sp->gfn), KM_USER0);
+ page = gfn_to_page(vcpu->kvm, sp->gfn);
+ gpt = kmap_atomic(page, KM_USER0);
for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
if (is_present_pte(gpt[i]))
sp->spt[i] = shadow_trap_nonpresent_pte;
else
sp->spt[i] = shadow_notrap_nonpresent_pte;
kunmap_atomic(gpt, KM_USER0);
+ kvm_release_page(page);
}
#undef pt_element_t
--
1.5.2.4
-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems? Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel