The idea being that kvm_read_guest_page() effectively pins the page and put_page() has the effect of unpinning it? It seems to me that we should be page_cache_release()'ing instead, since we're not just get_page()'ing the memory. I may be wrong, though.
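
For reference, the pinning pattern under discussion looks roughly like this (a minimal sketch of the host-side flow, not the actual kvm code; note that page_cache_release() is, as far as I know, just an alias for put_page()):

/* Sketch: gfn_to_page() ends up in get_user_pages(), which takes a
 * reference on the page -- that is the "pin".  Dropping that
 * reference is the "unpin". */
struct page *page = gfn_to_page(kvm, gfn);

if (page) {
        /* ... kvm_read_guest_page() copies data out of the page ... */
        put_page(page);                 /* or page_cache_release(page) */
}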

Both of these are optimizations, though. They're not strictly needed for what I'm after, since in the case of ballooning there's no reason anyone would be calling kvm_read_guest_page() on the ballooned memory.

Second is hacking the rmap to do reverse mapping to every present pte and put_page() the pages at rmap_remove(). And that's about all it takes to make this work.

If I understand you correctly, this is to unpin the page whenever it is removed from the rmap? That would certainly be useful, but it's still an optimization. The other obvious optimization to me would be to not use get_user_pages() on all of memory to start with, and instead allow pages to be faulted in on use. This is particularly useful for creating a VM with a very large amount of memory and immediately ballooning down; that way, the large amount of memory doesn't need to be present to actually spawn the guest.
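
To illustrate (a purely hypothetical sketch; the helper name and the structure fields here are invented for the example), the slot code could pin a page only when the guest first touches it, instead of calling get_user_pages() over the whole slot up front:

/* Hypothetical lazy fault-in: pin a single page on first access
 * rather than pinning the whole memory slot at creation time. */
static struct page *slot_fault_in(struct kvm_memory_slot *slot,
                                  unsigned long idx)
{
        struct page *page = NULL;

        if (slot->phys_mem[idx])        /* already faulted in */
                return slot->phys_mem[idx];

        down_read(&current->mm->mmap_sem);
        if (get_user_pages(current, current->mm,
                           slot->userspace_addr + idx * PAGE_SIZE,
                           1, 1, 0, &page, NULL) != 1)
                page = NULL;
        up_read(&current->mm->mmap_sem);

        slot->phys_mem[idx] = page;     /* pinned from here on */
        return page;
}

Ballooned memory would then simply never get faulted in.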

Regards,

Anthony Liguori

Izik's idea is aimed at a general guest swapping capability. The first step is just to increase the reference count of the rmapped pages, the second is to change the size of the shadow page tables as a function of the guest's memory usage, and the third is to get notifications from Linux about pte state changes.
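
Roughly, the first step would look like this (a sketch only; the real rmap_add()/rmap_remove() in kvm's mmu code do more bookkeeping than shown, and the spte-to-page translation here is illustrative):

/* Hold a page reference for as long as some shadow pte maps the
 * page, and drop it when the spte is torn down, so the page is only
 * pinned while it is actually mapped for the guest. */
static void rmap_add(struct kvm_vcpu *vcpu, u64 *spte)
{
        struct page *page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK)
                                        >> PAGE_SHIFT);

        get_page(page);                 /* pin while mapped */
        /* ... existing rmap bookkeeping ... */
}

static void rmap_remove(struct kvm_vcpu *vcpu, u64 *spte)
{
        struct page *page = pfn_to_page((*spte & PT64_BASE_ADDR_MASK)
                                        >> PAGE_SHIFT);

        /* ... existing rmap bookkeeping ... */
        put_page(page);                 /* page can now be reclaimed */
}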
btw: I have unmerged balloon code (guest & host) that uses the old kernel memory mapping. The guest part may still be valid for the userspace allocation. Attaching it.
Dor.


/*
 * KVM guest balloon driver
 *
 * Copyright (C) 2007, Qumranet, Inc., Dor Laor <[EMAIL PROTECTED]>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 */

#include "../kvm.h"
#include <linux/kvm_para.h>
#include <linux/kvm.h>

#include <asm/hypercall.h>
#include <asm/uaccess.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/version.h>
#include <linux/miscdevice.h>

MODULE_AUTHOR("Dor Laor");
MODULE_DESCRIPTION("Implements guest ballooning support");
MODULE_LICENSE("GPL");
MODULE_VERSION("1");

#define KVM_BALLOON_MINOR MISC_DYNAMIC_MINOR

static LIST_HEAD(balloon_plist);
static int balloon_size;
static DEFINE_SPINLOCK(balloon_plist_lock);
static gfn_t balloon_shared_gfn;

struct balloon_page {
        struct page *bpage;
        struct list_head bp_list;
};

/*
 * Tell the host to add (npages > 0) or remove (npages < 0) pages
 * from the balloon.  The pfn list has already been written into the
 * shared page identified by balloon_shared_gfn.
 */
static int kvm_trigger_balloon_op(int npages)
{
        unsigned long ret;

        ret = kvm_hypercall2(__NR_hypercall_balloon, balloon_shared_gfn,
                             npages);
        WARN_ON(ret);
        printk(KERN_DEBUG "%s: hypercall ret: %lx\n", __FUNCTION__, ret);

        return ret;
}

/*
 * Inflate: allocate npages guest pages, record their pfns in the
 * shared page and ask the host to remove them from the guest.
 */
static int kvm_balloon_inflate(unsigned long *shared_page_addr, int npages)
{
        LIST_HEAD(tmp_list);
        struct balloon_page *node, *tmp;
        u32 *pfn = (u32 *)shared_page_addr;
        int allocated = 0;
        int i, r = -ENOMEM;

        for (i = 0; i < npages; i++) {
                node = kzalloc(sizeof(struct balloon_page), GFP_KERNEL);
                if (!node)
                        goto out_free;

                node->bpage = alloc_page(GFP_HIGHUSER | __GFP_ZERO);
                if (!node->bpage) {
                        kfree(node);
                        goto out_free;
                }

                list_add(&node->bp_list, &tmp_list);

                allocated++;
                *pfn++ = page_to_pfn(node->bpage);
        }

        spin_lock(&balloon_plist_lock);

        r = kvm_trigger_balloon_op(npages);
        if (r < 0) {
                printk(KERN_DEBUG "%s: got kvm_trigger_balloon_op res=%d\n",
                       __FUNCTION__, r);
                spin_unlock(&balloon_plist_lock);
                goto out_free;
        }

        list_splice(&tmp_list, &balloon_plist);
        balloon_size += allocated;
        printk(KERN_DEBUG "%s: current balloon size=%d\n", __FUNCTION__,
               balloon_size);

        spin_unlock(&balloon_plist_lock);

        return allocated;
        
out_free:
        list_for_each_entry_safe(node, tmp, &tmp_list, bp_list) {
                __free_page(node->bpage);
                list_del(&node->bp_list);
                kfree(node);
        }

        return r;
}

/*
 * Deflate: record the pfns of balloon pages in the shared page, ask
 * the host to map them back into the guest, then free them.
 */
static int kvm_balloon_deflate(unsigned long *shared_page_addr, int npages)
{
        LIST_HEAD(tmp_list);
        struct balloon_page *node, *tmp;
        u32 *pfn = (u32 *)shared_page_addr;
        int deallocated = 0;
        int r = 0;

        spin_lock(&balloon_plist_lock);

        if (balloon_size < npages) {
                printk(KERN_DEBUG "%s: deflate request of %d exceeds balloon size %d\n",
                       __FUNCTION__, npages, balloon_size);
                npages = balloon_size;
        }

        /*
         * Move the balloon pages to tmp list before issuing 
         * the hypercall
         */
        list_for_each_entry_safe(node, tmp, &balloon_plist, bp_list) {
                *pfn++ = page_to_pfn(node->bpage);
                list_move(&node->bp_list, &tmp_list);
                if (++deallocated == npages)
                        break;
        }

        r = kvm_trigger_balloon_op(-npages);
        if (r < 0) {
                printk(KERN_DEBUG "%s: got kvm_trigger_balloon_op res=%d\n",
                       __FUNCTION__, r);
                goto out;
        }

        list_for_each_entry_safe(node, tmp, &tmp_list, bp_list) {
                __free_page(node->bpage);
                list_del(&node->bp_list);
                kfree(node);
        }
        balloon_size -= npages;
        printk(KERN_DEBUG "%s: current balloon size=%d\n", __FUNCTION__,
               balloon_size);

        spin_unlock(&balloon_plist_lock);

        return deallocated;

out:
        list_splice(&tmp_list, &balloon_plist);
        spin_unlock(&balloon_plist_lock);

        return r;
}

#define MAX_BALLOON_PAGES_PER_OP (PAGE_SIZE/sizeof(u32))
#define MAX_BALLOON_XFLATE_OP 1000000

/*
 * Split an inflate/deflate request into chunks of at most
 * MAX_BALLOON_PAGES_PER_OP pfns, the most that fit in the shared page.
 */
static int kvm_balloon_xflate(struct kvm_balloon_op *balloon_op)
{
        unsigned long *shared_page_addr;
        int r = -EINVAL, i;
        int iterations;
        int npages;
        int curr_pages = 0;
        int gfns_per_page;

        if (balloon_op->npages < -MAX_BALLOON_XFLATE_OP ||
            balloon_op->npages > MAX_BALLOON_XFLATE_OP ||
            !balloon_op->npages) {
                printk(KERN_DEBUG "%s: got bad npages=%d\n",
                       __FUNCTION__, balloon_op->npages);
                return -EINVAL;
        }

        npages = abs(balloon_op->npages);

        printk(KERN_DEBUG "%s: got %s, npages=%d\n", __FUNCTION__,
               (balloon_op->npages > 0)? "inflate":"deflate", npages);

        gfns_per_page = MAX_BALLOON_PAGES_PER_OP;
        shared_page_addr = __va(balloon_shared_gfn << PAGE_SHIFT);

        /*
         * Call the balloon hypercall in PAGE_SIZE*pfns-per-page
         * iterations
         */
        iterations = DIV_ROUND_UP(npages, gfns_per_page);
        printk(KERN_DEBUG "%s: iterations=%d\n", __FUNCTION__, iterations);

        for (i = 0; i < iterations; i++) {
                int pages_in_iteration = 
                        min(npages - curr_pages, gfns_per_page);

                if (balloon_op->npages > 0)
                        r = kvm_balloon_inflate(shared_page_addr,
                                                pages_in_iteration);
                else
                        r = kvm_balloon_deflate(shared_page_addr,
                                                pages_in_iteration);
                if (r < 0)
                        return r;
                curr_pages += r;
                if (r != pages_in_iteration)
                        break;
        }

        return curr_pages;
}

static long kvm_balloon_ioctl(struct file *filp,
                              unsigned int ioctl, unsigned long arg)
{
        int r = -EINVAL;
        void __user *argp = (void __user *)arg;

        switch (ioctl) {
        case KVM_BALLOON_OP: {
                struct kvm_balloon_op balloon_op;

                r = -EFAULT;
                if (copy_from_user(&balloon_op, argp, sizeof balloon_op))
                        goto out;

                r = kvm_balloon_xflate(&balloon_op);
                if (r < 0)
                        goto out;
                balloon_op.npages = r;

                r = -EFAULT;
                if (copy_to_user(argp, &balloon_op, sizeof balloon_op))
                        goto out;
                r = 0;
                break;
        }
        default:
                break;
        }
out:
        return r;
}

static int kvm_balloon_open(struct inode *inode, struct file *filp)
{
        return 0;
}

static int kvm_balloon_release(struct inode *inode, struct file *filp)
{
        return 0;
}

static struct file_operations balloon_chardev_ops = {
        .owner          = THIS_MODULE,
        .open           = kvm_balloon_open,
        .release        = kvm_balloon_release,
        .unlocked_ioctl = kvm_balloon_ioctl,
        .compat_ioctl   = kvm_balloon_ioctl,
};

static struct miscdevice kvm_balloon_dev = {
        .minor = KVM_BALLOON_MINOR,
        .name  = "kvm_balloon",
        .fops  = &balloon_chardev_ops,
};

static int __init kvm_balloon_init(void)
{
        struct page *gfns_page;
        int r = 0;

        if (misc_register(&kvm_balloon_dev)) {
                printk(KERN_ERR "balloon: misc device register failed\n");
                return -EBUSY;
        }

        gfns_page = alloc_page(GFP_KERNEL);
        if (!gfns_page) {
                r = -ENOMEM;
                goto out;
        }

        balloon_shared_gfn = page_to_pfn(gfns_page);
        
        return 0;

out:
        misc_deregister(&kvm_balloon_dev);
        return r;
}

static void __exit kvm_balloon_exit(void)
{
        misc_deregister(&kvm_balloon_dev);

        spin_lock(&balloon_plist_lock);

        /*
         * Don't free the balloon pages here: the host has revoked
         * access to them, so they are leaked intentionally.
         *
         * {struct balloon_page *node, *tmp;
         * list_for_each_entry_safe(node, tmp, &balloon_plist, bp_list) {
         *      __free_page(node->bpage);
         *      list_del(&node->bp_list);
         *      kfree(node);
         * }}
         */
        if (balloon_size)
                printk(KERN_ERR "%s: exit while balloon not empty!\n",
                        __FUNCTION__);

        spin_unlock(&balloon_plist_lock);

        __free_page(pfn_to_page(balloon_shared_gfn));
}

module_init(kvm_balloon_init);
module_exit(kvm_balloon_exit);
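
For anyone who wants to poke at this from userspace, usage would look something like the following (illustrative only; it assumes the misc device shows up as /dev/kvm_balloon and that struct kvm_balloon_op carries the signed npages field used above):

/* Inflate the balloon by 512 pages; a negative npages would deflate.
 * On success the driver writes back the number of pages it actually
 * operated on. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>          /* KVM_BALLOON_OP, struct kvm_balloon_op */

int main(void)
{
        struct kvm_balloon_op op = { .npages = 512 };
        int fd = open("/dev/kvm_balloon", O_RDWR);

        if (fd < 0) {
                perror("open /dev/kvm_balloon");
                return 1;
        }
        if (ioctl(fd, KVM_BALLOON_OP, &op) < 0) {
                perror("KVM_BALLOON_OP");
                return 1;
        }
        printf("operated on %d pages\n", op.npages);
        return 0;
}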