-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On 20/06/14 17:26, Konstantin Belousov wrote:
> On Fri, Jun 20, 2014 at 05:15:43PM +0200, Roger Pau Monné wrote:
>> -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1
>> 
>> On 20/06/14 15:28, Konstantin Belousov wrote:
>>> On Fri, Jun 20, 2014 at 11:35:53AM +0200, Roger Pau Monné
>>> wrote:
>>>> Hello,
>>>> 
>>>> I've been looking into the Xen balloon driver, because I've 
>>>> experienced problems when ballooning memory down which AFAICT
>>>> are also present in the VirtIO balloon driver. The problem
>>>> I've experienced is that when ballooning memory down, we
>>>> basically allocate a bunch of memory as WIRED, to make sure
>>>> nobody tries to swap it to disk, since it will crash the
>>>> kernel because the memory is not populated. Due to this
>>>> massive amount of memory allocated as WIRED, user-space
>>>> programs that try to use mlock will fail because we hit the
>>>> limit in vm.max_wired.
>>>> 
>>>> I'm not sure what's the best way to deal with this
>>>> limitation, should vm.max_wired be changed from the balloon
>>>> drivers when ballooning down/up? Is there any way to remove
>>>> the pages ballooned down from the memory accounting of wired
>>>> pages?
>>> 
>>> You could change the type of pages the balloon driver is
>>> allocating. Instead of wired pages, you may request unmanaged,
>>> by passing NULL object to vm_page_alloc().  This would also
>>> save on the trie nodes for managing the radix trie for the
>>> object.  There are still plinks or listq to keep track of the
>>> allocated pages.
>> 
>> Thanks for the info, I have the following patch which fixes the
>> usage of WIRED for both the Xen and the VirtIO balloon drivers,
>> could someone please test the VirtIO side?
> I briefly looked at the xen balloon.  You do not need
> balloon_append(). Use struct vm_page plinks field to link the
> pages.

Sure, thanks for the review, here is an updated version.

Roger.

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.12 (Darwin)

iQEcBAEBAgAGBQJTpGe0AAoJEKXZdqUyumTA5acH/jQ+Iwjw+QTwUtsh9ZLs7Gm0
h7XaCwa/gASEjzcxK2smIBuKA10LUSoulcYkC3cUWXqaPwFega14JcbySBRI06Z1
1bU50DL8TPLuQIzrVWZgtA+QsZkwEu/jhijr0AMxTcnm6Wq1LV5QRDoqhD0kRiHu
kEW1ez832y/0j+oeQAq0aR67zVw7hoIouH9eLaWXS4Vgxz5YQ2Pal1BMAY/OCgua
xyF7BHia9KsGTKZ9pPUQfQAW5eJrwAxR0AitQjmOwRKtWyRqymZxhYHjLYOgOKQJ
w93aLRVeG2oo0NNC6a9JD/c64sGHLABg37mnSTRL/v9gTj9I1DcFoid2q0iDGbM=
=8nDn
-----END PGP SIGNATURE-----
>From e5c898b1b2b734fdc4aa2acf645efd481f09b71d Mon Sep 17 00:00:00 2001
From: Roger Pau Monne <roger....@citrix.com>
Date: Fri, 20 Jun 2014 16:34:31 +0200
Subject: [PATCH] xen/virtio: fix balloon drivers to not mark pages as WIRED

Prevent the Xen and VirtIO balloon drivers from marking pages as
wired. This prevents them from increasing the system wired page count,
which can lead to mlock failing because of hitting the limit in
vm.max_wired.

In the Xen case make sure pages are zeroed before giving them back to
the hypervisor, or else we might be leaking data. Also remove the
balloon_{append/retrieve} and link pages directly into the
ballooned_pages queue using the plinks.q field in the page struct.

Sponsored by: Citrix Systems R&D
Reviewed by: xxx
Approved by: xxx

dev/virtio/balloon/virtio_balloon.c:
 - Don't allocate pages with VM_ALLOC_WIRED.

dev/xen/balloon/balloon.c:
 - Don't allocate pages with VM_ALLOC_WIRED.
 - Make sure pages are zeroed before giving them back to the
   hypervisor.
 - Remove the balloon_entry struct and the balloon_{append/retrieve}
   functions and use the page plinks.q entry to link the pages
   directly into the ballooned_pages queue.
---
 sys/dev/virtio/balloon/virtio_balloon.c |    4 +-
 sys/dev/xen/balloon/balloon.c           |   87 ++++++++-----------------------
 2 files changed, 23 insertions(+), 68 deletions(-)

diff --git a/sys/dev/virtio/balloon/virtio_balloon.c 
b/sys/dev/virtio/balloon/virtio_balloon.c
index d540099..6d00ef3 100644
--- a/sys/dev/virtio/balloon/virtio_balloon.c
+++ b/sys/dev/virtio/balloon/virtio_balloon.c
@@ -438,8 +438,7 @@ vtballoon_alloc_page(struct vtballoon_softc *sc)
 {
        vm_page_t m;
 
-       m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_WIRED |
-           VM_ALLOC_NOOBJ);
+       m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ);
        if (m != NULL)
                sc->vtballoon_current_npages++;
 
@@ -450,7 +449,6 @@ static void
 vtballoon_free_page(struct vtballoon_softc *sc, vm_page_t m)
 {
 
-       vm_page_unwire(m, PQ_INACTIVE);
        vm_page_free(m);
        sc->vtballoon_current_npages--;
 }
diff --git a/sys/dev/xen/balloon/balloon.c b/sys/dev/xen/balloon/balloon.c
index fa56c86..0d2bba2 100644
--- a/sys/dev/xen/balloon/balloon.c
+++ b/sys/dev/xen/balloon/balloon.c
@@ -94,13 +94,8 @@ SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, low_mem, CTLFLAG_RD,
 SYSCTL_ULONG(_dev_xen_balloon, OID_AUTO, high_mem, CTLFLAG_RD,
     &bs.balloon_high, 0, "High-mem balloon");
 
-struct balloon_entry {
-       vm_page_t page;
-       STAILQ_ENTRY(balloon_entry) list;
-};
-
 /* List of ballooned pages, threaded through the mem_map array. */
-static STAILQ_HEAD(,balloon_entry) ballooned_pages;
+static TAILQ_HEAD(,vm_page) ballooned_pages;
 
 /* Main work function, always executed in process context. */
 static void balloon_process(void *unused);
@@ -110,47 +105,6 @@ static void balloon_process(void *unused);
 #define WPRINTK(fmt, args...) \
        printk(KERN_WARNING "xen_mem: " fmt, ##args)
 
-/* balloon_append: add the given page to the balloon. */
-static int
-balloon_append(vm_page_t page)
-{
-       struct balloon_entry *entry;
-
-       mtx_assert(&balloon_mutex, MA_OWNED);
-
-       entry = malloc(sizeof(struct balloon_entry), M_BALLOON, M_NOWAIT);
-       if (!entry)
-               return (ENOMEM);
-       entry->page = page;
-       STAILQ_INSERT_HEAD(&ballooned_pages, entry, list);
-       bs.balloon_low++;
-
-       return (0);
-}
-
-/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
-static vm_page_t
-balloon_retrieve(void)
-{
-       vm_page_t page;
-       struct balloon_entry *entry;
-
-       mtx_assert(&balloon_mutex, MA_OWNED);
-
-       if (STAILQ_EMPTY(&ballooned_pages))
-               return (NULL);
-
-       entry = STAILQ_FIRST(&ballooned_pages);
-       STAILQ_REMOVE_HEAD(&ballooned_pages, list);
-
-       page = entry->page;
-       free(entry, M_BALLOON);
-       
-       bs.balloon_low--;
-
-       return (page);
-}
-
 static unsigned long 
 current_target(void)
 {
@@ -203,7 +157,6 @@ static int
 increase_reservation(unsigned long nr_pages)
 {
        unsigned long  pfn, i;
-       struct balloon_entry *entry;
        vm_page_t      page;
        long           rc;
        struct xen_memory_reservation reservation = {
@@ -217,10 +170,9 @@ increase_reservation(unsigned long nr_pages)
        if (nr_pages > nitems(frame_list))
                nr_pages = nitems(frame_list);
 
-       for (entry = STAILQ_FIRST(&ballooned_pages), i = 0;
-            i < nr_pages; i++, entry = STAILQ_NEXT(entry, list)) {
-               KASSERT(entry, ("ballooned_pages list corrupt"));
-               page = entry->page;
+       for (page = TAILQ_FIRST(&ballooned_pages), i = 0;
+            i < nr_pages; i++, page = TAILQ_NEXT(page, plinks.q)) {
+               KASSERT(page != NULL, ("ballooned_pages list corrupt"));
                frame_list[i] = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
        }
 
@@ -245,8 +197,10 @@ increase_reservation(unsigned long nr_pages)
        }
 
        for (i = 0; i < nr_pages; i++) {
-               page = balloon_retrieve();
-               KASSERT(page, ("balloon_retrieve failed"));
+               page = TAILQ_FIRST(&ballooned_pages);
+               KASSERT(page != NULL, ("Unable to get ballooned page"));
+               TAILQ_REMOVE(&ballooned_pages, page, plinks.q);
+               bs.balloon_low--;
 
                pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
                KASSERT((xen_feature(XENFEAT_auto_translated_physmap) ||
@@ -255,7 +209,6 @@ increase_reservation(unsigned long nr_pages)
 
                set_phys_to_machine(pfn, frame_list[i]);
 
-               vm_page_unwire(page, PQ_INACTIVE);
                vm_page_free(page);
        }
 
@@ -286,24 +239,27 @@ decrease_reservation(unsigned long nr_pages)
        for (i = 0; i < nr_pages; i++) {
                if ((page = vm_page_alloc(NULL, 0, 
                            VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 
-                           VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
+                           VM_ALLOC_ZERO)) == NULL) {
                        nr_pages = i;
                        need_sleep = 1;
                        break;
                }
 
+               if ((page->flags & PG_ZERO) == 0) {
+                       /*
+                        * Zero the page, or else we might be leaking
+                        * important data to other domains on the same
+                        * host.
+                        */
+                       pmap_zero_page(page);
+               }
+
                pfn = (VM_PAGE_TO_PHYS(page) >> PAGE_SHIFT);
                frame_list[i] = PFNTOMFN(pfn);
 
                set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
-               if (balloon_append(page) != 0) {
-                       vm_page_unwire(page, PQ_INACTIVE);
-                       vm_page_free(page);
-
-                       nr_pages = i;
-                       need_sleep = 1;
-                       break;
-               }
+               TAILQ_INSERT_HEAD(&ballooned_pages, page, plinks.q);
+               bs.balloon_low++;
        }
 
        set_xen_guest_handle(reservation.extent_start, frame_list);
@@ -438,7 +394,8 @@ balloon_init(void *arg)
        /* Initialise the balloon with excess memory space. */
        for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
                page = PHYS_TO_VM_PAGE(pfn << PAGE_SHIFT);
-               balloon_append(page);
+               TAILQ_INSERT_HEAD(&ballooned_pages, page, plinks.q);
+               bs.balloon_low++;
        }
 #undef max_pfn
 #endif
-- 
1.7.7.5 (Apple Git-26)

_______________________________________________
freebsd-virtualization@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/freebsd-virtualization
To unsubscribe, send any mail to 
"freebsd-virtualization-unsubscr...@freebsd.org"

Reply via email to