At the moment, if a virtio balloon device has a page reporting vq but
its size is < PAGE_REPORTING_CAPACITY (32), the balloon driver fails
probe.
But, there's no way for host to know this value, so it can easily
create a smaller vq and suddenly adding the reporting capability
to the device makes all of the driver fail. Not pretty.
Add a capacity field to page_reporting_dev_info so drivers can
control the maximum number of pages per report batch.
In virtio-balloon, set the capacity to the reporting virtqueue size,
letting page_reporting adapt to whatever the device provides.
Capacity need not be a power of two. Code previously called out
division by PAGE_REPORTING_CAPACITY as cheap since it was a power
of 2, but no performance difference was observed with non-power-of-2
values.
If capacity is 0 or exceeds PAGE_REPORTING_CAPACITY, it defaults
to PAGE_REPORTING_CAPACITY. The 0 check and the clamping is done in
page_reporting_register(), before the reporting work is scheduled,
so we never get division by 0.
Fixes: b0c504f15471 ("virtio-balloon: add support for providing free page
reports to host")
Signed-off-by: Michael S. Tsirkin <[email protected]>
Assisted-by: Claude:claude-opus-4-6
---
Changes v1->v2:
- Document capacity=0 as default in commit log
- Document that capacity need not be a power of two
- Drop unnecessary comment about integer division cost
- Update comment on capacity field: "0 (default) means PAGE_REPORTING_CAPACITY"
drivers/virtio/virtio_balloon.c | 5 +----
include/linux/page_reporting.h | 3 +++
mm/page_reporting.c | 24 ++++++++++++------------
3 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index f6c2dff33f8a..6a1a610c2cb1 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -1017,10 +1017,6 @@ static int virtballoon_probe(struct virtio_device *vdev)
unsigned int capacity;
capacity = virtqueue_get_vring_size(vb->reporting_vq);
- if (capacity < PAGE_REPORTING_CAPACITY) {
- err = -ENOSPC;
- goto out_unregister_oom;
- }
vb->pr_dev_info.order = PAGE_REPORTING_ORDER_UNSPECIFIED;
@@ -1041,6 +1037,7 @@ static int virtballoon_probe(struct virtio_device *vdev)
vb->pr_dev_info.order = 5;
#endif
+ vb->pr_dev_info.capacity = capacity;
err = page_reporting_register(&vb->pr_dev_info);
if (err)
goto out_unregister_oom;
diff --git a/include/linux/page_reporting.h b/include/linux/page_reporting.h
index 9d4ca5c218a0..048578118a4b 100644
--- a/include/linux/page_reporting.h
+++ b/include/linux/page_reporting.h
@@ -22,6 +22,9 @@ struct page_reporting_dev_info {
/* Minimal order of page reporting */
unsigned int order;
+
+ /* Max pages per report batch; 0 (default) means
PAGE_REPORTING_CAPACITY */
+ unsigned int capacity;
};
/* Tear-down and bring-up for page reporting devices */
diff --git a/mm/page_reporting.c b/mm/page_reporting.c
index 7418f2e500bb..942e84b6908a 100644
--- a/mm/page_reporting.c
+++ b/mm/page_reporting.c
@@ -173,11 +173,8 @@ page_reporting_cycle(struct page_reporting_dev_info
*prdev, struct zone *zone,
* any pages that may have already been present from the previous
* list processed. This should result in us reporting all pages on
* an idle system in about 30 seconds.
- *
- * The division here should be cheap since PAGE_REPORTING_CAPACITY
- * should always be a power of 2.
*/
- budget = DIV_ROUND_UP(area->nr_free, PAGE_REPORTING_CAPACITY * 16);
+ budget = DIV_ROUND_UP(area->nr_free, prdev->capacity * 16);
/* loop through free list adding unreported pages to sg list */
list_for_each_entry_safe(page, next, list, lru) {
@@ -222,10 +219,10 @@ page_reporting_cycle(struct page_reporting_dev_info
*prdev, struct zone *zone,
spin_unlock_irq(&zone->lock);
/* begin processing pages in local list */
- err = prdev->report(prdev, sgl, PAGE_REPORTING_CAPACITY);
+ err = prdev->report(prdev, sgl, prdev->capacity);
/* reset offset since the full list was reported */
- *offset = PAGE_REPORTING_CAPACITY;
+ *offset = prdev->capacity;
/* update budget to reflect call to report function */
budget--;
@@ -234,7 +231,7 @@ page_reporting_cycle(struct page_reporting_dev_info *prdev,
struct zone *zone,
spin_lock_irq(&zone->lock);
/* flush reported pages from the sg list */
- page_reporting_drain(prdev, sgl, PAGE_REPORTING_CAPACITY, !err);
+ page_reporting_drain(prdev, sgl, prdev->capacity, !err);
/*
* Reset next to first entry, the old next isn't valid
@@ -260,13 +257,13 @@ static int
page_reporting_process_zone(struct page_reporting_dev_info *prdev,
struct scatterlist *sgl, struct zone *zone)
{
- unsigned int order, mt, leftover, offset = PAGE_REPORTING_CAPACITY;
+ unsigned int order, mt, leftover, offset = prdev->capacity;
unsigned long watermark;
int err = 0;
/* Generate minimum watermark to be able to guarantee progress */
watermark = low_wmark_pages(zone) +
- (PAGE_REPORTING_CAPACITY << page_reporting_order);
+ (prdev->capacity << page_reporting_order);
/*
* Cancel request if insufficient free memory or if we failed
@@ -290,7 +287,7 @@ page_reporting_process_zone(struct page_reporting_dev_info
*prdev,
}
/* report the leftover pages before going idle */
- leftover = PAGE_REPORTING_CAPACITY - offset;
+ leftover = prdev->capacity - offset;
if (leftover) {
sgl = &sgl[offset];
err = prdev->report(prdev, sgl, leftover);
@@ -322,11 +319,11 @@ static void page_reporting_process(struct work_struct
*work)
atomic_set(&prdev->state, state);
/* allocate scatterlist to store pages being reported on */
- sgl = kmalloc_objs(*sgl, PAGE_REPORTING_CAPACITY);
+ sgl = kmalloc_objs(*sgl, prdev->capacity);
if (!sgl)
goto err_out;
- sg_init_table(sgl, PAGE_REPORTING_CAPACITY);
+ sg_init_table(sgl, prdev->capacity);
for_each_zone(zone) {
err = page_reporting_process_zone(prdev, sgl, zone);
@@ -377,6 +374,9 @@ int page_reporting_register(struct page_reporting_dev_info
*prdev)
page_reporting_order = pageblock_order;
}
+ if (!prdev->capacity || prdev->capacity > PAGE_REPORTING_CAPACITY)
+ prdev->capacity = PAGE_REPORTING_CAPACITY;
+
/* initialize state and work structures */
atomic_set(&prdev->state, PAGE_REPORTING_IDLE);
INIT_DELAYED_WORK(&prdev->work, &page_reporting_process);
--
MST