On Wed, Jun 10, 2026 at 09:17:08AM -0400, Michael S. Tsirkin wrote:
> At the moment, if a virtio balloon device has a page reporting vq but
> its size is < PAGE_REPORTING_CAPACITY (32), the balloon driver fails
> probe.
>
> But, there's no way for host to know this value, so it can easily
> create a smaller vq and suddenly adding the reporting capability
> to the device makes all of the driver fail. Not pretty.
>
> Add a capacity field to page_reporting_dev_info so drivers can
> control the maximum number of pages per report batch.
>
> In virtio-balloon, set the capacity to the reporting virtqueue size,
> letting page_reporting adapt to whatever the device provides.
>
> Capacity need not be a power of two. Code previously called out
> division by PAGE_REPORTING_CAPACITY as cheap since it was a power
> of 2, but no performance difference was observed with non-power-of-2
> values.
>
> If capacity is 0 or exceeds PAGE_REPORTING_CAPACITY, it defaults
> to PAGE_REPORTING_CAPACITY. The 0 check and the clamping is done in
> page_reporting_register(), before the reporting work is scheduled,
> so we never get division by 0.
>
> Fixes: b0c504f15471 ("virtio-balloon: add support for providing free page
> reports to host")
> Signed-off-by: Michael S. Tsirkin <[email protected]>
> Assisted-by: Claude:claude-opus-4-6
lgtm asside from David's comment request
Reviewed-by: Gregory Price <[email protected]>
> ---
> Changes v1->v2:
> - Document capacity=0 as default in commit log
> - Document that capacity need not be a power of two
> - Drop unnecessary comment about integer division cost
> - Update comment on capacity field: "0 (default) means
> PAGE_REPORTING_CAPACITY"
>
> drivers/virtio/virtio_balloon.c | 5 +----
> include/linux/page_reporting.h | 3 +++
> mm/page_reporting.c | 24 ++++++++++++------------
> 3 files changed, 16 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index f6c2dff33f8a..6a1a610c2cb1 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -1017,10 +1017,6 @@ static int virtballoon_probe(struct virtio_device
> *vdev)
> unsigned int capacity;
>
> capacity = virtqueue_get_vring_size(vb->reporting_vq);
> - if (capacity < PAGE_REPORTING_CAPACITY) {
> - err = -ENOSPC;
> - goto out_unregister_oom;
> - }
>
> vb->pr_dev_info.order = PAGE_REPORTING_ORDER_UNSPECIFIED;
>
> @@ -1041,6 +1037,7 @@ static int virtballoon_probe(struct virtio_device *vdev)
> vb->pr_dev_info.order = 5;
> #endif
>
> + vb->pr_dev_info.capacity = capacity;
> err = page_reporting_register(&vb->pr_dev_info);
> if (err)
> goto out_unregister_oom;
> diff --git a/include/linux/page_reporting.h b/include/linux/page_reporting.h
> index 9d4ca5c218a0..048578118a4b 100644
> --- a/include/linux/page_reporting.h
> +++ b/include/linux/page_reporting.h
> @@ -22,6 +22,9 @@ struct page_reporting_dev_info {
>
> /* Minimal order of page reporting */
> unsigned int order;
> +
> + /* Max pages per report batch; 0 (default) means
> PAGE_REPORTING_CAPACITY */
> + unsigned int capacity;
> };
>
> /* Tear-down and bring-up for page reporting devices */
> diff --git a/mm/page_reporting.c b/mm/page_reporting.c
> index 7418f2e500bb..942e84b6908a 100644
> --- a/mm/page_reporting.c
> +++ b/mm/page_reporting.c
> @@ -173,11 +173,8 @@ page_reporting_cycle(struct page_reporting_dev_info
> *prdev, struct zone *zone,
> * any pages that may have already been present from the previous
> * list processed. This should result in us reporting all pages on
> * an idle system in about 30 seconds.
> - *
> - * The division here should be cheap since PAGE_REPORTING_CAPACITY
> - * should always be a power of 2.
> */
> - budget = DIV_ROUND_UP(area->nr_free, PAGE_REPORTING_CAPACITY * 16);
> + budget = DIV_ROUND_UP(area->nr_free, prdev->capacity * 16);
>
> /* loop through free list adding unreported pages to sg list */
> list_for_each_entry_safe(page, next, list, lru) {
> @@ -222,10 +219,10 @@ page_reporting_cycle(struct page_reporting_dev_info
> *prdev, struct zone *zone,
> spin_unlock_irq(&zone->lock);
>
> /* begin processing pages in local list */
> - err = prdev->report(prdev, sgl, PAGE_REPORTING_CAPACITY);
> + err = prdev->report(prdev, sgl, prdev->capacity);
>
> /* reset offset since the full list was reported */
> - *offset = PAGE_REPORTING_CAPACITY;
> + *offset = prdev->capacity;
>
> /* update budget to reflect call to report function */
> budget--;
> @@ -234,7 +231,7 @@ page_reporting_cycle(struct page_reporting_dev_info
> *prdev, struct zone *zone,
> spin_lock_irq(&zone->lock);
>
> /* flush reported pages from the sg list */
> - page_reporting_drain(prdev, sgl, PAGE_REPORTING_CAPACITY, !err);
> + page_reporting_drain(prdev, sgl, prdev->capacity, !err);
>
> /*
> * Reset next to first entry, the old next isn't valid
> @@ -260,13 +257,13 @@ static int
> page_reporting_process_zone(struct page_reporting_dev_info *prdev,
> struct scatterlist *sgl, struct zone *zone)
> {
> - unsigned int order, mt, leftover, offset = PAGE_REPORTING_CAPACITY;
> + unsigned int order, mt, leftover, offset = prdev->capacity;
> unsigned long watermark;
> int err = 0;
>
> /* Generate minimum watermark to be able to guarantee progress */
> watermark = low_wmark_pages(zone) +
> - (PAGE_REPORTING_CAPACITY << page_reporting_order);
> + (prdev->capacity << page_reporting_order);
>
> /*
> * Cancel request if insufficient free memory or if we failed
> @@ -290,7 +287,7 @@ page_reporting_process_zone(struct
> page_reporting_dev_info *prdev,
> }
>
> /* report the leftover pages before going idle */
> - leftover = PAGE_REPORTING_CAPACITY - offset;
> + leftover = prdev->capacity - offset;
> if (leftover) {
> sgl = &sgl[offset];
> err = prdev->report(prdev, sgl, leftover);
> @@ -322,11 +319,11 @@ static void page_reporting_process(struct work_struct
> *work)
> atomic_set(&prdev->state, state);
>
> /* allocate scatterlist to store pages being reported on */
> - sgl = kmalloc_objs(*sgl, PAGE_REPORTING_CAPACITY);
> + sgl = kmalloc_objs(*sgl, prdev->capacity);
> if (!sgl)
> goto err_out;
>
> - sg_init_table(sgl, PAGE_REPORTING_CAPACITY);
> + sg_init_table(sgl, prdev->capacity);
>
> for_each_zone(zone) {
> err = page_reporting_process_zone(prdev, sgl, zone);
> @@ -377,6 +374,9 @@ int page_reporting_register(struct
> page_reporting_dev_info *prdev)
> page_reporting_order = pageblock_order;
> }
>
> + if (!prdev->capacity || prdev->capacity > PAGE_REPORTING_CAPACITY)
> + prdev->capacity = PAGE_REPORTING_CAPACITY;
> +
> /* initialize state and work structures */
> atomic_set(&prdev->state, PAGE_REPORTING_IDLE);
> INIT_DELAYED_WORK(&prdev->work, &page_reporting_process);
> --
> MST
>