The branch main has been updated by kib: URL: https://cgit.FreeBSD.org/src/commit/?id=7685aaea8850f5b6995a17740a016019e0956c70
commit 7685aaea8850f5b6995a17740a016019e0956c70 Author: Konstantin Belousov <[email protected]> AuthorDate: 2025-12-20 16:03:40 +0000 Commit: Konstantin Belousov <[email protected]> CommitDate: 2025-12-30 03:25:36 +0000 vm_object_coalesce(): return swap reservation back if overcharged It is possible for both vm_map_insert() and vm_object_coalesce() to charge both for the same region. The issue is that vm_map_insert() must charge in advance to ensure that the mapping would not exceed the swap limit, but then the coalesce might decide to extend the object, and already (partially) backs the mapped region. Handle this by passing to vm_object_coalesce() exact information about the charging mode of the extending range 'not charging', 'charged' using flags instead of simple boolean. In vm_object_coalesce(), detect overcharge and undo it if needed. Note that this relies on vm_object_coalesce() call being the last action in vm_map_insert() before extending the previous map entry. Reported and tested by: pho Reviewed by: markj Sponsored by: The FreeBSD Foundation MFC after: 1 week Differential revision: https://reviews.freebsd.org/D54338 --- sys/vm/vm_map.c | 44 ++++++++++++++++++++++++++++++-------------- sys/vm/vm_object.c | 41 +++++++++++++++++++++++++++++++---------- sys/vm/vm_object.h | 8 +++++++- 3 files changed, 68 insertions(+), 25 deletions(-) diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 6b09552c5fee..68dcadd2b2f1 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -1620,6 +1620,7 @@ vm_map_insert1(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_inherit_t inheritance; u_long bdry; u_int bidx; + int cflags; VM_MAP_ASSERT_LOCKED(map); KASSERT(object != kernel_object || @@ -1696,20 +1697,36 @@ vm_map_insert1(vm_map_t map, vm_object_t object, vm_ooffset_t offset, } cred = NULL; - if ((cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT | MAP_CREATE_GUARD)) != 0) - goto charged; - if ((cow & MAP_ACC_CHARGED) || ((prot & VM_PROT_WRITE) && - ((protoeflags & MAP_ENTRY_NEEDS_COPY) || object == NULL))) { - if (!(cow & MAP_ACC_CHARGED) && !swap_reserve(end - start)) - return (KERN_RESOURCE_SHORTAGE); - KASSERT(object == NULL || - (protoeflags & MAP_ENTRY_NEEDS_COPY) != 0 || - object->cred == NULL, - ("overcommit: vm_map_insert o %p", object)); - cred = curthread->td_ucred; + if ((cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT | MAP_CREATE_GUARD)) != 0) { + cflags = OBJCO_NO_CHARGE; + } else { + cflags = 0; + if ((cow & MAP_ACC_CHARGED) != 0 || + ((prot & VM_PROT_WRITE) != 0 && + ((protoeflags & MAP_ENTRY_NEEDS_COPY) != 0 || + object == NULL))) { + if ((cow & MAP_ACC_CHARGED) == 0) { + if (!swap_reserve(end - start)) + return (KERN_RESOURCE_SHORTAGE); + + /* + * Only inform vm_object_coalesce() + * that the object was charged if + * there is no need for CoW, so the + * swap amount reserved is applicable + * to the prev_entry->object. + */ + if ((protoeflags & MAP_ENTRY_NEEDS_COPY) == 0) + cflags |= OBJCO_CHARGED; + } + KASSERT(object == NULL || + (protoeflags & MAP_ENTRY_NEEDS_COPY) != 0 || + object->cred == NULL, + ("overcommit: vm_map_insert o %p", object)); + cred = curthread->td_ucred; + } } -charged: /* Expand the kernel pmap, if necessary. */ if (map == kernel_map && end > kernel_vm_end) { int rv; @@ -1741,8 +1758,7 @@ charged: vm_object_coalesce(prev_entry->object.vm_object, prev_entry->offset, (vm_size_t)(prev_entry->end - prev_entry->start), - (vm_size_t)(end - prev_entry->end), cred != NULL && - (protoeflags & MAP_ENTRY_NEEDS_COPY) == 0)) { + (vm_size_t)(end - prev_entry->end), cflags)) { /* * We were able to extend the object. Determine if we * can extend the previous map entry to include the diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index c216fdc01af1..f4c54ba91742 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -2161,7 +2161,7 @@ vm_object_populate(vm_object_t object, vm_pindex_t start, vm_pindex_t end) */ boolean_t vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset, - vm_size_t prev_size, vm_size_t next_size, boolean_t reserved) + vm_size_t prev_size, vm_size_t next_size, int cflags) { vm_pindex_t next_end, next_pindex; @@ -2202,8 +2202,7 @@ vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset, /* * Account for the charge. */ - if (prev_object->cred != NULL && - next_pindex + next_size > prev_object->size) { + if (prev_object->cred != NULL && (cflags & OBJCO_NO_CHARGE) == 0) { /* * If prev_object was charged, then this mapping, * although not charged now, may become writable @@ -2214,14 +2213,36 @@ vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset, * entry, and swap reservation for this entry is * managed in appropriate time. */ - vm_size_t charge = ptoa(next_pindex + next_size - - prev_object->size); - if (!reserved && - !swap_reserve_by_cred(charge, prev_object->cred)) { - VM_OBJECT_WUNLOCK(prev_object); - return (FALSE); + if (next_end > prev_object->size) { + vm_size_t charge = ptoa(next_end - prev_object->size); + + if ((cflags & OBJCO_CHARGED) == 0) { + if (!swap_reserve_by_cred(charge, + prev_object->cred)) { + VM_OBJECT_WUNLOCK(prev_object); + return (FALSE); + } + } else if (prev_object->size > next_pindex) { + /* + * The caller charged, but: + * - the object has already accounted for the + * space, + * - and the object end is between previous + * mapping end and next_end. + */ + swap_release_by_cred(ptoa(prev_object->size - + next_pindex), prev_object->cred); + } + prev_object->charge += charge; + } else if ((cflags & OBJCO_CHARGED) != 0) { + /* + * The caller charged, but the object has + * already accounted for the space. Whole new + * mapping charge should be released, + */ + swap_release_by_cred(ptoa(next_size), + prev_object->cred); } - prev_object->charge += charge; } /* diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index e58fae5f0090..98afb93f8389 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -228,6 +228,12 @@ struct vm_object { #define OBJPR_NOTMAPPED 0x2 /* Don't unmap pages. */ #define OBJPR_VALIDONLY 0x4 /* Ignore invalid pages. */ +/* + * Options for vm_object_coalesce(). + */ +#define OBJCO_CHARGED 0x1 /* The next_size was charged already */ +#define OBJCO_NO_CHARGE 0x2 /* Do not do swap accounting at all */ + TAILQ_HEAD(object_q, vm_object); extern struct object_q vm_object_list; /* list of allocated objects */ @@ -354,7 +360,7 @@ vm_object_t vm_object_allocate_anon(vm_pindex_t, vm_object_t, struct ucred *, vm_size_t); vm_object_t vm_object_allocate_dyn(objtype_t, vm_pindex_t, u_short); boolean_t vm_object_coalesce(vm_object_t, vm_ooffset_t, vm_size_t, vm_size_t, - boolean_t); + int); void vm_object_collapse (vm_object_t); void vm_object_deallocate (vm_object_t); void vm_object_destroy (vm_object_t);
