Stefan Kempf wrote:
> Next try. A problem with the last version was that amaps for shared
> mappings still would grow pretty large: establishing a shared mapping
> causes uvm to immediately allocate an amap for the whole range.
I messed up the last diff.
tb@ found and fixed a bug in the libkvm bits that would abort a make
build (thanks!).
Updated diff below.
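
Since the bucket sizing in amap_alloc1() is a bit dense, here is a
small userland sketch of the same computation (my own throwaway
program, not part of the diff; the variable names mirror the code
below). It prints how many buckets a dynamically allocated amap of a
given size ends up with:

#include <stdio.h>

#define UVM_AMAP_CHUNK	16
#define roundup(x, y)	((((x) + ((y) - 1)) / (y)) * (y))
#define howmany(x, y)	(((x) + ((y) - 1)) / (y))

int
main(void)
{
	int slots, chunks, chunkperbucket, hashshift, buckets;

	for (slots = 256; slots <= 1024 * 1024; slots *= 16) {
		/* same strategy as the dynamic case in amap_alloc1() */
		chunks = roundup(slots, UVM_AMAP_CHUNK) / UVM_AMAP_CHUNK;
		hashshift = 0;
		chunkperbucket = 1 << hashshift;
		while ((1 << chunkperbucket) * 2 <= chunks) {
			hashshift++;
			chunkperbucket = 1 << hashshift;
		}
		buckets = howmany(chunks, chunkperbucket);
		printf("%7d slots: %5d chunks, %2d chunks/bucket, "
		    "%4d buckets\n", slots, chunks, chunkperbucket,
		    buckets);
	}
	return 0;
}

The bucket array stays small even for huge sparse mappings, which is
the point of the dynamic layout.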
Index: lib/libkvm/kvm_proc.c
===================================================================
RCS file: /cvs/src/lib/libkvm/kvm_proc.c,v
retrieving revision 1.52
diff -u -p -r1.52 kvm_proc.c
--- lib/libkvm/kvm_proc.c 22 Oct 2014 04:13:35 -0000 1.52
+++ lib/libkvm/kvm_proc.c 22 Apr 2016 18:08:51 -0000
@@ -108,6 +108,56 @@ static int proc_verify(kvm_t *, const st
static void ps_str_a(struct ps_strings *, u_long *, int *);
static void ps_str_e(struct ps_strings *, u_long *, int *);
+static struct vm_anon *
+_kvm_findanon(kvm_t *kd, struct vm_amap *amapp, int slot)
+{
+ u_long addr;
+ int bucket;
+ struct vm_amap amap;
+ struct vm_amap_chunk chunk, *chunkp;
+ struct vm_anon *anonp;
+
+ addr = (u_long)amapp;
+ if (KREAD(kd, addr, &amap))
+ return (NULL);
+
+ /* sanity-check slot number */
+ if (slot > amap.am_nslot)
+ return (NULL);
+
+ if (UVM_AMAP_SMALL(&amap))
+ chunkp = &amapp->am_small;
+ else {
+ bucket = UVM_AMAP_BUCKET(&amap, slot);
+ addr = (u_long)(amap.am_buckets + bucket);
+ if (KREAD(kd, addr, &chunkp))
+ return (NULL);
+
+ while (chunkp != NULL) {
+ addr = (u_long)chunkp;
+ if (KREAD(kd, addr, &chunk))
+ return (NULL);
+
+ if (UVM_AMAP_BUCKET(&amap, chunk.ac_baseslot) !=
+ bucket)
+ return (NULL);
+ if (slot >= chunk.ac_baseslot &&
+ slot < chunk.ac_baseslot + chunk.ac_nslot)
+ break;
+
+ chunkp = TAILQ_NEXT(&chunk, ac_list);
+ }
+ if (chunkp == NULL)
+ return (NULL);
+ }
+
+ addr = (u_long)&chunkp->ac_anon[UVM_AMAP_SLOTIDX(slot)];
+ if (KREAD(kd, addr, &anonp))
+ return (NULL);
+
+ return (anonp);
+}
+
static char *
_kvm_ureadm(kvm_t *kd, const struct kinfo_proc *p, u_long va, u_long *cnt)
{
@@ -115,7 +165,6 @@ _kvm_ureadm(kvm_t *kd, const struct kinf
struct vmspace vm;
struct vm_anon *anonp, anon;
struct vm_map_entry vme;
- struct vm_amap amap;
struct vm_page pg;
if (kd->swapspc == 0) {
@@ -153,18 +202,11 @@ _kvm_ureadm(kvm_t *kd, const struct kinf
if (vme.aref.ar_amap == NULL)
return (NULL);
- addr = (u_long)vme.aref.ar_amap;
- if (KREAD(kd, addr, &amap))
- return (NULL);
-
offset = va - vme.start;
slot = offset / kd->nbpg + vme.aref.ar_pageoff;
- /* sanity-check slot number */
- if (slot > amap.am_nslot)
- return (NULL);
- addr = (u_long)amap.am_anon + (offset / kd->nbpg) * sizeof(anonp);
- if (KREAD(kd, addr, &anonp))
+ anonp = _kvm_findanon(kd, vme.aref.ar_amap, slot);
+ if (anonp == NULL)
return (NULL);
addr = (u_long)anonp;
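
For reference, the slot number handed to _kvm_findanon() above is
computed the same way as before: the page index into the mapping plus
the amap's page offset. A worked example (made-up numbers, userland
only):

#include <stdio.h>

int
main(void)
{
	unsigned long va = 0x1005000UL;		/* address to read */
	unsigned long start = 0x1000000UL;	/* vme.start */
	unsigned long nbpg = 4096;		/* kd->nbpg */
	int ar_pageoff = 3;			/* vme.aref.ar_pageoff */
	int slot;

	slot = (va - start) / nbpg + ar_pageoff;
	printf("amap slot %d\n", slot);		/* 5 + 3 = 8 */
	return 0;
}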
Index: sys/uvm/uvm_amap.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_amap.c,v
retrieving revision 1.66
diff -u -p -r1.66 uvm_amap.c
--- sys/uvm/uvm_amap.c 16 Apr 2016 18:39:31 -0000 1.66
+++ sys/uvm/uvm_amap.c 22 Apr 2016 18:08:54 -0000
@@ -44,20 +44,19 @@
#include <uvm/uvm_swap.h>
/*
- * pool for allocation of vm_map structures. note that in order to
+ * pools for allocation of vm_amap structures. note that in order to
* avoid an endless loop, the amap pool's allocator cannot allocate
* memory from an amap (it currently goes through the kernel uobj, so
* we are ok).
*/
struct pool uvm_amap_pool;
-
-/* Pools for amap slots for the most common amap slot sizes */
-struct pool uvm_amap_slot_pools[UVM_AMAP_CHUNK];
+struct pool uvm_small_amap_pool[UVM_AMAP_CHUNK];
+struct pool uvm_amap_chunk_pool;
LIST_HEAD(, vm_amap) amap_list;
-static char amap_slot_pool_names[UVM_AMAP_CHUNK][13];
+static char amap_small_pool_names[UVM_AMAP_CHUNK][9];
#define MALLOC_SLOT_UNIT (2 * sizeof(int) + sizeof(struct vm_anon *))
@@ -65,10 +64,12 @@ static char amap_slot_pool_names[UVM_AMA
* local functions
*/
-static struct vm_amap *amap_alloc1(int, int);
+static struct vm_amap *amap_alloc1(int, int, int);
static __inline void amap_list_insert(struct vm_amap *);
static __inline void amap_list_remove(struct vm_amap *);
+struct vm_amap_chunk *amap_chunk_get(struct vm_amap *, int, int);
+
static __inline void
amap_list_insert(struct vm_amap *amap)
{
@@ -81,6 +82,72 @@ amap_list_remove(struct vm_amap *amap)
LIST_REMOVE(amap, am_list);
}
+/*
+ * amap_chunk_get: look up a chunk for a slot. if create is non-zero,
+ * the chunk is created if it does not yet exist.
+ *
+ * => returns the chunk on success or NULL on error
+ */
+struct vm_amap_chunk *
+amap_chunk_get(struct vm_amap *amap, int slot, int create)
+{
+ int bucket = UVM_AMAP_BUCKET(amap, slot);
+ int baseslot = AMAP_BASE_SLOT(slot);
+ int n;
+ struct vm_amap_chunk *chunk, *newchunk, *pchunk = NULL;
+
+ if (UVM_AMAP_SMALL(amap))
+ return &amap->am_small;
+
+ for (chunk = amap->am_buckets[bucket]; chunk != NULL;
+ chunk = TAILQ_NEXT(chunk, ac_list)) {
+ if (UVM_AMAP_BUCKET(amap, chunk->ac_baseslot) != bucket)
+ break;
+ if (chunk->ac_baseslot == baseslot)
+ return chunk;
+ pchunk = chunk;
+ }
+ if (!create)
+ return NULL;
+
+ if (amap->am_nslot - baseslot >= UVM_AMAP_CHUNK)
+ n = UVM_AMAP_CHUNK;
+ else
+ n = amap->am_nslot - baseslot;
+
+ newchunk = pool_get(&uvm_amap_chunk_pool, PR_NOWAIT | PR_ZERO);
+ if (newchunk == NULL)
+ return NULL;
+
+ if (pchunk == NULL) {
+ TAILQ_INSERT_TAIL(&amap->am_chunks, newchunk, ac_list);
+ KASSERT(amap->am_buckets[bucket] == NULL);
+ amap->am_buckets[bucket] = newchunk;
+ } else
+ TAILQ_INSERT_AFTER(&amap->am_chunks, pchunk, newchunk,
+ ac_list);
+
+ amap->am_ncused++;
+ newchunk->ac_baseslot = baseslot;
+ newchunk->ac_nslot = n;
+ return newchunk;
+}
+
+void
+amap_chunk_free(struct vm_amap *amap, struct vm_amap_chunk *chunk)
+{
+ int bucket = UVM_AMAP_BUCKET(amap, chunk->ac_baseslot);
+
+ if (UVM_AMAP_SMALL(amap))
+ return;
+
+ TAILQ_REMOVE(&amap->am_chunks, chunk, ac_list);
+ if (amap->am_buckets[bucket] == chunk)
+ amap->am_buckets[bucket] = NULL;
+ pool_put(&uvm_amap_chunk_pool, chunk);
+ amap->am_ncused--;
+}
+
#ifdef UVM_AMAP_PPREF
/*
* what is ppref? ppref is an _optional_ amap feature which is used
@@ -157,19 +224,28 @@ void
amap_init(void)
{
int i;
+ size_t size;
/* Initialize the vm_amap pool. */
pool_init(&uvm_amap_pool, sizeof(struct vm_amap), 0, 0, PR_WAITOK,
"amappl", NULL);
pool_sethiwat(&uvm_amap_pool, 4096);
- for (i = 0; i < nitems(uvm_amap_slot_pools); i++) {
- snprintf(amap_slot_pool_names[i],
- sizeof(amap_slot_pool_names[0]), "amapslotpl%d", i + 1);
- pool_init(&uvm_amap_slot_pools[i], (i + 1) * MALLOC_SLOT_UNIT,
- 0, 0, PR_WAITOK, amap_slot_pool_names[i], NULL);
- pool_sethiwat(&uvm_amap_slot_pools[i], 4096);
+ /* initialize small amap pools */
+ for (i = 0; i < nitems(uvm_small_amap_pool); i++) {
+ snprintf(amap_small_pool_names[i],
+ sizeof(amap_small_pool_names[0]), "amappl%d", i + 1);
+ size = offsetof(struct vm_amap, am_small.ac_anon) +
+ (i + 1) * sizeof(struct vm_anon *);
+ pool_init(&uvm_small_amap_pool[i], size, 0, 0, PR_WAITOK,
+ amap_small_pool_names[i], NULL);
}
+
+ pool_init(&uvm_amap_chunk_pool,
+ sizeof(struct vm_amap_chunk) +
+ UVM_AMAP_CHUNK * sizeof(struct vm_anon *), 0, 0, PR_WAITOK,
+ "amapchunkpl", NULL);
+ pool_sethiwat(&uvm_amap_chunk_pool, 4096);
}
/*
@@ -177,12 +253,53 @@ amap_init(void)
* init the overlay.
*/
static inline struct vm_amap *
-amap_alloc1(int slots, int waitf)
+amap_alloc1(int slots, int waitf, int dynamic)
{
struct vm_amap *amap;
+ struct vm_amap_chunk *chunk, *tmp;
+ int chunks, chunkperbucket = 1, hashshift = 0;
+ int buckets, i, n;
+ int pwaitf = (waitf == M_WAITOK) ? PR_WAITOK : PR_NOWAIT;
+
+ chunks = roundup(slots, UVM_AMAP_CHUNK) / UVM_AMAP_CHUNK;
- amap = pool_get(&uvm_amap_pool, (waitf == M_WAITOK) ? PR_WAITOK
- : PR_NOWAIT);
+ if (dynamic) {
+ /*
+ * Basically, the amap is a hash map where the number of
+ * buckets is fixed. We select the number of buckets using the
+ * following strategy:
+ *
+ * 1. The maximal number of entries to search in a bucket upon
+ * a collision should be less than or equal to
+ * log2(slots / UVM_AMAP_CHUNK). This is the worst-case number
+ * of lookups we would have if we could chunk the amap. The
+ * log2(n) comes from the fact that amaps are chunked by
+ * splitting up their vm_map_entries and organizing those
+ * in a binary search tree.
+ *
+ * 2. The maximal number of entries in a bucket must be a
+ * power of two.
+ *
+ * The maximal number of entries per bucket is used to hash
+ * a slot to a bucket.
+ *
+ * In the future, this strategy could be refined to make it
+ * even harder/impossible that the total amount of KVA needed
+ * for the hash buckets of all amaps to exceed the maximal
+ * amount of KVA memory reserved for amaps.
+ */
+ chunkperbucket = 1 << hashshift;
+ while ((1 << chunkperbucket) * 2 <= chunks) {
+ hashshift++;
+ chunkperbucket = 1 << hashshift;
+ }
+ }
+
+ if (slots > UVM_AMAP_CHUNK)
+ amap = pool_get(&uvm_amap_pool, pwaitf);
+ else
+ amap = pool_get(&uvm_small_amap_pool[slots - 1],
+ pwaitf | PR_ZERO);
if (amap == NULL)
return(NULL);
@@ -196,25 +313,48 @@ amap_alloc1(int slots, int waitf)
amap->am_nslot = slots;
amap->am_nused = 0;
- if (slots > UVM_AMAP_CHUNK)
- amap->am_slots = malloc(slots * MALLOC_SLOT_UNIT,
- M_UVMAMAP, waitf);
- else
- amap->am_slots = pool_get(
- &uvm_amap_slot_pools[slots - 1],
- (waitf == M_WAITOK) ? PR_WAITOK : PR_NOWAIT);
+ if (slots <= UVM_AMAP_CHUNK)
+ return (amap);
- if (amap->am_slots == NULL)
+ amap->am_ncused = 0;
+ TAILQ_INIT(&amap->am_chunks);
+ amap->am_hashshift = hashshift;
+ amap->am_buckets = NULL;
+
+ buckets = howmany(chunks, chunkperbucket);
+ amap->am_buckets = mallocarray(buckets, sizeof(*amap->am_buckets),
+ M_UVMAMAP, waitf | (dynamic ? M_ZERO : 0));
+ if (amap->am_buckets == NULL)
goto fail1;
- amap->am_bckptr = (int *)(((char *)amap->am_slots) + slots *
- sizeof(int));
- amap->am_anon = (struct vm_anon **)(((char *)amap->am_bckptr) +
- slots * sizeof(int));
+ if (!dynamic) {
+ for (i = 0; i < buckets; i++) {
+ if (i == buckets - 1) {
+ n = slots % UVM_AMAP_CHUNK;
+ if (n == 0)
+ n = UVM_AMAP_CHUNK;
+ } else
+ n = UVM_AMAP_CHUNK;
+
+ chunk = pool_get(&uvm_amap_chunk_pool,
+ PR_ZERO | pwaitf);
+ if (chunk == NULL)
+ goto fail1;
+
+ amap->am_buckets[i] = chunk;
+ amap->am_ncused++;
+ chunk->ac_baseslot = i * UVM_AMAP_CHUNK;
+ chunk->ac_nslot = n;
+ TAILQ_INSERT_TAIL(&amap->am_chunks, chunk, ac_list);
+ }
+ }
return(amap);
fail1:
+ free(amap->am_buckets, M_UVMAMAP, 0);
+ TAILQ_FOREACH_SAFE(chunk, &amap->am_chunks, ac_list, tmp)
+ pool_put(&uvm_amap_chunk_pool, chunk);
pool_put(&uvm_amap_pool, amap);
return (NULL);
}
@@ -226,19 +366,16 @@ fail1:
* => reference count to new amap is set to one
*/
struct vm_amap *
-amap_alloc(vaddr_t sz, int waitf)
+amap_alloc(vaddr_t sz, int waitf, int dynamic)
{
struct vm_amap *amap;
int slots;
AMAP_B2SLOT(slots, sz); /* load slots */
- amap = amap_alloc1(slots, waitf);
- if (amap) {
- memset(amap->am_anon, 0,
- amap->am_nslot * sizeof(struct vm_anon *));
+ amap = amap_alloc1(slots, waitf, dynamic);
+ if (amap)
amap_list_insert(amap);
- }
return(amap);
}
@@ -252,22 +389,24 @@ amap_alloc(vaddr_t sz, int waitf)
void
amap_free(struct vm_amap *amap)
{
+ struct vm_amap_chunk *chunk, *tmp;
KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
- if (amap->am_nslot > UVM_AMAP_CHUNK)
- free(amap->am_slots, M_UVMAMAP, 0);
- else
- pool_put(&uvm_amap_slot_pools[amap->am_nslot - 1],
- amap->am_slots);
-
#ifdef UVM_AMAP_PPREF
if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
free(amap->am_ppref, M_UVMAMAP, 0);
#endif
- pool_put(&uvm_amap_pool, amap);
+ if (UVM_AMAP_SMALL(amap))
+ pool_put(&uvm_small_amap_pool[amap->am_nslot - 1], amap);
+ else {
+ TAILQ_FOREACH_SAFE(chunk, &amap->am_chunks, ac_list, tmp)
+ pool_put(&uvm_amap_chunk_pool, chunk);
+ free(amap->am_buckets, M_UVMAMAP, 0);
+ pool_put(&uvm_amap_pool, amap);
+ }
}
/*
@@ -280,8 +419,9 @@ amap_free(struct vm_amap *amap)
void
amap_wipeout(struct vm_amap *amap)
{
- int lcv, slot;
+ int slot;
struct vm_anon *anon;
+ struct vm_amap_chunk *chunk;
KASSERT(amap->am_ref == 0);
@@ -291,19 +431,25 @@ amap_wipeout(struct vm_amap *amap)
}
amap_list_remove(amap);
- for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
- int refs;
+ AMAP_CHUNK_FOREACH(chunk, amap) {
+ int i, refs, map = chunk->ac_usedmap;
- slot = amap->am_slots[lcv];
- anon = amap->am_anon[slot];
+ for (i = ffs(map); i != 0; i = ffs(map)) {
+ slot = i - 1;
+ map ^= 1 << slot;
+ anon = chunk->ac_anon[slot];
- if (anon == NULL || anon->an_ref == 0)
- panic("amap_wipeout: corrupt amap");
+ if (anon == NULL || anon->an_ref == 0)
+ panic("amap_wipeout: corrupt amap");
- refs = --anon->an_ref;
- if (refs == 0) {
- /* we had the last reference to a vm_anon. free it. */
- uvm_anfree(anon);
+ refs = --anon->an_ref;
+ if (refs == 0) {
+ /*
+ * we had the last reference to a vm_anon.
+ * free it.
+ */
+ uvm_anfree(anon);
+ }
}
}
@@ -330,8 +476,10 @@ amap_copy(struct vm_map *map, struct vm_
boolean_t canchunk, vaddr_t startva, vaddr_t endva)
{
struct vm_amap *amap, *srcamap;
- int slots, lcv;
+ int slots, lcv, dynamic = 0;
vaddr_t chunksize;
+ int i, j, k, n, srcslot;
+ struct vm_amap_chunk *chunk = NULL, *srcchunk = NULL;
/* is there a map to copy? if not, create one from scratch. */
if (entry->aref.ar_amap == NULL) {
@@ -339,22 +487,29 @@ amap_copy(struct vm_map *map, struct vm_
* check to see if we have a large amap that we can
* chunk. we align startva/endva to chunk-sized
* boundaries and then clip to them.
+ *
+ * if we cannot chunk the amap, allocate it in a way
+ * that makes it grow or shrink dynamically with
+ * the number of slots.
*/
- if (canchunk && atop(entry->end - entry->start) >=
- UVM_AMAP_LARGE) {
- /* convert slots to bytes */
- chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
- startva = (startva / chunksize) * chunksize;
- endva = roundup(endva, chunksize);
- UVM_MAP_CLIP_START(map, entry, startva);
- /* watch out for endva wrap-around! */
- if (endva >= startva)
- UVM_MAP_CLIP_END(map, entry, endva);
+ if (atop(entry->end - entry->start) >= UVM_AMAP_LARGE) {
+ if (canchunk) {
+ /* convert slots to bytes */
+ chunksize = UVM_AMAP_CHUNK << PAGE_SHIFT;
+ startva = (startva / chunksize) * chunksize;
+ endva = roundup(endva, chunksize);
+ UVM_MAP_CLIP_START(map, entry, startva);
+ /* watch out for endva wrap-around! */
+ if (endva >= startva)
+ UVM_MAP_CLIP_END(map, entry, endva);
+ } else
+ dynamic = 1;
}
entry->aref.ar_pageoff = 0;
entry->aref.ar_amap = amap_alloc(entry->end - entry->start,
- waitf);
+ waitf, dynamic);
+
if (entry->aref.ar_amap != NULL)
entry->etype &= ~UVM_ET_NEEDSCOPY;
return;
@@ -373,7 +528,9 @@ amap_copy(struct vm_map *map, struct vm_
/* looks like we need to copy the map. */
AMAP_B2SLOT(slots, entry->end - entry->start);
- amap = amap_alloc1(slots, waitf);
+ amap = amap_alloc1(slots, waitf,
+ entry->aref.ar_amap->am_nslot > UVM_AMAP_CHUNK &&
+ entry->aref.ar_amap->am_hashshift != 0);
if (amap == NULL)
return;
srcamap = entry->aref.ar_amap;
@@ -392,17 +549,39 @@ amap_copy(struct vm_map *map, struct vm_
}
/* we must copy it now. */
- for (lcv = 0 ; lcv < slots; lcv++) {
- amap->am_anon[lcv] =
- srcamap->am_anon[entry->aref.ar_pageoff + lcv];
- if (amap->am_anon[lcv] == NULL)
+ for (lcv = 0; lcv < slots; lcv += n) {
+ srcslot = entry->aref.ar_pageoff + lcv;
+ i = UVM_AMAP_SLOTIDX(lcv);
+ j = UVM_AMAP_SLOTIDX(srcslot);
+ n = UVM_AMAP_CHUNK;
+ if (i > j)
+ n -= i;
+ else
+ n -= j;
+ if (lcv + n > slots)
+ n = slots - lcv;
+
+ srcchunk = amap_chunk_get(srcamap, srcslot, 0);
+ if (srcchunk == NULL)
continue;
- amap->am_anon[lcv]->an_ref++;
- amap->am_bckptr[lcv] = amap->am_nused;
- amap->am_slots[amap->am_nused] = lcv;
- amap->am_nused++;
+
+ chunk = amap_chunk_get(amap, lcv, 1);
+ if (chunk == NULL) {
+ amap->am_ref = 0;
+ amap_wipeout(amap);
+ return;
+ }
+
+ for (k = 0; k < n; i++, j++, k++) {
+ chunk->ac_anon[i] = srcchunk->ac_anon[j];
+ if (chunk->ac_anon[i] == NULL)
+ continue;
+
+ chunk->ac_usedmap |= (1 << i);
+ chunk->ac_anon[i]->an_ref++;
+ amap->am_nused++;
+ }
}
- KASSERT(lcv == amap->am_nslot);
/*
* drop our reference to the old amap (srcamap).
@@ -446,9 +625,10 @@ void
amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
{
struct vm_amap *amap = entry->aref.ar_amap;
- int lcv, slot;
+ int slot;
struct vm_anon *anon, *nanon;
struct vm_page *pg, *npg;
+ struct vm_amap_chunk *chunk;
/*
* note that if we wait, we must ReStart the "lcv" for loop because
@@ -456,22 +636,27 @@ amap_cow_now(struct vm_map *map, struct
* am_anon[] array on us.
*/
ReStart:
- for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
- /* get the page */
- slot = amap->am_slots[lcv];
- anon = amap->am_anon[slot];
- pg = anon->an_page;
-
- /* page must be resident since parent is wired */
- if (pg == NULL)
- panic("amap_cow_now: non-resident wired page"
- " in anon %p", anon);
+ AMAP_CHUNK_FOREACH(chunk, amap) {
+ int i, map = chunk->ac_usedmap;
+
+ for (i = ffs(map); i != 0; i = ffs(map)) {
+ slot = i - 1;
+ map ^= 1 << slot;
+ anon = chunk->ac_anon[slot];
+ pg = anon->an_page;
+
+ /* page must be resident since parent is wired */
+ if (pg == NULL)
+ panic("amap_cow_now: non-resident wired page"
+ " in anon %p", anon);
+
+ /*
+ * if the anon ref count is one, we are safe (the child
+ * has exclusive access to the page).
+ */
+ if (anon->an_ref <= 1)
+ continue;
- /*
- * if the anon ref count is one, we are safe (the child has
- * exclusive access to the page).
- */
- if (anon->an_ref > 1) {
/*
* if the page is busy then we have to wait for
* it and then restart.
@@ -501,14 +686,14 @@ ReStart:
uvm_wait("cownowpage");
goto ReStart;
}
-
+
/*
* got it... now we can copy the data and replace anon
* with our new one...
*/
uvm_pagecopy(pg, npg); /* old -> new */
anon->an_ref--; /* can't drop to zero */
- amap->am_anon[slot] = nanon; /* replace */
+ chunk->ac_anon[slot] = nanon; /* replace */
/*
* drop PG_BUSY on new page ... since we have had its
@@ -652,61 +837,83 @@ amap_pp_adjref(struct vm_amap *amap, int
void
amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
{
- int byanon, lcv, stop, curslot, ptr, slotend;
+ int curslot, i, map, bybucket;
+ int bucket, startbucket, endbucket;
+ int startbase, endbase;
struct vm_anon *anon;
+ struct vm_amap_chunk *chunk, *nchunk;
/*
- * we can either traverse the amap by am_anon or by am_slots depending
- * on which is cheaper. decide now.
+ * we can either traverse the amap by am_chunks or by am_buckets
+ * depending on which is cheaper. decide now.
*/
- if (slots < amap->am_nused) {
- byanon = TRUE;
- lcv = slotoff;
- stop = slotoff + slots;
+ startbucket = UVM_AMAP_BUCKET(amap, slotoff);
+ endbucket = UVM_AMAP_BUCKET(amap, slotoff + slots - 1);
+
+ if (UVM_AMAP_SMALL(amap)) {
+ bybucket = FALSE;
+ chunk = &amap->am_small;
+ } else if (endbucket + 1 - startbucket >= amap->am_ncused) {
+ bybucket = FALSE;
+ chunk = TAILQ_FIRST(&amap->am_chunks);
} else {
- byanon = FALSE;
- lcv = 0;
- stop = amap->am_nused;
- slotend = slotoff + slots;
+ bybucket = TRUE;
+ bucket = startbucket;
+ chunk = amap->am_buckets[bucket];
}
- while (lcv < stop) {
- int refs;
-
- if (byanon) {
- curslot = lcv++; /* lcv advances here */
- if (amap->am_anon[curslot] == NULL)
- continue;
- } else {
- curslot = amap->am_slots[lcv];
- if (curslot < slotoff || curslot >= slotend) {
- lcv++; /* lcv advances here */
- continue;
- }
- stop--; /* drop stop, since anon will be removed */
+ startbase = AMAP_BASE_SLOT(slotoff);
+ endbase = AMAP_BASE_SLOT(slotoff + slots - 1);
+ for (;;) {
+ if (chunk == NULL || (bybucket &&
+ UVM_AMAP_BUCKET(amap, chunk->ac_baseslot) != bucket)) {
+ if (!bybucket || bucket >= endbucket)
+ break;
+ bucket++;
+ chunk = amap->am_buckets[bucket];
+ continue;
+ } else if (!bybucket) {
+ if (chunk->ac_baseslot + chunk->ac_nslot <= slotoff)
+ goto next;
+ if (chunk->ac_baseslot >= slotoff + slots)
+ goto next;
}
- anon = amap->am_anon[curslot];
- /* remove it from the amap */
- amap->am_anon[curslot] = NULL;
- ptr = amap->am_bckptr[curslot];
- if (ptr != (amap->am_nused - 1)) {
- amap->am_slots[ptr] =
- amap->am_slots[amap->am_nused - 1];
- amap->am_bckptr[amap->am_slots[ptr]] =
- ptr; /* back ptr. */
- }
- amap->am_nused--;
-
- /* drop anon reference count */
- refs = --anon->an_ref;
- if (refs == 0) {
- /*
- * we just eliminated the last reference to an anon.
- * free it.
- */
- uvm_anfree(anon);
+ map = chunk->ac_usedmap;
+ if (startbase == chunk->ac_baseslot)
+ map &= ~((1 << (slotoff - startbase)) - 1);
+ if (endbase == chunk->ac_baseslot)
+ map &= (1 << (slotoff + slots - endbase)) - 1;
+
+ for (i = ffs(map); i != 0; i = ffs(map)) {
+ int refs;
+
+ curslot = i - 1;
+ map ^= 1 << curslot;
+ chunk->ac_usedmap ^= 1 << curslot;
+ anon = chunk->ac_anon[curslot];
+
+ /* remove it from the amap */
+ chunk->ac_anon[curslot] = NULL;
+
+ amap->am_nused--;
+
+ /* drop anon reference count */
+ refs = --anon->an_ref;
+ if (refs == 0) {
+ /*
+ * we just eliminated the last reference to an
+ * anon. free it.
+ */
+ uvm_anfree(anon);
+ }
}
+
+next:
+ nchunk = TAILQ_NEXT(chunk, ac_list);
+ if (chunk->ac_usedmap == 0)
+ amap_chunk_free(amap, chunk);
+ chunk = nchunk;
}
}
@@ -728,31 +935,38 @@ amap_swap_off(int startslot, int endslot
boolean_t rv = FALSE;
for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
- int i;
+ int i, map;
+ struct vm_amap_chunk *chunk;
- for (i = 0; i < am->am_nused; i++) {
- int slot;
- int swslot;
- struct vm_anon *anon;
-
- slot = am->am_slots[i];
- anon = am->am_anon[slot];
-
- swslot = anon->an_swslot;
- if (swslot < startslot || endslot <= swslot) {
- continue;
- }
+again:
+ AMAP_CHUNK_FOREACH(chunk, am) {
+ map = chunk->ac_usedmap;
+
+ for (i = ffs(map); i != 0; i = ffs(map)) {
+ int swslot;
+ int slot = i - 1;
+ struct vm_anon *anon;
+
+ map ^= 1 << slot;
+ anon = chunk->ac_anon[slot];
+
+ swslot = anon->an_swslot;
+ if (swslot < startslot || endslot <= swslot) {
+ continue;
+ }
- am->am_flags |= AMAP_SWAPOFF;
+ am->am_flags |= AMAP_SWAPOFF;
- rv = uvm_anon_pagein(anon);
+ rv = uvm_anon_pagein(anon);
- am->am_flags &= ~AMAP_SWAPOFF;
- if (rv || amap_refs(am) == 0)
- break;
- i = 0;
+ am->am_flags &= ~AMAP_SWAPOFF;
+ if (rv || amap_refs(am) == 0)
+ goto nextamap;
+ goto again;
+ }
}
+nextamap:
am_next = LIST_NEXT(am, am_list);
if (amap_refs(am) == 0)
amap_wipeout(am);
@@ -769,6 +983,7 @@ amap_lookup(struct vm_aref *aref, vaddr_
{
int slot;
struct vm_amap *amap = aref->ar_amap;
+ struct vm_amap_chunk *chunk;
AMAP_B2SLOT(slot, offset);
slot += aref->ar_pageoff;
@@ -776,7 +991,11 @@ amap_lookup(struct vm_aref *aref, vaddr_
if (slot >= amap->am_nslot)
panic("amap_lookup: offset out of range");
- return(amap->am_anon[slot]);
+ chunk = amap_chunk_get(amap, slot, 0);
+ if (chunk == NULL)
+ return NULL;
+
+ return chunk->ac_anon[UVM_AMAP_SLOTIDX(slot)];
}
/*
@@ -788,8 +1007,9 @@ void
amap_lookups(struct vm_aref *aref, vaddr_t offset,
struct vm_anon **anons, int npages)
{
- int slot;
+ int i, lcv, n, slot;
struct vm_amap *amap = aref->ar_amap;
+ struct vm_amap_chunk *chunk = NULL;
AMAP_B2SLOT(slot, offset);
slot += aref->ar_pageoff;
@@ -797,49 +1017,65 @@ amap_lookups(struct vm_aref *aref, vaddr
if ((slot + (npages - 1)) >= amap->am_nslot)
panic("amap_lookups: offset out of range");
- memcpy(anons, &amap->am_anon[slot], npages * sizeof(struct vm_anon *));
-
- return;
+ for (i = 0, lcv = slot; lcv < slot + npages; i += n, lcv += n) {
+ n = UVM_AMAP_CHUNK - UVM_AMAP_SLOTIDX(lcv);
+ if (lcv + n > slot + npages)
+ n = slot + npages - lcv;
+
+ chunk = amap_chunk_get(amap, lcv, 0);
+ if (chunk == NULL)
+ memset(&anons[i], 0, n * sizeof(*anons[i]));
+ else
+ memcpy(&anons[i],
+ &chunk->ac_anon[UVM_AMAP_SLOTIDX(lcv)],
+ n * sizeof(*anons[i]));
+ }
}
/*
* amap_add: add (or replace) a page to an amap
*
- * => returns an "offset" which is meaningful to amap_unadd().
+ * => returns 0 if adding the page succeeded and 1 if it did not.
*/
-void
+int
amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
boolean_t replace)
{
int slot;
struct vm_amap *amap = aref->ar_amap;
+ struct vm_amap_chunk *chunk;
AMAP_B2SLOT(slot, offset);
slot += aref->ar_pageoff;
if (slot >= amap->am_nslot)
panic("amap_add: offset out of range");
+ chunk = amap_chunk_get(amap, slot, 1);
+ if (chunk == NULL)
+ return 1;
+ slot = UVM_AMAP_SLOTIDX(slot);
if (replace) {
- if (amap->am_anon[slot] == NULL)
+ if (chunk->ac_anon[slot] == NULL)
panic("amap_add: replacing null anon");
- if (amap->am_anon[slot]->an_page != NULL &&
+ if (chunk->ac_anon[slot]->an_page != NULL &&
(amap->am_flags & AMAP_SHARED) != 0) {
- pmap_page_protect(amap->am_anon[slot]->an_page,
+ pmap_page_protect(chunk->ac_anon[slot]->an_page,
PROT_NONE);
/*
* XXX: suppose page is supposed to be wired somewhere?
*/
}
} else { /* !replace */
- if (amap->am_anon[slot] != NULL)
+ if (chunk->ac_anon[slot] != NULL)
panic("amap_add: slot in use");
- amap->am_bckptr[slot] = amap->am_nused;
- amap->am_slots[amap->am_nused] = slot;
+ chunk->ac_usedmap |= 1 << slot;
amap->am_nused++;
}
- amap->am_anon[slot] = anon;
+ chunk->ac_anon[slot] = anon;
+
+ return 0;
}
/*
@@ -848,26 +1084,29 @@ amap_add(struct vm_aref *aref, vaddr_t o
void
amap_unadd(struct vm_aref *aref, vaddr_t offset)
{
- int ptr, slot;
+ int slot;
struct vm_amap *amap = aref->ar_amap;
+ struct vm_amap_chunk *chunk;
AMAP_B2SLOT(slot, offset);
slot += aref->ar_pageoff;
if (slot >= amap->am_nslot)
panic("amap_unadd: offset out of range");
+ chunk = amap_chunk_get(amap, slot, 0);
+ if (chunk == NULL)
+ panic("amap_unadd: chunk for slot %d not present", slot);
- if (amap->am_anon[slot] == NULL)
+ slot = UVM_AMAP_SLOTIDX(slot);
+ if (chunk->ac_anon[slot] == NULL)
panic("amap_unadd: nothing there");
- amap->am_anon[slot] = NULL;
- ptr = amap->am_bckptr[slot];
-
- if (ptr != (amap->am_nused - 1)) { /* swap to keep slots contig? */
- amap->am_slots[ptr] = amap->am_slots[amap->am_nused - 1];
- amap->am_bckptr[amap->am_slots[ptr]] = ptr; /* back link */
- }
+ chunk->ac_anon[slot] = NULL;
+ chunk->ac_usedmap &= ~(1 << slot);
amap->am_nused--;
+
+ if (chunk->ac_usedmap == 0)
+ amap_chunk_free(amap, chunk);
}
/*
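
Several of the hunks above (amap_wipeout(), amap_cow_now(),
amap_wiperange(), amap_swap_off()) walk a chunk's ac_usedmap with
ffs(3). Here is that inner loop in isolation, including the boundary
masks amap_wiperange() applies to the first and last chunk of a range
(standalone userland sketch, values invented):

#include <stdio.h>
#include <strings.h>	/* ffs() */

#define UVM_AMAP_CHUNK	16
#define AMAP_BASE_SLOT(slot)	(((slot) / UVM_AMAP_CHUNK) * UVM_AMAP_CHUNK)

int
main(void)
{
	int slotoff = 18, slots = 9;	/* wipe amap slots [18, 27) */
	int baseslot = 16;		/* chunk->ac_baseslot */
	int map = 0xffff;		/* chunk->ac_usedmap: all in use */
	int startbase = AMAP_BASE_SLOT(slotoff);
	int endbase = AMAP_BASE_SLOT(slotoff + slots - 1);
	int i, curslot;

	/* clear bits below the start of the range ... */
	if (startbase == baseslot)
		map &= ~((1 << (slotoff - startbase)) - 1);
	/* ... and at or above its end, as in amap_wiperange() */
	if (endbase == baseslot)
		map &= (1 << (slotoff + slots - endbase)) - 1;

	for (i = ffs(map); i != 0; i = ffs(map)) {
		curslot = i - 1;
		map ^= 1 << curslot;	/* drop the visited bit */
		printf("wipe chunk slot %d (amap slot %d)\n",
		    curslot, baseslot + curslot);
	}
	return 0;
}

Clearing the visited bit after each ffs() keeps the walk proportional
to the number of slots in use instead of all 16 positions.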
Index: sys/uvm/uvm_amap.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_amap.h,v
retrieving revision 1.24
diff -u -p -r1.24 uvm_amap.h
--- sys/uvm/uvm_amap.h 16 Apr 2016 18:39:31 -0000 1.24
+++ sys/uvm/uvm_amap.h 22 Apr 2016 18:08:54 -0000
@@ -61,12 +61,11 @@ struct vm_amap;
/*
* prototypes for the amap interface
*/
-
/* add an anon to an amap */
-void amap_add(struct vm_aref *, vaddr_t, struct vm_anon *,
+int amap_add(struct vm_aref *, vaddr_t, struct vm_anon *,
boolean_t);
/* allocate a new amap */
-struct vm_amap *amap_alloc(vaddr_t, int);
+struct vm_amap *amap_alloc(vaddr_t, int, int);
/* clear amap needs-copy flag */
void amap_copy(vm_map_t, vm_map_entry_t, int, boolean_t, vaddr_t,
vaddr_t);
@@ -110,57 +109,78 @@ boolean_t amap_swap_off(int, int);
/*
* we currently provide an array-based amap implementation. in this
- * implementation we provide the option of tracking split references
- * so that we don't lose track of references during partial unmaps
- * ... this is enabled with the "UVM_AMAP_PPREF" define.
+ * implementation we track split references so that we don't lose
+ * track of references during partial unmaps.
*/
#define UVM_AMAP_PPREF /* track partial references */
/*
- * here is the definition of the vm_amap structure for this implementation.
+ * here is the definition of the vm_amap structure and helper structures for
+ * this implementation.
*/
+struct vm_amap_chunk {
+ TAILQ_ENTRY(vm_amap_chunk) ac_list;
+ int ac_baseslot;
+ uint16_t ac_usedmap;
+ uint16_t ac_nslot;
+ struct vm_anon *ac_anon[];
+};
+
struct vm_amap {
int am_ref; /* reference count */
int am_flags; /* flags */
int am_nslot; /* # of slots currently in map */
int am_nused; /* # of slots currently in use */
- int *am_slots; /* contig array of active slots */
- int *am_bckptr; /* back pointer array to am_slots */
- struct vm_anon **am_anon; /* array of anonymous pages */
#ifdef UVM_AMAP_PPREF
int *am_ppref; /* per page reference count (if !NULL) */
#endif
LIST_ENTRY(vm_amap) am_list;
+
+ union {
+ struct {
+ struct vm_amap_chunk **amn_buckets;
+ TAILQ_HEAD(, vm_amap_chunk) amn_chunks;
+ int amn_ncused; /* # of chunks currently in use */
+ int amn_hashshift; /* shift count to hash slot to bucket */
+ } ami_normal;
+
+ /*
+ * MUST be last element in vm_amap because it contains a
+ * variably sized array element.
+ */
+ struct vm_amap_chunk ami_small;
+ } am_impl;
+
+#define am_buckets am_impl.ami_normal.amn_buckets
+#define am_chunks am_impl.ami_normal.amn_chunks
+#define am_ncused am_impl.ami_normal.amn_ncused
+#define am_hashshift am_impl.ami_normal.amn_hashshift
+
+#define am_small am_impl.ami_small
};
/*
- * note that am_slots, am_bckptr, and am_anon are arrays. this allows
- * fast lookup of pages based on their virual address at the expense of
- * some extra memory. in the future we should be smarter about memory
- * usage and fall back to a non-array based implementation on systems
- * that are short of memory (XXXCDC).
- *
- * the entries in the array are called slots... for example an amap that
- * covers four pages of virtual memory is said to have four slots. here
- * is an example of the array usage for a four slot amap. note that only
- * slots one and three have anons assigned to them. "D/C" means that we
- * "don't care" about the value.
- *
- * 0 1 2 3
- * am_anon: NULL, anon0, NULL, anon1 (actual pointers to anons)
- * am_bckptr: D/C, 1, D/C, 0 (points to am_slots entry)
+ * The entries in an amap are called slots. For example an amap that
+ * covers four pages is said to have four slots.
*
- * am_slots: 3, 1, D/C, D/C (says slots 3 and 1 are in use)
- *
- * note that am_bckptr is D/C if the slot in am_anon is set to NULL.
- * to find the entry in am_slots for an anon, look at am_bckptr[slot],
- * thus the entry for slot 3 in am_slots[] is at am_slots[am_bckptr[3]].
- * in general, if am_anon[X] is non-NULL, then the following must be
- * true: am_slots[am_bckptr[X]] == X
+ * The slots of an amap are clustered into chunks of UVM_AMAP_CHUNK
+ * slots each. The data structure of a chunk is vm_amap_chunk.
+ * Every chunk contains an array of pointers to vm_anon, and a bitmap
+ * is used to represent which of the slots are in use.
+ *
+ * Small amaps of up to UVM_AMAP_CHUNK slots have the chunk directly
+ * embedded in the amap structure.
+ *
+ * amaps with more slots are normal amaps and organize chunks in a hash
+ * table. The hash table is organized as an array of buckets.
+ * All chunks of the amap are additionally stored in a linked list.
+ * Chunks that belong to the same hash bucket are stored in the list
+ * consecutively. When all slots in a chunk are unused, the chunk is freed.
*
- * note that am_slots is always contig-packed.
+ * For large amaps, the bucket array can grow large. See the description
+ * below of how large bucket arrays are avoided.
*/
/*
@@ -203,6 +223,11 @@ struct vm_amap {
#define UVM_AMAP_LARGE 256 /* # of slots in "large" amap */
#define UVM_AMAP_CHUNK 16 /* # of slots to chunk large amaps in */
+#define UVM_AMAP_SMALL(amap) ((amap)->am_nslot <= UVM_AMAP_CHUNK)
+#define UVM_AMAP_SLOTIDX(slot) ((slot) % UVM_AMAP_CHUNK)
+#define UVM_AMAP_BUCKET(amap, slot) \
+ (((slot) / UVM_AMAP_CHUNK) >> (amap)->am_hashshift)
+
#ifdef _KERNEL
/*
@@ -214,6 +239,14 @@ struct vm_amap {
KASSERT(((B) & (PAGE_SIZE - 1)) == 0); \
(S) = (B) >> PAGE_SHIFT; \
}
+
+#define AMAP_CHUNK_FOREACH(chunk, amap) \
+ for (chunk = (UVM_AMAP_SMALL(amap) ? \
+ &(amap)->am_small : TAILQ_FIRST(&(amap)->am_chunks)); \
+ (chunk) != NULL; (chunk) = TAILQ_NEXT(chunk, ac_list))
+
+#define AMAP_BASE_SLOT(slot) \
+ (((slot) / UVM_AMAP_CHUNK) * UVM_AMAP_CHUNK)
/*
* flags macros
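
The slot macros added above are plain integer arithmetic; a tiny
illustration (userland, my own driver code) of how a global slot
number decomposes:

#include <stdio.h>

#define UVM_AMAP_CHUNK	16
#define UVM_AMAP_SLOTIDX(slot)	((slot) % UVM_AMAP_CHUNK)
#define AMAP_BASE_SLOT(slot)	(((slot) / UVM_AMAP_CHUNK) * UVM_AMAP_CHUNK)
/* UVM_AMAP_BUCKET() without the amap argument, hashshift passed in */
#define BUCKET(slot, hashshift)	(((slot) / UVM_AMAP_CHUNK) >> (hashshift))

int
main(void)
{
	int slot = 37;

	printf("slot %d: baseslot %d, index %d, bucket %d (shift 0), "
	    "bucket %d (shift 2)\n", slot, AMAP_BASE_SLOT(slot),
	    UVM_AMAP_SLOTIDX(slot), BUCKET(slot, 0), BUCKET(slot, 2));
	return 0;
}

So with a hashshift of 2, four consecutive chunks share a bucket and
the bucket array is a quarter of the size.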
Index: sys/uvm/uvm_fault.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
retrieving revision 1.89
diff -u -p -r1.89 uvm_fault.c
--- sys/uvm/uvm_fault.c 29 Mar 2016 12:04:26 -0000 1.89
+++ sys/uvm/uvm_fault.c 22 Apr 2016 18:08:54 -0000
@@ -902,8 +902,13 @@ ReFault:
/* un-busy! new page */
atomic_clearbits_int(&pg->pg_flags, PG_BUSY|PG_FAKE);
UVM_PAGE_OWN(pg, NULL);
- amap_add(&ufi.entry->aref, ufi.orig_rvaddr - ufi.entry->start,
- anon, 1);
+ if (amap_add(&ufi.entry->aref,
+ ufi.orig_rvaddr - ufi.entry->start, anon, 1)) {
+ uvm_anfree(anon);
+ uvmfault_unlockall(&ufi, amap, NULL, oanon);
+ uvmexp.fltnoamap++;
+ return (ENOMEM);
+ }
/* deref: can not drop to zero here by defn! */
oanon->an_ref--;
@@ -1175,8 +1180,13 @@ Case2:
*/
}
- amap_add(&ufi.entry->aref, ufi.orig_rvaddr - ufi.entry->start,
- anon, 0);
+ if (amap_add(&ufi.entry->aref,
+ ufi.orig_rvaddr - ufi.entry->start, anon, 0)) {
+ uvm_anfree(anon);
+ uvmfault_unlockall(&ufi, amap, NULL, oanon);
+ uvmexp.fltnoamap++;
+ return (ENOMEM);
+ }
}
/* note: pg is either the uobjpage or the new page in the new anon */
Index: sys/uvm/uvm_map.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_map.c,v
retrieving revision 1.211
diff -u -p -r1.211 uvm_map.c
--- sys/uvm/uvm_map.c 4 Apr 2016 16:34:16 -0000 1.211
+++ sys/uvm/uvm_map.c 22 Apr 2016 18:08:55 -0000
@@ -1084,7 +1084,7 @@ uvm_mapanon(struct vm_map *map, vaddr_t
if (flags & UVM_FLAG_OVERLAY) {
KERNEL_LOCK();
entry->aref.ar_pageoff = 0;
- entry->aref.ar_amap = amap_alloc(sz, M_WAITOK);
+ entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
KERNEL_UNLOCK();
}
@@ -1340,7 +1340,7 @@ uvm_map(struct vm_map *map, vaddr_t *add
}
if (flags & UVM_FLAG_OVERLAY) {
entry->aref.ar_pageoff = 0;
- entry->aref.ar_amap = amap_alloc(sz, M_WAITOK);
+ entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
}
/* Update map and process statistics. */
Index: sys/uvm/uvm_stat.c
===================================================================
RCS file: /cvs/src/sys/uvm/uvm_stat.c,v
retrieving revision 1.28
diff -u -p -r1.28 uvm_stat.c
--- sys/uvm/uvm_stat.c 25 Oct 2014 12:54:16 -0000 1.28
+++ sys/uvm/uvm_stat.c 22 Apr 2016 18:08:55 -0000
@@ -69,9 +69,9 @@ uvmexp_print(int (*pr)(const char *, ...
uvmexp.softs, uvmexp.syscalls, uvmexp.kmapent);
(*pr)(" fault counts:\n");
- (*pr)(" noram=%d, noanon=%d, pgwait=%d, pgrele=%d\n",
- uvmexp.fltnoram, uvmexp.fltnoanon, uvmexp.fltpgwait,
- uvmexp.fltpgrele);
+ (*pr)(" noram=%d, noanon=%d, noamap=%d, pgwait=%d, pgrele=%d\n",
+ uvmexp.fltnoram, uvmexp.fltnoanon, uvmexp.fltnoamap,
+ uvmexp.fltpgwait, uvmexp.fltpgrele);
(*pr)(" ok relocks(total)=%d(%d), anget(retries)=%d(%d), "
"amapcopy=%d\n", uvmexp.fltrelckok, uvmexp.fltrelck,
uvmexp.fltanget, uvmexp.fltanretry, uvmexp.fltamcopy);
Index: sys/uvm/uvmexp.h
===================================================================
RCS file: /cvs/src/sys/uvm/uvmexp.h,v
retrieving revision 1.1
diff -u -p -r1.1 uvmexp.h
--- sys/uvm/uvmexp.h 8 Jul 2014 17:19:26 -0000 1.1
+++ sys/uvm/uvmexp.h 22 Apr 2016 18:08:55 -0000
@@ -107,6 +107,7 @@ struct uvmexp {
/* fault subcounters */
int fltnoram; /* number of times fault was out of ram */
int fltnoanon; /* number of times fault was out of anons */
+ int fltnoamap; /* number of times fault was out of amap chunks */
int fltpgwait; /* number of times fault had to wait on a page */
int fltpgrele; /* number of times fault found a released page */
int fltrelck; /* number of times fault relock called */