> Date: Mon, 21 Mar 2016 20:02:28 +0100
> From: Stefan Kempf <sisnk...@gmail.com>
> 
> Recently we found that amaps consume a good deal of kernel address space.
> See this thread: https://marc.info/?l=openbsd-tech&m=145752756005014&w=2.
> And we found a way to reduce kernel memory pressure, at least on some
> architectures. See the diffs in that thread.
> 
> Besides that, it's possible to shrink the amap struct from 72 to 48 bytes
> on 64 bit systems (or from 44 to 32 bytes on 32 bit systems).
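> 
> (The shrink comes from merging am_ref and am_flags into a single word and
> replacing the three per-flavor pointers with one union, as in the struct
> vm_amap change in the uvm_amap.h hunk below. Rough arithmetic on amd64:
> 
>     before: 5 ints + 3 pointers + am_ppref + am_list = 20 + 24 + 8 + 16 = 68, padded to 72
>     after:  4 u_ints + 1 union pointer + am_ppref + am_list = 16 + 8 + 8 + 16 = 48
> 
> and likewise 44 -> 32 bytes with 4-byte pointers.)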
> 
> It's also possible to cut the memory needed for slots roughly in half
> on 64 bit architectures, and by up to a factor of 3 on 32 bit machines.
> 
> Here's how amap slots are maintained currently: for every slot, the kernel
> allocates one pointer to a vm_anon and two ints (16 bytes per slot on
> 64 bit systems, 12 bytes on 32 bit CPUs).
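> 
> That per-slot cost is the MALLOC_SLOT_UNIT definition which the diff
> below removes:
> 
>     #define MALLOC_SLOT_UNIT (2 * sizeof(int) + sizeof(struct vm_anon *))
>     /* 64 bit: 2*4 + 8 = 16 bytes per slot; 32 bit: 2*4 + 4 = 12 bytes */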
> 
> To reduce these memory requirements, we need three flavors of amaps:
> 
> - Tiny amaps with only one slot store the pointer to the vm_anon in the
>   amap directly. The two ints are not needed. This was Theo's idea.
> 
> - Small amaps with up to 32 slots need 8 instead of 16 bytes per slot
>   (or 4 bytes instead of 12 on 32 bit machines).
>   It's enough to store the array of anons. The two ints per slot are
>   not needed.
> 
>   Tiny and small amaps are the ones used most often.
> 
> - Normal amaps with more than 32 slots (n slots, say) only need
>   4 * sizeof(pointer) + n * sizeof(struct vm_anon *) + 12*n/32 bytes
>   to maintain amap slots. For large n that's around 1.8 times less
>   memory for slots (or about 2.7 times less on 32 bit CPUs) compared
>   to the current implementation; see the worked example below.
>   That memory covers the vm_anon pointer array and a small header
>   structure. The two ints per slot in the current code are replaced
>   by n/32 32-bit bitmaps, one per cluster of 32 slots.
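> 
> To make the normal case concrete, here's the arithmetic for an amd64
> amap with n = 256 slots (an illustration only; 32 slots per cluster,
> so 8 clusters):
> 
>     current: n * (2*sizeof(int) + sizeof(struct vm_anon *)) = 256 * 16 = 4096 bytes
>     new:     4 * sizeof(pointer)              (vm_amap_meta, padded)  =   32
>              + n * sizeof(struct vm_anon *)   (anon pointer array)    = 2048
>              + (n/32) * 12                    (bckptr + cluster maps) =   96
>                                                                       = 2176 bytes
> 
> That's about 1.9 times less; the same amap on a 32 bit machine goes from
> 3072 to 1136 bytes, about 2.7 times less.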
> 
> But that also means that the amap layer has to do some special-casing
> for each of these flavors. I tried to factor out the common code where
> possible.
> 
> How does it work? Basically, the *current* implementation maintains the
> slots of an amap as three arrays (for n slots, every array has n entries):
> - am_anon: an array of vm_anon pointers
> - am_slots: an array of indices recording which entries in am_anon are
>   not NULL. This allows quickly traversing all occupied entries in
>   the amap.
> - am_bckptr: maps a non-NULL entry in am_anon to its index in am_slots.
>   Needed to update am_slots when an entry in am_anon is set to NULL.
> 
> We can compress this bookkeeping: used slots are tracked with one 32-bit
> bitmap per group of 32 slots, so am_slots and am_bckptr only need n/32
> entries each.
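> 
> The compressed bookkeeping is a small cluster record plus two macros that
> map a slot to its cluster and bit; this mirrors struct vm_amap_clust and
> the AMAP_S2C/AMAP_SLOTCHUNK macros in the uvm_amap.h part of the diff:
> 
>     #define AMAP_SLOTPERCLUST    32
>     #define AMAP_S2C(slot)       ((slot) / AMAP_SLOTPERCLUST)  /* slot -> cluster index */
>     #define AMAP_SLOTCHUNK(slot) ((slot) % AMAP_SLOTPERCLUST)  /* slot -> bit in ac_map */
> 
>     struct vm_amap_clust {
>             u_int ac_clust;      /* which cluster this entry describes */
>             u_int ac_map;        /* bitmap of used slots in that cluster */
>     };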
> 
> For amaps with up to 32 slots, we do not need am_slots and am_bckptr:
> it's possible to store the bitmap of used slots in the amap directly.
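> 
> In that case am_nused itself holds the bitmap, so filling or clearing a
> slot is a single bit operation (cf. amap_fill_slot()/amap_clear_slot()
> in the diff):
> 
>     amap->am_nused |= 1 << slot;         /* mark slot used */
>     amap->am_nused &= ~(1 << slot);      /* mark slot free */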
> 
> I see no difference in doing a make -j4 build with this on an amd64
> T430s (so this doesn't seem to slow things down here). Tests on
> other systems and architectures are much appreciated.
> 
> The diff below contains a proof of concept, but I'll also post smaller,
> easier-to-review diffs over the next few days.
> 
> Comments/thoughts about doing this?

Is it really worth having three flavours?  If having only two
(tiny+normal?) simplifies the code considerably and doesn't result in
much more memory being used, that may be preferable.

The amaps are one of the roadblocks on the way to making uvm more
mpsafe.  And keeping the code simple will make that easier.

> Index: uvm/uvm_amap.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_amap.c,v
> retrieving revision 1.62
> diff -u -p -r1.62 uvm_amap.c
> --- uvm/uvm_amap.c    16 Mar 2016 16:53:43 -0000      1.62
> +++ uvm/uvm_amap.c    21 Mar 2016 18:53:45 -0000
> @@ -53,21 +53,31 @@
>  struct pool uvm_amap_pool;
>  
>  /* Pools for amap slots for the most common amap slot sizes */
> -struct pool uvm_amap_slot_pools[UVM_AMAP_CHUNK];
> +struct pool uvm_amap_slot_pools[UVM_AMAP_CHUNK - 1];
>  
>  LIST_HEAD(, vm_amap) amap_list;
>  
> -static char amap_slot_pool_names[UVM_AMAP_CHUNK][13];
> -
> -#define MALLOC_SLOT_UNIT (2 * sizeof(int) + sizeof(struct vm_anon *))
> +static char amap_slot_pool_names[UVM_AMAP_CHUNK - 1][13];
>  
>  /*
>   * local functions
>   */
>  
> -static struct vm_amap *amap_alloc1(int, int, int);
> +struct vm_anon **amap_slots_alloc(struct vm_amap *, u_int, int,
> +    struct vm_amap_meta **);
> +static struct vm_amap *amap_alloc1(u_int, u_int, int);
>  static __inline void amap_list_insert(struct vm_amap *);
>  static __inline void amap_list_remove(struct vm_amap *);   
> +static __inline void amap_anon_release(struct vm_anon *);
> +void amap_fill_slot(struct vm_amap *, u_int);
> +void amap_normal_clear_slot(struct vm_amap *, u_int);
> +void amap_clear_slot(struct vm_amap *, u_int);
> +static __inline void amap_normal_wipe_slot(struct vm_amap *, u_int);
> +
> +void amap_wipeout_traverse(struct vm_anon **, u_int, u_int);
> +int amap_cow_now_traverse(struct vm_anon **, u_int, u_int);
> +int amap_swap_off_traverse(struct vm_amap *, struct vm_anon **, u_int, u_int,
> +    int, int);
>  
>  static __inline void
>  amap_list_insert(struct vm_amap *amap)
> @@ -81,6 +91,99 @@ amap_list_remove(struct vm_amap *amap)
>       LIST_REMOVE(amap, am_list);
>  }
>  
> +static __inline void
> +amap_anon_release(struct vm_anon *anon)
> +{
> +     int refs;
> +
> +     refs = --anon->an_ref;
> +     if (refs == 0) {
> +             /* we had the last reference to a vm_anon. free it. */
> +             uvm_anfree(anon);
> +     }
> +}
> +
> +void
> +amap_fill_slot(struct vm_amap *amap, u_int slot)
> +{
> +     u_int clust, ptr;
> +     struct vm_amap_meta *meta;
> +     struct vm_amap_clust *slotclust;
> +
> +     if (amap->am_maxslot == 1) {
> +             amap->am_nused = 1;
> +             return;
> +     } else if (amap->am_maxslot <= UVM_AMAP_MAXSLOT_SMALL) {
> +             amap->am_nused |= 1 << slot;
> +             return;
> +     }
> +
> +     amap->am_nused++;
> +
> +     meta = amap->am_meta;
> +     clust = AMAP_S2C(slot);
> +     ptr = meta->am_bckptr[clust];
> +
> +     if (ptr >= meta->am_nused ||
> +         meta->am_clust[ptr].ac_clust != clust) {
> +             meta->am_bckptr[clust] = meta->am_nused;
> +             slotclust = &meta->am_clust[meta->am_nused];
> +             slotclust->ac_clust = clust;
> +             slotclust->ac_map = 1 << AMAP_SLOTCHUNK(slot);
> +             meta->am_nused++;
> +
> +     } else {
> +             slotclust = &meta->am_clust[ptr];
> +             slotclust->ac_map |= 1 << AMAP_SLOTCHUNK(slot);
> +     }
> +}
> +
> +void
> +amap_normal_clear_slot(struct vm_amap *amap, u_int slot)
> +{
> +     u_int clust, ptr;
> +     struct vm_amap_meta *meta;
> +
> +     amap->am_nused--;
> +
> +     meta = amap->am_meta;
> +     clust = AMAP_S2C(slot);
> +     ptr = meta->am_bckptr[clust];
> +
> +     meta->am_clust[ptr].ac_map &= ~(1 << AMAP_SLOTCHUNK(slot));
> +     if (meta->am_clust[ptr].ac_map != 0)
> +             return;
> +
> +     if (ptr != (meta->am_nused - 1)) {      /* swap to keep slots contig? */
> +             meta->am_clust[ptr] = meta->am_clust[meta->am_nused - 1];
> +             meta->am_bckptr[meta->am_clust[ptr].ac_clust] = ptr;
> +     }
> +
> +     meta->am_nused--;
> +}
> +
> +void
> +amap_clear_slot(struct vm_amap *amap, u_int slot)
> +{
> +     if (amap->am_maxslot == 1)
> +             amap->am_nused = 0;
> +     else if (amap->am_maxslot <= UVM_AMAP_MAXSLOT_SMALL)
> +             amap->am_nused &= ~(1 << slot);
> +     else
> +             amap_normal_clear_slot(amap, slot);
> +}
> +
> +static __inline void
> +amap_normal_wipe_slot(struct vm_amap *amap, u_int slot)
> +{
> +     struct vm_anon *anon;
> +
> +     anon = amap->am_meta->am_anon[slot];
> +     amap->am_meta->am_anon[slot] = NULL;
> +     amap_normal_clear_slot(amap, slot);
> +     amap_anon_release(anon);
> +}
> +
>  #ifdef UVM_AMAP_PPREF
>  /*
>   * what is ppref?   ppref is an _optional_ amap feature which is used
> @@ -165,22 +268,87 @@ amap_init(void)
>  
>       for (i = 0; i < nitems(uvm_amap_slot_pools); i++) {
>               snprintf(amap_slot_pool_names[i],
> -                 sizeof(amap_slot_pool_names[0]), "amapslotpl%d", i + 1);
> -             pool_init(&uvm_amap_slot_pools[i], (i + 1) * MALLOC_SLOT_UNIT,
> -                 0, 0, PR_WAITOK, amap_slot_pool_names[i], NULL);
> +                 sizeof(amap_slot_pool_names[0]), "amapslotpl%d", i + 2);
> +             pool_init(&uvm_amap_slot_pools[i],
> +                 (i + 2) * sizeof(struct vm_anon *), 0, 0, PR_WAITOK,
> +                 amap_slot_pool_names[i], NULL);
>               pool_sethiwat(&uvm_amap_slot_pools[i], 4096);
>       }
>  }
>  
> +struct vm_anon **
> +amap_slots_alloc(struct vm_amap *amap, u_int totalslots, int waitf,
> +    struct vm_amap_meta **metap)
> +{
> +     struct vm_anon **anon;
> +     struct vm_amap_meta *meta;
> +     struct vm_amap_clust *clust;
> +     u_int *bckptr;
> +     u_int nclust;
> +     int pwaitf = PR_NOWAIT;
> +     size_t size;
> +
> +     if (totalslots == 1)
> +             return amap == NULL ? NULL : &amap->am_diranon;
> +     else if (totalslots <= UVM_AMAP_CHUNK) {
> +             if (waitf & M_WAITOK) {
> +                     pwaitf = PR_WAITOK;
> +                     if (waitf & M_CANFAIL)
> +                             pwaitf |= PR_LIMITFAIL;
> +             }
> +             meta = NULL;
> +             anon = pool_get(&uvm_amap_slot_pools[totalslots - 2], pwaitf);
> +
> +             if (amap != NULL)
> +                     amap->am_anon = anon;
> +     } else if (totalslots <= UVM_AMAP_MAXSLOT_SMALL) {
> +             meta = NULL;
> +             anon = mallocarray(totalslots, sizeof *anon, M_UVMAMAP, waitf);
> +             if (amap != NULL)
> +                     amap->am_anon = anon;
> +     } else {
> +             nclust = roundup(totalslots, AMAP_SLOTPERCLUST) /
> +                 AMAP_SLOTPERCLUST;
> +             size = sizeof(struct vm_amap_meta) +
> +                 totalslots * sizeof(struct vm_anon *) +
> +                 nclust * (sizeof(int) + sizeof(struct vm_amap_clust));
> +
> +             meta = malloc(sizeof *meta, M_UVMAMAP, waitf);
> +             anon = mallocarray(totalslots, sizeof *anon, M_UVMAMAP, waitf);
> +             clust = mallocarray(nclust, sizeof *clust, M_UVMAMAP, waitf);
> +             bckptr = mallocarray(nclust, sizeof *bckptr, M_UVMAMAP, waitf);
> +             if (meta == NULL || anon == NULL || clust == NULL ||
> +                 bckptr == NULL) {
> +                     free(meta, M_UVMAMAP, sizeof *meta);
> +                     free(anon, M_UVMAMAP, totalslots * sizeof *anon);
> +                     free(clust, M_UVMAMAP, nclust * sizeof *clust);
> +                     free(bckptr, M_UVMAMAP, nclust * sizeof *bckptr);
> +                     return NULL;
> +             }
> +
> +             meta->am_anon = anon;
> +             meta->am_clust = clust;
> +             meta->am_bckptr = bckptr;
> +             meta->am_nused = 0;
> +
> +             if (amap != NULL)
> +                     amap->am_meta = meta;
> +     }
> +
> +     if (metap != NULL)
> +             *metap = meta;
> +     return anon;
> +}
> +
>  /*
>   * amap_alloc1: internal function that allocates an amap, but does not
>   *   init the overlay.
>   */
>  static inline struct vm_amap *
> -amap_alloc1(int slots, int padslots, int waitf)
> +amap_alloc1(u_int slots, u_int padslots, int waitf)
>  {
>       struct vm_amap *amap;
> -     int totalslots;
> +     u_int totalslots;
>  
>       amap = pool_get(&uvm_amap_pool, (waitf == M_WAITOK) ? PR_WAITOK
>           : PR_NOWAIT);
> @@ -189,41 +357,23 @@ amap_alloc1(int slots, int padslots, int
>  
>       totalslots = slots + padslots;
>       KASSERT(totalslots > 0);
> -
>       if (totalslots > UVM_AMAP_CHUNK)
> -             totalslots = malloc_roundup(totalslots * MALLOC_SLOT_UNIT) /
> -                 MALLOC_SLOT_UNIT;
> +             totalslots = malloc_roundup(totalslots *
> +                 sizeof(struct vm_anon *)) / sizeof(struct vm_anon *);
>  
> -     amap->am_ref = 1;
> -     amap->am_flags = 0;
> +     amap->am_flgref = 1;
>  #ifdef UVM_AMAP_PPREF
>       amap->am_ppref = NULL;
>  #endif
>       amap->am_maxslot = totalslots;
>       amap->am_nslot = slots;
>       amap->am_nused = 0;
> -
> -     if (totalslots > UVM_AMAP_CHUNK)
> -             amap->am_slots = malloc(totalslots * MALLOC_SLOT_UNIT,
> -                 M_UVMAMAP, waitf);
> -     else
> -             amap->am_slots = pool_get(
> -                 &uvm_amap_slot_pools[totalslots - 1],
> -                 (waitf == M_WAITOK) ? PR_WAITOK : PR_NOWAIT);
> -
> -     if (amap->am_slots == NULL)
> -             goto fail1;
> -
> -     amap->am_bckptr = (int *)(((char *)amap->am_slots) + totalslots *
> -         sizeof(int));
> -     amap->am_anon = (struct vm_anon **)(((char *)amap->am_bckptr) +
> -         totalslots * sizeof(int));
> +     if (amap_slots_alloc(amap, totalslots, waitf, NULL) == NULL) {
> +             pool_put(&uvm_amap_pool, amap);
> +             return (NULL);
> +     }
>  
>       return(amap);
> -
> -fail1:
> -     pool_put(&uvm_amap_pool, amap);
> -     return (NULL);
>  }
>  
>  /*
> @@ -236,14 +386,14 @@ struct vm_amap *
>  amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
>  {
>       struct vm_amap *amap;
> -     int slots, padslots;
> +     u_int slots, padslots;
>  
>       AMAP_B2SLOT(slots, sz);         /* load slots */
>       AMAP_B2SLOT(padslots, padsz);
>  
>       amap = amap_alloc1(slots, padslots, waitf);
>       if (amap) {
> -             memset(amap->am_anon, 0,
> +             memset(AMAP_ANON(amap), 0,
>                   amap->am_maxslot * sizeof(struct vm_anon *));
>               amap_list_insert(amap);
>       }
> @@ -260,15 +410,27 @@ amap_alloc(vaddr_t sz, vaddr_t padsz, in
>  void
>  amap_free(struct vm_amap *amap)
>  {
> +     u_int nclust;
>  
> -     KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
> -     KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
> +     KASSERT(amap_refs(amap) == 0 && amap->am_nused == 0);
> +     KASSERT((amap_flags(amap) & AMAP_SWAPOFF) == 0);
>  
> -     if (amap->am_maxslot > UVM_AMAP_CHUNK)
> -             free(amap->am_slots, M_UVMAMAP, 0);
> -     else
> -             pool_put(&uvm_amap_slot_pools[amap->am_maxslot - 1],
> -                 amap->am_slots);
> +     if (amap->am_maxslot > UVM_AMAP_MAXSLOT_SMALL) {
> +             nclust = roundup(amap->am_maxslot, AMAP_SLOTPERCLUST) /
> +                 AMAP_SLOTPERCLUST;
> +             free(amap->am_meta->am_anon, M_UVMAMAP,
> +                 amap->am_maxslot * sizeof *amap->am_meta->am_anon);
> +             free(amap->am_meta->am_clust, M_UVMAMAP,
> +                 nclust * sizeof *amap->am_meta->am_clust);
> +             free(amap->am_meta->am_bckptr, M_UVMAMAP,
> +                 nclust * sizeof *amap->am_meta->am_bckptr);
> +             free(amap->am_meta, M_UVMAMAP, sizeof *amap->am_meta);
> +     } else if (amap->am_maxslot > UVM_AMAP_CHUNK)
> +             free(amap->am_anon, M_UVMAMAP,
> +                 amap->am_maxslot * sizeof *amap->am_anon);
> +     else if (amap->am_maxslot > 1)
> +             pool_put(&uvm_amap_slot_pools[amap->am_maxslot - 2],
> +                 amap->am_anon);
>  
>  #ifdef UVM_AMAP_PPREF
>       if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
> @@ -291,14 +453,14 @@ int
>  amap_extend(struct vm_map_entry *entry, vsize_t addsize)
>  {
>       struct vm_amap *amap = entry->aref.ar_amap;
> -     int slotoff = entry->aref.ar_pageoff;
> -     int slotmapped, slotadd, slotneed, slotalloc;
> +     u_int slotoff = entry->aref.ar_pageoff;
> +     u_int slotmapped, slotadd, slotneed, slotalloc;
>  #ifdef UVM_AMAP_PPREF
>       int *newppref, *oldppref;
>  #endif
> -     u_int *newsl, *newbck, *oldsl, *oldbck;
> +     struct vm_amap_meta *newmeta, *oldmeta;
>       struct vm_anon **newover, **oldover;
> -     int slotadded;
> +     u_int slotadded;
>  
>       /*
>        * first, determine how many slots we need in the amap.  don't
> @@ -355,9 +517,11 @@ amap_extend(struct vm_map_entry *entry, 
>       if (slotneed >= UVM_AMAP_LARGE)
>               return E2BIG;
>  
> +     KASSERT(slotneed > 1);
> +     KASSERT(amap->am_maxslot < slotneed);
>       if (slotneed > UVM_AMAP_CHUNK)
> -             slotalloc = malloc_roundup(slotneed * MALLOC_SLOT_UNIT) /
> -                 MALLOC_SLOT_UNIT;
> +             slotalloc = malloc_roundup(slotneed *
> +                 sizeof(struct vm_anon *)) / sizeof(struct vm_anon *);
>       else
>               slotalloc = slotneed;
>  
> @@ -373,13 +537,9 @@ amap_extend(struct vm_map_entry *entry, 
>               }
>       }
>  #endif
> -     if (slotneed > UVM_AMAP_CHUNK)
> -             newsl = malloc(slotalloc * MALLOC_SLOT_UNIT, M_UVMAMAP,
> -                 M_WAITOK | M_CANFAIL);
> -     else
> -             newsl = pool_get(&uvm_amap_slot_pools[slotalloc - 1],
> -                 PR_WAITOK | PR_LIMITFAIL);
> -     if (newsl == NULL) {
> +     newover = amap_slots_alloc(NULL, slotalloc, M_WAITOK | M_CANFAIL,
> +         &newmeta);
> +     if (newover == NULL) {
>  #ifdef UVM_AMAP_PPREF
>               if (newppref != NULL) {
>                       free(newppref, M_UVMAMAP, 0);
> @@ -387,31 +547,60 @@ amap_extend(struct vm_map_entry *entry, 
>  #endif
>               return (ENOMEM);
>       }
> -     newbck = (int *)(((char *)newsl) + slotalloc * sizeof(int));
> -     newover = (struct vm_anon **)(((char *)newbck) + slotalloc *
> -         sizeof(int));
> -     KASSERT(amap->am_maxslot < slotneed);
>  
>       /* now copy everything over to new malloc'd areas... */
>       slotadded = slotalloc - amap->am_nslot;
>  
> -     /* do am_slots */
> -     oldsl = amap->am_slots;
> -     memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
> -     amap->am_slots = newsl;
> -
>       /* do am_anon */
> -     oldover = amap->am_anon;
> +     oldover = AMAP_ANON(amap);
> +     oldmeta = amap->am_meta;
>       memcpy(newover, oldover, sizeof(struct vm_anon *) * amap->am_nslot);
>       memset(newover + amap->am_nslot, 0, sizeof(struct vm_anon *) *
>           slotadded);
> -     amap->am_anon = newover;
>  
> -     /* do am_bckptr */
> -     oldbck = amap->am_bckptr;
> -     memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
> -     memset(newbck + amap->am_nslot, 0, sizeof(int) * slotadded); /* XXX: needed? */
> -     amap->am_bckptr = newbck;
> +     if (amap->am_maxslot == 1) {
> +             /* do am_meta and am_bckptr */
> +             if (slotalloc > UVM_AMAP_MAXSLOT_SMALL) {
> +                     if (amap->am_diranon != NULL) {
> +                             newmeta->am_nused = 1;
> +                             newmeta->am_clust[0].ac_clust = 0;
> +                             newmeta->am_clust[0].ac_map = 1;
> +                             newmeta->am_bckptr[0] = 0;
> +                     }
> +             }
> +             /* am_nused is correctly set to 1 or 0 already */
> +     } else if (amap->am_maxslot <= UVM_AMAP_MAXSLOT_SMALL) {
> +             /* do am_meta and am_bckptr */
> +             if (slotalloc > UVM_AMAP_MAXSLOT_SMALL) {
> +                     newmeta->am_clust[0].ac_clust = 0;
> +                     newmeta->am_clust[0].ac_map = amap->am_nused;
> +                     newmeta->am_bckptr[0] = 0;
> +                     int i, map = amap->am_nused;
> +
> +                     amap->am_nused = 0;
> +                     for (i = ffs(map); i != 0; i = ffs(map)) {
> +                             amap->am_nused++;
> +                             map &= ~(1 << (i - 1));
> +                     }
> +                     if (amap->am_nused)
> +                             newmeta->am_nused = 1;
> +             } /* else slotalloc <= UVM_AMAP_MAXSLOT_SMALL => bitmap ok */
> +     } else {
> +             newmeta->am_nused = oldmeta->am_nused;
> +
> +             /* do am_meta */
> +             memcpy(newmeta->am_clust, oldmeta->am_clust,
> +                 sizeof(*newmeta->am_clust) * oldmeta->am_nused);
> +
> +             /* do am_bckptr */
> +             memcpy(newmeta->am_bckptr, oldmeta->am_bckptr,
> +                 sizeof(*newmeta->am_bckptr) * oldmeta->am_nused);
> +     }
> +
> +     if (slotalloc <= UVM_AMAP_MAXSLOT_SMALL)
> +             amap->am_anon = newover;
> +     else
> +             amap->am_meta = newmeta;
>  
>  #ifdef UVM_AMAP_PPREF
>       /* do ppref */
> @@ -429,11 +618,22 @@ amap_extend(struct vm_map_entry *entry, 
>  #endif
>  
>       /* free */
> -     if (amap->am_maxslot > UVM_AMAP_CHUNK)
> -             free(oldsl, M_UVMAMAP, 0);
> -     else
> -             pool_put(&uvm_amap_slot_pools[amap->am_maxslot - 1],
> -                 oldsl);
> +     if (amap->am_maxslot > UVM_AMAP_MAXSLOT_SMALL) {
> +             u_int nclust = roundup(slotalloc, AMAP_SLOTPERCLUST) /
> +                 AMAP_SLOTPERCLUST;
> +
> +             free(oldmeta->am_anon, M_UVMAMAP,
> +                 amap->am_maxslot * sizeof *oldmeta->am_anon);
> +             free(oldmeta->am_clust, M_UVMAMAP,
> +                 nclust * sizeof *oldmeta->am_clust);
> +             free(oldmeta->am_bckptr, M_UVMAMAP,
> +                 nclust * sizeof *oldmeta->am_bckptr);
> +             free(oldmeta, M_UVMAMAP, sizeof *oldmeta);
> +     } else if (amap->am_maxslot > UVM_AMAP_CHUNK)
> +             free(oldover, M_UVMAMAP, 0);
> +     else if (amap->am_maxslot > 1)
> +             pool_put(&uvm_amap_slot_pools[amap->am_maxslot - 2],
> +                 oldover);
>  
>       /* and update master values */
>       amap->am_nslot = slotneed;
> @@ -446,6 +646,23 @@ amap_extend(struct vm_map_entry *entry, 
>       return (0);
>  }
>  
> +void
> +amap_wipeout_traverse(struct vm_anon **anonp, u_int map, u_int slotoff)
> +{
> +     u_int lcv, slot;
> +     struct vm_anon *anon;
> +
> +     for (lcv = ffs(map); lcv != 0; lcv = ffs(map)) {
> +             slot = lcv - 1;
> +             map &= ~(1 << slot);
> +             anon = anonp[slotoff + slot];
> +             if (anon == NULL || anon->an_ref == 0)
> +                     panic("amap_wipeout: corrupt amap %p", anon);
> +
> +             amap_anon_release(anon);
> +     }
> +}
> +
>  /*
>   * amap_wipeout: wipeout all anon's in an amap; then free the amap!
>   *
> @@ -456,35 +673,32 @@ amap_extend(struct vm_map_entry *entry, 
>  void
>  amap_wipeout(struct vm_amap *amap)
>  {
> -     int lcv, slot;
> -     struct vm_anon *anon;
> +     u_int i, slotoff;
> +     struct vm_amap_meta *meta = amap->am_meta;
>  
> -     KASSERT(amap->am_ref == 0);
> +     KASSERT(amap_refs(amap) == 0);
>  
> -     if (__predict_false((amap->am_flags & AMAP_SWAPOFF) != 0)) {
> +     if (__predict_false((amap_flags(amap) & AMAP_SWAPOFF) != 0)) {
>               /* amap_swap_off will call us again. */
>               return;
>       }
>       amap_list_remove(amap);
>  
> -     for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
> -             int refs;
> -
> -             slot = amap->am_slots[lcv];
> -             anon = amap->am_anon[slot];
> -
> -             if (anon == NULL || anon->an_ref == 0)
> -                     panic("amap_wipeout: corrupt amap");
> -
> -             refs = --anon->an_ref;
> -             if (refs == 0) {
> -                     /* we had the last reference to a vm_anon. free it. */
> -                     uvm_anfree(anon);
> +     if (amap->am_maxslot == 1) {
> +             if (amap->am_diranon != NULL)
> +                     amap_wipeout_traverse(&amap->am_diranon, 1, 0);
> +     } else if (amap->am_maxslot <= UVM_AMAP_MAXSLOT_SMALL)
> +             amap_wipeout_traverse(amap->am_anon, amap->am_nused, 0);
> +     else {
> +             for (i = 0; i < meta->am_nused; i++) {
> +                     slotoff = AMAP_C2S(meta->am_clust[i].ac_clust);
> +                     amap_wipeout_traverse(meta->am_anon,
> +                         meta->am_clust[i].ac_map, slotoff);
>               }
>       }
>  
>       /* now we free the map */
> -     amap->am_ref = 0;       /* ... was one */
> +     amap->am_flgref &= AMAP_FLAGMASK;       /* set refcount 1 -> 0 */
>       amap->am_nused = 0;
>       amap_free(amap);        /* will free amap */
>  }
> @@ -506,7 +720,8 @@ amap_copy(struct vm_map *map, struct vm_
>      boolean_t canchunk, vaddr_t startva, vaddr_t endva)
>  {
>       struct vm_amap *amap, *srcamap;
> -     int slots, lcv;
> +     struct vm_anon **anonp, **srcanonp;
> +     u_int slots, lcv;
>       vaddr_t chunksize;
>  
>       /* is there a map to copy?   if not, create one from scratch. */
> @@ -542,7 +757,7 @@ amap_copy(struct vm_map *map, struct vm_
>        * just take it over rather than copying it.  the value can only
>        * be one if we have the only reference to the amap
>        */
> -     if (entry->aref.ar_amap->am_ref == 1) {
> +     if (amap_refs(entry->aref.ar_amap) == 1) {
>               entry->etype &= ~UVM_ET_NEEDSCOPY;
>               return;
>       }
> @@ -560,25 +775,24 @@ amap_copy(struct vm_map *map, struct vm_
>        * dropped down to one we take over the old map rather than
>        * copying the amap.
>        */
> -     if (srcamap->am_ref == 1) {             /* take it over? */
> +     if (amap_refs(srcamap) == 1) {          /* take it over? */
>               entry->etype &= ~UVM_ET_NEEDSCOPY;
> -             amap->am_ref--;         /* drop final reference to map */
> +             amap->am_flgref--;      /* drop final reference to map */
>               amap_free(amap);        /* dispose of new (unused) amap */
>               return;
>       }
>  
>       /* we must copy it now. */
> +     anonp = AMAP_ANON(amap);
> +     srcanonp = AMAP_ANON(srcamap);
>       for (lcv = 0 ; lcv < slots; lcv++) {
> -             amap->am_anon[lcv] =
> -                 srcamap->am_anon[entry->aref.ar_pageoff + lcv];
> -             if (amap->am_anon[lcv] == NULL)
> +             anonp[lcv] = srcanonp[entry->aref.ar_pageoff + lcv];
> +             if (anonp[lcv] == NULL)
>                       continue;
> -             amap->am_anon[lcv]->an_ref++;
> -             amap->am_bckptr[lcv] = amap->am_nused;
> -             amap->am_slots[amap->am_nused] = lcv;
> -             amap->am_nused++;
> +             anonp[lcv]->an_ref++;
> +             amap_fill_slot(amap, lcv);
>       }
> -     memset(&amap->am_anon[lcv], 0,
> +     memset(&anonp[lcv], 0,
>           (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));
>  
>       /*
> @@ -587,9 +801,10 @@ amap_copy(struct vm_map *map, struct vm_
>        * one (we checked above), so there is no way we could drop
>        * the count to zero.  [and no need to worry about freeing it]
>        */
> -     srcamap->am_ref--;
> -     if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
> -             srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
> +     srcamap->am_flgref--;
> +     if (amap_refs(srcamap) == 1 &&
> +         (amap_flags(srcamap) & AMAP_SHARED) != 0)
> +             srcamap->am_flgref &= ~AMAP_SHARED;   /* clear shared flag */
>  #ifdef UVM_AMAP_PPREF
>       if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
>               amap_pp_adjref(srcamap, entry->aref.ar_pageoff, 
> @@ -605,38 +820,18 @@ amap_copy(struct vm_map *map, struct vm_
>       amap_list_insert(amap);
>  }
>  
> -/*
> - * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
> - *
> - *   called during fork(2) when the parent process has a wired map
> - *   entry.   in that case we want to avoid write-protecting pages
> - *   in the parent's map (e.g. like what you'd do for a COW page)
> - *   so we resolve the COW here.
> - *
> - * => assume parent's entry was wired, thus all pages are resident.
> - * => caller passes child's map/entry in to us
> - * => XXXCDC: out of memory should cause fork to fail, but there is
> - *   currently no easy way to do this (needs fix)
> - */
> -
> -void
> -amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
> +int
> +amap_cow_now_traverse(struct vm_anon **anonp, u_int map, u_int slotoff)
>  {
> -     struct vm_amap *amap = entry->aref.ar_amap;
> -     int lcv, slot;
> +     u_int lcv, slot;
>       struct vm_anon *anon, *nanon;
>       struct vm_page *pg, *npg;
>  
> -     /*
> -      * note that if we wait, we must ReStart the "lcv" for loop because
> -      * some other process could reorder the anon's in the
> -      * am_anon[] array on us.
> -      */
> -ReStart:
> -     for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
> -             /* get the page */
> -             slot = amap->am_slots[lcv];
> -             anon = amap->am_anon[slot];
> +     for (lcv = ffs(map); lcv != 0; lcv = ffs(map)) {
> +             slot = lcv - 1;
> +             map &= ~(1 << slot);
> +
> +             anon = anonp[slotoff + slot];
>               pg = anon->an_page;
>  
>               /* page must be resident since parent is wired */
> @@ -656,7 +851,7 @@ ReStart:
>                       if (pg->pg_flags & PG_BUSY) {
>                               atomic_setbits_int(&pg->pg_flags, PG_WANTED);
>                               UVM_WAIT(pg, FALSE, "cownow", 0);
> -                             goto ReStart;
> +                             return 1;
>                       }
>  
>                       /* ok, time to do a copy-on-write to a new anon */
> @@ -676,16 +871,16 @@ ReStart:
>                                       uvm_anfree(nanon);
>                               }
>                               uvm_wait("cownowpage");
> -                             goto ReStart;
> +                             return 1;
>                       }
> -     
> +
>                       /*
>                        * got it... now we can copy the data and replace anon
>                        * with our new one...
>                        */
>                       uvm_pagecopy(pg, npg);          /* old -> new */
>                       anon->an_ref--;                 /* can't drop to zero */
> -                     amap->am_anon[slot] = nanon;    /* replace */
> +                     anonp[slotoff + slot] = nanon;  /* replace */
>  
>                       /*
>                        * drop PG_BUSY on new page ... since we have had its
> @@ -699,6 +894,52 @@ ReStart:
>                       uvm_unlock_pageq();
>               }
>       }
> +     return 0;
> +}
> +
> +/*
> + * amap_cow_now: resolve all copy-on-write faults in an amap now for fork(2)
> + *
> + *   called during fork(2) when the parent process has a wired map
> + *   entry.   in that case we want to avoid write-protecting pages
> + *   in the parent's map (e.g. like what you'd do for a COW page)
> + *   so we resolve the COW here.
> + *
> + * => assume parent's entry was wired, thus all pages are resident.
> + * => caller passes child's map/entry in to us
> + * => XXXCDC: out of memory should cause fork to fail, but there is
> + *   currently no easy way to do this (needs fix)
> + */
> +
> +void
> +amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
> +{
> +     struct vm_amap *amap = entry->aref.ar_amap;
> +     u_int i, slotoff;
> +
> +     /*
> +      * note that if we wait, we must ReStart the "lcv" for loop because
> +      * some other process could reorder the anon's in the
> +      * am_anon[] array on us.
> +      */
> +ReStart:
> +     if (amap->am_maxslot == 1) {
> +             if (amap->am_diranon != NULL) {
> +                     if (amap_cow_now_traverse(&amap->am_diranon, 1, 0))
> +                             goto ReStart;
> +             }
> +     } else if (amap->am_maxslot <= UVM_AMAP_MAXSLOT_SMALL) {
> +             if (amap_cow_now_traverse(amap->am_anon, amap->am_nused, 0))
> +                     goto ReStart;
> +     } else {
> +             for (i = 0; i < amap->am_meta->am_nused; i++) {
> +                     slotoff = AMAP_C2S(
> +                         amap->am_meta->am_clust[i].ac_clust);
> +                     if (amap_cow_now_traverse(amap->am_meta->am_anon,
> +                         amap->am_meta->am_clust[i].ac_map, slotoff))
> +                             goto ReStart;
> +             }
> +     }
>  }
>  
>  /*
> @@ -726,7 +967,7 @@ amap_splitref(struct vm_aref *origref, s
>  #endif
>  
>       splitref->ar_amap = origref->ar_amap;
> -     splitref->ar_amap->am_ref++;            /* not a share reference */
> +     splitref->ar_amap->am_flgref++;         /* not a share reference */
>       splitref->ar_pageoff = origref->ar_pageoff + leftslots;
>  }
>  
> @@ -749,7 +990,7 @@ amap_pp_establish(struct vm_amap *amap)
>       }
>  
>       /* init ppref */
> -     pp_setreflen(amap->am_ppref, 0, amap->am_ref, amap->am_nslot);
> +     pp_setreflen(amap->am_ppref, 0, amap_refs(amap), amap->am_nslot);
>  }
>  
>  /*
> @@ -829,66 +1070,110 @@ amap_pp_adjref(struct vm_amap *amap, int
>  void
>  amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
>  {
> -     int byanon, lcv, stop, curslot, ptr, slotend;
> +     u_int i, clustslot, lcv, map, mask, curslot, slotend, slotbase, ptr;
>       struct vm_anon *anon;
>  
> +     /* Fast path for anons with only a few slots */
> +     if (amap->am_maxslot == 1) {
> +             anon = amap->am_diranon;
> +             if (anon != NULL) {
> +                     amap->am_diranon = NULL;
> +                     amap->am_nused = 0;
> +                     amap_anon_release(anon);
> +             }
> +             return;
> +     } else if (amap->am_maxslot <= UVM_AMAP_MAXSLOT_SMALL) {
> +             mask = ((1 << slots) - 1) << slotoff;
> +             map = amap->am_nused & mask;
> +
> +             for (lcv = ffs(map); lcv != 0; lcv = ffs(map)) {
> +                     curslot = lcv - 1;
> +                     map &= ~(1 << curslot);
> +                     anon = amap->am_anon[curslot];
> +                     amap->am_anon[curslot] = NULL;
> +                     amap->am_nused &= ~(1 << curslot);
> +                     amap_anon_release(anon);
> +             }
> +             return;
> +     }
> +
>       /*
> -      * we can either traverse the amap by am_anon or by am_slots depending
> +      * we can either traverse the amap by am_anon or by am_clust depending
>        * on which is cheaper.    decide now.
>        */
>       if (slots < amap->am_nused) {
> -             byanon = TRUE;
> -             lcv = slotoff;
> -             stop = slotoff + slots;
> -     } else {
> -             byanon = FALSE;
> -             lcv = 0;
> -             stop = amap->am_nused;
> -             slotend = slotoff + slots;
> -     }
> -
> -     while (lcv < stop) {
> -             int refs;
> -
> -             if (byanon) {
> -                     curslot = lcv++;        /* lcv advances here */
> -                     if (amap->am_anon[curslot] == NULL)
> +             for (curslot = slotoff; curslot < slotoff + slots; curslot++) {
> +                     if (amap->am_meta->am_anon[curslot] == NULL)
>                               continue;
> -             } else {
> -                     curslot = amap->am_slots[lcv];
> -                     if (curslot < slotoff || curslot >= slotend) {
> -                             lcv++;          /* lcv advances here */
> -                             continue;
> -                     }
> -                     stop--; /* drop stop, since anon will be removed */
> +                     amap_normal_wipe_slot(amap, curslot);
>               }
> -             anon = amap->am_anon[curslot];
> +             return;
> +     }
>  
> -             /* remove it from the amap */
> -             amap->am_anon[curslot] = NULL;
> -             ptr = amap->am_bckptr[curslot];
> -             if (ptr != (amap->am_nused - 1)) {
> -                     amap->am_slots[ptr] =
> -                         amap->am_slots[amap->am_nused - 1];
> -                     amap->am_bckptr[amap->am_slots[ptr]] =
> -                         ptr;    /* back ptr. */
> -             }
> -             amap->am_nused--;
> -
> -             /* drop anon reference count */
> -             refs = --anon->an_ref;
> -             if (refs == 0) {
> -                     /*
> -                      * we just eliminated the last reference to an anon.
> -                      * free it.
> -                      */
> -                     uvm_anfree(anon);
> +     slotend = slotoff + slots;
> +     clustslot = AMAP_SLOTCHUNK(slotoff);
> +     for (i = AMAP_S2C(slotoff); i <= AMAP_S2C(slotend);
> +         i++, clustslot = 0) {
> +             ptr = amap->am_meta->am_bckptr[i];
> +             if (ptr >= amap->am_meta->am_nused ||
> +                 amap->am_meta->am_clust[ptr].ac_clust != i)
> +                     continue;
> +
> +             slotbase = AMAP_C2S(i);
> +             if (slotend - slotbase - clustslot >= AMAP_SLOTPERCLUST)
> +                     mask = ~(u_int)0;
> +             else
> +                     mask = (1 << (slotend - slotbase - clustslot)) - 1;
> +             mask <<= clustslot;
> +
> +             map = amap->am_meta->am_clust[ptr].ac_map & mask;
> +             for (lcv = ffs(map); lcv != 0; lcv = ffs(map)) {
> +                     lcv--;
> +                     map &= ~(1 << lcv);
> +                     curslot = slotbase + lcv;
> +
> +                     KASSERT(curslot >= slotoff && curslot < slotend);
> +                     amap_normal_wipe_slot(amap, curslot);
>               }
>       }
>  }
>  
>  #endif
>  
> +int
> +amap_swap_off_traverse(struct vm_amap *am, struct vm_anon **anonp, u_int map,
> +    u_int slotoff, int startslot, int endslot)
> +{
> +     int err = 0;
> +     u_int lcv, slot, swslot;
> +     struct vm_anon *anon;
> +     boolean_t rv;
> +
> +     for (lcv = ffs(map); lcv != 0; lcv = ffs(map)) {
> +             slot = lcv - 1;
> +             map &= ~(1 << slot);
> +             anon = anonp[slotoff + slot];
> +
> +             swslot = anon->an_swslot;
> +             if (swslot < startslot || endslot <= swslot) {
> +                     continue;
> +             }
> +
> +             am->am_flgref |= AMAP_SWAPOFF;
> +             rv = uvm_anon_pagein(anon);
> +             am->am_flgref &= ~AMAP_SWAPOFF;
> +
> +             err = EAGAIN;
> +             if (amap_refs(am) == 0)
> +                     err = ENOENT;
> +             if (rv)
> +                     err = EIO;
> +             break;
> +     }
> +
> +     return err;
> +}
> +
>  /*
>   * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
>   *
> @@ -902,59 +1187,40 @@ amap_swap_off(int startslot, int endslot
>  {
>       struct vm_amap *am;
>       struct vm_amap *am_next;
> -     struct vm_amap marker_prev;
> -     struct vm_amap marker_next;
> -     boolean_t rv = FALSE;
> -
> -#if defined(DIAGNOSTIC)
> -     memset(&marker_prev, 0, sizeof(marker_prev));
> -     memset(&marker_next, 0, sizeof(marker_next));
> -#endif /* defined(DIAGNOSTIC) */
> -
> -     for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
> -             int i;
> -
> -             LIST_INSERT_BEFORE(am, &marker_prev, am_list);
> -             LIST_INSERT_AFTER(am, &marker_next, am_list);
> -
> -             for (i = 0; i < am->am_nused; i++) {
> -                     int slot;
> -                     int swslot;
> -                     struct vm_anon *anon;
> +     int err = 0;
> +     u_int i, slotoff;
>  
> -                     slot = am->am_slots[i];
> -                     anon = am->am_anon[slot];
> -
> -                     swslot = anon->an_swslot;
> -                     if (swslot < startslot || endslot <= swslot) {
> -                             continue;
> +     for (am = LIST_FIRST(&amap_list); am != NULL; am = am_next) {
> +             if (am->am_maxslot == 1) {
> +                     if (am->am_diranon != NULL)
> +                             err = amap_swap_off_traverse(am,
> +                                 &am->am_diranon, 1, 0, startslot, endslot);
> +             } else if (am->am_maxslot <= UVM_AMAP_MAXSLOT_SMALL)
> +                     err = amap_swap_off_traverse(am, am->am_anon,
> +                         am->am_nused, 0, startslot, endslot);
> +             else {
> +                     for (i = 0; !err && i < am->am_meta->am_nused; i++) {
> +                             slotoff = AMAP_C2S(
> +                                 am->am_meta->am_clust[i].ac_clust);
> +                             err = amap_swap_off_traverse(am,
> +                                 am->am_meta->am_anon,
> +                                 am->am_meta->am_clust[i].ac_map, slotoff,
> +                                 startslot, endslot);
>                       }
> +             }
>  
> -                     am->am_flags |= AMAP_SWAPOFF;
> -
> -                     rv = uvm_anon_pagein(anon);
> -
> -                     am->am_flags &= ~AMAP_SWAPOFF;
> -                     if (amap_refs(am) == 0) {
> +             if (err == EIO)
> +                     return TRUE;
> +             if (err == EAGAIN)
> +                     am_next = am;
> +             else {
> +                     am_next = LIST_NEXT(am, am_list);
> +                     if (err == ENOENT)
>                               amap_wipeout(am);
> -                             am = NULL;
> -                             break;
> -                     }
> -                     if (rv) {
> -                             break;
> -                     }
> -                     i = 0;
>               }
> -
> -             KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next ||
> -                 LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) ==
> -                 &marker_next);
> -             am_next = LIST_NEXT(&marker_next, am_list);
> -             LIST_REMOVE(&marker_prev, am_list);
> -             LIST_REMOVE(&marker_next, am_list);
>       }
>  
> -     return rv;
> +     return FALSE;
>  }
>  
>  /*
> @@ -963,7 +1229,7 @@ amap_swap_off(int startslot, int endslot
>  struct vm_anon *
>  amap_lookup(struct vm_aref *aref, vaddr_t offset)
>  {
> -     int slot;
> +     u_int slot;
>       struct vm_amap *amap = aref->ar_amap;
>  
>       AMAP_B2SLOT(slot, offset);
> @@ -972,7 +1238,7 @@ amap_lookup(struct vm_aref *aref, vaddr_
>       if (slot >= amap->am_nslot)
>               panic("amap_lookup: offset out of range");
>  
> -     return(amap->am_anon[slot]);
> +     return(AMAP_ANON(amap)[slot]);
>  }
>  
>  /*
> @@ -984,7 +1250,7 @@ void
>  amap_lookups(struct vm_aref *aref, vaddr_t offset,
>      struct vm_anon **anons, int npages)
>  {
> -     int slot;
> +     u_int slot;
>       struct vm_amap *amap = aref->ar_amap;
>  
>       AMAP_B2SLOT(slot, offset);
> @@ -993,9 +1259,8 @@ amap_lookups(struct vm_aref *aref, vaddr
>       if ((slot + (npages - 1)) >= amap->am_nslot)
>               panic("amap_lookups: offset out of range");
>  
> -     memcpy(anons, &amap->am_anon[slot], npages * sizeof(struct vm_anon *));
> -
> -     return;
> +     memcpy(anons, &AMAP_ANON(amap)[slot],
> +         npages * sizeof(struct vm_anon *));
>  }
>  
>  /*
> @@ -1007,8 +1272,9 @@ void
>  amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
>      boolean_t replace)
>  {
> -     int slot;
> +     u_int slot;
>       struct vm_amap *amap = aref->ar_amap;
> +     struct vm_anon **anonp = AMAP_ANON(amap);
>  
>       AMAP_B2SLOT(slot, offset);
>       slot += aref->ar_pageoff;
> @@ -1017,25 +1283,22 @@ amap_add(struct vm_aref *aref, vaddr_t o
>               panic("amap_add: offset out of range");
>  
>       if (replace) {
> -             if (amap->am_anon[slot] == NULL)
> +             if (anonp[slot] == NULL)
>                       panic("amap_add: replacing null anon");
> -             if (amap->am_anon[slot]->an_page != NULL && 
> -                 (amap->am_flags & AMAP_SHARED) != 0) {
> -                     pmap_page_protect(amap->am_anon[slot]->an_page,
> -                         PROT_NONE);
> +             if (anonp[slot]->an_page != NULL &&
> +                 (amap_flags(amap) & AMAP_SHARED) != 0) {
> +                     pmap_page_protect(anonp[slot]->an_page, PROT_NONE);
>                       /*
>                        * XXX: suppose page is supposed to be wired somewhere?
>                        */
>               }
>       } else {   /* !replace */
> -             if (amap->am_anon[slot] != NULL)
> +             if (anonp[slot] != NULL)
>                       panic("amap_add: slot in use");
>  
> -             amap->am_bckptr[slot] = amap->am_nused;
> -             amap->am_slots[amap->am_nused] = slot;
> -             amap->am_nused++;
> +             amap_fill_slot(amap, slot);
>       }
> -     amap->am_anon[slot] = anon;
> +     anonp[slot] = anon;
>  }
>  
>  /*
> @@ -1044,8 +1307,9 @@ amap_add(struct vm_aref *aref, vaddr_t o
>  void
>  amap_unadd(struct vm_aref *aref, vaddr_t offset)
>  {
> -     int ptr, slot;
> +     u_int slot;
>       struct vm_amap *amap = aref->ar_amap;
> +     struct vm_anon **anonp = AMAP_ANON(amap);
>  
>       AMAP_B2SLOT(slot, offset);
>       slot += aref->ar_pageoff;
> @@ -1053,17 +1317,11 @@ amap_unadd(struct vm_aref *aref, vaddr_t
>       if (slot >= amap->am_nslot)
>               panic("amap_unadd: offset out of range");
>  
> -     if (amap->am_anon[slot] == NULL)
> +     if (anonp[slot] == NULL)
>               panic("amap_unadd: nothing there");
>  
> -     amap->am_anon[slot] = NULL;
> -     ptr = amap->am_bckptr[slot];
> -
> -     if (ptr != (amap->am_nused - 1)) {      /* swap to keep slots contig? */
> -             amap->am_slots[ptr] = amap->am_slots[amap->am_nused - 1];
> -             amap->am_bckptr[amap->am_slots[ptr]] = ptr;     /* back link */
> -     }
> -     amap->am_nused--;
> +     anonp[slot] = NULL;
> +     amap_clear_slot(amap, slot);
>  }
>  
>  /*
> @@ -1076,9 +1334,9 @@ void
>  amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags)
>  {
>  
> -     amap->am_ref++;
> +     amap->am_flgref++;
>       if (flags & AMAP_SHARED)
> -             amap->am_flags |= AMAP_SHARED;
> +             amap->am_flgref |= AMAP_SHARED;
>  #ifdef UVM_AMAP_PPREF
>       if (amap->am_ppref == NULL && (flags & AMAP_REFALL) == 0 &&
>           len != amap->am_nslot)
> @@ -1105,14 +1363,15 @@ amap_unref(struct vm_amap *amap, vaddr_t
>  {
>  
>       /* if we are the last reference, free the amap and return. */
> -     if (amap->am_ref-- == 1) {
> +     amap->am_flgref--;
> +     if (amap_refs(amap) == 0) {
>               amap_wipeout(amap);     /* drops final ref and frees */
>               return;
>       }
>  
>       /* otherwise just drop the reference count(s) */
> -     if (amap->am_ref == 1 && (amap->am_flags & AMAP_SHARED) != 0)
> -             amap->am_flags &= ~AMAP_SHARED; /* clear shared flag */
> +     if (amap_refs(amap) == 1 && (amap_flags(amap) & AMAP_SHARED) != 0)
> +             amap->am_flgref &= ~AMAP_SHARED;        /* clear shared flag */
>  #ifdef UVM_AMAP_PPREF
>       if (amap->am_ppref == NULL && all == 0 && len != amap->am_nslot)
>               amap_pp_establish(amap);
> Index: uvm/uvm_amap.h
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_amap.h,v
> retrieving revision 1.21
> diff -u -p -r1.21 uvm_amap.h
> --- uvm/uvm_amap.h    6 Mar 2016 14:47:07 -0000       1.21
> +++ uvm/uvm_amap.h    21 Mar 2016 18:53:45 -0000
> @@ -98,15 +98,17 @@ void              amap_unref(struct vm_amap *, vaddr
>  void         amap_wipeout(struct vm_amap *);
>  boolean_t    amap_swap_off(int, int);
>  
> +#endif /* _KERNEL */
> +
>  /*
>   * amap flag values
>   */
>  
> -#define AMAP_SHARED  0x1     /* amap is shared */
> -#define AMAP_REFALL  0x2     /* amap_ref: reference entire amap */
> -#define AMAP_SWAPOFF 0x4     /* amap_swap_off() is in progress */
> +#define AMAP_SHARED  0x80000000U     /* amap is shared */
> +#define AMAP_REFALL  0x40000000U     /* amap_ref: reference entire amap */
> +#define AMAP_SWAPOFF 0x20000000U     /* amap_swap_off() is in progress */
>  
> -#endif /* _KERNEL */
> +#define AMAP_FLAGMASK        0xe0000000U
>  
>  /**********************************************************************/
>  
> @@ -123,52 +125,80 @@ boolean_t       amap_swap_off(int, int);
>  
>  #define UVM_AMAP_PPREF               /* track partial references */
>  
> +struct vm_amap_meta;
> +
>  /*
>   * here is the definition of the vm_amap structure for this implementation.
>   */
>  
> +/*
> + * amaps come in three flavors: tiny, small, and normal:
> + * - tiny amaps contain exactly one slot and embed the pointer
> + *   to the anon directly (one slot = an entry in the amap to
> + *   represent a page)
> + * - small amaps contain up to UVM_AMAP_MAXSLOT_SMALL slots and store
> + *   a pointer to an array of anons.
> + * - normal amaps contain more than UVM_AMAP_MAXSLOT_SMALL slots
> + *   and store a pointer to meta information. The meta data
> + *   contains an array of anons and additional information to
> + *   keep track of used slots efficiently.
> + *
> + * For tiny amaps, am_nused is either 0 or 1, depending on whether
> + * the anon is present.
> + * For small amaps, am_nused is a bitmap of the slots that are used.
> + *
> + * For normal amaps, am_nused is the number of occupied slots.
> + * In normal amaps, these slots are packed into clusters of AMAP_SLOTPERCLUST
> + * slots each. For each cluster, there is a bitmap that represents which
> + * slots are occupied.
> + *
> + * The clusters are organized as an array. This allows fast mapping of a
> + * page to its cluster. To be able to traverse all occupied slots in an
> + * amap quickly, non-empty clusters are packed in the array am_clust.
> + * am_nused in the meta data contains the number of entries in am_clust.
> + *
> + * In order to find the cluster for a given page, am_bckptr is used to
> + * map a cluster to a position in am_clust.
> + *
> + * For example, take an amap with 12 slots and clusters of 4 slots each,
> + * and assume that slots 7, 5, 3 got filled in that order.
> + * Then the amap meta data would look like this:
> + *
> + * am_nused: 2
> + * am_clust: { 1, 0xa }, { 0, 0x8 }
> + * am_bckptr: 1, 0, (invalid)
> + */
>  struct vm_amap {
> -     int am_ref;             /* reference count */
> -     int am_flags;           /* flags */
> -     int am_maxslot;         /* max # of slots allocated */
> -     int am_nslot;           /* # of slots currently in map ( <= maxslot) */
> -     int am_nused;           /* # of slots currently in use */
> -     int *am_slots;          /* contig array of active slots */
> -     int *am_bckptr;         /* back pointer array to am_slots */
> -     struct vm_anon **am_anon; /* array of anonymous pages */
> +     u_int am_flgref;        /* flags and reference count */
> +     u_int am_maxslot;       /* max # of slots allocated */
> +     u_int am_nslot;         /* # of slots currently in map ( <= maxslot) */
> +     u_int am_nused;         /* # of slots currently in use */
> +
> +     union { /* fields used in the tiny, small or normal amap flavor */
> +             struct vm_anon *am_diranon;     /* tiny: pointer to anon */
> +             struct vm_anon **am_anon;       /* small: array of anons */
> +             struct vm_amap_meta *am_meta;   /* normal: meta data */
> +     };
> +
>  #ifdef UVM_AMAP_PPREF
>       int *am_ppref;          /* per page reference count (if !NULL) */
>  #endif
>       LIST_ENTRY(vm_amap) am_list;
>  };
>  
> -/*
> - * note that am_slots, am_bckptr, and am_anon are arrays.   this allows
> - * fast lookup of pages based on their virual address at the expense of
> - * some extra memory.   in the future we should be smarter about memory
> - * usage and fall back to a non-array based implementation on systems 
> - * that are short of memory (XXXCDC).
> - *
> - * the entries in the array are called slots... for example an amap that
> - * covers four pages of virtual memory is said to have four slots.   here
> - * is an example of the array usage for a four slot amap.   note that only
> - * slots one and three have anons assigned to them.  "D/C" means that we
> - * "don't care" about the value.
> - * 
> - *            0     1      2     3
> - * am_anon:   NULL, anon0, NULL, anon1               (actual pointers to anons)
> - * am_bckptr: D/C,  1,     D/C,  0           (points to am_slots entry)
> - *
> - * am_slots:  3, 1, D/C, D/C                 (says slots 3 and 1 are in use)
> - * 
> - * note that am_bckptr is D/C if the slot in am_anon is set to NULL.
> - * to find the entry in am_slots for an anon, look at am_bckptr[slot],
> - * thus the entry for slot 3 in am_slots[] is at am_slots[am_bckptr[3]].
> - * in general, if am_anon[X] is non-NULL, then the following must be
> - * true: am_slots[am_bckptr[X]] == X
> - *
> - * note that am_slots is always contig-packed.
> - */
> +struct vm_amap_clust {
> +     u_int ac_clust;
> +     u_int ac_map;
> +};
> +
> +struct vm_amap_meta {
> +     struct vm_anon **am_anon;
> +     struct vm_amap_clust *am_clust;
> +     u_int *am_bckptr;
> +     u_int am_nused;
> +};
> +
> +#define UVM_AMAP_MAXSLOT_SMALL       32
>  
>  /*
>   * defines for handling of large sparce amaps:
> @@ -210,6 +240,13 @@ struct vm_amap {
>  #define UVM_AMAP_LARGE       256     /* # of slots in "large" amap */
>  #define UVM_AMAP_CHUNK       16      /* # of slots to chunk large amaps in */
>  
> +/*
> + * flags and reference count macros
> + */
> +
> +#define amap_flags(AMAP)     ((AMAP)->am_flgref & AMAP_FLAGMASK)
> +#define amap_refs(AMAP)              ((AMAP)->am_flgref & ~AMAP_FLAGMASK)
> +
>  #ifdef _KERNEL
>  
>  /*
> @@ -222,12 +259,17 @@ struct vm_amap {
>       (S) = (B) >> PAGE_SHIFT;                                        \
>  }
>  
> -/*
> - * flags macros
> - */
> +#define AMAP_SLOTPERCLUST    32
> +
> +#define AMAP_S2C(slot)       ((slot) / AMAP_SLOTPERCLUST)
> +#define AMAP_C2S(clust)      ((clust) * AMAP_SLOTPERCLUST)
> +
> +#define AMAP_SLOTCHUNK(slot) ((slot) % AMAP_SLOTPERCLUST)
>  
> -#define amap_flags(AMAP)     ((AMAP)->am_flags)
> -#define amap_refs(AMAP)              ((AMAP)->am_ref)
> +#define AMAP_ANON(amap)                                                      \
> +    ((amap)->am_maxslot == 1 ? &(amap)->am_diranon :                 \
> +     ((amap)->am_maxslot <= UVM_AMAP_MAXSLOT_SMALL ? (amap)->am_anon :       \
> +      (amap)->am_meta->am_anon))
>  
>  /*
>   * if we enable PPREF, then we have a couple of extra functions that
> Index: usr.sbin//procmap/procmap.c
> ===================================================================
> RCS file: /cvs/src/usr.sbin/procmap/procmap.c,v
> retrieving revision 1.59
> diff -u -p -r1.59 procmap.c
> --- usr.sbin//procmap/procmap.c       19 Jan 2015 19:25:28 -0000      1.59
> +++ usr.sbin//procmap/procmap.c       21 Mar 2016 18:51:49 -0000
> @@ -785,16 +785,37 @@ dump_vm_map_entry(kvm_t *kd, struct kbit
>       }
>  
>       if (print_amap && vme->aref.ar_amap) {
> -             printf(" amap - ref: %d fl: 0x%x maxsl: %d nsl: %d nuse: %d\n",
> -                 D(amap, vm_amap)->am_ref,
> -                 D(amap, vm_amap)->am_flags,
> -                 D(amap, vm_amap)->am_maxslot,
> -                 D(amap, vm_amap)->am_nslot,
> -                 D(amap, vm_amap)->am_nused);
> +             char *flags = "?";
> +             u_int ref, maxslot, nslot, nused, map;
> +
> +             if (amap_flags(D(amap, vm_amap)) == (AMAP_SHARED|AMAP_SWAPOFF))
> +                     flags = "shared|swapoff";
> +             else if (amap_flags(D(amap, vm_amap)) == AMAP_SHARED)
> +                     flags = "shared";
> +             else if (amap_flags(D(amap, vm_amap)) == AMAP_SWAPOFF)
> +                     flags = "swapoff";
> +             else if (amap_flags(D(amap, vm_amap)) == 0)
> +                     flags = "none";
> +
> +             nslot = D(amap, vm_amap)->am_nslot;
> +             maxslot = D(amap, vm_amap)->am_maxslot;
> +             if (maxslot <= UVM_AMAP_MAXSLOT_SMALL) {
> +                     map = D(amap, vm_amap)->am_nused;
> +                     nused = 0;
> +                     while (map) {
> +                             if (map & 1)
> +                                     nused++;
> +                             map >>= 1;
> +                     }
> +             } else
> +                     nused = D(amap, vm_amap)->am_nused;
> +
> +             printf(" amap - ref: %u fl: %s maxsl: %u nsl: %u nuse: %u\n",
> +                 amap_refs(D(amap, vm_amap)), flags, maxslot, nslot, nused);
>               if (sum) {
> -                     sum->s_am_nslots += D(amap, vm_amap)->am_nslot;
> -                     sum->s_am_maxslots += D(amap, vm_amap)->am_maxslot;
> -                     sum->s_am_nusedslots += D(amap, vm_amap)->am_nused;
> +                     sum->s_am_nslots += nslot;
> +                     sum->s_am_maxslots += maxslot;
> +                     sum->s_am_nusedslots += nused;
>               }
>       }
>  
> 
> 
