Mark Kettenis wrote:
> > Date: Mon, 21 Mar 2016 20:02:28 +0100
> > From: Stefan Kempf <[email protected]>
> > 
> > Recently we found that amaps consume a good deal of kernel address space.
> > See this thread: https://marc.info/?l=openbsd-tech&m=145752756005014&w=2.
> > And we found a way to reduce kernel mem pressure for some architectures
> > at least. See the diffs in that thread.
> > 
> > Besides that, it's possible to shrink the amap struct from 72 to 48 bytes
> > on 64 bit systems (or from 44 to 32 bytes on 32 bit systems).
> > 
> > It's also possible to cut down the memory needed for slots roughly in half
> > on 64 bit architectures, and cut it up to a factor of 3 on 32 bit machines.
> > 
> > Here's how amap slots are maintained currently: for every slot, the kernel
> > allocates one pointer to a vm_anon and two ints (16 bytes per slot on
> > 64 bit systems, 12 bytes for 32 bit CPUs).
> > 
> > To reduce these memory requirements, we need three flavors of amaps:
> > 
> > - Tiny amaps with only one slot store the pointer to the vm_anon in the
> >   amap directly. The two ints are not needed. This was Theo's idea.
> > 
> > - Small amaps with up to 32 slots need 8 instead of 16 bytes per slot
> >   (or 4 bytes instead of 12 on 32 bit machines).
> >   It's enough to store the array of anons. The two ints per slot are
> >   not needed.
> > 
> >   Tiny and small amaps are the ones used most often.
> > 
> > - Normal amaps with n > 32 slots only need
> >   4 * sizeof(pointer) + n * sizeof(struct vm_anon *) + 12*n/32 bytes
> >   to maintain amap slots. For large n that's also around 1.8 times
> >   less memory for slots (or about 2.7 times less on 32 bit CPUs) compared
> >   to the current implementation.
> >   That memory is for the vm_anon array, and a header structure. The two
> >   ints per slot in the current code are replaced by n/32 bitmaps.
> > 
> [...]
> 
> Is it really worth having three flavours?  If having only two
> (tiny+normal?) simplifies the code considerably and doesn't result in
> much more memory being used, that may be preferable.

I think it's possible to simplify the current diff a little more.
If it's still considered too complex then doing small and normal
only would be an option.

I'll experiment some more.
 
> The amaps are one of the roadblocks on the way to make uvm more
> mpsafe.  And keeping the code simple will make that easier.
> 
> > Index: uvm/uvm_amap.c
> > ===================================================================
> > RCS file: /cvs/src/sys/uvm/uvm_amap.c,v
> > retrieving revision 1.62
> > diff -u -p -r1.62 uvm_amap.c
> > --- uvm/uvm_amap.c  16 Mar 2016 16:53:43 -0000      1.62
> > +++ uvm/uvm_amap.c  21 Mar 2016 18:53:45 -0000
> > @@ -53,21 +53,31 @@
> >  struct pool uvm_amap_pool;
> >  
> >  /* Pools for amap slots for the most common amap slot sizes */
> > -struct pool uvm_amap_slot_pools[UVM_AMAP_CHUNK];
> > +struct pool uvm_amap_slot_pools[UVM_AMAP_CHUNK - 1];
> >  
> >  LIST_HEAD(, vm_amap) amap_list;
> >  
> > -static char amap_slot_pool_names[UVM_AMAP_CHUNK][13];
> > -
> > -#define MALLOC_SLOT_UNIT (2 * sizeof(int) + sizeof(struct vm_anon *))
> > +static char amap_slot_pool_names[UVM_AMAP_CHUNK - 1][13];
> >  
> >  /*
> >   * local functions
> >   */
> >  
> > -static struct vm_amap *amap_alloc1(int, int, int);
> > +struct vm_anon **amap_slots_alloc(struct vm_amap *, u_int, int,
> > +    struct vm_amap_meta **);
> > +static struct vm_amap *amap_alloc1(u_int, u_int, int);
> >  static __inline void amap_list_insert(struct vm_amap *);
> >  static __inline void amap_list_remove(struct vm_amap *);   
> > +static __inline void amap_anon_release(struct vm_anon *);
> > +void amap_fill_slot(struct vm_amap *, u_int);
> > +void amap_normal_clear_slot(struct vm_amap *, u_int);
> > +void amap_clear_slot(struct vm_amap *, u_int);
> > +static __inline void amap_normal_wipe_slot(struct vm_amap *, u_int);
> > +
> > +void amap_wipeout_traverse(struct vm_anon **, u_int, u_int);
> > +int amap_cow_now_traverse(struct vm_anon **, u_int, u_int);
> > +int amap_swap_off_traverse(struct vm_amap *, struct vm_anon **, u_int, 
> > u_int,
> > +    int, int);
> >  
> >  static __inline void
> >  amap_list_insert(struct vm_amap *amap)
> > @@ -81,6 +91,99 @@ amap_list_remove(struct vm_amap *amap)
> >     LIST_REMOVE(amap, am_list);
> >  }
> >  
> > +static __inline void
> > +amap_anon_release(struct vm_anon *anon)
> > +{
> > +   int refs;
> > +
> > +   refs = --anon->an_ref;
> > +   if (refs == 0) {
> > +           /* we had the last reference to a vm_anon. free it. */
> > +           uvm_anfree(anon);
> > +   }
> > +}
> > +
> > +void
> > +amap_fill_slot(struct vm_amap *amap, u_int slot)
> > +{
> > +   u_int clust, ptr;
> > +   struct vm_amap_meta *meta;
> > +   struct vm_amap_clust *slotclust;
> > +
> > +   if (amap->am_maxslot == 1) {
> > +           amap->am_nused = 1;
> > +           return;
> > +   } else if (amap->am_maxslot <= UVM_AMAP_MAXSLOT_SMALL) {
> > +           amap->am_nused |= 1 << slot;
> > +           return;
> > +   }
> > +
> > +   amap->am_nused++;
> > +
> > +   meta = amap->am_meta;
> > +   clust = AMAP_S2C(slot);
> > +   ptr = meta->am_bckptr[clust];
> > +
> > +   if (ptr >= meta->am_nused ||
> > +       meta->am_clust[ptr].ac_clust != clust) {
> > +           meta->am_bckptr[clust] = meta->am_nused;
> > +           slotclust = &meta->am_clust[meta->am_nused];
> > +           slotclust->ac_clust = clust;
> > +           slotclust->ac_map = 1 << AMAP_SLOTCHUNK(slot);
> > +           meta->am_nused++;
> > +
> > +   } else {
> > +           slotclust = &meta->am_clust[ptr];
> > +           slotclust->ac_map |= 1 << AMAP_SLOTCHUNK(slot);
> > +   }
> > +}
> > +
> > +void
> > +amap_normal_clear_slot(struct vm_amap *amap, u_int slot)
> > +{
> > +   u_int clust, ptr;
> > +   struct vm_amap_meta *meta;
> > +
> > +   amap->am_nused--;
> > +
> > +   meta = amap->am_meta;
> > +   clust = AMAP_S2C(slot);
> > +   ptr = meta->am_bckptr[clust];
> > +
> > +   meta->am_clust[ptr].ac_map &= ~(1 << AMAP_SLOTCHUNK(slot));
> > +   if (meta->am_clust[ptr].ac_map != 0)
> > +           return;
> > +
> > +   if (ptr != (meta->am_nused - 1)) {      /* swap to keep slots contig? */
> > +           meta->am_clust[ptr] = meta->am_clust[meta->am_nused - 1];
> > +           meta->am_bckptr[meta->am_clust[ptr].ac_clust] = ptr;
> > +   }
> > +
> > +   meta->am_nused--;
> > +}
> > +
> > +void
> > +amap_clear_slot(struct vm_amap *amap, u_int slot)
> > +{
> > +   if (amap->am_maxslot == 1)
> > +           amap->am_nused = 0;
> > +   else if (amap->am_maxslot <= UVM_AMAP_MAXSLOT_SMALL)
> > +           amap->am_nused &= ~(1 << slot);
> > +   else
> > +           amap_normal_clear_slot(amap, slot);
> > +}
> > +
> > +static __inline void
> > +amap_normal_wipe_slot(struct vm_amap *amap, u_int slot)
> > +{
> > +   struct vm_anon *anon;
> > +
> > +   anon = amap->am_meta->am_anon[slot];
> > +   amap->am_meta->am_anon[slot] = NULL;
> > +   amap_normal_clear_slot(amap, slot);
> > +   amap_anon_release(anon);
> > +}
> > +
> >  #ifdef UVM_AMAP_PPREF
> >  /*
> >   * what is ppref?   ppref is an _optional_ amap feature which is used
> > @@ -165,22 +268,87 @@ amap_init(void)
> >  
> >     for (i = 0; i < nitems(uvm_amap_slot_pools); i++) {
> >             snprintf(amap_slot_pool_names[i],
> > -               sizeof(amap_slot_pool_names[0]), "amapslotpl%d", i + 1);
> > -           pool_init(&uvm_amap_slot_pools[i], (i + 1) * MALLOC_SLOT_UNIT,
> > -               0, 0, PR_WAITOK, amap_slot_pool_names[i], NULL);
> > +               sizeof(amap_slot_pool_names[0]), "amapslotpl%d", i + 2);
> > +           pool_init(&uvm_amap_slot_pools[i],
> > +               (i + 2) * sizeof(struct vm_anon *), 0, 0, PR_WAITOK,
> > +               amap_slot_pool_names[i], NULL);
> >             pool_sethiwat(&uvm_amap_slot_pools[i], 4096);
> >     }
> >  }
> >  
> > +struct vm_anon **
> > +amap_slots_alloc(struct vm_amap *amap, u_int totalslots, int waitf,
> > +    struct vm_amap_meta **metap)
> > +{
> > +   struct vm_anon **anon;
> > +   struct vm_amap_meta *meta;
> > +   struct vm_amap_clust *clust;
> > +   u_int *bckptr;
> > +   u_int nclust;
> > +   int pwaitf = PR_NOWAIT;
> > +   size_t size;
> > +
> > +   if (totalslots == 1)
> > +           return amap == NULL ? NULL : &amap->am_diranon;
> > +   else if (totalslots <= UVM_AMAP_CHUNK) {
> > +           if (waitf & M_WAITOK) {
> > +                   pwaitf = PR_WAITOK;
> > +                   if (waitf & M_CANFAIL)
> > +                           pwaitf |= PR_LIMITFAIL;
> > +           }
> > +           meta = NULL;
> > +           anon = pool_get(&uvm_amap_slot_pools[totalslots - 2], pwaitf);
> > +
> > +           if (amap != NULL)
> > +                   amap->am_anon = anon;
> > +   } else if (totalslots <= UVM_AMAP_MAXSLOT_SMALL) {
> > +           meta = NULL;
> > +           anon = mallocarray(totalslots, sizeof *anon, M_UVMAMAP, waitf);
> > +           if (amap != NULL)
> > +                   amap->am_anon = anon;
> > +   } else {
> > +           nclust = roundup(totalslots, AMAP_SLOTPERCLUST) /
> > +               AMAP_SLOTPERCLUST;
> > +           size = sizeof(struct vm_amap_meta) +
> > +               totalslots * sizeof(struct vm_anon *) +
> > +               nclust * (sizeof(int) + sizeof(struct vm_amap_clust));
> > +
> > +           meta = malloc(sizeof *meta, M_UVMAMAP, waitf);
> > +           anon = mallocarray(totalslots, sizeof *anon, M_UVMAMAP, waitf);
> > +           clust = mallocarray(nclust, sizeof *clust, M_UVMAMAP, waitf);
> > +           bckptr = mallocarray(nclust, sizeof *bckptr, M_UVMAMAP, waitf);
> > +           if (meta == NULL || anon == NULL || clust == NULL ||
> > +               bckptr == NULL) {
> > +                   free(meta, M_UVMAMAP, sizeof *meta);
> > +                   free(anon, M_UVMAMAP, totalslots * sizeof *anon);
> > +                   free(clust, M_UVMAMAP, nclust * sizeof *clust);
> > +                   free(bckptr, M_UVMAMAP, nclust * sizeof *bckptr);
> > +                   return NULL;
> > +           }
> > +
> > +           meta->am_anon = anon;
> > +           meta->am_clust = clust;
> > +           meta->am_bckptr = bckptr;
> > +           meta->am_nused = 0;
> > +
> > +           if (amap != NULL)
> > +                   amap->am_meta = meta;
> > +   }
> > +
> > +   if (metap != NULL)
> > +           *metap = meta;
> > +   return anon;
> > +}
> > +
> >  /*
> >   * amap_alloc1: internal function that allocates an amap, but does not
> >   * init the overlay.
> >   */
> >  static inline struct vm_amap *
> > -amap_alloc1(int slots, int padslots, int waitf)
> > +amap_alloc1(u_int slots, u_int padslots, int waitf)
> >  {
> >     struct vm_amap *amap;
> > -   int totalslots;
> > +   u_int totalslots;
> >  
> >     amap = pool_get(&uvm_amap_pool, (waitf == M_WAITOK) ? PR_WAITOK
> >         : PR_NOWAIT);
> > @@ -189,41 +357,23 @@ amap_alloc1(int slots, int padslots, int
> >  
> >     totalslots = slots + padslots;
> >     KASSERT(totalslots > 0);
> > -
> >     if (totalslots > UVM_AMAP_CHUNK)
> > -           totalslots = malloc_roundup(totalslots * MALLOC_SLOT_UNIT) /
> > -               MALLOC_SLOT_UNIT;
> > +           totalslots = malloc_roundup(totalslots *
> > +               sizeof(struct vm_anon *)) / sizeof(struct vm_anon *);
> >  
> > -   amap->am_ref = 1;
> > -   amap->am_flags = 0;
> > +   amap->am_flgref = 1;
> >  #ifdef UVM_AMAP_PPREF
> >     amap->am_ppref = NULL;
> >  #endif
> >     amap->am_maxslot = totalslots;
> >     amap->am_nslot = slots;
> >     amap->am_nused = 0;
> > -
> > -   if (totalslots > UVM_AMAP_CHUNK)
> > -           amap->am_slots = malloc(totalslots * MALLOC_SLOT_UNIT,
> > -               M_UVMAMAP, waitf);
> > -   else
> > -           amap->am_slots = pool_get(
> > -               &uvm_amap_slot_pools[totalslots - 1],
> > -               (waitf == M_WAITOK) ? PR_WAITOK : PR_NOWAIT);
> > -
> > -   if (amap->am_slots == NULL)
> > -           goto fail1;
> > -
> > -   amap->am_bckptr = (int *)(((char *)amap->am_slots) + totalslots *
> > -       sizeof(int));
> > -   amap->am_anon = (struct vm_anon **)(((char *)amap->am_bckptr) +
> > -       totalslots * sizeof(int));
> > +   if (amap_slots_alloc(amap, totalslots, waitf, NULL) == NULL) {
> > +           pool_put(&uvm_amap_pool, amap);
> > +           return (NULL);
> > +   }
> >  
> >     return(amap);
> > -
> > -fail1:
> > -   pool_put(&uvm_amap_pool, amap);
> > -   return (NULL);
> >  }
> >  
> >  /*
> > @@ -236,14 +386,14 @@ struct vm_amap *
> >  amap_alloc(vaddr_t sz, vaddr_t padsz, int waitf)
> >  {
> >     struct vm_amap *amap;
> > -   int slots, padslots;
> > +   u_int slots, padslots;
> >  
> >     AMAP_B2SLOT(slots, sz);         /* load slots */
> >     AMAP_B2SLOT(padslots, padsz);
> >  
> >     amap = amap_alloc1(slots, padslots, waitf);
> >     if (amap) {
> > -           memset(amap->am_anon, 0,
> > +           memset(AMAP_ANON(amap), 0,
> >                 amap->am_maxslot * sizeof(struct vm_anon *));
> >             amap_list_insert(amap);
> >     }
> > @@ -260,15 +410,27 @@ amap_alloc(vaddr_t sz, vaddr_t padsz, in
> >  void
> >  amap_free(struct vm_amap *amap)
> >  {
> > +   u_int nclust;
> >  
> > -   KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
> > -   KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
> > +   KASSERT(amap_refs(amap) == 0 && amap->am_nused == 0);
> > +   KASSERT((amap_flags(amap) & AMAP_SWAPOFF) == 0);
> >  
> > -   if (amap->am_maxslot > UVM_AMAP_CHUNK)
> > -           free(amap->am_slots, M_UVMAMAP, 0);
> > -   else
> > -           pool_put(&uvm_amap_slot_pools[amap->am_maxslot - 1],
> > -               amap->am_slots);
> > +   if (amap->am_maxslot > UVM_AMAP_MAXSLOT_SMALL) {
> > +           nclust = roundup(amap->am_maxslot, AMAP_SLOTPERCLUST) /
> > +               AMAP_SLOTPERCLUST;
> > +           free(amap->am_meta->am_anon, M_UVMAMAP,
> > +               amap->am_maxslot * sizeof *amap->am_meta->am_anon);
> > +           free(amap->am_meta->am_clust, M_UVMAMAP,
> > +               nclust * sizeof *amap->am_meta->am_clust);
> > +           free(amap->am_meta->am_bckptr, M_UVMAMAP,
> > +               nclust * sizeof *amap->am_meta->am_bckptr);
> > +           free(amap->am_meta, M_UVMAMAP, sizeof *amap->am_meta);
> > +   } else if (amap->am_maxslot > UVM_AMAP_CHUNK)
> > +           free(amap->am_anon, M_UVMAMAP,
> > +               amap->am_maxslot * sizeof *amap->am_anon);
> > +   else if (amap->am_maxslot > 1)
> > +           pool_put(&uvm_amap_slot_pools[amap->am_maxslot - 2],
> > +               amap->am_anon);
> >  
> >  #ifdef UVM_AMAP_PPREF
> >     if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
> > @@ -291,14 +453,14 @@ int
> >  amap_extend(struct vm_map_entry *entry, vsize_t addsize)
> >  {
> >     struct vm_amap *amap = entry->aref.ar_amap;
> > -   int slotoff = entry->aref.ar_pageoff;
> > -   int slotmapped, slotadd, slotneed, slotalloc;
> > +   u_int slotoff = entry->aref.ar_pageoff;
> > +   u_int slotmapped, slotadd, slotneed, slotalloc;
> >  #ifdef UVM_AMAP_PPREF
> >     int *newppref, *oldppref;
> >  #endif
> > -   u_int *newsl, *newbck, *oldsl, *oldbck;
> > +   struct vm_amap_meta *newmeta, *oldmeta;
> >     struct vm_anon **newover, **oldover;
> > -   int slotadded;
> > +   u_int slotadded;
> >  
> >     /*
> >      * first, determine how many slots we need in the amap.  don't
> > @@ -355,9 +517,11 @@ amap_extend(struct vm_map_entry *entry, 
> >     if (slotneed >= UVM_AMAP_LARGE)
> >             return E2BIG;
> >  
> > +   KASSERT(slotneed > 1);
> > +   KASSERT(amap->am_maxslot < slotneed);
> >     if (slotneed > UVM_AMAP_CHUNK)
> > -           slotalloc = malloc_roundup(slotneed * MALLOC_SLOT_UNIT) /
> > -               MALLOC_SLOT_UNIT;
> > +           slotalloc = malloc_roundup(slotneed *
> > +               sizeof(struct vm_anon *)) / sizeof(struct vm_anon *);
> >     else
> >             slotalloc = slotneed;
> >  
> > @@ -373,13 +537,9 @@ amap_extend(struct vm_map_entry *entry, 
> >             }
> >     }
> >  #endif
> > -   if (slotneed > UVM_AMAP_CHUNK)
> > -           newsl = malloc(slotalloc * MALLOC_SLOT_UNIT, M_UVMAMAP,
> > -               M_WAITOK | M_CANFAIL);
> > -   else
> > -           newsl = pool_get(&uvm_amap_slot_pools[slotalloc - 1],
> > -               PR_WAITOK | PR_LIMITFAIL);
> > -   if (newsl == NULL) {
> > +   newover = amap_slots_alloc(NULL, slotalloc, M_WAITOK | M_CANFAIL,
> > +       &newmeta);
> > +   if (newover == NULL) {
> >  #ifdef UVM_AMAP_PPREF
> >             if (newppref != NULL) {
> >                     free(newppref, M_UVMAMAP, 0);
> > @@ -387,31 +547,60 @@ amap_extend(struct vm_map_entry *entry, 
> >  #endif
> >             return (ENOMEM);
> >     }
> > -   newbck = (int *)(((char *)newsl) + slotalloc * sizeof(int));
> > -   newover = (struct vm_anon **)(((char *)newbck) + slotalloc *
> > -       sizeof(int));
> > -   KASSERT(amap->am_maxslot < slotneed);
> >  
> >     /* now copy everything over to new malloc'd areas... */
> >     slotadded = slotalloc - amap->am_nslot;
> >  
> > -   /* do am_slots */
> > -   oldsl = amap->am_slots;
> > -   memcpy(newsl, oldsl, sizeof(int) * amap->am_nused);
> > -   amap->am_slots = newsl;
> > -
> >     /* do am_anon */
> > -   oldover = amap->am_anon;
> > +   oldover = AMAP_ANON(amap);
> > +   oldmeta = amap->am_meta;
> >     memcpy(newover, oldover, sizeof(struct vm_anon *) * amap->am_nslot);
> >     memset(newover + amap->am_nslot, 0, sizeof(struct vm_anon *) *
> >         slotadded);
> > -   amap->am_anon = newover;
> >  
> > -   /* do am_bckptr */
> > -   oldbck = amap->am_bckptr;
> > -   memcpy(newbck, oldbck, sizeof(int) * amap->am_nslot);
> > -   memset(newbck + amap->am_nslot, 0, sizeof(int) * slotadded); /* XXX: 
> > needed? */
> > -   amap->am_bckptr = newbck;
> > +   if (amap->am_maxslot == 1) {
> > +           /* do am_meta and am_bckptr */
> > +           if (slotalloc > UVM_AMAP_MAXSLOT_SMALL) {
> > +                   if (amap->am_diranon != NULL) {
> > +                           newmeta->am_nused = 1;
> > +                           newmeta->am_clust[0].ac_clust = 0;
> > +                           newmeta->am_clust[0].ac_map = 1;
> > +                           newmeta->am_bckptr[0] = 0;
> > +                   }
> > +           }
> > +           /* am_nused is correctly set to 1 or 0 already */
> > +   } else if (amap->am_maxslot <= UVM_AMAP_MAXSLOT_SMALL) {
> > +           /* do am_meta and am_bckptr */
> > +           if (slotalloc > UVM_AMAP_MAXSLOT_SMALL) {
> > +                   newmeta->am_clust[0].ac_clust = 0;
> > +                   newmeta->am_clust[0].ac_map = amap->am_nused;
> > +                   newmeta->am_bckptr[0] = 0;
> > +                   int i, map = amap->am_nused;
> > +
> > +                   amap->am_nused = 0;
> > +                   for (i = ffs(map); i != 0; i = ffs(map)) {
> > +                           amap->am_nused++;
> > +                           map &= ~(1 << (i - 1));
> > +                   }
> > +                   if (amap->am_nused)
> > +                           newmeta->am_nused = 1;
> > +           } /* else slotalloc <= UVM_AMAP_MAXSLOT_SMALL => bitmap ok */
> > +   } else {
> > +           newmeta->am_nused = oldmeta->am_nused;
> > +
> > +           /* do am_meta */
> > +           memcpy(newmeta->am_clust, oldmeta->am_clust,
> > +               sizeof(*newmeta->am_clust) * oldmeta->am_nused);
> > +
> > +           /* do am_bckptr */
> > +           memcpy(newmeta->am_bckptr, oldmeta->am_bckptr,
> > +               sizeof(*newmeta->am_bckptr) * oldmeta->am_nused);
> > +   }
> > +
> > +   if (slotalloc <= UVM_AMAP_MAXSLOT_SMALL)
> > +           amap->am_anon = newover;
> > +   else
> > +           amap->am_meta = newmeta;
> >  
> >  #ifdef UVM_AMAP_PPREF
> >     /* do ppref */
> > @@ -429,11 +618,22 @@ amap_extend(struct vm_map_entry *entry, 
> >  #endif
> >  
> >     /* free */
> > -   if (amap->am_maxslot > UVM_AMAP_CHUNK)
> > -           free(oldsl, M_UVMAMAP, 0);
> > -   else
> > -           pool_put(&uvm_amap_slot_pools[amap->am_maxslot - 1],
> > -               oldsl);
> > +   if (amap->am_maxslot > UVM_AMAP_MAXSLOT_SMALL) {
> > +           u_int nclust = roundup(slotalloc, AMAP_SLOTPERCLUST) /
> > +               AMAP_SLOTPERCLUST;
> > +
> > +           free(oldmeta->am_anon, M_UVMAMAP,
> > +               amap->am_maxslot * sizeof *oldmeta->am_anon);
> > +           free(oldmeta->am_clust, M_UVMAMAP,
> > +               nclust * sizeof *oldmeta->am_clust);
> > +           free(oldmeta->am_bckptr, M_UVMAMAP,
> > +               nclust * sizeof *oldmeta->am_bckptr);
> > +           free(oldmeta, M_UVMAMAP, sizeof *oldmeta);
> > +   } else if (amap->am_maxslot > UVM_AMAP_CHUNK)
> > +           free(oldover, M_UVMAMAP, 0);
> > +   else if (amap->am_maxslot > 1)
> > +           pool_put(&uvm_amap_slot_pools[amap->am_maxslot - 2],
> > +               oldover);
> >  
> >     /* and update master values */
> >     amap->am_nslot = slotneed;
> > @@ -446,6 +646,23 @@ amap_extend(struct vm_map_entry *entry, 
> >     return (0);
> >  }
> >  
> > +void
> > +amap_wipeout_traverse(struct vm_anon **anonp, u_int map, u_int slotoff)
> > +{
> > +   u_int lcv, slot;
> > +   struct vm_anon *anon;
> > +
> > +   for (lcv = ffs(map); lcv != 0; lcv = ffs(map)) {
> > +           slot = lcv - 1;
> > +           map &= ~(1 << slot);
> > +           anon = anonp[slotoff + slot];
> > +           if (anon == NULL || anon->an_ref == 0)
> > +                   panic("amap_wipeout: corrupt amap %p", anon);
> > +
> > +           amap_anon_release(anon);
> > +   }
> > +}
> > +
> >  /*
> >   * amap_wipeout: wipeout all anon's in an amap; then free the amap!
> >   *
> > @@ -456,35 +673,32 @@ amap_extend(struct vm_map_entry *entry, 
> >  void
> >  amap_wipeout(struct vm_amap *amap)
> >  {
> > -   int lcv, slot;
> > -   struct vm_anon *anon;
> > +   u_int i, slotoff;
> > +   struct vm_amap_meta *meta = amap->am_meta;
> >  
> > -   KASSERT(amap->am_ref == 0);
> > +   KASSERT(amap_refs(amap) == 0);
> >  
> > -   if (__predict_false((amap->am_flags & AMAP_SWAPOFF) != 0)) {
> > +   if (__predict_false((amap_flags(amap) & AMAP_SWAPOFF) != 0)) {
> >             /* amap_swap_off will call us again. */
> >             return;
> >     }
> >     amap_list_remove(amap);
> >  
> > -   for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
> > -           int refs;
> > -
> > -           slot = amap->am_slots[lcv];
> > -           anon = amap->am_anon[slot];
> > -
> > -           if (anon == NULL || anon->an_ref == 0)
> > -                   panic("amap_wipeout: corrupt amap");
> > -
> > -           refs = --anon->an_ref;
> > -           if (refs == 0) {
> > -                   /* we had the last reference to a vm_anon. free it. */
> > -                   uvm_anfree(anon);
> > +   if (amap->am_maxslot == 1) {
> > +           if (amap->am_diranon != NULL)
> > +                   amap_wipeout_traverse(&amap->am_diranon, 1, 0);
> > +   } else if (amap->am_maxslot <= UVM_AMAP_MAXSLOT_SMALL)
> > +           amap_wipeout_traverse(amap->am_anon, amap->am_nused, 0);
> > +   else {
> > +           for (i = 0; i < meta->am_nused; i++) {
> > +                   slotoff = AMAP_C2S(meta->am_clust[i].ac_clust);
> > +                   amap_wipeout_traverse(meta->am_anon,
> > +                       meta->am_clust[i].ac_map, slotoff);
> >             }
> >     }
> >  
> >     /* now we free the map */
> > -   amap->am_ref = 0;       /* ... was one */
> > +   amap->am_flgref &= AMAP_FLAGMASK;       /* set refcount 1 -> 0 */
> >     amap->am_nused = 0;
> >     amap_free(amap);        /* will free amap */
> >  }
> > @@ -506,7 +720,8 @@ amap_copy(struct vm_map *map, struct vm_
> >      boolean_t canchunk, vaddr_t startva, vaddr_t endva)
> >  {
> >     struct vm_amap *amap, *srcamap;
> > -   int slots, lcv;
> > +   struct vm_anon **anonp, **srcanonp;
> > +   u_int slots, lcv;
> >     vaddr_t chunksize;
> >  
> >     /* is there a map to copy?   if not, create one from scratch. */
> > @@ -542,7 +757,7 @@ amap_copy(struct vm_map *map, struct vm_
> >      * just take it over rather than copying it.  the value can only
> >      * be one if we have the only reference to the amap
> >      */
> > -   if (entry->aref.ar_amap->am_ref == 1) {
> > +   if (amap_refs(entry->aref.ar_amap) == 1) {
> >             entry->etype &= ~UVM_ET_NEEDSCOPY;
> >             return;
> >     }
> > @@ -560,25 +775,24 @@ amap_copy(struct vm_map *map, struct vm_
> >      * dropped down to one we take over the old map rather than
> >      * copying the amap.
> >      */
> > -   if (srcamap->am_ref == 1) {             /* take it over? */
> > +   if (amap_refs(srcamap) == 1) {          /* take it over? */
> >             entry->etype &= ~UVM_ET_NEEDSCOPY;
> > -           amap->am_ref--;         /* drop final reference to map */
> > +           amap->am_flgref--;      /* drop final reference to map */
> >             amap_free(amap);        /* dispose of new (unused) amap */
> >             return;
> >     }
> >  
> >     /* we must copy it now. */
> > +   anonp = AMAP_ANON(amap);
> > +   srcanonp = AMAP_ANON(srcamap);
> >     for (lcv = 0 ; lcv < slots; lcv++) {
> > -           amap->am_anon[lcv] =
> > -               srcamap->am_anon[entry->aref.ar_pageoff + lcv];
> > -           if (amap->am_anon[lcv] == NULL)
> > +           anonp[lcv] = srcanonp[entry->aref.ar_pageoff + lcv];
> > +           if (anonp[lcv] == NULL)
> >                     continue;
> > -           amap->am_anon[lcv]->an_ref++;
> > -           amap->am_bckptr[lcv] = amap->am_nused;
> > -           amap->am_slots[amap->am_nused] = lcv;
> > -           amap->am_nused++;
> > +           anonp[lcv]->an_ref++;
> > +           amap_fill_slot(amap, lcv);
> >     }
> > -   memset(&amap->am_anon[lcv], 0,
> > +   memset(&anonp[lcv], 0,
> >         (amap->am_maxslot - lcv) * sizeof(struct vm_anon *));
> >  
> >     /*
> > @@ -587,9 +801,10 @@ amap_copy(struct vm_map *map, struct vm_
> >      * one (we checked above), so there is no way we could drop
> >      * the count to zero.  [and no need to worry about freeing it]
> >      */
> > -   srcamap->am_ref--;
> > -   if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
> > -           srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
> > +   srcamap->am_flgref--;
> > +   if (amap_refs(srcamap) == 1 &&
> > +       (amap_flags(srcamap) & AMAP_SHARED) != 0)
> > +           srcamap->am_flgref &= ~AMAP_SHARED;   /* clear shared flag */
> >  #ifdef UVM_AMAP_PPREF
> >     if (srcamap->am_ppref && srcamap->am_ppref != PPREF_NONE) {
> >             amap_pp_adjref(srcamap, entry->aref.ar_pageoff, 
> > @@ -605,38 +820,18 @@ amap_copy(struct vm_map *map, struct vm_
> >     amap_list_insert(amap);
> >  }
> >  
> > -/*
> > - * amap_cow_now: resolve all copy-on-write faults in an amap now for 
> > fork(2)
> > - *
> > - * called during fork(2) when the parent process has a wired map
> > - * entry.   in that case we want to avoid write-protecting pages
> > - * in the parent's map (e.g. like what you'd do for a COW page)
> > - * so we resolve the COW here.
> > - *
> > - * => assume parent's entry was wired, thus all pages are resident.
> > - * => caller passes child's map/entry in to us
> > - * => XXXCDC: out of memory should cause fork to fail, but there is
> > - * currently no easy way to do this (needs fix)
> > - */
> > -
> > -void
> > -amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
> > +int
> > +amap_cow_now_traverse(struct vm_anon **anonp, u_int map, u_int slotoff)
> >  {
> > -   struct vm_amap *amap = entry->aref.ar_amap;
> > -   int lcv, slot;
> > +   u_int lcv, slot;
> >     struct vm_anon *anon, *nanon;
> >     struct vm_page *pg, *npg;
> >  
> > -   /*
> > -    * note that if we wait, we must ReStart the "lcv" for loop because
> > -    * some other process could reorder the anon's in the
> > -    * am_anon[] array on us.
> > -    */
> > -ReStart:
> > -   for (lcv = 0 ; lcv < amap->am_nused ; lcv++) {
> > -           /* get the page */
> > -           slot = amap->am_slots[lcv];
> > -           anon = amap->am_anon[slot];
> > +   for (lcv = ffs(map); lcv != 0; lcv = ffs(map)) {
> > +           slot = lcv - 1;
> > +           map &= ~(1 << slot);
> > +
> > +           anon = anonp[slotoff + slot];
> >             pg = anon->an_page;
> >  
> >             /* page must be resident since parent is wired */
> > @@ -656,7 +851,7 @@ ReStart:
> >                     if (pg->pg_flags & PG_BUSY) {
> >                             atomic_setbits_int(&pg->pg_flags, PG_WANTED);
> >                             UVM_WAIT(pg, FALSE, "cownow", 0);
> > -                           goto ReStart;
> > +                           return 1;
> >                     }
> >  
> >                     /* ok, time to do a copy-on-write to a new anon */
> > @@ -676,16 +871,16 @@ ReStart:
> >                                     uvm_anfree(nanon);
> >                             }
> >                             uvm_wait("cownowpage");
> > -                           goto ReStart;
> > +                           return 1;
> >                     }
> > -   
> > +
> >                     /*
> >                      * got it... now we can copy the data and replace anon
> >                      * with our new one...
> >                      */
> >                     uvm_pagecopy(pg, npg);          /* old -> new */
> >                     anon->an_ref--;                 /* can't drop to zero */
> > -                   amap->am_anon[slot] = nanon;    /* replace */
> > +                   anonp[slotoff + slot] = nanon;  /* replace */
> >  
> >                     /*
> >                      * drop PG_BUSY on new page ... since we have had its
> > @@ -699,6 +894,52 @@ ReStart:
> >                     uvm_unlock_pageq();
> >             }
> >     }
> > +   return 0;
> > +}
> > +
> > +/*
> > + * amap_cow_now: resolve all copy-on-write faults in an amap now for 
> > fork(2)
> > + *
> > + * called during fork(2) when the parent process has a wired map
> > + * entry.   in that case we want to avoid write-protecting pages
> > + * in the parent's map (e.g. like what you'd do for a COW page)
> > + * so we resolve the COW here.
> > + *
> > + * => assume parent's entry was wired, thus all pages are resident.
> > + * => caller passes child's map/entry in to us
> > + * => XXXCDC: out of memory should cause fork to fail, but there is
> > + * currently no easy way to do this (needs fix)
> > + */
> > +
> > +void
> > +amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
> > +{
> > +   struct vm_amap *amap = entry->aref.ar_amap;
> > +   u_int i, slotoff;
> > +
> > +   /*
> > +    * note that if we wait, we must ReStart the "lcv" for loop because
> > +    * some other process could reorder the anon's in the
> > +    * am_anon[] array on us.
> > +    */
> > +ReStart:
> > +   if (amap->am_maxslot == 1) {
> > +           if (amap->am_diranon != NULL) {
> > +                   if (amap_cow_now_traverse(&amap->am_diranon, 1, 0))
> > +                           goto ReStart;
> > +           }
> > +   } else if (amap->am_maxslot <= UVM_AMAP_MAXSLOT_SMALL) {
> > +           if (amap_cow_now_traverse(amap->am_anon, amap->am_nused, 0))
> > +                   goto ReStart;
> > +   } else {
> > +           for (i = 0; i < amap->am_meta->am_nused; i++) {
> > +                   slotoff = AMAP_C2S(
> > +                       amap->am_meta->am_clust[i].ac_clust);
> > +                   if (amap_cow_now_traverse(amap->am_meta->am_anon,
> > +                       amap->am_meta->am_clust[i].ac_map, slotoff))
> > +                           goto ReStart;
> > +           }
> > +   }
> >  }
> >  
> >  /*
> > @@ -726,7 +967,7 @@ amap_splitref(struct vm_aref *origref, s
> >  #endif
> >  
> >     splitref->ar_amap = origref->ar_amap;
> > -   splitref->ar_amap->am_ref++;            /* not a share reference */
> > +   splitref->ar_amap->am_flgref++;         /* not a share reference */
> >     splitref->ar_pageoff = origref->ar_pageoff + leftslots;
> >  }
> >  
> > @@ -749,7 +990,7 @@ amap_pp_establish(struct vm_amap *amap)
> >     }
> >  
> >     /* init ppref */
> > -   pp_setreflen(amap->am_ppref, 0, amap->am_ref, amap->am_nslot);
> > +   pp_setreflen(amap->am_ppref, 0, amap_refs(amap), amap->am_nslot);
> >  }
> >  
> >  /*
> > @@ -829,66 +1070,110 @@ amap_pp_adjref(struct vm_amap *amap, int
> >  void
> >  amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
> >  {
> > -   int byanon, lcv, stop, curslot, ptr, slotend;
> > +   u_int i, clustslot, lcv, map, mask, curslot, slotend, slotbase, ptr;
> >     struct vm_anon *anon;
> >  
> > +   /* Fast path for anons with only a few slots */
> > +   if (amap->am_maxslot == 1) {
> > +           anon = amap->am_diranon;
> > +           if (anon != NULL) {
> > +                   amap->am_diranon = NULL;
> > +                   amap->am_nused = 0;
> > +                   amap_anon_release(anon);
> > +           }
> > +           return;
> > +   } else if (amap->am_maxslot <= UVM_AMAP_MAXSLOT_SMALL) {
> > +           mask = ((1 << slots) - 1) << slotoff;
> > +           map = amap->am_nused & mask;
> > +
> > +           for (lcv = ffs(map); lcv != 0; lcv = ffs(map)) {
> > +                   curslot = lcv - 1;
> > +                   map &= ~(1 << curslot);
> > +                   anon = amap->am_anon[curslot];
> > +                   amap->am_anon[curslot] = NULL;
> > +                   amap->am_nused &= ~(1 << curslot);
> > +                   amap_anon_release(anon);
> > +           }
> > +           return;
> > +   }
> > +
> >     /*
> > -    * we can either traverse the amap by am_anon or by am_slots depending
> > +    * we can either traverse the amap by am_anon or by am_clust depending
> >      * on which is cheaper.    decide now.
> >      */
> >     if (slots < amap->am_nused) {
> > -           byanon = TRUE;
> > -           lcv = slotoff;
> > -           stop = slotoff + slots;
> > -   } else {
> > -           byanon = FALSE;
> > -           lcv = 0;
> > -           stop = amap->am_nused;
> > -           slotend = slotoff + slots;
> > -   }
> > -
> > -   while (lcv < stop) {
> > -           int refs;
> > -
> > -           if (byanon) {
> > -                   curslot = lcv++;        /* lcv advances here */
> > -                   if (amap->am_anon[curslot] == NULL)
> > +           for (curslot = slotoff; curslot < slotoff + slots; curslot++) {
> > +                   if (amap->am_meta->am_anon[curslot] == NULL)
> >                             continue;
> > -           } else {
> > -                   curslot = amap->am_slots[lcv];
> > -                   if (curslot < slotoff || curslot >= slotend) {
> > -                           lcv++;          /* lcv advances here */
> > -                           continue;
> > -                   }
> > -                   stop--; /* drop stop, since anon will be removed */
> > +                   amap_normal_wipe_slot(amap, curslot);
> >             }
> > -           anon = amap->am_anon[curslot];
> > +           return;
> > +   }
> >  
> > -           /* remove it from the amap */
> > -           amap->am_anon[curslot] = NULL;
> > -           ptr = amap->am_bckptr[curslot];
> > -           if (ptr != (amap->am_nused - 1)) {
> > -                   amap->am_slots[ptr] =
> > -                       amap->am_slots[amap->am_nused - 1];
> > -                   amap->am_bckptr[amap->am_slots[ptr]] =
> > -                       ptr;    /* back ptr. */
> > -           }
> > -           amap->am_nused--;
> > -
> > -           /* drop anon reference count */
> > -           refs = --anon->an_ref;
> > -           if (refs == 0) {
> > -                   /*
> > -                    * we just eliminated the last reference to an anon.
> > -                    * free it.
> > -                    */
> > -                   uvm_anfree(anon);
> > +   slotend = slotoff + slots;
> > +   clustslot = AMAP_SLOTCHUNK(slotoff);
> > +   for (i = AMAP_S2C(slotoff); i <= AMAP_S2C(slotend);
> > +       i++, clustslot = 0) {
> > +           ptr = amap->am_meta->am_bckptr[i];
> > +           if (ptr >= amap->am_meta->am_nused ||
> > +               amap->am_meta->am_clust[ptr].ac_clust != i)
> > +                   continue;
> > +
> > +           slotbase = AMAP_C2S(i);
> > +           if (slotend - slotbase - clustslot >= AMAP_SLOTPERCLUST)
> > +                   mask = ~(u_int)0;
> > +           else
> > +                   mask = (1 << (slotend - slotbase - clustslot)) - 1;
> > +           mask <<= clustslot;
> > +
> > +           map = amap->am_meta->am_clust[ptr].ac_map & mask;
> > +           for (lcv = ffs(map); lcv != 0; lcv = ffs(map)) {
> > +                   lcv--;
> > +                   map &= ~(1 << lcv);
> > +                   curslot = slotbase + lcv;
> > +
> > +                   KASSERT(curslot >= slotoff && curslot < slotend);
> > +                   amap_normal_wipe_slot(amap, curslot);
> >             }
> >     }
> >  }
> >  
> >  #endif
> >  
> > +int
> > +amap_swap_off_traverse(struct vm_amap *am, struct vm_anon **anonp, u_int map,
> > +    u_int slotoff, int startslot, int endslot)
> > +{
> > +   int err = 0;
> > +   u_int lcv, slot, swslot;
> > +   struct vm_anon *anon;
> > +   boolean_t rv;
> > +
> > +   for (lcv = ffs(map); lcv != 0; lcv = ffs(map)) {
> > +           slot = lcv - 1;
> > +           map &= ~(1 << slot);
> > +           anon = anonp[slotoff + slot];
> > +
> > +           swslot = anon->an_swslot;
> > +           if (swslot < startslot || endslot <= swslot) {
> > +                   continue;
> > +           }
> > +
> > +           am->am_flgref |= AMAP_SWAPOFF;
> > +           rv = uvm_anon_pagein(anon);
> > +           am->am_flgref &= ~AMAP_SWAPOFF;
> > +
> > +           err = EAGAIN;
> > +           if (amap_refs(am) == 0)
> > +                   err = ENOENT;
> > +           if (rv)
> > +                   err = EIO;
> > +           break;
> > +   }
> > +
> > +   return err;
> > +}
> > +
> >  /*
> >   * amap_swap_off: pagein anonymous pages in amaps and drop swap slots.
> >   *
> > @@ -902,59 +1187,40 @@ amap_swap_off(int startslot, int endslot
> >  {
> >     struct vm_amap *am;
> >     struct vm_amap *am_next;
> > -   struct vm_amap marker_prev;
> > -   struct vm_amap marker_next;
> > -   boolean_t rv = FALSE;
> > -
> > -#if defined(DIAGNOSTIC)
> > -   memset(&marker_prev, 0, sizeof(marker_prev));
> > -   memset(&marker_next, 0, sizeof(marker_next));
> > -#endif /* defined(DIAGNOSTIC) */
> > -
> > -   for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
> > -           int i;
> > -
> > -           LIST_INSERT_BEFORE(am, &marker_prev, am_list);
> > -           LIST_INSERT_AFTER(am, &marker_next, am_list);
> > -
> > -           for (i = 0; i < am->am_nused; i++) {
> > -                   int slot;
> > -                   int swslot;
> > -                   struct vm_anon *anon;
> > +   int err = 0;
> > +   u_int i, slotoff;
> >  
> > -                   slot = am->am_slots[i];
> > -                   anon = am->am_anon[slot];
> > -
> > -                   swslot = anon->an_swslot;
> > -                   if (swslot < startslot || endslot <= swslot) {
> > -                           continue;
> > +   for (am = LIST_FIRST(&amap_list); am != NULL; am = am_next) {
> > +           if (am->am_maxslot == 1) {
> > +                   if (am->am_diranon != NULL)
> > +                           err = amap_swap_off_traverse(am,
> > +                               &am->am_diranon, 1, 0, startslot, endslot);
> > +           } else if (am->am_maxslot <= UVM_AMAP_MAXSLOT_SMALL)
> > +                   err = amap_swap_off_traverse(am, am->am_anon,
> > +                       am->am_nused, 0, startslot, endslot);
> > +           else {
> > +                   for (i = 0; !err && i < am->am_meta->am_nused; i++) {
> > +                           slotoff = AMAP_C2S(
> > +                               am->am_meta->am_clust[i].ac_clust);
> > +                           err = amap_swap_off_traverse(am,
> > +                               am->am_meta->am_anon,
> > +                               am->am_meta->am_clust[i].ac_map, slotoff,
> > +                               startslot, endslot);
> >                     }
> > +           }
> >  
> > -                   am->am_flags |= AMAP_SWAPOFF;
> > -
> > -                   rv = uvm_anon_pagein(anon);
> > -
> > -                   am->am_flags &= ~AMAP_SWAPOFF;
> > -                   if (amap_refs(am) == 0) {
> > +           if (err == EIO)
> > +                   return TRUE;
> > +           if (err == EAGAIN)
> > +                   am_next = am;
> > +           else {
> > +                   am_next = LIST_NEXT(am, am_list);
> > +                   if (err == ENOENT)
> >                             amap_wipeout(am);
> > -                           am = NULL;
> > -                           break;
> > -                   }
> > -                   if (rv) {
> > -                           break;
> > -                   }
> > -                   i = 0;
> >             }
> > -
> > -           KASSERT(LIST_NEXT(&marker_prev, am_list) == &marker_next ||
> > -               LIST_NEXT(LIST_NEXT(&marker_prev, am_list), am_list) ==
> > -               &marker_next);
> > -           am_next = LIST_NEXT(&marker_next, am_list);
> > -           LIST_REMOVE(&marker_prev, am_list);
> > -           LIST_REMOVE(&marker_next, am_list);
> >     }
> >  
> > -   return rv;
> > +   return FALSE;
> >  }
> >  
> >  /*
> > @@ -963,7 +1229,7 @@ amap_swap_off(int startslot, int endslot
> >  struct vm_anon *
> >  amap_lookup(struct vm_aref *aref, vaddr_t offset)
> >  {
> > -   int slot;
> > +   u_int slot;
> >     struct vm_amap *amap = aref->ar_amap;
> >  
> >     AMAP_B2SLOT(slot, offset);
> > @@ -972,7 +1238,7 @@ amap_lookup(struct vm_aref *aref, vaddr_
> >     if (slot >= amap->am_nslot)
> >             panic("amap_lookup: offset out of range");
> >  
> > -   return(amap->am_anon[slot]);
> > +   return(AMAP_ANON(amap)[slot]);
> >  }
> >  
> >  /*
> > @@ -984,7 +1250,7 @@ void
> >  amap_lookups(struct vm_aref *aref, vaddr_t offset,
> >      struct vm_anon **anons, int npages)
> >  {
> > -   int slot;
> > +   u_int slot;
> >     struct vm_amap *amap = aref->ar_amap;
> >  
> >     AMAP_B2SLOT(slot, offset);
> > @@ -993,9 +1259,8 @@ amap_lookups(struct vm_aref *aref, vaddr
> >     if ((slot + (npages - 1)) >= amap->am_nslot)
> >             panic("amap_lookups: offset out of range");
> >  
> > -   memcpy(anons, &amap->am_anon[slot], npages * sizeof(struct vm_anon *));
> > -
> > -   return;
> > +   memcpy(anons, &AMAP_ANON(amap)[slot],
> > +       npages * sizeof(struct vm_anon *));
> >  }
> >  
> >  /*
> > @@ -1007,8 +1272,9 @@ void
> >  amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
> >      boolean_t replace)
> >  {
> > -   int slot;
> > +   u_int slot;
> >     struct vm_amap *amap = aref->ar_amap;
> > +   struct vm_anon **anonp = AMAP_ANON(amap);
> >  
> >     AMAP_B2SLOT(slot, offset);
> >     slot += aref->ar_pageoff;
> > @@ -1017,25 +1283,22 @@ amap_add(struct vm_aref *aref, vaddr_t o
> >             panic("amap_add: offset out of range");
> >  
> >     if (replace) {
> > -           if (amap->am_anon[slot] == NULL)
> > +           if (anonp[slot] == NULL)
> >                     panic("amap_add: replacing null anon");
> > -           if (amap->am_anon[slot]->an_page != NULL && 
> > -               (amap->am_flags & AMAP_SHARED) != 0) {
> > -                   pmap_page_protect(amap->am_anon[slot]->an_page,
> > -                       PROT_NONE);
> > +           if (anonp[slot]->an_page != NULL &&
> > +               (amap_flags(amap) & AMAP_SHARED) != 0) {
> > +                   pmap_page_protect(anonp[slot]->an_page, PROT_NONE);
> >                     /*
> >                      * XXX: suppose page is supposed to be wired somewhere?
> >                      */
> >             }
> >     } else {   /* !replace */
> > -           if (amap->am_anon[slot] != NULL)
> > +           if (anonp[slot] != NULL)
> >                     panic("amap_add: slot in use");
> >  
> > -           amap->am_bckptr[slot] = amap->am_nused;
> > -           amap->am_slots[amap->am_nused] = slot;
> > -           amap->am_nused++;
> > +           amap_fill_slot(amap, slot);
> >     }
> > -   amap->am_anon[slot] = anon;
> > +   anonp[slot] = anon;
> >  }
> >  
> >  /*
> > @@ -1044,8 +1307,9 @@ amap_add(struct vm_aref *aref, vaddr_t o
> >  void
> >  amap_unadd(struct vm_aref *aref, vaddr_t offset)
> >  {
> > -   int ptr, slot;
> > +   u_int slot;
> >     struct vm_amap *amap = aref->ar_amap;
> > +   struct vm_anon **anonp = AMAP_ANON(amap);
> >  
> >     AMAP_B2SLOT(slot, offset);
> >     slot += aref->ar_pageoff;
> > @@ -1053,17 +1317,11 @@ amap_unadd(struct vm_aref *aref, vaddr_t
> >     if (slot >= amap->am_nslot)
> >             panic("amap_unadd: offset out of range");
> >  
> > -   if (amap->am_anon[slot] == NULL)
> > +   if (anonp[slot] == NULL)
> >             panic("amap_unadd: nothing there");
> >  
> > -   amap->am_anon[slot] = NULL;
> > -   ptr = amap->am_bckptr[slot];
> > -
> > -   if (ptr != (amap->am_nused - 1)) {      /* swap to keep slots contig? */
> > -           amap->am_slots[ptr] = amap->am_slots[amap->am_nused - 1];
> > -           amap->am_bckptr[amap->am_slots[ptr]] = ptr;     /* back link */
> > -   }
> > -   amap->am_nused--;
> > +   anonp[slot] = NULL;
> > +   amap_clear_slot(amap, slot);
> >  }
> >  
> >  /*
> > @@ -1076,9 +1334,9 @@ void
> >  amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags)
> >  {
> >  
> > -   amap->am_ref++;
> > +   amap->am_flgref++;
> >     if (flags & AMAP_SHARED)
> > -           amap->am_flags |= AMAP_SHARED;
> > +           amap->am_flgref |= AMAP_SHARED;
> >  #ifdef UVM_AMAP_PPREF
> >     if (amap->am_ppref == NULL && (flags & AMAP_REFALL) == 0 &&
> >         len != amap->am_nslot)
> > @@ -1105,14 +1363,15 @@ amap_unref(struct vm_amap *amap, vaddr_t
> >  {
> >  
> >     /* if we are the last reference, free the amap and return. */
> > -   if (amap->am_ref-- == 1) {
> > +   amap->am_flgref--;
> > +   if (amap_refs(amap) == 0) {
> >             amap_wipeout(amap);     /* drops final ref and frees */
> >             return;
> >     }
> >  
> >     /* otherwise just drop the reference count(s) */
> > -   if (amap->am_ref == 1 && (amap->am_flags & AMAP_SHARED) != 0)
> > -           amap->am_flags &= ~AMAP_SHARED; /* clear shared flag */
> > +   if (amap_refs(amap) == 1 && (amap_flags(amap) & AMAP_SHARED) != 0)
> > +           amap->am_flgref &= ~AMAP_SHARED;        /* clear shared flag */
> >  #ifdef UVM_AMAP_PPREF
> >     if (amap->am_ppref == NULL && all == 0 && len != amap->am_nslot)
> >             amap_pp_establish(amap);
> > Index: uvm/uvm_amap.h
> > ===================================================================
> > RCS file: /cvs/src/sys/uvm/uvm_amap.h,v
> > retrieving revision 1.21
> > diff -u -p -r1.21 uvm_amap.h
> > --- uvm/uvm_amap.h  6 Mar 2016 14:47:07 -0000       1.21
> > +++ uvm/uvm_amap.h  21 Mar 2016 18:53:45 -0000
> > @@ -98,15 +98,17 @@ void            amap_unref(struct vm_amap *, vaddr
> >  void               amap_wipeout(struct vm_amap *);
> >  boolean_t  amap_swap_off(int, int);
> >  
> > +#endif /* _KERNEL */
> > +
> >  /*
> >   * amap flag values
> >   */
> >  
> > -#define AMAP_SHARED        0x1     /* amap is shared */
> > -#define AMAP_REFALL        0x2     /* amap_ref: reference entire amap */
> > -#define AMAP_SWAPOFF       0x4     /* amap_swap_off() is in progress */
> > +#define AMAP_SHARED        0x80000000U     /* amap is shared */
> > +#define AMAP_REFALL        0x40000000U     /* amap_ref: reference entire amap */
> > +#define AMAP_SWAPOFF       0x20000000U     /* amap_swap_off() is in progress */
> >  
> > -#endif /* _KERNEL */
> > +#define AMAP_FLAGMASK      0xe0000000U
> >  
> >  /**********************************************************************/
> >  
> > @@ -123,52 +125,80 @@ boolean_t     amap_swap_off(int, int);
> >  
> >  #define UVM_AMAP_PPREF             /* track partial references */
> >  
> > +struct vm_amap_meta;
> > +
> >  /*
> >   * here is the definition of the vm_amap structure for this implementation.
> >   */
> >  
> > +/*
> > + * amaps come in three flavors: tiny, small, and normal:
> > + * - tiny amaps contain exactly one slot and embed the pointer
> > + *   to the anon directly (one slot = an entry in the amap to
> > + *   represent a page)
> > + * - small amaps contain up to UVM_AMAP_MAXSLOT_SMALL slots and store
> > + *   a pointer to an array of anons.
> > + * - normal amaps contain more than UVM_AMAP_MAXSLOT_SMALL slots
> > + *   and store a pointer to meta information. The meta data
> > + *   contains an array of anons and additional information to
> > + *   keep track of used slots efficiently.
> > + *
> > + * For tiny amaps, am_nused is either 0 or 1, depending on whether
> > + * the anon is present.
> > + * For small amaps, am_nused is a bitmap of the slots that are used.
> > + *
> > + * For normal amaps, am_nused is the number of occupied slots.
> > + * In normal amaps, these slots are packed into clusters of AMAP_SLOTPERCLUST
> > + * slots each. For each cluster, there is a bitmap that represents which
> > + * slots are occupied.
> > + *
> > + * The clusters are organized as an array. This allows fast mapping of
> > + * pages to their cluster. In order to be able to find all occupied slots
> > + * in an amap quickly, non-empty clusters are packed in the am_clust array.
> > + * am_nused in the meta data contains the number of entries in am_clust.
> > + *
> > + * In order to find the cluster for a given page, am_bckptr is used to
> > + * map a cluster to a position in am_clust.
> > + *
> > + * For example, consider an amap with 12 slots and clusters of 4 slots
> > + * each, and assume that slots 7, 5, 3 got filled in that order.
> > + * Then the amap meta data would look like this:
> > + *
> > + * am_nused: 2
> > + * am_clust: { 1, 0xa }, { 0, 0x8 }
> > + * am_bckptr: 1, 0, (invalid)
> > + */
> >  struct vm_amap {
> > -   int am_ref;             /* reference count */
> > -   int am_flags;           /* flags */
> > -   int am_maxslot;         /* max # of slots allocated */
> > -   int am_nslot;           /* # of slots currently in map ( <= maxslot) */
> > -   int am_nused;           /* # of slots currently in use */
> > -   int *am_slots;          /* contig array of active slots */
> > -   int *am_bckptr;         /* back pointer array to am_slots */
> > -   struct vm_anon **am_anon; /* array of anonymous pages */
> > +   u_int am_flgref;        /* flags and reference count */
> > +   u_int am_maxslot;       /* max # of slots allocated */
> > +   u_int am_nslot;         /* # of slots currently in map ( <= maxslot) */
> > +   u_int am_nused;         /* # of slots currently in use */
> > +
> > +   union { /* fields used in the tiny, small or normal amap flavor */
> > +           struct vm_anon *am_diranon;     /* tiny: pointer to anon */
> > +           struct vm_anon **am_anon;       /* small: array of anons */
> > +           struct vm_amap_meta *am_meta;   /* normal: meta data */
> > +   };
> > +
> >  #ifdef UVM_AMAP_PPREF
> >     int *am_ppref;          /* per page reference count (if !NULL) */
> >  #endif
> >     LIST_ENTRY(vm_amap) am_list;
> >  };
> >  
> > -/*
> > - * note that am_slots, am_bckptr, and am_anon are arrays.   this allows
> > - * fast lookup of pages based on their virual address at the expense of
> > - * some extra memory.   in the future we should be smarter about memory
> > - * usage and fall back to a non-array based implementation on systems 
> > - * that are short of memory (XXXCDC).
> > - *
> > - * the entries in the array are called slots... for example an amap that
> > - * covers four pages of virtual memory is said to have four slots.   here
> > - * is an example of the array usage for a four slot amap.   note that only
> > - * slots one and three have anons assigned to them.  "D/C" means that we
> > - * "don't care" about the value.
> > - * 
> > - *            0     1      2     3
> > - * am_anon:   NULL, anon0, NULL, anon1             (actual pointers to 
> > anons)
> > - * am_bckptr: D/C,  1,     D/C,  0         (points to am_slots entry)
> > - *
> > - * am_slots:  3, 1, D/C, D/C               (says slots 3 and 1 are in use)
> > - * 
> > - * note that am_bckptr is D/C if the slot in am_anon is set to NULL.
> > - * to find the entry in am_slots for an anon, look at am_bckptr[slot],
> > - * thus the entry for slot 3 in am_slots[] is at am_slots[am_bckptr[3]].
> > - * in general, if am_anon[X] is non-NULL, then the following must be
> > - * true: am_slots[am_bckptr[X]] == X
> > - *
> > - * note that am_slots is always contig-packed.
> > - */
> > +struct vm_amap_clust {
> > +   u_int ac_clust;
> > +   u_int ac_map;
> > +};
> > +
> > +struct vm_amap_meta {
> > +   struct vm_anon **am_anon;
> > +   struct vm_amap_clust *am_clust;
> > +   u_int *am_bckptr;
> > +   u_int am_nused;
> > +};
> > +
> > +#define UVM_AMAP_MAXSLOT_SMALL     32
> >  
> >  /*
> >   * defines for handling of large sparce amaps:
> > @@ -210,6 +240,13 @@ struct vm_amap {
> >  #define UVM_AMAP_LARGE     256     /* # of slots in "large" amap */
> >  #define UVM_AMAP_CHUNK     16      /* # of slots to chunk large amaps in */
> >  
> > +/*
> > + * flags and reference count macros
> > + */
> > +
> > +#define amap_flags(AMAP)   ((AMAP)->am_flgref & AMAP_FLAGMASK)
> > +#define amap_refs(AMAP)            ((AMAP)->am_flgref & ~AMAP_FLAGMASK)
> > +
> >  #ifdef _KERNEL
> >  
> >  /*
> > @@ -222,12 +259,17 @@ struct vm_amap {
> >     (S) = (B) >> PAGE_SHIFT;                                        \
> >  }
> >  
> > -/*
> > - * flags macros
> > - */
> > +#define AMAP_SLOTPERCLUST  32
> > +
> > +#define AMAP_S2C(slot)     ((slot) / AMAP_SLOTPERCLUST)
> > +#define AMAP_C2S(clust)    ((clust) * AMAP_SLOTPERCLUST)
> > +
> > +#define AMAP_SLOTCHUNK(slot)       ((slot) % AMAP_SLOTPERCLUST)
> >  
> > -#define amap_flags(AMAP)   ((AMAP)->am_flags)
> > -#define amap_refs(AMAP)            ((AMAP)->am_ref)
> > +#define AMAP_ANON(amap) \
> > +    ((amap)->am_maxslot == 1 ? &(amap)->am_diranon : \
> > +     ((amap)->am_maxslot <= UVM_AMAP_MAXSLOT_SMALL ? (amap)->am_anon : \
> > +      (amap)->am_meta->am_anon))
> >  
> >  /*
> >   * if we enable PPREF, then we have a couple of extra functions that
> > Index: usr.sbin//procmap/procmap.c
> > ===================================================================
> > RCS file: /cvs/src/usr.sbin/procmap/procmap.c,v
> > retrieving revision 1.59
> > diff -u -p -r1.59 procmap.c
> > --- usr.sbin//procmap/procmap.c     19 Jan 2015 19:25:28 -0000      1.59
> > +++ usr.sbin//procmap/procmap.c     21 Mar 2016 18:51:49 -0000
> > @@ -785,16 +785,37 @@ dump_vm_map_entry(kvm_t *kd, struct kbit
> >     }
> >  
> >     if (print_amap && vme->aref.ar_amap) {
> > -           printf(" amap - ref: %d fl: 0x%x maxsl: %d nsl: %d nuse: %d\n",
> > -               D(amap, vm_amap)->am_ref,
> > -               D(amap, vm_amap)->am_flags,
> > -               D(amap, vm_amap)->am_maxslot,
> > -               D(amap, vm_amap)->am_nslot,
> > -               D(amap, vm_amap)->am_nused);
> > +           char *flags = "?";
> > +           u_int ref, maxslot, nslot, nused, map;
> > +
> > +           if (amap_flags(D(amap, vm_amap)) == (AMAP_SHARED|AMAP_SWAPOFF))
> > +                   flags = "shared|swapoff";
> > +           else if (amap_flags(D(amap, vm_amap)) == AMAP_SHARED)
> > +                   flags = "shared";
> > +           else if (amap_flags(D(amap, vm_amap)) == AMAP_SWAPOFF)
> > +                   flags = "swapoff";
> > +           else if (amap_flags(D(amap, vm_amap)) == 0)
> > +                   flags = "none";
> > +
> > +           nslot = D(amap, vm_amap)->am_nslot;
> > +           maxslot = D(amap, vm_amap)->am_maxslot;
> > +           if (maxslot <= UVM_AMAP_MAXSLOT_SMALL) {
> > +                   map = D(amap, vm_amap)->am_nused;
> > +                   nused = 0;
> > +                   while (map) {
> > +                           if (map & 1)
> > +                                   nused++;
> > +                           map >>= 1;
> > +                   }
> > +           } else
> > +                   nused = D(amap, vm_amap)->am_nused;
> > +
> > +           printf(" amap - ref: %u fl: %s maxsl: %u nsl: %u nuse: %u\n",
> > +               amap_refs(D(amap, vm_amap)), flags, maxslot, nslot, nused);
> >             if (sum) {
> > -                   sum->s_am_nslots += D(amap, vm_amap)->am_nslot;
> > -                   sum->s_am_maxslots += D(amap, vm_amap)->am_maxslot;
> > -                   sum->s_am_nusedslots += D(amap, vm_amap)->am_nused;
> > +                   sum->s_am_nslots += nslot;
> > +                   sum->s_am_maxslots += maxslot;
> > +                   sum->s_am_nusedslots += nused;
> >             }
> >     }
> >  
> > 
> > 

Reply via email to