The branch main has been updated by dougm:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=8df38859d0f92025540bcbe99c9a291a584327f2

commit 8df38859d0f92025540bcbe99c9a291a584327f2
Author:     Doug Moore <do...@freebsd.org>
AuthorDate: 2023-07-07 16:09:36 +0000
Commit:     Doug Moore <do...@freebsd.org>
CommitDate: 2023-07-07 16:09:36 +0000

    radix_trie: replace node count with popmap
    
    Replace the 'count' field in a trie node with a bitmap that
    identifies non-NULL children. Drop the 'last' field, and use the
    last bit set in the bitmap instead.  In lookup_le, lookup_ge,
    remove, and reclaim_all, use the bitmap to find the
    previous/next/only/every non-null child in constant time by
    examining the bitmask instead of looping across array elements
    and null-checking them one-by-one.
    
    A buildworld test suggests that this reduces the cycle count on
    those functions that eliminate some null-checks by 4.9%, 1.5%,
    0.0% and 13.3%.
    Reviewed by:    alc
    Tested by:      pho
    Differential Revision:  https://reviews.freebsd.org/D40775
---
 sys/kern/subr_pctrie.c | 192 +++++++++++++++++++++++--------------------------
 sys/vm/vm_radix.c      | 191 +++++++++++++++++++++++-------------------------
 2 files changed, 179 insertions(+), 204 deletions(-)

diff --git a/sys/kern/subr_pctrie.c b/sys/kern/subr_pctrie.c
index 0f28e5ebb2f1..cf09903556ec 100644
--- a/sys/kern/subr_pctrie.c
+++ b/sys/kern/subr_pctrie.c
@@ -67,6 +67,18 @@ __FBSDID("$FreeBSD$");
 #define        PCTRIE_MASK     (PCTRIE_COUNT - 1)
 #define        PCTRIE_LIMIT    (howmany(sizeof(uint64_t) * NBBY, PCTRIE_WIDTH) 
- 1)
 
+#if PCTRIE_WIDTH == 3
+typedef uint8_t pn_popmap_t;
+#elif PCTRIE_WIDTH == 4
+typedef uint16_t pn_popmap_t;
+#elif PCTRIE_WIDTH == 5
+typedef uint32_t pn_popmap_t;
+#else
+#error Unsupported width
+#endif
+_Static_assert(sizeof(pn_popmap_t) <= sizeof(int),
+    "pn_popmap_t too wide");
+
 /* Flag bits stored in node pointers. */
 #define        PCTRIE_ISLEAF   0x1
 #define        PCTRIE_FLAGS    0x1
@@ -81,9 +93,8 @@ typedef SMR_POINTER(struct pctrie_node *) smr_pctnode_t;
 
 struct pctrie_node {
        uint64_t        pn_owner;                       /* Owner of record. */
-       uint16_t        pn_count;                       /* Valid children. */
+       pn_popmap_t     pn_popmap;                      /* Valid children. */
        uint8_t         pn_clev;                        /* Current level. */
-       int8_t          pn_last;                        /* Zero last ptr. */
        smr_pctnode_t   pn_child[PCTRIE_COUNT];         /* Child nodes. */
 };
 
@@ -127,13 +138,12 @@ pctrie_node_get(struct pctrie *ptree, pctrie_alloc_t 
allocfn, uint64_t index,
         * has exited so lookup can not return false negatives.  It is done
         * here because it will be cache-cold in the dtor callback.
         */
-       if (node->pn_last != 0) {
-               pctrie_node_store(&node->pn_child[node->pn_last - 1], NULL,
-                   PCTRIE_UNSERIALIZED);
-               node->pn_last = 0;
+       if (node->pn_popmap != 0) {
+               pctrie_node_store(&node->pn_child[ffs(node->pn_popmap) - 1],
+                   NULL, PCTRIE_UNSERIALIZED);
+               node->pn_popmap = 0;
        }
        node->pn_owner = pctrie_trimkey(index, clevel + 1);
-       node->pn_count = 2;
        node->pn_clev = clevel;
        return (node);
 }
@@ -143,22 +153,21 @@ pctrie_node_get(struct pctrie *ptree, pctrie_alloc_t 
allocfn, uint64_t index,
  */
 static __inline void
 pctrie_node_put(struct pctrie *ptree, struct pctrie_node *node,
-    pctrie_free_t freefn, int8_t last)
+    pctrie_free_t freefn)
 {
 #ifdef INVARIANTS
        int slot;
 
-       KASSERT(node->pn_count == 0,
-           ("pctrie_node_put: node %p has %d children", node,
-           node->pn_count));
+       KASSERT(powerof2(node->pn_popmap),
+           ("pctrie_node_put: node %p has too many children %04x", node,
+           node->pn_popmap));
        for (slot = 0; slot < PCTRIE_COUNT; slot++) {
-               if (slot == last)
+               if ((node->pn_popmap & (1 << slot)) != 0)
                        continue;
                KASSERT(smr_unserialized_load(&node->pn_child[slot], true) ==
                    NULL, ("pctrie_node_put: node %p has a child", node));
        }
 #endif
-       node->pn_last = last + 1;
        freefn(ptree, node);
 }
 
@@ -258,6 +267,9 @@ pctrie_addval(struct pctrie_node *node, uint64_t index, 
uint16_t clev,
        slot = pctrie_slot(index, clev);
        pctrie_node_store(&node->pn_child[slot],
            pctrie_toleaf(val), access);
+       node->pn_popmap ^= 1 << slot;
+       KASSERT((node->pn_popmap & (1 << slot)) != 0,
+           ("%s: bad popmap slot %d in node %p", __func__, slot, node));
 }
 
 /*
@@ -305,20 +317,19 @@ pctrie_reclaim_allnodes_int(struct pctrie *ptree, struct 
pctrie_node *node,
        struct pctrie_node *child;
        int slot;
 
-       KASSERT(node->pn_count <= PCTRIE_COUNT,
-           ("pctrie_reclaim_allnodes_int: bad count in node %p", node));
-       for (slot = 0; node->pn_count != 0; slot++) {
+       while (node->pn_popmap != 0) {
+               slot = ffs(node->pn_popmap) - 1;
                child = pctrie_node_load(&node->pn_child[slot], NULL,
                    PCTRIE_UNSERIALIZED);
-               if (child == NULL)
-                       continue;
+               KASSERT(child != NULL, ("%s: bad popmap slot %d in node %p",
+                   __func__, slot, node));
                if (!pctrie_isleaf(child))
                        pctrie_reclaim_allnodes_int(ptree, child, freefn);
+               node->pn_popmap ^= 1 << slot;
                pctrie_node_store(&node->pn_child[slot], NULL,
                    PCTRIE_UNSERIALIZED);
-               node->pn_count--;
        }
-       pctrie_node_put(ptree, node, freefn, -1);
+       pctrie_node_put(ptree, node, freefn);
 }
 
 /*
@@ -330,7 +341,7 @@ pctrie_zone_init(void *mem, int size __unused, int flags 
__unused)
        struct pctrie_node *node;
 
        node = mem;
-       node->pn_last = 0;
+       node->pn_popmap = 0;
        memset(node->pn_child, 0, sizeof(node->pn_child));
        return (0);
 }
@@ -391,7 +402,6 @@ pctrie_insert(struct pctrie *ptree, uint64_t *val, 
pctrie_alloc_t allocfn)
                parentp = &node->pn_child[slot];
                tmp = pctrie_node_load(parentp, NULL, PCTRIE_LOCKED);
                if (tmp == NULL) {
-                       node->pn_count++;
                        pctrie_addval(node, index, node->pn_clev, val,
                            PCTRIE_LOCKED);
                        return (0);
@@ -413,6 +423,7 @@ pctrie_insert(struct pctrie *ptree, uint64_t *val, 
pctrie_alloc_t allocfn)
        /* These writes are not yet visible due to ordering. */
        pctrie_addval(tmp, index, clev, val, PCTRIE_UNSERIALIZED);
        pctrie_node_store(&tmp->pn_child[slot], node, PCTRIE_UNSERIALIZED);
+       tmp->pn_popmap ^= 1 << slot;
        /* Synchronize to make the above visible. */
        pctrie_node_store(parentp, tmp, PCTRIE_LOCKED);
 
@@ -483,7 +494,6 @@ uint64_t *
 pctrie_lookup_ge(struct pctrie *ptree, uint64_t index)
 {
        struct pctrie_node *stack[PCTRIE_LIMIT];
-       uint64_t inc;
        uint64_t *m;
        struct pctrie_node *child, *node;
 #ifdef INVARIANTS
@@ -552,35 +562,24 @@ ascend:
                } else if (child != NULL)
                        goto descend;
 
-               /*
-                * Look for an available edge or val within the current
-                * bisection node.
-                */
-                if (slot < (PCTRIE_COUNT - 1)) {
-                       inc = PCTRIE_UNITLEVEL(node->pn_clev);
-                       index = pctrie_trimkey(index, node->pn_clev);
-                       do {
-                               index += inc;
-                               slot++;
-                               child = pctrie_node_load(&node->pn_child[slot],
-                                   NULL, PCTRIE_LOCKED);
-                               if (pctrie_isleaf(child)) {
-                                       m = pctrie_toval(child);
-                                       KASSERT(*m >= index,
-                                           ("pctrie_lookup_ge: leaf < index"));
-                                       return (m);
-                               } else if (child != NULL)
-                                       goto descend;
-                       } while (slot < (PCTRIE_COUNT - 1));
+               /* Find the first set bit beyond the first slot+1 bits. */
+               slot = ffs(node->pn_popmap & (-2 << slot)) - 1;
+               if (slot < 0) {
+                       /*
+                        * A value or edge greater than the search slot is not
+                        * found in the current node; ascend to the next
+                        * higher-level node.
+                        */
+                       goto ascend;
                }
-               KASSERT(child == NULL || pctrie_isleaf(child),
-                   ("pctrie_lookup_ge: child is radix node"));
-
-               /*
-                * If a value or edge greater than the search slot is not found
-                * in the current node, ascend to the next higher-level node.
-                */
-               goto ascend;
+               child = pctrie_node_load(&node->pn_child[slot],
+                   NULL, PCTRIE_LOCKED);
+               KASSERT(child != NULL, ("%s: bad popmap slot %d in node %p",
+                   __func__, slot, node));
+               if (pctrie_isleaf(child))
+                       return (pctrie_toval(child));
+               index = pctrie_trimkey(index, node->pn_clev + 1) +
+                   slot * PCTRIE_UNITLEVEL(node->pn_clev);
 descend:
                KASSERT(node->pn_clev > 0,
                    ("pctrie_lookup_ge: pushing leaf's parent"));
@@ -599,7 +598,6 @@ uint64_t *
 pctrie_lookup_le(struct pctrie *ptree, uint64_t index)
 {
        struct pctrie_node *stack[PCTRIE_LIMIT];
-       uint64_t inc;
        uint64_t *m;
        struct pctrie_node *child, *node;
 #ifdef INVARIANTS
@@ -670,35 +668,22 @@ ascend:
                } else if (child != NULL)
                        goto descend;
 
-               /*
-                * Look for an available edge or value within the current
-                * bisection node.
-                */
-               if (slot > 0) {
-                       inc = PCTRIE_UNITLEVEL(node->pn_clev);
-                       index |= inc - 1;
-                       do {
-                               index -= inc;
-                               slot--;
-                               child = pctrie_node_load(&node->pn_child[slot],
-                                   NULL, PCTRIE_LOCKED);
-                               if (pctrie_isleaf(child)) {
-                                       m = pctrie_toval(child);
-                                       KASSERT(*m <= index,
-                                           ("pctrie_lookup_le: leaf > index"));
-                                       return (m);
-                               } else if (child != NULL)
-                                       goto descend;
-                       } while (slot > 0);
+               /* Find the last set bit among the first slot bits. */
+               slot = fls(node->pn_popmap & ((1 << slot) - 1)) - 1;
+               if (slot < 0) {
+                       /*
+                        * A value or edge smaller than the search slot is not
+                        * found in the current node; ascend to the next
+                        * higher-level node.
+                        */
+                       goto ascend;
                }
-               KASSERT(child == NULL || pctrie_isleaf(child),
-                   ("pctrie_lookup_le: child is radix node"));
-
-               /*
-                * If a value or edge smaller than the search slot is not found
-                * in the current node, ascend to the next higher-level node.
-                */
-               goto ascend;
+               child = pctrie_node_load(&node->pn_child[slot],
+                   NULL, PCTRIE_LOCKED);
+               if (pctrie_isleaf(child))
+                       return (pctrie_toval(child));
+               index = pctrie_trimkey(index, node->pn_clev + 1) +
+                   (slot + 1) * PCTRIE_UNITLEVEL(node->pn_clev) - 1;
 descend:
                KASSERT(node->pn_clev > 0,
                    ("pctrie_lookup_le: pushing leaf's parent"));
@@ -718,7 +703,7 @@ pctrie_remove(struct pctrie *ptree, uint64_t index, 
pctrie_free_t freefn)
 {
        struct pctrie_node *node, *parent, *tmp;
        uint64_t *m;
-       int i, slot;
+       int slot;
 
        node = pctrie_root_load(ptree, NULL, PCTRIE_LOCKED);
        if (pctrie_isleaf(node)) {
@@ -739,19 +724,22 @@ pctrie_remove(struct pctrie *ptree, uint64_t index, 
pctrie_free_t freefn)
                        m = pctrie_toval(tmp);
                        if (*m != index)
                                panic("%s: invalid key found", __func__);
+                       KASSERT((node->pn_popmap & (1 << slot)) != 0,
+                           ("%s: bad popmap slot %d in node %p",
+                           __func__, slot, node));
+                       node->pn_popmap ^= 1 << slot;
                        pctrie_node_store(&node->pn_child[slot], NULL,
                            PCTRIE_LOCKED);
-                       node->pn_count--;
-                       if (node->pn_count > 1)
+                       if (!powerof2(node->pn_popmap))
                                break;
-                       for (i = 0; i < PCTRIE_COUNT; i++) {
-                               tmp = pctrie_node_load(&node->pn_child[i],
-                                   NULL, PCTRIE_LOCKED);
-                               if (tmp != NULL)
-                                       break;
-                       }
+                       KASSERT(node->pn_popmap != 0,
+                           ("%s: bad popmap all zeroes", __func__));
+                       slot = ffs(node->pn_popmap) - 1;
+                       tmp = pctrie_node_load(&node->pn_child[slot],
+                           NULL, PCTRIE_LOCKED);
                        KASSERT(tmp != NULL,
-                           ("%s: invalid node configuration", __func__));
+                           ("%s: bad popmap slot %d in node %p",
+                           __func__, slot, node));
                        if (parent == NULL)
                                pctrie_root_store(ptree, tmp, PCTRIE_LOCKED);
                        else {
@@ -767,8 +755,7 @@ pctrie_remove(struct pctrie *ptree, uint64_t index, 
pctrie_free_t freefn)
                         * The child is still valid and we can not zero the
                         * pointer until all SMR references are gone.
                         */
-                       node->pn_count--;
-                       pctrie_node_put(ptree, node, freefn, i);
+                       pctrie_node_put(ptree, node, freefn);
                        break;
                }
                parent = node;
@@ -801,22 +788,23 @@ pctrie_reclaim_allnodes(struct pctrie *ptree, 
pctrie_free_t freefn)
 DB_SHOW_COMMAND(pctrienode, db_show_pctrienode)
 {
        struct pctrie_node *node, *tmp;
-       int i;
+       int slot;
+       pn_popmap_t popmap;
 
         if (!have_addr)
                 return;
        node = (struct pctrie_node *)addr;
-       db_printf("node %p, owner %jx, children count %u, level %u:\n",
-           (void *)node, (uintmax_t)node->pn_owner, node->pn_count,
+       db_printf("node %p, owner %jx, children popmap %04x, level %u:\n",
+           (void *)node, (uintmax_t)node->pn_owner, node->pn_popmap,
            node->pn_clev);
-       for (i = 0; i < PCTRIE_COUNT; i++) {
-               tmp = pctrie_node_load(&node->pn_child[i], NULL,
+       for (popmap = node->pn_popmap; popmap != 0; popmap ^= 1 << slot) {
+               slot = ffs(popmap) - 1;
+               tmp = pctrie_node_load(&node->pn_child[slot], NULL,
                    PCTRIE_UNSERIALIZED);
-               if (tmp != NULL)
-                       db_printf("slot: %d, val: %p, value: %p, clev: %d\n",
-                           i, (void *)tmp,
-                           pctrie_isleaf(tmp) ? pctrie_toval(tmp) : NULL,
-                           node->pn_clev);
+               db_printf("slot: %d, val: %p, value: %p, clev: %d\n",
+                   slot, (void *)tmp,
+                   pctrie_isleaf(tmp) ? pctrie_toval(tmp) : NULL,
+                   node->pn_clev);
        }
 }
 #endif /* DDB */
diff --git a/sys/vm/vm_radix.c b/sys/vm/vm_radix.c
index b3d0d92f9969..d2cd2c2536fd 100644
--- a/sys/vm/vm_radix.c
+++ b/sys/vm/vm_radix.c
@@ -91,6 +91,18 @@ __FBSDID("$FreeBSD$");
 #define        VM_RADIX_LIMIT                                                  
\
        (howmany(sizeof(vm_pindex_t) * NBBY, VM_RADIX_WIDTH) - 1)
 
+#if VM_RADIX_WIDTH == 3
+typedef uint8_t rn_popmap_t;
+#elif VM_RADIX_WIDTH == 4
+typedef uint16_t rn_popmap_t;
+#elif VM_RADIX_WIDTH == 5
+typedef uint32_t rn_popmap_t;
+#else
+#error Unsupported width
+#endif
+_Static_assert(sizeof(rn_popmap_t) <= sizeof(int),
+    "rn_popmap_t too wide");
+
 /* Flag bits stored in node pointers. */
 #define        VM_RADIX_ISLEAF 0x1
 #define        VM_RADIX_FLAGS  0x1
@@ -107,9 +119,8 @@ typedef SMR_POINTER(struct vm_radix_node *) smrnode_t;
 
 struct vm_radix_node {
        vm_pindex_t     rn_owner;                       /* Owner of record. */
-       uint16_t        rn_count;                       /* Valid children. */
+       rn_popmap_t     rn_popmap;                      /* Valid children. */
        uint8_t         rn_clev;                        /* Current level. */
-       int8_t          rn_last;                        /* zero last ptr. */
        smrnode_t       rn_child[VM_RADIX_COUNT];       /* Child nodes. */
 };
 
@@ -152,13 +163,12 @@ vm_radix_node_get(vm_pindex_t index, uint16_t clevel)
         * has exited so lookup can not return false negatives.  It is done
         * here because it will be cache-cold in the dtor callback.
         */
-       if (rnode->rn_last != 0) {
-               vm_radix_node_store(&rnode->rn_child[rnode->rn_last - 1],
+       if (rnode->rn_popmap != 0) {
+               vm_radix_node_store(&rnode->rn_child[ffs(rnode->rn_popmap) - 1],
                    NULL, UNSERIALIZED);
-               rnode->rn_last = 0;
+               rnode->rn_popmap = 0;
        }
        rnode->rn_owner = vm_radix_trimkey(index, clevel + 1);
-       rnode->rn_count = 2;
        rnode->rn_clev = clevel;
        return (rnode);
 }
@@ -167,23 +177,21 @@ vm_radix_node_get(vm_pindex_t index, uint16_t clevel)
  * Free radix node.
  */
 static __inline void
-vm_radix_node_put(struct vm_radix_node *rnode, int8_t last)
+vm_radix_node_put(struct vm_radix_node *rnode)
 {
 #ifdef INVARIANTS
        int slot;
 
-       KASSERT(rnode->rn_count == 0,
-           ("vm_radix_node_put: rnode %p has %d children", rnode,
-           rnode->rn_count));
+       KASSERT(powerof2(rnode->rn_popmap),
+           ("vm_radix_node_put: rnode %p has too many children %04x", rnode,
+           rnode->rn_popmap));
        for (slot = 0; slot < VM_RADIX_COUNT; slot++) {
-               if (slot == last)
+               if ((rnode->rn_popmap & (1 << slot)) != 0)
                        continue;
                KASSERT(smr_unserialized_load(&rnode->rn_child[slot], true) ==
                    NULL, ("vm_radix_node_put: rnode %p has a child", rnode));
        }
 #endif
-       /* Off by one so a freshly zero'd node is not assigned to. */
-       rnode->rn_last = last + 1;
        uma_zfree_smr(vm_radix_node_zone, rnode);
 }
 
@@ -284,6 +292,9 @@ vm_radix_addpage(struct vm_radix_node *rnode, vm_pindex_t 
index, uint16_t clev,
        slot = vm_radix_slot(index, clev);
        vm_radix_node_store(&rnode->rn_child[slot],
            vm_radix_toleaf(page), access);
+       rnode->rn_popmap ^= 1 << slot;
+       KASSERT((rnode->rn_popmap & (1 << slot)) != 0,
+           ("%s: bad popmap slot %d in rnode %p", __func__, slot, rnode));
 }
 
 /*
@@ -330,19 +341,19 @@ vm_radix_reclaim_allnodes_int(struct vm_radix_node *rnode)
        struct vm_radix_node *child;
        int slot;
 
-       KASSERT(rnode->rn_count <= VM_RADIX_COUNT,
-           ("vm_radix_reclaim_allnodes_int: bad count in rnode %p", rnode));
-       for (slot = 0; rnode->rn_count != 0; slot++) {
+       while (rnode->rn_popmap != 0) {
+               slot = ffs(rnode->rn_popmap) - 1;
                child = vm_radix_node_load(&rnode->rn_child[slot],
                    UNSERIALIZED);
-               if (child == NULL)
-                       continue;
+               KASSERT(child != NULL, ("%s: bad popmap slot %d in rnode %p",
+                   __func__, slot, rnode));
                if (!vm_radix_isleaf(child))
                        vm_radix_reclaim_allnodes_int(child);
-               vm_radix_node_store(&rnode->rn_child[slot], NULL, UNSERIALIZED);
-               rnode->rn_count--;
+               rnode->rn_popmap ^= 1 << slot;
+               vm_radix_node_store(&rnode->rn_child[slot], NULL,
+                   UNSERIALIZED);
        }
-       vm_radix_node_put(rnode, -1);
+       vm_radix_node_put(rnode);
 }
 
 #ifndef UMA_MD_SMALL_ALLOC
@@ -430,7 +441,6 @@ vm_radix_insert(struct vm_radix *rtree, vm_page_t page)
                parentp = &rnode->rn_child[slot];
                tmp = vm_radix_node_load(parentp, LOCKED);
                if (tmp == NULL) {
-                       rnode->rn_count++;
                        vm_radix_addpage(rnode, index, rnode->rn_clev, page,
                            LOCKED);
                        return (0);
@@ -452,6 +462,7 @@ vm_radix_insert(struct vm_radix *rtree, vm_page_t page)
        /* These writes are not yet visible due to ordering. */
        vm_radix_addpage(tmp, index, clev, page, UNSERIALIZED);
        vm_radix_node_store(&tmp->rn_child[slot], rnode, UNSERIALIZED);
+       tmp->rn_popmap ^= 1 << slot;
        /* Serializing write to make the above visible. */
        vm_radix_node_store(parentp, tmp, LOCKED);
 
@@ -522,7 +533,6 @@ vm_page_t
 vm_radix_lookup_ge(struct vm_radix *rtree, vm_pindex_t index)
 {
        struct vm_radix_node *stack[VM_RADIX_LIMIT];
-       vm_pindex_t inc;
        vm_page_t m;
        struct vm_radix_node *child, *rnode;
 #ifdef INVARIANTS
@@ -589,35 +599,23 @@ ascend:
                } else if (child != NULL)
                        goto descend;
 
-               /*
-                * Look for an available edge or page within the current
-                * bisection node.
-                */
-                if (slot < (VM_RADIX_COUNT - 1)) {
-                       inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
-                       index = vm_radix_trimkey(index, rnode->rn_clev);
-                       do {
-                               index += inc;
-                               slot++;
-                               child = 
vm_radix_node_load(&rnode->rn_child[slot],
-                                   LOCKED);
-                               if (vm_radix_isleaf(child)) {
-                                       m = vm_radix_topage(child);
-                                       KASSERT(m->pindex >= index,
-                                           ("vm_radix_lookup_ge: leaf<index"));
-                                       return (m);
-                               } else if (child != NULL)
-                                       goto descend;
-                       } while (slot < (VM_RADIX_COUNT - 1));
+               /* Find the first set bit beyond the first slot+1 bits. */
+               slot = ffs(rnode->rn_popmap & (-2 << slot)) - 1;
+               if (slot < 0) {
+                       /*
+                        * A page or edge greater than the search slot is not
+                        * found in the current node; ascend to the next
+                        * higher-level node.
+                        */
+                       goto ascend;
                }
-               KASSERT(child == NULL || vm_radix_isleaf(child),
-                   ("vm_radix_lookup_ge: child is radix node"));
-
-               /*
-                * If a page or edge greater than the search slot is not found
-                * in the current node, ascend to the next higher-level node.
-                */
-               goto ascend;
+               child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
+               KASSERT(child != NULL, ("%s: bad popmap slot %d in rnode %p",
+                   __func__, slot, rnode));
+               if (vm_radix_isleaf(child))
+                       return (vm_radix_topage(child));
+               index = vm_radix_trimkey(index, rnode->rn_clev + 1) +
+                   slot * VM_RADIX_UNITLEVEL(rnode->rn_clev);
 descend:
                KASSERT(rnode->rn_clev > 0,
                    ("vm_radix_lookup_ge: pushing leaf's parent"));
@@ -635,7 +633,6 @@ vm_page_t
 vm_radix_lookup_le(struct vm_radix *rtree, vm_pindex_t index)
 {
        struct vm_radix_node *stack[VM_RADIX_LIMIT];
-       vm_pindex_t inc;
        vm_page_t m;
        struct vm_radix_node *child, *rnode;
 #ifdef INVARIANTS
@@ -704,35 +701,23 @@ ascend:
                } else if (child != NULL)
                        goto descend;
 
-               /*
-                * Look for an available edge or page within the current
-                * bisection node.
-                */
-               if (slot > 0) {
-                       inc = VM_RADIX_UNITLEVEL(rnode->rn_clev);
-                       index |= inc - 1;
-                       do {
-                               index -= inc;
-                               slot--;
-                               child = 
vm_radix_node_load(&rnode->rn_child[slot],
-                                   LOCKED);
-                               if (vm_radix_isleaf(child)) {
-                                       m = vm_radix_topage(child);
-                                       KASSERT(m->pindex <= index,
-                                           ("vm_radix_lookup_le: leaf>index"));
-                                       return (m);
-                               } else if (child != NULL)
-                                       goto descend;
-                       } while (slot > 0);
+               /* Find the last set bit among the first slot bits. */
+               slot = fls(rnode->rn_popmap & ((1 << slot) - 1)) - 1;
+               if (slot < 0) {
+                       /*
+                        * A page or edge smaller than the search slot is not
+                        * found in the current node; ascend to the next
+                        * higher-level node.
+                        */
+                       goto ascend;
                }
-               KASSERT(child == NULL || vm_radix_isleaf(child),
-                   ("vm_radix_lookup_le: child is radix node"));
-
-               /*
-                * If a page or edge smaller than the search slot is not found
-                * in the current node, ascend to the next higher-level node.
-                */
-               goto ascend;
+               child = vm_radix_node_load(&rnode->rn_child[slot], LOCKED);
+               KASSERT(child != NULL, ("%s: bad popmap slot %d in rnode %p",
+                   __func__, slot, rnode));
+               if (vm_radix_isleaf(child))
+                       return (vm_radix_topage(child));
+               index = vm_radix_trimkey(index, rnode->rn_clev + 1) +
+                   (slot + 1) * VM_RADIX_UNITLEVEL(rnode->rn_clev) - 1;
 descend:
                KASSERT(rnode->rn_clev > 0,
                    ("vm_radix_lookup_le: pushing leaf's parent"));
@@ -752,7 +737,7 @@ vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
 {
        struct vm_radix_node *rnode, *parent, *tmp;
        vm_page_t m;
-       int i, slot;
+       int slot;
 
        rnode = vm_radix_root_load(rtree, LOCKED);
        if (vm_radix_isleaf(rnode)) {
@@ -772,19 +757,21 @@ vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
                        m = vm_radix_topage(tmp);
                        if (m->pindex != index)
                                return (NULL);
+                       KASSERT((rnode->rn_popmap & (1 << slot)) != 0,
+                           ("%s: bad popmap slot %d in rnode %p",
+                           __func__, slot, rnode));
+                       rnode->rn_popmap ^= 1 << slot;
                        vm_radix_node_store(
                            &rnode->rn_child[slot], NULL, LOCKED);
-                       rnode->rn_count--;
-                       if (rnode->rn_count > 1)
+                       if (!powerof2(rnode->rn_popmap))
                                return (m);
-                       for (i = 0; i < VM_RADIX_COUNT; i++) {
-                               tmp = vm_radix_node_load(&rnode->rn_child[i],
-                                   LOCKED);
-                               if (tmp != NULL)
-                                       break;
-                       }
+                       KASSERT(rnode->rn_popmap != 0,
+                           ("%s: bad popmap all zeroes", __func__));
+                       slot = ffs(rnode->rn_popmap) - 1;
+                       tmp = vm_radix_node_load(&rnode->rn_child[slot], 
LOCKED);
                        KASSERT(tmp != NULL,
-                           ("%s: invalid node configuration", __func__));
+                           ("%s: bad popmap slot %d in rnode %p",
+                           __func__, slot, rnode));
                        if (parent == NULL)
                                vm_radix_root_store(rtree, tmp, LOCKED);
                        else {
@@ -799,8 +786,7 @@ vm_radix_remove(struct vm_radix *rtree, vm_pindex_t index)
                         * The child is still valid and we can not zero the
                         * pointer until all smr references are gone.
                         */
-                       rnode->rn_count--;
-                       vm_radix_node_put(rnode, i);
+                       vm_radix_node_put(rnode);
                        return (m);
                }
                parent = rnode;
@@ -880,21 +866,22 @@ vm_radix_wait(void)
 DB_SHOW_COMMAND(radixnode, db_show_radixnode)
 {
        struct vm_radix_node *rnode, *tmp;
-       int i;
+       int slot;
+       rn_popmap_t popmap;
 
         if (!have_addr)
                 return;
        rnode = (struct vm_radix_node *)addr;
-       db_printf("radixnode %p, owner %jx, children count %u, level %u:\n",
-           (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_count,
+       db_printf("radixnode %p, owner %jx, children popmap %04x, level %u:\n",
+           (void *)rnode, (uintmax_t)rnode->rn_owner, rnode->rn_popmap,
            rnode->rn_clev);
-       for (i = 0; i < VM_RADIX_COUNT; i++) {
-               tmp = vm_radix_node_load(&rnode->rn_child[i], UNSERIALIZED);
-               if (tmp != NULL)
-                       db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
-                           i, (void *)tmp,
-                           vm_radix_isleaf(tmp) ?  vm_radix_topage(tmp) : NULL,
-                           rnode->rn_clev);
+       for (popmap = rnode->rn_popmap; popmap != 0; popmap ^= 1 << slot) {
+               slot = ffs(popmap) - 1;
+               tmp = vm_radix_node_load(&rnode->rn_child[slot], UNSERIALIZED);
+               db_printf("slot: %d, val: %p, page: %p, clev: %d\n",
+                   slot, (void *)tmp,
+                   vm_radix_isleaf(tmp) ?  vm_radix_topage(tmp) : NULL,
+                   rnode->rn_clev);
        }
 }
 #endif /* DDB */

Reply via email to