The branch main has been updated by asomers:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=d3f96f661050e9bd21fe29931992a8b9e67ff189

commit d3f96f661050e9bd21fe29931992a8b9e67ff189
Author:     Alan Somers <asom...@freebsd.org>
AuthorDate: 2022-09-07 14:12:49 +0000
Commit:     Alan Somers <asom...@freebsd.org>
CommitDate: 2022-09-27 00:03:34 +0000

    Fix O(n^2) behavior in sysctl
    
    Sysctl OIDs were internally stored in linked lists, triggering O(n^2)
    behavior when userland iterates over many of them.  The slowdown is
    noticeable for MIBs that have > 100 children (for example, vm.uma).  But
    it's unignorable for kstat.zfs when a pool has > 1000 datasets.
    
    Convert the linked lists into RB trees.  This produces a ~25x speedup
    for listing kstat.zfs with 4100 datasets, and no measurable penalty for
    small dataset counts.
    
    Bump __FreeBSD_version for the KPI change.
    
    Sponsored by:   Axcient
    Reviewed by:    mjg
    Differential Revision: https://reviews.freebsd.org/D36500
---
 sys/compat/linuxkpi/common/include/linux/sysfs.h |   2 +-
 sys/kern/kern_sysctl.c                           | 149 +++++++++++------------
 sys/kern/vfs_init.c                              |   2 +-
 sys/sys/param.h                                  |   2 +-
 sys/sys/sysctl.h                                 |  31 ++++-
 5 files changed, 99 insertions(+), 87 deletions(-)

diff --git a/sys/compat/linuxkpi/common/include/linux/sysfs.h 
b/sys/compat/linuxkpi/common/include/linux/sysfs.h
index 0b6b479d9362..881a72e62ed9 100644
--- a/sys/compat/linuxkpi/common/include/linux/sysfs.h
+++ b/sys/compat/linuxkpi/common/include/linux/sysfs.h
@@ -246,7 +246,7 @@ sysfs_unmerge_group(struct kobject *kobj, const struct 
attribute_group *grp)
        struct attribute **attr;
        struct sysctl_oid *oidp;
 
-       SLIST_FOREACH(oidp, SYSCTL_CHILDREN(kobj->oidp), oid_link) {
+       RB_FOREACH(oidp, sysctl_oid_list, SYSCTL_CHILDREN(kobj->oidp)) {
                if (strcmp(oidp->oid_name, grp->name) != 0)
                        continue;
                for (attr = grp->attrs; *attr != NULL; attr++) {
diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c
index 9bc595f111cc..e1cd6ea4bd61 100644
--- a/sys/kern/kern_sysctl.c
+++ b/sys/kern/kern_sysctl.c
@@ -84,6 +84,8 @@ static MALLOC_DEFINE(M_SYSCTL, "sysctl", "sysctl internal 
magic");
 static MALLOC_DEFINE(M_SYSCTLOID, "sysctloid", "sysctl dynamic oids");
 static MALLOC_DEFINE(M_SYSCTLTMP, "sysctltmp", "sysctl temp output buffer");
 
+RB_GENERATE(sysctl_oid_list, sysctl_oid, oid_link, cmp_sysctl_oid);
+
 /*
  * The sysctllock protects the MIB tree.  It also protects sysctl
  * contexts used with dynamic sysctls.  The sysctl_register_oid() and
@@ -120,7 +122,7 @@ static struct sx sysctlstringlock;
 static int sysctl_root(SYSCTL_HANDLER_ARGS);
 
 /* Root list */
-struct sysctl_oid_list sysctl__children = 
SLIST_HEAD_INITIALIZER(&sysctl__children);
+struct sysctl_oid_list sysctl__children = RB_INITIALIZER(&sysctl__children);
 
 static char*   sysctl_escape_name(const char*);
 static int     sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del,
@@ -134,7 +136,7 @@ sysctl_find_oidname(const char *name, struct 
sysctl_oid_list *list)
        struct sysctl_oid *oidp;
 
        SYSCTL_ASSERT_LOCKED();
-       SLIST_FOREACH(oidp, list, oid_link) {
+       RB_FOREACH(oidp, sysctl_oid_list, list) {
                if (strcmp(oidp->oid_name, name) == 0) {
                        return (oidp);
                }
@@ -356,11 +358,14 @@ sysctl_search_oid(struct sysctl_oid **nodes, struct 
sysctl_oid *needle)
        indx = 0;
        while (indx < CTL_MAXNAME && indx >= 0) {
                if (nodes[indx] == NULL && indx == 0)
-                       nodes[indx] = SLIST_FIRST(&sysctl__children);
+                       nodes[indx] = RB_MIN(sysctl_oid_list,
+                           &sysctl__children);
                else if (nodes[indx] == NULL)
-                       nodes[indx] = SLIST_FIRST(&nodes[indx - 
1]->oid_children);
+                       nodes[indx] = RB_MIN(sysctl_oid_list,
+                           &nodes[indx - 1]->oid_children);
                else
-                       nodes[indx] = SLIST_NEXT(nodes[indx], oid_link);
+                       nodes[indx] = RB_NEXT(sysctl_oid_list,
+                           &nodes[indx - 1]->oid_children, nodes[indx]);
 
                if (nodes[indx] == needle)
                        return (indx + 1);
@@ -425,8 +430,7 @@ void
 sysctl_register_oid(struct sysctl_oid *oidp)
 {
        struct sysctl_oid_list *parent = oidp->oid_parent;
-       struct sysctl_oid *p;
-       struct sysctl_oid *q;
+       struct sysctl_oid *p, key;
        int oid_number;
        int timeout = 2;
 
@@ -476,25 +480,21 @@ sysctl_register_oid(struct sysctl_oid *oidp)
         * Insert the OID into the parent's list sorted by OID number.
         */
 retry:
-       q = NULL;
-       SLIST_FOREACH(p, parent, oid_link) {
-               /* check if the current OID number is in use */
-               if (oid_number == p->oid_number) {
-                       /* get the next valid OID number */
-                       if (oid_number < CTL_AUTO_START ||
-                           oid_number == 0x7fffffff) {
-                               /* wraparound - restart */
-                               oid_number = CTL_AUTO_START;
-                               /* don't loop forever */
-                               if (!timeout--)
-                                       panic("sysctl: Out of OID numbers\n");
-                               goto retry;
-                       } else {
-                               oid_number++;
-                       }
-               } else if (oid_number < p->oid_number)
-                       break;
-               q = p;
+       key.oid_number = oid_number;
+       p = RB_FIND(sysctl_oid_list, parent, &key);
+       if (p) {
+               /* get the next valid OID number */
+               if (oid_number < CTL_AUTO_START ||
+                   oid_number == 0x7fffffff) {
+                       /* wraparound - restart */
+                       oid_number = CTL_AUTO_START;
+                       /* don't loop forever */
+                       if (!timeout--)
+                               panic("sysctl: Out of OID numbers\n");
+                       goto retry;
+               } else {
+                       oid_number++;
+               }
        }
        /* check for non-auto OID number collision */
        if (oidp->oid_number >= 0 && oidp->oid_number < CTL_AUTO_START &&
@@ -504,10 +504,7 @@ retry:
        }
        /* update the OID number, if any */
        oidp->oid_number = oid_number;
-       if (q != NULL)
-               SLIST_INSERT_AFTER(q, oidp, oid_link);
-       else
-               SLIST_INSERT_HEAD(parent, oidp, oid_link);
+       RB_INSERT(sysctl_oid_list, parent, oidp);
 
        if ((oidp->oid_kind & CTLTYPE) != CTLTYPE_NODE &&
 #ifdef VIMAGE
@@ -556,7 +553,6 @@ sysctl_enable_oid(struct sysctl_oid *oidp)
 void
 sysctl_unregister_oid(struct sysctl_oid *oidp)
 {
-       struct sysctl_oid *p;
        int error;
 
        SYSCTL_ASSERT_WLOCKED();
@@ -564,14 +560,8 @@ sysctl_unregister_oid(struct sysctl_oid *oidp)
                error = EINVAL;
        } else {
                error = ENOENT;
-               SLIST_FOREACH(p, oidp->oid_parent, oid_link) {
-                       if (p == oidp) {
-                               SLIST_REMOVE(oidp->oid_parent, oidp,
-                                   sysctl_oid, oid_link);
-                               error = 0;
-                               break;
-                       }
-               }
+               if (RB_REMOVE(sysctl_oid_list, oidp->oid_parent, oidp))
+                       error = 0;
        }
 
        /* 
@@ -732,17 +722,14 @@ int
 sysctl_remove_name(struct sysctl_oid *parent, const char *name,
     int del, int recurse)
 {
-       struct sysctl_oid *p, *tmp;
+       struct sysctl_oid *p;
        int error;
 
        error = ENOENT;
        SYSCTL_WLOCK();
-       SLIST_FOREACH_SAFE(p, SYSCTL_CHILDREN(parent), oid_link, tmp) {
-               if (strcmp(p->oid_name, name) == 0) {
-                       error = sysctl_remove_oid_locked(p, del, recurse);
-                       break;
-               }
-       }
+       p = sysctl_find_oidname(name, &parent->oid_children);
+       if (p)
+               error = sysctl_remove_oid_locked(p, del, recurse);
        SYSCTL_WUNLOCK();
 
        return (error);
@@ -811,14 +798,16 @@ sysctl_remove_oid_locked(struct sysctl_oid *oidp, int 
del, int recurse)
         */
        if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
                if (oidp->oid_refcnt == 1) {
-                       SLIST_FOREACH_SAFE(p,
-                           SYSCTL_CHILDREN(oidp), oid_link, tmp) {
+                       for(p = RB_MIN(sysctl_oid_list, &oidp->oid_children);
+                           p != NULL; p = tmp) {
                                if (!recurse) {
                                        printf("Warning: failed attempt to "
                                            "remove oid %s with child %s\n",
                                            oidp->oid_name, p->oid_name);
                                        return (ENOTEMPTY);
                                }
+                               tmp = RB_NEXT(sysctl_oid_list,
+                                   &oidp->oid_children, p);
                                error = sysctl_remove_oid_locked(p, del,
                                    recurse);
                                if (error)
@@ -895,7 +884,7 @@ sysctl_add_oid(struct sysctl_ctx_list *clist, struct 
sysctl_oid_list *parent,
        }
        oidp = malloc(sizeof(struct sysctl_oid), M_SYSCTLOID, M_WAITOK|M_ZERO);
        oidp->oid_parent = parent;
-       SLIST_INIT(&oidp->oid_children);
+       RB_INIT(&oidp->oid_children);
        oidp->oid_number = number;
        oidp->oid_refcnt = 1;
        oidp->oid_name = escaped;
@@ -1016,7 +1005,7 @@ sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, 
int i)
        struct sysctl_oid *oidp;
 
        SYSCTL_ASSERT_LOCKED();
-       SLIST_FOREACH(oidp, l, oid_link) {
+       RB_FOREACH(oidp, sysctl_oid_list, l) {
                for (k=0; k<i; k++)
                        printf(" ");
 
@@ -1081,7 +1070,7 @@ sysctl_sysctl_name(SYSCTL_HANDLER_ARGS)
        int *name = (int *) arg1;
        u_int namelen = arg2;
        int error;
-       struct sysctl_oid *oid;
+       struct sysctl_oid *oid, key;
        struct sysctl_oid_list *lsp = &sysctl__children, *lsp2;
        struct rm_priotracker tracker;
        char buf[10];
@@ -1105,10 +1094,9 @@ sysctl_sysctl_name(SYSCTL_HANDLER_ARGS)
                        continue;
                }
                lsp2 = NULL;
-               SLIST_FOREACH(oid, lsp, oid_link) {
-                       if (oid->oid_number != *name)
-                               continue;
-
+               key.oid_number = *name;
+               oid = RB_FIND(sysctl_oid_list, lsp, &key);
+               if (oid) {
                        if (req->oldidx)
                                error = SYSCTL_OUT(req, ".", 1);
                        if (!error)
@@ -1120,14 +1108,9 @@ sysctl_sysctl_name(SYSCTL_HANDLER_ARGS)
                        namelen--;
                        name++;
 
-                       if ((oid->oid_kind & CTLTYPE) != CTLTYPE_NODE) 
-                               break;
-
-                       if (oid->oid_handler)
-                               break;
-
-                       lsp2 = SYSCTL_CHILDREN(oid);
-                       break;
+                       if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE &&
+                               !oid->oid_handler)
+                               lsp2 = SYSCTL_CHILDREN(oid);
                }
                lsp = lsp2;
        }
@@ -1239,13 +1222,25 @@ static bool
 sysctl_sysctl_next_action(struct sysctl_oid_list *lsp, int *name, u_int 
namelen, 
     int *next, int *len, int level, bool honor_skip)
 {
-       struct sysctl_oid *oidp;
+       struct sysctl_oid_list *next_lsp;
+       struct sysctl_oid *oidp = NULL, key;
        bool success = false;
        enum sysctl_iter_action action;
 
        SYSCTL_ASSERT_LOCKED();
-       SLIST_FOREACH(oidp, lsp, oid_link) {
-               action = sysctl_sysctl_next_node(oidp, name, namelen, 
honor_skip);
+       /*
+        * Start the search at the requested oid.  But if not found, then scan
+        * through all children.
+        */
+       if (namelen > 0) {
+               key.oid_number = *name;
+               oidp = RB_FIND(sysctl_oid_list, lsp, &key);
+       }
+       if (!oidp)
+               oidp = RB_MIN(sysctl_oid_list, lsp);
+       for(; oidp != NULL; oidp = RB_NEXT(sysctl_oid_list, lsp, oidp)) {
+               action = sysctl_sysctl_next_node(oidp, name, namelen,
+                   honor_skip);
                if (action == ITER_SIBLINGS)
                        continue;
                if (action == ITER_FOUND) {
@@ -1254,13 +1249,13 @@ sysctl_sysctl_next_action(struct sysctl_oid_list *lsp, 
int *name, u_int namelen,
                }
                KASSERT((action== ITER_CHILDREN), ("ret(%d)!=ITER_CHILDREN", 
action));
 
-               lsp = SYSCTL_CHILDREN(oidp);
+               next_lsp = SYSCTL_CHILDREN(oidp);
                if (namelen == 0) {
-                       success = sysctl_sysctl_next_action(lsp, NULL, 0,
+                       success = sysctl_sysctl_next_action(next_lsp, NULL, 0,
                            next + 1, len, level + 1, honor_skip);
                } else {
-                       success = sysctl_sysctl_next_action(lsp, name + 1, 
namelen - 1,
-                           next + 1, len, level + 1, honor_skip);
+                       success = sysctl_sysctl_next_action(next_lsp, name + 1,
+                           namelen - 1, next + 1, len, level + 1, honor_skip);
                        if (!success) {
 
                                /*
@@ -1332,13 +1327,12 @@ name2oid(char *name, int *oid, int *len, struct 
sysctl_oid **oidpp)
        for (*len = 0; *len < CTL_MAXNAME;) {
                p = strsep(&name, ".");
 
-               oidp = SLIST_FIRST(lsp);
-               for (;; oidp = SLIST_NEXT(oidp, oid_link)) {
-                       if (oidp == NULL)
-                               return (ENOENT);
+               RB_FOREACH(oidp, sysctl_oid_list, lsp) {
                        if (strcmp(p, oidp->oid_name) == 0)
                                break;
                }
+               if (oidp == NULL)
+                       return (ENOENT);
                *oid++ = oidp->oid_number;
                (*len)++;
 
@@ -2162,16 +2156,15 @@ sysctl_find_oid(int *name, u_int namelen, struct 
sysctl_oid **noid,
 {
        struct sysctl_oid_list *lsp;
        struct sysctl_oid *oid;
+       struct sysctl_oid key;
        int indx;
 
        SYSCTL_ASSERT_LOCKED();
        lsp = &sysctl__children;
        indx = 0;
        while (indx < CTL_MAXNAME) {
-               SLIST_FOREACH(oid, lsp, oid_link) {
-                       if (oid->oid_number == name[indx])
-                               break;
-               }
+               key.oid_number = name[indx];
+               oid = RB_FIND(sysctl_oid_list, lsp, &key);
                if (oid == NULL)
                        return (ENOENT);
 
diff --git a/sys/kern/vfs_init.c b/sys/kern/vfs_init.c
index 6572a8e362c2..6e2e78aaf597 100644
--- a/sys/kern/vfs_init.c
+++ b/sys/kern/vfs_init.c
@@ -525,7 +525,7 @@ vfs_register(struct vfsconf *vfc)
         * number.
         */
        sysctl_wlock();
-       SLIST_FOREACH(oidp, SYSCTL_CHILDREN(&sysctl___vfs), oid_link) {
+       RB_FOREACH(oidp, sysctl_oid_list, SYSCTL_CHILDREN(&sysctl___vfs)) {
                if (strcmp(oidp->oid_name, vfc->vfc_name) == 0) {
                        sysctl_unregister_oid(oidp);
                        oidp->oid_number = vfc->vfc_typenum;
diff --git a/sys/sys/param.h b/sys/sys/param.h
index f875d839d41f..3f5da06ef951 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -76,7 +76,7 @@
  * cannot include sys/param.h and should only be updated here.
  */
 #undef __FreeBSD_version
-#define __FreeBSD_version 1400070
+#define __FreeBSD_version 1400071
 
 /*
  * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
diff --git a/sys/sys/sysctl.h b/sys/sys/sysctl.h
index 451d83bbe125..3bd77cf87243 100644
--- a/sys/sys/sysctl.h
+++ b/sys/sys/sysctl.h
@@ -39,7 +39,8 @@
 #define        _SYS_SYSCTL_H_
 
 #ifdef _KERNEL
-#include <sys/queue.h>
+#include <sys/tree.h>
+#include <sys/systm.h>
 #endif
 
 /*
@@ -173,20 +174,25 @@ struct sysctl_req {
        int              flags;
 };
 
-SLIST_HEAD(sysctl_oid_list, sysctl_oid);
+struct sysctl_oid;
+
+/* RB Tree handling */
+RB_HEAD(sysctl_oid_list, sysctl_oid);
 
 /*
  * This describes one "oid" in the MIB tree.  Potentially more nodes can
  * be hidden behind it, expanded by the handler.
  */
 struct sysctl_oid {
-       struct sysctl_oid_list oid_children;
-       struct sysctl_oid_list *oid_parent;
-       SLIST_ENTRY(sysctl_oid) oid_link;
+       struct sysctl_oid_list  oid_children;
+       struct sysctl_oid_list* oid_parent;
+       RB_ENTRY(sysctl_oid) oid_link;
+       /* Sort key for all siblings, and lookup key for userland */
        int              oid_number;
        u_int            oid_kind;
        void            *oid_arg1;
        intmax_t         oid_arg2;
+       /* Must be unique amongst all siblings. */
        const char      *oid_name;
        int             (*oid_handler)(SYSCTL_HANDLER_ARGS);
        const char      *oid_fmt;
@@ -196,6 +202,19 @@ struct sysctl_oid {
        const char      *oid_label;
 };
 
+static inline int
+cmp_sysctl_oid(struct sysctl_oid *a, struct sysctl_oid *b)
+{
+       if (a->oid_number > b->oid_number)
+               return (1);
+       else if (a->oid_number < b->oid_number)
+               return (-1);
+       else
+               return (0);
+}
+
+RB_PROTOTYPE(sysctl_oid_list, sysctl_oid, oid_link, cmp_sysctl_oid);
+
 #define        SYSCTL_IN(r, p, l)      (r->newfunc)(r, p, l)
 #define        SYSCTL_OUT(r, p, l)     (r->oldfunc)(r, p, l)
 #define        SYSCTL_OUT_STR(r, p)    (r->oldfunc)(r, p, strlen(p) + 1)
@@ -275,7 +294,7 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry);
 #define        SYSCTL_OID_RAW(id, parent_child_head, nbr, name, kind, a1, a2, 
handler, fmt, descr, label) \
        struct sysctl_oid id = {                                        \
                .oid_parent = (parent_child_head),                      \
-               .oid_children = SLIST_HEAD_INITIALIZER(&id.oid_children), \
+               .oid_children = RB_INITIALIZER(&id.oid_children), \
                .oid_number = (nbr),                                    \
                .oid_kind = (kind),                                     \
                .oid_arg1 = (a1),                                       \

Reply via email to