git: 120ca8d74b46 - main - Re-introduce kern.sched.topology_spec

Konstantin Belousov Mon, 02 Feb 2026 18:43:35 -0800

The branch main has been updated by kib:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=120ca8d74b46caa260702485e30fe5f9f9984682


commit 120ca8d74b46caa260702485e30fe5f9f9984682
Author:     Konstantin Belousov <[email protected]>
AuthorDate: 2026-02-02 20:22:46 +0000
Commit:     Konstantin Belousov <[email protected]>
CommitDate: 2026-02-03 02:43:18 +0000

    Re-introduce kern.sched.topology_spec
    
    Move it back from kern.sched.ule.topology_spec.
    Make it scheduler-agnostic.
    Provide trivial report for UP kernels.
    
    Apparently the MIB is used by some third-party software.  Obviously it
    did not work on UP or 4BSD configs.
    
    PR:     292574
    Reviewed by:    olce
    Sponsored by:   The FreeBSD Foundation
    MFC after:      1 week
    Differential revision:  https://reviews.freebsd.org/D55062
---
 sys/kern/sched_shim.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++
 sys/kern/sched_ule.c  | 92 ---------------------------------------------------
 sys/kern/subr_smp.c   | 81 ++++++++++++++++++++++++++-------------------
 sys/sys/smp.h         |  9 +++--
 4 files changed, 144 insertions(+), 129 deletions(-)

diff --git a/sys/kern/sched_shim.c b/sys/kern/sched_shim.c
index ec5c42c37aab..83e4412494d3 100644
--- a/sys/kern/sched_shim.c
+++ b/sys/kern/sched_shim.c
@@ -16,6 +16,7 @@
 #include <sys/runq.h>
 #include <sys/sbuf.h>
 #include <sys/sched.h>
+#include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <machine/ifunc.h>
 
@@ -171,9 +172,12 @@ schedinit(void)
        active_sched->init();
 }
 
+struct cpu_group __read_mostly *cpu_top;               /* CPU topology */
+
 static void
 sched_setup(void *dummy)
 {
+       cpu_top = smp_topo();
        active_sched->setup();
 }
 SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL);
@@ -232,3 +236,90 @@ SYSCTL_PROC(_kern_sched, OID_AUTO, available,
 fixpt_t ccpu;
 SYSCTL_UINT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0,
     "Decay factor used for updating %CPU");
+
+/*
+ * Build the CPU topology dump string. Is recursively called to collect
+ * the topology tree.
+ */
+static int
+sysctl_kern_sched_topology_spec_internal(struct sbuf *sb,
+    struct cpu_group *cg, int indent)
+{
+       char cpusetbuf[CPUSETBUFSIZ];
+       int i, first;
+
+       if (cpu_top == NULL) {
+               sbuf_printf(sb, "%*s<group level=\"1\" cache-level=\"1\">\n",
+                   indent, "");
+               sbuf_printf(sb, "%*s</group>\n", indent, "");
+               return (0);
+       }
+
+       sbuf_printf(sb, "%*s<group level=\"%d\" cache-level=\"%d\">\n", indent,
+           "", 1 + indent / 2, cg->cg_level);
+       sbuf_printf(sb, "%*s <cpu count=\"%d\" mask=\"%s\">", indent, "",
+           cg->cg_count, cpusetobj_strprint(cpusetbuf, &cg->cg_mask));
+       first = TRUE;
+       for (i = cg->cg_first; i <= cg->cg_last; i++) {
+               if (CPU_ISSET(i, &cg->cg_mask)) {
+                       if (!first)
+                               sbuf_cat(sb, ", ");
+                       else
+                               first = FALSE;
+                       sbuf_printf(sb, "%d", i);
+               }
+       }
+       sbuf_cat(sb, "</cpu>\n");
+
+       if (cg->cg_flags != 0) {
+               sbuf_printf(sb, "%*s <flags>", indent, "");
+               if ((cg->cg_flags & CG_FLAG_HTT) != 0)
+                       sbuf_cat(sb, "<flag name=\"HTT\">HTT group</flag>");
+               if ((cg->cg_flags & CG_FLAG_THREAD) != 0)
+                       sbuf_cat(sb, "<flag name=\"THREAD\">THREAD group</flag>");
+               if ((cg->cg_flags & CG_FLAG_SMT) != 0)
+                       sbuf_cat(sb, "<flag name=\"SMT\">SMT group</flag>");
+               if ((cg->cg_flags & CG_FLAG_NODE) != 0)
+                       sbuf_cat(sb, "<flag name=\"NODE\">NUMA node</flag>");
+               sbuf_cat(sb, "</flags>\n");
+       }
+
+       if (cg->cg_children > 0) {
+               sbuf_printf(sb, "%*s <children>\n", indent, "");
+               for (i = 0; i < cg->cg_children; i++)
+                       sysctl_kern_sched_topology_spec_internal(sb,
+                           &cg->cg_child[i], indent + 2);
+               sbuf_printf(sb, "%*s </children>\n", indent, "");
+       }
+       sbuf_printf(sb, "%*s</group>\n", indent, "");
+       return (0);
+}
+
+/*
+ * Sysctl handler for retrieving topology dump. It's a wrapper for
+ * the recursive sysctl_kern_sched_topology_spec_internal().
+ */
+static int
+sysctl_kern_sched_topology_spec(SYSCTL_HANDLER_ARGS)
+{
+       struct sbuf *topo;
+       int err;
+
+       topo = sbuf_new_for_sysctl(NULL, NULL, 512, req);
+       if (topo == NULL)
+               return (ENOMEM);
+
+       sbuf_cat(topo, "<groups>\n");
+       err = sysctl_kern_sched_topology_spec_internal(topo, cpu_top, 1);
+       sbuf_cat(topo, "</groups>\n");
+
+       if (err == 0)
+               err = sbuf_finish(topo);
+       sbuf_delete(topo);
+       return (err);
+}
+
+SYSCTL_PROC(_kern_sched, OID_AUTO, topology_spec, CTLTYPE_STRING |
+    CTLFLAG_MPSAFE | CTLFLAG_RD, NULL, 0,
+    sysctl_kern_sched_topology_spec, "A",
+    "XML dump of detected CPU topology");
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
index ccad7947c4f5..c6bfe15e768b 100644
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -304,7 +304,6 @@ struct tdq {
                                 atomic_load_short(&(tdq)->tdq_switchcnt) + 1))
 
 #ifdef SMP
-struct cpu_group __read_mostly *cpu_top;               /* CPU topology */
 
 #define        SCHED_AFFINITY_DEFAULT  (max(1, hz / 1000))
 /*
@@ -398,9 +397,6 @@ static void sched_balance(void);
 static bool sched_balance_pair(struct tdq *, struct tdq *);
 static inline struct tdq *sched_setcpu(struct thread *, int, int);
 static inline void thread_unblock_switch(struct thread *, struct mtx *);
-static int sysctl_kern_sched_ule_topology_spec(SYSCTL_HANDLER_ARGS);
-static int sysctl_kern_sched_ule_topology_spec_internal(struct sbuf *sb,
-    struct cpu_group *cg, int indent);
 #endif
 
 /*
@@ -1590,7 +1586,6 @@ sched_setup_smp(void)
        struct tdq *tdq;
        int i;
 
-       cpu_top = smp_topo();
        CPU_FOREACH(i) {
                tdq = DPCPU_ID_PTR(i, tdq);
                tdq_setup(tdq, i);
@@ -3452,89 +3447,6 @@ struct sched_instance sched_ule_instance = {
 };
 DECLARE_SCHEDULER(ule_sched_selector, "ULE", &sched_ule_instance);
 
-#ifdef SMP
-
-/*
- * Build the CPU topology dump string. Is recursively called to collect
- * the topology tree.
- */
-static int
-sysctl_kern_sched_ule_topology_spec_internal(struct sbuf *sb,
-    struct cpu_group *cg, int indent)
-{
-       char cpusetbuf[CPUSETBUFSIZ];
-       int i, first;
-
-       sbuf_printf(sb, "%*s<group level=\"%d\" cache-level=\"%d\">\n", indent,
-           "", 1 + indent / 2, cg->cg_level);
-       sbuf_printf(sb, "%*s <cpu count=\"%d\" mask=\"%s\">", indent, "",
-           cg->cg_count, cpusetobj_strprint(cpusetbuf, &cg->cg_mask));
-       first = TRUE;
-       for (i = cg->cg_first; i <= cg->cg_last; i++) {
-               if (CPU_ISSET(i, &cg->cg_mask)) {
-                       if (!first)
-                               sbuf_cat(sb, ", ");
-                       else
-                               first = FALSE;
-                       sbuf_printf(sb, "%d", i);
-               }
-       }
-       sbuf_cat(sb, "</cpu>\n");
-
-       if (cg->cg_flags != 0) {
-               sbuf_printf(sb, "%*s <flags>", indent, "");
-               if ((cg->cg_flags & CG_FLAG_HTT) != 0)
-                       sbuf_cat(sb, "<flag name=\"HTT\">HTT group</flag>");
-               if ((cg->cg_flags & CG_FLAG_THREAD) != 0)
-                       sbuf_cat(sb, "<flag name=\"THREAD\">THREAD group</flag>");
-               if ((cg->cg_flags & CG_FLAG_SMT) != 0)
-                       sbuf_cat(sb, "<flag name=\"SMT\">SMT group</flag>");
-               if ((cg->cg_flags & CG_FLAG_NODE) != 0)
-                       sbuf_cat(sb, "<flag name=\"NODE\">NUMA node</flag>");
-               sbuf_cat(sb, "</flags>\n");
-       }
-
-       if (cg->cg_children > 0) {
-               sbuf_printf(sb, "%*s <children>\n", indent, "");
-               for (i = 0; i < cg->cg_children; i++)
-                       sysctl_kern_sched_ule_topology_spec_internal(sb,
-                           &cg->cg_child[i], indent+2);
-               sbuf_printf(sb, "%*s </children>\n", indent, "");
-       }
-       sbuf_printf(sb, "%*s</group>\n", indent, "");
-       return (0);
-}
-
-/*
- * Sysctl handler for retrieving topology dump. It's a wrapper for
- * the recursive sysctl_kern_smp_topology_spec_internal().
- */
-static int
-sysctl_kern_sched_ule_topology_spec(SYSCTL_HANDLER_ARGS)
-{
-       struct sbuf *topo;
-       int err;
-
-       if (cpu_top == NULL)
-               return (ENOTTY);
-
-       topo = sbuf_new_for_sysctl(NULL, NULL, 512, req);
-       if (topo == NULL)
-               return (ENOMEM);
-
-       sbuf_cat(topo, "<groups>\n");
-       err = sysctl_kern_sched_ule_topology_spec_internal(topo, cpu_top, 1);
-       sbuf_cat(topo, "</groups>\n");
-
-       if (err == 0) {
-               err = sbuf_finish(topo);
-       }
-       sbuf_delete(topo);
-       return (err);
-}
-
-#endif
-
 static int
 sysctl_kern_quantum(SYSCTL_HANDLER_ARGS)
 {
@@ -3597,8 +3509,4 @@ SYSCTL_INT(_kern_sched_ule, OID_AUTO, trysteal_limit, CTLFLAG_RWTUN,
 SYSCTL_INT(_kern_sched_ule, OID_AUTO, always_steal, CTLFLAG_RWTUN,
     &always_steal, 0,
     "Always run the stealer from the idle thread");
-SYSCTL_PROC(_kern_sched_ule, OID_AUTO, topology_spec, CTLTYPE_STRING |
-    CTLFLAG_MPSAFE | CTLFLAG_RD, NULL, 0,
-    sysctl_kern_sched_ule_topology_spec, "A",
-    "XML dump of detected CPU topology");
 #endif
diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c
index 353a69435971..2256ba648e4d 100644
--- a/sys/kern/subr_smp.c
+++ b/sys/kern/subr_smp.c
@@ -50,9 +50,43 @@
 
 #include "opt_sched.h"
 
-#ifdef SMP
 MALLOC_DEFINE(M_TOPO, "toponodes", "SMP topology data");
 
+struct cpu_group *
+smp_topo_alloc(u_int count)
+{
+       static struct cpu_group *group = NULL;
+       static u_int index;
+       u_int curr;
+
+       if (group == NULL) {
+               group = mallocarray((mp_maxid + 1) * MAX_CACHE_LEVELS + 1,
+                   sizeof(*group), M_DEVBUF, M_WAITOK | M_ZERO);
+       }
+       curr = index;
+       index += count;
+       return (&group[curr]);
+}
+
+struct cpu_group *
+smp_topo_none(void)
+{
+       struct cpu_group *top;
+
+       top = smp_topo_alloc(1);
+       top->cg_parent = NULL;
+       top->cg_child = NULL;
+       top->cg_mask = all_cpus;
+       top->cg_count = mp_ncpus;
+       top->cg_children = 0;
+       top->cg_level = CG_SHARE_NONE;
+       top->cg_flags = 0;
+
+       return (top);
+}
+
+#ifdef SMP
+
 volatile cpuset_t stopped_cpus;
 volatile cpuset_t started_cpus;
 volatile cpuset_t suspended_cpus;
@@ -731,39 +765,6 @@ smp_topo(void)
        return (top);
 }
 
-struct cpu_group *
-smp_topo_alloc(u_int count)
-{
-       static struct cpu_group *group = NULL;
-       static u_int index;
-       u_int curr;
-
-       if (group == NULL) {
-               group = mallocarray((mp_maxid + 1) * MAX_CACHE_LEVELS + 1,
-                   sizeof(*group), M_DEVBUF, M_WAITOK | M_ZERO);
-       }
-       curr = index;
-       index += count;
-       return (&group[curr]);
-}
-
-struct cpu_group *
-smp_topo_none(void)
-{
-       struct cpu_group *top;
-
-       top = smp_topo_alloc(1);
-       top->cg_parent = NULL;
-       top->cg_child = NULL;
-       top->cg_mask = all_cpus;
-       top->cg_count = mp_ncpus;
-       top->cg_children = 0;
-       top->cg_level = CG_SHARE_NONE;
-       top->cg_flags = 0;
-
-       return (top);
-}
-
 static int
 smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share,
     int count, int flags, int start)
@@ -901,6 +902,18 @@ smp_rendezvous(void (*setup_func)(void *),
            arg);
 }
 
+struct cpu_group *
+smp_topo(void)
+{
+       static struct cpu_group *top = NULL;
+
+       if (top != NULL)
+               return (top);
+
+       top = smp_topo_none();
+       return (top);
+}
+
 /*
  * Provide dummy SMP support for UP kernels.  Modules that need to use SMP
  * APIs will still work using this dummy support.
diff --git a/sys/sys/smp.h b/sys/sys/smp.h
index fdb69b13c0d4..493dc91043bd 100644
--- a/sys/sys/smp.h
+++ b/sys/sys/smp.h
@@ -89,6 +89,8 @@ struct cpu_group {
 
 typedef struct cpu_group *cpu_group_t;
 
+extern cpu_group_t cpu_top;
+
 /*
  * Defines common resources for CPUs in the group.  The highest level
  * resource should be used when multiple are shared.
@@ -147,9 +149,6 @@ int topo_analyze(struct topo_node *topo_root, int all,
 #define        TOPO_FOREACH(i, root)   \
        for (i = root; i != NULL; i = topo_next_node(root, i))
 
-struct cpu_group *smp_topo(void);
-struct cpu_group *smp_topo_alloc(u_int count);
-struct cpu_group *smp_topo_none(void);
 struct cpu_group *smp_topo_1level(int l1share, int l1count, int l1flags);
 struct cpu_group *smp_topo_2level(int l2share, int l2count, int l1share,
     int l1count, int l1flags);
@@ -166,6 +165,10 @@ extern cpuset_t hlt_cpus_mask;             /* XXX 'mask' is detail in old impl */
 extern cpuset_t logical_cpus_mask;
 #endif /* SMP */
 
+struct cpu_group *smp_topo(void);
+struct cpu_group *smp_topo_alloc(u_int count);
+struct cpu_group *smp_topo_none(void);
+
 extern u_int mp_maxid;
 extern int mp_maxcpus;
 extern int mp_ncores;

git: 120ca8d74b46 - main - Re-introduce kern.sched.topology_spec

Reply via email to