Heya, 

I have had this rotting in my tree, since actually using it effectively is
way harder than it seems. Anyhow, this correctly builds the cpu topology on
amd64; we now know 3 things about each cpu:

- thread id (smt id)
- core id
- package id

This is not complete, but it is enough IMHO; it lacks x2apic detection.
I've tried to trim it down, but the mask logic is still a bit cryptic.

Note: I left a printf in so that the result shows up in dmesg and people
can test; I intend to remove it if this goes in.

an atom d270 reports the following:
cpu0: smt 0, core 0, package 0
cpu1: smt 1, core 0, package 0
cpu2: smt 0, core 1, package 0
cpu3: smt 1, core 1, package 0

a core2duo L7500:
cpu0: smt 0, core 0, package 0
cpu1: smt 0, core 1, package 0

Do we want this?

Index: arch/amd64/amd64/identcpu.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/identcpu.c,v
retrieving revision 1.36
diff -d -u -p -r1.36 identcpu.c
--- arch/amd64/amd64/identcpu.c 22 Apr 2012 19:36:09 -0000      1.36
+++ arch/amd64/amd64/identcpu.c 8 Jul 2012 09:03:02 -0000
@@ -446,4 +446,123 @@ identifycpu(struct cpu_info *ci)
                sensordev_install(&ci->ci_sensordev);
 #endif
        }
+
+       cpu_topology(ci);
+}
+
+/*
+ * Floor of the base 2 logarithm of i; 0 is mapped to 0 instead of failing.
+ */
+static int
+log2(unsigned int i)
+{
+       int shifts;
+
+       for (shifts = 0; i > 1; i >>= 1)
+               shifts++;
+
+       return (shifts);
+}
+
+/* Bits needed to number x distinct ids, i.e. ceil(log2(x)); 0 for x < 2. */
+static int
+mask_width(unsigned int x)
+{
+       int bits = 0;
+
+       /*
+        * Count the bit length of x - 1 rather than fls((x << 1) - 1):
+        * the latter is one bit too wide when x is not a power of two
+        * (3 would give 3) and yields 32 for x == 0.
+        */
+       if (x < 2)
+               return (0);
+       for (x--; x != 0; x >>= 1)
+               bits++;
+
+       return (bits);
+}
+
+/*
+ * Derive ci_smt_id, ci_core_id and ci_pkg_id from the initial apicid.
+ * CPUID describes the cpu that executes it, so this must run on the
+ * core itself.  x2apic enumeration is not handled.  If nothing can be
+ * decoded, ci_core_id falls back to ci_cpuid and the other ids to 0.
+ */
+void
+cpu_topology(struct cpu_info *ci)
+{
+       u_int32_t eax, ebx, ecx, edx, max_leaf;
+       /* Intel-only values start at 0 so the AMD DEBUG print is defined */
+       u_int32_t apicid, max_apicid, max_coreid = 0, smt_count;
+       u_int32_t smt_bits = 0, core_bits, pkg_bits = 0;
+       u_int32_t smt_mask = 0, core_mask, pkg_mask = 0;
+
+       /* We need at least apicid at CPUID 1 */
+       CPUID(0, max_leaf, ebx, ecx, edx);
+       if (max_leaf < 1)
+               goto no_topology;
+       /* Initial apicid and logical id count (one CPUID 1 for both) */
+       CPUID(1, eax, ebx, ecx, edx);
+       apicid = (ebx >> 24) & 0xff;
+       max_apicid = (ebx >> 16) & 0xff;
+
+       if (strcmp(cpu_vendor, "AuthenticAMD") == 0) {
+               /* We need at least apicid at CPUID 0x80000008 */
+               CPUID(0x80000000, eax, ebx, ecx, edx);
+               if (eax < 0x80000008)
+                       goto no_topology;
+               CPUID(0x80000008, eax, ebx, ecx, edx);
+               core_bits = (ecx >> 12) & 0xf;
+               if (core_bits == 0)
+                       goto no_topology;
+               /* So coreidsize 2 gives 3, 3 gives 7... */
+               core_mask = (1U << core_bits) - 1;
+               /* No thread level is decoded on this path */
+               ci->ci_smt_id = 0;
+               /* Core id is the least significant considering mask */
+               ci->ci_core_id = apicid & core_mask;
+               /* Pkg id is the upper remaining bits */
+               ci->ci_pkg_id = (apicid & ~core_mask) >> core_bits;
+       } else if (strcmp(cpu_vendor, "GenuineIntel") == 0) {
+               /* We only support leaf 1/4 detection */
+               if (max_leaf < 4)
+                       goto no_topology;
+               /* Get max_coreid; it must not exceed the logical id count */
+               CPUID2(4, 0, eax, ebx, ecx, edx);
+               max_coreid = ((eax >> 26) & 0x3f) + 1;
+               smt_count = max_apicid / max_coreid;
+               if (smt_count == 0)
+                       goto no_topology;
+               /* SMT */
+               smt_bits = mask_width(smt_count);
+               smt_mask = (1U << smt_bits) - 1;
+               /* Core */
+               core_bits = log2(max_coreid);
+               pkg_bits = core_bits + smt_bits;
+               core_mask = ((1U << pkg_bits) - 1) ^ smt_mask;
+               /* Pkg: every bit above the smt and core fields */
+               pkg_mask = ~0U << pkg_bits;
+               ci->ci_smt_id = apicid & smt_mask;
+               ci->ci_core_id = (apicid & core_mask) >> smt_bits;
+               ci->ci_pkg_id = (apicid & pkg_mask) >> pkg_bits;
+       } else
+               goto no_topology;
+#ifdef DEBUG
+       printf("cpu%d: smt %u, core %u, pkg %u (apicid 0x%x, max_apicid 0x%x, "
+           "max_coreid 0x%x, smt_bits 0x%x, smt_mask 0x%x, core_bits 0x%x, "
+           "core_mask 0x%x, pkg_bits 0x%x, pkg_mask 0x%x)\n",
+           ci->ci_cpuid, ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id,
+           apicid, max_apicid, max_coreid, smt_bits, smt_mask, core_bits,
+           core_mask, pkg_bits, pkg_mask);
+#else
+       printf("cpu%d: smt %u, core %u, package %u\n", ci->ci_cpuid,
+           ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id);
+#endif
+       return;
+       /* We can't map, so consider ci_core_id as ci_cpuid */
+no_topology:
+       ci->ci_smt_id  = 0;
+       ci->ci_core_id = ci->ci_cpuid;
+       ci->ci_pkg_id  = 0;
 }
Index: arch/amd64/include/cpu.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
retrieving revision 1.73
diff -d -u -p -r1.73 cpu.h
--- arch/amd64/include/cpu.h    17 Apr 2012 16:02:33 -0000      1.73
+++ arch/amd64/include/cpu.h    8 Jul 2012 07:48:10 -0000
@@ -100,7 +100,9 @@ struct cpu_info {
        u_int32_t       ci_model;
        u_int32_t       ci_cflushsz;
        u_int64_t       ci_tsc_freq;
-
+       u_int32_t       ci_smt_id; 
+       u_int32_t       ci_core_id;
+       u_int32_t       ci_pkg_id;
        struct cpu_functions *ci_func;
        void (*cpu_setup)(struct cpu_info *);
        void (*ci_info)(struct cpu_info *);
@@ -266,6 +268,7 @@ extern int cpuspeed;
 /* identcpu.c */
 void   identifycpu(struct cpu_info *);
 int    cpu_amd64speed(int *);
+void   cpu_topology(struct cpu_info *);
 
 /* machdep.c */
 void   dumpconf(void);
Index: arch/amd64/include/specialreg.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/specialreg.h,v
retrieving revision 1.21
diff -d -u -p -r1.21 specialreg.h
--- arch/amd64/include/specialreg.h     27 Mar 2012 05:59:46 -0000      1.21
+++ arch/amd64/include/specialreg.h     8 Jul 2012 07:46:34 -0000
@@ -187,11 +187,13 @@
 #define CPUID2MODEL(cpuid)     (((cpuid) >> 4) & 15)
 #define CPUID2STEPPING(cpuid)  ((cpuid) & 15)
 
-#define CPUID(code, eax, ebx, ecx, edx)                         \
+#define CPUID2(eax_code, ecx_code, eax, ebx, ecx, edx)         \
        __asm("cpuid"                                           \
            : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)    \
-           : "a" (code));
+           : "a" (eax_code), "c" (ecx_code));
 
+#define CPUID(code, eax, ebx, ecx, edx)                                \
+       CPUID2(code, 0, eax, ebx, ecx, edx)
 
 /*
  * Model-specific registers for the i386 family

Reply via email to