Heya, I have had this rotting in my tree, since actually using it effectively is way harder than it seems. Anyhow, this correctly builds the CPU topology on amd64; we now know 3 things about each cpu:
- thread id (smt id) - core id - package id This is not complete but is enough IMHO, it lacks x2apic detection. I've tried to trim it up, but the mask logic is a bit cryptic. obs: I left a print on dmesg just so that people can test, I intend to remove if it goes in. an atom d270 reports the following: cpu0: smt 0, core 0, package 0 cpu1: smt 1, core 0, package 0 cpu2: smt 0, core 1, package 0 cpu3: smt 1, core 1, package 0 a core2duo L7500: cpu0: smt 0, core 0, package 0 cpu1: smt 0, core 1, package 0 Do we want this ? Index: arch/amd64/amd64/identcpu.c =================================================================== RCS file: /cvs/src/sys/arch/amd64/amd64/identcpu.c,v retrieving revision 1.36 diff -d -u -p -r1.36 identcpu.c --- arch/amd64/amd64/identcpu.c 22 Apr 2012 19:36:09 -0000 1.36 +++ arch/amd64/amd64/identcpu.c 8 Jul 2012 09:03:02 -0000 @@ -446,4 +446,123 @@ identifycpu(struct cpu_info *ci) sensordev_install(&ci->ci_sensordev); #endif } + + cpu_topology(ci); +} + +/* + * Base 2 logarithm of an int. returns 0 for 0 (yeye, I know). + */ +static int +log2(unsigned int i) +{ + int ret = 0; + + while (i >>= 1) + ret++; + + return (ret); +} + +static int +mask_width(u_int x) +{ + int bit; + int mask; + int powerof2; + + powerof2 = ((x - 1) & x) == 0; + mask = (x << (1 - powerof2)) - 1; + + /* fls */ + if (mask == 0) + return (0); + for (bit = 1; mask != 1; bit++) + mask = (unsigned int)mask >> 1; + + return (bit); +} + +/* + * Build up cpu topology for given cpu, must run on the core itself. 
+ */ +void +cpu_topology(struct cpu_info *ci) +{ + u_int32_t eax, ebx, ecx, edx; + u_int32_t apicid, max_apicid, max_coreid; + u_int32_t smt_bits, core_bits, pkg_bits; + u_int32_t smt_mask, core_mask, pkg_mask; + + /* We need at least apicid at CPUID 1 */ + CPUID(0, eax, ebx, ecx, edx); + if (eax < 1) + goto no_topology; + + /* Initial apicid */ + CPUID(1, eax, ebx, ecx, edx); + apicid = (ebx >> 24) & 0xff; + + if (strcmp(cpu_vendor, "AuthenticAMD") == 0) { + /* We need at least apicid at CPUID 0x80000008 */ + CPUID(0x80000000, eax, ebx, ecx, edx); + if (eax < 0x80000008) + goto no_topology; + + CPUID(0x80000008, eax, ebx, ecx, edx); + core_bits = (ecx >> 12) & 0xf; + if (core_bits == 0) + goto no_topology; + /* So coreidsize 2 gives 3, 3 gives 7... */ + core_mask = (1 << core_bits) - 1; + /* Core id is the least significant considering mask */ + ci->ci_core_id = apicid & core_mask; + /* Pkg id is the upper remaining bits */ + ci->ci_pkg_id = apicid & ~core_mask; + ci->ci_pkg_id >>= core_bits; + } else if (strcmp(cpu_vendor, "GenuineIntel") == 0) { + /* We only support leaf 1/4 detection */ + CPUID(0, eax, ebx, ecx, edx); + if (eax < 4) + goto no_topology; + /* Get max_apicid */ + CPUID(1, eax, ebx, ecx, edx); + max_apicid = (ebx >> 16) & 0xff; + /* Get max_coreid */ + CPUID2(4, 0, eax, ebx, ecx, edx); + max_coreid = ((eax >> 26) & 0x3f) + 1; + /* SMT */ + smt_bits = mask_width(max_apicid / max_coreid); + smt_mask = (1 << smt_bits) - 1; + /* Core */ + core_bits = log2(max_coreid); + core_mask = (1 << (core_bits + smt_bits)) - 1; + core_mask ^= smt_mask; + /* Pkg */ + pkg_bits = core_bits + smt_bits; + pkg_mask = -1 << core_bits; + + ci->ci_smt_id = apicid & smt_mask; + ci->ci_core_id = (apicid & core_mask) >> smt_bits; + ci->ci_pkg_id = (apicid & pkg_mask) >> pkg_bits; + } else + goto no_topology; +#ifdef DEBUG + printf("cpu%d: smt %u, core %u, pkg %u " + "(apicid 0x%x, max_apicid 0x%x, max_coreid 0x%x, smt_bits 0x%x, smt_mask 0x%x, " + "core_bits 0x%x, core_mask 
0x%x, pkg_bits 0x%x, pkg_mask 0x%x)\n", + ci->ci_cpuid, ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id, + apicid, max_apicid, max_coreid, smt_bits, smt_mask, core_bits, + core_mask, pkg_bits, pkg_mask); +#else + printf("cpu%d: smt %u, core %u, package %u\n", ci->ci_cpuid, + ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id); + +#endif + return; + /* We can't map, so consider ci_core_id as ci_cpuid */ +no_topology: + ci->ci_smt_id = 0; + ci->ci_core_id = ci->ci_cpuid; + ci->ci_pkg_id = 0; } Index: arch/amd64/include/cpu.h =================================================================== RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v retrieving revision 1.73 diff -d -u -p -r1.73 cpu.h --- arch/amd64/include/cpu.h 17 Apr 2012 16:02:33 -0000 1.73 +++ arch/amd64/include/cpu.h 8 Jul 2012 07:48:10 -0000 @@ -100,7 +100,9 @@ struct cpu_info { u_int32_t ci_model; u_int32_t ci_cflushsz; u_int64_t ci_tsc_freq; - + u_int32_t ci_smt_id; + u_int32_t ci_core_id; + u_int32_t ci_pkg_id; struct cpu_functions *ci_func; void (*cpu_setup)(struct cpu_info *); void (*ci_info)(struct cpu_info *); @@ -266,6 +268,7 @@ extern int cpuspeed; /* identcpu.c */ void identifycpu(struct cpu_info *); int cpu_amd64speed(int *); +void cpu_topology(struct cpu_info *); /* machdep.c */ void dumpconf(void); Index: arch/amd64/include/specialreg.h =================================================================== RCS file: /cvs/src/sys/arch/amd64/include/specialreg.h,v retrieving revision 1.21 diff -d -u -p -r1.21 specialreg.h --- arch/amd64/include/specialreg.h 27 Mar 2012 05:59:46 -0000 1.21 +++ arch/amd64/include/specialreg.h 8 Jul 2012 07:46:34 -0000 @@ -187,11 +187,13 @@ #define CPUID2MODEL(cpuid) (((cpuid) >> 4) & 15) #define CPUID2STEPPING(cpuid) ((cpuid) & 15) -#define CPUID(code, eax, ebx, ecx, edx) \ +#define CPUID2(eax_code, ecx_code, eax, ebx, ecx, edx) \ __asm("cpuid" \ : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) \ - : "a" (code)); + : "a" (eax_code), "c" (ecx_code)); +#define CPUID(code, 
eax, ebx, ecx, edx) \ + CPUID2(code, 0, eax, ebx, ecx, edx) /* * Model-specific registers for the i386 family