Re: Scheduler improvements, take 1001, Patch 3/5

2012-10-09 Thread Gregor Best
This patch simply imports Christiano's code for detecting CPU topology,
as posted on tech@ a while (more than two months) ago. I took it
verbatim and didn't change anything yet.

-- 
Gregor Best



Re: Scheduler improvements, take 1001, Patch 3/5

2012-10-09 Thread Gregor Best
diff --git a/arch/amd64/amd64/identcpu.c b/arch/amd64/amd64/identcpu.c
index c597bb0..982c2bb 100644
--- a/arch/amd64/amd64/identcpu.c
+++ b/arch/amd64/amd64/identcpu.c
@@ -210,6 +210,8 @@ void (*setperf_setup)(struct cpu_info *);
 
 void via_nano_setup(struct cpu_info *ci);
 
+void cpu_topology(struct cpu_info *ci);
+
 void
 via_nano_setup(struct cpu_info *ci)
 {
@@ -479,4 +481,123 @@ identifycpu(struct cpu_info *ci)
sensordev_install(&ci->ci_sensordev);
 #endif
}
+
+   cpu_topology(ci);
+}
+
+/*
+ * Integer base-2 logarithm, rounded down: the index of the highest set
+ * bit of i. Both 0 and 1 yield 0.
+ */
+static int
+log2(unsigned int i)
+{
+	int shifts;
+
+	for (shifts = 0; i > 1; i >>= 1)
+		shifts++;
+	return (shifts);
+}
+
+/*
+ * Number of APIC id bits needed to tell x entities apart, that is
+ * ceil(log2(x)): 1 -> 0, 2 -> 1, 3..4 -> 2, 5..8 -> 3.
+ * Returns 0 for x == 0 so callers never get a full-width shift count.
+ */
+static int
+mask_width(unsigned int x)
+{
+	int bits = 0;
+
+	if (x == 0)
+		return (0);
+	/* Count the significant bits of x - 1, the largest id in use. */
+	for (x--; x != 0; x >>= 1)
+		bits++;
+
+	return (bits);
+}
+
+/*
+ * Derive ci's smt/core/pkg ids from its initial APIC id; must run on the
+ * core itself. Without usable CPUID data, ci_core_id becomes ci_cpuid.
+ */
+void
+cpu_topology(struct cpu_info *ci)
+{
+	u_int32_t eax, ebx, ecx, edx;
+	u_int32_t apicid, max_apicid = 0, max_coreid = 0;
+	u_int32_t smt_bits = 0, core_bits = 0, pkg_bits = 0;
+	u_int32_t smt_mask = 0, core_mask = 0, pkg_mask = 0;
+
+	/* We need at least apicid at CPUID 1 */
+	CPUID(0, eax, ebx, ecx, edx);
+	if (eax < 1)
+		goto no_topology;
+
+	/* Initial apicid */
+	CPUID(1, eax, ebx, ecx, edx);
+	apicid = (ebx >> 24) & 0xff;
+
+	if (strcmp(cpu_vendor, "AuthenticAMD") == 0) {
+		/* We need at least the extended leaf 0x80000008 */
+		CPUID(0x80000000, eax, ebx, ecx, edx);
+		if (eax < 0x80000008)
+			goto no_topology;
+
+		CPUID(0x80000008, eax, ebx, ecx, edx);
+		core_bits = (ecx >> 12) & 0xf;
+		if (core_bits == 0)
+			goto no_topology;
+		/* So coreidsize 2 gives 3, 3 gives 7... */
+		core_mask = (1U << core_bits) - 1;
+		/* Core id is the low bits, pkg id the rest; no SMT id here */
+		ci->ci_smt_id = 0;
+		ci->ci_core_id = apicid & core_mask;
+		ci->ci_pkg_id = (apicid & ~core_mask) >> core_bits;
+	} else if (strcmp(cpu_vendor, "GenuineIntel") == 0) {
+		/* We only support leaf 1/4 detection */
+		CPUID(0, eax, ebx, ecx, edx);
+		if (eax < 4)
+			goto no_topology;
+		/* Get max_apicid */
+		CPUID(1, eax, ebx, ecx, edx);
+		max_apicid = (ebx >> 16) & 0xff;
+		/* Get max_coreid */
+		CPUID2(4, 0, eax, ebx, ecx, edx);
+		max_coreid = ((eax >> 26) & 0x3f) + 1;
+		/* SMT */
+		smt_bits = mask_width(max_apicid / max_coreid);
+		smt_mask = (1U << smt_bits) - 1;
+		/* Core */
+		core_bits = log2(max_coreid);
+		core_mask = (1U << (core_bits + smt_bits)) - 1;
+		core_mask ^= smt_mask;
+		/* Pkg: every bit above the SMT and core fields */
+		pkg_bits = core_bits + smt_bits;
+		pkg_mask = ~0U << pkg_bits;
+
+		ci->ci_smt_id = apicid & smt_mask;
+		ci->ci_core_id = (apicid & core_mask) >> smt_bits;
+		ci->ci_pkg_id = (apicid & pkg_mask) >> pkg_bits;
+	} else
+		goto no_topology;
+#ifdef DEBUG
+	printf("cpu%d: smt %u, core %u, pkg %u "
+	    "(apicid 0x%x, max_apicid 0x%x, max_coreid 0x%x, "
+	    "smt_bits 0x%x, smt_mask 0x%x, "
+	    "core_bits 0x%x, core_mask 0x%x, pkg_bits 0x%x, pkg_mask 0x%x)\n",
+	    ci->ci_cpuid, ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id,
+	    apicid, max_apicid, max_coreid, smt_bits, smt_mask, core_bits,
+	    core_mask, pkg_bits, pkg_mask);
+#else
+	printf("cpu%d: smt %u, core %u, package %u\n", ci->ci_cpuid,
+	    ci->ci_smt_id, ci->ci_core_id, ci->ci_pkg_id);
+#endif
+	return;
+	/* We can't map, so consider ci_core_id as ci_cpuid */
+no_topology:
+	ci->ci_smt_id  = 0;
+	ci->ci_core_id = ci->ci_cpuid;
+	ci->ci_pkg_id  = 0;
 }
diff --git a/arch/amd64/include/cpu.h b/arch/amd64/include/cpu.h
index 9ce437a..12e48d6 100644
--- a/arch/amd64/include/cpu.h
+++ b/arch/amd64/include/cpu.h
@@ -102,6 +102,9 @@ struct cpu_info {
u_int32_t   ci_cflushsz;
u_int64_t   ci_tsc_freq;
 
+   u_int32_t   ci_smt_id;
+   u_int32_t   ci_core_id;
+   u_int32_t   ci_pkg_id;
struct cpu_functions *ci_func;
void (*cpu_setup)(struct cpu_info *);
void (*ci_info)(struct cpu_info *);
diff --git a/arch/amd64/include/specialr