Module Name:    src
Committed By:   rmind
Date:           Tue May 26 01:42:02 UTC 2009

Modified Files:
        src/sys/arch/x86/include: specialreg.h
        src/sys/arch/x86/x86: cpu_topology.c

Log Message:
Add CPU topology detection support for AMD processors.
Tested on the following AMD CPUs:
- Family 15, model 65
- Family 15, model 67
- Family 15, model 75
- Family 16, model 2
- Family 17, model 3

Reviewed (slightly older version of patch) by <yamt>.


To generate a diff of this commit:
cvs rdiff -u -r1.35 -r1.36 src/sys/arch/x86/include/specialreg.h
cvs rdiff -u -r1.1 -r1.2 src/sys/arch/x86/x86/cpu_topology.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/x86/include/specialreg.h
diff -u src/sys/arch/x86/include/specialreg.h:1.35 src/sys/arch/x86/include/specialreg.h:1.36
--- src/sys/arch/x86/include/specialreg.h:1.35	Sat May 16 13:36:44 2009
+++ src/sys/arch/x86/include/specialreg.h	Tue May 26 01:42:02 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: specialreg.h,v 1.35 2009/05/16 13:36:44 pgoyette Exp $	*/
+/*	$NetBSD: specialreg.h,v 1.36 2009/05/26 01:42:02 rmind Exp $	*/
 
 /*-
  * Copyright (c) 1991 The Regents of the University of California.
@@ -424,6 +424,7 @@
 #define	MSR_NB_CFG	0xc001001f
 #define		NB_CFG_DISIOREQLOCK	0x0000000000000004ULL
 #define		NB_CFG_DISDATMSK	0x0000001000000000ULL
+#define		NB_CFG_INITAPICCPUIDLO	(1ULL << 54)
 
 #define	MSR_LS_CFG	0xc0011020
 #define		LS_CFG_DIS_LS2_SQUISH	0x02000000

Index: src/sys/arch/x86/x86/cpu_topology.c
diff -u src/sys/arch/x86/x86/cpu_topology.c:1.1 src/sys/arch/x86/x86/cpu_topology.c:1.2
--- src/sys/arch/x86/x86/cpu_topology.c:1.1	Thu Apr 30 00:07:23 2009
+++ src/sys/arch/x86/x86/cpu_topology.c	Tue May 26 01:42:02 2009
@@ -1,7 +1,8 @@
-/*	$NetBSD: cpu_topology.c,v 1.1 2009/04/30 00:07:23 rmind Exp $	*/
+/*	$NetBSD: cpu_topology.c,v 1.2 2009/05/26 01:42:02 rmind Exp $	*/
 
 /*-
- * Copyright (c)2008 YAMAMOTO Takashi,
+ * Copyright (c) 2009 Mindaugas Rasiukevicius <rmind at NetBSD org>,
+ * Copyright (c) 2008 YAMAMOTO Takashi,
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -26,8 +27,16 @@
  * SUCH DAMAGE.
  */
 
+/*
+ * x86 CPU topology detection.
+ *
+ * References:
+ * - 253668.pdf (7.10.2), 276613.pdf
+ * - 31116.pdf, 41256.pdf, 25481.pdf
+ */
+
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu_topology.c,v 1.1 2009/04/30 00:07:23 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu_topology.c,v 1.2 2009/05/26 01:42:02 rmind Exp $");
 
 #include <sys/param.h>
 #include <sys/bitops.h>
@@ -42,49 +51,135 @@
+/*
+ * x86_cpu_toplogy: split the initial APIC ID of a CPU into package,
+ * core and SMT IDs and store them in ci_packageid, ci_coreid and
+ * ci_smtid.  Only Intel (family >= 6) and AMD (family >= 0xf) CPUs
+ * are decoded; otherwise the package ID stays the initial APIC ID
+ * and the core and SMT IDs stay zero.  As a side effect, edx of
+ * CPUID leaf 0x80000001 is ORed into ci_feature3_flags.
+ */
 void
 x86_cpu_toplogy(struct cpu_info *ci)
 {
-	u_int lp_max = 1;	/* logical processors per package */
-	u_int smt_max;		/* smt per core */
-	u_int core_max = 1;	/* core per package */
-	int smt_bits, core_bits;
-	uint32_t descs[4];
+	u_int lp_max;		/* Logical processors per package */
+	u_int core_max;		/* Cores per package */
+	int n, cpu_family, apic_id, smt_bits, core_bits = 0;
+	uint32_t descs[4], lextmode;
+
+	apic_id = ci->ci_initapicid;
+	cpu_family = CPUID2FAMILY(ci->ci_signature);
+
+	/* Defaults, kept if the topology cannot be decoded. */
+	ci->ci_packageid = apic_id;
+	ci->ci_coreid = 0;
+	ci->ci_smtid = 0;
 
-	if (cpu_vendor != CPUVENDOR_INTEL ||
-	    CPUID2FAMILY(ci->ci_signature) < 6)
+	switch (cpu_vendor) {
+	case CPUVENDOR_INTEL:
+		if (cpu_family < 6)
+			return;
+		break;
+	case CPUVENDOR_AMD:
+		if (cpu_family < 0xf)
+			return;
+		break;
+	default:
 		return;
+	}
 
 	/* Determine the extended feature flags. */
 	x86_cpuid(0x80000000, descs);
-	if (descs[0] >= 0x80000001) {
+	lextmode = descs[0];
+	if (lextmode >= 0x80000001) {
 		x86_cpuid(0x80000001, descs);
-		ci->ci_feature3_flags |= descs[3]; /* %edx */
+		ci->ci_feature3_flags |= descs[3]; /* edx */
 	}
 
-	/* Determine topology. 253668.pdf 7.10.2. */
-	ci->ci_packageid = ci->ci_initapicid;
-	ci->ci_coreid = 0;
-	ci->ci_smtid = 0;
+	/* Check for HTT support.  See notes below regarding AMD. */
 	if ((ci->ci_feature_flags & CPUID_HTT) != 0) {
+		/* Maximum number of LPs per package (ebx[23:16]). */
 		x86_cpuid(1, descs);
 		lp_max = (descs[1] >> 16) & 0xff;
+	} else {
+		lp_max = 1;
 	}
-	x86_cpuid(0, descs);
-	if (descs[0] >= 4) {
-		x86_cpuid2(4, 0, descs);
-		core_max = (descs[0] >> 26) + 1;
+
+	switch (cpu_vendor) {
+	case CPUVENDOR_INTEL:
+		/* Check for leaf 4 support. */
+		x86_cpuid(0, descs);
+		if (descs[0] >= 4) {
+			/* Maximum number of Cores per package (eax[31:26]). */
+			x86_cpuid2(4, 0, descs);
+			core_max = (descs[0] >> 26) + 1;
+		} else {
+			core_max = 1;
+		}
+		break;
+	case CPUVENDOR_AMD:
+		/* In the case of AMD, the HTT flag means CMP support. */
+		if ((ci->ci_feature_flags & CPUID_HTT) == 0) {
+			core_max = 1;
+			break;
+		}
+		/* Legacy Method, LPs represent Cores. */
+		if (cpu_family < 0x10 || lextmode < 0x80000008) {
+			core_max = lp_max;
+			break;
+		}
+		/* Number of Cores per package: NC (ecx[7:0]) + 1. */
+		x86_cpuid(0x80000008, descs);
+		core_max = (descs[2] & 0xff) + 1;
+		/* Number of bits representing Core ID (ecx[15:12]). */
+		n = (descs[2] >> 12) & 0x0f;
+		if (n != 0) {
+			/*
+			 * Extended Method: n is already a bit count
+			 * (up to 2^n cores), so use it as the width.
+			 */
+			core_bits = n;
+		}
+		break;
+	default:
+		core_max = 1;
 	}
+
 	KASSERT(lp_max >= core_max);
-	smt_max = lp_max / core_max;
-	smt_bits = ilog2(smt_max - 1) + 1;
-	core_bits = ilog2(core_max - 1) + 1;
+	smt_bits = ilog2((lp_max / core_max) - 1) + 1;
+	if (core_bits == 0) {
+		core_bits = ilog2(core_max - 1) + 1;
+	}
+
+	/*
+	 * Family 0xf and 0x10 processors may have different structure of
+	 * APIC ID.  Detect that via special MSR register and move the bits,
+	 * if necessary (ref: InitApicIdCpuIdLo).
+	 */
+	if (cpu_vendor == CPUVENDOR_AMD && cpu_family < 0x11) {	/* XXX */
+		const uint64_t reg = rdmsr(MSR_NB_CFG);
+		if ((reg & NB_CFG_INITAPICCPUIDLO) == 0) {
+			/*
+			 * 0xf:  { CoreId, NodeId[2:0] }
+			 * 0x10: { CoreId[1:0], 000b, NodeId[2:0] }
+			 *
+			 * NOTE(review): per these layouts CoreId starts
+			 * at bit 3 (0xf) or bit 6 (0x10); the shift counts
+			 * below look different -- confirm with the BKDG.
+			 */
+			const u_int node_id = apic_id & __BITS(0, 2);
+			apic_id = (cpu_family == 0xf) ?
+			    (apic_id >> core_bits) | (node_id << core_bits) :
+			    (apic_id >> 5) | (node_id << 2);
+		}
+	}
+
 	if (smt_bits + core_bits) {
-		ci->ci_packageid = ci->ci_initapicid >> (smt_bits + core_bits);
+		ci->ci_packageid = apic_id >> (smt_bits + core_bits);
 	}
 	if (core_bits) {
 		u_int core_mask = __BITS(smt_bits, smt_bits + core_bits - 1);
-		ci->ci_coreid = __SHIFTOUT(ci->ci_initapicid, core_mask);
+		ci->ci_coreid = __SHIFTOUT(apic_id, core_mask);
 	}
 	if (smt_bits) {
 		u_int smt_mask = __BITS(0, smt_bits - 1);
-		ci->ci_smtid = __SHIFTOUT(ci->ci_initapicid, smt_mask);
+		ci->ci_smtid = __SHIFTOUT(apic_id, smt_mask);
 	}
 }

Reply via email to