The branch stable/13 has been updated by jhibbits:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=159ae92df5a5b9e9a7457d6beafc60b424607d50

commit 159ae92df5a5b9e9a7457d6beafc60b424607d50
Author:     Justin Hibbits <[email protected]>
AuthorDate: 2022-06-13 19:04:29 +0000
Commit:     Justin Hibbits <[email protected]>
CommitDate: 2022-06-22 15:46:55 +0000

    arm64: Print per-CPU cache summary
    
    Summary:
    It can be useful to see a summary of CPU caches on bootup.  This is done
    for most platforms already, so add it to arm64 as well, in the following
    form (taken from a test on an Apple M1 Pro):
    
      L1 cache: 192KB (instruction), 128KB (data)
      L2 cache: 12288KB (unified)
    
    This is printed out per-CPU, only under bootverbose.
    
    Future refinements could instead determine whether a cache level is
    shared with other cores (L2 is shared among cores on some SoCs, for
    instance) and compute the true total cache sizes.  For instance, the
    M1 Pro on which this test was done is known to have two 12MB L2
    clusters, for a total of 24MB.  Seeing each CPU report 12288KB of L2
    could lead one to assume 12MB * NCPUs, i.e. possibly 120MB of cache,
    which is incorrect.
    
    Sponsored by:   Juniper Networks, Inc.
    Reviewed by:    #arm64, andrew
    Differential Revision: https://reviews.freebsd.org/D35366
    
    (cherry picked from commit 139ba152c9c91fad9b63ccd2382a80f753f217b9)
---
 sys/arm64/arm64/identcpu.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++
 sys/arm64/include/armreg.h | 30 ++++++++++++++++
 2 files changed, 115 insertions(+)

diff --git a/sys/arm64/arm64/identcpu.c b/sys/arm64/arm64/identcpu.c
index 8cbe483dc1c6..90db3b82b582 100644
--- a/sys/arm64/arm64/identcpu.c
+++ b/sys/arm64/arm64/identcpu.c
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
 
 static void print_cpu_midr(struct sbuf *sb, u_int cpu);
 static void print_cpu_features(u_int cpu);
+static void print_cpu_caches(struct sbuf *sb, u_int);
 #ifdef COMPAT_FREEBSD32
 static u_long parse_cpu_features_hwcap32(void);
 #endif
@@ -103,6 +104,8 @@ static char cpu_model[64];
 SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD,
        cpu_model, sizeof(cpu_model), "Machine model");
 
+#define        MAX_CACHES      8       /* Maximum number of caches supported
+                                  architecturally. */
 /*
  * Per-CPU affinity as provided in MPIDR_EL1
  * Indexed by CPU number in logical order selected by the system.
@@ -135,6 +138,8 @@ struct cpu_desc {
        uint64_t        mvfr0;
        uint64_t        mvfr1;
 #endif
+       uint64_t        clidr;
+       uint32_t        ccsidr[MAX_CACHES][2]; /* 2 possible types. */
 };
 
 static struct cpu_desc cpu_desc[MAXCPU];
@@ -1767,6 +1772,7 @@ cpu_features_sysinit(void *dummy __unused)
        /* Fill in cpu_model for the hw.model sysctl */
        sbuf_new(&sb, cpu_model, sizeof(cpu_model), SBUF_FIXEDLEN);
        print_cpu_midr(&sb, 0);
+
        sbuf_finish(&sb);
        sbuf_delete(&sb);
 }
@@ -1940,6 +1946,62 @@ print_cpu_midr(struct sbuf *sb, u_int cpu)
            cpu_part_name, CPU_VAR(midr), CPU_REV(midr));
 }
 
+/*
+ * Append the size and kind of a single cache to sb as "<size>KB (<kind>)".
+ *
+ * cpu     - index into cpu_desc[]; that entry's id_aa64mmfr2 value selects
+ *           which CCSIDR field layout is decoded.
+ * sb      - sbuf the text is appended to.
+ * ccs     - CCSIDR value describing the cache.
+ * icache  - true labels the cache "instruction".
+ * unified - consulted only when icache is false: true labels the cache
+ *           "unified", false labels it "data".
+ */
+static void
+print_cpu_cache(u_int cpu, struct sbuf *sb, uint64_t ccs, bool icache,
+    bool unified)
+{
+       const char *kind;
+       size_t bytes_per_line, nsets, nways, total;
+
+       /* The LineSize field holds log2(line size) - 4. */
+       bytes_per_line = 1 << ((ccs & CCSIDR_LineSize_MASK) + 4);
+
+       /*
+        * The set and way fields sit in different bit ranges depending on
+        * the FEAT_CCIDX bit in the ID_AA64MMFR2 feature register.  One is
+        * added to each field to obtain the count.
+        */
+       if ((cpu_desc[cpu].id_aa64mmfr2 & ID_AA64MMFR2_CCIDX_64) != 0) {
+               nsets = CCSIDR_NSETS_64(ccs) + 1;
+               nways = CCSIDR_ASSOC_64(ccs) + 1;
+       } else {
+               nsets = CCSIDR_NSETS(ccs) + 1;
+               nways = CCSIDR_ASSOC(ccs) + 1;
+       }
+
+       /* Cache size is sets * ways * line size. */
+       total = nsets * nways;
+       total *= bytes_per_line;
+
+       if (icache)
+               kind = "instruction";
+       else if (unified)
+               kind = "unified";
+       else
+               kind = "data";
+
+       sbuf_printf(sb, "%zuKB (%s)", total / 1024, kind);
+}
+
+/*
+ * Summarise the caches of one CPU, one line per cache level, in the form
+ * " L1 cache: 192KB (instruction), 128KB (data)".
+ *
+ * The lines are appended to sb, which is then finished and written out
+ * with printf().  This function does not delete sb.
+ *
+ * sb  - sbuf used to build the text.
+ * cpu - index into cpu_desc[]; its saved clidr and ccsidr[][] values are
+ *       the source of the summary.
+ */
+static void
+print_cpu_caches(struct sbuf *sb, u_int cpu)
+{
+       /*
+        * CLIDR_EL1 defines cache type fields for levels 1 to 7 only; this
+        * is below MAX_CACHES, so ccsidr[] indexing stays in bounds.
+        */
+       const int nlevels = 7;
+       uint64_t clidr;
+       int i;
+
+       clidr = cpu_desc[cpu].clidr;
+
+       /*
+        * Walk the 3-bit type fields upwards and stop at the first level
+        * with no cache.  The explicit bound matters when every level is
+        * populated: without it the bits that follow the type fields would
+        * be decoded as further levels and ccsidr[] indexed out of range.
+        */
+       for (i = 0; i < nlevels && (clidr & CLIDR_CTYPE_MASK) != 0;
+           i++, clidr >>= 3) {
+               int ctype = (clidr & CLIDR_CTYPE_MASK);
+               bool has_icache = (ctype & CLIDR_CTYPE_IO) != 0;
+               bool has_data_or_unified = (ctype & ~CLIDR_CTYPE_IO) != 0;
+               int slot = 0;
+
+               sbuf_printf(sb, " L%d cache: ", i + 1);
+               /*
+                * identify_cpu() saves the instruction cache's CCSIDR
+                * first when there is one, then the data or unified
+                * cache's; consume the slots in that same order.
+                */
+               if (has_icache) {
+                       print_cpu_cache(cpu, sb,
+                           cpu_desc[cpu].ccsidr[i][slot++], true, false);
+                       /* If there's more, add to the line. */
+                       if (has_data_or_unified)
+                               sbuf_printf(sb, ", ");
+               }
+               if (has_data_or_unified) {
+                       print_cpu_cache(cpu, sb,
+                           cpu_desc[cpu].ccsidr[i][slot], false,
+                           (ctype & CLIDR_CTYPE_UNIFIED) != 0);
+               }
+               sbuf_printf(sb, "\n");
+       }
+       sbuf_finish(sb);
+       printf("%s", sbuf_data(sb));
+}
+
 static void
 print_cpu_features(u_int cpu)
 {
@@ -2069,6 +2131,8 @@ print_cpu_features(u_int cpu)
                print_id_register(sb, "AArch32 Media and VFP Features 1",
                     cpu_desc[cpu].mvfr1, mvfr1_fields);
 #endif
+       if (bootverbose)
+               print_cpu_caches(sb, cpu);
 
        sbuf_delete(sb);
        sb = NULL;
@@ -2118,6 +2182,8 @@ identify_cache(uint64_t ctr)
 void
 identify_cpu(u_int cpu)
 {
+       uint64_t clidr;
+
        /* Save affinity for current CPU */
        cpu_desc[cpu].mpidr = get_mpidr();
        CPU_AFFINITY(cpu) = cpu_desc[cpu].mpidr & CPU_AFF_MASK;
@@ -2132,6 +2198,25 @@ identify_cpu(u_int cpu)
        cpu_desc[cpu].id_aa64mmfr2 = READ_SPECIALREG(id_aa64mmfr2_el1);
        cpu_desc[cpu].id_aa64pfr0 = READ_SPECIALREG(id_aa64pfr0_el1);
        cpu_desc[cpu].id_aa64pfr1 = READ_SPECIALREG(id_aa64pfr1_el1);
+
+       cpu_desc[cpu].clidr = READ_SPECIALREG(clidr_el1);
+
+       clidr = cpu_desc[cpu].clidr;
+
+       for (int i = 0; (clidr & CLIDR_CTYPE_MASK) != 0; i++, clidr >>= 3) {
+               int j = 0;
+               if ((clidr & CLIDR_CTYPE_IO)) {
+                       WRITE_SPECIALREG(csselr_el1,
+                           CSSELR_Level(i) | CSSELR_InD);
+                       cpu_desc[cpu].ccsidr[i][j++] =
+                           READ_SPECIALREG(ccsidr_el1);
+               }
+               if ((clidr & ~CLIDR_CTYPE_IO) == 0)
+                       continue;
+               WRITE_SPECIALREG(csselr_el1, CSSELR_Level(i));
+               cpu_desc[cpu].ccsidr[i][j] = READ_SPECIALREG(ccsidr_el1);
+       }
+
 #ifdef COMPAT_FREEBSD32
        /* Only read aarch32 SRs if EL0-32 is available */
        if (ID_AA64PFR0_EL0_VAL(cpu_desc[cpu].id_aa64pfr0) ==
diff --git a/sys/arm64/include/armreg.h b/sys/arm64/include/armreg.h
index e16b17aa4cab..1e7982a101c5 100644
--- a/sys/arm64/include/armreg.h
+++ b/sys/arm64/include/armreg.h
@@ -69,6 +69,32 @@
 
 #define        UL(x)   UINT64_C(x)
 
+/* CCSIDR_EL1 - Cache Size ID Register */
+#define        CCSIDR_NumSets_MASK     0x0FFFE000
+#define        CCSIDR_NumSets64_MASK   0x00FFFFFF00000000
+#define        CCSIDR_NumSets_SHIFT    13
+#define        CCSIDR_NumSets64_SHIFT  32
+#define        CCSIDR_Assoc_MASK       0x00001FF8
+#define        CCSIDR_Assoc64_MASK     0x0000000000FFFFF8
+#define        CCSIDR_Assoc_SHIFT      3
+#define        CCSIDR_Assoc64_SHIFT    3
+#define        CCSIDR_LineSize_MASK    0x7
+#define        CCSIDR_NSETS(idr)                                       \
+       (((idr) & CCSIDR_NumSets_MASK) >> CCSIDR_NumSets_SHIFT)
+#define        CCSIDR_ASSOC(idr)                                       \
+       (((idr) & CCSIDR_Assoc_MASK) >> CCSIDR_Assoc_SHIFT)
+#define        CCSIDR_NSETS_64(idr)                                    \
+       (((idr) & CCSIDR_NumSets64_MASK) >> CCSIDR_NumSets64_SHIFT)
+#define        CCSIDR_ASSOC_64(idr)                                    \
+       (((idr) & CCSIDR_Assoc64_MASK) >> CCSIDR_Assoc64_SHIFT)
+
+/* CLIDR_EL1 - Cache Level ID Register (one 3-bit type field per level) */
+#define        CLIDR_CTYPE_MASK        0x7     /* Mask for one type field */
+#define        CLIDR_CTYPE_IO          0x1     /* Instruction cache only */
+#define        CLIDR_CTYPE_DO          0x2     /* Data cache only */
+#define        CLIDR_CTYPE_ID          0x3     /* Separate I and D caches */
+#define        CLIDR_CTYPE_UNIFIED     0x4     /* Unified cache */
+
 /* CNTHCTL_EL2 - Counter-timer Hypervisor Control register */
 #define        CNTHCTL_EVNTI_MASK      (0xf << 4) /* Bit to trigger event stream */
 #define        CNTHCTL_EVNTDIR         (1 << 3) /* Control transition trigger bit */
@@ -89,6 +115,10 @@
 #define         CPACR_FPEN_TRAP_NONE   (0x3 << 20) /* No traps */
 #define        CPACR_TTA               (0x1 << 28)
 
+/* CSSELR_EL1 - Cache size selection register */
+#define        CSSELR_Level(i)         ((i) << 1) /* i is the 0-based level */
+#define        CSSELR_InD              0x00000001 /* Select the I-cache */
+
 /* CTR_EL0 - Cache Type Register */
 #define        CTR_RES1                (1 << 31)
 #define        CTR_TminLine_SHIFT      32

Reply via email to