The diff below puts the page tables on armv7 in cachable memory. This
should speed things up a little bit, especially on processors that
walk the page tables coherently, which is pretty much all of them
except for Cortex-A8. We check whether the page table walk is
coherent by checking the appropriate field in the ID_MMFR3 register.
If the processor walks the page tables coherently, we disable flushing
of page table entries.
This is a necessary step for MULTIPROCESSOR support, and it should
reduce the number of times we move pages from cachable to uncachable,
a transition I'm not sure we handle correctly.
I'd appreciate it if somebody could test this on a system with a
Cortex-A8 processor.
Index: arch/arm/arm/cpufunc_asm_armv7.S
===================================================================
RCS file: /cvs/src/sys/arch/arm/arm/cpufunc_asm_armv7.S,v
retrieving revision 1.12
diff -u -p -r1.12 cpufunc_asm_armv7.S
--- arch/arm/arm/cpufunc_asm_armv7.S 5 Aug 2016 19:56:52 -0000 1.12
+++ arch/arm/arm/cpufunc_asm_armv7.S 6 Aug 2016 09:51:54 -0000
@@ -35,6 +35,17 @@ ENTRY(armv7_periphbase)
mrc CP15_CBAR(r0)
mov pc, lr
+#define TTBR_RGN_NC (0 << 3)
+#define TTBR_RGN_WBWA (1 << 3)
+#define TTBR_RGN_WT		(2 << 3)
+#define TTBR_RGN_WBNWA (3 << 3)
+#define TTBR_IRGN_NC ((0 << 0) | (0 << 6))
+#define TTBR_IRGN_WBWA ((0 << 0) | (1 << 6))
+#define TTBR_IRGN_WT ((1 << 0) | (0 << 6))
+#define TTBR_IRGN_WBNWA ((1 << 0) | (1 << 6))
+
+#define TTBR_DEFAULT (TTBR_IRGN_WBNWA | TTBR_RGN_WBNWA)
+
/*
* Functions to set the MMU Translation Table Base register
*/
@@ -44,6 +55,7 @@ ENTRY(armv7_setttb)
dsb sy
isb sy
+ orr r0, r0, #TTBR_DEFAULT
mcr CP15_TTBR0(r0) /* load new TTB */
mcr CP15_TLBIALL(r0) /* invalidate unified TLB */
dsb sy
@@ -232,6 +244,7 @@ ENTRY(armv7_context_switch)
dsb sy
isb sy
+ orr r0, r0, #TTBR_DEFAULT
mcr CP15_TTBR0(r0) /* set the new TTB */
mcr CP15_TLBIALL(r0) /* and flush the unified tlb */
dsb sy
Index: arch/arm/arm/pmap7.c
===================================================================
RCS file: /cvs/src/sys/arch/arm/arm/pmap7.c,v
retrieving revision 1.32
diff -u -p -r1.32 pmap7.c
--- arch/arm/arm/pmap7.c 3 Aug 2016 11:52:43 -0000 1.32
+++ arch/arm/arm/pmap7.c 6 Aug 2016 09:51:55 -0000
@@ -3291,7 +3291,7 @@ pmap_pte_init_generic(void)
void
pmap_pte_init_armv7(void)
{
- uint32_t cachereg;
+ uint32_t id_mmfr3;
/*
* XXX
@@ -3305,9 +3305,10 @@ pmap_pte_init_armv7(void)
pte_l2_l_cache_mode = L2_C|L2_B;
pte_l2_s_cache_mode = L2_C|L2_B;
- pte_l1_s_cache_mode_pt = L1_S_C;
- pte_l2_l_cache_mode_pt = L2_C;
- pte_l2_s_cache_mode_pt = L2_C;
+ pte_l1_s_cache_mode_pt = L1_S_B|L1_S_C;
+ pte_l2_l_cache_mode_pt = L2_B|L2_C;
+ pte_l2_s_cache_mode_pt = L2_B|L2_C;
+ pmap_needs_pte_sync = 1;
pte_l1_s_cache_mask = L1_S_CACHE_MASK_v7;
pte_l2_l_cache_mask = L2_L_CACHE_MASK_v7;
@@ -3339,26 +3340,10 @@ pmap_pte_init_armv7(void)
pte_l1_c_proto = L1_C_PROTO_v7;
pte_l2_s_proto = L2_S_PROTO_v7;
- /* probe L1 dcache */
- __asm volatile("mcr p15, 2, %0, c0, c0, 0" :: "r" (0) );
- __asm volatile("mrc p15, 1, %0, c0, c0, 0" : "=r" (cachereg) );
- if ((cachereg & 0x80000000) == 0) {
-#if 0
- /*
- * pmap_pte_init_generic() has defaulted to write-through
- * settings for pte pages, but the cache does not support
- * write-through.
- */
- pmap_needs_pte_sync = 1;
- pte_l1_s_cache_mode_pt = L1_S_B|L1_S_C;
- pte_l2_l_cache_mode_pt = L2_B|L2_C;
- pte_l2_s_cache_mode_pt = L2_B|L2_C;
-#endif
- /* XXX: Don't cache PTEs, until write-back is fixed. */
- pte_l1_s_cache_mode_pt = L1_S_V7_TEX(1);
- pte_l2_l_cache_mode_pt = L2_V7_L_TEX(1);
- pte_l2_s_cache_mode_pt = L2_V7_S_TEX(1);
- }
+ /* Check for coherent walk. */
+ __asm volatile("mrc p15, 0, %0, c0, c1, 7" : "=r"(id_mmfr3));
+ if ((id_mmfr3 & 0x00f00000) == 0x00100000)
+ pmap_needs_pte_sync = 0;
}
uint32_t pmap_alias_dist;