> Me again :-) > > I have completed and tested my workaround for the dcbx instructions. The > workaround > handles ALL dcbx instructions, ANY register combination and works both on > kernel space and user space addresses. > > I also did some benchmarking using copy_page(dcbz enabled) and memcpy to > memory allocated with kmalloc and/or vmalloc. copy_page is about 30% faster > than memcpy even with the workaround applied.
Here I go again :) I have been running this patch on some 20-30 custom MPC860/862 boards in our test lab since I posted this message and it is stable. I made some changes since then: - Made it configurable, #define CONFIG_8xx_DCBxFIXED to enable it. - Tagging in the fast path in the DTLB Miss handler is just one (1) instruction. - Test and branch if TAG present is two instructions in the DTLB Error handler. - Enabled the use of the dcbz instruction in copy_tofrom_user(), cacheable_memzero(), cacheable_memcpy(), clear_page() and copy_page() Feedback most welcome! Patch against linuxppc_2_4_devel follows. Jocke --- a/arch/ppc/kernel/head_8xx.S Tue Apr 29 00:45:35 2003 +++ b/arch/ppc/kernel/head_8xx.S Fri May 9 14:16:44 2003 @@ -31,6 +31,27 @@ #include <asm/ppc_asm.h> #include "ppc_defs.h" +#ifdef CONFIG_8xx_DCBxFIXED +/* These macros are used to tag DAR with a known value so that the + * DataTLBError can recognize a buggy dcbx instruction and workaround + * the problem. + */ + #define TAG_VAL 0x00f0 + #define TAG_DAR_R20 \ + li r20, TAG_VAL;\ + mtspr DAR, r20; +#else + #define TAG_DAR_R20 +#endif +/* Macro to make the code more readable. */ +#ifdef CONFIG_8xx_CPU6 + #define DO_8xx_CPU6(val, reg) \ + li reg, val; \ + stw reg, 12(r0); \ + lwz reg, 12(r0); +#else + #define DO_8xx_CPU6(val, reg) +#endif .text .globl _stext _stext: @@ -166,6 +187,7 @@ . = n; \ label: \ EXCEPTION_PROLOG; \ + TAG_DAR_R20; \ addi r3,r1,STACK_FRAME_OVERHEAD; \ li r20,MSR_KERNEL; \ FINISH_EXCEPTION(hdlr) @@ -188,6 +210,7 @@ mr r5,r20 mfspr r4,DAR stw r4,_DAR(r21) + TAG_DAR_R20 addi r3,r1,STACK_FRAME_OVERHEAD li r20,MSR_KERNEL rlwimi r20,r23,0,16,16 /* copy EE bit from saved MSR */ @@ -226,6 +249,7 @@ EXCEPTION_PROLOG mfspr r4,DAR stw r4,_DAR(r21) + TAG_DAR_R20 mfspr r5,DSISR stw r5,_DSISR(r21) addi r3,r1,STACK_FRAME_OVERHEAD @@ -457,6 +481,13 @@ #endif mtspr MD_RPN, r20 /* Update TLB entry */ +#ifdef CONFIG_8xx_DCBxFIXED +#if TAG_VAL == 0x00f0 /* Save 1 instr. 
by reusing the val loaded in r21 above */ + mtspr DAR, r21 +#else + TAG_DAR_R20 +#endif +#endif mfspr r20, M_TW /* Restore registers */ lwz r21, 0(r0) mtcr r21 @@ -466,7 +497,17 @@ #endif rfi -2: mfspr r20, M_TW /* Restore registers */ +2: +#ifdef CONFIG_8xx_DCBxFIXED + /* Copy 20 msb from MD_EPN to DAR since the dcbx instructions fail + * to update DAR when they cause a DTLB Miss. + */ + mfspr r21, MD_EPN + mfspr r20, DAR + rlwimi r20, r21, 0, 0, 19 + mtspr DAR, r20 +#endif + mfspr r20, M_TW /* Restore registers */ lwz r21, 0(r0) mtcr r21 lwz r21, 4(r0) @@ -504,10 +545,19 @@ stw r20, 0(r0) stw r21, 4(r0) + mfspr r20, DAR +#ifdef CONFIG_8xx_DCBxFIXED + /* If DAR contains TAG_VAL, this implies a buggy dcbx instruction + * that did not set DAR. + */ + cmplwi cr0, r20, TAG_VAL + beq- 100f /* Branch if TAG_VAL to dcbx workaround procedure */ +101: /* return from dcbx instruction bug workaround, r20 holds value of DAR */ /* First, make sure this was a store operation. */ - mfspr r20, DSISR - andis. r21, r20, 0x0200 /* If set, indicates store op */ +#endif + mfspr r21, DSISR + andis. r21, r21, 0x0200 /* If set, indicates store op */ beq 2f /* The EA of a data TLB miss is automatically stored in the MD_EPN @@ -526,7 +576,7 @@ * are initialized in mapin_ram(). This will avoid the problem, * assuming we only use the dcbi instruction on kernel addresses. */ - mfspr r20, DAR + /* DAR is in r20 already */ rlwinm r21, r20, 0, 0, 19 ori r21, r21, MD_EVALID mfspr r20, M_CASID @@ -591,6 +641,13 @@ #endif mtspr MD_RPN, r20 /* Update TLB entry */ +#ifdef CONFIG_8xx_DCBxFIXED +#if TAG_VAL == 0x00f0 /* Save 1 instr. by reusing the val loaded in r21 above */ + mtspr DAR, r21 +#else + TAG_DAR_R20 +#endif +#endif mfspr r20, M_TW /* Restore registers */ lwz r21, 0(r0) mtcr r21 @@ -628,6 +685,149 @@ . 
= 0x2000 +#ifdef CONFIG_8xx_DCBxFIXED +/* This is the workaround procedure to calculate the data EA for a buggy dcbx instruction + * by decoding the registers used by the dcbx instruction and adding them. + * DAR is set to the calculated address and r20 also holds the EA on exit. + */ +139: /* fetch instruction from userspace memory */ + DO_8xx_CPU6(0x3780, r3) + mtspr MD_EPN, r20 + mfspr r21, M_TWB /* Get level 1 table entry address */ + lwz r21, 0(r21) /* Get the level 1 entry */ + tophys (r21, r21) + DO_8xx_CPU6(0x3b80, r3) + mtspr MD_TWC, r21 /* Load pte table base address */ + mfspr r21, MD_TWC /* ....and get the pte address */ + lwz r21, 0(r21) /* Get the pte */ + /* concat physical page address(r21) and page offset(r20) */ + rlwimi r21, r20, 0, 20, 31 + b 140f +100: /* Entry point for dcbx workaround. */ + /* fetch instruction from memory. */ + mfspr r20,SRR0 + andis. r21, r20, 0x8000 + beq- 139b /* Branch if user space address */ + tophys (r21, r20) +140: lwz r21,0(r21) + +/* Check if it really is a dcbx instruction. + * r20 = bits 21-30 of the instruction word left in place, so the + * constants compared below are the extended opcode shifted left by one. + * NOTE(review): only this field is compared; the primary opcode + * (bits 0-5) is not checked here - confirm that is acceptable. + */ + rlwinm r20, r21, 0, 21, 30 + cmpwi cr0, r20, 2028 /* Is dcbz? */ + beq+ 142f + cmpwi cr0, r20, 940 /* Is dcbi? */ + beq+ 142f + cmpwi cr0, r20, 556 /* Is dcbt? */ + beq+ 142f + cmpwi cr0, r20, 172 /* Is dcbf? */ + beq+ 142f +#ifdef DEBUG_DCBX_INSTRUCTIONS + cmpwi cr0, r20, 108 /* Is dcbst? Should never cause a DTLB Miss/Error */ + beq+ 142f + cmpwi cr0, r20, 492 /* Is dcbtst? Should never cause a DTLB Miss/Error */ + beq+ 142f + +141: b 141b /* Stop here if no dcbx instruction */ +#endif + mfspr r20, DAR /* r20 must hold DAR at exit */ + b 101b /* None of the above, go back to normal TLB processing */ +142: /* continue, it was a dcbx instruction. 
*/ + +#ifdef CONFIG_8xx_CPU6 + lwz r3, 8(r0) /* restore r3 from memory */ +#endif + mfctr r20 + mtdar r20 /* save ctr reg in DAR */ + rlwinm r20, r21, 24, 24, 28 /* offset into jump table for reg RB (RB * 8) */ + addi r20, r20, 150f@l /* add start of table */ + mtctr r20 /* load ctr with jump address */ + xor r20, r20, r20 /* sum starts at zero */ + bctr /* jump into table */ +150: /* jump table: one 8-byte entry (two instructions) per GPR; entry N adds rN to the sum in r20. The final entry (r31) falls through to 151. */ + add r20, r20, r0 + b 151f + add r20, r20, r1 + b 151f + add r20, r20, r2 + b 151f + add r20, r20, r3 + b 151f + add r20, r20, r4 + b 151f + add r20, r20, r5 + b 151f + add r20, r20, r6 + b 151f + add r20, r20, r7 + b 151f + add r20, r20, r8 + b 151f + add r20, r20, r9 + b 151f + add r20, r20, r10 + b 151f + add r20, r20, r11 + b 151f + add r20, r20, r12 + b 151f + add r20, r20, r13 + b 151f + add r20, r20, r14 + b 151f + add r20, r20, r15 + b 151f + add r20, r20, r16 + b 151f + add r20, r20, r17 + b 151f + add r20, r20, r18 + b 151f + add r20, r20, r19 + b 151f + mtctr r21 /* reg 20 needs special handling */ + b 154f + mtctr r21 /* reg 21 needs special handling */ + b 153f + add r20, r20, r22 + b 151f + add r20, r20, r23 + b 151f + add r20, r20, r24 + b 151f + add r20, r20, r25 + b 151f + add r20, r20, r26 + b 151f + add r20, r20, r27 + b 151f + add r20, r20, r28 + b 151f + add r20, r20, r29 + b 151f + add r20, r20, r30 + b 151f + add r20, r20, r31 +151: + rlwinm. 
r21,r21,19,24,28 /* offset into jump table for reg RA (RA * 8) */ + beq 152f /* if reg RA is zero, don't add it */ + addi r21, r21, 150b@l /* add start of table */ + mtctr r21 /* load ctr with jump address */ + rlwinm r21,r21,0,16,10 /* clear bits 11-15 so the next pass through 151 computes zero and exits: make sure we don't execute this more than once */ + bctr /* jump into table */ +152: + mfdar r21 + mtctr r21 /* restore ctr reg from DAR */ + mtdar r20 /* save fault EA to DAR */ + b 101b /* Go back to normal TLB handling */ + + /* special handling for r20,r21 since these are modified already */ +153: lwz r21, 4(r0) /* load r21 from memory */ + b 155f +154: mfspr r21, M_TW /* load r20 from M_TW */ +155: add r20, r20, r21 /* add it */ + mfctr r21 /* restore r21 */ + b 151b +#endif /* * This code finishes saving the registers to the exception frame * and jumps to the appropriate handler for the exception, turning --- a/arch/ppc/lib/string.S Tue Apr 29 00:45:35 2003 +++ b/arch/ppc/lib/string.S Fri May 9 14:17:07 2003 @@ -151,7 +151,7 @@ bdnz 4b 3: mtctr r9 li r7,4 -#if !defined(CONFIG_8xx) +#if !defined(CONFIG_8xx) || defined(CONFIG_8xx_DCBxFIXED) 10: dcbz r7,r6 #else 10: stw r4, 4(r6) @@ -253,7 +253,7 @@ mtctr r0 beq 63f 53: -#if !defined(CONFIG_8xx) +#if !defined(CONFIG_8xx) || defined(CONFIG_8xx_DCBxFIXED) dcbz r11,r6 #endif COPY_16_BYTES @@ -452,6 +452,8 @@ 53: #if !defined(CONFIG_8xx) dcbt r3,r4 +#endif +#if !defined(CONFIG_8xx) || defined(CONFIG_8xx_DCBxFIXED) 54: dcbz r11,r6 #endif /* had to move these to keep extable in order */ @@ -461,7 +463,7 @@ .long 71b,101f .long 72b,102f .long 73b,103f -#if !defined(CONFIG_8xx) +#if !defined(CONFIG_8xx) || defined(CONFIG_8xx_DCBxFIXED) .long 54b,105f #endif .text --- a/arch/ppc/kernel/misc.S Tue Apr 29 00:45:35 2003 +++ b/arch/ppc/kernel/misc.S Fri May 9 14:16:23 2003 @@ -657,7 +657,7 @@ _GLOBAL(clear_page) li r0,4096/L1_CACHE_LINE_SIZE mtctr r0 -#ifdef CONFIG_8xx +#if defined(CONFIG_8xx) && !defined(CONFIG_8xx_DCBxFIXED) li r4, 0 1: stw r4, 0(r3) stw r4, 4(r3) @@ -710,6 +710,8 @@ 1: #ifndef 
CONFIG_8xx dcbt r11,r4 +#endif +#if !defined(CONFIG_8xx) || defined(CONFIG_8xx_DCBxFIXED) dcbz r5,r3 #endif COPY_16_BYTES ** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/