On Mon, Aug 05, 2013 at 08:07:22AM -1000, Richard Henderson wrote: > Avoid a loop in the tlb_fill path; the fill will either succeed or > generate an exception. > > Inline the slow_ld/st function; it was a complete copy of the main > helper except for the actual cross-page unaligned code, and the > compiler was inlining it anyway. > > Add unlikely markers optimizing for the most common case of simple > tlb miss. > > Make sure the compiler can optimize away the unaligned paths for a > 1 byte access. > > Signed-off-by: Richard Henderson <r...@twiddle.net> > --- > include/exec/softmmu_template.h | 287 > +++++++++++++++------------------------- > 1 file changed, 104 insertions(+), 183 deletions(-) > > diff --git a/include/exec/softmmu_template.h b/include/exec/softmmu_template.h > index 7d8bcb5..03e5155 100644 > --- a/include/exec/softmmu_template.h > +++ b/include/exec/softmmu_template.h > @@ -54,10 +54,6 @@ > #define ADDR_READ addr_read > #endif > > -static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env, > - target_ulong addr, > - int mmu_idx, > - uintptr_t retaddr); > static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env, > hwaddr physaddr, > target_ulong addr, > @@ -86,52 +82,67 @@ glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)(CPUArchState > *env, > target_ulong addr, int mmu_idx, > uintptr_t retaddr) > { > - DATA_TYPE res; > - int index; > - target_ulong tlb_addr; > - hwaddr ioaddr; > + int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); > + target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; > + uintptr_t haddr; > > - /* test if there is match for unaligned or IO access */ > - /* XXX: could done more in memory macro in a non portable way */ > - index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); > - redo: > - tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; > - if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | > TLB_INVALID_MASK))) { > - if (tlb_addr & ~TARGET_PAGE_MASK) { > - /* IO access */ > - if ((addr & (DATA_SIZE - 1)) != 0) > - goto do_unaligned_access; > - ioaddr = env->iotlb[mmu_idx][index]; > - res = glue(io_read, SUFFIX)(env, ioaddr, addr, retaddr); > - } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= > TARGET_PAGE_SIZE) { > - /* slow unaligned access (it spans two pages or IO) */ > - do_unaligned_access: > + /* If the TLB entry is for a different page, reload and try again. */ > + if ((addr & TARGET_PAGE_MASK)
Checkpatch complains about a whitespace at the end. > + != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { > #ifdef ALIGNED_ONLY > + if ((addr & (DATA_SIZE - 1)) != 0) { > do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, > retaddr); > + } > #endif > - res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(env, addr, > - mmu_idx, retaddr); > - } else { > - /* unaligned/aligned access in the same page */ > - uintptr_t addend; > -#ifdef ALIGNED_ONLY > - if ((addr & (DATA_SIZE - 1)) != 0) { > - do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, > retaddr); > - } > -#endif > - addend = env->tlb_table[mmu_idx][index].addend; > - res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)(intptr_t) > - (addr + addend)); > + tlb_fill(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr); > + tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; > + } > + > + /* Handle an IO access. */ > + if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { > + hwaddr ioaddr; > + if ((addr & (DATA_SIZE - 1)) != 0) { > + goto do_unaligned_access; > } > - } else { > + ioaddr = env->iotlb[mmu_idx][index]; > + return glue(io_read, SUFFIX)(env, ioaddr, addr, retaddr); > + } > + > + /* Handle slow unaligned access (it spans two pages or IO). */ > + if (DATA_SIZE > 1 > + && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 > + >= TARGET_PAGE_SIZE)) { > + target_ulong addr1, addr2; > + DATA_TYPE res1, res2, res; > + unsigned shift; > + do_unaligned_access: > #ifdef ALIGNED_ONLY > - if ((addr & (DATA_SIZE - 1)) != 0) > - do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, > retaddr); > + do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr); > #endif > - tlb_fill(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr); > - goto redo; > + addr1 = addr & ~(DATA_SIZE - 1); > + addr2 = addr1 + DATA_SIZE; > + res1 = glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)(env, addr1, > + mmu_idx, > retaddr); > + res2 = glue(glue(helper_ret_ld, SUFFIX), MMUSUFFIX)(env, addr2, > + mmu_idx, > retaddr); > + shift = (addr & (DATA_SIZE - 1)) * 8; > +#ifdef TARGET_WORDS_BIGENDIAN > + res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift)); > +#else > + res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift)); > +#endif > + return res; > + } > + > + /* Handle aligned access or unaligned access in the same page. */ > +#ifdef ALIGNED_ONLY > + if ((addr & (DATA_SIZE - 1)) != 0) { > + do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr); > } > - return res; > +#endif > + > + haddr = addr + env->tlb_table[mmu_idx][index].addend; > + return glue(glue(ld, USUFFIX), _raw)((uint8_t *)haddr); > } > > DATA_TYPE > @@ -142,66 +153,8 @@ glue(glue(helper_ld, SUFFIX), MMUSUFFIX)(CPUArchState > *env, target_ulong addr, > GETPC_EXT()); > } > > -/* handle all unaligned cases */ > -static DATA_TYPE > -glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env, > - target_ulong addr, > - int mmu_idx, > - uintptr_t retaddr) > -{ > - DATA_TYPE res, res1, res2; > - int index, shift; > - hwaddr ioaddr; > - target_ulong tlb_addr, addr1, addr2; > - > - index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); > - redo: > - tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; > - if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | > TLB_INVALID_MASK))) { > - if (tlb_addr & ~TARGET_PAGE_MASK) { > - /* IO access */ > - if ((addr & (DATA_SIZE - 1)) != 0) > - goto do_unaligned_access; > - ioaddr = env->iotlb[mmu_idx][index]; > - res = glue(io_read, SUFFIX)(env, ioaddr, addr, retaddr); > - } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= > TARGET_PAGE_SIZE) { > - do_unaligned_access: > - /* slow unaligned access (it spans two pages) */ > - addr1 = addr & ~(DATA_SIZE - 1); > - addr2 = addr1 + DATA_SIZE; > - res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(env, addr1, > - mmu_idx, retaddr); > - res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(env, addr2, > - mmu_idx, retaddr); > - shift = (addr & (DATA_SIZE - 1)) * 8; > -#ifdef TARGET_WORDS_BIGENDIAN > - res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift)); > -#else > - res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift)); > -#endif > - res = (DATA_TYPE)res; > - } else { > - /* unaligned/aligned access in the same page */ > - uintptr_t addend = env->tlb_table[mmu_idx][index].addend; > - res = glue(glue(ld, USUFFIX), _raw)((uint8_t *)(intptr_t) > - (addr + addend)); > - } > - } else { > - /* the page is not in the TLB : fill it */ > - tlb_fill(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr); > - goto redo; > - } > - return res; > -} > - > #ifndef SOFTMMU_CODE_ACCESS > > -static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(CPUArchState *env, > - target_ulong addr, > - DATA_TYPE val, > - int mmu_idx, > - uintptr_t retaddr); > - > static inline void glue(io_write, SUFFIX)(CPUArchState *env, > hwaddr physaddr, > DATA_TYPE val, > @@ -225,48 +178,66 @@ glue(glue(helper_ret_st, SUFFIX), > MMUSUFFIX)(CPUArchState *env, > target_ulong addr, DATA_TYPE > val, > int mmu_idx, uintptr_t retaddr) > { > - hwaddr ioaddr; > - target_ulong tlb_addr; > - int index; > + int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); > + target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; > + uintptr_t haddr; > > - index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); > - redo: > - tlb_addr = env->tlb_table[mmu_idx][index].addr_write; > - if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | > TLB_INVALID_MASK))) { > - if (tlb_addr & ~TARGET_PAGE_MASK) { > - /* IO access */ > - if ((addr & (DATA_SIZE - 1)) != 0) > - goto do_unaligned_access; > - ioaddr = env->iotlb[mmu_idx][index]; > - glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr); > - } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= > TARGET_PAGE_SIZE) { > - do_unaligned_access: > + /* If the TLB entry is for a different page, reload and try again. */ > + if ((addr & TARGET_PAGE_MASK) > + != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { > #ifdef ALIGNED_ONLY > + if ((addr & (DATA_SIZE - 1)) != 0) { > do_unaligned_access(env, addr, 1, mmu_idx, retaddr); > + } > #endif > - glue(glue(slow_st, SUFFIX), MMUSUFFIX)(env, addr, val, > - mmu_idx, retaddr); > - } else { > - /* aligned/unaligned access in the same page */ > - uintptr_t addend; > + tlb_fill(env, addr, 1, mmu_idx, retaddr); > + tlb_addr = env->tlb_table[mmu_idx][index].addr_write; > + } > + > + /* Handle an IO access. */ > + if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { > + hwaddr ioaddr; > + if ((addr & (DATA_SIZE - 1)) != 0) { > + goto do_unaligned_access; > + } > + ioaddr = env->iotlb[mmu_idx][index]; > + glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr); > + return; > + } > + > + /* Handle slow unaligned access (it spans two pages or IO). */ > + if (DATA_SIZE > 1 > + && unlikely ((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 > + >= TARGET_PAGE_SIZE)) { > + int i; > + do_unaligned_access: > #ifdef ALIGNED_ONLY > - if ((addr & (DATA_SIZE - 1)) != 0) { > - do_unaligned_access(env, addr, 1, mmu_idx, retaddr); > - } > + do_unaligned_access(env, addr, 1, mmu_idx, retaddr); > +#endif > + /* XXX: not efficient, but simple */ > + /* Note: relies on the fact that tlb_fill() does not remove the > + * previous page from the TLB cache. */ > + for (i = DATA_SIZE - 1; i >= 0; i--) { > +#ifdef TARGET_WORDS_BIGENDIAN > + uint8_t val8 = val >> (((DATA_SIZE - 1) * 8) - (i * 8)); > +#else > + uint8_t val8 = val >> (i * 8); > #endif > - addend = env->tlb_table[mmu_idx][index].addend; > - glue(glue(st, SUFFIX), _raw)((uint8_t *)(intptr_t) > - (addr + addend), val); > + glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8, > + mmu_idx, retaddr); > } > - } else { > - /* the page is not in the TLB : fill it */ > + return; > + } > + > + /* Handle aligned access or unaligned access in the same page. */ > #ifdef ALIGNED_ONLY > - if ((addr & (DATA_SIZE - 1)) != 0) > - do_unaligned_access(env, addr, 1, mmu_idx, retaddr); > -#endif > - tlb_fill(env, addr, 1, mmu_idx, retaddr); > - goto redo; > + if ((addr & (DATA_SIZE - 1)) != 0) { > + do_unaligned_access(env, addr, 1, mmu_idx, retaddr); > } > +#endif > + > + haddr = addr + env->tlb_table[mmu_idx][index].addend; > + glue(glue(st, SUFFIX), _raw)((uint8_t *)haddr, val); > } > > void > @@ -277,56 +248,6 @@ glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState > *env, target_ulong addr, > GETPC_EXT()); > } > > -/* handles all unaligned cases */ > -static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(CPUArchState *env, > - target_ulong addr, > - DATA_TYPE val, > - int mmu_idx, > - uintptr_t retaddr) > -{ > - hwaddr ioaddr; > - target_ulong tlb_addr; > - int index, i; > - > - index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); > - redo: > - tlb_addr = env->tlb_table[mmu_idx][index].addr_write; > - if ((addr & TARGET_PAGE_MASK) == (tlb_addr & (TARGET_PAGE_MASK | > TLB_INVALID_MASK))) { > - if (tlb_addr & ~TARGET_PAGE_MASK) { > - /* IO access */ > - if ((addr & (DATA_SIZE - 1)) != 0) > - goto do_unaligned_access; > - ioaddr = env->iotlb[mmu_idx][index]; > - glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr); > - } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= > TARGET_PAGE_SIZE) { > - do_unaligned_access: > - /* XXX: not efficient, but simple */ > - /* Note: relies on the fact that tlb_fill() does not remove the > - * previous page from the TLB cache. */ > - for(i = DATA_SIZE - 1; i >= 0; i--) { > -#ifdef TARGET_WORDS_BIGENDIAN > - glue(slow_stb, MMUSUFFIX)(env, addr + i, > - val >> (((DATA_SIZE - 1) * 8) - (i > * 8)), > - mmu_idx, retaddr); > -#else > - glue(slow_stb, MMUSUFFIX)(env, addr + i, > - val >> (i * 8), > - mmu_idx, retaddr); > -#endif > - } > - } else { > - /* aligned/unaligned access in the same page */ > - uintptr_t addend = env->tlb_table[mmu_idx][index].addend; > - glue(glue(st, SUFFIX), _raw)((uint8_t *)(intptr_t) > - (addr + addend), val); > - } > - } else { > - /* the page is not in the TLB : fill it */ > - tlb_fill(env, addr, 1, mmu_idx, retaddr); > - goto redo; > - } > -} > - > #endif /* !defined(SOFTMMU_CODE_ACCESS) */ > > #undef READ_ACCESS_TYPE Beside the minor nitpick above: Reviewed-by: Aurelien Jarno <aurel...@aurel32.net> -- Aurelien Jarno GPG: 1024D/F1BCDB73 aurel...@aurel32.net http://www.aurel32.net