Re: [PATCH v3 10/20] cputlb: Introduce TLB_BSWAP
On 22.09.19 05:54, Richard Henderson wrote:
> Handle bswap on ram directly in load/store_helper.  This fixes a
> bug with the previous implementation in that one cannot use the
> I/O path for RAM.
>
> Fixes: a26fc6f5152b47f1
> Signed-off-by: Richard Henderson
> ---
>  include/exec/cpu-all.h |   4 +-
>  accel/tcg/cputlb.c     | 108 +++++++++++++++++++++++++-----------------------
>  2 files changed, 59 insertions(+), 53 deletions(-)
>
> diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
> index 2db73c7a27..1ebd1b59ab 100644
> --- a/include/exec/cpu-all.h
> +++ b/include/exec/cpu-all.h
> @@ -346,12 +346,14 @@ CPUArchState *cpu_copy(CPUArchState *env);
>  #define TLB_MMIO            (1 << (TARGET_PAGE_BITS_MIN - 3))
>  /* Set if TLB entry contains a watchpoint.  */
>  #define TLB_WATCHPOINT      (1 << (TARGET_PAGE_BITS_MIN - 4))
> +/* Set if TLB entry requires byte swap.  */
> +#define TLB_BSWAP           (1 << (TARGET_PAGE_BITS_MIN - 5))
>  
>  /* Use this mask to check interception with an alignment mask
>   * in a TCG backend.
>   */
>  #define TLB_FLAGS_MASK \
> -    (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO | TLB_WATCHPOINT)
> +    (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO | TLB_WATCHPOINT | TLB_BSWAP)
>  
>  /**
>   * tlb_hit_page: return true if page aligned @addr is a hit against the
> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> index b4a63d3928..cb603917a2 100644
> --- a/accel/tcg/cputlb.c
> +++ b/accel/tcg/cputlb.c
> @@ -737,8 +737,7 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
>          address |= TLB_INVALID_MASK;
>      }
>      if (attrs.byte_swap) {
> -        /* Force the access through the I/O slow path.  */
> -        address |= TLB_MMIO;
> +        address |= TLB_BSWAP;
>      }
>      if (!memory_region_is_ram(section->mr) &&
>          !memory_region_is_romd(section->mr)) {
> @@ -901,10 +900,6 @@ static uint64_t io_readx(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
>      bool locked = false;
>      MemTxResult r;
>  
> -    if (iotlbentry->attrs.byte_swap) {
> -        op ^= MO_BSWAP;
> -    }
> -
>      section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
>      mr = section->mr;
>      mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
> @@ -947,10 +942,6 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
>      bool locked = false;
>      MemTxResult r;
>  
> -    if (iotlbentry->attrs.byte_swap) {
> -        op ^= MO_BSWAP;
> -    }
> -
>      section = iotlb_to_section(cpu, iotlbentry->addr, iotlbentry->attrs);
>      mr = section->mr;
>      mr_offset = (iotlbentry->addr & TARGET_PAGE_MASK) + addr;
> @@ -1133,8 +1124,8 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,
>                                  wp_access, retaddr);
>      }
>  
> -    if (tlb_addr & (TLB_NOTDIRTY | TLB_MMIO)) {
> -        /* I/O access */
> +    /* Reject I/O access, or other required slow-path.  */
> +    if (tlb_addr & (TLB_NOTDIRTY | TLB_MMIO | TLB_BSWAP)) {
>          return NULL;
>      }
>  
> @@ -1311,7 +1302,8 @@ static inline uint64_t wrap_ldul_le(const void *haddr)
>  static inline uint64_t QEMU_ALWAYS_INLINE
>  load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
>              uintptr_t retaddr, MemOp op, bool code_read,
> -            FullLoadHelper *full_load, LoadHelper *direct)
> +            FullLoadHelper *full_load, LoadHelper *direct,
> +            LoadHelper *direct_swap)
>  {
>      uintptr_t mmu_idx = get_mmuidx(oi);
>      uintptr_t index = tlb_index(env, mmu_idx, addr);
> @@ -1361,17 +1353,21 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
>              /* On watchpoint hit, this will longjmp out.  */
>              cpu_check_watchpoint(env_cpu(env), addr, size,
>                                   iotlbentry->attrs, BP_MEM_READ, retaddr);
> -
> -            /* The backing page may or may not require I/O.  */
> -            tlb_addr &= ~TLB_WATCHPOINT;
> -            if ((tlb_addr & ~TARGET_PAGE_MASK) == 0) {
> -                goto do_aligned_access;
> -            }
>          }
>  
>          /* Handle I/O access.  */
> -        return io_readx(env, iotlbentry, mmu_idx, addr,
> -                        retaddr, access_type, op);
> +        if (likely(tlb_addr & TLB_MMIO)) {
> +            return io_readx(env, iotlbentry, mmu_idx, addr,
> +                            retaddr, access_type,
> +                            op ^ (tlb_addr & TLB_BSWAP ? MO_BSWAP : 0));
> +        }
> +
> +        haddr = (void *)((uintptr_t)addr + entry->addend);
> +
> +        if (unlikely(tlb_addr & TLB_BSWAP)) {
> +            return direct_swap(haddr);
> +        }
> +        return direct(haddr);
>      }
>  
>      /* Handle slow unaligned access (it spans two pages or IO).  */
> @@ -1398,7 +1394,6 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
>          return res & MAKE_64BIT_MASK(0, size * 8);
>      }
>  
> - do_aligned_access:
>      haddr = (void *)((uintptr_t)addr + entry->addend);
>      return direct(haddr);
>  }
> @@ -1417,7 +1412,7 @@ static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
>                                TCGMemOpIdx oi, uintptr_t retaddr)
>  {
>      return load_helper(env, addr, oi,
> [...]
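For anyone following along: the cpu-all.h hunk simply claims one more of the
flag bits that live below TARGET_PAGE_BITS_MIN in the TLB comparator. A
simplified, self-contained illustration of why that works (these are *not*
the QEMU definitions -- PAGE_BITS, the addresses, and the hit test below are
stand-ins made up for this sketch):

    #include <assert.h>
    #include <stdint.h>

    /* Simplified stand-ins: assume 12-bit pages (TARGET_PAGE_BITS_MIN == 12). */
    #define PAGE_BITS        12
    #define PAGE_MASK        (~(uint64_t)((1u << PAGE_BITS) - 1))

    #define TLB_INVALID_MASK (1u << (PAGE_BITS - 1))
    #define TLB_NOTDIRTY     (1u << (PAGE_BITS - 2))
    #define TLB_MMIO         (1u << (PAGE_BITS - 3))
    #define TLB_WATCHPOINT   (1u << (PAGE_BITS - 4))
    #define TLB_BSWAP        (1u << (PAGE_BITS - 5))   /* the new bit */

    #define TLB_FLAGS_MASK \
        (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO | TLB_WATCHPOINT | TLB_BSWAP)

    int main(void)
    {
        /* A comparator for page 0xdead000 with the new flag folded in. */
        uint64_t tlb_addr = 0xdead000ULL | TLB_BSWAP;
        uint64_t vaddr = 0xdead234ULL;

        /* The page-number comparison still hits: flags sit below PAGE_BITS. */
        assert(((vaddr ^ tlb_addr) & PAGE_MASK) == 0);

        /* ...and the helper can see that a slow path (here: bswap) is needed. */
        assert((tlb_addr & TLB_FLAGS_MASK) == TLB_BSWAP);
        return 0;
    }

Since TLB_BSWAP is included in TLB_FLAGS_MASK, pages that need a swap always
fall out of the inline fast path and land in the C helpers, which is exactly
where the new dispatch does its work.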
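On the cputlb.c side, the old "force the access through the I/O slow path"
trick is replaced by a three-way dispatch. Here is a minimal stand-alone mock
of that shape (all names in it -- load_dispatch, io_read_mock, ldl_host,
ldl_swapped, and the flag values -- are invented for illustration; the real
code threads the extra direct_swap wrapper through load_helper):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Stand-ins for the QEMU flags; the values are arbitrary for this demo. */
    #define TLB_MMIO   (1u << 0)
    #define TLB_BSWAP  (1u << 1)
    #define MO_BSWAP   (1u << 2)

    typedef uint64_t LoadHelper(const void *haddr);

    static uint32_t bswap32(uint32_t v)
    {
        return (v >> 24) | ((v >> 8) & 0xff00u) | ((v << 8) & 0xff0000u) | (v << 24);
    }

    /* Host-order 4-byte load: plays the role of the "direct" accessor. */
    static uint64_t ldl_host(const void *haddr)
    {
        uint32_t v;
        memcpy(&v, haddr, sizeof(v));
        return v;
    }

    /* Opposite-endian 4-byte load: plays the role of "direct_swap". */
    static uint64_t ldl_swapped(const void *haddr)
    {
        uint32_t v;
        memcpy(&v, haddr, sizeof(v));
        return bswap32(v);
    }

    /* Mock of io_readx: only the MemOp adjustment matters here. */
    static uint64_t io_read_mock(unsigned op)
    {
        printf("MMIO read, MemOp bswap bit %s\n",
               (op & MO_BSWAP) ? "set" : "clear");
        return 0;
    }

    /*
     * The dispatch shape the patch gives the aligned path of load_helper:
     * MMIO folds the per-page swap into the MemOp; RAM picks between the
     * two direct accessors.
     */
    static uint64_t load_dispatch(unsigned tlb_flags, unsigned op,
                                  const void *haddr,
                                  LoadHelper *direct, LoadHelper *direct_swap)
    {
        if (tlb_flags & TLB_MMIO) {
            return io_read_mock(op ^ ((tlb_flags & TLB_BSWAP) ? MO_BSWAP : 0));
        }
        if (tlb_flags & TLB_BSWAP) {
            return direct_swap(haddr);  /* RAM, but the page wants the other order */
        }
        return direct(haddr);           /* ordinary RAM */
    }

    int main(void)
    {
        uint32_t ram_word = 0x11223344;

        printf("direct:      %#llx\n", (unsigned long long)
               load_dispatch(0, 0, &ram_word, ldl_host, ldl_swapped));
        printf("direct_swap: %#llx\n", (unsigned long long)
               load_dispatch(TLB_BSWAP, 0, &ram_word, ldl_host, ldl_swapped));
        load_dispatch(TLB_MMIO | TLB_BSWAP, 0, NULL, ldl_host, ldl_swapped);
        return 0;
    }

The nice property of this arrangement is visible in the diff itself: io_readx
and io_writex no longer need to peek at attrs.byte_swap, since the swap is
folded into the MemOp with a single XOR at the call site, and RAM-backed
pages never detour through the I/O path anymore -- which is the bug the
commit message describes.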