From: Liu Ping Fan <pingf...@linux.vnet.ibm.com> Using seqlock to load the dispatch context atomically. The dispatch context consists of: cur_map_nodes, cur_sections, cur_roots.
Also during the dispatch, we should get the terminal, and dup MemoryRegionSection. So after rcu unlock, the cur dispatch context can be dropped safely. Signed-off-by: Liu Ping Fan <pingf...@linux.vnet.ibm.com> -------- The tcg code related to tlb_set_page() should be fixed too. But it is a little complicated, separating it for easing review. --- exec.c | 239 ++++++++++++++++++++++++++++++++++++++++----------------------- 1 files changed, 152 insertions(+), 87 deletions(-) diff --git a/exec.c b/exec.c index 315960d..9a5c67f 100644 --- a/exec.c +++ b/exec.c @@ -112,6 +112,8 @@ static Node *next_map_nodes; static PhysPageEntry *next_roots; static AllocInfo *next_alloc_info; +QemuSeqLock dispatch_table_lock; + #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1) static void io_mem_init(void); @@ -199,31 +201,24 @@ static void phys_page_set(AddressSpaceDispatch *d, P_L2_LEVELS - 1); } -static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, hwaddr index, bool cur) +static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, + hwaddr index, + Node *map_base, + MemoryRegionSection *sections_base, + PhysPageEntry *root_base) { - PhysPageEntry lp; + PhysPageEntry lp = root_base[d->idx]; PhysPageEntry *p; int i; - Node *map_nodes; - MemoryRegionSection *sections; - if (likely(cur)) { - map_nodes = cur_map_nodes; - sections = cur_phys_sections; - lp = cur_roots[d->idx]; - } else { - map_nodes = next_map_nodes; - sections = next_phys_sections; - lp = next_roots[d->idx]; - } for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) { if (lp.ptr == PHYS_MAP_NODE_NIL) { - return &sections[phys_section_unassigned]; + return &sections_base[phys_section_unassigned]; } - p = map_nodes[lp.ptr]; + p = map_base[lp.ptr]; lp = p[(index >> (i * L2_BITS)) & (L2_SIZE - 1)]; } - return &sections[lp.ptr]; + return &sections_base[lp.ptr]; } bool memory_region_is_unassigned(MemoryRegion *mr) @@ -233,21 +228,28 @@ bool memory_region_is_unassigned(MemoryRegion *mr) } static MemoryRegionSection 
*address_space_lookup_region(AddressSpace *as, - hwaddr addr) + hwaddr addr, Node *map_base, MemoryRegionSection *sections_base, + PhysPageEntry *root_base) + { - return phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS, true); + return phys_page_find(as->dispatch, addr >> TARGET_PAGE_BITS, + map_base, sections_base, root_base); } MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr, hwaddr *xlat, hwaddr *plen, - bool is_write) + bool is_write, + Node *map_base, + MemoryRegionSection *sections_base, + PhysPageEntry *root_base) { IOMMUTLBEntry iotlb; - MemoryRegionSection *section, *base = cur_phys_sections; + MemoryRegionSection *section; hwaddr len = *plen; for (;;) { - section = address_space_lookup_region(as, addr); + section = address_space_lookup_region(as, addr, map_base, + sections_base, root_base); /* Compute offset within MemoryRegionSection */ addr -= section->offset_within_address_space; @@ -265,7 +267,7 @@ MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr, | (addr & iotlb.addr_mask)); len = MIN(len, (addr | iotlb.addr_mask) - addr + 1); if (!(iotlb.perm & (1 << is_write))) { - section = &base[phys_section_unassigned]; + section = &sections_base[phys_section_unassigned]; break; } @@ -276,6 +278,42 @@ MemoryRegionSection *address_space_translate(AddressSpace *as, hwaddr addr, *xlat = addr; return section; } + +/* caller should hold rcu read lock */ +MemoryRegionSection address_space_get_terminal(AddressSpace *as, hwaddr addr, + hwaddr *xlat, hwaddr *plen, + bool is_write) +{ + MemoryRegionSection *mrs, *ret, *sections_base; + Node *map_base; + subpage_t *mmio; + unsigned int idx, start; + PhysPageEntry *root_base; + + /* Walk through all of the AddressSpace in a consistent view */ + do { + start = seqlock_read_begin(&dispatch_table_lock); + map_base = cur_map_nodes; + sections_base = cur_phys_sections; + /* get all AddressSpaceDispatch root */ + root_base = cur_roots; + } while 
(seqlock_read_check(&dispatch_table_lock, start)); + + mrs = address_space_translate(as, addr, xlat, plen, is_write, + map_base, sections_base, root_base); + if (!mrs->mr->terminates) { + mmio = container_of(mrs->mr, subpage_t, mmio); + idx = SUBPAGE_IDX(addr); + ret = &sections_base[mmio->sub_section[idx]]; + *xlat += mmio->base; + *xlat -= mrs->offset_within_address_space; + *xlat += mrs->offset_within_region; + } else { + ret = mrs; + } + + return *ret; +} #endif void cpu_exec_init_all(void) @@ -1777,13 +1815,12 @@ static void core_commit(MemoryListener *listener) info->sections = cur_phys_sections; info->roots = cur_roots; - /* Fix me, in future, will be protected by wr seqlock when in parellel - * with reader - */ + seqlock_write_lock(&dispatch_table_lock); cur_map_nodes = next_map_nodes; cur_phys_sections = next_phys_sections; cur_alloc_info = next_alloc_info; cur_roots = next_roots; + seqlock_write_unlock(&dispatch_table_lock); /* since each CPU stores ram addresses in its TLB cache, we must reset the modified entries */ @@ -1890,8 +1927,13 @@ void address_space_destroy_dispatch(AddressSpace *as) static void memory_map_init(void) { + QemuMutex *mutex; + qemu_mutex_init(&id_lock); address_space_id_map = g_malloc0(MAX_IDX/BITS_PER_BYTE); + mutex = g_malloc0(sizeof(QemuMutex)); + qemu_mutex_init(mutex); + seqlock_init(&dispatch_table_lock, mutex); system_memory = g_malloc(sizeof(*system_memory)); memory_region_init(system_memory, "system", INT64_MAX); address_space_init(&address_space_memory, system_memory, "memory"); @@ -2001,59 +2043,68 @@ bool address_space_rw(AddressSpace *as, hwaddr addr, uint8_t *buf, uint8_t *ptr; uint64_t val; hwaddr addr1; - MemoryRegionSection *section; + MemoryRegionSection section; bool error = false; while (len > 0) { l = len; - section = address_space_translate(as, addr, &addr1, &l, is_write); + rcu_read_lock(); + section = address_space_get_terminal(as, addr, &addr1, &l, is_write); if (is_write) { - if 
(!memory_access_is_direct(section->mr, is_write)) { + if (!memory_access_is_direct(section.mr, is_write)) { l = memory_access_size(l, addr1); /* XXX: could force cpu_single_env to NULL to avoid potential bugs */ + memory_region_ref(section.mr); + rcu_read_unlock(); if (l == 4) { /* 32 bit write access */ val = ldl_p(buf); - error |= io_mem_write(section->mr, addr1, val, 4); + error |= io_mem_write(section.mr, addr1, val, 4); } else if (l == 2) { /* 16 bit write access */ val = lduw_p(buf); - error |= io_mem_write(section->mr, addr1, val, 2); + error |= io_mem_write(section.mr, addr1, val, 2); } else { /* 8 bit write access */ val = ldub_p(buf); - error |= io_mem_write(section->mr, addr1, val, 1); + error |= io_mem_write(section.mr, addr1, val, 1); } + memory_region_unref(section.mr); } else { - addr1 += memory_region_get_ram_addr(section->mr); + addr1 += memory_region_get_ram_addr(section.mr); /* RAM case */ ptr = qemu_get_ram_ptr(addr1); memcpy(ptr, buf, l); invalidate_and_set_dirty(addr1, l); + rcu_read_unlock(); } } else { - if (!memory_access_is_direct(section->mr, is_write)) { + if (!memory_access_is_direct(section.mr, is_write)) { /* I/O case */ + memory_region_ref(section.mr); + rcu_read_unlock(); l = memory_access_size(l, addr1); if (l == 4) { /* 32 bit read access */ - error |= io_mem_read(section->mr, addr1, &val, 4); + error |= io_mem_read(section.mr, addr1, &val, 4); stl_p(buf, val); } else if (l == 2) { /* 16 bit read access */ - error |= io_mem_read(section->mr, addr1, &val, 2); + error |= io_mem_read(section.mr, addr1, &val, 2); stw_p(buf, val); } else { /* 8 bit read access */ - error |= io_mem_read(section->mr, addr1, &val, 1); + error |= io_mem_read(section.mr, addr1, &val, 1); stb_p(buf, val); } + memory_region_unref(section.mr); } else { /* RAM case */ - ptr = qemu_get_ram_ptr(section->mr->ram_addr + addr1); + ptr = qemu_get_ram_ptr(section.mr->ram_addr + addr1); memcpy(buf, ptr, l); + rcu_read_unlock(); } } len -= l; @@ -2089,18 +2140,19 @@ 
void cpu_physical_memory_write_rom(hwaddr addr, hwaddr l; uint8_t *ptr; hwaddr addr1; - MemoryRegionSection *section; + MemoryRegionSection section; + rcu_read_lock(); while (len > 0) { l = len; - section = address_space_translate(&address_space_memory, + section = address_space_get_terminal(&address_space_memory, addr, &addr1, &l, true); - if (!(memory_region_is_ram(section->mr) || - memory_region_is_romd(section->mr))) { + if (!(memory_region_is_ram(section.mr) || + memory_region_is_romd(section.mr))) { /* do nothing */ } else { - addr1 += memory_region_get_ram_addr(section->mr); + addr1 += memory_region_get_ram_addr(section.mr); /* ROM/RAM case */ ptr = qemu_get_ram_ptr(addr1); memcpy(ptr, buf, l); @@ -2110,6 +2162,7 @@ void cpu_physical_memory_write_rom(hwaddr addr, buf += l; addr += l; } + rcu_read_unlock(); } typedef struct { @@ -2161,15 +2214,15 @@ static void cpu_notify_map_clients(void) bool address_space_access_valid(AddressSpace *as, hwaddr addr, int len, bool is_write) { - MemoryRegionSection *section; + MemoryRegionSection section; hwaddr l, xlat; while (len > 0) { l = len; - section = address_space_translate(as, addr, &xlat, &l, is_write); - if (!memory_access_is_direct(section->mr, is_write)) { + section = address_space_get_terminal(as, addr, &xlat, &l, is_write); + if (!memory_access_is_direct(section.mr, is_write)) { l = memory_access_size(l, addr); - if (!memory_region_access_valid(section->mr, xlat, l, is_write)) { + if (!memory_region_access_valid(section.mr, xlat, l, is_write)) { return false; } } @@ -2195,14 +2248,14 @@ void *address_space_map(AddressSpace *as, hwaddr len = *plen; hwaddr todo = 0; hwaddr l, xlat; - MemoryRegionSection *section; + MemoryRegionSection section; ram_addr_t raddr = RAM_ADDR_MAX; ram_addr_t rlen; void *ret; while (len > 0) { l = len; - section = address_space_translate(as, addr, &xlat, &l, is_write); + section = address_space_get_terminal(as, addr, &xlat, &l, is_write); if (!memory_access_is_direct(section->mr, 
is_write)) { if (todo || bounce.buffer) { @@ -2211,20 +2264,20 @@ void *address_space_map(AddressSpace *as, bounce.buffer = qemu_memalign(TARGET_PAGE_SIZE, TARGET_PAGE_SIZE); bounce.addr = addr; bounce.len = l; - bounce.mr = section->mr; + bounce.mr = section.mr; if (!is_write) { address_space_read(as, addr, bounce.buffer, l); } *plen = l; - memory_region_ref(section->mr); + memory_region_ref(section.mr); return bounce.buffer; } if (!todo) { - raddr = memory_region_get_ram_addr(section->mr) + xlat; - memory_region_ref(section->mr); + raddr = memory_region_get_ram_addr(section.mr) + xlat; + memory_region_ref(section.mr); } else { - if (memory_region_get_ram_addr(section->mr) + xlat != raddr + todo) { + if (memory_region_get_ram_addr(section.mr) + xlat != raddr + todo) { break; } } @@ -2297,15 +2350,17 @@ static inline uint32_t ldl_phys_internal(hwaddr addr, { uint8_t *ptr; uint64_t val; - MemoryRegionSection *section; + MemoryRegionSection section; hwaddr l = 4; hwaddr addr1; - section = address_space_translate(&address_space_memory, addr, &addr1, &l, - false); + rcu_read_lock(); + section = address_space_get_terminal(&address_space_memory, addr, &addr1, + &l, false); + rcu_read_unlock(); if (l < 4 || !memory_access_is_direct(section->mr, false)) { /* I/O case */ - io_mem_read(section->mr, addr1, &val, 4); + io_mem_read(section.mr, addr1, &val, 4); #if defined(TARGET_WORDS_BIGENDIAN) if (endian == DEVICE_LITTLE_ENDIAN) { val = bswap32(val); @@ -2360,8 +2415,10 @@ static inline uint64_t ldq_phys_internal(hwaddr addr, hwaddr l = 8; hwaddr addr1; - section = address_space_translate(&address_space_memory, addr, &addr1, &l, - false); + rcu_read_lock(); + section = address_space_get_terminal(&address_space_memory, addr, &addr1, + &l, false); + rcu_read_unlock(); if (l < 8 || !memory_access_is_direct(section->mr, false)) { /* I/O case */ io_mem_read(section->mr, addr1, &val, 8); @@ -2423,15 +2480,17 @@ static inline uint32_t lduw_phys_internal(hwaddr addr, { uint8_t *ptr; 
uint64_t val; - MemoryRegionSection *section; + MemoryRegionSection section; hwaddr l = 2; hwaddr addr1; - section = address_space_translate(&address_space_memory, addr, &addr1, &l, - false); - if (l < 2 || !memory_access_is_direct(section->mr, false)) { + rcu_read_lock(); + section = address_space_get_terminal(&address_space_memory, addr, &addr1, + &l, false); + rcu_read_unlock(); + if (l < 2 || !memory_access_is_direct(section.mr, false)) { /* I/O case */ - io_mem_read(section->mr, addr1, &val, 2); + io_mem_read(section.mr, addr1, &val, 2); #if defined(TARGET_WORDS_BIGENDIAN) if (endian == DEVICE_LITTLE_ENDIAN) { val = bswap16(val); @@ -2443,7 +2502,7 @@ static inline uint32_t lduw_phys_internal(hwaddr addr, #endif } else { /* RAM case */ - ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section->mr) + ptr = qemu_get_ram_ptr((memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK) + addr1); switch (endian) { @@ -2482,16 +2541,18 @@ uint32_t lduw_be_phys(hwaddr addr) void stl_phys_notdirty(hwaddr addr, uint32_t val) { uint8_t *ptr; - MemoryRegionSection *section; + MemoryRegionSection section; hwaddr l = 4; hwaddr addr1; - section = address_space_translate(&address_space_memory, addr, &addr1, &l, - true); - if (l < 4 || !memory_access_is_direct(section->mr, true)) { - io_mem_write(section->mr, addr1, val, 4); + rcu_read_lock(); + section = address_space_get_terminal(&address_space_memory, addr, &addr1, + &l, true); + rcu_read_unlock(); + if (l < 4 || !memory_access_is_direct(section.mr, true)) { + io_mem_write(section.mr, addr1, val, 4); } else { - addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK; + addr1 += memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK; ptr = qemu_get_ram_ptr(addr1); stl_p(ptr, val); @@ -2512,13 +2573,15 @@ static inline void stl_phys_internal(hwaddr addr, uint32_t val, enum device_endian endian) { uint8_t *ptr; - MemoryRegionSection *section; + MemoryRegionSection section; hwaddr l = 4; hwaddr addr1; - 
section = address_space_translate(&address_space_memory, addr, &addr1, &l, - true); - if (l < 4 || !memory_access_is_direct(section->mr, true)) { + rcu_read_lock(); + section = address_space_get_terminal(&address_space_memory, addr, &addr1, + &l, true); + rcu_read_unlock(); + if (l < 4 || !memory_access_is_direct(section.mr, true)) { #if defined(TARGET_WORDS_BIGENDIAN) if (endian == DEVICE_LITTLE_ENDIAN) { val = bswap32(val); @@ -2528,10 +2591,10 @@ static inline void stl_phys_internal(hwaddr addr, uint32_t val, val = bswap32(val); } #endif - io_mem_write(section->mr, addr1, val, 4); + io_mem_write(section.mr, addr1, val, 4); } else { /* RAM case */ - addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK; + addr1 += memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK; ptr = qemu_get_ram_ptr(addr1); switch (endian) { case DEVICE_LITTLE_ENDIAN: @@ -2575,13 +2638,13 @@ static inline void stw_phys_internal(hwaddr addr, uint32_t val, enum device_endian endian) { uint8_t *ptr; - MemoryRegionSection *section; + MemoryRegionSection section; hwaddr l = 2; hwaddr addr1; - section = address_space_translate(&address_space_memory, addr, &addr1, &l, - true); - if (l < 2 || !memory_access_is_direct(section->mr, true)) { + section = address_space_get_terminal(&address_space_memory, addr, &addr1, + &l, true); + if (l < 2 || !memory_access_is_direct(section.mr, true)) { #if defined(TARGET_WORDS_BIGENDIAN) if (endian == DEVICE_LITTLE_ENDIAN) { val = bswap16(val); @@ -2591,10 +2654,10 @@ static inline void stw_phys_internal(hwaddr addr, uint32_t val, val = bswap16(val); } #endif - io_mem_write(section->mr, addr1, val, 2); + io_mem_write(section.mr, addr1, val, 2); } else { /* RAM case */ - addr1 += memory_region_get_ram_addr(section->mr) & TARGET_PAGE_MASK; + addr1 += memory_region_get_ram_addr(section.mr) & TARGET_PAGE_MASK; ptr = qemu_get_ram_ptr(addr1); switch (endian) { case DEVICE_LITTLE_ENDIAN: @@ -2696,13 +2759,15 @@ bool virtio_is_big_endian(void) #ifndef 
CONFIG_USER_ONLY bool cpu_physical_memory_is_io(hwaddr phys_addr) { - MemoryRegionSection *section; + MemoryRegionSection section; hwaddr l = 1; - section = address_space_translate(&address_space_memory, + rcu_read_lock(); + section = address_space_get_terminal(&address_space_memory, phys_addr, &phys_addr, &l, false); + rcu_read_unlock(); - return !(memory_region_is_ram(section->mr) || - memory_region_is_romd(section->mr)); + return !(memory_region_is_ram(section.mr) || + memory_region_is_romd(section.mr)); } #endif -- 1.7.4.4