On Wed, May 29, 2013 at 4:31 PM, Paolo Bonzini <pbonz...@redhat.com> wrote: > Il 29/05/2013 09:48, liu ping fan ha scritto: >> On Wed, May 29, 2013 at 3:03 PM, Paolo Bonzini <pbonz...@redhat.com> wrote: >>> Il 29/05/2013 04:11, Liu Ping Fan ha scritto: >>>> From: Liu Ping Fan <pingf...@linux.vnet.ibm.com> >>>> >>>> All of AddressSpaceDispatch's roots are part of dispatch context, >>>> along with cur_map_nodes, cur_phys_sections, and we should walk >>>> through AddressSpaceDispatchs in the same dispatch context, ie >>>> the same memory topology. Concenter the roots, so we can switch >>>> to next more easily. >>>> >>>> Signed-off-by: Liu Ping Fan <pingf...@linux.vnet.ibm.com> >>>> --- >>>> exec.c | 48 >>>> ++++++++++++++++++++++++++++++++++++--- >>>> include/exec/memory-internal.h | 2 +- >>>> 2 files changed, 45 insertions(+), 5 deletions(-) >>>> >>>> diff --git a/exec.c b/exec.c >>>> index eb69a98..315960d 100644 >>>> --- a/exec.c >>>> +++ b/exec.c >>>> @@ -49,6 +49,7 @@ >>>> #include "translate-all.h" >>>> >>>> #include "exec/memory-internal.h" >>>> +#include "qemu/bitops.h" >>>> >>>> //#define DEBUG_SUBPAGE >>>> >>>> @@ -95,6 +96,7 @@ typedef struct AllocInfo { >>>> /* Only used for release purpose */ >>>> Node *map; >>>> MemoryRegionSection *sections; >>>> + PhysPageEntry *roots; >>>> } AllocInfo; >>> >>> I wouldn't put it here. I would put it in AddressSpaceDispatch (as >>> next_phys_map/next_sections/next_nodes: initialize >>> next_sections/next_nodes in the "begin" hook, switch under seqlock in >>> the "commit" hook). >>> >> Implement based on separate AddressSpaceDispatch is broken. We should >> walk through the ASD chain under the same mem topology. Take the >> following scenario: >> (ASD-x is followed by ASD-y) >> walk through ASD-x under topology-A >> ----before walk on ASD-y, mem topology changes, and switch >> from A to B >> continue to walk ASD-y in B (but it should be A not B) > > It is perfectly fine. If you do a topology change during an access, and > the topology change includes a change for the region that is being > accessed, it is undefined whether you get the "before" or the "after". > > In particular it is acceptable that, for example, the CPU accesses the > memory "after" the change and a device concurrently accesses the memory > "before" the change. > The rcu reader is guaranteed to see obj-prev or obj-next in atomically. Just as you said " it is undefined whether you get the "before" or the "after". But for both cases, the obj should be intact. Here the "obj" is memory topology, _not_ ADS(ADS just express it in radix tree). Combine ADS from different memory topology will give us a broken obj, not atomically. What is your opinion?
Thanks and regards, Pingfan > This is exactly why we can use RCU, in fact! > > Paolo > >> To elimate such issue, I concenter the >> roots/phys_sections/phys_map_nodes, so we can switch from topology-A >> to B atomiclly. >> >> Regards, >> Pingfan >>> This requires refcounting AllocInfo, but it removes the need for the >>> ->idx indirection and the id management. >>> >>> Paolo >>> >>>> /* For performance, fetch page map related pointers directly, other than >>>> @@ -102,10 +104,12 @@ typedef struct AllocInfo { >>>> */ >>>> static MemoryRegionSection *cur_phys_sections; >>>> static Node *cur_map_nodes; >>>> +static PhysPageEntry *cur_roots; >>>> static AllocInfo *cur_alloc_info; >>>> >>>> static MemoryRegionSection *next_phys_sections; >>>> static Node *next_map_nodes; >>>> +static PhysPageEntry *next_roots; >>>> static AllocInfo *next_alloc_info; >>>> >>>> #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1) >>>> @@ -191,12 +195,13 @@ static void phys_page_set(AddressSpaceDispatch *d, >>>> /* Wildly overreserve - it doesn't matter much. */ >>>> phys_map_node_reserve(3 * P_L2_LEVELS); >>>> >>>> - phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1); >>>> + phys_page_set_level(&next_roots[d->idx], &index, &nb, leaf, >>>> + P_L2_LEVELS - 1); >>>> } >>>> >>>> static MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, >>>> hwaddr index, bool cur) >>>> { >>>> - PhysPageEntry lp = d->phys_map; >>>> + PhysPageEntry lp; >>>> PhysPageEntry *p; >>>> int i; >>>> Node *map_nodes; >>>> @@ -205,9 +210,11 @@ static MemoryRegionSection >>>> *phys_page_find(AddressSpaceDispatch *d, hwaddr index >>>> if (likely(cur)) { >>>> map_nodes = cur_map_nodes; >>>> sections = cur_phys_sections; >>>> + lp = cur_roots[d->idx]; >>>> } else { >>>> map_nodes = next_map_nodes; >>>> sections = next_phys_sections; >>>> + lp = next_roots[d->idx]; >>>> } >>>> for (i = P_L2_LEVELS - 1; i >= 0 && !lp.is_leaf; i--) { >>>> if (lp.ptr == PHYS_MAP_NODE_NIL) { >>>> @@ -1729,7 +1736,8 @@ static void mem_begin(MemoryListener *listener) >>>> { >>>> AddressSpaceDispatch *d = container_of(listener, >>>> AddressSpaceDispatch, listener); >>>> >>>> - d->phys_map.ptr = PHYS_MAP_NODE_NIL; >>>> + next_roots[d->idx] = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, >>>> + is_leaf = 0 }; >>>> } >>>> >>>> static void core_begin(MemoryListener *listener) >>>> @@ -1737,6 +1745,7 @@ static void core_begin(MemoryListener *listener) >>>> uint16_t n; >>>> >>>> next_alloc_info = g_malloc0(sizeof(AllocInfo)); >>>> + next_roots = g_malloc0(2048*sizeof(PhysPageEntry)); >>>> n = dummy_section(&io_mem_unassigned); >>>> assert(phys_section_unassigned == n); >>>> n = dummy_section(&io_mem_notdirty); >>>> @@ -1752,6 +1761,7 @@ static void release_dispatch_map(AllocInfo *info) >>>> phys_sections_clear(info->phys_sections_nb, info->sections); >>>> g_free(info->map); >>>> g_free(info->sections); >>>> + g_free(info->roots); >>>> g_free(info); >>>> } >>>> >>>> @@ -1765,6 +1775,7 @@ static void core_commit(MemoryListener *listener) >>>> AllocInfo *info = cur_alloc_info; >>>> info->map = cur_map_nodes; >>>> info->sections = cur_phys_sections; >>>> + info->roots = cur_roots; >>>> >>>> /* Fix me, in future, will be protected by wr seqlock when in parellel >>>> * with reader >>>> @@ -1772,6 +1783,7 @@ static void core_commit(MemoryListener *listener) >>>> cur_map_nodes = next_map_nodes; >>>> cur_phys_sections = next_phys_sections; >>>> cur_alloc_info = next_alloc_info; >>>> + cur_roots = next_roots; >>>> >>>> /* since each CPU stores ram addresses in its TLB cache, we must >>>> reset the modified entries */ >>>> @@ -1826,11 +1838,36 @@ static MemoryListener io_memory_listener = { >>>> .priority = 0, >>>> }; >>>> >>>> +static MemoryListener tcg_memory_listener = { >>>> + .commit = tcg_commit, >>>> +}; >>> >>> Rebase artifact? >>> >>>> +#define MAX_IDX (1<<15) >>>> +static unsigned long *address_space_id_map; >>>> +static QemuMutex id_lock; >>>> + >>>> +static void address_space_release_id(int16_t idx) >>>> +{ >>>> + qemu_mutex_lock(&id_lock); >>>> + clear_bit(idx, address_space_id_map); >>>> + qemu_mutex_unlock(&id_lock); >>>> +} >>>> + >>>> +static int16_t address_space_alloc_id() >>>> +{ >>>> + unsigned long idx; >>>> + >>>> + qemu_mutex_lock(&id_lock); >>>> + idx = find_first_zero_bit(address_space_id_map, >>>> MAX_IDX/BITS_PER_LONG); >>>> + set_bit(idx, address_space_id_map); >>>> + qemu_mutex_unlock(&id_lock); >>>> +} >>>> + >>>> void address_space_init_dispatch(AddressSpace *as) >>>> { >>>> AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1); >>>> >>>> - d->phys_map = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = >>>> 0 }; >>>> + d->idx = address_space_alloc_id(); >>>> d->listener = (MemoryListener) { >>>> .begin = mem_begin, >>>> .region_add = mem_add, >>>> @@ -1845,6 +1882,7 @@ void address_space_destroy_dispatch(AddressSpace *as) >>>> { >>>> AddressSpaceDispatch *d = as->dispatch; >>>> >>>> + address_space_release_id(d->idx); >>>> memory_listener_unregister(&d->listener); >>>> g_free(d); >>>> as->dispatch = NULL; >>>> @@ -1852,6 +1890,8 @@ void address_space_destroy_dispatch(AddressSpace *as) >>>> >>>> static void memory_map_init(void) >>>> { >>>> + qemu_mutex_init(&id_lock); >>>> + address_space_id_map = g_malloc0(MAX_IDX/BITS_PER_BYTE); >>>> system_memory = g_malloc(sizeof(*system_memory)); >>>> memory_region_init(system_memory, "system", INT64_MAX); >>>> address_space_init(&address_space_memory, system_memory, "memory"); >>>> diff --git a/include/exec/memory-internal.h >>>> b/include/exec/memory-internal.h >>>> index 799c02a..54a3635 100644 >>>> --- a/include/exec/memory-internal.h >>>> +++ b/include/exec/memory-internal.h >>>> @@ -36,7 +36,7 @@ struct AddressSpaceDispatch { >>>> /* This is a multi-level map on the physical address space. >>>> * The bottom level has pointers to MemoryRegionSections. >>>> */ >>>> - PhysPageEntry phys_map; >>>> + int16_t idx; >>>> MemoryListener listener; >>>> }; >>>> >>>> >>> >