Il 09/10/2013 13:28, Juan Quintela ha scritto: > Hi > > This series splits the dirty bitmap (8 bits per page, only three used) > into 3 individual bitmaps. Once the conversion is done, operations > are handled by bitmap operations, not bit by bit. > > - *_DIRTY_FLAG flags are gone, now we use memory.h DIRTY_MEMORY_* > everywhere. > > - We set/reset each flag individually > (set_dirty_flags(0xff&~CODE_DIRTY_FLAG)) are gone. > > - Rename several functions to clarify/make things consistent. > > - I know it doesn't pass checkpatch for long lines, proper submission > should pass it. We have to have long lines, short variable names, or > ugly line splitting :p > > - DIRTY_MEMORY_NUM: how can one include exec/memory.h into cpu-all.h? > #include-ing it doesn't work, as a workaround, I have copied its value, but > any better idea? I can always create "exec/migration-flags.h", though.
I think both files are too "central" and you get some sort of circular dependency. The solution could be to move RAM definitions from cpu-all.h to memory-internal.h. For example: diff --git a/arch_init.c b/arch_init.c index 7545d96..8752e27 100644 --- a/arch_init.c +++ b/arch_init.c @@ -47,7 +47,7 @@ #include "qemu/config-file.h" #include "qmp-commands.h" #include "trace.h" -#include "exec/cpu-all.h" +#include "exec/memory-internal.h" #include "hw/acpi/acpi.h" #ifdef DEBUG_ARCH_INIT diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index 019dc20..4cfde66 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -435,57 +435,11 @@ void cpu_watchpoint_remove_all(CPUArchState *env, int mask); #if !defined(CONFIG_USER_ONLY) -/* memory API */ - extern ram_addr_t ram_size; - -/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */ -#define RAM_PREALLOC_MASK (1 << 0) - -typedef struct RAMBlock { - struct MemoryRegion *mr; - uint8_t *host; - ram_addr_t offset; - ram_addr_t length; - uint32_t flags; - char idstr[256]; - /* Reads can take either the iothread or the ramlist lock. - * Writes must take both locks. - */ - QTAILQ_ENTRY(RAMBlock) next; - int fd; -} RAMBlock; - -#define DIRTY_MEMORY_NUM 3 - -typedef struct RAMList { - QemuMutex mutex; - /* Protected by the iothread lock. */ - unsigned long *dirty_memory[DIRTY_MEMORY_NUM]; - RAMBlock *mru_block; - /* Protected by the ramlist lock. */ - QTAILQ_HEAD(, RAMBlock) blocks; - uint32_t version; -} RAMList; -extern RAMList ram_list; - extern const char *mem_path; extern int mem_prealloc; -/* Flags stored in the low bits of the TLB virtual address. These are - defined so that fast path ram access is all zeros. */ -/* Zero if TLB entry is valid. */ -#define TLB_INVALID_MASK (1 << 3) -/* Set if TLB entry references a clean RAM page. The iotlb entry will - contain the page physical address. */ -#define TLB_NOTDIRTY (1 << 4) -/* Set if TLB entry is an IO callback. 
*/ -#define TLB_MMIO (1 << 5) - void dump_exec_info(FILE *f, fprintf_function cpu_fprintf); -ram_addr_t last_ram_offset(void); -void qemu_mutex_lock_ramlist(void); -void qemu_mutex_unlock_ramlist(void); #endif /* !CONFIG_USER_ONLY */ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, diff --git a/include/exec/cputlb.h b/include/exec/cputlb.h index e21cb60..719fa27 100644 --- a/include/exec/cputlb.h +++ b/include/exec/cputlb.h @@ -20,6 +20,17 @@ #define CPUTLB_H #if !defined(CONFIG_USER_ONLY) + +/* Flags stored in the low bits of the TLB virtual address. These are + defined so that fast path ram access is all zeros. */ +/* Zero if TLB entry is valid. */ +#define TLB_INVALID_MASK (1 << 3) +/* Set if TLB entry references a clean RAM page. The iotlb entry will + contain the page physical address. */ +#define TLB_NOTDIRTY (1 << 4) +/* Set if TLB entry is an IO callback. */ +#define TLB_MMIO (1 << 5) + /* cputlb.c */ void tlb_protect_code(ram_addr_t ram_addr); void tlb_unprotect_code_phys(CPUArchState *env, ram_addr_t ram_addr, diff --git a/include/exec/memory-internal.h b/include/exec/memory-internal.h index d46570e..aaf76c2 100644 --- a/include/exec/memory-internal.h +++ b/include/exec/memory-internal.h @@ -22,6 +22,35 @@ #ifndef CONFIG_USER_ONLY #include "hw/xen/xen.h" +/* RAM is pre-allocated and passed into qemu_ram_alloc_from_ptr */ +#define RAM_PREALLOC_MASK (1 << 0) + +typedef struct RAMBlock { + struct MemoryRegion *mr; + uint8_t *host; + ram_addr_t offset; + ram_addr_t length; + uint32_t flags; + char idstr[256]; + /* Reads can take either the iothread or the ramlist lock. + * Writes must take both locks. + */ + QTAILQ_ENTRY(RAMBlock) next; + int fd; +} RAMBlock; + +#define DIRTY_MEMORY_NUM 3 + +typedef struct RAMList { + QemuMutex mutex; + /* Protected by the iothread lock. */ + unsigned long *dirty_memory[DIRTY_MEMORY_NUM]; + RAMBlock *mru_block; + /* Protected by the ramlist lock. 
*/ + QTAILQ_HEAD(, RAMBlock) blocks; + uint32_t version; +} RAMList; +extern RAMList ram_list; typedef struct AddressSpaceDispatch AddressSpaceDispatch; @@ -105,6 +134,10 @@ static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start, void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t end, unsigned client); +ram_addr_t last_ram_offset(void); +void qemu_mutex_lock_ramlist(void); +void qemu_mutex_unlock_ramlist(void); + #endif #endif diff --git a/include/exec/softmmu_template.h b/include/exec/softmmu_template.h index 5bbc56a..cc27058 100644 --- a/include/exec/softmmu_template.h +++ b/include/exec/softmmu_template.h @@ -23,6 +23,7 @@ */ #include "qemu/timer.h" #include "exec/memory.h" +#include "exec/cputlb.h" #define DATA_SIZE (1 << SHIFT) Bonus points for splitting RAM save out of arch_init.c. :) > - create a lock for the bitmaps and fold migration bitmap into this > one. This would avoid a copy and make things easier? I think this is less important and not that easy. For example, how fine-grained should the locking be without bogging down TCG? You can speed up migration_bitmap_sync by a factor of 64 or more if you copy word-by-word instead of memory_region_test_and_clear_dirty and migration_bitmap_set_dirty. Once you do that and also merge KVM and vhost bitmaps one word at a time, it's likely that dirty bitmaps get almost out of the profile. > - As this code uses/abuses bitmaps, we need to change the type of the > index from int to long. With an int index, we can only access a > maximum of 8TB guest (yes, this is not urgent, we have a couple of > years to do it). Yes. > - merging KVM <-> QEMU bitmap as a bitmap and not bit-by-bit. Right. All of vhost_dev_sync_region, kvm_get_dirty_pages_log_range, xen_sync_dirty_bitmap are really working a word at a time. So it should be easy to optimize the log_sync implementations to work with a word-at-a-time API instead of memory_region_set_dirty. 
> - splitting the KVM bitmap synchronization into chunks, i.e. not > synchronizing all memory, just enough to continue with migration. That can also help. However, it's not easy to do it without making ram_save_pending's computations too optimistic. So I'd just focus on speeding up migration_bitmap_sync first. It's easy and should "almost" do the entirety of the work. Paolo