Hi. This is a proposal for reducing the impact of kvm functions in core qemu code. This is by all means not ready, but I felt like posting it, so a discussion on it could follow.
The idea in this patch is to replace the specific kvm details from core qemu files like vl.c, with driver_yyy() functions. When kvm is not running, those functions would just return (most of time), absolutely reducing the impact of kvm code. As I wanted to test it, in this patch I changed the kvm functions to be called driver_yyy(), but that's not my final goal. I intend to use a function pointer schema, similar to what the linux kernel already do for a lot of its subsystem, to isolate the changes. Comments deeply welcome. --- qemu/exec.c | 11 +-- qemu/gdbstub.c | 8 +- qemu/hw/vmport.c | 6 +- qemu/monitor.c | 3 +- qemu/qemu-kvm.c | 210 +++++++++++++++++++++++++++++++++++++++++++++++++++++- qemu/qemu-kvm.h | 1 + qemu/vl.c | 187 +++++------------------------------------------- 7 files changed, 239 insertions(+), 187 deletions(-) diff --git a/qemu/exec.c b/qemu/exec.c index b82d26d..7a16c78 100644 --- a/qemu/exec.c +++ b/qemu/exec.c @@ -1150,8 +1150,7 @@ int cpu_breakpoint_insert(CPUState *env, target_ulong pc) return -1; env->breakpoints[env->nb_breakpoints++] = pc; - if (kvm_enabled()) - kvm_update_debugger(env); + driver_update_debugger(env); breakpoint_invalidate(env, pc); return 0; @@ -1175,8 +1174,7 @@ int cpu_breakpoint_remove(CPUState *env, target_ulong pc) if (i < env->nb_breakpoints) env->breakpoints[i] = env->breakpoints[env->nb_breakpoints]; - if (kvm_enabled()) - kvm_update_debugger(env); + driver_update_debugger(env); breakpoint_invalidate(env, pc); return 0; @@ -1196,8 +1194,7 @@ void cpu_single_step(CPUState *env, int enabled) /* XXX: only flush what is necessary */ tb_flush(env); } - if (kvm_enabled()) - kvm_update_debugger(env); + driver_update_debugger(env); #endif } @@ -1246,7 +1243,7 @@ void cpu_interrupt(CPUState *env, int mask) env->interrupt_request |= mask; if (kvm_enabled() && !qemu_kvm_irqchip_in_kernel()) - kvm_update_interrupt_request(env); + kvm_update_interrupt_request(env); /* if the cpu is currently executing code, we must unlink it and all the potentially executing TB */ diff --git a/qemu/gdbstub.c b/qemu/gdbstub.c index 2252084..c574686 100644 --- a/qemu/gdbstub.c +++ b/qemu/gdbstub.c @@ -895,7 +895,7 @@ static int gdb_handle_packet(GDBState *s, CPUState *env, const char *line_buf) #if defined(TARGET_I386) env->eip = addr; if (kvm_enabled()) - kvm_load_registers(env); + driver_load_registers(env); #elif defined (TARGET_PPC) env->nip = addr; #elif defined (TARGET_SPARC) @@ -923,7 +923,7 @@ static int gdb_handle_packet(GDBState *s, CPUState *env, const char *line_buf) #if defined(TARGET_I386) env->eip = addr; if (kvm_enabled()) - kvm_load_registers(env); + driver_load_registers(env); #elif defined (TARGET_PPC) env->nip = addr; #elif defined (TARGET_SPARC) @@ -976,7 +976,7 @@ static int gdb_handle_packet(GDBState *s, CPUState *env, const char *line_buf) break; case 'g': if (kvm_enabled()) - kvm_save_registers(env); + driver_save_registers(env); reg_size = cpu_gdb_read_registers(env, mem_buf); memtohex(buf, mem_buf, reg_size); put_packet(s, buf); @@ -987,7 +987,7 @@ static int gdb_handle_packet(GDBState *s, CPUState *env, const char *line_buf) hextomem((uint8_t *)registers, p, len); cpu_gdb_write_registers(env, mem_buf, len); if (kvm_enabled()) - kvm_load_registers(env); + driver_load_registers(env); put_packet(s, "OK"); break; case 'm': diff --git a/qemu/hw/vmport.c b/qemu/hw/vmport.c index c09227d..a519152 100644 --- a/qemu/hw/vmport.c +++ b/qemu/hw/vmport.c @@ -59,8 +59,7 @@ static uint32_t vmport_ioport_read(void *opaque, uint32_t addr) uint32_t eax; uint32_t ret; - if (kvm_enabled()) - kvm_save_registers(s->env); + driver_save_registers(s->env); eax = s->env->regs[R_EAX]; if (eax != VMPORT_MAGIC) @@ -77,8 +76,7 @@ static uint32_t vmport_ioport_read(void *opaque, uint32_t addr) ret = s->func[command](s->opaque[command], addr); - if (kvm_enabled()) - kvm_load_registers(s->env); + driver_load_registers(s->env); return ret; } diff --git a/qemu/monitor.c b/qemu/monitor.c index 4ee0b19..bd538d9 100644 --- a/qemu/monitor.c +++ b/qemu/monitor.c @@ -286,8 +286,7 @@ static CPUState *mon_get_cpu(void) mon_set_cpu(0); } - if (kvm_enabled()) - kvm_save_registers(mon_cpu); + driver_save_registers(mon_cpu); return mon_cpu; } diff --git a/qemu/qemu-kvm.c b/qemu/qemu-kvm.c index 45fddd3..f3a7758 100644 --- a/qemu/qemu-kvm.c +++ b/qemu/qemu-kvm.c @@ -20,6 +20,11 @@ int kvm_irqchip = 1; #include <pthread.h> #include <sys/utsname.h> #include <sys/syscall.h> +#include <sys/mman.h> + +int hpagesize = 0; +unsigned int kvm_shadow_memory = 0; +extern char *mem_path; extern void perror(const char *s); @@ -114,16 +119,16 @@ static int pre_kvm_run(void *opaque, int vcpu) return 0; } -void kvm_load_registers(CPUState *env) +void driver_load_registers(CPUState *env) { if (kvm_enabled()) kvm_arch_load_regs(env); } -void kvm_save_registers(CPUState *env) +void driver_save_registers(CPUState *env) { if (kvm_enabled()) - kvm_arch_save_regs(env); + kvm_arch_save_regs(env); } int kvm_cpu_exec(CPUState *env) @@ -628,6 +633,11 @@ int kvm_update_debugger(CPUState *env) return kvm_guest_debug(kvm_context, env->cpu_index, &dbg); } +int driver_update_debugger(CPUState *env) +{ + if (kvm_enabled()) + kvm_update_debugger(env); +} /* * dirty pages logging @@ -774,3 +784,197 @@ void kvm_cpu_destroy_phys_mem(target_phys_addr_t start_addr, { kvm_destroy_phys_mem(kvm_context, start_addr, size); } + +/* FIXME: make it all beautiful when kvm is off, make room for other hypervisors, etc */ + +void decorate_application_name(char *appname, int max_len) +{ + if (kvm_enabled()) + { + int remain = max_len - strlen(appname) - 1; + + if (remain > 0) + strncat(appname, "/KVM", remain); + } +} + +static int gethugepagesize(void) +{ + int ret, fd; + char buf[4096]; + char *needle = "Hugepagesize:"; + char *size; + unsigned long hugepagesize; + + fd = open("/proc/meminfo", O_RDONLY); + if (fd < 0) { + perror("open"); + exit(0); + } + + ret = read(fd, buf, sizeof(buf)); + if (ret < 0) { + perror("read"); + exit(0); + } + + size = strstr(buf, needle); + if (!size) + return 0; + size += strlen(needle); + hugepagesize = strtol(size, NULL, 0); + return hugepagesize; +} + + +void *alloc_mem_area(unsigned long memory, const char *path) +{ + char *filename; + void *area; + int fd; + + if (asprintf(&filename, "%s/kvm.XXXXXX", path) == -1) + return NULL; + + hpagesize = gethugepagesize() * 1024; + if (!hpagesize) + return NULL; + + fd = mkstemp(filename); + if (fd < 0) { + perror("mkstemp"); + free(filename); + return NULL; + } + unlink(filename); + free(filename); + + memory = (memory+hpagesize-1) & ~(hpagesize-1); + + if (ftruncate(fd, memory) == -1) { + perror("ftruncate"); + close(fd); + return NULL; + } + + area = mmap(0, memory, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); + if (area == MAP_FAILED) { + perror("mmap"); + close(fd); + return NULL; + } + + return area; +} + +void *qemu_alloc_physram(unsigned long memory) +{ + void *area = NULL; + + if (mem_path) + area = alloc_mem_area(memory, mem_path); + if (!area) + area = qemu_vmalloc(memory); + + return area; +} + + +void driver_cpu_save_end(QEMUFile *f, CPUState *env) +{ + int i; + if (kvm_enabled()) { + for (i = 0; i < NR_IRQ_WORDS ; i++) { + qemu_put_be32s(f, &env->kvm_interrupt_bitmap[i]); + } + qemu_put_be64s(f, &env->tsc); + } +} + +int driver_cpu_load(QEMUFile *f, CPUState *env, int version_id) +{ + int i; + if (kvm_enabled()) { + /* when in-kernel irqchip is used, HF_HALTED_MASK causes deadlock + because no userspace IRQs will ever clear this flag */ + env->hflags &= ~HF_HALTED_MASK; + for (i = 0; i < NR_IRQ_WORDS ; i++) { + qemu_get_be32s(f, &env->kvm_interrupt_bitmap[i]); + } + qemu_get_be64s(f, &env->tsc); + driver_load_registers(env); + } + return 0; +} + +int driver_allowed_page(target_ulong addr) +{ + if (kvm_enabled() && (addr>=0xa0000) && (addr<0xc0000)) /* do not access video-addresses */ + return 0; + return 1; +} + +int driver_main_loop(void) +{ + if (kvm_enabled()) { + kvm_main_loop(); + cpu_disable_ticks(); + return 0; + } + return -1; +} + +void driver_init_context(void) +{ +#if USE_KVM + if (kvm_enabled()) { + if (kvm_qemu_init() < 0) { + extern int kvm_allowed; + fprintf(stderr, "Could not initialize KVM, will disable KVM support\n"); +#ifdef NO_CPU_EMULATION + fprintf(stderr, "Compiled with --disable-cpu-emulation, exiting.\n"); + exit(1); +#endif + kvm_allowed = 0; + } + } +#endif +} + +int driver_init() +{ +#if defined(TARGET_I386) || defined(TARGET_X86_64) +#define KVM_EXTRA_PAGES 3 +#else +#define KVM_EXTRA_PAGES 0 +#endif + if (kvm_enabled()) { + phys_ram_size += KVM_EXTRA_PAGES * TARGET_PAGE_SIZE; + if (kvm_qemu_create_context() < 0) { + fprintf(stderr, "Could not create KVM context\n"); + exit(1); + } +#ifdef KVM_CAP_USER_MEMORY + { + int ret; + + ret = kvm_qemu_check_extension(KVM_CAP_USER_MEMORY); + if (ret) { + phys_ram_base = qemu_alloc_physram(phys_ram_size); + if (!phys_ram_base) { + fprintf(stderr, "Could not allocate physical memory\n"); + exit(1); + } + } + } +#endif + return 1; + } + return 0; +} + +void driver_smp_init(void) +{ + if (kvm_enabled()) + kvm_init_ap(); +} diff --git a/qemu/qemu-kvm.h b/qemu/qemu-kvm.h index 8e45f30..7953f4a 100644 --- a/qemu/qemu-kvm.h +++ b/qemu/qemu-kvm.h @@ -81,6 +81,7 @@ int handle_powerpc_dcr_write(int vcpu,uint32_t dcrn, uint32_t data); extern int kvm_allowed; extern kvm_context_t kvm_context; +extern unsigned int kvm_shadow_memory; #define kvm_enabled() (kvm_allowed) #define qemu_kvm_irqchip_in_kernel() kvm_irqchip_in_kernel(kvm_context) diff --git a/qemu/vl.c b/qemu/vl.c index a59f71c..4df410f 100644 --- a/qemu/vl.c +++ b/qemu/vl.c @@ -234,9 +234,7 @@ int nb_option_roms; int semihosting_enabled = 0; int autostart = 1; int time_drift_fix = 0; -unsigned int kvm_shadow_memory = 0; const char *mem_path = NULL; -int hpagesize = 0; const char *cpu_vendor_string; #ifdef TARGET_ARM int old_param = 0; @@ -259,17 +257,6 @@ static int event_pending = 1; #define TFR(expr) do { if ((expr) != -1) break; } while (errno == EINTR) -void decorate_application_name(char *appname, int max_len) -{ - if (kvm_enabled()) - { - int remain = max_len - strlen(appname) - 1; - - if (remain > 0) - strncat(appname, "/KVM", remain); - } -} - /***********************************************************/ /* x86 ISA bus support */ @@ -6544,8 +6531,7 @@ void cpu_save(QEMUFile *f, void *opaque) uint32_t hflags; int i; - if (kvm_enabled()) - kvm_save_registers(env); + driver_save_registers(env); for(i = 0; i < CPU_NB_REGS; i++) qemu_put_betls(f, &env->regs[i]); @@ -6632,12 +6618,7 @@ void cpu_save(QEMUFile *f, void *opaque) #endif qemu_put_be32s(f, &env->smbase); - if (kvm_enabled()) { - for (i = 0; i < NR_IRQ_WORDS ; i++) { - qemu_put_be32s(f, &env->kvm_interrupt_bitmap[i]); - } - qemu_put_be64s(f, &env->tsc); - } + driver_cpu_save_end(f, env); } #ifdef USE_X86LDOUBLE @@ -6780,17 +6761,7 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id) /* XXX: compute hflags from scratch, except for CPL and IIF */ env->hflags = hflags; tlb_flush(env, 1); - if (kvm_enabled()) { - /* when in-kernel irqchip is used, HF_HALTED_MASK causes deadlock - because no userspace IRQs will ever clear this flag */ - env->hflags &= ~HF_HALTED_MASK; - for (i = 0; i < NR_IRQ_WORDS ; i++) { - qemu_get_be32s(f, &env->kvm_interrupt_bitmap[i]); - } - qemu_get_be64s(f, &env->tsc); - kvm_load_registers(env); - } - return 0; + return driver_cpu_load(f, opaque, version_id); } #elif defined(TARGET_PPC) @@ -7126,7 +7097,7 @@ static int ram_load_v1(QEMUFile *f, void *opaque) if (qemu_get_be32(f) != phys_ram_size) return -EINVAL; for(i = 0; i < phys_ram_size; i+= TARGET_PAGE_SIZE) { - if (kvm_enabled() && (i>=0xa0000) && (i<0xc0000)) /* do not access video-addresses */ + if (!driver_allowed_page(i)) continue; ret = ram_get_page(f, phys_ram_base + i, TARGET_PAGE_SIZE); if (ret) @@ -7262,7 +7233,7 @@ static void ram_save_live(QEMUFile *f, void *opaque) target_ulong addr; for (addr = 0; addr < phys_ram_size; addr += TARGET_PAGE_SIZE) { - if (kvm_enabled() && (addr>=0xa0000) && (addr<0xc0000)) /* do not access video-addresses */ + if (!driver_allowed_page(addr)) continue; if (cpu_physical_memory_get_dirty(addr, MIGRATION_DIRTY_FLAG)) { qemu_put_be32(f, addr); @@ -7282,7 +7253,7 @@ static void ram_save_static(QEMUFile *f, void *opaque) if (ram_compress_open(s, f) < 0) return; for(i = 0; i < phys_ram_size; i+= BDRV_HASH_BLOCK_SIZE) { - if (kvm_enabled() && (i>=0xa0000) && (i<0xc0000)) /* do not access video-addresses */ + if (!driver_allowed_page(i)) continue; #if 0 if (tight_savevm_enabled) { @@ -7355,7 +7326,7 @@ static int ram_load_static(QEMUFile *f, void *opaque) if (ram_decompress_open(s, f) < 0) return -EINVAL; for(i = 0; i < phys_ram_size; i+= BDRV_HASH_BLOCK_SIZE) { - if (kvm_enabled() && (i>=0xa0000) && (i<0xc0000)) /* do not access video-addresses */ + if (!driver_allowed_page(i)) continue; if (ram_decompress_buf(s, buf, 1) < 0) { fprintf(stderr, "Error while reading ram block header\n"); @@ -7846,6 +7817,11 @@ void main_loop_wait(int timeout) } +int driver_enabled(void) +{ + return 1; +} + static int main_loop(void) { int ret, timeout; @@ -7854,12 +7830,8 @@ static int main_loop(void) #endif CPUState *env; - - if (kvm_enabled()) { - kvm_main_loop(); - cpu_disable_ticks(); - return 0; - } + if (driver_enabled() && (ret = driver_main_loop() < 0)) + return ret; cur_cpu = first_cpu; next_cpu = cur_cpu->next_cpu ?: first_cpu; @@ -7902,15 +7874,16 @@ static int main_loop(void) if (reset_requested) { reset_requested = 0; qemu_system_reset(); - if (kvm_enabled()) - kvm_load_registers(env); + driver_load_registers(); ret = EXCP_INTERRUPT; } + if (powerdown_requested) { powerdown_requested = 0; qemu_system_powerdown(); ret = EXCP_INTERRUPT; } + if (ret == EXCP_DEBUG) { vm_stop(EXCP_DEBUG); } @@ -8564,87 +8537,6 @@ void qemu_get_launch_info(int *argc, char ***argv, int *opt_daemonize, const cha *opt_incoming = incoming; } - -static int gethugepagesize(void) -{ - int ret, fd; - char buf[4096]; - char *needle = "Hugepagesize:"; - char *size; - unsigned long hugepagesize; - - fd = open("/proc/meminfo", O_RDONLY); - if (fd < 0) { - perror("open"); - exit(0); - } - - ret = read(fd, buf, sizeof(buf)); - if (ret < 0) { - perror("read"); - exit(0); - } - - size = strstr(buf, needle); - if (!size) - return 0; - size += strlen(needle); - hugepagesize = strtol(size, NULL, 0); - return hugepagesize; -} - -void *alloc_mem_area(unsigned long memory, const char *path) -{ - char *filename; - void *area; - int fd; - - if (asprintf(&filename, "%s/kvm.XXXXXX", path) == -1) - return NULL; - - hpagesize = gethugepagesize() * 1024; - if (!hpagesize) - return NULL; - - fd = mkstemp(filename); - if (fd < 0) { - perror("mkstemp"); - free(filename); - return NULL; - } - unlink(filename); - free(filename); - - memory = (memory+hpagesize-1) & ~(hpagesize-1); - - if (ftruncate(fd, memory) == -1) { - perror("ftruncate"); - close(fd); - return NULL; - } - - area = mmap(0, memory, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); - if (area == MAP_FAILED) { - perror("mmap"); - close(fd); - return NULL; - } - - return area; -} - -void *qemu_alloc_physram(unsigned long memory) -{ - void *area = NULL; - - if (mem_path) - area = alloc_mem_area(memory, mem_path); - if (!area) - area = qemu_vmalloc(memory); - - return area; -} - int main(int argc, char **argv) { #ifdef CONFIG_GDBSTUB @@ -9355,19 +9247,7 @@ int main(int argc, char **argv) } #endif -#if USE_KVM - if (kvm_enabled()) { - if (kvm_qemu_init() < 0) { - extern int kvm_allowed; - fprintf(stderr, "Could not initialize KVM, will disable KVM support\n"); -#ifdef NO_CPU_EMULATION - fprintf(stderr, "Compiled with --disable-cpu-emulation, exiting.\n"); - exit(1); -#endif - kvm_allowed = 0; - } - } -#endif + driver_init_context(); if (pid_file && qemu_create_pidfile(pid_file) != 0) { if (daemonize) { @@ -9463,33 +9343,7 @@ int main(int argc, char **argv) /* init the memory */ phys_ram_size = ram_size + vga_ram_size + MAX_BIOS_SIZE; - /* Initialize kvm */ -#if defined(TARGET_I386) || defined(TARGET_X86_64) -#define KVM_EXTRA_PAGES 3 -#else -#define KVM_EXTRA_PAGES 0 -#endif - if (kvm_enabled()) { - phys_ram_size += KVM_EXTRA_PAGES * TARGET_PAGE_SIZE; - if (kvm_qemu_create_context() < 0) { - fprintf(stderr, "Could not create KVM context\n"); - exit(1); - } -#ifdef KVM_CAP_USER_MEMORY -{ - int ret; - - ret = kvm_qemu_check_extension(KVM_CAP_USER_MEMORY); - if (ret) { - phys_ram_base = qemu_alloc_physram(phys_ram_size); - if (!phys_ram_base) { - fprintf(stderr, "Could not allocate physical memory\n"); - exit(1); - } - } -} -#endif - } else { + if (!driver_init()) { phys_ram_base = qemu_vmalloc(phys_ram_size); if (!phys_ram_base) { fprintf(stderr, "Could not allocate physical memory\n"); @@ -9637,8 +9491,7 @@ int main(int argc, char **argv) qemu_mod_timer(display_state.gui_timer, qemu_get_clock(rt_clock)); } - if (kvm_enabled()) - kvm_init_ap(); + driver_smp_init(); #ifdef CONFIG_GDBSTUB if (use_gdbstub) { -- 1.5.0.6 ------------------------------------------------------------------------- This SF.net email is sponsored by the 2008 JavaOne(SM) Conference Don't miss this year's exciting event. There's still time to save $100. Use priority code J8TL2D2. http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone _______________________________________________ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel