Notice the magic page during translate, much like we already do for the arm32 commpage. At runtime, raise an exception to return cpu_loop for emulation.
Signed-off-by: Richard Henderson <richard.hender...@linaro.org> --- target/i386/cpu.h | 1 + linux-user/i386/cpu_loop.c | 104 +++++++++++++++++++++++++++++++++++++ target/i386/translate.c | 16 +++++- 3 files changed, 120 insertions(+), 1 deletion(-) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 164d038d1f..3fb2d2a986 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1000,6 +1000,7 @@ typedef uint64_t FeatureWordArray[FEATURE_WORDS]; #define EXCP_VMEXIT 0x100 /* only for system emulation */ #define EXCP_SYSCALL 0x101 /* only for user emulation */ +#define EXCP_VSYSCALL 0x102 /* only for user emulation */ /* i386-specific interrupt pending bits. */ #define CPU_INTERRUPT_POLL CPU_INTERRUPT_TGT_EXT_1 diff --git a/linux-user/i386/cpu_loop.c b/linux-user/i386/cpu_loop.c index e217cca5ee..8b7c9f7337 100644 --- a/linux-user/i386/cpu_loop.c +++ b/linux-user/i386/cpu_loop.c @@ -92,6 +92,105 @@ static void gen_signal(CPUX86State *env, int sig, int code, abi_ptr addr) queue_signal(env, info.si_signo, QEMU_SI_FAULT, &info); } +#ifdef TARGET_X86_64 +static bool write_ok_or_segv(CPUX86State *env, abi_ptr addr, size_t len) +{ + /* + * For all the vsyscalls, NULL means "don't write anything" not + * "write it at address 0". + */ + if (addr == 0 || access_ok(VERIFY_WRITE, addr, len)) { + return true; + } + + gen_signal(env, TARGET_SIGSEGV, TARGET_SEGV_MAPERR, addr); + return false; +} + +/* + * Since v3.1, the kernel traps and emulates the vsyscall page. + * Entry points other than the official generate SIGSEGV. + */ +static void emulate_vsyscall(CPUX86State *env) +{ + int syscall; + abi_ulong ret; + uint64_t caller; + + /* + * Validate the entry point. We have already validated the page + * during translation, now verify the offset. + */ + switch (env->eip & ~TARGET_PAGE_MASK) { + case 0x000: + syscall = TARGET_NR_gettimeofday; + break; + case 0x400: + syscall = TARGET_NR_time; + break; + case 0x800: + syscall = TARGET_NR_getcpu; + break; + default: + sigsegv: + /* Like force_sig(SIGSEGV). */ + gen_signal(env, TARGET_SIGSEGV, TARGET_SI_KERNEL, 0); + return; + } + + /* + * Validate the return address. + * Note that the kernel treats this the same as an invalid entry point. + */ + if (get_user_u64(caller, env->regs[R_ESP])) { + goto sigsegv; + } + + /* + * Validate the the pointer arguments. + */ + switch (syscall) { + case TARGET_NR_gettimeofday: + if (!write_ok_or_segv(env, env->regs[R_EDI], + sizeof(struct target_timeval)) || + !write_ok_or_segv(env, env->regs[R_ESI], + sizeof(struct target_timezone))) { + return; + } + break; + case TARGET_NR_time: + if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(abi_long))) { + return; + } + break; + case TARGET_NR_getcpu: + if (!write_ok_or_segv(env, env->regs[R_EDI], sizeof(uint32_t)) || + !write_ok_or_segv(env, env->regs[R_ESI], sizeof(uint32_t))) { + return; + } + break; + default: + g_assert_not_reached(); + } + + /* + * Perform the syscall. None of the vsyscalls should need restarting, + * and all faults should have been caught above. + */ + ret = do_syscall(env, syscall, env->regs[R_EDI], env->regs[R_ESI], + env->regs[R_EDX], env->regs[10], env->regs[8], + env->regs[9], 0, 0); + g_assert(ret != -TARGET_ERESTARTSYS); + g_assert(ret != -TARGET_QEMU_ESIGRETURN); + g_assert(ret != -TARGET_EFAULT); + env->regs[R_EAX] = ret; + + /* Emulate a ret instruction to leave the vsyscall page. */ + env->eip = caller; + env->regs[R_ESP] += 8; +} +#endif + void cpu_loop(CPUX86State *env) { CPUState *cs = env_cpu(env); @@ -141,6 +240,11 @@ void cpu_loop(CPUX86State *env) env->regs[R_EAX] = ret; } break; +#endif +#ifdef TARGET_X86_64 + case EXCP_VSYSCALL: + emulate_vsyscall(env); + break; #endif case EXCP0B_NOSEG: case EXCP0C_STACK: diff --git a/target/i386/translate.c b/target/i386/translate.c index 7c99ef1385..391b4ef149 100644 --- a/target/i386/translate.c +++ b/target/i386/translate.c @@ -8555,7 +8555,21 @@ static bool i386_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu, static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *dc = container_of(dcbase, DisasContext, base); - target_ulong pc_next = disas_insn(dc, cpu); + target_ulong pc_next; + +#if defined(TARGET_X86_64) && \ + defined(CONFIG_USER_ONLY) && \ + defined(CONFIG_LINUX) + /* + * Detect entry into the vsyscall page and invoke the syscall. + */ + if ((dc->base.pc_next & TARGET_PAGE_MASK) == 0xffffffffff600000ull) { + gen_exception(dc, EXCP_VSYSCALL, dc->base.pc_next); + return; + } +#endif + + pc_next = disas_insn(dc, cpu); if (dc->tf || (dc->base.tb->flags & HF_INHIBIT_IRQ_MASK)) { /* if single step mode, we generate only one instruction and -- 2.20.1