This adds the arch-specific handling for partial contexts within the kernel's C code. As of this commit, there are no partial contexts; everything that comes in from trapentry64.S is a full context.
For hardware and software contexts, the distinction in x86 is whether or not TLS MSRs are dealt with. For partial contexts, GS is the kernel's desired GS, MSR_KERNEL_GS_BASE is the user's old GS, and FS is the user's FS. The TF's GS and FS are 0. After finalization, the GS is still the kernel's GS. MSR_KERNEL_GS_BASE is the kernel's GS, and the TF has the user's GS and FS. Signed-off-by: Barret Rhoden <[email protected]> --- kern/arch/x86/process64.c | 20 ++++++++++++++++---- kern/arch/x86/ros/trapframe64.h | 10 +++++----- kern/arch/x86/trap64.c | 8 ++++++-- kern/arch/x86/trap64.h | 40 ++++++++++++++++++++++++++++++++++++++-- 4 files changed, 65 insertions(+), 13 deletions(-) diff --git a/kern/arch/x86/process64.c b/kern/arch/x86/process64.c index bb15ae5c3a77..0397091cf7c2 100644 --- a/kern/arch/x86/process64.c +++ b/kern/arch/x86/process64.c @@ -15,8 +15,13 @@ void proc_pop_ctx(struct user_context *ctx) * gs bases */ if (ctx->type == ROS_HW_CTX) { struct hw_trapframe *tf = &ctx->tf.hw_tf; - write_msr(MSR_GS_BASE, (uint64_t)tf->tf_gsbase); - write_msr(MSR_FS_BASE, (uint64_t)tf->tf_fsbase); + + if (x86_hwtf_is_partial(tf)) { + swap_gs(); + } else { + write_msr(MSR_GS_BASE, (uint64_t)tf->tf_gsbase); + write_msr(MSR_FS_BASE, (uint64_t)tf->tf_fsbase); + } asm volatile ("movq %0, %%rsp; " "popq %%rax; " "popq %%rbx; " @@ -39,8 +44,13 @@ void proc_pop_ctx(struct user_context *ctx) panic("iretq failed"); } else { struct sw_trapframe *tf = &ctx->tf.sw_tf; - write_msr(MSR_GS_BASE, (uint64_t)tf->tf_gsbase); - write_msr(MSR_FS_BASE, (uint64_t)tf->tf_fsbase); + + if (x86_swtf_is_partial(tf)) { + swap_gs(); + } else { + write_msr(MSR_GS_BASE, (uint64_t)tf->tf_gsbase); + write_msr(MSR_FS_BASE, (uint64_t)tf->tf_fsbase); + } /* We need to 0 out any registers that aren't part of the sw_tf and that * we won't use/clobber on the out-path. While these aren't part of the * sw_tf, we also don't want to leak any kernel register content. 
*/ @@ -111,6 +121,7 @@ void proc_secure_ctx(struct user_context *ctx) enforce_user_canon(&tf->tf_gsbase); enforce_user_canon(&tf->tf_fsbase); enforce_user_canon(&tf->tf_rip); + x86_swtf_clear_partial(tf); } else { /* If we aren't SW, we're assuming (and forcing) a HW ctx. If this is * somehow fucked up, userspace should die rather quickly. */ @@ -125,6 +136,7 @@ void proc_secure_ctx(struct user_context *ctx) tf->tf_ss = GD_UD | 3; tf->tf_cs = GD_UT | 3; tf->tf_rflags |= FL_IF; + x86_hwtf_clear_partial(tf); } } diff --git a/kern/arch/x86/ros/trapframe64.h b/kern/arch/x86/ros/trapframe64.h index eb209ddd717d..af2de4cfe886 100644 --- a/kern/arch/x86/ros/trapframe64.h +++ b/kern/arch/x86/ros/trapframe64.h @@ -23,20 +23,20 @@ struct hw_trapframe { uint64_t tf_r14; uint64_t tf_r15; uint32_t tf_trapno; - uint32_t tf_padding5; + uint32_t tf_padding5; /* used in trap reflection */ /* below here defined by x86 hardware (error code optional) */ uint32_t tf_err; - uint32_t tf_padding4; + uint32_t tf_padding4; /* used in trap reflection */ uint64_t tf_rip; uint16_t tf_cs; - uint16_t tf_padding3; + uint16_t tf_padding3; /* used in trap reflection */ uint32_t tf_padding2; uint64_t tf_rflags; /* unlike 32 bit, SS:RSP is always pushed, even when not changing rings */ uint64_t tf_rsp; uint16_t tf_ss; uint16_t tf_padding1; - uint32_t tf_padding0; + uint32_t tf_padding0; /* used for partial contexts */ }; struct sw_trapframe { @@ -52,5 +52,5 @@ struct sw_trapframe { uint64_t tf_rsp; uint32_t tf_mxcsr; uint16_t tf_fpucw; - uint16_t tf_padding0; + uint16_t tf_padding0; /* used for partial contexts */ }; diff --git a/kern/arch/x86/trap64.c b/kern/arch/x86/trap64.c index 754c90be708f..20f89ddb984b 100644 --- a/kern/arch/x86/trap64.c +++ b/kern/arch/x86/trap64.c @@ -33,7 +33,9 @@ void print_trapframe(struct hw_trapframe *hw_tf) * nuts when we print/panic */ pcpui->__lock_checking_enabled--; spin_lock_irqsave(&ptf_lock); - printk("HW TRAP frame at %p on core %d\n", hw_tf, core_id()); + 
printk("HW TRAP frame %sat %p on core %d\n", + x86_hwtf_is_partial(hw_tf) ? "(partial) " : "", + hw_tf, core_id()); printk(" rax 0x%016lx\n", hw_tf->tf_rax); printk(" rbx 0x%016lx\n", hw_tf->tf_rbx); printk(" rcx 0x%016lx\n", hw_tf->tf_rcx); @@ -79,7 +81,9 @@ void print_swtrapframe(struct sw_trapframe *sw_tf) struct per_cpu_info *pcpui = &per_cpu_info[core_id()]; pcpui->__lock_checking_enabled--; spin_lock_irqsave(&ptf_lock); - printk("SW TRAP frame at %p on core %d\n", sw_tf, core_id()); + printk("SW TRAP frame %sat %p on core %d\n", + x86_swtf_is_partial(sw_tf) ? "(partial) " : "", + sw_tf, core_id()); printk(" rbx 0x%016lx\n", sw_tf->tf_rbx); printk(" rbp 0x%016lx\n", sw_tf->tf_rbp); printk(" r12 0x%016lx\n", sw_tf->tf_r12); diff --git a/kern/arch/x86/trap64.h b/kern/arch/x86/trap64.h index 73bc420d310e..bda6ad87ce27 100644 --- a/kern/arch/x86/trap64.h +++ b/kern/arch/x86/trap64.h @@ -108,14 +108,25 @@ static inline void x86_set_stacktop_tss(struct taskstate *tss, uintptr_t top) tss->ts_rsp0 = top; } +/* Keep tf_padding0 in sync with trapentry64.S */ static inline bool x86_hwtf_is_partial(struct hw_trapframe *tf) { - return FALSE; + return tf->tf_padding0 == 1; } static inline bool x86_swtf_is_partial(struct sw_trapframe *tf) { - return FALSE; + return tf->tf_padding0 == 1; +} + +static inline void x86_hwtf_clear_partial(struct hw_trapframe *tf) +{ + tf->tf_padding0 = 0; +} + +static inline void x86_swtf_clear_partial(struct sw_trapframe *tf) +{ + tf->tf_padding0 = 0; } static inline bool arch_ctx_is_partial(struct user_context *ctx) @@ -129,14 +140,39 @@ static inline bool arch_ctx_is_partial(struct user_context *ctx) return FALSE; } +/* Partial contexts for HW and SW TFs have the user's gs in MSR_KERNEL_GS_BASE. + * The kernel's gs is loaded into gs. 
We need to put the kernel's gs into + * KERNEL_GS_BASE so the core is ready to run another full context, save the + * user's {GS,FS}_BASE into their TF so it can run on another core, and keep GS + * loaded with the current GS (the kernel's). */ +static inline void x86_finalize_hwtf(struct hw_trapframe *tf) +{ + tf->tf_gsbase = read_msr(MSR_KERNEL_GS_BASE); + write_msr(MSR_KERNEL_GS_BASE, read_msr(MSR_GS_BASE)); + tf->tf_fsbase = read_msr(MSR_FS_BASE); + x86_hwtf_clear_partial(tf); +} + +static inline void x86_finalize_swtf(struct sw_trapframe *tf) +{ + tf->tf_gsbase = read_msr(MSR_KERNEL_GS_BASE); + write_msr(MSR_KERNEL_GS_BASE, read_msr(MSR_GS_BASE)); + tf->tf_fsbase = read_msr(MSR_FS_BASE); + x86_swtf_clear_partial(tf); +} + /* Makes sure that the user context is fully saved into ctx and not split across * the struct and HW, meaning it is not a "partial context". */ static inline void arch_finalize_ctx(struct user_context *ctx) { + if (!arch_ctx_is_partial(ctx)) + return; switch (ctx->type) { case (ROS_HW_CTX): + x86_finalize_hwtf(&ctx->tf.hw_tf); break; case (ROS_SW_CTX): + x86_finalize_swtf(&ctx->tf.sw_tf); break; } } -- 2.6.0.rc2.230.g3dd15c0 -- You received this message because you are subscribed to the Google Groups "Akaros" group. To unsubscribe from this group and stop receiving emails from it, send an email to [email protected]. To post to this group, send email to [email protected]. For more options, visit https://groups.google.com/d/optout.
