[PATCH 3.2 093/102] KVM: x86: Handle errors when RIP is set during far jumps
3.2.64-rc1 review patch.  If anyone has any objections, please let me know.

------------------

From: Nadav Amit <na...@cs.technion.ac.il>

commit d1442d85cc30ea75f7d399474ca738e0bc96f715 upstream.

Far jmp/call/ret may fault while loading a new RIP.  Currently KVM does
not handle this case, and it may result in a failed vm-entry once the
assignment is done.  The tricky part is that loading the new CS affects
the VMCS/VMCB state, so if we fail while loading the new RIP, we are left
in an inconsistent state.  Therefore, on 64-bit this patch saves the old
CS descriptor and restores it if loading the RIP fails.

This fixes CVE-2014-3647.

Signed-off-by: Nadav Amit <na...@cs.technion.ac.il>
Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
[bwh: Backported to 3.2:
 - Adjust context
 - __load_segment_descriptor() does not take an in_task_switch parameter]
Signed-off-by: Ben Hutchings <b...@decadent.org.uk>
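The change follows a small transactional pattern: in 64-bit mode, snapshot
the current CS selector and its cached descriptor before loading the new CS,
then roll the snapshot back if the subsequent RIP assignment faults (for
instance, on a non-canonical target).  The standalone C sketch below
illustrates only that pattern; the types and helpers in it (seg_state,
load_new_cs, assign_rip) are made-up stand-ins, not the emulator's actual
API:

#include <stdint.h>
#include <stdio.h>

/* Made-up stand-ins for the emulator's CS state and result codes. */
struct seg_state {
	uint16_t selector;
	uint64_t cached_desc;		/* stands in for struct desc_struct */
};

enum { EMUL_CONTINUE = 0, EMUL_FAULT = 1 };

static struct seg_state current_cs = { 0x10, 0xaffab1e };
static uint64_t current_rip = 0x1000;

/* Loading CS succeeds here; in the real emulator this step can fault too,
 * but a failure before any state is written needs no rollback. */
static int load_new_cs(uint16_t sel, struct seg_state *out)
{
	out->selector = sel;
	out->cached_desc = 0xdeadbeef;
	current_cs = *out;		/* CS state is already modified... */
	return EMUL_CONTINUE;
}

/* ...so a later failure, e.g. a non-canonical 64-bit target, must undo it. */
static int assign_rip(uint64_t dst)
{
	if (((int64_t)(dst << 16) >> 16) != (int64_t)dst)
		return EMUL_FAULT;	/* non-canonical address */
	current_rip = dst;
	return EMUL_CONTINUE;
}

static int far_jmp(uint16_t sel, uint64_t dst)
{
	struct seg_state old_cs = current_cs;	/* snapshot, as the patch does */
	struct seg_state new_cs;
	int rc;

	rc = load_new_cs(sel, &new_cs);
	if (rc != EMUL_CONTINUE)
		return rc;

	rc = assign_rip(dst);
	if (rc != EMUL_CONTINUE)
		current_cs = old_cs;	/* restore the old CS on failure */
	return rc;
}

int main(void)
{
	/* A non-canonical target: the jump fails and CS must be unchanged. */
	int rc = far_jmp(0x33, 0xdead000000000000ULL);
	printf("rc=%d cs=%#x rip=%#llx\n", rc, current_cs.selector,
	       (unsigned long long)current_rip);
	return 0;
}

In the patch itself, __load_segment_descriptor() additionally gains an
out-parameter for the newly loaded descriptor, so that em_jmp_far() and
em_ret_far() can pass its L bit to assign_eip_far() while keeping a copy of
the old descriptor around for the restore.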
---
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1234,7 +1234,8 @@ static int write_segment_descriptor(stru
 
 /* Does not support long mode */
 static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
-				     u16 selector, int seg, u8 cpl)
+				     u16 selector, int seg, u8 cpl,
+				     struct desc_struct *desc)
 {
 	struct desc_struct seg_desc;
 	u8 dpl, rpl;
@@ -1342,6 +1343,8 @@ static int __load_segment_descriptor(str
 	}
 load:
 	ctxt->ops->set_segment(ctxt, selector, &seg_desc, 0, seg);
+	if (desc)
+		*desc = seg_desc;
 	return X86EMUL_CONTINUE;
 exception:
 	emulate_exception(ctxt, err_vec, err_code, true);
@@ -1352,7 +1355,7 @@ static int load_segment_descriptor(struc
 				   u16 selector, int seg)
 {
 	u8 cpl = ctxt->ops->cpl(ctxt);
-	return __load_segment_descriptor(ctxt, selector, seg, cpl);
+	return __load_segment_descriptor(ctxt, selector, seg, cpl, NULL);
 }
 
 static void write_register_operand(struct operand *op)
@@ -1694,17 +1697,31 @@ static int em_iret(struct x86_emulate_ct
 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
-	unsigned short sel;
+	unsigned short sel, old_sel;
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;
+	u8 cpl = ctxt->ops->cpl(ctxt);
+
+	/* Assignment of RIP may only fail in 64-bit mode */
+	if (ctxt->mode == X86EMUL_MODE_PROT64)
+		ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
+				 VCPU_SREG_CS);
 
 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
 
-	rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS);
+	rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl,
+				       &new_desc);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	ctxt->_eip = 0;
-	memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
-	return X86EMUL_CONTINUE;
+	rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+	if (rc != X86EMUL_CONTINUE) {
+		WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
+		/* assigning eip failed; restore the old cs */
+		ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
+		return rc;
+	}
+	return rc;
 }
 
 static int em_grp1a(struct x86_emulate_ctxt *ctxt)
@@ -1856,21 +1873,34 @@ static int em_ret(struct x86_emulate_ctx
 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
-	unsigned long cs;
+	unsigned long eip, cs;
+	u16 old_cs;
 	int cpl = ctxt->ops->cpl(ctxt);
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;
+
+	if (ctxt->mode == X86EMUL_MODE_PROT64)
+		ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
+				 VCPU_SREG_CS);
 
-	rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
+	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
-	if (ctxt->op_bytes == 4)
-		ctxt->_eip = (u32)ctxt->_eip;
 	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	/* Outer-privilege level return is not implemented */
 	if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
 		return X86EMUL_UNHANDLEABLE;
-	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
+	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0,
+				       &new_desc);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	rc = assign_eip_far(ctxt, eip, new_desc.l);
+	if (rc != X86EMUL_CONTINUE) {
+		WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
+		ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+	}
 	return rc;
 }
 
@@ -2248,19 +2278,24 @@ static int load_state_from_tss16(struct
 	 * Now load segment descriptors. If fault happens at this stage