Gleb Natapov wrote:
> On Mon, Feb 15, 2010 at 02:20:31PM +0100, Jan Kiszka wrote:
>> Jan Kiszka wrote:
>>> Gleb Natapov wrote:
>>>> Lets check if SVM works. I can do that if you tell me how.
>>> - Fire up some Linux guest with gdb installed
>>> - Attach gdb to gdbstub of the VM
>>> - Set a soft breakpoint in guest kernel, ideally where it does not
>>>   immediately trigger, e.g. on sys_reboot (use grep sys_reboot
>>>   /proc/kallsyms if you don't have symbols for the guest kernel)
>>> - Start gdb /bin/true in the guest
>>> - run
>>>
>>> As gdb sets some automatic breakpoints, this already exercises the
>>> reinjection of #BP.
>> I just did this on our primary AMD platform (Embedded Opteron, 13KS EE),
>> and it just worked.
>>
>> But this is a fairly new processor. Consequently, it reports NextRIP
>> support via cpuid function 0x8000000A. Looking for an older one too.
>>
>> In the meantime I also browsed a bit more in the manuals, and I don't
>> think stepping over or (what is actually required) into an INT3 will
>> work. We can't step into as the processor clears TF on any event handler
>> entry. And stepping over would cause troubles
>>
>> a) as an unknown amount of code may run without #DB interception
>> b) we would fiddle with TF in code that is already under debugger
>>    control, thus we would very likely run into conflicts.
>>
>> Leaves us with tricky INT3 emulation. Sigh.
>>
> So the question is do we want to support this kind of debugging on older
> AMDs. Maybe we don't.

So guests in this special scenario on older AMD hosts remain broken.
OK, but then I think we could at least reduce the breakage a bit by
emulating typical #DB re-injections:

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 52f78dd..5f35aaf 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -46,6 +46,7 @@ MODULE_LICENSE("GPL");
 #define SVM_FEATURE_NPT  (1 << 0)
 #define SVM_FEATURE_LBRV (1 << 1)
 #define SVM_FEATURE_SVML (1 << 2)
+#define SVM_FEATURE_NRIP (1 << 3)
 #define SVM_FEATURE_PAUSE_FILTER (1 << 10)
 
 #define NESTED_EXIT_HOST       0       /* Exit handled on host level */
@@ -109,6 +110,8 @@ struct vcpu_svm {
        struct nested_state nested;
 
        bool nmi_singlestep;
+
+       bool int3_injected;
 };
 
 /* enable NPT for AMD64 and X86 with PAE */
@@ -234,23 +237,6 @@ static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
        vcpu->arch.efer = efer;
 }
 
-static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
-                               bool has_error_code, u32 error_code)
-{
-       struct vcpu_svm *svm = to_svm(vcpu);
-
-       /* If we are within a nested VM we'd better #VMEXIT and let the
-          guest handle the exception */
-       if (nested_svm_check_exception(svm, nr, has_error_code, error_code))
-               return;
-
-       svm->vmcb->control.event_inj = nr
-               | SVM_EVTINJ_VALID
-               | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
-               | SVM_EVTINJ_TYPE_EXEPT;
-       svm->vmcb->control.event_inj_err = error_code;
-}
-
 static int is_external_interrupt(u32 info)
 {
        info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
@@ -296,6 +282,29 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
        svm_set_interrupt_shadow(vcpu, 0);
 }
 
+static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
+                               bool has_error_code, u32 error_code)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+
+       /* If we are within a nested VM we'd better #VMEXIT and let the
+          guest handle the exception */
+       if (nested_svm_check_exception(svm, nr, has_error_code, error_code))
+               return;
+
+       if (nr == BP_VECTOR && !svm_has(SVM_FEATURE_NRIP)) {
+               svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
+               skip_emulated_instruction(&svm->vcpu);
+               svm->int3_injected = true;
+       }
+
+       svm->vmcb->control.event_inj = nr
+               | SVM_EVTINJ_VALID
+               | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
+               | SVM_EVTINJ_TYPE_EXEPT;
+       svm->vmcb->control.event_inj_err = error_code;
+}
+
 static int has_svm(void)
 {
        const char *msg;
@@ -2653,6 +2662,9 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
                if (is_nested(svm))
                        break;
-               if (kvm_exception_is_soft(vector))
+               if (kvm_exception_is_soft(vector)) {
+                       if (vector == BP_VECTOR && svm->int3_injected)
+                               kvm_rip_write(&svm->vcpu,
+                                             kvm_rip_read(&svm->vcpu) - 1);
                        break;
+               }
                if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
                        u32 err = svm->vmcb->control.exit_int_info_err;
@@ -2667,6 +2679,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
        default:
                break;
        }
+       svm->int3_injected = false;
 }
 
 #ifdef CONFIG_X86_64


I.e. move RIP forward by one byte when injecting INT3 and move it back
in case we catch a fault during delivery. It does not work for
unintercepted faults or when the breakpoint was raised via a
differently encoded instruction (e.g. the two-byte "CD 03" form of
INT 3 instead of the one-byte "CC" INT3) - but it won't crash the
guest more frequently, rather less. What do you think?

> Then lets apply your patch for VMX with a comment that
> explains why we need to save instruction length here (int3 will be
> reinjected from userspace).

Will do this.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to