Re: [PATCH v7 08/12] Handle async PF in a guest.
Am 14.10.2010 11:22, Gleb Natapov wrote: When async PF capability is detected hook up special page fault handler that will handle async page fault events and bypass other page faults to regular page fault handler. Also add async PF handling to nested SVM emulation. Async PF always generates exit to L1 where vcpu thread will be scheduled out until page is available. Acked-by: Rik van Riel r...@redhat.com Signed-off-by: Gleb Natapov g...@redhat.com --- arch/x86/include/asm/kvm_para.h | 12 +++ arch/x86/include/asm/traps.h|1 + arch/x86/kernel/entry_32.S | 10 ++ arch/x86/kernel/entry_64.S |3 + arch/x86/kernel/kvm.c | 181 +++ arch/x86/kvm/svm.c | 45 -- 6 files changed, 243 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 2315398..fbfd367 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -65,6 +65,9 @@ struct kvm_mmu_op_release_pt { __u64 pt_phys; }; +#define KVM_PV_REASON_PAGE_NOT_PRESENT 1 +#define KVM_PV_REASON_PAGE_READY 2 + struct kvm_vcpu_pv_apf_data { __u32 reason; __u8 pad[60]; @@ -171,8 +174,17 @@ static inline unsigned int kvm_arch_para_features(void) #ifdef CONFIG_KVM_GUEST void __init kvm_guest_init(void); +void kvm_async_pf_task_wait(u32 token); +void kvm_async_pf_task_wake(u32 token); +u32 kvm_read_and_reset_pf_reason(void); #else #define kvm_guest_init() do { } while (0) +#define kvm_async_pf_task_wait(T) do {} while(0) +#define kvm_async_pf_task_wake(T) do {} while(0) +static u32 kvm_read_and_reset_pf_reason(void) +{ + return 0; +} #endif #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index f66cda5..0310da6 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -30,6 +30,7 @@ asmlinkage void segment_not_present(void); asmlinkage void stack_segment(void); asmlinkage void general_protection(void); asmlinkage void page_fault(void); +asmlinkage void async_page_fault(void); asmlinkage 
void spurious_interrupt_bug(void); asmlinkage void coprocessor_error(void); asmlinkage void alignment_check(void); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 227d009..e6e7273 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1496,6 +1496,16 @@ ENTRY(general_protection) CFI_ENDPROC END(general_protection) +#ifdef CONFIG_KVM_GUEST +ENTRY(async_page_fault) + RING0_EC_FRAME + pushl $do_async_page_fault + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC +END(apf_page_fault) +#endif + /* * End of kprobes section */ diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 17be5ec..def98c3 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1349,6 +1349,9 @@ errorentry xen_stack_segment do_stack_segment #endif errorentry general_protection do_general_protection errorentry page_fault do_page_fault +#ifdef CONFIG_KVM_GUEST +errorentry async_page_fault do_async_page_fault +#endif #ifdef CONFIG_X86_MCE paranoidzeroentry machine_check *machine_check_vector(%rip) #endif diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 032d03b..d564063 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -29,8 +29,14 @@ #include linux/hardirq.h #include linux/notifier.h #include linux/reboot.h +#include linux/hash.h +#include linux/sched.h +#include linux/slab.h +#include linux/kprobes.h #include asm/timer.h #include asm/cpu.h +#include asm/traps.h +#include asm/desc.h #define MMU_QUEUE_SIZE 1024 @@ -64,6 +70,168 @@ static void kvm_io_delay(void) { } +#define KVM_TASK_SLEEP_HASHBITS 8 +#define KVM_TASK_SLEEP_HASHSIZE (1KVM_TASK_SLEEP_HASHBITS) + +struct kvm_task_sleep_node { + struct hlist_node link; + wait_queue_head_t wq; + u32 token; + int cpu; +}; + +static struct kvm_task_sleep_head { + spinlock_t lock; + struct hlist_head list; +} async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE]; + +static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b, 
+ u32 token) +{ + struct hlist_node *p; + + hlist_for_each(p, &b->list) { + struct kvm_task_sleep_node *n = + hlist_entry(p, typeof(*n), link); + if (n->token == token) + return n; + } + + return NULL; +} + +void kvm_async_pf_task_wait(u32 token) +{ + u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); + struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; + struct kvm_task_sleep_node n, *e; + DEFINE_WAIT(wait); + + spin_lock(&b->lock); + e = _find_apf_task(b,
Re: [PATCH v7 08/12] Handle async PF in a guest.
Am 20.10.2010 13:48, Jan Kiszka wrote: Am 14.10.2010 11:22, Gleb Natapov wrote: When async PF capability is detected hook up special page fault handler that will handle async page fault events and bypass other page faults to regular page fault handler. Also add async PF handling to nested SVM emulation. Async PF always generates exit to L1 where vcpu thread will be scheduled out until page is available. Acked-by: Rik van Riel r...@redhat.com Signed-off-by: Gleb Natapov g...@redhat.com --- arch/x86/include/asm/kvm_para.h | 12 +++ arch/x86/include/asm/traps.h|1 + arch/x86/kernel/entry_32.S | 10 ++ arch/x86/kernel/entry_64.S |3 + arch/x86/kernel/kvm.c | 181 +++ arch/x86/kvm/svm.c | 45 -- 6 files changed, 243 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 2315398..fbfd367 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -65,6 +65,9 @@ struct kvm_mmu_op_release_pt { __u64 pt_phys; }; +#define KVM_PV_REASON_PAGE_NOT_PRESENT 1 +#define KVM_PV_REASON_PAGE_READY 2 + struct kvm_vcpu_pv_apf_data { __u32 reason; __u8 pad[60]; @@ -171,8 +174,17 @@ static inline unsigned int kvm_arch_para_features(void) #ifdef CONFIG_KVM_GUEST void __init kvm_guest_init(void); +void kvm_async_pf_task_wait(u32 token); +void kvm_async_pf_task_wake(u32 token); +u32 kvm_read_and_reset_pf_reason(void); #else #define kvm_guest_init() do { } while (0) +#define kvm_async_pf_task_wait(T) do {} while(0) +#define kvm_async_pf_task_wake(T) do {} while(0) +static u32 kvm_read_and_reset_pf_reason(void) +{ +return 0; +} #endif #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index f66cda5..0310da6 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -30,6 +30,7 @@ asmlinkage void segment_not_present(void); asmlinkage void stack_segment(void); asmlinkage void general_protection(void); asmlinkage void page_fault(void); +asmlinkage void 
async_page_fault(void); asmlinkage void spurious_interrupt_bug(void); asmlinkage void coprocessor_error(void); asmlinkage void alignment_check(void); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 227d009..e6e7273 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1496,6 +1496,16 @@ ENTRY(general_protection) CFI_ENDPROC END(general_protection) +#ifdef CONFIG_KVM_GUEST +ENTRY(async_page_fault) +RING0_EC_FRAME +pushl $do_async_page_fault +CFI_ADJUST_CFA_OFFSET 4 +jmp error_code +CFI_ENDPROC +END(apf_page_fault) +#endif + /* * End of kprobes section */ diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 17be5ec..def98c3 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1349,6 +1349,9 @@ errorentry xen_stack_segment do_stack_segment #endif errorentry general_protection do_general_protection errorentry page_fault do_page_fault +#ifdef CONFIG_KVM_GUEST +errorentry async_page_fault do_async_page_fault +#endif #ifdef CONFIG_X86_MCE paranoidzeroentry machine_check *machine_check_vector(%rip) #endif diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 032d03b..d564063 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -29,8 +29,14 @@ #include linux/hardirq.h #include linux/notifier.h #include linux/reboot.h +#include linux/hash.h +#include linux/sched.h +#include linux/slab.h +#include linux/kprobes.h #include asm/timer.h #include asm/cpu.h +#include asm/traps.h +#include asm/desc.h #define MMU_QUEUE_SIZE 1024 @@ -64,6 +70,168 @@ static void kvm_io_delay(void) { } +#define KVM_TASK_SLEEP_HASHBITS 8 +#define KVM_TASK_SLEEP_HASHSIZE (1KVM_TASK_SLEEP_HASHBITS) + +struct kvm_task_sleep_node { +struct hlist_node link; +wait_queue_head_t wq; +u32 token; +int cpu; +}; + +static struct kvm_task_sleep_head { +spinlock_t lock; +struct hlist_head list; +} async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE]; + +static struct kvm_task_sleep_node *_find_apf_task(struct 
kvm_task_sleep_head *b, + u32 token) +{ +struct hlist_node *p; + +hlist_for_each(p, &b->list) { +struct kvm_task_sleep_node *n = +hlist_entry(p, typeof(*n), link); +if (n->token == token) +return n; +} + +return NULL; +} + +void kvm_async_pf_task_wait(u32 token) +{ +u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); +struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; +struct kvm_task_sleep_node n, *e; +DEFINE_WAIT(wait); + +spin_lock(&b->lock); +e =
Re: [PATCH v7 08/12] Handle async PF in a guest.
On Wed, 2010-10-20 at 13:48 +0200, Jan Kiszka wrote: + case KVM_PV_REASON_PAGE_READY: + svm->apf_reason = 0; + local_irq_disable(); + kvm_async_pf_task_wake(fault_address); + local_irq_enable(); + break; That's only available if CONFIG_KVM_GUEST is set, no? Is there anything I miss that resolves this dependency automatically? Otherwise, some more #ifdef CONFIG_KVM_GUEST might be needed. Could you please trim your replies? -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v7 08/12] Handle async PF in a guest.
From: Gleb Natapov g...@redhat.com When async PF capability is detected hook up special page fault handler that will handle async page fault events and bypass other page faults to regular page fault handler. Also add async PF handling to nested SVM emulation. Async PF always generates exit to L1 where vcpu thread will be scheduled out until page is available. Acked-by: Rik van Riel r...@redhat.com Signed-off-by: Gleb Natapov g...@redhat.com --- arch/x86/include/asm/kvm_para.h | 12 +++ arch/x86/include/asm/traps.h|1 + arch/x86/kernel/entry_32.S | 10 ++ arch/x86/kernel/entry_64.S |3 + arch/x86/kernel/kvm.c | 181 +++ arch/x86/kvm/svm.c | 45 -- 6 files changed, 243 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 2315398..fbfd367 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -65,6 +65,9 @@ struct kvm_mmu_op_release_pt { __u64 pt_phys; }; +#define KVM_PV_REASON_PAGE_NOT_PRESENT 1 +#define KVM_PV_REASON_PAGE_READY 2 + struct kvm_vcpu_pv_apf_data { __u32 reason; __u8 pad[60]; @@ -171,8 +174,17 @@ static inline unsigned int kvm_arch_para_features(void) #ifdef CONFIG_KVM_GUEST void __init kvm_guest_init(void); +void kvm_async_pf_task_wait(u32 token); +void kvm_async_pf_task_wake(u32 token); +u32 kvm_read_and_reset_pf_reason(void); #else #define kvm_guest_init() do { } while (0) +#define kvm_async_pf_task_wait(T) do {} while(0) +#define kvm_async_pf_task_wake(T) do {} while(0) +static u32 kvm_read_and_reset_pf_reason(void) +{ + return 0; +} #endif #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index f66cda5..0310da6 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -30,6 +30,7 @@ asmlinkage void segment_not_present(void); asmlinkage void stack_segment(void); asmlinkage void general_protection(void); asmlinkage void page_fault(void); +asmlinkage void async_page_fault(void); asmlinkage void 
spurious_interrupt_bug(void); asmlinkage void coprocessor_error(void); asmlinkage void alignment_check(void); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 227d009..e6e7273 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1496,6 +1496,16 @@ ENTRY(general_protection) CFI_ENDPROC END(general_protection) +#ifdef CONFIG_KVM_GUEST +ENTRY(async_page_fault) + RING0_EC_FRAME + pushl $do_async_page_fault + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC +END(apf_page_fault) +#endif + /* * End of kprobes section */ diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 17be5ec..def98c3 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1349,6 +1349,9 @@ errorentry xen_stack_segment do_stack_segment #endif errorentry general_protection do_general_protection errorentry page_fault do_page_fault +#ifdef CONFIG_KVM_GUEST +errorentry async_page_fault do_async_page_fault +#endif #ifdef CONFIG_X86_MCE paranoidzeroentry machine_check *machine_check_vector(%rip) #endif diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 032d03b..d564063 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -29,8 +29,14 @@ #include linux/hardirq.h #include linux/notifier.h #include linux/reboot.h +#include linux/hash.h +#include linux/sched.h +#include linux/slab.h +#include linux/kprobes.h #include asm/timer.h #include asm/cpu.h +#include asm/traps.h +#include asm/desc.h #define MMU_QUEUE_SIZE 1024 @@ -64,6 +70,168 @@ static void kvm_io_delay(void) { } +#define KVM_TASK_SLEEP_HASHBITS 8 +#define KVM_TASK_SLEEP_HASHSIZE (1KVM_TASK_SLEEP_HASHBITS) + +struct kvm_task_sleep_node { + struct hlist_node link; + wait_queue_head_t wq; + u32 token; + int cpu; +}; + +static struct kvm_task_sleep_head { + spinlock_t lock; + struct hlist_head list; +} async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE]; + +static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b, + 
u32 token) +{ + struct hlist_node *p; + + hlist_for_each(p, &b->list) { + struct kvm_task_sleep_node *n = + hlist_entry(p, typeof(*n), link); + if (n->token == token) + return n; + } + + return NULL; +} + +void kvm_async_pf_task_wait(u32 token) +{ + u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); + struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; + struct kvm_task_sleep_node n, *e; + DEFINE_WAIT(wait); + + spin_lock(&b->lock); + e = _find_apf_task(b, token); + if (e) { + /* dummy entry exist - wake up was delivered ahead of PF */ +
[PATCH v7 08/12] Handle async PF in a guest.
When async PF capability is detected hook up special page fault handler that will handle async page fault events and bypass other page faults to regular page fault handler. Also add async PF handling to nested SVM emulation. Async PF always generates exit to L1 where vcpu thread will be scheduled out until page is available. Acked-by: Rik van Riel r...@redhat.com Signed-off-by: Gleb Natapov g...@redhat.com --- arch/x86/include/asm/kvm_para.h | 12 +++ arch/x86/include/asm/traps.h|1 + arch/x86/kernel/entry_32.S | 10 ++ arch/x86/kernel/entry_64.S |3 + arch/x86/kernel/kvm.c | 181 +++ arch/x86/kvm/svm.c | 45 -- 6 files changed, 243 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 2315398..fbfd367 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -65,6 +65,9 @@ struct kvm_mmu_op_release_pt { __u64 pt_phys; }; +#define KVM_PV_REASON_PAGE_NOT_PRESENT 1 +#define KVM_PV_REASON_PAGE_READY 2 + struct kvm_vcpu_pv_apf_data { __u32 reason; __u8 pad[60]; @@ -171,8 +174,17 @@ static inline unsigned int kvm_arch_para_features(void) #ifdef CONFIG_KVM_GUEST void __init kvm_guest_init(void); +void kvm_async_pf_task_wait(u32 token); +void kvm_async_pf_task_wake(u32 token); +u32 kvm_read_and_reset_pf_reason(void); #else #define kvm_guest_init() do { } while (0) +#define kvm_async_pf_task_wait(T) do {} while(0) +#define kvm_async_pf_task_wake(T) do {} while(0) +static u32 kvm_read_and_reset_pf_reason(void) +{ + return 0; +} #endif #endif /* __KERNEL__ */ diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index f66cda5..0310da6 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -30,6 +30,7 @@ asmlinkage void segment_not_present(void); asmlinkage void stack_segment(void); asmlinkage void general_protection(void); asmlinkage void page_fault(void); +asmlinkage void async_page_fault(void); asmlinkage void spurious_interrupt_bug(void); 
asmlinkage void coprocessor_error(void); asmlinkage void alignment_check(void); diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 227d009..e6e7273 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1496,6 +1496,16 @@ ENTRY(general_protection) CFI_ENDPROC END(general_protection) +#ifdef CONFIG_KVM_GUEST +ENTRY(async_page_fault) + RING0_EC_FRAME + pushl $do_async_page_fault + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC +END(apf_page_fault) +#endif + /* * End of kprobes section */ diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 17be5ec..def98c3 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1349,6 +1349,9 @@ errorentry xen_stack_segment do_stack_segment #endif errorentry general_protection do_general_protection errorentry page_fault do_page_fault +#ifdef CONFIG_KVM_GUEST +errorentry async_page_fault do_async_page_fault +#endif #ifdef CONFIG_X86_MCE paranoidzeroentry machine_check *machine_check_vector(%rip) #endif diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 032d03b..d564063 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -29,8 +29,14 @@ #include linux/hardirq.h #include linux/notifier.h #include linux/reboot.h +#include linux/hash.h +#include linux/sched.h +#include linux/slab.h +#include linux/kprobes.h #include asm/timer.h #include asm/cpu.h +#include asm/traps.h +#include asm/desc.h #define MMU_QUEUE_SIZE 1024 @@ -64,6 +70,168 @@ static void kvm_io_delay(void) { } +#define KVM_TASK_SLEEP_HASHBITS 8 +#define KVM_TASK_SLEEP_HASHSIZE (1KVM_TASK_SLEEP_HASHBITS) + +struct kvm_task_sleep_node { + struct hlist_node link; + wait_queue_head_t wq; + u32 token; + int cpu; +}; + +static struct kvm_task_sleep_head { + spinlock_t lock; + struct hlist_head list; +} async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE]; + +static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b, + u32 token) +{ + struct 
hlist_node *p; + + hlist_for_each(p, &b->list) { + struct kvm_task_sleep_node *n = + hlist_entry(p, typeof(*n), link); + if (n->token == token) + return n; + } + + return NULL; +} + +void kvm_async_pf_task_wait(u32 token) +{ + u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); + struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; + struct kvm_task_sleep_node n, *e; + DEFINE_WAIT(wait); + + spin_lock(&b->lock); + e = _find_apf_task(b, token); + if (e) { + /* dummy entry exist - wake up was delivered ahead of PF */ + hlist_del(&e->link); +