[PATCH V2 3/10] KVM/MMU: Add last_level in the struct mmu_spte_page

2019-02-01 Thread lantianyu1986
From: Lan Tianyu 

This patch adds last_level to struct kvm_mmu_page. When building the
tlb flush range list, last_level is used to identify whether the
page should be added to the list.

Signed-off-by: Lan Tianyu 
---
 arch/x86/include/asm/kvm_host.h | 1 +
 arch/x86/kvm/mmu.c  | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4a3d3e58fe0a..9d858d68c17a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -325,6 +325,7 @@ struct kvm_mmu_page {
struct hlist_node flush_link;
struct hlist_node hash_link;
bool unsync;
+   bool last_level;
 
/*
 * The following two entries are used to key the shadow page in the
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ce770b446238..70cafd3f95ab 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2918,6 +2918,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 
if (level > PT_PAGE_TABLE_LEVEL)
spte |= PT_PAGE_SIZE_MASK;
+
+   sp->last_level = is_last_spte(spte, level);
+
if (tdp_enabled)
spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn,
kvm_is_mmio_pfn(pfn));
-- 
2.14.4

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH V2 5/10] KVM/MMU: Flush tlb with range list in sync_page()

2019-02-01 Thread lantianyu1986
From: Lan Tianyu 

This patch flushes the tlb via the flush list function. A page is
put on the flush list when the return value of set_spte() includes
the flag SET_SPTE_NEED_REMOTE_TLB_FLUSH.
kvm_flush_remote_tlbs_with_list() checks whether the flush list is
empty or not. It also checks whether range tlb flush is available
and falls back to the traditional flush if not.

Signed-off-by: Lan Tianyu 
---
Change since v1:
   Use check of list_empty in the kvm_flush_remote_tlbs_with_list()
   to determine flush or not instead of checking set_spte_ret.
 
arch/x86/kvm/paging_tmpl.h | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 6bdca39829bc..d84486e75345 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -970,7 +970,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct 
kvm_mmu_page *sp)
int i, nr_present = 0;
bool host_writable;
gpa_t first_pte_gpa;
-   int set_spte_ret = 0;
+   HLIST_HEAD(flush_list);
 
/* direct kvm_mmu_page can not be unsync. */
BUG_ON(sp->role.direct);
@@ -978,6 +978,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct 
kvm_mmu_page *sp)
first_pte_gpa = FNAME(get_level1_sp_gpa)(sp);
 
for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
+   int set_spte_ret = 0;
unsigned pte_access;
pt_element_t gpte;
gpa_t pte_gpa;
@@ -1027,14 +1028,20 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, 
struct kvm_mmu_page *sp)
 
host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE;
 
-   set_spte_ret |= set_spte(vcpu, >spt[i],
+   set_spte_ret = set_spte(vcpu, >spt[i],
 pte_access, PT_PAGE_TABLE_LEVEL,
 gfn, spte_to_pfn(sp->spt[i]),
 true, false, host_writable);
+
+   if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH) {
+   struct kvm_mmu_page *leaf_sp = page_header(sp->spt[i]
+   & PT64_BASE_ADDR_MASK);
+   hlist_add_head(_sp->flush_link, _list);
+   }
+
}
 
-   if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH)
-   kvm_flush_remote_tlbs(vcpu->kvm);
+   kvm_flush_remote_tlbs_with_list(vcpu->kvm, _list);
 
return nr_present;
 }
-- 
2.14.4

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH V2 8/10] KVM: Use tlb range flush in the kvm_vm_ioctl_get/clear_dirty_log()

2019-02-01 Thread lantianyu1986
From: Lan Tianyu 

This patch uses tlb range flush to flush the memslot's gfn range in
kvm_vm_ioctl_get/clear_dirty_log() instead of flushing the tlbs
of the entire ept page table when range flush is available.

Signed-off-by: Lan Tianyu 
---
 arch/x86/kvm/mmu.c |  8 +---
 arch/x86/kvm/mmu.h |  7 +++
 arch/x86/kvm/x86.c | 16 
 3 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 6b5e9bed6665..63b3e36530e3 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -264,12 +264,6 @@ static void mmu_spte_set(u64 *sptep, u64 spte);
 static union kvm_mmu_page_role
 kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu);
 
-
-static inline bool kvm_available_flush_tlb_with_range(void)
-{
-   return kvm_x86_ops->tlb_remote_flush_with_range;
-}
-
 static void kvm_flush_remote_tlbs_with_range(struct kvm *kvm,
struct kvm_tlb_range *range)
 {
@@ -282,7 +276,7 @@ static void kvm_flush_remote_tlbs_with_range(struct kvm 
*kvm,
kvm_flush_remote_tlbs(kvm);
 }
 
-static void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
+void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
u64 start_gfn, u64 pages)
 {
struct kvm_tlb_range range;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index c7b333147c4a..dddab78d8ed8 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -63,6 +63,13 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool 
execonly,
 bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
 int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
u64 fault_address, char *insn, int insn_len);
+void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
+   u64 start_gfn, u64 pages);
+
+static inline bool kvm_available_flush_tlb_with_range(void)
+{
+   return kvm_x86_ops->tlb_remote_flush_with_range;
+}
 
 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
 {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3d32b8f5728d..0f70e07abfa1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4445,9 +4445,13 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct 
kvm_dirty_log *log)
 * kvm_mmu_slot_remove_write_access().
 */
lockdep_assert_held(>slots_lock);
-   if (flush)
-   kvm_flush_remote_tlbs(kvm);
+   if (flush) {
+   struct kvm_memory_slot *memslot = kvm_get_memslot(kvm,
+   log->slot);
 
+   kvm_flush_remote_tlbs_with_address(kvm, memslot->base_gfn,
+   memslot->npages);
+   }
mutex_unlock(>slots_lock);
return r;
 }
@@ -4472,9 +4476,13 @@ int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, struct 
kvm_clear_dirty_log *lo
 * kvm_mmu_slot_remove_write_access().
 */
lockdep_assert_held(>slots_lock);
-   if (flush)
-   kvm_flush_remote_tlbs(kvm);
+   if (flush) {
+   struct kvm_memory_slot *memslot = kvm_get_memslot(kvm,
+   log->slot);
 
+   kvm_flush_remote_tlbs_with_address(kvm, memslot->base_gfn,
+   memslot->npages);
+   }
mutex_unlock(>slots_lock);
return r;
 }
-- 
2.14.4

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH V2 6/10] KVM/MMU: Flush tlb directly in the kvm_mmu_slot_gfn_write_protect()

2019-02-01 Thread lantianyu1986
From: Lan Tianyu 

This patch is to flush tlb directly in the kvm_mmu_slot_gfn_write_protect()
when range flush is available.

Signed-off-by: Lan Tianyu 
---
 arch/x86/kvm/mmu.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d57574b49823..6b5e9bed6665 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1718,6 +1718,11 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
write_protected |= __rmap_write_protect(kvm, rmap_head, true);
}
 
+   if (write_protected && kvm_available_flush_tlb_with_range()) {
+   kvm_flush_remote_tlbs_with_address(kvm, gfn, 1);
+   write_protected = false;
+   }
+
return write_protected;
 }
 
-- 
2.14.4

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH V2 9/10] KVM: Add flush parameter for kvm_age_hva()

2019-02-01 Thread lantianyu1986
From: Lan Tianyu 

This patch adds a flush parameter to kvm_age_hva() and moves the tlb
flush from kvm_mmu_notifier_clear_flush_young() to kvm_age_hva().
kvm_age_hva() flushes the tlb only when both conditions are met:
the flush parameter is set and the return value young is greater
than 0.

Signed-off-by: Lan Tianyu 
---
 arch/arm/include/asm/kvm_host.h |  3 ++-
 arch/arm64/include/asm/kvm_host.h   |  3 ++-
 arch/mips/include/asm/kvm_host.h|  3 ++-
 arch/mips/kvm/mmu.c | 11 +--
 arch/powerpc/include/asm/kvm_host.h |  3 ++-
 arch/powerpc/kvm/book3s.c   | 10 --
 arch/powerpc/kvm/e500_mmu_host.c|  3 ++-
 arch/x86/include/asm/kvm_host.h |  3 ++-
 arch/x86/kvm/mmu.c  | 10 --
 virt/kvm/arm/mmu.c  | 13 +++--
 virt/kvm/kvm_main.c |  6 ++
 11 files changed, 50 insertions(+), 18 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index ca56537b61bc..b3c6a6db8173 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -229,7 +229,8 @@ int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, 
pte_t pte);
 
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
-int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end,
+   bool flush);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 
 struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index 7732d0ba4e60..182bbb2de60a 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -361,7 +361,8 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
 int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end);
 int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
-int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end,
+   bool flush);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 
 struct kvm_vcpu *kvm_arm_get_running_vcpu(void);
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index d2abd98471e8..e055f49532c8 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -937,7 +937,8 @@ enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct 
kvm_vcpu *vcpu,
 int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end);
 int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
-int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end,
+   bool flush);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 
 /* Emulation */
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index 97e538a8c1be..288a22d70cf8 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -579,9 +579,16 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gfn_t 
gfn, gfn_t gfn_end,
return pte_young(*gpa_pte);
 }
 
-int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end,
+   bool flush)
 {
-   return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
+   int young = handle_hva_to_gpa(kvm, start, end,
+   kvm_age_hva_handler, NULL);
+
+   if (flush && young > 0)
+   kvm_flush_remote_tlbs(kvm);
+
+   return young;
 }
 
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 0f98f00da2ea..d160e6b8ccfb 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -70,7 +70,8 @@
 
 extern int kvm_unmap_hva_range(struct kvm *kvm,
   unsigned long start, unsigned long end);
-extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long 
end);
+extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end,
+  bool flush);
 extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 extern int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
 
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index bd1a677dd9e4..09a67ebbde8a 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -841,9 +841,15 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long 
start, unsigned long end)
return kvm->arch.kvm_ops->unmap_hva_range(kvm, start, end);
 }
 
-int kvm_age_hva(struct kvm *kvm, unsigned 

[PATCH V2 4/10] KVM/MMU: Introduce tlb flush with range list

2019-02-01 Thread lantianyu1986
From: Lan Tianyu 

This patch introduces a tlb flush with range list interface that uses
struct kvm_mmu_page as the list entry. The flush list function is used
in kvm_mmu_commit_zap_page().

Signed-off-by: Lan Tianyu 
---
 arch/x86/kvm/mmu.c | 25 -
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 70cafd3f95ab..d57574b49823 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -289,6 +289,20 @@ static void kvm_flush_remote_tlbs_with_address(struct kvm 
*kvm,
 
range.start_gfn = start_gfn;
range.pages = pages;
+   range.flush_list = NULL;
+
+   kvm_flush_remote_tlbs_with_range(kvm, );
+}
+
+static void kvm_flush_remote_tlbs_with_list(struct kvm *kvm,
+   struct hlist_head *flush_list)
+{
+   struct kvm_tlb_range range;
+
+   if (hlist_empty(flush_list))
+   return;
+
+   range.flush_list = flush_list;
 
kvm_flush_remote_tlbs_with_range(kvm, );
 }
@@ -2708,6 +2722,7 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
struct list_head *invalid_list)
 {
struct kvm_mmu_page *sp, *nsp;
+   HLIST_HEAD(flush_list);
 
if (list_empty(invalid_list))
return;
@@ -2721,7 +2736,15 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
 * In addition, kvm_flush_remote_tlbs waits for all vcpus to exit
 * guest mode and/or lockless shadow page table walks.
 */
-   kvm_flush_remote_tlbs(kvm);
+   if (kvm_available_flush_tlb_with_range()) {
+   list_for_each_entry(sp, invalid_list, link)
+   if (sp->last_level)
+   hlist_add_head(>flush_link, _list);
+
+   kvm_flush_remote_tlbs_with_list(kvm, _list);
+   } else {
+   kvm_flush_remote_tlbs(kvm);
+   }
 
list_for_each_entry_safe(sp, nsp, invalid_list, link) {
WARN_ON(!sp->role.invalid || sp->root_count);
-- 
2.14.4

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH V2 2/10] KVM/VMX: Fill range list in kvm_fill_hv_flush_list_func()

2019-02-01 Thread lantianyu1986
From: Lan Tianyu 

Populate ranges on the flush list into struct hv_guest_mapping_flush_list
when flush list is available in the struct kvm_tlb_range.

Signed-off-by: Lan Tianyu 
---
Change since v1:
   Make flush list as a "hlist" instead of a "list" in order to 
   keep struct kvm_mmu_page size.

arch/x86/include/asm/kvm_host.h |  7 +++
 arch/x86/kvm/vmx/vmx.c  | 18 --
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 49f449f56434..4a3d3e58fe0a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -317,6 +317,12 @@ struct kvm_rmap_head {
 
 struct kvm_mmu_page {
struct list_head link;
+
+   /*
+* Tlb flush with range list uses struct kvm_mmu_page as list entry
+* and all list operations should be under protection of mmu_lock.
+*/
+   struct hlist_node flush_link;
struct hlist_node hash_link;
bool unsync;
 
@@ -443,6 +449,7 @@ struct kvm_mmu {
 struct kvm_tlb_range {
u64 start_gfn;
u64 pages;
+   struct hlist_head *flush_list;
 };
 
 enum pmc_type {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 9d954b4adce3..6452d0efd2cc 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -427,9 +427,23 @@ int kvm_fill_hv_flush_list_func(struct 
hv_guest_mapping_flush_list *flush,
void *data)
 {
struct kvm_tlb_range *range = data;
+   struct kvm_mmu_page *sp;
 
-   return hyperv_fill_flush_guest_mapping_list(flush, 0, range->start_gfn,
-   range->pages);
+   if (!range->flush_list) {
+   return hyperv_fill_flush_guest_mapping_list(flush,
+   0, range->start_gfn, range->pages);
+   } else {
+   int offset = 0;
+
+   hlist_for_each_entry(sp, range->flush_list, flush_link) {
+   int pages = KVM_PAGES_PER_HPAGE(sp->role.level);
+
+   offset = hyperv_fill_flush_guest_mapping_list(flush,
+   offset, sp->gfn, pages);
+   }
+
+   return offset;
+   }
 }
 
 static inline int __hv_remote_flush_tlb_with_range(struct kvm *kvm,
-- 
2.14.4

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH V2 1/10] X86/Hyper-V: Add parameter offset for hyperv_fill_flush_guest_mapping_list()

2019-02-01 Thread lantianyu1986
From: Lan Tianyu 

Add parameter offset to specify start position to add flush ranges in
guest address list of struct hv_guest_mapping_flush_list.

Signed-off-by: Lan Tianyu 
---
arch/x86/hyperv/nested.c| 4 ++--
 arch/x86/include/asm/mshyperv.h | 2 +-
 arch/x86/kvm/vmx/vmx.c  | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/hyperv/nested.c b/arch/x86/hyperv/nested.c
index dd0a843f766d..96f8bac7476d 100644
--- a/arch/x86/hyperv/nested.c
+++ b/arch/x86/hyperv/nested.c
@@ -58,11 +58,11 @@ EXPORT_SYMBOL_GPL(hyperv_flush_guest_mapping);
 
 int hyperv_fill_flush_guest_mapping_list(
struct hv_guest_mapping_flush_list *flush,
-   u64 start_gfn, u64 pages)
+   int offset, u64 start_gfn, u64 pages)
 {
u64 cur = start_gfn;
u64 additional_pages;
-   int gpa_n = 0;
+   int gpa_n = offset;
 
do {
/*
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index cc60e617931c..d6be685ab6b0 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -357,7 +357,7 @@ int hyperv_flush_guest_mapping_range(u64 as,
hyperv_fill_flush_list_func fill_func, void *data);
 int hyperv_fill_flush_guest_mapping_list(
struct hv_guest_mapping_flush_list *flush,
-   u64 start_gfn, u64 end_gfn);
+   int offset, u64 start_gfn, u64 end_gfn);
 
 #ifdef CONFIG_X86_64
 void hv_apic_init(void);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index f6915f10e584..9d954b4adce3 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -428,7 +428,7 @@ int kvm_fill_hv_flush_list_func(struct 
hv_guest_mapping_flush_list *flush,
 {
struct kvm_tlb_range *range = data;
 
-   return hyperv_fill_flush_guest_mapping_list(flush, range->start_gfn,
+   return hyperv_fill_flush_guest_mapping_list(flush, 0, range->start_gfn,
range->pages);
 }
 
-- 
2.14.4

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH V2 00/10] X86/KVM/Hyper-V: Add HV ept tlb range list flush support in KVM

2019-02-01 Thread lantianyu1986
From: Lan Tianyu 

This patchset introduces hv ept tlb range list flush support in the
KVM MMU component. The ept tlbs of several address ranges can be
flushed via a single hypercall, and the new list flush function is
used in kvm_mmu_commit_zap_page() and FNAME(sync_page). This patchset
also adds hv ept tlb range flush support to more KVM MMU functions.

Change since v1:
   1) Make flush list as a hlist instead of list in order to 
   keep struct kvm_mmu_page size.
   2) Add last_level flag in the struct kvm_mmu_page instead
   of spte pointer
   3) Move tlb flush from kvm_mmu_notifier_clear_flush_young() to 
kvm_age_hva()
   4) Use range flush in the kvm_vm_ioctl_get/clear_dirty_log()

Lan Tianyu (10):
  X86/Hyper-V: Add parameter offset for
hyperv_fill_flush_guest_mapping_list()
  KVM/VMX: Fill range list in kvm_fill_hv_flush_list_func()
  KVM/MMU: Add last_level in the struct mmu_spte_page
  KVM/MMU: Introduce tlb flush with range list
  KVM/MMU: Flush tlb with range list in sync_page()
  KVM/MMU: Flush tlb directly in the kvm_mmu_slot_gfn_write_protect()
  KVM: Add kvm_get_memslot() to get memslot via slot id
  KVM: Use tlb range flush in the kvm_vm_ioctl_get/clear_dirty_log()
  KVM: Add flush parameter for kvm_age_hva()
  KVM/MMU: Use tlb range flush  in the kvm_age_hva()

 arch/arm/include/asm/kvm_host.h |  3 ++-
 arch/arm64/include/asm/kvm_host.h   |  3 ++-
 arch/mips/include/asm/kvm_host.h|  3 ++-
 arch/mips/kvm/mmu.c | 11 ++--
 arch/powerpc/include/asm/kvm_host.h |  3 ++-
 arch/powerpc/kvm/book3s.c   | 10 ++--
 arch/powerpc/kvm/e500_mmu_host.c|  3 ++-
 arch/x86/hyperv/nested.c|  4 +--
 arch/x86/include/asm/kvm_host.h | 11 +++-
 arch/x86/include/asm/mshyperv.h |  2 +-
 arch/x86/kvm/mmu.c  | 51 +
 arch/x86/kvm/mmu.h  |  7 +
 arch/x86/kvm/paging_tmpl.h  | 15 ---
 arch/x86/kvm/vmx/vmx.c  | 18 +++--
 arch/x86/kvm/x86.c  | 16 +---
 include/linux/kvm_host.h|  1 +
 virt/kvm/arm/mmu.c  | 13 --
 virt/kvm/kvm_main.c | 51 +++--
 18 files changed, 160 insertions(+), 65 deletions(-)

-- 
2.14.4

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: Re: [PATCH v3 1/3] KVM: arm/arm64: vgic: Make vgic_irq->irq_lock a raw_spinlock

2019-02-01 Thread Julia Cartwright
On Fri, Feb 01, 2019 at 03:30:58PM +, Julien Grall wrote:
> Hi Julien,
> 
> On 07/01/2019 15:06, Julien Thierry wrote:
> > vgic_irq->irq_lock must always be taken with interrupts disabled as
> > it is used in interrupt context.
> 
> I am a bit confused with the reason here. The code mention that ap_list_lock
> could be taken from the timer interrupt handler interrupt. I assume it
> speaks about the handler kvm_arch_timer_handler. Looking at the
> configuration of the interrupt, the flag IRQF_NO_THREAD is not set, so the
> interrupt should be threaded when CONFIG_PREEMPT_FULL is set. If my
> understanding is correct, this means the interrupt thread would sleep if it
> takes the spinlock.
> 
> Did I miss anything? Do you have an exact path where the vGIC is actually
> called from an interrupt context?

The part you're missing is that percpu interrupts are not force
threaded:

static int irq_setup_forced_threading(struct irqaction *new)
{
if (!force_irqthreads)
return 0;
if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT))
return 0;

/* ...*/
}

   Julia
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 1/3] KVM: arm/arm64: vgic: Make vgic_irq->irq_lock a raw_spinlock

2019-02-01 Thread Julien Grall

Hi Julia,

On 01/02/2019 17:36, Julia Cartwright wrote:

On Fri, Feb 01, 2019 at 03:30:58PM +, Julien Grall wrote:

Hi Julien,

On 07/01/2019 15:06, Julien Thierry wrote:

vgic_irq->irq_lock must always be taken with interrupts disabled as
it is used in interrupt context.


I am a bit confused with the reason here. The code mention that ap_list_lock
could be taken from the timer interrupt handler interrupt. I assume it
speaks about the handler kvm_arch_timer_handler. Looking at the
configuration of the interrupt, the flag IRQF_NO_THREAD is not set, so the
interrupt should be threaded when CONFIG_PREEMPT_FULL is set. If my
understanding is correct, this means the interrupt thread would sleep if it
takes the spinlock.

Did I miss anything? Do you have an exact path where the vGIC is actually
called from an interrupt context?


The part you're missing is that percpu interrupts are not force
threaded:

static int irq_setup_forced_threading(struct irqaction *new)
{
if (!force_irqthreads)
return 0;
if (new->flags & (IRQF_NO_THREAD | IRQF_PERCPU | IRQF_ONESHOT))
return 0;

/* ...*/
}


Thank you for the pointer! I think it would be worth mentioning in the commit 
message that per-cpu interrupts are not threaded.


Best regards,

--
Julien Grall
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 1/3] KVM: arm/arm64: vgic: Make vgic_irq->irq_lock a raw_spinlock

2019-02-01 Thread Julien Grall

Hi Julien,

On 07/01/2019 15:06, Julien Thierry wrote:

vgic_irq->irq_lock must always be taken with interrupts disabled as
it is used in interrupt context.


I am a bit confused with the reason here. The code mention that ap_list_lock 
could be taken from the timer interrupt handler interrupt. I assume it speaks 
about the handler kvm_arch_timer_handler. Looking at the configuration of the 
interrupt, the flag IRQF_NO_THREAD is not set, so the interrupt should be 
threaded when CONFIG_PREEMPT_FULL is set. If my understanding is correct, this 
means the interrupt thread would sleep if it takes the spinlock.


Did I miss anything? Do you have an exact path where the vGIC is actually called 
from an interrupt context?


However, those functions can be called from section with hardirq disabled (see 
kvm_vgic_sync_hwstate). So I can see a reason to use raw_spin_lock here and the 
rest of the series.


Cheers,



For configurations such as PREEMPT_RT_FULL, this means that it should
be a raw_spinlock since RT spinlocks are interruptible.

Signed-off-by: Julien Thierry 
Acked-by: Christoffer Dall 
Cc: Christoffer Dall 
Cc: Marc Zyngier 
---
  include/kvm/arm_vgic.h   |  2 +-
  virt/kvm/arm/vgic/vgic-debug.c   |  4 +--
  virt/kvm/arm/vgic/vgic-init.c|  4 +--
  virt/kvm/arm/vgic/vgic-its.c | 14 
  virt/kvm/arm/vgic/vgic-mmio-v2.c | 14 
  virt/kvm/arm/vgic/vgic-mmio-v3.c | 12 +++
  virt/kvm/arm/vgic/vgic-mmio.c| 34 +--
  virt/kvm/arm/vgic/vgic-v2.c  |  4 +--
  virt/kvm/arm/vgic/vgic-v3.c  |  8 ++---
  virt/kvm/arm/vgic/vgic.c | 71 
  10 files changed, 83 insertions(+), 84 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 4f31f96..b542605 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -100,7 +100,7 @@ enum vgic_irq_config {
  };
  
  struct vgic_irq {

-   spinlock_t irq_lock;/* Protects the content of the struct */
+   raw_spinlock_t irq_lock;/* Protects the content of the struct */
struct list_head lpi_list;  /* Used to link all LPIs together */
struct list_head ap_list;
  
diff --git a/virt/kvm/arm/vgic/vgic-debug.c b/virt/kvm/arm/vgic/vgic-debug.c

index 07aa900..1f62f2b 100644
--- a/virt/kvm/arm/vgic/vgic-debug.c
+++ b/virt/kvm/arm/vgic/vgic-debug.c
@@ -251,9 +251,9 @@ static int vgic_debug_show(struct seq_file *s, void *v)
return 0;
}
  
-	spin_lock_irqsave(>irq_lock, flags);

+   raw_spin_lock_irqsave(>irq_lock, flags);
print_irq_state(s, irq, vcpu);
-   spin_unlock_irqrestore(>irq_lock, flags);
+   raw_spin_unlock_irqrestore(>irq_lock, flags);
  
  	vgic_put_irq(kvm, irq);

return 0;
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index c0c0b88..1128e97 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -171,7 +171,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int 
nr_spis)
  
  		irq->intid = i + VGIC_NR_PRIVATE_IRQS;

INIT_LIST_HEAD(>ap_list);
-   spin_lock_init(>irq_lock);
+   raw_spin_lock_init(>irq_lock);
irq->vcpu = NULL;
irq->target_vcpu = vcpu0;
kref_init(>refcount);
@@ -216,7 +216,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
struct vgic_irq *irq = _cpu->private_irqs[i];
  
  		INIT_LIST_HEAD(>ap_list);

-   spin_lock_init(>irq_lock);
+   raw_spin_lock_init(>irq_lock);
irq->intid = i;
irq->vcpu = NULL;
irq->target_vcpu = vcpu;
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index eb2a390..911ba61 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -65,7 +65,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 
intid,
  
  	INIT_LIST_HEAD(>lpi_list);

INIT_LIST_HEAD(>ap_list);
-   spin_lock_init(>irq_lock);
+   raw_spin_lock_init(>irq_lock);
  
  	irq->config = VGIC_CONFIG_EDGE;

kref_init(>refcount);
@@ -287,7 +287,7 @@ static int update_lpi_config(struct kvm *kvm, struct 
vgic_irq *irq,
if (ret)
return ret;
  
-	spin_lock_irqsave(>irq_lock, flags);

+   raw_spin_lock_irqsave(>irq_lock, flags);
  
  	if (!filter_vcpu || filter_vcpu == irq->target_vcpu) {

irq->priority = LPI_PROP_PRIORITY(prop);
@@ -299,7 +299,7 @@ static int update_lpi_config(struct kvm *kvm, struct 
vgic_irq *irq,
}
}
  
-	spin_unlock_irqrestore(>irq_lock, flags);

+   raw_spin_unlock_irqrestore(>irq_lock, flags);
  
  	if (irq->hw)

return its_prop_update_vlpi(irq->host_irq, prop, needs_inv);
@@ -352,9 +352,9 @@ static int update_affinity(struct vgic_irq *irq, struct 
kvm_vcpu *vcpu)
int ret = 0;
unsigned long flags;
  
-	

[PATCH V2 00/10] X86/KVM/Hyper-V: Add HV ept tlb range list flush support in KVM

2019-02-01 Thread lantianyu1986
From: Lan Tianyu 

This patchset introduces hv ept tlb range list flush function
support in the KVM MMU component. Flushing the ept tlbs of several address
ranges can be done via a single hypercall, and the new list flush function is
used in kvm_mmu_commit_zap_page() and FNAME(sync_page). This patchset
also adds hv ept tlb range flush support to more KVM MMU functions.

Change since v1:
   1) Make flush list as a hlist instead of list in order to 
   keep struct kvm_mmu_page size.
   2) Add last_level flag in the struct kvm_mmu_page instead
   of spte pointer
   3) Move tlb flush from kvm_mmu_notifier_clear_flush_young() to 
kvm_age_hva()
   4) Use range flush in the kvm_vm_ioctl_get/clear_dirty_log()

Lan Tianyu (10):
  X86/Hyper-V: Add parameter offset for
hyperv_fill_flush_guest_mapping_list()
  KVM/VMX: Fill range list in kvm_fill_hv_flush_list_func()
  KVM/MMU: Add last_level in the struct mmu_spte_page
  KVM/MMU: Introduce tlb flush with range list
  KVM/MMU: Flush tlb with range list in sync_page()
  KVM/MMU: Flush tlb directly in the kvm_mmu_slot_gfn_write_protect()
  KVM: Add kvm_get_memslot() to get memslot via slot id
  KVM: Use tlb range flush in the kvm_vm_ioctl_get/clear_dirty_log()
  KVM: Add flush parameter for kvm_age_hva()
  KVM/MMU: Use tlb range flush  in the kvm_age_hva()

 arch/arm/include/asm/kvm_host.h |  3 ++-
 arch/arm64/include/asm/kvm_host.h   |  3 ++-
 arch/mips/include/asm/kvm_host.h|  3 ++-
 arch/mips/kvm/mmu.c | 11 ++--
 arch/powerpc/include/asm/kvm_host.h |  3 ++-
 arch/powerpc/kvm/book3s.c   | 10 ++--
 arch/powerpc/kvm/e500_mmu_host.c|  3 ++-
 arch/x86/hyperv/nested.c|  4 +--
 arch/x86/include/asm/kvm_host.h | 11 +++-
 arch/x86/include/asm/mshyperv.h |  2 +-
 arch/x86/kvm/mmu.c  | 51 +
 arch/x86/kvm/mmu.h  |  7 +
 arch/x86/kvm/paging_tmpl.h  | 15 ---
 arch/x86/kvm/vmx/vmx.c  | 18 +++--
 arch/x86/kvm/x86.c  | 16 +---
 include/linux/kvm_host.h|  1 +
 virt/kvm/arm/mmu.c  | 13 --
 virt/kvm/kvm_main.c | 51 +++--
 18 files changed, 160 insertions(+), 65 deletions(-)

-- 
2.14.4

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v8 18/26] ACPI / APEI: Make GHES estatus header validation more user friendly

2019-02-01 Thread Borislav Petkov
On Tue, Jan 29, 2019 at 06:48:54PM +0000, James Morse wrote:
> ghes_read_estatus() checks various lengths in the top-level header to
> ensure the CPER records to be read aren't obviously corrupt.
> 
> Take the opportunity to make this more user-friendly, printing a
> (ratelimited) message about the nature of the header format error.
> 
> Suggested-by: Borislav Petkov 
> Signed-off-by: James Morse 
> ---
>  drivers/acpi/apei/ghes.c | 46 
>  1 file changed, 32 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
> index f95db2398dd5..9391fff71344 100644
> --- a/drivers/acpi/apei/ghes.c
> +++ b/drivers/acpi/apei/ghes.c
> @@ -293,6 +293,30 @@ static void ghes_copy_tofrom_phys(void *buffer, u64 
> paddr, u32 len,
>   }
>  }
>  
> +/* Check the top-level record header has an appropriate size. */
> +int __ghes_check_estatus(struct ghes *ghes,

static.

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v8 09/26] ACPI / APEI: Generalise the estatus queue's notify code

2019-02-01 Thread Borislav Petkov
On Tue, Jan 29, 2019 at 06:48:45PM +0000, James Morse wrote:
> +static int ghes_in_nmi_spool_from_list(struct list_head *rcu_list)
> +{
> + int err, ret = -ENOENT;
> + struct ghes *ghes;
> +
> + rcu_read_lock();
> + list_for_each_entry_rcu(ghes, rcu_list, list) {
> + err = ghes_in_nmi_queue_one_entry(ghes);
> + if (!err)
> + ret = 0;

Do I understand this correctly that we want to do "ret = 0" for at least
one record which ghes_in_nmi_queue_one_entry() has succeeded queueing?

For those for which it has returned -ENOENT, estatus has been cleared,
nothing has been queued so we don't have to do anything for that
particular entry...

Btw, you don't really need the err variable:

if (!ghes_in_nmi_queue_one_entry(ghes))
ret = 0;

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 1/4] KVM: arm64: Forbid kprobing of the VHE world-switch code

2019-02-01 Thread Marc Zyngier
On 24/01/2019 16:32, James Morse wrote:
> On systems with VHE the kernel and KVM's world-switch code run at the
> same exception level. Code that is only used on a VHE system does not
> need to be annotated as __hyp_text as it can reside anywhere in the
> kernel text.
> 
> __hyp_text was also used to prevent kprobes from patching breakpoint
> instructions into this region, as this code runs at a different
> exception level. While this is no longer true with VHE, KVM still
> switches VBAR_EL1, meaning a kprobe's breakpoint executed in the
> world-switch code will cause a hyp-panic.

[...]

For what it is worth, I've now queued this patch as a fix for 5.0.

Thanks,

M.
-- 
Jazz is not dead. It just smells funny...
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v8 04/26] ACPI / APEI: Make hest.c manage the estatus memory pool

2019-02-01 Thread Borislav Petkov
On Tue, Jan 29, 2019 at 06:48:40PM +0000, James Morse wrote:
> ghes.c has a memory pool it uses for the estatus cache and the estatus
> queue. The cache is initialised when registering the platform driver.
> For the queue, an NMI-like notification has to grow/shrink the pool
> as it is registered and unregistered.
> 
> This is all pretty noisy when adding new NMI-like notifications, it
> would be better to replace this with a static pool size based on the
> number of users.
> 
> As a precursor, move the call that creates the pool from ghes_init(),
> into hest.c. Later this will take the number of ghes entries and
> consolidate the queue allocations.
> Remove ghes_estatus_pool_exit() as hest.c doesn't have anywhere to put
> this.
> 
> The pool is now initialised as part of ACPI's subsys_initcall():
> (acpi_init(), acpi_scan_init(), acpi_pci_root_init(), acpi_hest_init())
> Before this patch it happened later as a GHES specific device_initcall().
> 
> Signed-off-by: James Morse 
> ---
> Changes since v7:
> * Moved the pool init later, the driver isn't probed until device_init.
> ---
>  drivers/acpi/apei/ghes.c | 33 ++---
>  drivers/acpi/apei/hest.c | 10 +-
>  include/acpi/ghes.h  |  2 ++
>  3 files changed, 17 insertions(+), 28 deletions(-)

Reviewed-by: Borislav Petkov 

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v2 2/4] Makefile: support -s switch

2019-02-01 Thread Andre Przywara
"make -s" suppresses normal output, just shows warnings and errors.
But since we explicitly override the make output with our fancy concise
version, we miss out on this feature.

Do as the kernel does and explicitly suppress every normal output when -s
is given. This helps to spot warnings that scroll out of the terminal
window too quickly.

Signed-off-by: Andre Przywara 
---
 Makefile | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index c4faff66..ec75cd99 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,11 @@
 #
 
 ifeq ($(strip $(V)),)
-   E = @echo
+   ifeq ($(findstring s,$(filter-out --%,$(MAKEFLAGS))),)
+   E = @echo
+   else
+   E = @\#
+   endif
Q = @
 else
E = @\#
-- 
2.17.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v2 1/4] arm: fdt: add stdout-path to /chosen node

2019-02-01 Thread Andre Przywara
The DT spec describes the stdout-path property in the /chosen node to
contain the DT path for a default device usable for outputting characters.
The Linux kernel uses this for earlycon (without further parameters),
other DT users might rely on this as well.

Add a stdout-path property pointing to the "serial0" alias, then add an
aliases node at the end of the FDT, containing the actual path. This
allows the FDT generation code in hw/serial.c to set this string.

Even when we use the virtio console, the serial console is still there
and works, so we can expose this unconditionally. Putting the virtio
console path in there will not work anyway.

Signed-off-by: Andre Przywara 
---
 arm/fdt.c | 10 ++
 hw/serial.c   |  9 +
 include/kvm/fdt.h |  2 ++
 3 files changed, 21 insertions(+)

diff --git a/arm/fdt.c b/arm/fdt.c
index 7c50464a..624dbace 100644
--- a/arm/fdt.c
+++ b/arm/fdt.c
@@ -145,6 +145,7 @@ static int setup_fdt(struct kvm *kvm)
 kvm->cfg.real_cmdline));
 
_FDT(fdt_property_u64(fdt, "kaslr-seed", kvm->cfg.arch.kaslr_seed));
+   _FDT(fdt_property_string(fdt, "stdout-path", "serial0"));
 
/* Initrd */
if (kvm->arch.initrd_size != 0) {
@@ -210,6 +211,15 @@ static int setup_fdt(struct kvm *kvm)
_FDT(fdt_property_cell(fdt, "migrate", fns->migrate));
_FDT(fdt_end_node(fdt));
 
+   if (fdt_stdout_path) {
+   _FDT(fdt_begin_node(fdt, "aliases"));
+   _FDT(fdt_property_string(fdt, "serial0", fdt_stdout_path));
+   _FDT(fdt_end_node(fdt));
+
+   free(fdt_stdout_path);
+   fdt_stdout_path = NULL;
+   }
+
/* Finalise. */
_FDT(fdt_end_node(fdt));
_FDT(fdt_finish(fdt));
diff --git a/hw/serial.c b/hw/serial.c
index 2f19ba80..13c4663e 100644
--- a/hw/serial.c
+++ b/hw/serial.c
@@ -366,6 +366,9 @@ static bool serial8250_in(struct ioport *ioport, struct 
kvm_cpu *vcpu, u16 port,
 }
 
 #ifdef CONFIG_HAS_LIBFDT
+
+char *fdt_stdout_path = NULL;
+
 #define DEVICE_NAME_MAX_LEN 32
 static
 void serial8250_generate_fdt_node(struct ioport *ioport, void *fdt,
@@ -383,6 +386,12 @@ void serial8250_generate_fdt_node(struct ioport *ioport, 
void *fdt,
 
snprintf(dev_name, DEVICE_NAME_MAX_LEN, "U6_16550A@%llx", addr);
 
+   if (!fdt_stdout_path) {
+   fdt_stdout_path = malloc(strlen(dev_name) + 2);
+   /* Assumes that this node is a child of the root node. */
+   sprintf(fdt_stdout_path, "/%s", dev_name);
+   }
+
_FDT(fdt_begin_node(fdt, dev_name));
_FDT(fdt_property_string(fdt, "compatible", "ns16550a"));
_FDT(fdt_property(fdt, "reg", reg_prop, sizeof(reg_prop)));
diff --git a/include/kvm/fdt.h b/include/kvm/fdt.h
index beadc7f3..4e615725 100644
--- a/include/kvm/fdt.h
+++ b/include/kvm/fdt.h
@@ -25,6 +25,8 @@ enum irq_type {
IRQ_TYPE_LEVEL_MASK = (IRQ_TYPE_LEVEL_LOW | IRQ_TYPE_LEVEL_HIGH),
 };
 
+extern char *fdt_stdout_path;
+
 /* Helper for the various bits of code that generate FDT nodes */
 #define _FDT(exp)  \
do {\
-- 
2.17.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v2 4/4] arm: Auto-detect guest GIC type

2019-02-01 Thread Andre Przywara
At the moment kvmtool always tries to instantiate a virtual GICv2 for
the guest, and fails with some scary error message if that doesn't work.
The user has then to manually specify "--irqchip=gicv3", which is not
really obvious.
With the advent of more GICv3-only machines, let's try to be more
clever and implement some auto-detection of the GIC type needed:
We try gicv3-its, gicv3, gicv2m and gicv2, in that order. The first one
succeeding wins.
For GICv2 machines the first two will always fail. For GICv2-backwards
compatible GICv3 machines GICv3 is probably the better choice these days.

This algorithm is in effect if there is no explicit --irqchip parameter
on the command line. We still allow the GIC type to be set explicitly.

Signed-off-by: Andre Przywara 
---
 arm/gic.c| 25 +
 arm/include/arm-common/gic.h |  1 +
 2 files changed, 26 insertions(+)

diff --git a/arm/gic.c b/arm/gic.c
index abcbcc09..a86da20e 100644
--- a/arm/gic.c
+++ b/arm/gic.c
@@ -182,6 +182,8 @@ static int gic__create_device(struct kvm *kvm, enum 
irqchip_type type)
gic_device.type = KVM_DEV_TYPE_ARM_VGIC_V3;
dist_attr.attr  = KVM_VGIC_V3_ADDR_TYPE_DIST;
break;
+   case IRQCHIP_AUTO:
+   return -ENODEV;
}
 
err = ioctl(kvm->vm_fd, KVM_CREATE_DEVICE, &gic_device);
@@ -199,6 +201,8 @@ static int gic__create_device(struct kvm *kvm, enum 
irqchip_type type)
case IRQCHIP_GICV3:
err = ioctl(gic_fd, KVM_SET_DEVICE_ATTR, &dist_attr);
break;
+   case IRQCHIP_AUTO:
+   return -ENODEV;
}
if (err)
goto out_err;
@@ -249,9 +253,30 @@ static int gic__create_irqchip(struct kvm *kvm)
 
 int gic__create(struct kvm *kvm, enum irqchip_type type)
 {
+   enum irqchip_type try;
int err;
 
switch (type) {
+   case IRQCHIP_AUTO:
+   try = IRQCHIP_GICV3_ITS;
+   err = gic__create(kvm, try);
+   if (err) {
+   try = IRQCHIP_GICV3;
+   err = gic__create(kvm, try);
+   }
+   if (err) {
+   try = IRQCHIP_GICV2M;
+   err = gic__create(kvm, try);
+   }
+   if (err) {
+   try = IRQCHIP_GICV2;
+   err = gic__create(kvm, try);
+   }
+   if (err)
+   return err;
+
+   kvm->cfg.arch.irqchip = try;
+   return 0;
case IRQCHIP_GICV2M:
gic_msi_size = KVM_VGIC_V2M_SIZE;
gic_msi_base = ARM_GIC_CPUI_BASE - gic_msi_size;
diff --git a/arm/include/arm-common/gic.h b/arm/include/arm-common/gic.h
index 1125d601..ec9cf31a 100644
--- a/arm/include/arm-common/gic.h
+++ b/arm/include/arm-common/gic.h
@@ -24,6 +24,7 @@
 #define KVM_VGIC_V2M_SIZE  0x1000
 
 enum irqchip_type {
+   IRQCHIP_AUTO,
IRQCHIP_GICV2,
IRQCHIP_GICV2M,
IRQCHIP_GICV3,
-- 
2.17.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v2 3/4] Makefile: Remove echoing of kvmtools version file

2019-02-01 Thread Andre Przywara
On every build we report the kvmtool "version" number, which isn't
meaningful at all anymore.

Remove the line from the KVMTOOLS-VERSION-GEN script to drop a
pointless message.

Signed-off-by: Andre Przywara 
---
 util/KVMTOOLS-VERSION-GEN | 1 -
 1 file changed, 1 deletion(-)

diff --git a/util/KVMTOOLS-VERSION-GEN b/util/KVMTOOLS-VERSION-GEN
index f0dcfdea..91ee2c2f 100755
--- a/util/KVMTOOLS-VERSION-GEN
+++ b/util/KVMTOOLS-VERSION-GEN
@@ -35,6 +35,5 @@ else
VC=unset
 fi
 test "$VN" = "$VC" || {
-   echo >&2 "KVMTOOLS_VERSION = $VN"
echo "KVMTOOLS_VERSION = $VN" >$GVF
 }
-- 
2.17.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v2 0/4] Various convenience fixes

2019-02-01 Thread Andre Przywara
As I found myself using kvmtool more often in the last weeks, I got more
and more annoyed by some smaller "ticks" that kvmtool shows.

So this is an attempt to post various smaller fixes I gathered up over
the years, but never found worth enough to send out.
Version 2 drops the two patches that have been merged already, and
amends the remaining ones.

The patches address:
- add /chosen/stdout-path node in .dtb
- honour make -s switch
- remove pointless kvmtool version number
- introduce autodetection of supported GIC type

Changelog v1..v2:
- stdout-path: Let hw/serial.c set the node path and use an alias
- make -s: adapt to newest version from the Linux kernel
- kvmtool "version" number: unchanged, but verified to be compatible
  with the Debian package
- GIC autodetection: ignore --force-pci, always try to have an MSI
  controller

Please have a look!

Cheers,
Andre.

Andre Przywara (4):
  arm: fdt: add stdout-path to /chosen node
  Makefile: support -s switch
  Makefile: Remove echoing of kvmtools version file
  arm: Auto-detect guest GIC type

 Makefile |  6 +-
 arm/fdt.c| 10 ++
 arm/gic.c| 25 +
 arm/include/arm-common/gic.h |  1 +
 hw/serial.c  |  9 +
 include/kvm/fdt.h|  2 ++
 util/KVMTOOLS-VERSION-GEN|  1 -
 7 files changed, 52 insertions(+), 2 deletions(-)

-- 
2.17.1

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 3/4] arm64: hyp-stub: Forbid kprobing of the hyp-stub

2019-02-01 Thread James Morse
Hi Christoffer,

On 31/01/2019 08:04, Christoffer Dall wrote:
> On Thu, Jan 24, 2019 at 04:32:56PM +0000, James Morse wrote:
>> The hyp-stub is loaded by the kernel's early startup code at EL2
>> during boot, before KVM takes ownership later. The hyp-stub's
>> text is part of the regular kernel text, meaning it can be kprobed.
>>
>> A breakpoint in the hyp-stub causes the CPU to spin in el2_sync_invalid.
>>
>> Add it to the __hyp_text.

>> This has been a problem since kprobes was merged, it should
>> probably have been covered in 888b3c8720e0.
>>
>> I'm not sure __hyp_text is the right place. Its not idmaped,
>> and as it contains a set of vectors, adding it to the host/hyp
>> idmap sections could grow them beyond a page... but it does
>> run with the MMU off, so does need to be cleaned to PoC when
>> anything wacky, like hibernate happens. With this patch,
>> hibernate should clean the __hyp_text to PoC too.
> 
> How did this code get cleaned before?

It didn't need to be cleaned as KVM executes it with the MMU on.
KVM's MMU-off code lives in the hyp_idmap, which is cleaned. (as is the kernel's
idmap).

The hibernate-cache-cleaning was trying to do the absolute minimum, but the
hyp-stub got forgotten.


> Is there a problem you can identify with putting it in __hyp_text?

> Seems to me we should just stick it there if it has no negative
> side-effects and otherwise we have to make up a separate section with a
> specialized meaning.

Yup, there is no problem with the extra cache-maintenance.
The hyp-stub is the odd one out: it's runtime code that runs with the MMU off,
but isn't idmaped. I wasn't sure if we wanted to create some special
section. (Having to name it is a good enough reason not to!)


Thanks,

James
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[kvm-unit-tests PATCH v2 3/5] lib: arm: Use ns16550a UART when --vmm=kvmtool

2019-02-01 Thread Alexandru Elisei
When kvm-unit-tests is configured with --vmm=kvmtool, use the address for
the ns16550a UART that kvmtool emulates. When the virtual machine manager
is QEMU, use the address for the pl011 UART, as before.

Signed-off-by: Alexandru Elisei 
---
 configure| 10 +++---
 lib/arm/io.c | 27 ++-
 2 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/configure b/configure
index 0786e1604dba..f81352243dd8 100755
--- a/configure
+++ b/configure
@@ -114,7 +114,11 @@ if [ "$arch" = "i386" ] || [ "$arch" = "x86_64" ]; then
 testdir=x86
 elif [ "$arch" = "arm" ] || [ "$arch" = "arm64" ]; then
 testdir=arm
-if [ "$vmm" != "qemu" ] && [ "$vmm" != "kvmtool" ]; then
+if [ "$vmm" = "qemu" ]; then
+arm_uart_early_addr=0x09000000
+elif [ "$vmm" = "kvmtool" ]; then
+arm_uart_early_addr=0x3f8
+else
 echo '--vmm must be one of "qemu" or "kvmtool"!'
 usage
 fi
@@ -216,11 +220,11 @@ cat < lib/config.h
  * Generated file. DO NOT MODIFY.
  *
  */
+
 EOF
 if [ "$arch" = "arm" ] || [ "$arch" = "arm64" ]; then
 cat <<EOF >> lib/config.h
-
-#define UART_EARLY_BASE (unsigned long)0x09000000
+#define UART_EARLY_BASE (unsigned long)${arm_uart_early_addr}
 
 EOF
 fi
diff --git a/lib/arm/io.c b/lib/arm/io.c
index 0973885d19f5..0d5ab9510ec8 100644
--- a/lib/arm/io.c
+++ b/lib/arm/io.c
@@ -21,32 +21,41 @@ extern void halt(int code);
 
 static struct spinlock uart_lock;
 /*
- * Use this guess for the pl011 base in order to make an attempt at
+ * Use this guess for the uart base in order to make an attempt at
  * having earlier printf support. We'll overwrite it with the real
  * base address that we read from the device tree later. This is
- * the address we expect QEMU's mach-virt machine type to put in
+ * the address we expect the virtual machine manager to put in
  * its generated device tree.
  */
 static volatile u8 *uart0_base = (u8 *)UART_EARLY_BASE;
 
 static void uart0_init(void)
 {
-   const char *compatible = "arm,pl011";
+   /*
+* kvm-unit-tests uses the uart only for output. Both uart models have
+* the TX register at offset 0 from the base address, so there is no
+* need to treat them separately.
+*/
+   const char *compatible[] = {"arm,pl011", "ns16550a"};
struct dt_pbus_reg base;
-   int ret;
+   int i, ret;
 
ret = dt_get_default_console_node();
assert(ret >= 0 || ret == -FDT_ERR_NOTFOUND);
 
if (ret == -FDT_ERR_NOTFOUND) {
 
-   ret = dt_pbus_get_base_compatible(compatible, &base);
-   assert(ret == 0 || ret == -FDT_ERR_NOTFOUND);
+   for (i = 0; i < ARRAY_SIZE(compatible); i++) {
+   ret = dt_pbus_get_base_compatible(compatible[i], &base);
+   assert(ret == 0 || ret == -FDT_ERR_NOTFOUND);
+
+   if (ret == 0)
+   break;
+   }
 
if (ret) {
-   printf("%s: %s not found in the device tree, "
-   "aborting...\n",
-   __func__, compatible);
+   printf("%s: Compatible uart not found in the device 
tree, "
+   "aborting...\n", __func__);
abort();
}
 
-- 
2.17.0

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[kvm-unit-tests PATCH v2 5/5] lib: arm: Fallback to psci_system_off() in exit()

2019-02-01 Thread Alexandru Elisei
On arm and arm64, kvm-unit-tests uses the QEMU chr-testdev device to shut
down the virtual machine at the end of a test. The function
psci_system_off() provides another mechanism for terminating the virtual
machine.

chr-testdev is implemented on top of virtio console. If the virtual machine
manager doesn't emulate a virtio console, then chr_testdev_exit() will fail.
If this happens, try to use psci_system_off() to terminate the test.

This patch makes it possible for a virtual machine manager which doesn't
have support for chr-testdev, or has been configured not to emulate a
virtio console, to gracefully terminate a virtual machine after a test has
been completed.

There is one limitation to using psci_system_off() to terminate a test:
chr-testdev allows kvm-unit-tests to specify an exit code;
psci_system_off() has no such mechanism.

Signed-off-by: Alexandru Elisei 
---
 lib/arm/io.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/arm/io.c b/lib/arm/io.c
index 0d5ab9510ec8..bbb93e63fa3d 100644
--- a/lib/arm/io.c
+++ b/lib/arm/io.c
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -90,6 +91,7 @@ void puts(const char *s)
 void exit(int code)
 {
chr_testdev_exit(code);
+   psci_system_off();
halt(code);
__builtin_unreachable();
 }
-- 
2.17.0

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[kvm-unit-tests PATCH v2 4/5] lib: arm: Implement PSCI SYSTEM_OFF in psci_system_off()

2019-02-01 Thread Alexandru Elisei
A new function, psci_system_off(), is added which implements the PSCI
SYSTEM_OFF function. A call causes the hypervisor to terminate the virtual
machine.

We take this opportunity to rename psci_sys_reset() to psci_system_reset()
to match the name of the PSCI function SYSTEM_RESET that it implements.

Consumers for the function will be added in a later patch.

Signed-off-by: Alexandru Elisei 
---
 lib/arm/asm/psci.h | 3 ++-
 lib/arm/psci.c | 8 +++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/lib/arm/asm/psci.h b/lib/arm/asm/psci.h
index ed51708fd265..7b956bf5987d 100644
--- a/lib/arm/asm/psci.h
+++ b/lib/arm/asm/psci.h
@@ -6,8 +6,9 @@
 extern int psci_invoke(unsigned long function_id, unsigned long arg0,
   unsigned long arg1, unsigned long arg2);
 extern int psci_cpu_on(unsigned long cpuid, unsigned long entry_point);
-extern void psci_sys_reset(void);
+extern void psci_system_reset(void);
 extern int cpu_psci_cpu_boot(unsigned int cpu);
 extern void cpu_psci_cpu_die(void);
+extern void psci_system_off(void);
 
 #endif /* _ASMARM_PSCI_H_ */
diff --git a/lib/arm/psci.c b/lib/arm/psci.c
index 119f74e57e91..c3d399064ae3 100644
--- a/lib/arm/psci.c
+++ b/lib/arm/psci.c
@@ -48,7 +48,13 @@ void cpu_psci_cpu_die(void)
printf("CPU%d unable to power off (error = %d)\n", smp_processor_id(), 
err);
 }
 
-void psci_sys_reset(void)
+void psci_system_reset(void)
 {
psci_invoke(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0);
 }
+
+void psci_system_off(void)
+{
+   int err = psci_invoke(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0);
+   printf("CPU%d unable to do system off (error = %d)\n", 
smp_processor_id(), err);
+}
-- 
2.17.0

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[kvm-unit-tests PATCH v2 1/5] lib: arm: Use UART address from generated config.h

2019-02-01 Thread Alexandru Elisei
Generate lib/config.h when configuring kvm-unit-tests. The file is empty
for all architectures except for arm and arm64, where it is used to store
the UART base address. This removes the hardcoded address from lib/arm/io.c
and provides a mechanism for using different UART addresses in the future.

Signed-off-by: Alexandru Elisei 
---
 configure| 17 +
 Makefile |  2 +-
 lib/arm/io.c |  5 ++---
 .gitignore   |  1 +
 4 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/configure b/configure
index df8581e3a906..44708b026422 100755
--- a/configure
+++ b/configure
@@ -198,3 +198,20 @@ ENVIRON_DEFAULT=$environ_default
 ERRATATXT=errata.txt
 U32_LONG_FMT=$u32_long
 EOF
+
+cat <<EOF > lib/config.h
+#ifndef CONFIG_H
+#define CONFIG_H 1
+/*
+ * Generated file. DO NOT MODIFY.
+ *
+ */
+EOF
+if [ "$arch" = "arm" ] || [ "$arch" = "arm64" ]; then
+cat <<EOF >> lib/config.h
+
+#define UART_EARLY_BASE (unsigned long)0x09000000
+
+EOF
+fi
+echo "#endif" >> lib/config.h
diff --git a/Makefile b/Makefile
index e9f02272e156..643af05678ad 100644
--- a/Makefile
+++ b/Makefile
@@ -115,7 +115,7 @@ libfdt_clean:
$(LIBFDT_objdir)/.*.d
 
 distclean: clean libfdt_clean
-   $(RM) lib/asm config.mak $(TEST_DIR)-run msr.out cscope.* build-head
+   $(RM) lib/asm lib/config.h config.mak $(TEST_DIR)-run msr.out cscope.* 
build-head
$(RM) -r tests logs logs.old
 
 cscope: cscope_dirs = lib lib/libfdt lib/linux $(TEST_DIR) $(ARCH_LIBDIRS) 
lib/asm-generic
diff --git a/lib/arm/io.c b/lib/arm/io.c
index d2c1a07c19ee..0973885d19f5 100644
--- a/lib/arm/io.c
+++ b/lib/arm/io.c
@@ -11,6 +11,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -18,6 +19,7 @@
 
 extern void halt(int code);
 
+static struct spinlock uart_lock;
 /*
  * Use this guess for the pl011 base in order to make an attempt at
  * having earlier printf support. We'll overwrite it with the real
@@ -25,9 +27,6 @@ extern void halt(int code);
  * the address we expect QEMU's mach-virt machine type to put in
  * its generated device tree.
  */
-#define UART_EARLY_BASE 0x09000000UL
-
-static struct spinlock uart_lock;
 static volatile u8 *uart0_base = (u8 *)UART_EARLY_BASE;
 
 static void uart0_init(void)
diff --git a/.gitignore b/.gitignore
index 2405a8087ae5..483f7c7a09ea 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,7 @@ patches
 cscope.*
 *.swp
 /lib/asm
+/lib/config.h
 /config.mak
 /*-run
 /msr.out
-- 
2.17.0

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[kvm-unit-tests PATCH v2 2/5] configure: arm/arm64: Add --vmm option with no effect

2019-02-01 Thread Alexandru Elisei
Add configuration option --vmm to specify the virtual machine manager.
Valid choices are 'qemu' and 'kvmtool', the default being 'qemu'. This
option is only available for the arm and arm64 architectures and does
nothing for now.

Signed-off-by: Alexandru Elisei 
---
 configure | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/configure b/configure
index 44708b026422..0786e1604dba 100755
--- a/configure
+++ b/configure
@@ -16,6 +16,7 @@ endian=""
 pretty_print_stacks=yes
 environ_default=yes
 u32_long=
+vmm="qemu"
 
 usage() {
 cat <<-EOF
@@ -24,6 +25,8 @@ usage() {
Options include:
--arch=ARCHarchitecture to compile for ($arch)
--processor=PROCESSOR  processor to compile for ($arch)
+   --vmm=VMM  virtual machine monitor to compile for (qemu
+  or kvmtool, default is qemu) (arm/arm64 only)
--cross-prefix=PREFIX  cross compiler prefix
--cc=CCc compiler to use ($cc)
--cxx=CXX  c++ compiler to use ($cxx)
@@ -56,6 +59,9 @@ while [[ "$1" = -* ]]; do
 --processor)
processor="$arg"
;;
+   --vmm)
+   vmm="$arg"
+   ;;
--cross-prefix)
cross_prefix="$arg"
;;
@@ -108,6 +114,10 @@ if [ "$arch" = "i386" ] || [ "$arch" = "x86_64" ]; then
 testdir=x86
 elif [ "$arch" = "arm" ] || [ "$arch" = "arm64" ]; then
 testdir=arm
+if [ "$vmm" != "qemu" ] && [ "$vmm" != "kvmtool" ]; then
+echo '--vmm must be one of "qemu" or "kvmtool"!'
+usage
+fi
 elif [ "$arch" = "ppc64" ]; then
 testdir=powerpc
 firmware="$testdir/boot_rom.bin"
-- 
2.17.0

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[kvm-unit-tests PATCH v2 0/5] arm/arm64: Add support for running under kvmtool

2019-02-01 Thread Alexandru Elisei
kvm-unit-tests is designed to be run with QEMU as the virtual machine
monitor. It relies on devices emulated by QEMU (like isa-debug-exit or
testdev) and it makes certain assumptions based on the implicit QEMU
virtual environment configuration (like the serial base address).

kvmtool [1] is a lightweight virtual machine monitor for running KVM
guests. kvmtool has reduced complexity compared to QEMU and is easily
hackable.

This patch series aims to make it possible to run kvm-unit-tests using
kvmtool on the arm and arm64 architectures, with two caveats:

(1) When terminating a test, the userspace process won't exit with an exit
code that signals the success or failure of the test. Output from the test
can still be parsed to determine the outcome of the test.

(2) kvmtool has been designed to work with a Linux guest and it
automatically generates the command line arguments for a Linux kernel. This
causes the arm/arm64 selftest and gic tests to fail because those tests are
very specific with regards to the command line arguments that are provided
by the virtual machine manager.

With regards to (2), a patch was posted that seeks to eliminate this
behavior for kvmtool [2].

The run scripts haven't been modified. To run a test under kvmtool, one
needs to launch kvmtool manually. For example, to run the timer test the
following command can be used:

lkvm run -c 1 --console=serial -k timer.flat.

Changes in v2:
* Generate lib/config.h when configuring kvm-unit-tests; arm/arm64 uses it
  to get the UART address.
* Added --vmm configure option for arm/arm64 which will set the UART
  address in lib/config.h when the tests are run under QEMU or kvmtool.
* Renamed psci_sys_reset() to psci_system_reset().
* Dropped patches that allowed a test to ignore unexpected command line
  arguments.

Summary:
* Patches 1, 2 and 3 add support for configuring kvm-unit-tests on arm and
  arm64 to use the ns16550a UART emulated by kvmtool.
* Patches 4 and 5 provide an alternative mechanism for terminating the
  virtual machine by using PSCI.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/will/kvmtool.git/
[2] https://www.spinics.net/lists/kvm-arm/msg34352.html

Alexandru Elisei (5):
  lib: arm: Use UART address from generated config.h
  configure: arm/arm64: Add --vmm option with no effect
  lib: arm: Use ns16550a UART when --vmm=kvmtool
  lib: arm: Implement PSCI SYSTEM_OFF in psci_system_off()
  lib: arm: Fallback to psci_system_off() in exit()

 configure  | 31 +++
 Makefile   |  2 +-
 lib/arm/asm/psci.h |  3 ++-
 lib/arm/io.c   | 34 ++
 lib/arm/psci.c |  8 +++-
 .gitignore |  1 +
 6 files changed, 64 insertions(+), 15 deletions(-)

-- 
2.17.0

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH kvmtool 2/6] arm: fdt: add stdout-path to /chosen node

2019-02-01 Thread Andre Przywara
On Fri, 1 Feb 2019 06:26:58 +
Will Deacon  wrote:

Hi,

> On Thu, Jan 31, 2019 at 02:57:11PM +, Andre Przywara wrote:
> > On Wed, 30 Jan 2019 18:20:19 +
> > Will Deacon  wrote:  
> > > On Fri, Jan 25, 2019 at 06:07:57PM +, Andre Przywara wrote:  
> > > > The DT spec describes the stdout-path property in the /chosen
> > > > node to contain the DT path for a default device usable for
> > > > outputting characters. The Linux kernel uses this for earlycon
> > > > (without further parameters), other DT users might rely on this
> > > > as well.
> > > > 
> > > > Add a property containing the path to our emulated 8250 serial
> > > > device.
> > > > 
> > > > Even when we use the virtio console, the serial console is still
> > > > there and works, so we can expose this unconditionally. Putting
> > > > the virtio console path in there will not work anyway.
> > > > 
> > > > Signed-off-by: Andre Przywara 
> > > > ---
> > > >  arm/fdt.c | 1 +
> > > >  1 file changed, 1 insertion(+)
> > > > 
> > > > diff --git a/arm/fdt.c b/arm/fdt.c
> > > > index 28ba1c2c..8cda3ded 100644
> > > > --- a/arm/fdt.c
> > > > +++ b/arm/fdt.c
> > > > @@ -143,6 +143,7 @@ static int setup_fdt(struct kvm *kvm)
> > > > _FDT(fdt_property_cell(fdt, "linux,pci-probe-only",
> > > > 1)); _FDT(fdt_property_string(fdt, "bootargs",
> > > > kvm->cfg.real_cmdline)); _FDT(fdt_property_u64(fdt,
> > > > "kaslr-seed", kvm->cfg.arch.kaslr_seed));
> > > > +   _FDT(fdt_property_string(fdt, "stdout-path",
> > > > "/U6_16550A@3f8"));
> > > 
> > > Since the last string here has to match the one in hw/serial.c, I
> > > think we should be retrieving it from there rather than
> > > hardcoding it here.  
> > 
> > Are you thinking about something like setting a "char
> > *primary_console_path" in
> > hw/serial.c:serial8250_generate_fdt_node(), then using this here in
> > arm/fdt.c? Or shall the generate function directly set the
> > stdout-path?  
> 
> It's probably a bit dodgy doing it from the generate function,
> because I think we'd then be relying on the /chosen node being
> created before the serial node (which is true, but I wouldn't like to
> rely on it).

Agreed.

> So I think either have a way to register the primary console device,
> or a way to squirrel the string away somewhere. Is there a variant of
> stdout-path which uses a phandle instead of a string?  That might end
> up being cleaner to implement.

stdout-path must be a string, so a phandle will not work. However that
string can be an alias, so I now let stdout-path always be "serial0",
then add an aliases node at the end of the FDT generation, assigning
serial0 to the path of the first instantiated serial device.

I will post a v2 with that.

Cheers,
Andre.
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 1/4] KVM: arm64: Forbid kprobing of the VHE world-switch code

2019-02-01 Thread Christoffer Dall
On Thu, Jan 31, 2019 at 06:53:06PM +, James Morse wrote:
> Hey Christoffer,
> 
> On 31/01/2019 08:08, Christoffer Dall wrote:
> > On Thu, Jan 24, 2019 at 04:32:54PM +, James Morse wrote:
> >> On systems with VHE the kernel and KVM's world-switch code run at the
> >> same exception level. Code that is only used on a VHE system does not
> >> need to be annotated as __hyp_text as it can reside anywhere in the
> >> kernel text.
> >>
> >> __hyp_text was also used to prevent kprobes from patching breakpoint
> >> instructions into this region, as this code runs at a different
> >> exception level. While this is no longer true with VHE, KVM still
> >> switches VBAR_EL1, meaning a kprobe's breakpoint executed in the
> >> world-switch code will cause a hyp-panic.
> > 
> > Forgive potentially very stupid questions here, but:
> > 
> >  (1) Would it make sense to move the save/restore VBAR_EL1 to the last
> >  possible moment, and would that actually allow kprobes to work for
> >  the world-switch code, or does that just result in other weird
> >  problems?
> 
> This would work for taking the debug exception. But next kprobes wants to
> single-step the probed instruction in an out-of-line slot. I don't think we
> can do this if we've already configured the debug hardware for the guest.
> (If could at least turn single-step off when we return to guest-EL0, which
> guest-EL1 was single-stepping)
> 
> 

I suspected something like that, let's not go there.

> >  (2) Are we sure that this catches every call path of every non-inlined
> >  function called after switching VBAR_EL1?  Can kprobes only be
> >  called on exported symbols, or can you (if you know the address
> >  somehow) put a kprobe on a static function as well?  If there are
> >  any concerns in this area, we might want to consider (1) more
> >  closely.
> 
> Hmmm, good question. The blacklisting applies to whole symbols as seen by
> kallsyms, the compiler has no idea what is going on.
> 
> If it chose not to inline something, it would be kprobe'able yes.
> 
> __kprobes uses a section function-attribute instead. The gcc manual[0] doesn't
> say what happens when inline and the section attributes are used together. (or
> at least I couldn't find it)
> 
> A quick experiment with gcc 8.2.0 shows adding __kprobes on the inlines gets
> discarded when they are inlined. I'm not sure how to trick the compiler into
> not-inlining it to see what happens, but adding 'noinline' to the header file
> causes it to duplicate the function everywhere, but puts it in the __kprobes
> section.
> 
> (For KVM we could use the 'flatten' attribute, but that does say 'if
> possible'.
> Alternatively we can decorate all the inline helpers we know we use with
> __kprobes as a safety net.)
> 
> I think this is a wider problem with kprobes.
> 

Sounds like it.  Probably in the "you did something crazy, and your
kernel is going to suffer from it" category.

Let's stick to your approach.

Thanks for the explanation.

Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm