Re: [PATCH v2 1/2] KVM: Refactor kvm_arch_vcpu_fault() to return a struct page pointer

2021-08-12 Thread David Hildenbrand
_arch_vcpu_fault(vcpu, vmf);
+   if (!page)
+   return VM_FAULT_SIGBUS;
+
get_page(page);
vmf->page = page;
    return 0;



Reviewed-by: David Hildenbrand 

But at the same time I wonder if we should just get rid of 
CONFIG_KVM_S390_UCONTROL and consequently kvm_arch_vcpu_fault().



In practice CONFIG_KVM_S390_UCONTROL, is never enabled in any reasonable 
kernel build and consequently it's never tested; further, exposing the 
sie_block to user space allows user space to generate random SIE 
validity intercepts.


CONFIG_KVM_S390_UCONTROL feels like something that should just be 
maintained out of tree by someone who really needs to hack deep into hw 
virtualization for testing purposes etc.


--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [kvm-unit-tests PATCH v2 7/7] x86: unify header guards

2021-06-09 Thread David Hildenbrand
f8ccbfe 100644
--- a/x86/svm.h
+++ b/x86/svm.h
@@ -1,5 +1,5 @@
-#ifndef __SVM_H
-#define __SVM_H
+#ifndef X86_SVM_H
+#define X86_SVM_H
  
  #include "libcflat.h"
  
diff --git a/x86/types.h b/x86/types.h

index 047556e854d6..56ce5ececdec 100644
--- a/x86/types.h
+++ b/x86/types.h
@@ -1,5 +1,5 @@
-#ifndef __TYPES_H
-#define __TYPES_H
+#ifndef X86_TYPES_H
+#define X86_TYPES_H
  
  #define DE_VECTOR 0

  #define DB_VECTOR 1
diff --git a/x86/vmx.h b/x86/vmx.h
index 7e39b843cafb..2c534ca4b801 100644
--- a/x86/vmx.h
+++ b/x86/vmx.h
@@ -1,5 +1,5 @@
-#ifndef __VMX_H
-#define __VMX_H
+#ifndef X86_VMX_H
+#define X86_VMX_H
  
  #include "libcflat.h"

  #include "processor.h"



Reviewed-by: David Hildenbrand 

--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [kvm-unit-tests PATCH v2 6/7] s390x: unify header guards

2021-06-09 Thread David Hildenbrand

On 09.06.21 16:37, Cornelia Huck wrote:

Standardize header guards to _ASMS390X_HEADER_H_, _S390X_HEADER_H_,
and S390X_HEADER_H, respectively.

Signed-off-by: Cornelia Huck 
---
  lib/s390x/asm/arch_def.h | 4 ++--
  lib/s390x/asm/barrier.h  | 4 ++--
  lib/s390x/asm/cpacf.h| 6 +++---
  lib/s390x/asm/facility.h | 4 ++--
  lib/s390x/asm/float.h| 4 ++--
  lib/s390x/asm/mem.h  | 4 ++--
  lib/s390x/asm/sigp.h | 6 +++---
  lib/s390x/asm/spinlock.h | 4 ++--
  lib/s390x/asm/time.h | 4 ++--
  lib/s390x/asm/uv.h   | 4 ++--
  lib/s390x/css.h  | 4 ++--
  lib/s390x/interrupt.h| 4 ++--
  lib/s390x/mmu.h  | 4 ++--
  lib/s390x/sclp.h | 6 +++---
  lib/s390x/sie.h  | 6 +++---
  lib/s390x/smp.h  | 4 ++--
  lib/s390x/uv.h   | 4 ++--
  lib/s390x/vm.h   | 6 +++---
  s390x/sthyi.h| 4 ++--
  19 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/lib/s390x/asm/arch_def.h b/lib/s390x/asm/arch_def.h
index 7e2c5e623ab2..76f9e3862965 100644
--- a/lib/s390x/asm/arch_def.h
+++ b/lib/s390x/asm/arch_def.h
@@ -5,8 +5,8 @@
   * Authors:
   *  David Hildenbrand 
   */
-#ifndef _ASM_S390X_ARCH_DEF_H_
-#define _ASM_S390X_ARCH_DEF_H_
+#ifndef _ASMS390X_ARCH_DEF_H_
+#define _ASMS390X_ARCH_DEF_H_
  
  struct stack_frame {

struct stack_frame *back_chain;
diff --git a/lib/s390x/asm/barrier.h b/lib/s390x/asm/barrier.h
index 8e2fd6d90034..9534f9e8fa96 100644
--- a/lib/s390x/asm/barrier.h
+++ b/lib/s390x/asm/barrier.h
@@ -6,8 +6,8 @@
   *  Thomas Huth 
   *  David Hildenbrand 
   */
-#ifndef _ASM_S390X_BARRIER_H_
-#define _ASM_S390X_BARRIER_H_
+#ifndef _ASMS390X_BARRIER_H_
+#define _ASMS390X_BARRIER_H_
  
  #include 
  
diff --git a/lib/s390x/asm/cpacf.h b/lib/s390x/asm/cpacf.h

index 805fcf1a2d71..685262b0ff62 100644
--- a/lib/s390x/asm/cpacf.h
+++ b/lib/s390x/asm/cpacf.h
@@ -8,8 +8,8 @@
   *  Harald Freudenberger (fre...@de.ibm.com)
   *  Martin Schwidefsky 
   */
-#ifndef _ASM_S390_CPACF_H
-#define _ASM_S390_CPACF_H
+#ifndef _ASMS390X_CPACF_H_
+#define _ASMS390X_CPACF_H_
  
  #include 

  #include 
@@ -471,4 +471,4 @@ static inline void cpacf_pckmo(long func, void *param)
: "cc", "memory");
  }
  
-#endif	/* _ASM_S390_CPACF_H */

+#endif /* _ASMS390X_CPACF_H_ */
diff --git a/lib/s390x/asm/facility.h b/lib/s390x/asm/facility.h
index 95d4a15fe34f..ef0fd037ed35 100644
--- a/lib/s390x/asm/facility.h
+++ b/lib/s390x/asm/facility.h
@@ -5,8 +5,8 @@
   * Authors:
   *  David Hildenbrand 
   */
-#ifndef _ASM_S390X_FACILITY_H_
-#define _ASM_S390X_FACILITY_H_
+#ifndef _ASMS390X_FACILITY_H_
+#define _ASMS390X_FACILITY_H_
  
  #include 

  #include 
diff --git a/lib/s390x/asm/float.h b/lib/s390x/asm/float.h
index 136794475849..eb752050b162 100644
--- a/lib/s390x/asm/float.h
+++ b/lib/s390x/asm/float.h
@@ -5,8 +5,8 @@
   * Authors:
   *  David Hildenbrand 
   */
-#ifndef _ASM_S390X_FLOAT_H_
-#define _ASM_S390X_FLOAT_H_
+#ifndef _ASMS390X_FLOAT_H_
+#define _ASMS390X_FLOAT_H_
  
  static inline void set_fpc(uint32_t fpc)

  {
diff --git a/lib/s390x/asm/mem.h b/lib/s390x/asm/mem.h
index 281390ebd816..1963cef7ec03 100644
--- a/lib/s390x/asm/mem.h
+++ b/lib/s390x/asm/mem.h
@@ -5,8 +5,8 @@
   * Copyright IBM Corp. 2018
   * Author(s): Janosch Frank 
   */
-#ifndef _ASM_S390_MEM_H
-#define _ASM_S390_MEM_H
+#ifndef _ASMS390X_MEM_H_
+#define _ASMS390X_MEM_H_
  
  #define SKEY_ACC	0xf0

  #define SKEY_FP   0x08
diff --git a/lib/s390x/asm/sigp.h b/lib/s390x/asm/sigp.h
index 00844d26d15a..61d2c6256fed 100644
--- a/lib/s390x/asm/sigp.h
+++ b/lib/s390x/asm/sigp.h
@@ -5,8 +5,8 @@
   * Copied from the Linux kernel file arch/s390/include/asm/sigp.h
   */
  
-#ifndef ASM_S390X_SIGP_H

-#define ASM_S390X_SIGP_H
+#ifndef _ASMS390X_SIGP_H_
+#define _ASMS390X_SIGP_H_
  
  /* SIGP order codes */

  #define SIGP_SENSE1
@@ -73,4 +73,4 @@ static inline int sigp_retry(uint16_t addr, uint8_t order, 
unsigned long parm,
  }
  
  #endif /* __ASSEMBLER__ */

-#endif /* ASM_S390X_SIGP_H */
+#endif /* _ASMS390X_SIGP_H_ */
diff --git a/lib/s390x/asm/spinlock.h b/lib/s390x/asm/spinlock.h
index 677d2cd6e287..22d4d3899569 100644
--- a/lib/s390x/asm/spinlock.h
+++ b/lib/s390x/asm/spinlock.h
@@ -6,8 +6,8 @@
   *  Thomas Huth 
   *  David Hildenbrand 
   */
-#ifndef __ASMS390X_SPINLOCK_H
-#define __ASMS390X_SPINLOCK_H
+#ifndef _ASMS390X_SPINLOCK_H_
+#define _ASMS390X_SPINLOCK_H_
  
  #include 
  
diff --git a/lib/s390x/asm/time.h b/lib/s390x/asm/time.h

index 0d67f7231992..7652a151e87a 100644
--- a/lib/s390x/asm/time.h
+++ b/lib/s390x/asm/time.h
@@ -8,8 +8,8 @@
   * Copied from the s390/intercept test by:
   *  Pierre Morel 
   */
-#ifndef ASM_S390X_TIME_H
-#define ASM_S390X_TIME_H
+#ifndef _ASMS390X_TIME_H_
+#define _ASMS390X_TIME_H_
  
  #define STCK_SHIFT_US	(63 - 51)

  #define STCK_MAX  ((1UL << 52) - 1)
diff --git a/lib/s390x/asm/uv.h b/lib/s390x/asm/uv.h
index b22cbaa87109..dc3e02dea1b4 

Re: [kvm-unit-tests PATCH v2 5/7] powerpc: unify header guards

2021-06-09 Thread David Hildenbrand

On 09.06.21 16:37, Cornelia Huck wrote:

Only spapr.h needed a tweak.

Signed-off-by: Cornelia Huck 
---
  powerpc/spapr.h | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/powerpc/spapr.h b/powerpc/spapr.h
index b41aece07968..3a29598be44f 100644
--- a/powerpc/spapr.h
+++ b/powerpc/spapr.h
@@ -1,6 +1,6 @@
-#ifndef _ASMPOWERPC_SPAPR_H_
-#define _ASMPOWERPC_SPAPR_H_
+#ifndef POWERPC_SPAPR_H
+#define POWERPC_SPAPR_H
  
  #define SPAPR_KERNEL_LOAD_ADDR 0x40
  
-#endif /* _ASMPOWERPC_SPAPR_H_ */

+#endif /* POWERPC_SPAPR_H */



Reviewed-by: David Hildenbrand 

--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [kvm-unit-tests PATCH v2 4/7] arm: unify header guards

2021-06-09 Thread David Hildenbrand

On 09.06.21 16:37, Cornelia Huck wrote:

The assembler.h files were the only ones not already following
the convention.

Signed-off-by: Cornelia Huck 
---
  lib/arm/asm/assembler.h   | 6 +++---
  lib/arm64/asm/assembler.h | 6 +++---
  2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/arm/asm/assembler.h b/lib/arm/asm/assembler.h
index dfd3c51bf6ad..4200252dd14d 100644
--- a/lib/arm/asm/assembler.h
+++ b/lib/arm/asm/assembler.h
@@ -8,8 +8,8 @@
  #error "Only include this from assembly code"
  #endif
  
-#ifndef __ASM_ASSEMBLER_H

-#define __ASM_ASSEMBLER_H
+#ifndef _ASMARM_ASSEMBLER_H_
+#define _ASMARM_ASSEMBLER_H_
  
  /*

   * dcache_line_size - get the minimum D-cache line size from the CTR register
@@ -50,4 +50,4 @@
dsb \domain
.endm
  
-#endif	/* __ASM_ASSEMBLER_H */

+#endif /* _ASMARM_ASSEMBLER_H_ */
diff --git a/lib/arm64/asm/assembler.h b/lib/arm64/asm/assembler.h
index 0a6ab9720bdd..a271e4ceefe6 100644
--- a/lib/arm64/asm/assembler.h
+++ b/lib/arm64/asm/assembler.h
@@ -12,8 +12,8 @@
  #error "Only include this from assembly code"
  #endif
  
-#ifndef __ASM_ASSEMBLER_H

-#define __ASM_ASSEMBLER_H
+#ifndef _ASMARM64_ASSEMBLER_H_
+#define _ASMARM64_ASSEMBLER_H_
  
  /*

   * raw_dcache_line_size - get the minimum D-cache line size on this CPU
@@ -51,4 +51,4 @@
dsb \domain
.endm
  
-#endif	/* __ASM_ASSEMBLER_H */

+#endif /* _ASMARM64_ASSEMBLER_H_ */



Reviewed-by: David Hildenbrand 

--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [kvm-unit-tests PATCH v2 3/7] asm-generic: unify header guards

2021-06-09 Thread David Hildenbrand

On 09.06.21 16:37, Cornelia Huck wrote:

Standardize header guards to _ASM_GENERIC_HEADER_H_.

Signed-off-by: Cornelia Huck 
---
  lib/asm-generic/atomic.h  | 4 ++--
  lib/asm-generic/barrier.h | 6 +++---
  lib/asm-generic/memory_areas.h| 4 ++--
  lib/asm-generic/pci-host-bridge.h | 4 ++--
  4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/lib/asm-generic/atomic.h b/lib/asm-generic/atomic.h
index 26b645a7cc18..b09ce95053e7 100644
--- a/lib/asm-generic/atomic.h
+++ b/lib/asm-generic/atomic.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_GENERIC_ATOMIC_H__
-#define __ASM_GENERIC_ATOMIC_H__
+#ifndef _ASM_GENERIC_ATOMIC_H_
+#define _ASM_GENERIC_ATOMIC_H_
  
  /* From QEMU include/qemu/atomic.h */

  #define atomic_fetch_inc(ptr)  __sync_fetch_and_add(ptr, 1)
diff --git a/lib/asm-generic/barrier.h b/lib/asm-generic/barrier.h
index 6a990ff8d5a5..5499a5664d4d 100644
--- a/lib/asm-generic/barrier.h
+++ b/lib/asm-generic/barrier.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_BARRIER_H_
-#define _ASM_BARRIER_H_
+#ifndef _ASM_GENERIC_BARRIER_H_
+#define _ASM_GENERIC_BARRIER_H_
  /*
   * asm-generic/barrier.h
   *
@@ -32,4 +32,4 @@
  #define cpu_relax()   asm volatile ("":::"memory")
  #endif
  
-#endif /* _ASM_BARRIER_H_ */

+#endif /* _ASM_GENERIC_BARRIER_H_ */
diff --git a/lib/asm-generic/memory_areas.h b/lib/asm-generic/memory_areas.h
index 3074afe23393..c86db255ecee 100644
--- a/lib/asm-generic/memory_areas.h
+++ b/lib/asm-generic/memory_areas.h
@@ -1,5 +1,5 @@
-#ifndef __ASM_GENERIC_MEMORY_AREAS_H__
-#define __ASM_GENERIC_MEMORY_AREAS_H__
+#ifndef _ASM_GENERIC_MEMORY_AREAS_H_
+#define _ASM_GENERIC_MEMORY_AREAS_H_
  
  #define AREA_NORMAL_PFN 0

  #define AREA_NORMAL_NUMBER 0
diff --git a/lib/asm-generic/pci-host-bridge.h 
b/lib/asm-generic/pci-host-bridge.h
index 9e91499b9446..174ff341dd0d 100644
--- a/lib/asm-generic/pci-host-bridge.h
+++ b/lib/asm-generic/pci-host-bridge.h
@@ -1,5 +1,5 @@
-#ifndef _ASM_PCI_HOST_BRIDGE_H_
-#define _ASM_PCI_HOST_BRIDGE_H_
+#ifndef _ASM_GENERIC_PCI_HOST_BRIDGE_H_
+#define _ASM_GENERIC_PCI_HOST_BRIDGE_H_
  /*
   * Copyright (C) 2016, Red Hat Inc, Alexander Gordeev 
   *



Reviewed-by: David Hildenbrand 

--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [kvm-unit-tests PATCH v2 2/7] lib: unify header guards

2021-06-09 Thread David Hildenbrand

On 09.06.21 16:37, Cornelia Huck wrote:

Standardize header guards to _LIB_HEADER_H_.

Signed-off-by: Cornelia Huck 
---
  lib/alloc_page.h   | 4 ++--
  lib/libcflat.h | 4 ++--
  lib/list.h | 4 ++--
  lib/pci-edu.h  | 4 ++--
  lib/pci-host-generic.h | 4 ++--
  lib/setjmp.h   | 4 ++--
  lib/string.h   | 6 +++---
  lib/vmalloc.h  | 4 ++--
  8 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/lib/alloc_page.h b/lib/alloc_page.h
index 1af1419d49b6..eed2ba06eeaf 100644
--- a/lib/alloc_page.h
+++ b/lib/alloc_page.h
@@ -5,8 +5,8 @@
   * with byte granularity.
   */
  
-#ifndef ALLOC_PAGE_H

-#define ALLOC_PAGE_H 1
+#ifndef _ALLOC_PAGE_H_
+#define _ALLOC_PAGE_H_
  
  #include 

  #include 
diff --git a/lib/libcflat.h b/lib/libcflat.h
index 460a1234ea6a..f40b431d1550 100644
--- a/lib/libcflat.h
+++ b/lib/libcflat.h
@@ -17,8 +17,8 @@
   * Authors: Hollis Blanchard 
   */
  
-#ifndef __LIBCFLAT_H

-#define __LIBCFLAT_H
+#ifndef _LIBCFLAT_H_
+#define _LIBCFLAT_H_
  
  #ifndef __ASSEMBLY__
  
diff --git a/lib/list.h b/lib/list.h

index 7f9717ef6258..ed3e52b40075 100644
--- a/lib/list.h
+++ b/lib/list.h
@@ -1,5 +1,5 @@
-#ifndef LIST_H
-#define LIST_H
+#ifndef _LIST_H_
+#define _LIST_H_
  
  #include 
  
diff --git a/lib/pci-edu.h b/lib/pci-edu.h

index 44b4ba168768..9db94aec0bc7 100644
--- a/lib/pci-edu.h
+++ b/lib/pci-edu.h
@@ -12,8 +12,8 @@
   * Edu device is a virtualized device in QEMU. Please refer to
   * docs/specs/edu.txt in QEMU repository for EDU device manual.
   */
-#ifndef __PCI_EDU_H__
-#define __PCI_EDU_H__
+#ifndef _PCI_EDU_H_
+#define _PCI_EDU_H_
  
  #include "pci.h"

  #include "asm/io.h"
diff --git a/lib/pci-host-generic.h b/lib/pci-host-generic.h
index 0ffe6380ec8f..3020ee22c837 100644
--- a/lib/pci-host-generic.h
+++ b/lib/pci-host-generic.h
@@ -1,5 +1,5 @@
-#ifndef PCI_HOST_GENERIC_H
-#define PCI_HOST_GENERIC_H
+#ifndef _PCI_HOST_GENERIC_H_
+#define _PCI_HOST_GENERIC_H_
  /*
   * PCI host bridge supporting structures and constants
   *
diff --git a/lib/setjmp.h b/lib/setjmp.h
index 2c56b4c68aaa..6afdf665681a 100644
--- a/lib/setjmp.h
+++ b/lib/setjmp.h
@@ -4,8 +4,8 @@
   * This code is free software; you can redistribute it and/or modify it
   * under the terms of the GNU Library General Public License version 2.
   */
-#ifndef LIBCFLAT_SETJMP_H
-#define LIBCFLAT_SETJMP_H 1
+#ifndef _LIBCFLAT_SETJMP_H_
+#define _LIBCFLAT_SETJMP_H_
  
  typedef struct jmp_buf_tag {

long int regs[8];
diff --git a/lib/string.h b/lib/string.h
index e1febfed7fb2..b07763eaef10 100644
--- a/lib/string.h
+++ b/lib/string.h
@@ -4,8 +4,8 @@
   * This code is free software; you can redistribute it and/or modify it
   * under the terms of the GNU Library General Public License version 2.
   */
-#ifndef __STRING_H
-#define __STRING_H
+#ifndef _STRING_H_
+#define _STRING_H_
  
  extern size_t strlen(const char *buf);

  extern size_t strnlen(const char *buf, size_t maxlen);
@@ -23,4 +23,4 @@ extern int memcmp(const void *s1, const void *s2, size_t n);
  extern void *memmove(void *dest, const void *src, size_t n);
  extern void *memchr(const void *s, int c, size_t n);
  
-#endif /* _STRING_H */

+#endif /* _STRING_H_ */
diff --git a/lib/vmalloc.h b/lib/vmalloc.h
index 8b158f591d75..346f94f198c5 100644
--- a/lib/vmalloc.h
+++ b/lib/vmalloc.h
@@ -1,5 +1,5 @@
-#ifndef VMALLOC_H
-#define VMALLOC_H 1
+#ifndef _VMALLOC_H_
+#define _VMALLOC_H_
  
  #include 
  



Reviewed-by: David Hildenbrand 

--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [kvm-unit-tests PATCH v2 1/7] README.md: add guideline for header guards format

2021-06-09 Thread David Hildenbrand

On 09.06.21 16:37, Cornelia Huck wrote:

Signed-off-by: Cornelia Huck 
---
  README.md | 9 +
  1 file changed, 9 insertions(+)

diff --git a/README.md b/README.md
index 24d4bdaaee0d..687ff50d0af1 100644
--- a/README.md
+++ b/README.md
@@ -156,6 +156,15 @@ Exceptions:
  
- While the kernel standard requires 80 columns, we allow up to 120.
  
+Header guards:

+
+Please try to adhere to adhere to the following patterns when adding
+"#ifndef <...> #define <...>" header guards:
+./lib: _HEADER_H_
+./lib/:  _ARCH_HEADER_H_
+./lib//asm:  _ASMARCH_HEADER_H_


I'd have used _ARCH_ASM_HEADER_H_

Reviewed-by: David Hildenbrand 

--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: arm32: panic in move_freepages (Was [PATCH v2 0/4] arm64: drop pfn_valid_within() and simplify pfn_valid())

2021-05-03 Thread David Hildenbrand

On 03.05.21 08:26, Mike Rapoport wrote:

On Fri, Apr 30, 2021 at 07:24:37PM +0800, Kefeng Wang wrote:



On 2021/4/30 17:51, Mike Rapoport wrote:

On Thu, Apr 29, 2021 at 06:22:55PM +0800, Kefeng Wang wrote:


On 2021/4/29 14:57, Mike Rapoport wrote:


Do you use SPARSMEM? If yes, what is your section size?
What is the value if CONFIG_FORCE_MAX_ZONEORDER in your configuration?

Yes,

CONFIG_SPARSEMEM=y

CONFIG_SPARSEMEM_STATIC=y

CONFIG_FORCE_MAX_ZONEORDER = 11

CONFIG_PAGE_OFFSET=0xC000
CONFIG_HAVE_ARCH_PFN_VALID=y
CONFIG_HIGHMEM=y
#define SECTION_SIZE_BITS26
#define MAX_PHYSADDR_BITS32
#define MAX_PHYSMEM_BITS 32



With the patch,  the addr is aligned, but the panic still occurred,


Is this the same panic at move_freepages() for range [de600, de7ff]?

Do you enable CONFIG_ARM_LPAE?


no, the CONFIG_ARM_LPAE is not set, and yes with same panic at
move_freepages at

start_pfn/end_pfn [de600, de7ff], [de60, de7ff000] :  pfn =de600, page
=ef3cc000, page-flags = ,  pfn2phy = de60


__free_memory_core, range: 0xb020 - 0xc000, pfn: b0200 - b0200
__free_memory_core, range: 0xcc00 - 0xdca0, pfn: cc000 - b0200
__free_memory_core, range: 0xde70 - 0xdea0, pfn: de700 - b0200


Hmm, [de600, de7ff] is not added to the free lists which is correct. But
then it's unclear how the page for de600 gets to move_freepages()...

Can't say I have any bright ideas to try here...


Are we missing some checks (e.g., PageReserved()) that 
pfn_valid_within() would have "caught" before?


--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 3/4] arm64: decouple check whether pfn is in linear map from pfn_valid()

2021-04-22 Thread David Hildenbrand

On 22.04.21 08:19, Mike Rapoport wrote:

From: Mike Rapoport 

The intended semantics of pfn_valid() is to verify whether there is a
struct page for the pfn in question and nothing else.

Yet, on arm64 it is used to distinguish memory areas that are mapped in the
linear map vs those that require ioremap() to access them.

Introduce a dedicated pfn_is_map_memory() wrapper for
memblock_is_map_memory() to perform such check and use it where
appropriate.

Using a wrapper allows to avoid cyclic include dependencies.

While here also update style of pfn_valid() so that both pfn_valid() and
pfn_is_map_memory() declarations will be consistent.

Signed-off-by: Mike Rapoport 
---
  arch/arm64/include/asm/memory.h |  2 +-
  arch/arm64/include/asm/page.h   |  3 ++-
  arch/arm64/kvm/mmu.c|  2 +-
  arch/arm64/mm/init.c| 12 
  arch/arm64/mm/ioremap.c |  4 ++--
  arch/arm64/mm/mmu.c |  2 +-
  6 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 0aabc3be9a75..194f9f993d30 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -351,7 +351,7 @@ static inline void *phys_to_virt(phys_addr_t x)
  
  #define virt_addr_valid(addr)	({	\

__typeof__(addr) __addr = __tag_reset(addr);\
-   __is_lm_address(__addr) && pfn_valid(virt_to_pfn(__addr));  \
+   __is_lm_address(__addr) && pfn_is_map_memory(virt_to_pfn(__addr));  
\
  })
  
  void dump_mem_limit(void);

diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 012cffc574e8..75ddfe671393 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -37,7 +37,8 @@ void copy_highpage(struct page *to, struct page *from);
  
  typedef struct page *pgtable_t;
  
-extern int pfn_valid(unsigned long);

+int pfn_valid(unsigned long pfn);
+int pfn_is_map_memory(unsigned long pfn);
  
  #include 
  
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c

index 8711894db8c2..23dd99e29b23 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -85,7 +85,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
  
  static bool kvm_is_device_pfn(unsigned long pfn)

  {
-   return !pfn_valid(pfn);
+   return !pfn_is_map_memory(pfn);
  }
  
  /*

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 3685e12aba9b..966a7a18d528 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -258,6 +258,18 @@ int pfn_valid(unsigned long pfn)
  }
  EXPORT_SYMBOL(pfn_valid);
  
+int pfn_is_map_memory(unsigned long pfn)

+{
+   phys_addr_t addr = PFN_PHYS(pfn);
+
+   /* avoid false positives for bogus PFNs, see comment in pfn_valid() */
+   if (PHYS_PFN(addr) != pfn)
+   return 0;
+
+   return memblock_is_map_memory(addr);
+}
+EXPORT_SYMBOL(pfn_is_map_memory);
+
  static phys_addr_t memory_limit = PHYS_ADDR_MAX;
  
  /*

diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index b5e83c46b23e..b7c81dacabf0 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -43,7 +43,7 @@ static void __iomem *__ioremap_caller(phys_addr_t phys_addr, 
size_t size,
/*
 * Don't allow RAM to be mapped.
 */
-   if (WARN_ON(pfn_valid(__phys_to_pfn(phys_addr
+   if (WARN_ON(pfn_is_map_memory(__phys_to_pfn(phys_addr
return NULL;
  
  	area = get_vm_area_caller(size, VM_IOREMAP, caller);

@@ -84,7 +84,7 @@ EXPORT_SYMBOL(iounmap);
  void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size)
  {
/* For normal memory we already have a cacheable mapping. */
-   if (pfn_valid(__phys_to_pfn(phys_addr)))
+   if (pfn_is_map_memory(__phys_to_pfn(phys_addr)))
return (void __iomem *)__phys_to_virt(phys_addr);
  
  	return __ioremap_caller(phys_addr, size, __pgprot(PROT_NORMAL),

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 5d9550fdb9cf..26045e9adbd7 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -81,7 +81,7 @@ void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
  pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
  unsigned long size, pgprot_t vma_prot)
  {
-   if (!pfn_valid(pfn))
+   if (!pfn_is_map_memory(pfn))
return pgprot_noncached(vma_prot);
else if (file->f_flags & O_SYNC)
return pgprot_writecombine(vma_prot);



Acked-by: David Hildenbrand 

--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 2/4] memblock: update initialization of reserved pages

2021-04-21 Thread David Hildenbrand

On 21.04.21 08:51, Mike Rapoport wrote:

From: Mike Rapoport 

The struct pages representing a reserved memory region are initialized
using reserve_bootmem_range() function. This function is called for each
reserved region just before the memory is freed from memblock to the buddy
page allocator.

The struct pages for MEMBLOCK_NOMAP regions are kept with the default
values set by the memory map initialization which makes it necessary to
have a special treatment for such pages in pfn_valid() and
pfn_valid_within().

Split out initialization of the reserved pages to a function with a
meaningful name and treat the MEMBLOCK_NOMAP regions the same way as the
reserved regions and mark struct pages for the NOMAP regions as
PageReserved.

Signed-off-by: Mike Rapoport 
---
  include/linux/memblock.h |  4 +++-
  mm/memblock.c| 28 ++--
  2 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 5984fff3f175..634c1a578db8 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -30,7 +30,9 @@ extern unsigned long long max_possible_pfn;
   * @MEMBLOCK_NONE: no special request
   * @MEMBLOCK_HOTPLUG: hotpluggable region
   * @MEMBLOCK_MIRROR: mirrored region
- * @MEMBLOCK_NOMAP: don't add to kernel direct mapping
+ * @MEMBLOCK_NOMAP: don't add to kernel direct mapping and treat as
+ * reserved in the memory map; refer to memblock_mark_nomap() description
+ * for futher details
   */
  enum memblock_flags {
MEMBLOCK_NONE   = 0x0,  /* No special request */
diff --git a/mm/memblock.c b/mm/memblock.c
index afaefa8fc6ab..3abf2c3fea7f 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -906,6 +906,11 @@ int __init_memblock memblock_mark_mirror(phys_addr_t base, 
phys_addr_t size)
   * @base: the base phys addr of the region
   * @size: the size of the region
   *
+ * The memory regions marked with %MEMBLOCK_NOMAP will not be added to the
+ * direct mapping of the physical memory. These regions will still be
+ * covered by the memory map. The struct page representing NOMAP memory
+ * frames in the memory map will be PageReserved()
+ *
   * Return: 0 on success, -errno on failure.
   */
  int __init_memblock memblock_mark_nomap(phys_addr_t base, phys_addr_t size)
@@ -2002,6 +2007,26 @@ static unsigned long __init 
__free_memory_core(phys_addr_t start,
return end_pfn - start_pfn;
  }
  
+static void __init memmap_init_reserved_pages(void)

+{
+   struct memblock_region *region;
+   phys_addr_t start, end;
+   u64 i;
+
+   /* initialize struct pages for the reserved regions */
+   for_each_reserved_mem_range(i, , )
+   reserve_bootmem_region(start, end);
+
+   /* and also treat struct pages for the NOMAP regions as PageReserved */
+   for_each_mem_region(region) {
+   if (memblock_is_nomap(region)) {
+   start = region->base;
+   end = start + region->size;
+   reserve_bootmem_region(start, end);
+   }
+   }
+}
+
  static unsigned long __init free_low_memory_core_early(void)
  {
unsigned long count = 0;
@@ -2010,8 +2035,7 @@ static unsigned long __init 
free_low_memory_core_early(void)
  
  	memblock_clear_hotplug(0, -1);
  
-	for_each_reserved_mem_range(i, , )

-   reserve_bootmem_region(start, end);
+   memmap_init_reserved_pages();
  
  	/*

 * We need to use NUMA_NO_NODE instead of NODE_DATA(0)->node_id



Reviewed-by: David Hildenbrand 

--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 4/4] arm64: drop pfn_valid_within() and simplify pfn_valid()

2021-04-21 Thread David Hildenbrand

On 21.04.21 08:51, Mike Rapoport wrote:

From: Mike Rapoport 

The arm64's version of pfn_valid() differs from the generic because of two
reasons:

* Parts of the memory map are freed during boot. This makes it necessary to
   verify that there is actual physical memory that corresponds to a pfn
   which is done by querying memblock.

* There are NOMAP memory regions. These regions are not mapped in the
   linear map and until the previous commit the struct pages representing
   these areas had default values.

As the consequence of absence of the special treatment of NOMAP regions in
the memory map it was necessary to use memblock_is_map_memory() in
pfn_valid() and to have pfn_valid_within() aliased to pfn_valid() so that
generic mm functionality would not treat a NOMAP page as a normal page.

Since the NOMAP regions are now marked as PageReserved(), pfn walkers and
the rest of core mm will treat them as unusable memory and thus
pfn_valid_within() is no longer required at all and can be disabled by
removing CONFIG_HOLES_IN_ZONE on arm64.

pfn_valid() can be slightly simplified by replacing
memblock_is_map_memory() with memblock_is_memory().

Signed-off-by: Mike Rapoport 
---
  arch/arm64/Kconfig   | 3 ---
  arch/arm64/mm/init.c | 4 ++--
  2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e4e1b6550115..58e439046d05 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1040,9 +1040,6 @@ config NEED_PER_CPU_EMBED_FIRST_CHUNK
def_bool y
depends on NUMA
  
-config HOLES_IN_ZONE

-   def_bool y
-
  source "kernel/Kconfig.hz"
  
  config ARCH_SPARSEMEM_ENABLE

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index dc03bdc12c0f..eb3f56fb8c7c 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -243,7 +243,7 @@ int pfn_valid(unsigned long pfn)
  
  	/*

 * ZONE_DEVICE memory does not have the memblock entries.
-* memblock_is_map_memory() check for ZONE_DEVICE based
+* memblock_is_memory() check for ZONE_DEVICE based
 * addresses will always fail. Even the normal hotplugged
 * memory will never have MEMBLOCK_NOMAP flag set in their
 * memblock entries. Skip memblock search for all non early
@@ -254,7 +254,7 @@ int pfn_valid(unsigned long pfn)
return pfn_section_valid(ms, pfn);
  }
  #endif
-   return memblock_is_map_memory(addr);
+   return memblock_is_memory(addr);
  }
  EXPORT_SYMBOL(pfn_valid);
  



Acked-by: David Hildenbrand 

--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v1 4/4] arm64: drop pfn_valid_within() and simplify pfn_valid()

2021-04-20 Thread David Hildenbrand

On 20.04.21 11:09, Mike Rapoport wrote:

From: Mike Rapoport 

The arm64's version of pfn_valid() differs from the generic because of two
reasons:

* Parts of the memory map are freed during boot. This makes it necessary to
   verify that there is actual physical memory that corresponds to a pfn
   which is done by querying memblock.

* There are NOMAP memory regions. These regions are not mapped in the
   linear map and until the previous commit the struct pages representing
   these areas had default values.

As the consequence of absence of the special treatment of NOMAP regions in
the memory map it was necessary to use memblock_is_map_memory() in
pfn_valid() and to have pfn_valid_within() aliased to pfn_valid() so that
generic mm functionality would not treat a NOMAP page as a normal page.

Since the NOMAP regions are now marked as PageReserved(), pfn walkers and
the rest of core mm will treat them as unusable memory and thus
pfn_valid_within() is no longer required at all and can be disabled by
removing CONFIG_HOLES_IN_ZONE on arm64.

pfn_valid() can be slightly simplified by replacing
memblock_is_map_memory() with memblock_is_memory().

Signed-off-by: Mike Rapoport 
---
  arch/arm64/Kconfig   | 3 ---
  arch/arm64/mm/init.c | 4 ++--
  2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e4e1b6550115..58e439046d05 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1040,9 +1040,6 @@ config NEED_PER_CPU_EMBED_FIRST_CHUNK
def_bool y
depends on NUMA
  
-config HOLES_IN_ZONE

-   def_bool y
-
  source "kernel/Kconfig.hz"
  
  config ARCH_SPARSEMEM_ENABLE

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index c54e329aca15..370f33765b64 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -243,7 +243,7 @@ int pfn_valid(unsigned long pfn)
  
  	/*

 * ZONE_DEVICE memory does not have the memblock entries.
-* memblock_is_map_memory() check for ZONE_DEVICE based
+* memblock_is_memory() check for ZONE_DEVICE based
 * addresses will always fail. Even the normal hotplugged
 * memory will never have MEMBLOCK_NOMAP flag set in their
 * memblock entries. Skip memblock search for all non early
@@ -254,7 +254,7 @@ int pfn_valid(unsigned long pfn)
return pfn_section_valid(ms, pfn);
  }
  #endif
-   return memblock_is_map_memory(addr);
+   return memblock_is_memory(addr);
  }
  EXPORT_SYMBOL(pfn_valid);
  



What are the steps needed to get rid of custom pfn_valid() completely?

I'd assume we would have to stop freeing parts of the mem map during 
boot. How relevant is that for arm64 nowadays, especially with reduced 
section sizes?


--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v1 3/4] arm64: decouple check whether pfn is in linear map from pfn_valid()

2021-04-20 Thread David Hildenbrand

On 20.04.21 11:09, Mike Rapoport wrote:

From: Mike Rapoport 

The intended semantics of pfn_valid() is to verify whether there is a
struct page for the pfn in question and nothing else.

Yet, on arm64 it is used to distinguish memory areas that are mapped in the
linear map vs those that require ioremap() to access them.

Introduce a dedicated pfn_is_map_memory() wrapper for
memblock_is_map_memory() to perform such check and use it where
appropriate.

Using a wrapper allows to avoid cyclic include dependencies.

Signed-off-by: Mike Rapoport 
---
  arch/arm64/include/asm/memory.h | 2 +-
  arch/arm64/include/asm/page.h   | 1 +
  arch/arm64/kvm/mmu.c| 2 +-
  arch/arm64/mm/init.c| 6 ++
  arch/arm64/mm/ioremap.c | 4 ++--
  arch/arm64/mm/mmu.c | 2 +-
  6 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 0aabc3be9a75..194f9f993d30 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -351,7 +351,7 @@ static inline void *phys_to_virt(phys_addr_t x)
  
  #define virt_addr_valid(addr)	({	\

__typeof__(addr) __addr = __tag_reset(addr);\
-   __is_lm_address(__addr) && pfn_valid(virt_to_pfn(__addr));  \
+   __is_lm_address(__addr) && pfn_is_map_memory(virt_to_pfn(__addr));  
\
  })
  
  void dump_mem_limit(void);

diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 012cffc574e8..99a6da91f870 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -38,6 +38,7 @@ void copy_highpage(struct page *to, struct page *from);
  typedef struct page *pgtable_t;
  
  extern int pfn_valid(unsigned long);

+extern int pfn_is_map_memory(unsigned long);
  
  #include 
  
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c

index 8711894db8c2..23dd99e29b23 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -85,7 +85,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
  
  static bool kvm_is_device_pfn(unsigned long pfn)

  {
-   return !pfn_valid(pfn);
+   return !pfn_is_map_memory(pfn);
  }
  
  /*

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 3685e12aba9b..c54e329aca15 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -258,6 +258,12 @@ int pfn_valid(unsigned long pfn)
  }
  EXPORT_SYMBOL(pfn_valid);
  
+int pfn_is_map_memory(unsigned long pfn)

+{


I think you might have to add (see pfn_valid())

if (PHYS_PFN(PFN_PHYS(pfn)) != pfn)
return 0;

to catch false positives.


+   return memblock_is_map_memory(PFN_PHYS(pfn));
+}
+EXPORT_SYMBOL(pfn_is_map_memory);
+
  static phys_addr_t memory_limit = PHYS_ADDR_MAX;
  
  /*

diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c
index b5e83c46b23e..b7c81dacabf0 100644
--- a/arch/arm64/mm/ioremap.c
+++ b/arch/arm64/mm/ioremap.c
@@ -43,7 +43,7 @@ static void __iomem *__ioremap_caller(phys_addr_t phys_addr, 
size_t size,
/*
 * Don't allow RAM to be mapped.
 */
-   if (WARN_ON(pfn_valid(__phys_to_pfn(phys_addr
+   if (WARN_ON(pfn_is_map_memory(__phys_to_pfn(phys_addr
return NULL;
  
  	area = get_vm_area_caller(size, VM_IOREMAP, caller);

@@ -84,7 +84,7 @@ EXPORT_SYMBOL(iounmap);
  void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size)
  {
/* For normal memory we already have a cacheable mapping. */
-   if (pfn_valid(__phys_to_pfn(phys_addr)))
+   if (pfn_is_map_memory(__phys_to_pfn(phys_addr)))
return (void __iomem *)__phys_to_virt(phys_addr);
  
  	return __ioremap_caller(phys_addr, size, __pgprot(PROT_NORMAL),

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 5d9550fdb9cf..26045e9adbd7 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -81,7 +81,7 @@ void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd)
  pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
  unsigned long size, pgprot_t vma_prot)
  {
-   if (!pfn_valid(pfn))
+   if (!pfn_is_map_memory(pfn))
return pgprot_noncached(vma_prot);
else if (file->f_flags & O_SYNC)
return pgprot_writecombine(vma_prot);



As discussed, in the future it would be nice if we could just rely on 
the memmap state. There are cases where pfn_is_map_memory() will now be 
slower than pfn_valid() -- e.g., we don't check for valid_section() in 
case of CONFIG_SPARSEMEM. This would apply where pfn_valid() would have 
returned "0".


As we're not creating the direct map, kern_addr_valid() shouldn't need 
love. It'd be some kind of ugly if some generic code used by arm64 would 
be relying in case of arm64 on pfn_valid() to return the expected 
result; I doubt it.


Acked-by: David Hildenbrand 

--
Thanks,

David / dhildenb

___
kvmarm mailing list
k

Re: [PATCH v1 2/4] memblock: update initialization of reserved pages

2021-04-20 Thread David Hildenbrand

On 20.04.21 17:03, Mike Rapoport wrote:

On Tue, Apr 20, 2021 at 03:56:28PM +0200, David Hildenbrand wrote:

On 20.04.21 11:09, Mike Rapoport wrote:

From: Mike Rapoport 

The struct pages representing a reserved memory region are initialized
using reserve_bootmem_range() function. This function is called for each
reserved region just before the memory is freed from memblock to the buddy
page allocator.

The struct pages for MEMBLOCK_NOMAP regions are kept with the default
values set by the memory map initialization which makes it necessary to
have a special treatment for such pages in pfn_valid() and
pfn_valid_within().


Just a general question while thinking about it:

Would we right now initialize the memmap of these pages already via
memmap_init_zone()->memmap_init_range()? (IOW, not marking the
PageReserved?)


Yep. These pages are part of memblock.memory so they are initialized in
memmap_init_zone()->memmap_init_range() to the default values.



So instead of fully initializing them again, we mostly would only have 
to set PageReserved(). Not sure how big that memory usually is -- IOW, 
if we really care about optimizing the double-init.


--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v1 2/4] memblock: update initialization of reserved pages

2021-04-20 Thread David Hildenbrand

On 20.04.21 11:09, Mike Rapoport wrote:

From: Mike Rapoport 

The struct pages representing a reserved memory region are initialized
using reserve_bootmem_range() function. This function is called for each
reserved region just before the memory is freed from memblock to the buddy
page allocator.

The struct pages for MEMBLOCK_NOMAP regions are kept with the default
values set by the memory map initialization which makes it necessary to
have a special treatment for such pages in pfn_valid() and
pfn_valid_within().


Just a general question while thinking about it:

Would we right now initialize the memmap of these pages already via 
memmap_init_zone()->memmap_init_range()? (IOW, not marking the 
PageReserved?)


--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v1 1/4] include/linux/mmzone.h: add documentation for pfn_valid()

2021-04-20 Thread David Hildenbrand

On 20.04.21 14:57, Mike Rapoport wrote:

On Tue, Apr 20, 2021 at 11:22:53AM +0200, David Hildenbrand wrote:

On 20.04.21 11:09, Mike Rapoport wrote:

From: Mike Rapoport 

Add comment describing the semantics of pfn_valid() that clarifies that
pfn_valid() only checks for availability of a memory map entry (i.e. struct
page) for a PFN rather than availability of usable memory backing that PFN.

The most "generic" version of pfn_valid() used by the configurations with
SPARSEMEM enabled resides in include/linux/mmzone.h so this is the most
suitable place for documentation about semantics of pfn_valid().

Suggested-by: Anshuman Khandual 
Signed-off-by: Mike Rapoport 
---
   include/linux/mmzone.h | 11 +++
   1 file changed, 11 insertions(+)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 47946cec7584..961f0eeefb62 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1410,6 +1410,17 @@ static inline int pfn_section_valid(struct mem_section 
*ms, unsigned long pfn)
   #endif
   #ifndef CONFIG_HAVE_ARCH_PFN_VALID
+/**
+ * pfn_valid - check if there is a valid memory map entry for a PFN
+ * @pfn: the page frame number to check
+ *
+ * Check if there is a valid memory map entry aka struct page for the @pfn.
+ * Note, that availability of the memory map entry does not imply that
+ * there is actual usable memory at that @pfn. The struct page may
+ * represent a hole or an unusable page frame.
+ *
+ * Return: 1 for PFNs that have memory map entries and 0 otherwise
+ */
   static inline int pfn_valid(unsigned long pfn)
   {
struct mem_section *ms;



I'd rephrase all "there is a valid memory map" to "there is a memory map"
and add "pfn_valid() does to indicate whether the memory map as actually
initialized -- see pfn_to_online_page()."

pfn_valid() means that we can do a pfn_to_page() and don't get a fault when
accessing the "struct page". It doesn't state anything about the content.


Well, I mean valid in the sense you can access the struct page :)
How about:

/**
  * pfn_valid - check if there is a memory map entry for a PFN
  * @pfn: the page frame number to check
  *
  * Check if there is a memory map entry aka struct page for the @pfn and it
  * is safe to access that struct page; the struct page state may be
  * uninitialized -- see pfn_to_online_page().
  *
  * Note, that availability of the memory map entry does not imply that
  * there is actual usable memory at that @pfn. The struct page may
  * represent a hole or an unusable page frame.
  *
  * Return: 1 for PFNs that have memory map entries and 0 otherwise.
  */



Sounds good to me -- thanks!

Reviewed-by: David Hildenbrand 

--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v1 1/4] include/linux/mmzone.h: add documentation for pfn_valid()

2021-04-20 Thread David Hildenbrand

On 20.04.21 11:09, Mike Rapoport wrote:

From: Mike Rapoport 

Add comment describing the semantics of pfn_valid() that clarifies that
pfn_valid() only checks for availability of a memory map entry (i.e. struct
page) for a PFN rather than availability of usable memory backing that PFN.

The most "generic" version of pfn_valid() used by the configurations with
SPARSEMEM enabled resides in include/linux/mmzone.h so this is the most
suitable place for documentation about semantics of pfn_valid().

Suggested-by: Anshuman Khandual 
Signed-off-by: Mike Rapoport 
---
  include/linux/mmzone.h | 11 +++
  1 file changed, 11 insertions(+)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 47946cec7584..961f0eeefb62 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1410,6 +1410,17 @@ static inline int pfn_section_valid(struct mem_section 
*ms, unsigned long pfn)
  #endif
  
  #ifndef CONFIG_HAVE_ARCH_PFN_VALID

+/**
+ * pfn_valid - check if there is a valid memory map entry for a PFN
+ * @pfn: the page frame number to check
+ *
+ * Check if there is a valid memory map entry aka struct page for the @pfn.
+ * Note, that availability of the memory map entry does not imply that
+ * there is actual usable memory at that @pfn. The struct page may
+ * represent a hole or an unusable page frame.
+ *
+ * Return: 1 for PFNs that have memory map entries and 0 otherwise
+ */
  static inline int pfn_valid(unsigned long pfn)
  {
struct mem_section *ms;



I'd rephrase all "there is a valid memory map" to "there is a memory 
map" and add "pfn_valid() does to indicate whether the memory map as 
actually initialized -- see pfn_to_online_page()."


pfn_valid() means that we can do a pfn_to_page() and don't get a fault 
when accessing the "struct page". It doesn't state anything about the 
content.


--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC/RFT PATCH 1/3] memblock: update initialization of reserved pages

2021-04-16 Thread David Hildenbrand

On 16.04.21 13:44, Mike Rapoport wrote:

On Thu, Apr 15, 2021 at 11:30:12AM +0200, David Hildenbrand wrote:

Not sure we really need a new pagetype here, PG_Reserved seems to be quite
enough to say "don't touch this".  I generally agree that we could make
PG_Reserved a PageType and then have several sub-types for reserved memory.
This definitely will add clarity but I'm not sure that this justifies
amount of churn and effort required to audit uses of PageResrved().

Then, we could mostly avoid having to query memblock at runtime to figure
out that this is special memory. This would obviously be an extension to
this series. Just a thought.


Stop pushing memblock out of kernel! ;-)


Can't stop. Won't stop. :D

It's lovely for booting up a kernel until we have other data-structures in
place ;)


A bit more seriously, we don't have any data structure that reliably
represents physical memory layout and arch-independent fashion.
memblock is probably the best starting point for eventually having one.


We have the (slowish) kernel resource tree after boot and the (faster) 
memmap. I really don't see why we really need another slowish variant.


We might be better off to just extend and speed up the kernel resource tree.

Memblock as is is not a reasonable datastructure to keep around after 
boot: for example, how we handle boottime allocations and reserve 
regions both as reserved.


--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC/RFT PATCH 2/3] arm64: decouple check whether pfn is normal memory from pfn_valid()

2021-04-15 Thread David Hildenbrand

On 14.04.21 22:29, Mike Rapoport wrote:

On Wed, Apr 14, 2021 at 05:58:26PM +0200, David Hildenbrand wrote:

On 08.04.21 07:14, Anshuman Khandual wrote:


On 4/7/21 10:56 PM, Mike Rapoport wrote:

From: Mike Rapoport 

The intended semantics of pfn_valid() is to verify whether there is a
struct page for the pfn in question and nothing else.


Should there be a comment affirming this semantics interpretation, above the
generic pfn_valid() in include/linux/mmzone.h ?



Yet, on arm64 it is used to distinguish memory areas that are mapped in the
linear map vs those that require ioremap() to access them.

Introduce a dedicated pfn_is_memory() to perform such check and use it
where appropriate.

Signed-off-by: Mike Rapoport 
---
   arch/arm64/include/asm/memory.h | 2 +-
   arch/arm64/include/asm/page.h   | 1 +
   arch/arm64/kvm/mmu.c| 2 +-
   arch/arm64/mm/init.c| 6 ++
   arch/arm64/mm/ioremap.c | 4 ++--
   arch/arm64/mm/mmu.c | 2 +-
   6 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 0aabc3be9a75..7e77fdf71b9d 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -351,7 +351,7 @@ static inline void *phys_to_virt(phys_addr_t x)
   #define virt_addr_valid(addr)({  
\
__typeof__(addr) __addr = __tag_reset(addr);\
-   __is_lm_address(__addr) && pfn_valid(virt_to_pfn(__addr));  \
+   __is_lm_address(__addr) && pfn_is_memory(virt_to_pfn(__addr));  \
   })
   void dump_mem_limit(void);
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 012cffc574e8..32b485bcc6ff 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -38,6 +38,7 @@ void copy_highpage(struct page *to, struct page *from);
   typedef struct page *pgtable_t;
   extern int pfn_valid(unsigned long);
+extern int pfn_is_memory(unsigned long);
   #include 
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 8711894db8c2..ad2ea65a3937 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -85,7 +85,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
   static bool kvm_is_device_pfn(unsigned long pfn)
   {
-   return !pfn_valid(pfn);
+   return !pfn_is_memory(pfn);
   }
   /*
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 3685e12aba9b..258b1905ed4a 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -258,6 +258,12 @@ int pfn_valid(unsigned long pfn)
   }
   EXPORT_SYMBOL(pfn_valid);
+int pfn_is_memory(unsigned long pfn)
+{
+   return memblock_is_map_memory(PFN_PHYS(pfn));
+}
+EXPORT_SYMBOL(pfn_is_memory);> +


Should not this be generic though ? There is nothing platform or arm64
specific in here. Wondering as pfn_is_memory() just indicates that the
pfn is linear mapped, should not it be renamed as pfn_is_linear_memory()
instead ? Regardless, it's fine either way.


TBH, I dislike (generic) pfn_is_memory(). It feels like we're mixing
concepts.


Yeah, at the moment NOMAP is very much arm specific so I'd keep it this way
for now.


  NOMAP memory vs !NOMAP memory; even NOMAP is some kind of memory
after all. pfn_is_map_memory() would be more expressive, although still
sub-optimal.

We'd actually want some kind of arm64-specific pfn_is_system_memory() or the
inverse pfn_is_device_memory() -- to be improved.


In my current version (to be posted soon) I've started with
pfn_lineary_mapped() but then ended up with pfn_mapped() to make it
"upward" compatible with architectures that use direct rather than linear
map :)


And even that is moot. It doesn't tell you if a PFN is *actually* mapped 
(hello secretmem).


I'd suggest to just use memblock_is_map_memory() in arch specific code. 
Then it's clear what we are querying exactly and what the semantics 
might be.


--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC/RFT PATCH 1/3] memblock: update initialization of reserved pages

2021-04-15 Thread David Hildenbrand

Not sure we really need a new pagetype here, PG_Reserved seems to be quite
enough to say "don't touch this".  I generally agree that we could make
PG_Reserved a PageType and then have several sub-types for reserved memory.
This definitely will add clarity but I'm not sure that this justifies
amount of churn and effort required to audit uses of PageResrved().
  

Then, we could mostly avoid having to query memblock at runtime to figure
out that this is special memory. This would obviously be an extension to
this series. Just a thought.


Stop pushing memblock out of kernel! ;-)


Can't stop. Won't stop. :D

It's lovely for booting up a kernel until we have other data-structures 
in place ;)



--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC/RFT PATCH 1/3] memblock: update initialization of reserved pages

2021-04-14 Thread David Hildenbrand
Mike Rapoport  schrieb am Mi. 14. Apr. 2021 um 22:06:

> On Wed, Apr 14, 2021 at 05:12:11PM +0200, David Hildenbrand wrote:
> > On 07.04.21 19:26, Mike Rapoport wrote:
> > > From: Mike Rapoport 
> > >
> > > The struct pages representing a reserved memory region are initialized
> > > using reserve_bootmem_range() function. This function is called for
> each
> > > reserved region just before the memory is freed from memblock to the
> buddy
> > > page allocator.
> > >
> > > The struct pages for MEMBLOCK_NOMAP regions are kept with the default
> > > values set by the memory map initialization which makes it necessary to
> > > have a special treatment for such pages in pfn_valid() and
> > > pfn_valid_within().
> >
> > I assume these pages are never given to the buddy, because we don't have
> a
> > direct mapping. So to the kernel, it's essentially just like a memory
> hole
> > with benefits.
>
> The pages should not be accessed as normal memory so they do not have a
> direct (or in ARMish linear) mapping and are never given to buddy.
> After looking at ACPI standard I don't see a fundamental reason for this
> but they've already made this mess and we need to cope with it.
>
> > I can spot that we want to export such memory like any special memory
> > thingy/hole in /proc/iomem -- "reserved", which makes sense.
>
> It does, but let's wait with /proc/iomem changes. We don't really have a
> 100% consistent view of it on different architectures, so adding yet
> another type there does not seem, well, urgent.
>

To clarify: this is already done on arm64.


> > I would assume that MEMBLOCK_NOMAP is a special type of *reserved*
> memory.
> > IOW, that for_each_reserved_mem_range() should already succeed on these
> as
> > well -- we should mark anything that is MEMBLOCK_NOMAP implicitly as
> > reserved. Or are there valid reasons not to do so? What can anyone do
> with
> > that memory?
> >
> > I assume they are pretty much useless for the kernel, right? Like other
> > reserved memory ranges.
>
> I agree that there is a lot of commonality between NOMAP and reserved. The
> problem is that even semantics for reserved is different between
> architectures. Moreover, on the same architecture there could be
> E820_TYPE_RESERVED and memblock.reserved with different properties.
>
> I'd really prefer moving in baby steps here because any change in the boot
> mm can bear several month of early hangs debugging ;-)


Yeah I know. We just should have the desired target state figured out :)



>
> > > Split out initialization of the reserved pages to a function with a
> > > meaningful name and treat the MEMBLOCK_NOMAP regions the same way as
> the
> > > reserved regions and mark struct pages for the NOMAP regions as
> > > PageReserved.
> > >
> > > Signed-off-by: Mike Rapoport 
> > > ---
> > >   mm/memblock.c | 23 +--
> > >   1 file changed, 21 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/mm/memblock.c b/mm/memblock.c
> > > index afaefa8fc6ab..6b7ea9d86310 100644
> > > --- a/mm/memblock.c
> > > +++ b/mm/memblock.c
> > > @@ -2002,6 +2002,26 @@ static unsigned long __init
> __free_memory_core(phys_addr_t start,
> > > return end_pfn - start_pfn;
> > >   }
> > > +static void __init memmap_init_reserved_pages(void)
> > > +{
> > > +   struct memblock_region *region;
> > > +   phys_addr_t start, end;
> > > +   u64 i;
> > > +
> > > +   /* initialize struct pages for the reserved regions */
> > > +   for_each_reserved_mem_range(i, , )
> > > +   reserve_bootmem_region(start, end);
> > > +
> > > +   /* and also treat struct pages for the NOMAP regions as
> PageReserved */
> > > +   for_each_mem_region(region) {
> > > +   if (memblock_is_nomap(region)) {
> > > +   start = region->base;
> > > +   end = start + region->size;
> > > +   reserve_bootmem_region(start, end);
> > > +   }
> > > +   }
> > > +}
> > > +
> > >   static unsigned long __init free_low_memory_core_early(void)
> > >   {
> > > unsigned long count = 0;
> > > @@ -2010,8 +2030,7 @@ static unsigned long __init
> free_low_memory_core_early(void)
> > > memblock_clear_hotplug(0, -1);
> > > -   for_each_reserved_mem_range(i, , )
> > > -   reserve_bootmem_region(start, end);
> > > +   memmap_init_reserved_pages();
> > > /*
> > >  * We need to use NUMA_NO_NODE instead of NODE_DATA(0)->node_id
>
> --
> Sincerely yours,
> Mike.
>
> --
Thanks,

David / dhildenb
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC/RFT PATCH 2/3] arm64: decouple check whether pfn is normal memory from pfn_valid()

2021-04-14 Thread David Hildenbrand

On 08.04.21 07:14, Anshuman Khandual wrote:


On 4/7/21 10:56 PM, Mike Rapoport wrote:

From: Mike Rapoport 

The intended semantics of pfn_valid() is to verify whether there is a
struct page for the pfn in question and nothing else.


Should there be a comment affirming this semantics interpretation, above the
generic pfn_valid() in include/linux/mmzone.h ?



Yet, on arm64 it is used to distinguish memory areas that are mapped in the
linear map vs those that require ioremap() to access them.

Introduce a dedicated pfn_is_memory() to perform such check and use it
where appropriate.

Signed-off-by: Mike Rapoport 
---
  arch/arm64/include/asm/memory.h | 2 +-
  arch/arm64/include/asm/page.h   | 1 +
  arch/arm64/kvm/mmu.c| 2 +-
  arch/arm64/mm/init.c| 6 ++
  arch/arm64/mm/ioremap.c | 4 ++--
  arch/arm64/mm/mmu.c | 2 +-
  6 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 0aabc3be9a75..7e77fdf71b9d 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -351,7 +351,7 @@ static inline void *phys_to_virt(phys_addr_t x)
  
  #define virt_addr_valid(addr)	({	\

__typeof__(addr) __addr = __tag_reset(addr);\
-   __is_lm_address(__addr) && pfn_valid(virt_to_pfn(__addr));  \
+   __is_lm_address(__addr) && pfn_is_memory(virt_to_pfn(__addr));  \
  })
  
  void dump_mem_limit(void);

diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 012cffc574e8..32b485bcc6ff 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -38,6 +38,7 @@ void copy_highpage(struct page *to, struct page *from);
  typedef struct page *pgtable_t;
  
  extern int pfn_valid(unsigned long);

+extern int pfn_is_memory(unsigned long);
  
  #include 
  
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c

index 8711894db8c2..ad2ea65a3937 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -85,7 +85,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
  
  static bool kvm_is_device_pfn(unsigned long pfn)

  {
-   return !pfn_valid(pfn);
+   return !pfn_is_memory(pfn);
  }
  
  /*

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 3685e12aba9b..258b1905ed4a 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -258,6 +258,12 @@ int pfn_valid(unsigned long pfn)
  }
  EXPORT_SYMBOL(pfn_valid);
  
+int pfn_is_memory(unsigned long pfn)

+{
+   return memblock_is_map_memory(PFN_PHYS(pfn));
+}
+EXPORT_SYMBOL(pfn_is_memory);> +


Should not this be generic though ? There is nothing platform or arm64
specific in here. Wondering as pfn_is_memory() just indicates that the
pfn is linear mapped, should not it be renamed as pfn_is_linear_memory()
instead ? Regardless, it's fine either way.


TBH, I dislike (generic) pfn_is_memory(). It feels like we're mixing 
concepts. NOMAP memory vs !NOMAP memory; even NOMAP is some kind of 
memory after all. pfn_is_map_memory() would be more expressive, although 
still sub-optimal.


We'd actually want some kind of arm64-specific pfn_is_system_memory() or 
the inverse pfn_is_device_memory() -- to be improved.


--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC/RFT PATCH 1/3] memblock: update initialization of reserved pages

2021-04-14 Thread David Hildenbrand

On 14.04.21 17:27, Ard Biesheuvel wrote:

On Wed, 14 Apr 2021 at 17:14, David Hildenbrand  wrote:


On 07.04.21 19:26, Mike Rapoport wrote:

From: Mike Rapoport 

The struct pages representing a reserved memory region are initialized
using reserve_bootmem_range() function. This function is called for each
reserved region just before the memory is freed from memblock to the buddy
page allocator.

The struct pages for MEMBLOCK_NOMAP regions are kept with the default
values set by the memory map initialization which makes it necessary to
have a special treatment for such pages in pfn_valid() and
pfn_valid_within().


I assume these pages are never given to the buddy, because we don't have
a direct mapping. So to the kernel, it's essentially just like a memory
hole with benefits.

I can spot that we want to export such memory like any special memory
thingy/hole in /proc/iomem -- "reserved", which makes sense.

I would assume that MEMBLOCK_NOMAP is a special type of *reserved*
memory. IOW, that for_each_reserved_mem_range() should already succeed
on these as well -- we should mark anything that is MEMBLOCK_NOMAP
implicitly as reserved. Or are there valid reasons not to do so? What
can anyone do with that memory?

I assume they are pretty much useless for the kernel, right? Like other
reserved memory ranges.



On ARM, we need to know whether any physical regions that do not
contain system memory contain something with device semantics or not.
One of the examples is ACPI tables: these are in reserved memory, and
so they are not covered by the linear region. However, when the ACPI
core ioremap()s an arbitrary memory region, we don't know whether it
is mapping a memory region or a device region unless we keep track of
this in some way. (Device mappings require device attributes, but
firmware tables require memory attributes, as they might be accessed
using misaligned reads)


Using generically sounding NOMAP ("don't create direct mapping") to 
identify device regions feels like a hack. I know, it was introduced 
just for that purpose.


Looking at memblock_mark_nomap(), we consider "device regions"

1) ACPI tables

2) VIDEO_TYPE_EFI memory

3) some device-tree regions in of/fdt.c


IIUC, right now we end up creating a memmap for this NOMAP memory, but 
hide it away in pfn_valid(). This patch set at least fixes that.


Assuming these pages are never mapped to user space via the struct page 
(which better be the case), we could further use a new pagetype to mark 
these pages in a special way, such that we can identify them directly 
via pfn_to_page().


Then, we could mostly avoid having to query memblock at runtime to 
figure out that this is special memory. This would obviously be an 
extension to this series. Just a thought.


--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC/RFT PATCH 1/3] memblock: update initialization of reserved pages

2021-04-14 Thread David Hildenbrand

On 07.04.21 19:26, Mike Rapoport wrote:

From: Mike Rapoport 

The struct pages representing a reserved memory region are initialized
using reserve_bootmem_range() function. This function is called for each
reserved region just before the memory is freed from memblock to the buddy
page allocator.

The struct pages for MEMBLOCK_NOMAP regions are kept with the default
values set by the memory map initialization which makes it necessary to
have a special treatment for such pages in pfn_valid() and
pfn_valid_within().


I assume these pages are never given to the buddy, because we don't have 
a direct mapping. So to the kernel, it's essentially just like a memory 
hole with benefits.


I can spot that we want to export such memory like any special memory 
thingy/hole in /proc/iomem -- "reserved", which makes sense.


I would assume that MEMBLOCK_NOMAP is a special type of *reserved* 
memory. IOW, that for_each_reserved_mem_range() should already succeed 
on these as well -- we should mark anything that is MEMBLOCK_NOMAP 
implicitly as reserved. Or are there valid reasons not to do so? What 
can anyone do with that memory?


I assume they are pretty much useless for the kernel, right? Like other 
reserved memory ranges.





Split out initialization of the reserved pages to a function with a
meaningful name and treat the MEMBLOCK_NOMAP regions the same way as the
reserved regions and mark struct pages for the NOMAP regions as
PageReserved.

Signed-off-by: Mike Rapoport 
---
  mm/memblock.c | 23 +--
  1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/mm/memblock.c b/mm/memblock.c
index afaefa8fc6ab..6b7ea9d86310 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -2002,6 +2002,26 @@ static unsigned long __init 
__free_memory_core(phys_addr_t start,
return end_pfn - start_pfn;
  }
  
+static void __init memmap_init_reserved_pages(void)

+{
+   struct memblock_region *region;
+   phys_addr_t start, end;
+   u64 i;
+
+   /* initialize struct pages for the reserved regions */
+   for_each_reserved_mem_range(i, , )
+   reserve_bootmem_region(start, end);
+
+   /* and also treat struct pages for the NOMAP regions as PageReserved */
+   for_each_mem_region(region) {
+   if (memblock_is_nomap(region)) {
+   start = region->base;
+   end = start + region->size;
+   reserve_bootmem_region(start, end);
+   }
+   }
+}
+
  static unsigned long __init free_low_memory_core_early(void)
  {
unsigned long count = 0;
@@ -2010,8 +2030,7 @@ static unsigned long __init 
free_low_memory_core_early(void)
  
  	memblock_clear_hotplug(0, -1);
  
-	for_each_reserved_mem_range(i, , )

-   reserve_bootmem_region(start, end);
+   memmap_init_reserved_pages();
  
  	/*

 * We need to use NUMA_NO_NODE instead of NODE_DATA(0)->node_id




--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v10 2/6] arm64: kvm: Introduce MTE VM feature

2021-04-08 Thread David Hildenbrand

On 08.04.21 16:18, Catalin Marinas wrote:

On Wed, Apr 07, 2021 at 04:52:54PM +0100, Steven Price wrote:

On 07/04/2021 16:14, Catalin Marinas wrote:

On Wed, Apr 07, 2021 at 11:20:18AM +0100, Steven Price wrote:

On 31/03/2021 19:43, Catalin Marinas wrote:

When a slot is added by the VMM, if it asked for MTE in guest (I guess
that's an opt-in by the VMM, haven't checked the other patches), can we
reject it if it's is going to be mapped as Normal Cacheable but it is a
ZONE_DEVICE (i.e. !kvm_is_device_pfn() + one of David's suggestions to
check for ZONE_DEVICE)? This way we don't need to do more expensive
checks in set_pte_at().


The problem is that KVM allows the VMM to change the memory backing a slot
while the guest is running. This is obviously useful for the likes of
migration, but ultimately means that even if you were to do checks at the
time of slot creation, you would need to repeat the checks at set_pte_at()
time to ensure a mischievous VMM didn't swap the page for a problematic one.


Does changing the slot require some KVM API call? Can we intercept it
and do the checks there?


As David has already replied - KVM uses MMU notifiers, so there's not really
a good place to intercept this before the fault.


Maybe a better alternative for the time being is to add a new
kvm_is_zone_device_pfn() and force KVM_PGTABLE_PROT_DEVICE if it returns
true _and_ the VMM asked for MTE in guest. We can then only set
PG_mte_tagged if !device.


KVM already has a kvm_is_device_pfn(), and yes I agree restricting the MTE
checks to only !kvm_is_device_pfn() makes sense (I have the fix in my branch
locally).


Indeed, you can skip it if kvm_is_device_pfn(). In addition, with MTE,
I'd also mark a pfn as 'device' in user_mem_abort() if
pfn_to_online_page() is NULL as we don't want to map it as Cacheable in
Stage 2. It's unlikely that we'll trip over this path but just in case.

(can we have a ZONE_DEVICE _online_ pfn or by definition they are
considered offline?)


By definition (and implementation) offline. When you get a page = 
pfn_to_online_page() with page != NULL, that one should never be 
ZONE_DEVICE (otherwise it would be a BUG).


As I said, things are different when exposing dax memory via dax/kmem to 
the buddy. But then, we are no longer talking about ZONE_DEVICE.


--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v10 2/6] arm64: kvm: Introduce MTE VM feature

2021-04-07 Thread David Hildenbrand

On 07.04.21 17:14, Catalin Marinas wrote:

On Wed, Apr 07, 2021 at 11:20:18AM +0100, Steven Price wrote:

On 31/03/2021 19:43, Catalin Marinas wrote:

When a slot is added by the VMM, if it asked for MTE in guest (I guess
that's an opt-in by the VMM, haven't checked the other patches), can we
reject it if it's is going to be mapped as Normal Cacheable but it is a
ZONE_DEVICE (i.e. !kvm_is_device_pfn() + one of David's suggestions to
check for ZONE_DEVICE)? This way we don't need to do more expensive
checks in set_pte_at().


The problem is that KVM allows the VMM to change the memory backing a slot
while the guest is running. This is obviously useful for the likes of
migration, but ultimately means that even if you were to do checks at the
time of slot creation, you would need to repeat the checks at set_pte_at()
time to ensure a mischievous VMM didn't swap the page for a problematic one.


Does changing the slot require some KVM API call? Can we intercept it
and do the checks there?


User space can simply mmap(MAP_FIXED) the user space area registered in 
a KVM memory slot. You cannot really intercept that. You can only check 
in the KVM MMU code at fault time that the VMA which you hold in your 
hands is still in a proper state. The KVM MMU is synchronous, which 
means that updates to the VMA layout are reflected in the KVM MMU page 
tables -- e.g., via mmu notifier calls.


E.g., in s390x code we cannot handle VMAs with gigantic pages. We check 
that when faulting (creating the links in the page table) via __gmap_link().


You could either check the page itself (ZONE_DEVICE) or might even be 
able to check via the VMA/file.


--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v10 2/6] arm64: kvm: Introduce MTE VM feature

2021-03-31 Thread David Hildenbrand

On 31.03.21 12:41, Steven Price wrote:

On 31/03/2021 10:32, David Hildenbrand wrote:

On 31.03.21 11:21, Catalin Marinas wrote:

On Wed, Mar 31, 2021 at 09:34:44AM +0200, David Hildenbrand wrote:

On 30.03.21 12:30, Catalin Marinas wrote:

On Mon, Mar 29, 2021 at 05:06:51PM +0100, Steven Price wrote:

On 28/03/2021 13:21, Catalin Marinas wrote:

On Sat, Mar 27, 2021 at 03:23:24PM +, Catalin Marinas wrote:

On Fri, Mar 12, 2021 at 03:18:58PM +, Steven Price wrote:

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 77cb2d28f2a4..b31b7a821f90 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -879,6 +879,22 @@ static int user_mem_abort(struct kvm_vcpu
*vcpu, phys_addr_t fault_ipa,
     if (vma_pagesize == PAGE_SIZE && !force_pte)
     vma_pagesize = transparent_hugepage_adjust(memslot,
hva,
    , _ipa);
+
+    if (fault_status != FSC_PERM && kvm_has_mte(kvm) &&
pfn_valid(pfn)) {
+    /*
+ * VM will be able to see the page's tags, so we must
ensure
+ * they have been initialised. if PG_mte_tagged is set,
tags
+ * have already been initialised.
+ */
+    struct page *page = pfn_to_page(pfn);
+    unsigned long i, nr_pages = vma_pagesize >> PAGE_SHIFT;
+
+    for (i = 0; i < nr_pages; i++, page++) {
+    if (!test_and_set_bit(PG_mte_tagged, >flags))
+    mte_clear_page_tags(page_address(page));
+    }
+    }


This pfn_valid() check may be problematic. Following commit
eeb0753ba27b
("arm64/mm: Fix pfn_valid() for ZONE_DEVICE based memory"), it
returns
true for ZONE_DEVICE memory but such memory is allowed not to
support
MTE.


Some more thinking, this should be safe as any ZONE_DEVICE would be
mapped as untagged memory in the kernel linear map. It could be
slightly
inefficient if it unnecessarily tries to clear tags in ZONE_DEVICE,
untagged memory. Another overhead is pfn_valid() which will likely
end
up calling memblock_is_map_memory().

However, the bigger issue is that Stage 2 cannot disable tagging for
Stage 1 unless the memory is Non-cacheable or Device at S2. Is
there a
way to detect what gets mapped in the guest as Normal Cacheable
memory
and make sure it's only early memory or hotplug but no ZONE_DEVICE
(or
something else like on-chip memory)?  If we can't guarantee that all
Cacheable memory given to a guest supports tags, we should disable
the
feature altogether.


In stage 2 I believe we only have two types of mapping - 'normal' or
DEVICE_nGnRE (see stage2_map_set_prot_attr()). Filtering out the
latter is a
case of checking the 'device' variable, and makes sense to avoid the
overhead you describe.

This should also guarantee that all stage-2 cacheable memory
supports tags,
as kvm_is_device_pfn() is simply !pfn_valid(), and pfn_valid()
should only
be true for memory that Linux considers "normal".


If you think "normal" == "normal System RAM", that's wrong; see below.


By "normal" I think both Steven and I meant the Normal Cacheable memory
attribute (another being the Device memory attribute).


Sadly there's no good standardised terminology here. Aarch64 provides
the "normal (cacheable)" definition. Memory which is mapped as "Normal
Cacheable" is implicitly MTE capable when shared with a guest (because
the stage 2 mappings don't allow restricting MTE other than mapping it
as Device memory).

So MTE also forces us to have a definition of memory which is "bog
standard memory"[1] separate from the mapping attributes. This is the
main memory which fully supports MTE.

Separate from the "bog standard" we have the "special"[1] memory, e.g.
ZONE_DEVICE memory may be mapped as "Normal Cacheable" at stage 1 but
that memory may not support MTE tags. This memory can only be safely
shared with a guest in the following situations:

   1. MTE is completely disabled for the guest

   2. The stage 2 mappings are 'device' (e.g. DEVICE_nGnRE)

   3. We have some guarantee that guest MTE access are in some way safe.

(1) is the situation today (without this patch series). But it prevents
the guest from using MTE in any form.

(2) is pretty terrible for general memory, but is the get-out clause for
mapping devices into the guest.

(3) isn't something we have any architectural way of discovering. We'd
need to know what the device did with the MTE accesses (and any caches
between the CPU and the device) to ensure there aren't any side-channels
or h/w lockup issues. We'd also need some way of describing this memory
to the guest.

So at least for the time being the approach is to avoid letting a guest
with MTE enabled have access to this sort of memory.

[1] Neither "bog standard" nor "special" are real terms - like I said
there's a lack of standardised terminology.


That's the problem. With Anshuman's c

Re: [PATCH v10 2/6] arm64: kvm: Introduce MTE VM feature

2021-03-31 Thread David Hildenbrand

On 31.03.21 11:21, Catalin Marinas wrote:

On Wed, Mar 31, 2021 at 09:34:44AM +0200, David Hildenbrand wrote:

On 30.03.21 12:30, Catalin Marinas wrote:

On Mon, Mar 29, 2021 at 05:06:51PM +0100, Steven Price wrote:

On 28/03/2021 13:21, Catalin Marinas wrote:

On Sat, Mar 27, 2021 at 03:23:24PM +, Catalin Marinas wrote:

On Fri, Mar 12, 2021 at 03:18:58PM +, Steven Price wrote:

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 77cb2d28f2a4..b31b7a821f90 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -879,6 +879,22 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, 
phys_addr_t fault_ipa,
if (vma_pagesize == PAGE_SIZE && !force_pte)
vma_pagesize = transparent_hugepage_adjust(memslot, hva,
   , _ipa);
+
+   if (fault_status != FSC_PERM && kvm_has_mte(kvm) && pfn_valid(pfn)) {
+   /*
+* VM will be able to see the page's tags, so we must ensure
+* they have been initialised. if PG_mte_tagged is set, tags
+* have already been initialised.
+*/
+   struct page *page = pfn_to_page(pfn);
+   unsigned long i, nr_pages = vma_pagesize >> PAGE_SHIFT;
+
+   for (i = 0; i < nr_pages; i++, page++) {
+   if (!test_and_set_bit(PG_mte_tagged, >flags))
+   mte_clear_page_tags(page_address(page));
+   }
+   }


This pfn_valid() check may be problematic. Following commit eeb0753ba27b
("arm64/mm: Fix pfn_valid() for ZONE_DEVICE based memory"), it returns
true for ZONE_DEVICE memory but such memory is allowed not to support
MTE.


Some more thinking, this should be safe as any ZONE_DEVICE would be
mapped as untagged memory in the kernel linear map. It could be slightly
inefficient if it unnecessarily tries to clear tags in ZONE_DEVICE,
untagged memory. Another overhead is pfn_valid() which will likely end
up calling memblock_is_map_memory().

However, the bigger issue is that Stage 2 cannot disable tagging for
Stage 1 unless the memory is Non-cacheable or Device at S2. Is there a
way to detect what gets mapped in the guest as Normal Cacheable memory
and make sure it's only early memory or hotplug but no ZONE_DEVICE (or
something else like on-chip memory)?  If we can't guarantee that all
Cacheable memory given to a guest supports tags, we should disable the
feature altogether.


In stage 2 I believe we only have two types of mapping - 'normal' or
DEVICE_nGnRE (see stage2_map_set_prot_attr()). Filtering out the latter is a
case of checking the 'device' variable, and makes sense to avoid the
overhead you describe.

This should also guarantee that all stage-2 cacheable memory supports tags,
as kvm_is_device_pfn() is simply !pfn_valid(), and pfn_valid() should only
be true for memory that Linux considers "normal".


If you think "normal" == "normal System RAM", that's wrong; see below.


By "normal" I think both Steven and I meant the Normal Cacheable memory
attribute (another being the Device memory attribute).


That's the problem. With Anshuman's commit I mentioned above,
pfn_valid() returns true for ZONE_DEVICE mappings (e.g. persistent
memory, not talking about some I/O mapping that requires Device_nGnRE).
So kvm_is_device_pfn() is false for such memory and it may be mapped as
Normal but it is not guaranteed to support tagging.


pfn_valid() means "there is a struct page"; if you do pfn_to_page() and
touch the page, you won't fault. So Anshuman's commit is correct.


I agree.


pfn_to_online_page() means, "there is a struct page and it's system RAM
that's in use; the memmap has a sane content"


Does pfn_to_online_page() returns a valid struct page pointer for
ZONE_DEVICE pages? IIUC, these are not guaranteed to be system RAM, for
some definition of system RAM (I assume NVDIMM != system RAM). For
example, pmem_attach_disk() calls devm_memremap_pages() and this would
use the Normal Cacheable memory attribute without necessarily being
system RAM.


No, not for ZONE_DEVICE.

However, if you expose PMEM via dax/kmem as System RAM to the system (-> 
add_memory_driver_managed()), then PMEM (managed via ZONE_NOMRAL or 
ZONE_MOVABLE) would work with pfn_to_online_page() -- as the system 
thinks it's "ordinary system RAM" and the memory is managed by the buddy.




So if pfn_valid() is not equivalent to system RAM, we have a potential
issue with MTE. Even if "system RAM" includes NVDIMMs, we still have
this issue and we may need a new term to describe MTE-safe memory. In
the kernel we assume MTE-safe all pages that can be mapped as
MAP_ANONYMOUS and I don't think these include ZONE_DEVICE pages.

Thanks.




--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v10 2/6] arm64: kvm: Introduce MTE VM feature

2021-03-31 Thread David Hildenbrand

On 30.03.21 12:30, Catalin Marinas wrote:

On Mon, Mar 29, 2021 at 05:06:51PM +0100, Steven Price wrote:

On 28/03/2021 13:21, Catalin Marinas wrote:

On Sat, Mar 27, 2021 at 03:23:24PM +, Catalin Marinas wrote:

On Fri, Mar 12, 2021 at 03:18:58PM +, Steven Price wrote:

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 77cb2d28f2a4..b31b7a821f90 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -879,6 +879,22 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, 
phys_addr_t fault_ipa,
if (vma_pagesize == PAGE_SIZE && !force_pte)
vma_pagesize = transparent_hugepage_adjust(memslot, hva,
   , _ipa);
+
+   if (fault_status != FSC_PERM && kvm_has_mte(kvm) && pfn_valid(pfn)) {
+   /*
+* VM will be able to see the page's tags, so we must ensure
+* they have been initialised. if PG_mte_tagged is set, tags
+* have already been initialised.
+*/
+   struct page *page = pfn_to_page(pfn);
+   unsigned long i, nr_pages = vma_pagesize >> PAGE_SHIFT;
+
+   for (i = 0; i < nr_pages; i++, page++) {
+   if (!test_and_set_bit(PG_mte_tagged, >flags))
+   mte_clear_page_tags(page_address(page));
+   }
+   }


This pfn_valid() check may be problematic. Following commit eeb0753ba27b
("arm64/mm: Fix pfn_valid() for ZONE_DEVICE based memory"), it returns
true for ZONE_DEVICE memory but such memory is allowed not to support
MTE.


Some more thinking, this should be safe as any ZONE_DEVICE would be
mapped as untagged memory in the kernel linear map. It could be slightly
inefficient if it unnecessarily tries to clear tags in ZONE_DEVICE,
untagged memory. Another overhead is pfn_valid() which will likely end
up calling memblock_is_map_memory().

However, the bigger issue is that Stage 2 cannot disable tagging for
Stage 1 unless the memory is Non-cacheable or Device at S2. Is there a
way to detect what gets mapped in the guest as Normal Cacheable memory
and make sure it's only early memory or hotplug but no ZONE_DEVICE (or
something else like on-chip memory)?  If we can't guarantee that all
Cacheable memory given to a guest supports tags, we should disable the
feature altogether.


In stage 2 I believe we only have two types of mapping - 'normal' or
DEVICE_nGnRE (see stage2_map_set_prot_attr()). Filtering out the latter is a
case of checking the 'device' variable, and makes sense to avoid the
overhead you describe.

This should also guarantee that all stage-2 cacheable memory supports tags,
as kvm_is_device_pfn() is simply !pfn_valid(), and pfn_valid() should only
be true for memory that Linux considers "normal".


If you think "normal" == "normal System RAM", that's wrong; see below.



That's the problem. With Anshuman's commit I mentioned above,
pfn_valid() returns true for ZONE_DEVICE mappings (e.g. persistent
memory, not talking about some I/O mapping that requires Device_nGnRE).
So kvm_is_device_pfn() is false for such memory and it may be mapped as
Normal but it is not guaranteed to support tagging.


pfn_valid() means "there is a struct page"; if you do pfn_to_page() and 
touch the page, you won't fault. So Anshuman's commit is correct.


pfn_to_online_page() means, "there is a struct page and it's system RAM 
that's in use; the memmap has a sane content"




For user MTE, we get away with this as the MAP_ANONYMOUS requirement
would filter it out while arch_add_memory() will ensure it's mapped as
untagged in the linear map. See another recent fix for hotplugged
memory: d15dfd31384b ("arm64: mte: Map hotplugged memory as Normal
Tagged"). We needed to ensure that ZONE_DEVICE doesn't end up as tagged,
only hoplugged memory. Both handled via arch_add_memory() in the arch
code with ZONE_DEVICE starting at devm_memremap_pages().


I now wonder if we can get a MAP_ANONYMOUS mapping of ZONE_DEVICE pfn
even without virtualisation.


I haven't checked all the code paths but I don't think we can get a
MAP_ANONYMOUS mapping of ZONE_DEVICE memory as we normally need a file
descriptor.


I certainly hope this is the case - it's the weird corner cases of device
drivers that worry me. E.g. I know i915 has a "hidden" mmap behind an ioctl
(see i915_gem_mmap_ioctl(), although this case is fine - it's MAP_SHARED).
Mali's kbase did something similar in the past.


I think this should be fine since it's not a MAP_ANONYMOUS (we do allow
MAP_SHARED to be tagged).




--
Thanks,

David / dhildenb

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [Qemu-devel] [Qemu-arm] VCPU hotplug on KVM/ARM

2018-03-01 Thread David Hildenbrand
On 01.03.2018 11:05, Peter Maydell wrote:
> On 1 March 2018 at 09:50, Igor Mammedov  wrote:
>> In QEMU on x86 (and I think ppc, s390 as well), we create vCPUs on demand.>>
>> It would be nice if ARM would be able to do that too,
>> so that it could take advantage of the same code.
> 
> It's not clear to me how that would work, given that for
> instance the interrupt controller wants to know up-front
> how many CPUs it has to deal with.
>

So how is cpu hotplug handled in HW? Or doesn't it even exist there?

(we have max_cpus for the interrupt controller, but not sure if that is
what we want)

> thanks
> -- PMM
> 
-- 

Thanks,

David / dhildenb
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 07/16] KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_set_sregs

2017-12-08 Thread David Hildenbrand

>  
>  int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs 
> *regs)
> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> index f647e121070e..cdf0be02c95a 100644
> --- a/arch/powerpc/kvm/booke.c
> +++ b/arch/powerpc/kvm/booke.c
> @@ -1632,18 +1632,25 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu 
> *vcpu,
>  {
>   int ret;
>  
> + vcpu_load(vcpu);
> +
> + ret = -EINVAL;

you can initialize this directly.

>   if (vcpu->arch.pvr != sregs->pvr)
> - return -EINVAL;
> + goto out;
>  
>   ret = set_sregs_base(vcpu, sregs);
>   if (ret < 0)
> - return ret;
> + goto out;
>  
>   ret = set_sregs_arch206(vcpu, sregs);
>   if (ret < 0)
> - return ret;
> + goto out;
> +
> + ret = vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
>  
> - return vcpu->kvm->arch.kvm_ops->set_sregs(vcpu, sregs);
> +out:
> + vcpu_put(vcpu);
> + return ret;
>  }
>  
>  int kvmppc_get_one_reg(struct kvm_vcpu *vcpu, u64 id,
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index 18011fc4ac49..d95b4f15e52b 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -2729,8 +2729,12 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu 
> *vcpu, struct kvm_regs *regs)
>  int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
> struct kvm_sregs *sregs)
>  {
> + vcpu_load(vcpu);
> +
>   memcpy(>run->s.regs.acrs, >acrs, sizeof(sregs->acrs));
>   memcpy(>arch.sie_block->gcr, >crs, sizeof(sregs->crs));
> +
> + vcpu_put(vcpu);
>   return 0;
>  }
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 20a5f6776eea..a31a80aee0b9 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7500,15 +7500,19 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu 
> *vcpu,
>   int mmu_reset_needed = 0;
>   int pending_vec, max_bits, idx;
>   struct desc_ptr dt;
> + int ret;
> +
> + vcpu_load(vcpu);
>  
> + ret = -EINVAL;

dito


Reviewed-by: David Hildenbrand <da...@redhat.com>

-- 

Thanks,

David / dhildenb
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 06/16] KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_get_sregs

2017-12-08 Thread David Hildenbrand
On 04.12.2017 21:35, Christoffer Dall wrote:
> From: Christoffer Dall <christoffer.d...@linaro.org>
> 
> Move vcpu_load() and vcpu_put() into the architecture specific
> implementations of kvm_arch_vcpu_ioctl_get_sregs().
> 
> Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
> ---
>  arch/powerpc/kvm/book3s.c | 8 +++-
>  arch/powerpc/kvm/booke.c  | 9 -
>  arch/s390/kvm/kvm-s390.c  | 4 
>  arch/x86/kvm/x86.c| 3 +++
>  virt/kvm/kvm_main.c   | 2 --
>  5 files changed, 22 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
> index 24bc7aabfc44..6cc2377549f7 100644
> --- a/arch/powerpc/kvm/book3s.c
> +++ b/arch/powerpc/kvm/book3s.c
> @@ -484,7 +484,13 @@ void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
>  int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
> struct kvm_sregs *sregs)
>  {
> - return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
> + int ret;
> +
> + vcpu_load(vcpu);
> + ret = vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
> + vcpu_put(vcpu);
> +
> + return ret;
>  }
>  
>  int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> index bcbbeddc3430..f647e121070e 100644
> --- a/arch/powerpc/kvm/booke.c
> +++ b/arch/powerpc/kvm/booke.c
> @@ -1613,11 +1613,18 @@ int kvmppc_set_sregs_ivor(struct kvm_vcpu *vcpu, 
> struct kvm_sregs *sregs)
>  int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
>struct kvm_sregs *sregs)
>  {
> + int ret;
> +
> + vcpu_load(vcpu);
> +
>   sregs->pvr = vcpu->arch.pvr;
>  
>   get_sregs_base(vcpu, sregs);
>   get_sregs_arch206(vcpu, sregs);
> - return vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
> + ret = vcpu->kvm->arch.kvm_ops->get_sregs(vcpu, sregs);
> +
> + vcpu_put(vcpu);
> + return ret;
>  }
>  
>  int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index e3476430578a..18011fc4ac49 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -2737,8 +2737,12 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu 
> *vcpu,
>  int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
> struct kvm_sregs *sregs)
>  {
> + vcpu_load(vcpu);
> +
>   memcpy(>acrs, >run->s.regs.acrs, sizeof(sregs->acrs));
>   memcpy(>crs, >arch.sie_block->gcr, sizeof(sregs->crs));
> +
> + vcpu_put(vcpu);
>   return 0;
>  }
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 75eacce78f59..20a5f6776eea 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7400,6 +7400,8 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
>  {
>   struct desc_ptr dt;
>  
> + vcpu_load(vcpu);
> +
>   kvm_get_segment(vcpu, >cs, VCPU_SREG_CS);
>   kvm_get_segment(vcpu, >ds, VCPU_SREG_DS);
>   kvm_get_segment(vcpu, >es, VCPU_SREG_ES);
> @@ -7431,6 +7433,7 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
>   set_bit(vcpu->arch.interrupt.nr,
>   (unsigned long *)sregs->interrupt_bitmap);
>  
> + vcpu_put(vcpu);
>   return 0;
>  }
>  
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 963e249d7b79..779c03e39fa4 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -2581,9 +2581,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
>   r = -ENOMEM;
>   if (!kvm_sregs)
>   goto out;
> - vcpu_load(vcpu);
>   r = kvm_arch_vcpu_ioctl_get_sregs(vcpu, kvm_sregs);
> - vcpu_put(vcpu);
>   if (r)
>   goto out;
>   r = -EFAULT;
> 

Reviewed-by: David Hildenbrand <da...@redhat.com>

-- 

Thanks,

David / dhildenb
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 05/16] KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_set_regs

2017-12-08 Thread David Hildenbrand
On 04.12.2017 21:35, Christoffer Dall wrote:
> From: Christoffer Dall <christoffer.d...@linaro.org>
> 
> Move vcpu_load() and vcpu_put() into the architecture specific
> implementations of kvm_arch_vcpu_ioctl_set_regs().
> 
> Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
> ---
>  arch/mips/kvm/mips.c  | 3 +++
>  arch/powerpc/kvm/book3s.c | 3 +++
>  arch/powerpc/kvm/booke.c  | 3 +++
>  arch/s390/kvm/kvm-s390.c  | 2 ++
>  arch/x86/kvm/x86.c| 3 +++
>  virt/kvm/kvm_main.c   | 2 --
>  6 files changed, 14 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
> index adfca57420d1..3a898712d6cd 100644
> --- a/arch/mips/kvm/mips.c
> +++ b/arch/mips/kvm/mips.c
> @@ -1151,6 +1151,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, 
> struct kvm_regs *regs)
>  {
>   int i;
>  
> + vcpu_load(vcpu);
> +
>   for (i = 1; i < ARRAY_SIZE(vcpu->arch.gprs); i++)
>   vcpu->arch.gprs[i] = regs->gpr[i];
>   vcpu->arch.gprs[0] = 0; /* zero is special, and cannot be set. */
> @@ -1158,6 +1160,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, 
> struct kvm_regs *regs)
>   vcpu->arch.lo = regs->lo;
>   vcpu->arch.pc = regs->pc;
>  
> + vcpu_put(vcpu);
>   return 0;
>  }
>  
> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
> index d85bfd733ccd..24bc7aabfc44 100644
> --- a/arch/powerpc/kvm/book3s.c
> +++ b/arch/powerpc/kvm/book3s.c
> @@ -528,6 +528,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, 
> struct kvm_regs *regs)
>  {
>   int i;
>  
> + vcpu_load(vcpu);
> +
>   kvmppc_set_pc(vcpu, regs->pc);
>   kvmppc_set_cr(vcpu, regs->cr);
>   kvmppc_set_ctr(vcpu, regs->ctr);
> @@ -548,6 +550,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, 
> struct kvm_regs *regs)
>   for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
>   kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
>  
> + vcpu_put(vcpu);
>   return 0;
>  }
>  
> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> index e0e4f04c5535..bcbbeddc3430 100644
> --- a/arch/powerpc/kvm/booke.c
> +++ b/arch/powerpc/kvm/booke.c
> @@ -1462,6 +1462,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, 
> struct kvm_regs *regs)
>  {
>   int i;
>  
> + vcpu_load(vcpu);
> +
>   vcpu->arch.pc = regs->pc;
>   kvmppc_set_cr(vcpu, regs->cr);
>   vcpu->arch.ctr = regs->ctr;
> @@ -1483,6 +1485,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, 
> struct kvm_regs *regs)
>   for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
>   kvmppc_set_gpr(vcpu, i, regs->gpr[i]);
>  
> + vcpu_put(vcpu);
>   return 0;
>  }
>  
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index 37b7caae2484..e3476430578a 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -2712,7 +2712,9 @@ static int kvm_arch_vcpu_ioctl_initial_reset(struct 
> kvm_vcpu *vcpu)
>  
>  int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs 
> *regs)
>  {
> + vcpu_load(vcpu);
>   memcpy(>run->s.regs.gprs, >gprs, sizeof(regs->gprs));
> + vcpu_put(vcpu);
>   return 0;
>  }
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 597e1f8fc8da..75eacce78f59 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7350,6 +7350,8 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, 
> struct kvm_regs *regs)
>  
>  int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs 
> *regs)
>  {
> + vcpu_load(vcpu);
> +
>   vcpu->arch.emulate_regs_need_sync_from_vcpu = true;
>   vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
>  
> @@ -7379,6 +7381,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, 
> struct kvm_regs *regs)
>  
>   kvm_make_request(KVM_REQ_EVENT, vcpu);
>  
> + vcpu_put(vcpu);
>   return 0;
>  }
>  
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 843d481f58cb..963e249d7b79 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -2572,9 +2572,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
>   r = PTR_ERR(kvm_regs);
>   goto out;
>   }
> - vcpu_load(vcpu);
>   r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs);
> - vcpu_put(vcpu);
>   kfree(kvm_regs);
>   break;
>   }
> 

Reviewed-by: David Hildenbrand <da...@redhat.com>

-- 

Thanks,

David / dhildenb
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 08/16] KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_get_mpstate

2017-11-29 Thread David Hildenbrand
On 29.11.2017 17:41, Christoffer Dall wrote:
> Move vcpu_load() and vcpu_put() into the architecture specific
> implementations of kvm_arch_vcpu_ioctl_get_mpstate().
> 
> Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
> ---
>  arch/s390/kvm/kvm-s390.c | 11 +--
>  arch/x86/kvm/x86.c   |  3 +++
>  virt/kvm/arm/arm.c   |  3 +++
>  virt/kvm/kvm_main.c  |  2 --
>  4 files changed, 15 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index d95b4f1..396fc3d 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -2836,9 +2836,16 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct 
> kvm_vcpu *vcpu,
>  int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
>   struct kvm_mp_state *mp_state)
>  {
> + int ret;
> +
> + vcpu_load(vcpu);
> +
>   /* CHECK_STOP and LOAD are not supported yet */
> - return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
> -KVM_MP_STATE_OPERATING;
> + ret = is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
> +   KVM_MP_STATE_OPERATING;
> +
> + vcpu_put(vcpu);
> + return ret;
>  }
>  
>  int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index a31a80a..9bf62c3 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7440,6 +7440,8 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
>  int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
>   struct kvm_mp_state *mp_state)
>  {
> + vcpu_load(vcpu);
> +
>   kvm_apic_accept_events(vcpu);
>   if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
>   vcpu->arch.pv.pv_unhalted)
> @@ -7447,6 +7449,7 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu 
> *vcpu,
>   else
>   mp_state->mp_state = vcpu->arch.mp_state;
>  
> + vcpu_put(vcpu);
>   return 0;
>  }
>  
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index 1f448b2..a717170 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -381,11 +381,14 @@ static void vcpu_power_off(struct kvm_vcpu *vcpu)
>  int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
>   struct kvm_mp_state *mp_state)
>  {
> + vcpu_load(vcpu);
> +
>   if (vcpu->arch.power_off)
>   mp_state->mp_state = KVM_MP_STATE_STOPPED;
>   else
>   mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
>  
> + vcpu_put(vcpu);
>   return 0;
>  }
>  
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 19cf2d1..eac3c29 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -2603,9 +2603,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
>   case KVM_GET_MP_STATE: {
>   struct kvm_mp_state mp_state;
>  
> - vcpu_load(vcpu);
>   r = kvm_arch_vcpu_ioctl_get_mpstate(vcpu, _state);
> - vcpu_put(vcpu);
>   if (r)
>   goto out;
>   r = -EFAULT;
> 

Reviewed-by: David Hildenbrand <da...@redhat.com>

-- 

Thanks,

David / dhildenb
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 09/16] KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_set_mpstate

2017-11-29 Thread David Hildenbrand
On 29.11.2017 17:41, Christoffer Dall wrote:
> Move vcpu_load() and vcpu_put() into the architecture specific
> implementations of kvm_arch_vcpu_ioctl_set_mpstate().
> 
> Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
> ---
>  arch/s390/kvm/kvm-s390.c |  3 +++
>  arch/x86/kvm/x86.c   | 15 ---
>  virt/kvm/arm/arm.c   |  9 +++--
>  virt/kvm/kvm_main.c  |  2 --
>  4 files changed, 22 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index 396fc3d..8fade85 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -2853,6 +2853,8 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu 
> *vcpu,
>  {
>   int rc = 0;
>  
> + vcpu_load(vcpu);
> +
>   /* user space knows about this interface - let it control the state */
>   vcpu->kvm->arch.user_cpu_state_ctrl = 1;
>  
> @@ -2870,6 +2872,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu 
> *vcpu,
>   rc = -ENXIO;
>   }
>  
> + vcpu_put(vcpu);
>   return rc;
>  }
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 9bf62c3..ee357b6 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7456,15 +7456,20 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu 
> *vcpu,
>  int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
>   struct kvm_mp_state *mp_state)
>  {
> + int ret;

initialize ret directly to -EINVAL ?

> +
> + vcpu_load(vcpu);
> +
> + ret = -EINVAL;
>   if (!lapic_in_kernel(vcpu) &&
>   mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
> - return -EINVAL;
> + goto out;
>  
>   /* INITs are latched while in SMM */
>   if ((is_smm(vcpu) || vcpu->arch.smi_pending) &&
>   (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
>mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
> - return -EINVAL;
> + goto out;
>  
>   if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
>   vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
> @@ -7472,7 +7477,11 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu 
> *vcpu,
>   } else
>   vcpu->arch.mp_state = mp_state->mp_state;
>   kvm_make_request(KVM_REQ_EVENT, vcpu);
> - return 0;
> +
> + ret = 0;
> +out:
> + vcpu_put(vcpu);
> + return ret;
>  }
>  
>  int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
> index a717170..9a3acbc 100644
> --- a/virt/kvm/arm/arm.c
> +++ b/virt/kvm/arm/arm.c
> @@ -395,6 +395,10 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu 
> *vcpu,
>  int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
>   struct kvm_mp_state *mp_state)
>  {
> + int ret = 0;
> +
> + vcpu_load(vcpu);
> +
>   switch (mp_state->mp_state) {
>   case KVM_MP_STATE_RUNNABLE:
>   vcpu->arch.power_off = false;
> @@ -403,10 +407,11 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu 
> *vcpu,
>   vcpu_power_off(vcpu);
>   break;
>   default:
> - return -EINVAL;
> + ret = -EINVAL;
>   }
>  
> - return 0;
> + vcpu_put(vcpu);
> + return ret;
>  }
>  
>  /**
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index eac3c29..f360005 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -2618,9 +2618,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
>   r = -EFAULT;
>   if (copy_from_user(_state, argp, sizeof(mp_state)))
>   goto out;
> - vcpu_load(vcpu);
>   r = kvm_arch_vcpu_ioctl_set_mpstate(vcpu, _state);
> - vcpu_put(vcpu);
>   break;
>   }
>   case KVM_TRANSLATE: {
> 

Reviewed-by: David Hildenbrand <da...@redhat.com>

-- 

Thanks,

David / dhildenb
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 11/16] KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_set_guest_debug

2017-11-29 Thread David Hildenbrand
f80b5 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -2804,13 +2804,19 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct 
> kvm_vcpu *vcpu,
>  {
>   int rc = 0;
>  
> + vcpu_load(vcpu);
> +
>   vcpu->guest_debug = 0;
>   kvm_s390_clear_bp_data(vcpu);
>  
> - if (dbg->control & ~VALID_GUESTDBG_FLAGS)
> - return -EINVAL;
> - if (!sclp.has_gpere)
> - return -EINVAL;
> + if (dbg->control & ~VALID_GUESTDBG_FLAGS) {
> + rc = -EINVAL;
> + goto out;
> + }
> + if (!sclp.has_gpere) {
> + rc = -EINVAL;
> + goto out;
> + }
>  
>   if (dbg->control & KVM_GUESTDBG_ENABLE) {
>   vcpu->guest_debug = dbg->control;
> @@ -2830,6 +2836,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu 
> *vcpu,
>   atomic_andnot(CPUSTAT_P, >arch.sie_block->cpuflags);
>   }
>  
> +out:
> + vcpu_put(vcpu);
>   return rc;
>  }
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index eb70974..a074b0bd 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7602,6 +7602,8 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu 
> *vcpu,
>   unsigned long rflags;
>   int i, r;
>  
> + vcpu_load(vcpu);
> +
>   if (dbg->control & (KVM_GUESTDBG_INJECT_DB | KVM_GUESTDBG_INJECT_BP)) {
>   r = -EBUSY;
>   if (vcpu->arch.exception.pending)
> @@ -7647,7 +7649,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu 
> *vcpu,
>   r = 0;
>  
>  out:
> -
> + vcpu_put(vcpu);
>   return r;
>  }
>  
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 0a8a490..c688eb7 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -2642,9 +2642,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
>   r = -EFAULT;
>   if (copy_from_user(, argp, sizeof(dbg)))
>   goto out;
> - vcpu_load(vcpu);
>   r = kvm_arch_vcpu_ioctl_set_guest_debug(vcpu, );
> - vcpu_put(vcpu);
>   break;
>   }
>   case KVM_SET_SIGNAL_MASK: {
> 

Reviewed-by: David Hildenbrand <da...@redhat.com>

-- 

Thanks,

David / dhildenb
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 12/16] KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_get_fpu

2017-11-29 Thread David Hildenbrand
On 29.11.2017 17:41, Christoffer Dall wrote:
> Move vcpu_load() and vcpu_put() into the architecture specific
> implementations of kvm_arch_vcpu_ioctl_get_fpu().
> 
> Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
> ---
>  arch/s390/kvm/kvm-s390.c | 4 
>  arch/x86/kvm/x86.c   | 7 +--
>  virt/kvm/kvm_main.c  | 2 --
>  3 files changed, 9 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index 4bf80b5..88dcb89 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -2765,6 +2765,8 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, 
> struct kvm_fpu *fpu)
>  
>  int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
>  {
> + vcpu_load(vcpu);
> +
>   /* make sure we have the latest values */
>   save_fpu_regs();
>   if (MACHINE_HAS_VX)
> @@ -2773,6 +2775,8 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, 
> struct kvm_fpu *fpu)
>   else
>   memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
>   fpu->fpc = vcpu->run->s.regs.fpc;
> +
> + vcpu_put(vcpu);

This is one example where we need the vcpu_put/load.

>   return 0;
>  }
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index a074b0bd..8b54567 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7679,9 +7679,11 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu 
> *vcpu,
>  
>  int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
>  {
> - struct fxregs_state *fxsave =
> - >arch.guest_fpu.state.fxsave;
> + struct fxregs_state *fxsave;
>  
> + vcpu_load(vcpu);
> +
> + fxsave = >arch.guest_fpu.state.fxsave;
>   memcpy(fpu->fpr, fxsave->st_space, 128);
>   fpu->fcw = fxsave->cwd;
>   fpu->fsw = fxsave->swd;
> @@ -7691,6 +7693,7 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, 
> struct kvm_fpu *fpu)
>   fpu->last_dp = fxsave->rdp;
>   memcpy(fpu->xmm, fxsave->xmm_space, sizeof fxsave->xmm_space);
>  
> + vcpu_put(vcpu);
>   return 0;
>  }
>  
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index c688eb7..73ad70a 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -2673,9 +2673,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
>   r = -ENOMEM;
>   if (!fpu)
>   goto out;
> - vcpu_load(vcpu);
>   r = kvm_arch_vcpu_ioctl_get_fpu(vcpu, fpu);
> - vcpu_put(vcpu);
>   if (r)
>   goto out;
>   r = -EFAULT;
> 

Reviewed-by: David Hildenbrand <da...@redhat.com>

-- 

Thanks,

David / dhildenb
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 13/16] KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_set_fpu

2017-11-29 Thread David Hildenbrand
On 29.11.2017 17:41, Christoffer Dall wrote:
> Move vcpu_load() and vcpu_put() into the architecture specific
> implementations of kvm_arch_vcpu_ioctl_set_fpu().
> 
> Signed-off-by: Christoffer Dall <christoffer.d...@linaro.org>
> ---
>  arch/s390/kvm/kvm-s390.c | 15 ---
>  arch/x86/kvm/x86.c   |  8 ++--
>  virt/kvm/kvm_main.c  |  2 --
>  3 files changed, 18 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
> index 88dcb89..43278f3 100644
> --- a/arch/s390/kvm/kvm-s390.c
> +++ b/arch/s390/kvm/kvm-s390.c
> @@ -2752,15 +2752,24 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu 
> *vcpu,
>  
>  int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
>  {
> - if (test_fp_ctl(fpu->fpc))
> - return -EINVAL;
> + int ret = 0;
> +
> + vcpu_load(vcpu);
> +
> + if (test_fp_ctl(fpu->fpc)) {
> + ret = -EINVAL;
> + goto out;
> + }
>   vcpu->run->s.regs.fpc = fpu->fpc;
>   if (MACHINE_HAS_VX)
>   convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
>(freg_t *) fpu->fprs);
>   else
>   memcpy(vcpu->run->s.regs.fprs, >fprs, sizeof(fpu->fprs));
> - return 0;
> +
> +out:
> + vcpu_put(vcpu);
> + return ret;
>  }
>  
>  int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 8b54567..fd8b92f 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7699,8 +7699,11 @@ int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, 
> struct kvm_fpu *fpu)
>  
>  int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
>  {
> - struct fxregs_state *fxsave =
> - >arch.guest_fpu.state.fxsave;
> + struct fxregs_state *fxsave;
> +
> + vcpu_load(vcpu);
> +
> + fxsave = >arch.guest_fpu.state.fxsave;
>  
>   memcpy(fxsave->st_space, fpu->fpr, 128);
>   fxsave->cwd = fpu->fcw;
> @@ -7711,6 +7714,7 @@ int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, 
> struct kvm_fpu *fpu)
>   fxsave->rdp = fpu->last_dp;
>   memcpy(fxsave->xmm_space, fpu->xmm, sizeof fxsave->xmm_space);
>  
> + vcpu_put(vcpu);
>   return 0;
>  }
>  
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 73ad70a..06751bb 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -2689,9 +2689,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
>   fpu = NULL;
>   goto out;
>   }
> - vcpu_load(vcpu);
>   r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu);
> - vcpu_put(vcpu);
>   break;
>   }
>   default:
> 

Reviewed-by: David Hildenbrand <da...@redhat.com>

-- 

Thanks,

David / dhildenb
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 15/16] KVM: arm/arm64: Avoid vcpu_load for other vcpu ioctls than KVM_RUN

2017-11-29 Thread David Hildenbrand

> +++ b/virt/kvm/arm/arm.c
> @@ -381,14 +381,11 @@ static void vcpu_power_off(struct kvm_vcpu *vcpu)
>  int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
>   struct kvm_mp_state *mp_state)
>  {
> - vcpu_load(vcpu);
> -
>   if (vcpu->arch.power_off)
>   mp_state->mp_state = KVM_MP_STATE_STOPPED;
>   else
>   mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
>  
> - vcpu_put(vcpu);
>   return 0;
>  }

Okay, this also makes sense on other architectures. The important thing
is only that we hold the vcpu mutex.


-- 

Thanks,

David / dhildenb
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 01/16] KVM: Take vcpu->mutex outside vcpu_load

2017-11-29 Thread David Hildenbrand
On 29.11.2017 17:41, Christoffer Dall wrote:
> As we're about to call vcpu_load() from architecture-specific
> implementations of the KVM vcpu ioctls, but yet we access data
> structures protected by the vcpu->mutex in the generic code, factor
> this logic out from vcpu_load().
> 
> Signed-off-by: Christoffer Dall 
> ---
>  arch/x86/kvm/vmx.c   |  4 +---
>  arch/x86/kvm/x86.c   | 20 +++-
>  include/linux/kvm_host.h |  2 +-
>  virt/kvm/kvm_main.c  | 17 ++---
>  4 files changed, 15 insertions(+), 28 deletions(-)
> 
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 714a067..e7c46d2 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -9559,10 +9559,8 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, 
> struct loaded_vmcs *vmcs)
>  static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
>  {
> struct vcpu_vmx *vmx = to_vmx(vcpu);
> -   int r;
>  
> -   r = vcpu_load(vcpu);
> -   BUG_ON(r);
> +   vcpu_load(vcpu);
I am most likely missing something, why don't we have to take the lock
in these cases?

> vmx_switch_vmcs(vcpu, >vmcs01);
> free_nested(vmx);
> vcpu_put(vcpu);
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 34c85aa..9b8f864 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -7747,16 +7747,12 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
>  
>  int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
>  {
> - int r;
> -
>   kvm_vcpu_mtrr_init(vcpu);
> - r = vcpu_load(vcpu);
> - if (r)
> - return r;
> + vcpu_load(vcpu);
>   kvm_vcpu_reset(vcpu, false);
>   kvm_mmu_setup(vcpu);
>   vcpu_put(vcpu);
> - return r;
> + return 0;
>  }
>  
>  void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
> @@ -7766,13 +7762,15 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
>  
>   kvm_hv_vcpu_postcreate(vcpu);
>  
> - if (vcpu_load(vcpu))
> + if (mutex_lock_killable(>mutex))
>   return;
> + vcpu_load(vcpu);
>   msr.data = 0x0;
>   msr.index = MSR_IA32_TSC;
>   msr.host_initiated = true;
>   kvm_write_tsc(vcpu, );
>   vcpu_put(vcpu);
> + mutex_unlock(>mutex);
>  
>   if (!kvmclock_periodic_sync)
>   return;
> @@ -7783,11 +7781,9 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
>  
>  void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
>  {
> - int r;
>   vcpu->arch.apf.msr_val = 0;
>  
> - r = vcpu_load(vcpu);
> - BUG_ON(r);
> + vcpu_load(vcpu);
>   kvm_mmu_unload(vcpu);
>   vcpu_put(vcpu);
>  
> @@ -8155,9 +8151,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long 
> type)
>  
>  static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
>  {
> - int r;
> - r = vcpu_load(vcpu);
> - BUG_ON(r);
> + vcpu_load(vcpu);
>   kvm_mmu_unload(vcpu);
>   vcpu_put(vcpu);
>  }
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 2e754b7..a000dd8 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -533,7 +533,7 @@ static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu)
>  int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
>  void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
>  
> -int __must_check vcpu_load(struct kvm_vcpu *vcpu);
> +void vcpu_load(struct kvm_vcpu *vcpu);
>  void vcpu_put(struct kvm_vcpu *vcpu);
>  
>  #ifdef __KVM_HAVE_IOAPIC
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index f169ecc..39961fb 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -146,17 +146,12 @@ bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
>  /*
>   * Switches to specified vcpu, until a matching vcpu_put()
>   */
> -int vcpu_load(struct kvm_vcpu *vcpu)
> +void vcpu_load(struct kvm_vcpu *vcpu)
>  {
> - int cpu;
> -
> - if (mutex_lock_killable(>mutex))
> - return -EINTR;
> - cpu = get_cpu();
> + int cpu = get_cpu();

missing empty line.

>   preempt_notifier_register(>preempt_notifier);
>   kvm_arch_vcpu_load(vcpu, cpu);
>   put_cpu();
> - return 0;
>  }
>  EXPORT_SYMBOL_GPL(vcpu_load);
>  
> @@ -166,7 +161,6 @@ void vcpu_put(struct kvm_vcpu *vcpu)
>   kvm_arch_vcpu_put(vcpu);
>   preempt_notifier_unregister(>preempt_notifier);
>   preempt_enable();
> - mutex_unlock(>mutex);
>  }
>  EXPORT_SYMBOL_GPL(vcpu_put);
>  
> @@ -2529,9 +2523,9 @@ static long kvm_vcpu_ioctl(struct file *filp,
>  #endif
>  
>  
> - r = vcpu_load(vcpu);
> - if (r)
> - return r;
> + if (mutex_lock_killable(>mutex))
> + return -EINTR;
> + vcpu_load(vcpu);
>   switch (ioctl) {
>   case KVM_RUN: {
>   struct pid *oldpid;
> @@ -2704,6 +2698,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
>   }
>  out:
>   vcpu_put(vcpu);
> + mutex_unlock(>mutex);
>   kfree(fpu);
>   kfree(kvm_sregs);
>   

Re: [PATCH 00/15] Move vcpu_load and vcpu_put calls to arch code

2017-11-28 Thread David Hildenbrand
On 25.11.2017 21:57, Christoffer Dall wrote:
> Some architectures may decide to do different things during
> kvm_arch_vcpu_load depending on the ioctl being executed.  For example,
> arm64 is about to do significant work in vcpu load/put when running a
> vcpu, but it's problematic to do this for any other vcpu ioctl than
> KVM_RUN.
> 
> Further, while it may be possible to call kvm_arch_vcpu_load() for a
> number of non-KVM_RUN ioctls, it makes the KVM/ARM code more difficult
> to reason about, especially after my optimization series, because a lot
> of things can now happen, where we have to consider if we're really in
> the process of running a vcpu or not.
> 
> This series will first move the vcpu_load() and vcpu_put() calls in the
> arch generic dispatch function into each case of the switch statement
> and then, one-by-one, pushed the calls down into the architecture
> specific code making the changes for each ioctl as required.
> 
> Thanks,
> -Christoffer
> 
> Christoffer Dall (15):
>   KVM: Prepare for moving vcpu_load/vcpu_put into arch specific code
>   KVM: Factor out vcpu->pid adjustment for KVM_RUN
>   KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_run
>   KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_get_regs
>   KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_set_regs
>   KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_get_sregs
>   KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_set_sregs
>   KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_get_mpstate
>   KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_set_mpstate
>   KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_translate
>   KVM: Move vcpu_load to arch-specific
> kvm_arch_vcpu_ioctl_set_guest_debug
>   KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_get_fpu
>   KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl_set_fpu
>   KVM: Move vcpu_load to arch-specific kvm_arch_vcpu_ioctl
>   KVM: arm/arm64: Avoid vcpu_load for other vcpu ioctls than KVM_RUN
> 
>  arch/arm64/kvm/guest.c |  17 +--
>  arch/mips/kvm/mips.c   |  72 +++
>  arch/powerpc/kvm/book3s.c  |  38 +-
>  arch/powerpc/kvm/booke.c   |  65 +++-
>  arch/powerpc/kvm/powerpc.c |  24 ++---
>  arch/s390/kvm/kvm-s390.c   | 119 +---
>  arch/x86/kvm/x86.c | 121 
> ++---
>  include/linux/kvm_host.h   |   2 +
>  virt/kvm/arm/arm.c |  91 +-
>  virt/kvm/kvm_main.c|  43 +++-
>  10 files changed, 463 insertions(+), 129 deletions(-)
> 

Looking at the amount of code we duplicate, I wonder if simple ifdefery
(if possible) would be easier for the single known "special" case.

(most probably an unpopular opinion :) )

-- 

Thanks,

David / dhildenb
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH] KVM: race-free exit from KVM_RUN without POSIX signals

2017-02-16 Thread David Hildenbrand
>   post_kvm_run_save(vcpu);
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 7964b970b9ad..f51d5082a377 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -218,7 +218,8 @@ struct kvm_hyperv_exit {
>  struct kvm_run {
>   /* in */
>   __u8 request_interrupt_window;
> - __u8 padding1[7];
> + __u8 immediate_exit;

As mentioned already on IRC, maybe something like "block_vcpu_run" would
fit better now.

But this is also ok and looks good to me.

Reviewed-by: David Hildenbrand <da...@redhat.com>

-- 
Thanks,

David
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC 00/55] Nested Virtualization on KVM/ARM

2017-01-09 Thread David Hildenbrand



Even though this work is not complete (see limitations below), I'd appreciate
early feedback on this RFC. Specifically, I'm interested in:
- Is it better to have a kernel config or to make it configurable at runtime?


x86 and s390x have a kernel module parameter (nested) that can only be
changed when loading the module and should default to false. So the
admin explicitly has to enable it. Maybe going the same path makes
sense.

--

David
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v4 02/12] KVM: define common KVM_GUESTDBG_USE_SW/HW_BP bits

2015-05-15 Thread David Hildenbrand
 Am 15.05.2015 um 16:27 schrieb Alex Bennée:
  +++ b/arch/s390/include/uapi/asm/kvm.h
  @@ -114,8 +114,6 @@ struct kvm_fpu {
  __u64 fprs[16];
   };
  
  -#define KVM_GUESTDBG_USE_HW_BP 0x0001
 [...]
  +++ b/include/uapi/linux/kvm.h
 [...]
  +#define KVM_GUESTDBG_USE_SW_BP (1  16)
  +#define KVM_GUESTDBG_USE_HW_BP (1  17)
 
 This is an ABI break for s390, no?
 
 David, do you remember why we do not use KVM_GUESTDBG_USE_SW_BP?
 

We never had to tell the kernel about software breakpoints as this is all
handled via 4 byte DIAG instructions until now. We don't have to turn this
mechanism on. QEMU can directly insert the desired DIAG instructions and gets
notified when they are about to get executed.

(But we still have 2 byte breakpoint support todo - still tbd how exactly this
will be realized - could be turned on via such a mechanism)

The problem is, that these bits are arch specific, now Alex wants to unify
them for all archs.

So yes, this is an ABI break for us and breaks hardware breakpoints.(I think
the first version of this patch didn't contain this ABI break when I had a look)

I wonder if it wouldn't make more sense to

- introduce new bits in the arch-unspecific section
- rework the existing implementers to accept both bits

Or to simply leave stuff as it is and handle it via arch specific bits.

David

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 04/10] KVM: arm: guest debug, add stub KVM_SET_GUEST_DEBUG ioctl

2015-04-14 Thread David Hildenbrand
 On Tue, Mar 31, 2015 at 04:08:02PM +0100, Alex Bennée wrote:
  This commit adds a stub function to support the KVM_SET_GUEST_DEBUG
  ioctl. Currently any operation flag will return EINVAL. Actual
  functionality will be added with further patches.
  
  Signed-off-by: Alex Bennée alex.ben...@linaro.org.
  
  ---
  v2
- simplified form of the ioctl (stuff will go into setup_debug)
  
  diff --git a/Documentation/virtual/kvm/api.txt 
  b/Documentation/virtual/kvm/api.txt
  index b112efc..06c5064 100644
  --- a/Documentation/virtual/kvm/api.txt
  +++ b/Documentation/virtual/kvm/api.txt
  @@ -2604,7 +2604,7 @@ handled.
   4.87 KVM_SET_GUEST_DEBUG
   
   Capability: KVM_CAP_SET_GUEST_DEBUG
  -Architectures: x86, s390, ppc
  +Architectures: x86, s390, ppc, arm64
   Type: vcpu ioctl
   Parameters: struct kvm_guest_debug (in)
   Returns: 0 on success; -1 on error
  diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
  index 5560f74..445933d 100644
  --- a/arch/arm/kvm/arm.c
  +++ b/arch/arm/kvm/arm.c
  @@ -183,6 +183,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long 
  ext)
  case KVM_CAP_ARM_PSCI:
  case KVM_CAP_ARM_PSCI_0_2:
  case KVM_CAP_READONLY_MEM:
  +   case KVM_CAP_SET_GUEST_DEBUG:
  r = 1;
  break;
 
 shouldn't you wait with advertising this capability until you've
 implemented support for it?
 

I think this would work for now, however it's not very practical
- in the end one has to sense which debug flags are actually supported.

Question is if he wants to add initial support and extend functionality and
flags with each patch or enable the whole set of features in one shot at the
end.

Doing the latter seems more practicable to me (especially as the debug features
are added in the same patch series).

David

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 05/10] KVM: arm: introduce kvm_arch_setup/clear_debug()

2015-04-01 Thread David Hildenbrand
 This is a precursor for later patches which will need to do more to
 setup debug state before entering the hyp.S switch code. The existing
 functionality for setting mdcr_el2 has been moved out of hyp.S and now
 uses the value kept in vcpu-arch.mdcr_el2.
 
 This also moves the conditional setting of the TDA bit from the hyp code
 into the C code.
 
 Signed-off-by: Alex Bennée alex.ben...@linaro.org
 
  create mode 100644 arch/arm64/kvm/debug.c
 
 diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
 index 41008cd..8c01c97 100644
 --- a/arch/arm/include/asm/kvm_host.h
 +++ b/arch/arm/include/asm/kvm_host.h
 @@ -242,5 +242,7 @@ static inline void kvm_arch_hardware_unsetup(void) {}
  static inline void kvm_arch_sync_events(struct kvm *kvm) {}
  static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
  static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 +static inline void kvm_arch_setup_debug(struct kvm_vcpu *vcpu) {}
 +static inline void kvm_arch_clear_debug(struct kvm_vcpu *vcpu) {}

Do you really want to call these functions kvm_arch.. although they are not
defined for other arch and not triggered by common code?

kvm_setup ... or kvm_arm_setup...

 
  #endif /* __ARM_KVM_HOST_H__ */
 diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
 index 445933d..7ea8b0e 100644
 --- a/arch/arm/kvm/arm.c
[...]
 +#include linux/kvm_host.h
 +
 +#include asm/kvm_arm.h
 +#include asm/kvm_host.h
 +
 +/**
 + * kvm_arch_setup_debug - set-up debug related stuff
 + *
 + * @vcpu:the vcpu pointer
 + *
 + * This is called before each entry in to the hypervisor to setup any
 + * debug related registers. Currently this just ensures we will trap
 + * access to:
 + *  - Performance monitors (MDCR_EL2_TPM/MDCR_EL2_TPMCR)
 + *  - Debug ROM Address (MDCR_EL2_TDRA)
 + *  - Power down debug registers (MDCR_EL2_TDOSA)
 + *
 + * Additionally the hypervisor lazily saves/restores the debug
 + * register state. If it is not currently doing so (arch.debug_flags)
 + * then we also need to ensure we trap if the guest messes with them
 + * so we know we need to save them.
 + */
 +
 +void kvm_arch_setup_debug(struct kvm_vcpu *vcpu)
 +{
 + vcpu-arch.mdcr_el2 |= (MDCR_EL2_TPM | MDCR_EL2_TPMCR);
 + vcpu-arch.mdcr_el2 |= (MDCR_EL2_TDRA | MDCR_EL2_TDOSA);

I'd put that into a single assignment.

 +
 + if (!vcpu-arch.debug_flags  KVM_ARM64_DEBUG_DIRTY)
 + vcpu-arch.mdcr_el2 |= MDCR_EL2_TDA;
 + else
 + vcpu-arch.mdcr_el2 = ~MDCR_EL2_TDA;
 +
 +}
 +
 +void kvm_arch_clear_debug(struct kvm_vcpu *vcpu)
 +{
 + /* Nothing to do yet */

Wonder if that would be the right place to clear MDCR_EL2_TDA unconditionally?
But see my comment below.

 +}
 diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
 index 5befd01..be92bfe1 100644
 --- a/arch/arm64/kvm/hyp.S
 +++ b/arch/arm64/kvm/hyp.S
 @@ -768,17 +768,8 @@
   mov x2, #(1  15)  // Trap CP15 Cr=15
   msr hstr_el2, x2
 
 - mrs x2, mdcr_el2
 - and x2, x2, #MDCR_EL2_HPMN_MASK
 - orr x2, x2, #(MDCR_EL2_TPM | MDCR_EL2_TPMCR)
 - orr x2, x2, #(MDCR_EL2_TDRA | MDCR_EL2_TDOSA)
 -
 - // Check for KVM_ARM64_DEBUG_DIRTY, and set debug to trap
 - // if not dirty.
 - ldr x3, [x0, #VCPU_DEBUG_FLAGS]
 - tbnzx3, #KVM_ARM64_DEBUG_DIRTY_SHIFT, 1f
 - orr x2, x2,  #MDCR_EL2_TDA
 -1:

*neither an assembler nor arm expert*
The existing code always cleared all bits except MDCR_EL2_HPMN_MASK. So they
remained set.

We would now always overwrite these bits with the values from 
vcpu-arch.mdcr_el2, is that okay?

 + // Monitor Debug Config - see kvm_arch_setup_debug()
 + ldr x2, [x0, #VCPU_MDCR_EL2]
   msr mdcr_el2, x2
  .endm
 



David

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm