[PATCH v7.1 9/9] KVM: arm64: Add support for creating PUD hugepages at stage 2

2018-09-25 Thread Punit Agrawal
KVM only supports PMD hugepages at stage 2. Now that the various page
handling routines are updated, extend the stage 2 fault handling to
map in PUD hugepages.

Addition of PUD hugepage support enables additional page sizes (e.g.,
1G with 4K granule) which can be useful on cores that support mapping
larger block sizes in the TLB entries.

Signed-off-by: Punit Agrawal 
Cc: Christoffer Dall 
Cc: Marc Zyngier 
Cc: Russell King 
Cc: Catalin Marinas 
Cc: Will Deacon 
---

v7 -> v7.1

* Added arm helper kvm_stage2_has_pud()
* Added check for PUD level present at stage 2
* Dropped redundant comment
* Fixed up kvm_pud_mkhuge() to complain on arm

 arch/arm/include/asm/kvm_mmu.h |  20 +
 arch/arm/include/asm/stage2_pgtable.h  |   5 ++
 arch/arm64/include/asm/kvm_mmu.h   |  16 
 arch/arm64/include/asm/pgtable-hwdef.h |   2 +
 arch/arm64/include/asm/pgtable.h   |   2 +
 virt/kvm/arm/mmu.c | 106 +++--
 6 files changed, 145 insertions(+), 6 deletions(-)

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index a42b9505c9a7..da5f078ae68c 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -84,11 +84,14 @@ void kvm_clear_hyp_idmap(void);
 
 #define kvm_pfn_pte(pfn, prot) pfn_pte(pfn, prot)
 #define kvm_pfn_pmd(pfn, prot) pfn_pmd(pfn, prot)
+#define kvm_pfn_pud(pfn, prot) (__pud(0))
 
 #define kvm_pud_pfn(pud)   ({ BUG(); 0; })
 
 
 #define kvm_pmd_mkhuge(pmd)pmd_mkhuge(pmd)
+/* No support for pud hugepages */
+#define kvm_pud_mkhuge(pud)({ BUG(); pud; })
 
 /*
  * The following kvm_*pud*() functions are provided strictly to allow
@@ -105,6 +108,23 @@ static inline bool kvm_s2pud_readonly(pud_t *pud)
return false;
 }
 
+static inline void kvm_set_pud(pud_t *pud, pud_t new_pud)
+{
+   BUG();
+}
+
+static inline pud_t kvm_s2pud_mkwrite(pud_t pud)
+{
+   BUG();
+   return pud;
+}
+
+static inline pud_t kvm_s2pud_mkexec(pud_t pud)
+{
+   BUG();
+   return pud;
+}
+
 static inline bool kvm_s2pud_exec(pud_t *pud)
 {
BUG();
diff --git a/arch/arm/include/asm/stage2_pgtable.h 
b/arch/arm/include/asm/stage2_pgtable.h
index f6a7ea805232..ec1567d9eb4b 100644
--- a/arch/arm/include/asm/stage2_pgtable.h
+++ b/arch/arm/include/asm/stage2_pgtable.h
@@ -68,4 +68,9 @@ stage2_pmd_addr_end(struct kvm *kvm, phys_addr_t addr, 
phys_addr_t end)
 #define stage2_pmd_table_empty(kvm, pmdp)  kvm_page_empty(pmdp)
 #define stage2_pud_table_empty(kvm, pudp)  false
 
+static inline bool kvm_stage2_has_pud(struct kvm *kvm)
+{
+   return KVM_VTCR_SL0 == VTCR_SL_L1;
+}
+
 #endif /* __ARM_S2_PGTABLE_H_ */
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 3baf72705dcc..b4e9c2cceecb 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -184,12 +184,16 @@ void kvm_clear_hyp_idmap(void);
 #define kvm_mk_pgd(pudp)   \
__pgd(__phys_to_pgd_val(__pa(pudp)) | PUD_TYPE_TABLE)
 
+#define kvm_set_pud(pudp, pud) set_pud(pudp, pud)
+
 #define kvm_pfn_pte(pfn, prot) pfn_pte(pfn, prot)
 #define kvm_pfn_pmd(pfn, prot) pfn_pmd(pfn, prot)
+#define kvm_pfn_pud(pfn, prot) pfn_pud(pfn, prot)
 
 #define kvm_pud_pfn(pud)   pud_pfn(pud)
 
 #define kvm_pmd_mkhuge(pmd)pmd_mkhuge(pmd)
+#define kvm_pud_mkhuge(pud)pud_mkhuge(pud)
 
 static inline pte_t kvm_s2pte_mkwrite(pte_t pte)
 {
@@ -203,6 +207,12 @@ static inline pmd_t kvm_s2pmd_mkwrite(pmd_t pmd)
return pmd;
 }
 
+static inline pud_t kvm_s2pud_mkwrite(pud_t pud)
+{
+   pud_val(pud) |= PUD_S2_RDWR;
+   return pud;
+}
+
 static inline pte_t kvm_s2pte_mkexec(pte_t pte)
 {
pte_val(pte) &= ~PTE_S2_XN;
@@ -215,6 +225,12 @@ static inline pmd_t kvm_s2pmd_mkexec(pmd_t pmd)
return pmd;
 }
 
+static inline pud_t kvm_s2pud_mkexec(pud_t pud)
+{
+   pud_val(pud) &= ~PUD_S2_XN;
+   return pud;
+}
+
 static inline void kvm_set_s2pte_readonly(pte_t *ptep)
 {
pteval_t old_pteval, pteval;
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h 
b/arch/arm64/include/asm/pgtable-hwdef.h
index 10ae592b78b8..e327665e94d1 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -193,6 +193,8 @@
 #define PMD_S2_RDWR(_AT(pmdval_t, 3) << 6)   /* HAP[2:1] */
 #define PMD_S2_XN  (_AT(pmdval_t, 2) << 53)  /* XN[1:0] */
 
+#define PUD_S2_RDONLY  (_AT(pudval_t, 1) << 6)   /* HAP[2:1] */
+#define PUD_S2_RDWR(_AT(pudval_t, 3) << 6)   /* HAP[2:1] */
 #define PUD_S2_XN  (_AT(pudval_t, 2) << 53)  /* XN[1:0] */
 
 /*
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 4d9476e420d9..0afc34f94ff5 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -389,6 +389,8 @@ static inline int pmd_protnone(pmd_t 

Re: [PATCH v6 00/18] APEI in_nmi() rework

2018-09-25 Thread Borislav Petkov
On Fri, Sep 21, 2018 at 11:16:47PM +0100, James Morse wrote:
> Hello,
> 
> The GHES driver has collected quite a few bugs:
> 
> ghes_proc() at ghes_probe() time can be interrupted by an NMI that
> will clobber the ghes->estatus fields, flags, and the buffer_paddr.
> 
> ghes_copy_tofrom_phys() uses in_nmi() to decide which path to take. arm64's
> SEA taking both paths, depending on what it interrupted.
> 
> There is no guarantee that queued memory_failure() errors will be processed
> before this CPU returns to user-space.
> 
> x86 can't TLBI from interrupt-masked code which this driver does all the
> time.
> 
> 
> This series aims to fix the first three, with an eye to fixing the
> last one with a follow-up series.
> 
> Previous postings included the SDEI notification calls, which I haven't
> finished re-testing. This series is big enough as it is.

Yeah, and everywhere I look, this thing looks overengineered. Like,
for example, what's the purpose of this ghes_esource_prealloc_size()
computing a size each time the pool changes size?

AFAICT, this size can be computed exactly *once* at driver init and be
done with it. Right?

Or am I missing something subtle?

-- 
Regards/Gruss,
Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 11/18] kvm: arm64: Dynamic configuration of VTTBR mask

2018-09-25 Thread Auger Eric
Hi Suzuki,

On 9/20/18 5:22 PM, Suzuki K Poulose wrote:
> 
> 
> On 20/09/18 15:07, Auger Eric wrote:
>> Hi Suzuki,
>> On 9/17/18 12:41 PM, Suzuki K Poulose wrote:
>>> On arm64 VTTBR_EL2:BADDR holds the base address for the stage2
>>> translation table. The Arm ARM mandates that the bits BADDR[x-1:0]
>>> should be 0, where 'x' is defined for a given IPA Size and the
>>> number of levels for a translation granule size. It is defined
>>> using some magical constants. This patch is a reverse engineered
>>> implementation to calculate the 'x' at runtime for a given ipa and
>>> number of page table levels. See patch for more details.
>>>
>>> Cc: Marc Zyngier 
>>> Cc: Christoffer Dall 
>>> Signed-off-by: Suzuki K Poulose 
>>
>>> ---
>>> Changes since V3:
>>>   - Update reference to latest ARM ARM and improve commentary
>>> ---
>>>   arch/arm64/include/asm/kvm_arm.h | 63 +---
>>>   arch/arm64/include/asm/kvm_mmu.h | 25 -
>>>   2 files changed, 81 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/arch/arm64/include/asm/kvm_arm.h
>>> b/arch/arm64/include/asm/kvm_arm.h
>>> index 14317b3a1820..3fb1d440be6e 100644
>>> --- a/arch/arm64/include/asm/kvm_arm.h
>>> +++ b/arch/arm64/include/asm/kvm_arm.h
>>> @@ -123,7 +123,6 @@
>>>   #define VTCR_EL2_SL0_MASK  (3 << VTCR_EL2_SL0_SHIFT)
>>>   #define VTCR_EL2_SL0_LVL1  (1 << VTCR_EL2_SL0_SHIFT)
>>>   #define VTCR_EL2_T0SZ_MASK 0x3f
>>> -#define VTCR_EL2_T0SZ_40B   24
>>>   #define VTCR_EL2_VS_SHIFT  19
>>>   #define VTCR_EL2_VS_8BIT   (0 << VTCR_EL2_VS_SHIFT)
>>>   #define VTCR_EL2_VS_16BIT  (1 << VTCR_EL2_VS_SHIFT)
>>> @@ -140,11 +139,8 @@
>>>    * Note that when using 4K pages, we concatenate two first level
>>> page tables
>>>    * together. With 16K pages, we concatenate 16 first level page
>>> tables.
>>>    *
>>> - * The magic numbers used for VTTBR_X in this patch can be found in
>>> Tables
>>> - * D4-23 and D4-25 in ARM DDI 0487A.b.
>>>    */
>>>
>>> -#define VTCR_EL2_T0SZ_IPA   VTCR_EL2_T0SZ_40B
>>>   #define VTCR_EL2_COMMON_BITS   (VTCR_EL2_SH0_INNER |
>>> VTCR_EL2_ORGN0_WBWA | \
>>>   VTCR_EL2_IRGN0_WBWA | VTCR_EL2_RES1)
>>>
>>> @@ -175,9 +171,64 @@
>>>   #endif
>>>
>>>   #define VTCR_EL2_FLAGS (VTCR_EL2_COMMON_BITS |
>>> VTCR_EL2_TGRAN_FLAGS)
>>> -#define VTTBR_X (VTTBR_X_TGRAN_MAGIC -
>>> VTCR_EL2_T0SZ_IPA)
>>> +/*
>>> + * ARM VMSAv8-64 defines an algorithm for finding the translation table
>>> + * descriptors in section D4.2.8 in ARM DDI 0487C.a.
>>> + *
>>> + * The algorithm defines the expectations on the BaseAddress (for
>>> the page
>>> + * table) bits resolved at each level based on the page size, entry
>>> level
>>> + * and T0SZ. The variable "x" in the algorithm also affects the
>>> VTTBR:BADDR
>>> + * for stage2 page table.
>>> + *
>>> + * The value of "x" is calculated as :
>>> + *  x = Magic_N - T0SZ
>>
>> What is not crystal clear to me is the "if SL0b,c = n" case where x get
>> a value not based on Magic_N. Please could you explain why it is not
>> relevant?
> 
> We only care about the "x" for the "entry" level of the table look up
> to make sure that the VTTBR is physical address meets the required
> alignment. In both cases, if SL0 b,c == n, x is (PAGE_SHIFT) iff the
> level you are looking at is not the "entry level". So this should always
> be page aligned, like any intermediate level table.

Oh OK I get it now.
> 
> The Magic value is needed only needed for the "entry" level due to the
> fact that we may have lesser bits to resolve (i.e, depending on your
> PAMax or in other words T0SZ) than the intermediate levels (where we
> always resolve {PAGE_SHIFT - 3} bits. This is further complicated by the
> fact that Stage2 could use different number of levels for a given T0SZ
> than the stage1.
> I acknowledge that the algorithm is a bit too cryptic and I spent quite
> sometime decode it to the formula we use below ;-).
> 
> I could update the comment to :
> 
> /*
>  * ARM VMSAv8-64 defines an algorithm for finding the translation table
>  * descriptors in section D4.2.8 in ARM DDI 0487C.a.
>  *
>  * The algorithm defines the expectations on the translation table
>  * addresses for each level, based on PAGE_SIZE, entry level
>  * and the translation table size (T0SZ). The variable "x" in the
>  * algorithm determines the alignment of a table base address at a given
>  * level and thus determines the alignment of VTTBR:BADDR for stage2
>  * page table entry level.
>  * Since the number of bits resolved at the entry level could vary
>  * depending on the T0SZ, the value of "x" is defined based on a
>  * Magic constant for a given PAGE_SIZE and Entry Level. The
>  * intermediate levels must be always aligned to the PAGE_SIZE (i.e,
>  * x = PAGE_SHIFT).
>  *
>  * The value of "x" for entry level is calculated as :
>  * x = Magic_N - T0SZ
>  *
Looks OK.

Thank you for the explanation.

Eric
> 
> ...
> 
> Suzuki
> IMPORTANT NOTICE: 

Re: [PATCH v5 16/18] kvm: arm64: Set a limit on the IPA size

2018-09-25 Thread Suzuki K Poulose

On 09/25/2018 10:59 AM, Auger Eric wrote:

Hi Suzuki,

On 9/17/18 12:41 PM, Suzuki K Poulose wrote:

So far we have restricted the IPA size of the VM to the default
value (40bits). Now that we can manage the IPA size per VM and
support dynamic stage2 page tables, we can allow VMs to have
larger IPA. This patch introduces a the maximum IPA size
supported on the host.

to be reworded
  This is decided by the following factors :


Sure



diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 51ecf0f7c912..76972b19bdd7 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -34,6 +34,9 @@
  #include 
  #include 
  
+/* Maximum phys_shift supported for any VM on this host */

+static u32 kvm_ipa_limit;
+
  /*
   * ARMv8 Reset Values
   */
@@ -135,6 +138,46 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
return kvm_timer_vcpu_reset(vcpu);
  }
  
+void kvm_set_ipa_limit(void)

+{
+   unsigned int ipa_max, va_max, parange;
+
+   parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 0x7;
+   ipa_max = id_aa64mmfr0_parange_to_phys_shift(parange);
+
+   /* Raise the limit to the default size for backward compatibility */
+   if (ipa_max < KVM_PHYS_SHIFT) {
+   WARN_ONCE(1,
+ "PARange is %d bits, unsupported configuration!",
+ ipa_max);
+   ipa_max = KVM_PHYS_SHIFT;

I don't really get what does happen in this case. The CPU cannot handle
PA up to ipa_max so can the VM run properly? In case it is a
showstopper, kvm_set_ipa_limit should return an error, cascaded by
init_common_resources. Otherwise the warning message may be reworded.


I think this was a warning added to warn against the older
Foundation model which had a 36bit PA size. So the VTCR was progammed
with a 36bit limit, while the KVM guest was allowed to create 40bit
IPA space, though it wouldn't fly well if someone tried to.

With this series, I think we may expose the real IPA_MAX (which could
be < 40bit) and warn the user if someone tried to create a VM with
40bit IPA (vm_type == 0) and let the call succeed (for the sake of ABI).

Marc, Christoffer, Eric

Thoughts ?


+   }
+
+   /* Clamp it to the PA size supported by the kernel */
+   ipa_max = (ipa_max > PHYS_MASK_SHIFT) ? PHYS_MASK_SHIFT : ipa_max;
+   /*
+* Since our stage2 table is dependent on the stage1 page table code,
+* we must always honor the following condition:
+*
+*  Number of levels in Stage1 >= Number of levels in Stage2.
+*
+* So clamp the ipa limit further down to limit the number of levels.
+* Since we can concatenate upto 16 tables at entry level, we could
+* go upto 4bits above the maximum VA addressible with the current

addressable?


Sure


+* number of levels.
+*/
+   va_max = PGDIR_SHIFT + PAGE_SHIFT - 3;
+   va_max += 4;
+
+   if (va_max < ipa_max) {
+   kvm_info("Limiting IPA limit to %dbytes due to host VA bits 
limitation\n",
+va_max);
+   ipa_max = va_max;

you have a trace for this limitation but none for the comparison against
PHYS_MASK_SHIFT.


May be I could add a message which only mentions what is the limiting
factor kernel VA vs kernel PA support


+   }
+
+   kvm_ipa_limit = ipa_max;
+}
+
  /*
   * Configure the VTCR_EL2 for this VM. The VTCR value is common
   * across all the physical CPUs on the system. We use system wide
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 43e716bc3f08..631f9a3ad99a 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -1413,6 +1413,8 @@ static int init_common_resources(void)
kvm_vmid_bits = kvm_get_vmid_bits();
kvm_info("%d-bit VMID\n", kvm_vmid_bits);
  
+	kvm_set_ipa_limit();

As we have a kvm_info for the supported vmid_bits, may be good to output
the max IPA size supported by the host whatever the applied clamps?


Sure, will do that.

Thanks
Suzuki
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 14/18] vgic: Add support for 52bit guest physical address

2018-09-25 Thread Suzuki K Poulose

Hi Eric

On 09/21/2018 03:57 PM, Auger Eric wrote:

Hi Suzuki,

On 9/17/18 12:41 PM, Suzuki K Poulose wrote:

From: Kristina Martsenko 

Add support for handling 52bit guest physical address to the
VGIC layer. So far we have limited the guest physical address
to 48bits, by explicitly masking the upper bits. This patch
removes the restriction. We do not have to check if the host
supports 52bit as the gpa is always validated during an access.
(e.g, kvm_{read/write}_guest, kvm_is_visible_gfn()).
Also, the ITS table save-restore is also not affected with
the enhancement. The DTE entries already store the bits[51:8]
of the ITT_addr (with a 256byte alignment).

Cc: Marc Zyngier 
Cc: Christoffer Dall 
Signed-off-by: Kristina Martsenko 
[ Macro clean ups, fix PROPBASER and PENDBASER accesses ]
Signed-off-by: Suzuki K Poulose 
---
  include/linux/irqchip/arm-gic-v3.h |  5 +
  virt/kvm/arm/vgic/vgic-its.c   | 36 +-
  virt/kvm/arm/vgic/vgic-mmio-v3.c   |  2 --
  3 files changed, 15 insertions(+), 28 deletions(-)

diff --git a/include/linux/irqchip/arm-gic-v3.h 
b/include/linux/irqchip/arm-gic-v3.h
index 8bdbb5f29494..e961f40992d7 100644
--- a/include/linux/irqchip/arm-gic-v3.h
+++ b/include/linux/irqchip/arm-gic-v3.h
@@ -357,6 +357,8 @@
  #define GITS_CBASER_RaWaWtGIC_BASER_CACHEABILITY(GITS_CBASER, INNER, 
RaWaWt)
  #define GITS_CBASER_RaWaWbGIC_BASER_CACHEABILITY(GITS_CBASER, INNER, 
RaWaWb)
  
+#define GITS_CBASER_ADDRESS(cbaser)	((cbaser) & GENMASK_ULL(52, 12))

nit GENMASK_ULL(51, 12), bit 52 is RES0


I will fix this.


diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c
index a2a175b08b17..b3d1f0985117 100644
--- a/virt/kvm/arm/vgic/vgic-mmio-v3.c
+++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c
@@ -364,7 +364,6 @@ static u64 vgic_sanitise_pendbaser(u64 reg)
  vgic_sanitise_outer_an);
  
  	reg &= ~PENDBASER_RES0_MASK;

-   reg &= ~GENMASK_ULL(51, 48);
  
  	return reg;

  }
@@ -382,7 +381,6 @@ static u64 vgic_sanitise_propbaser(u64 reg)
  vgic_sanitise_outer_cacheability);
  
  	reg &= ~PROPBASER_RES0_MASK;

-   reg &= ~GENMASK_ULL(51, 48);
return reg;
  }
  


Besides looks good to me.
Reviewed-by: Eric Auger 


Thanks
Suzuki
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 17/18] kvm: arm64: Limit the minimum number of page table levels

2018-09-25 Thread Suzuki K Poulose

On 09/25/2018 11:00 AM, Auger Eric wrote:

Hi Suzuki,

On 9/17/18 12:41 PM, Suzuki K Poulose wrote:

Since we are about to remove the lower limit on the IPA size,
make sure that we do not go to 1 level page table (e.g, with
32bit IPA on 64K host with concatenation) to avoid splitting
the host PMD huge pages at stage2.

Cc: Marc Zyngier 
Cc: Christoffer Dall 
Signed-off-by: Suzuki K Poulose 
---
  arch/arm64/include/asm/stage2_pgtable.h |  8 +++-
  arch/arm64/kvm/reset.c  | 12 +++-
  2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/stage2_pgtable.h 
b/arch/arm64/include/asm/stage2_pgtable.h
index 352ec4158fdf..6a56fdff0823 100644
--- a/arch/arm64/include/asm/stage2_pgtable.h
+++ b/arch/arm64/include/asm/stage2_pgtable.h
@@ -72,8 +72,14 @@
  /*
   * The number of PTRS across all concatenated stage2 tables given by the
   * number of bits resolved at the initial level.
+ * If we force more number of levels than necessary, we may have

more levels?




diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 76972b19bdd7..0393bb974b23 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c



@@ -210,7 +219,8 @@ int kvm_arm_config_vm(struct kvm *kvm, unsigned long type)
vtcr |= (kvm_get_vmid_bits() == 16) ?
VTCR_EL2_VS_16BIT :
VTCR_EL2_VS_8BIT;
-   vtcr |= VTCR_EL2_LVLS_TO_SL0(stage2_pgtable_levels(KVM_PHYS_SHIFT));
+

nit: new line not requested



Fixed all the above

Suzuki
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 18/18] kvm: arm64: Allow tuning the physical address size for VM

2018-09-25 Thread Suzuki K Poulose

On 09/25/2018 11:00 AM, Auger Eric wrote:

Hi Suzuki,
On 9/17/18 12:41 PM, Suzuki K Poulose wrote:

Allow specifying the physical address size limit for a new
VM via the kvm_type argument for the KVM_CREATE_VM ioctl. This
allows us to finalise the stage2 page table as early as possible
and hence perform the right checks on the memory slots
without complication. The size is ecnoded as Log2(PA_Size) in

encoded


...



diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index c664064f76fb..f860251ff27c 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -122,6 +122,14 @@ the default trap & emulate implementation (which changes 
the virtual
  memory layout to fit in user mode), check KVM_CAP_MIPS_VZ and use the
  flag KVM_VM_MIPS_VZ.
  
+To configure the physical address space size for a VM (IPA size) on arm64,

+check KVM_CAP_ARM_VM_PHYS_SHIFT (which returns the maximum limit for the
+IPA shift) and use KVM_VM_TYPE_ARM_PHYS_SHIFT(PHYS_SHIFT). Bits[7-0] of the
+machine type has been reserved for specifying the PHYS_SHIFT.

are reserved to pass the PHYS_SHIFT?

+The supported range is [32...IPA_LIMIT], where IPA_LIMIT could be

s/could be/is





diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 07548de5c988..2a6b29c446db 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -750,6 +750,15 @@ struct kvm_ppc_resize_hpt {
  
  #define KVM_S390_SIE_PAGE_OFFSET 1
  
+/*

+ * On arm64, machine type can be used to request the physical
+ * address size for the VM. Bits[7-0] has been reserved for the PA

s/has been reserved/are?


Thanks for spotting, fixed all the above.

Suzuki
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 17/18] kvm: arm64: Limit the minimum number of page table levels

2018-09-25 Thread Auger Eric
Hi Suzuki,

On 9/17/18 12:41 PM, Suzuki K Poulose wrote:
> Since we are about to remove the lower limit on the IPA size,
> make sure that we do not go to 1 level page table (e.g, with
> 32bit IPA on 64K host with concatenation) to avoid splitting
> the host PMD huge pages at stage2.
> 
> Cc: Marc Zyngier 
> Cc: Christoffer Dall 
> Signed-off-by: Suzuki K Poulose 
> ---
>  arch/arm64/include/asm/stage2_pgtable.h |  8 +++-
>  arch/arm64/kvm/reset.c  | 12 +++-
>  2 files changed, 18 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/stage2_pgtable.h 
> b/arch/arm64/include/asm/stage2_pgtable.h
> index 352ec4158fdf..6a56fdff0823 100644
> --- a/arch/arm64/include/asm/stage2_pgtable.h
> +++ b/arch/arm64/include/asm/stage2_pgtable.h
> @@ -72,8 +72,14 @@
>  /*
>   * The number of PTRS across all concatenated stage2 tables given by the
>   * number of bits resolved at the initial level.
> + * If we force more number of levels than necessary, we may have
more levels?
> + * stage2_pgdir_shift > IPA, in which case, stage2_pgd_ptrs will have
> + * one entry.
>   */
> -#define __s2_pgd_ptrs(ipa, lvls) (1 << ((ipa) - 
> pt_levels_pgdir_shift((lvls
> +#define pgd_ptrs_shift(ipa, pgdir_shift) \
> + ((ipa) > (pgdir_shift) ? ((ipa) - (pgdir_shift)) : 0)
> +#define __s2_pgd_ptrs(ipa, lvls) \
> + (1 << (pgd_ptrs_shift((ipa), pt_levels_pgdir_shift(lvls
>  #define __s2_pgd_size(ipa, lvls) (__s2_pgd_ptrs((ipa), (lvls)) * 
> sizeof(pgd_t))
>  
>  #define stage2_pgd_ptrs(kvm) __s2_pgd_ptrs(kvm_phys_shift(kvm), 
> kvm_stage2_levels(kvm))
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index 76972b19bdd7..0393bb974b23 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -190,10 +190,19 @@ int kvm_arm_config_vm(struct kvm *kvm, unsigned long 
> type)
>  {
>   u64 vtcr = VTCR_EL2_FLAGS;
>   u64 parange;
> + u8 lvls;
>  
>   if (type)
>   return -EINVAL;
>  
> + /*
> +  * Use a minimum 2 level page table to prevent splitting
> +  * host PMD huge pages at stage2.
> +  */
> + lvls = stage2_pgtable_levels(KVM_PHYS_SHIFT);
> + if (lvls < 2)
> + lvls = 2;
> +
>   parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 7;
>   if (parange > ID_AA64MMFR0_PARANGE_MAX)
>   parange = ID_AA64MMFR0_PARANGE_MAX;
> @@ -210,7 +219,8 @@ int kvm_arm_config_vm(struct kvm *kvm, unsigned long type)
>   vtcr |= (kvm_get_vmid_bits() == 16) ?
>   VTCR_EL2_VS_16BIT :
>   VTCR_EL2_VS_8BIT;
> - vtcr |= VTCR_EL2_LVLS_TO_SL0(stage2_pgtable_levels(KVM_PHYS_SHIFT));
> +
nit: new line not requested

Thanks

Eric
> + vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);
>   vtcr |= VTCR_EL2_T0SZ(KVM_PHYS_SHIFT);
>  
>   kvm->arch.vtcr = vtcr;
> 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v5 18/18] kvm: arm64: Allow tuning the physical address size for VM

2018-09-25 Thread Auger Eric
Hi Suzuki,
On 9/17/18 12:41 PM, Suzuki K Poulose wrote:
> Allow specifying the physical address size limit for a new
> VM via the kvm_type argument for the KVM_CREATE_VM ioctl. This
> allows us to finalise the stage2 page table as early as possible
> and hence perform the right checks on the memory slots
> without complication. The size is ecnoded as Log2(PA_Size) in
encoded
> bits[7:0] of the type field. For backward compatibility the
> value 0 is reserved and implies 40bits. Also, lift the limit
> of the IPA to host limit and allow lower IPA sizes (e.g, 32).
> 
> The userspace could check the extension KVM_CAP_ARM_VM_PHYS_SHIFT
> for the availability of this feature. The cap check returns the
> maximum limit for the physical address shift supported by the host.
> 
> Cc: Marc Zyngier 
> Cc: Christoffer Dall 
> Cc: Peter Maydel 
> Cc: Paolo Bonzini 
> Cc: Radim Krčmář 
> Signed-off-by: Suzuki K Poulose 
> ---
> Changes since v4:
>  - Fold the introduction of the KVM_CAP_ARM_VM_PHYS_SHIFT to this
>patch to allow detection of the availability of the feature for
>userspace.
>  - Document the API
>  - Restrict the feature only to arm64.
> Changes since V3:
>  - Switch to a CAP, that can be checkd via EXTENSIONS on KVM device
>fd, rather than a dedicated ioctl.
> ---
>  Documentation/virtual/kvm/api.txt   |  8 
>  arch/arm64/include/asm/stage2_pgtable.h | 20 
>  arch/arm64/kvm/reset.c  | 20 
>  include/uapi/linux/kvm.h| 10 ++
>  4 files changed, 34 insertions(+), 24 deletions(-)
> 
> diff --git a/Documentation/virtual/kvm/api.txt 
> b/Documentation/virtual/kvm/api.txt
> index c664064f76fb..f860251ff27c 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -122,6 +122,14 @@ the default trap & emulate implementation (which changes 
> the virtual
>  memory layout to fit in user mode), check KVM_CAP_MIPS_VZ and use the
>  flag KVM_VM_MIPS_VZ.
>  
> +To configure the physical address space size for a VM (IPA size) on arm64,
> +check KVM_CAP_ARM_VM_PHYS_SHIFT (which returns the maximum limit for the
> +IPA shift) and use KVM_VM_TYPE_ARM_PHYS_SHIFT(PHYS_SHIFT). Bits[7-0] of the
> +machine type has been reserved for specifying the PHYS_SHIFT.
are reserved to pass the PHYS_SHIFT?
> +The supported range is [32...IPA_LIMIT], where IPA_LIMIT could be
s/could be/is
> +identified by checking KVM_CAP_ARM_VM_PHYS_SHIFT. For backward compatibility
> +a value of 0 selects 40bits.
> +
>  
>  4.3 KVM_GET_MSR_INDEX_LIST, KVM_GET_MSR_FEATURE_INDEX_LIST
>  
> diff --git a/arch/arm64/include/asm/stage2_pgtable.h 
> b/arch/arm64/include/asm/stage2_pgtable.h
> index 6a56fdff0823..0b339f5a4a7c 100644
> --- a/arch/arm64/include/asm/stage2_pgtable.h
> +++ b/arch/arm64/include/asm/stage2_pgtable.h
> @@ -42,28 +42,8 @@
>   * the range (IPA_SHIFT, IPA_SHIFT - 4).
>   */
>  #define stage2_pgtable_levels(ipa)   ARM64_HW_PGTABLE_LEVELS((ipa) - 4)
> -#define STAGE2_PGTABLE_LEVELS
> stage2_pgtable_levels(KVM_PHYS_SHIFT)
>  #define kvm_stage2_levels(kvm)   VTCR_EL2_LVLS(kvm->arch.vtcr)
>  
> -/*
> - * With all the supported VA_BITs and 40bit guest IPA, the following 
> condition
> - * is always true:
> - *
> - *   STAGE2_PGTABLE_LEVELS <= CONFIG_PGTABLE_LEVELS
> - *
> - * We base our stage-2 page table walker helpers on this assumption and
> - * fall back to using the host version of the helper wherever possible.
> - * i.e, if a particular level is not folded (e.g, PUD) at stage2, we fall 
> back
> - * to using the host version, since it is guaranteed it is not folded at 
> host.
> - *
> - * If the condition breaks in the future, we can rearrange the host level
> - * definitions and reuse them for stage2. Till then...
> - */
> -#if STAGE2_PGTABLE_LEVELS > CONFIG_PGTABLE_LEVELS
> -#error "Unsupported combination of guest IPA and host VA_BITS."
> -#endif
> -
> -
>  /* stage2_pgdir_shift() is the size mapped by top-level stage2 entry for the 
> VM */
>  #define stage2_pgdir_shift(kvm)  
> pt_levels_pgdir_shift(kvm_stage2_levels(kvm))
>  #define stage2_pgdir_size(kvm)   (1ULL << 
> stage2_pgdir_shift(kvm))
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index 0393bb974b23..c9640159e11f 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -89,6 +89,9 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, 
> long ext)
>   case KVM_CAP_VCPU_EVENTS:
>   r = 1;
>   break;
> + case KVM_CAP_ARM_VM_PHYS_SHIFT:
> + r = kvm_ipa_limit;
> + break;
>   default:
>   r = 0;
>   }
> @@ -190,16 +193,25 @@ int kvm_arm_config_vm(struct kvm *kvm, unsigned long 
> type)
>  {
>   u64 vtcr = VTCR_EL2_FLAGS;
>   u64 parange;
> - u8 lvls;
> + u8 lvls, ipa_shift;
>  
> - if (type)
> + if (type & ~KVM_VM_TYPE_ARM_PHYS_SHIFT_MASK)
>  

Re: [PATCH v5 16/18] kvm: arm64: Set a limit on the IPA size

2018-09-25 Thread Auger Eric
Hi Suzuki,

On 9/17/18 12:41 PM, Suzuki K Poulose wrote:
> So far we have restricted the IPA size of the VM to the default
> value (40bits). Now that we can manage the IPA size per VM and
> support dynamic stage2 page tables, we can allow VMs to have
> larger IPA. This patch introduces a the maximum IPA size
> supported on the host.
to be reworded
 This is decided by the following factors :
> 
>  1) Maximum PARange supported by the CPUs - This can be inferred
> from the system wide safe value.
>  2) Maximum PA size supported by the host kernel (48 vs 52)
>  3) Number of levels in the host page table (as we base our
> stage2 tables on the host table helpers).
> 
> Since the stage2 page table code is dependent on the stage1
> page table, we always ensure that :
> 
>   Number of Levels at Stage1 >= Number of Levels at Stage2
> 
> So we limit the IPA to make sure that the above condition
> is satisfied. This will affect the following combinations
> of VA_BITS and IPA for different page sizes.
> 
>   Host configuration | Unsupported IPA ranges
>   39bit VA, 4K   | [44, 48]
>   36bit VA, 16K  | [41, 48]
>   42bit VA, 64K  | [47, 52]
> 
> Supporting the above combinations need independent stage2
> page table manipulation code, which would need substantial
> changes. We could pursue the solution independently and
> switch the page table code once we have it ready.
> 
> Cc: Catalin Marinas 
> Cc: Marc Zyngier 
> Cc: Christoffer Dall 
> Signed-off-by: Suzuki K Poulose 
> ---
> Changes since V2:
>  - Restrict the IPA size to limit the number of page table
>levels in stage2 to that of stage1 or less.
> ---
>  arch/arm/include/asm/kvm_mmu.h|  2 ++
>  arch/arm64/include/asm/kvm_host.h |  2 ++
>  arch/arm64/kvm/reset.c| 43 +++
>  virt/kvm/arm/arm.c|  2 ++
>  4 files changed, 49 insertions(+)
> 
> diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
> index 12ae5fbbcf01..5ad1a54f98dc 100644
> --- a/arch/arm/include/asm/kvm_mmu.h
> +++ b/arch/arm/include/asm/kvm_mmu.h
> @@ -358,6 +358,8 @@ static inline int hyp_map_aux_data(void)
>  
>  #define kvm_phys_to_vttbr(addr)  (addr)
>  
> +static inline void kvm_set_ipa_limit(void) {}
> +
>  #endif   /* !__ASSEMBLY__ */
>  
>  #endif /* __ARM_KVM_MMU_H__ */
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index 5ecd457bce7d..f0474061851d 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -513,6 +513,8 @@ static inline int kvm_arm_have_ssbd(void)
>  void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu);
>  void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu);
>  
> +void kvm_set_ipa_limit(void);
> +
>  #define __KVM_HAVE_ARCH_VM_ALLOC
>  struct kvm *kvm_arch_alloc_vm(void);
>  void kvm_arch_free_vm(struct kvm *kvm);
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index 51ecf0f7c912..76972b19bdd7 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -34,6 +34,9 @@
>  #include 
>  #include 
>  
> +/* Maximum phys_shift supported for any VM on this host */
> +static u32 kvm_ipa_limit;
> +
>  /*
>   * ARMv8 Reset Values
>   */
> @@ -135,6 +138,46 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
>   return kvm_timer_vcpu_reset(vcpu);
>  }
>  
> +void kvm_set_ipa_limit(void)
> +{
> + unsigned int ipa_max, va_max, parange;
> +
> + parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 0x7;
> + ipa_max = id_aa64mmfr0_parange_to_phys_shift(parange);
> +
> + /* Raise the limit to the default size for backward compatibility */
> + if (ipa_max < KVM_PHYS_SHIFT) {
> + WARN_ONCE(1,
> +   "PARange is %d bits, unsupported configuration!",
> +   ipa_max);
> + ipa_max = KVM_PHYS_SHIFT;
I don't really get what happens in this case. The CPU cannot handle
PAs up to ipa_max, so can the VM run properly? If this is a
showstopper, kvm_set_ipa_limit should return an error, cascaded by
init_common_resources. Otherwise the warning message may be reworded.
> + }
> +
> + /* Clamp it to the PA size supported by the kernel */
> + ipa_max = (ipa_max > PHYS_MASK_SHIFT) ? PHYS_MASK_SHIFT : ipa_max;
> + /*
> +  * Since our stage2 table is dependent on the stage1 page table code,
> +  * we must always honor the following condition:
> +  *
> +  *  Number of levels in Stage1 >= Number of levels in Stage2.
> +  *
> +  * So clamp the ipa limit further down to limit the number of levels.
> +  * Since we can concatenate upto 16 tables at entry level, we could
> +  * go upto 4bits above the maximum VA addressible with the current
addressable?
> +  * number of levels.
> +  */
> + va_max = PGDIR_SHIFT + PAGE_SHIFT - 3;
> + va_max += 4;
> +
> + if (va_max < ipa_max) {
> + kvm_info("Limiting IPA limit to %dbytes due to host 

Re: [PATCH v5 15/18] kvm: arm64: Add 52bit support for PAR to HPFAR conversion

2018-09-25 Thread Auger Eric
Hi Suzuki,

On 9/17/18 12:41 PM, Suzuki K Poulose wrote:
> Add support for handling 52bit addresses in PAR to HPFAR
> conversion. Instead of hardcoding the address limits, we
> now use PHYS_MASK_SHIFT.
> 
> Cc: Marc Zyngier 
> Cc: Christoffer Dall 
> Signed-off-by: Suzuki K Poulose 
Reviewed-by: Eric Auger 

Thanks

Eric

> ---
>  arch/arm64/include/asm/kvm_arm.h | 7 +++
>  arch/arm64/kvm/hyp/switch.c  | 2 +-
>  2 files changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_arm.h 
> b/arch/arm64/include/asm/kvm_arm.h
> index 0a37c0513ede..241d1622fa19 100644
> --- a/arch/arm64/include/asm/kvm_arm.h
> +++ b/arch/arm64/include/asm/kvm_arm.h
> @@ -308,6 +308,13 @@
>  
>  /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
>  #define HPFAR_MASK   (~UL(0xf))
> +/*
> + * We have
> + *   PAR [PA_Shift - 1   : 12] = PA  [PA_Shift - 1 : 12]
> + *   HPFAR   [PA_Shift - 9   : 4]  = FIPA[PA_Shift - 1 : 12]
> + */
> +#define PAR_TO_HPFAR(par)\
> + (((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8)
>  
>  #define kvm_arm_exception_type   \
>   {0, "IRQ" },\
> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index 9d5ce1a3039a..7cc175c88a37 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -263,7 +263,7 @@ static bool __hyp_text __translate_far_to_hpfar(u64 far, 
> u64 *hpfar)
>   return false; /* Translation failed, back to guest */
>  
>   /* Convert PAR to HPFAR format */
> - *hpfar = ((tmp >> 12) & ((1UL << 36) - 1)) << 4;
> + *hpfar = PAR_TO_HPFAR(tmp);
>   return true;
>  }
>  
> 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v7 9/9] KVM: arm64: Add support for creating PUD hugepages at stage 2

2018-09-25 Thread Punit Agrawal
Suzuki K Poulose  writes:

> Hi Punit,
>
>
> On 09/24/2018 06:45 PM, Punit Agrawal wrote:
>> KVM only supports PMD hugepages at stage 2. Now that the various page
>> handling routines are updated, extend the stage 2 fault handling to
>> map in PUD hugepages.
>>
>> Addition of PUD hugepage support enables additional page sizes (e.g.,
>> 1G with 4K granule) which can be useful on cores that support mapping
>> larger block sizes in the TLB entries.
>>
>> Signed-off-by: Punit Agrawal 
>> Cc: Christoffer Dall 
>> Cc: Marc Zyngier 
>> Cc: Russell King 
>> Cc: Catalin Marinas 
>> Cc: Will Deacon 
>
>
>>
>> diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
>> index a42b9505c9a7..a8e86b926ee0 100644
>> --- a/arch/arm/include/asm/kvm_mmu.h
>> +++ b/arch/arm/include/asm/kvm_mmu.h
>> @@ -84,11 +84,14 @@ void kvm_clear_hyp_idmap(void);
>> #define kvm_pfn_pte(pfn, prot)   pfn_pte(pfn, prot)
>>   #define kvm_pfn_pmd(pfn, prot) pfn_pmd(pfn, prot)
>> +#define kvm_pfn_pud(pfn, prot)  (__pud(0))
>> #define kvm_pud_pfn(pud) ({ BUG(); 0; })
>>   #define kvm_pmd_mkhuge(pmd)pmd_mkhuge(pmd)
>> +/* No support for pud hugepages */
>> +#define kvm_pud_mkhuge(pud) (pud)
>>   
>
> shouldn't this be BUG() like other PUD huge helpers for arm32 ?
>
>>   /*
>>* The following kvm_*pud*() functions are provided strictly to allow
>> @@ -105,6 +108,23 @@ static inline bool kvm_s2pud_readonly(pud_t *pud)
>>  return false;
>>   }
>>   +static inline void kvm_set_pud(pud_t *pud, pud_t new_pud)
>> +{
>> +BUG();
>> +}
>> +
>> +static inline pud_t kvm_s2pud_mkwrite(pud_t pud)
>> +{
>> +BUG();
>> +return pud;
>> +}
>> +
>> +static inline pud_t kvm_s2pud_mkexec(pud_t pud)
>> +{
>> +BUG();
>> +return pud;
>> +}
>> +
>
>
>> diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
>> index 3ff7ebb262d2..5b8163537bc2 100644
>> --- a/virt/kvm/arm/mmu.c
>> +++ b/virt/kvm/arm/mmu.c
>
> ...
>
>
>> @@ -1669,7 +1746,28 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, 
>> phys_addr_t fault_ipa,
>>  needs_exec = exec_fault ||
>>  (fault_status == FSC_PERM && stage2_is_exec(kvm, fault_ipa));
>>   -  if (hugetlb && vma_pagesize == PMD_SIZE) {
>> +if (hugetlb && vma_pagesize == PUD_SIZE) {
>> +/*
>> + * Assuming that PUD level always exists at Stage 2 -
>> + * this is true for 4k pages with 40 bits IPA
>> + * currently supported.
>> + *
>> + * When using 64k pages, 40bits of IPA results in
>> + * using only 2-levels at Stage 2. Overlooking this
>> + * problem for now as a PUD hugepage with 64k pages is
>> + * too big (4TB) to be practical.
>> + */
>> +pud_t new_pud = kvm_pfn_pud(pfn, mem_type);
>
> Is this based on the Dynamic IPA series ? The cover letter seems
> to suggest that it is. But I don't see the check to make sure we have
> stage2 PUD level here before we go ahead and try PUD huge page at
> stage2. Also the comment above seems outdated in that case.

It is indeed based on the Dynamic IPA series but I seem to have lost the
actual changes introducing the checks for PUD level. Let me fix that up
and post an update.

Sorry for the noise.

Punit

>
>> +
>> +new_pud = kvm_pud_mkhuge(new_pud);
>> +if (writable)
>> +new_pud = kvm_s2pud_mkwrite(new_pud);
>> +
>> +if (needs_exec)
>> +new_pud = kvm_s2pud_mkexec(new_pud);
>> +
>> +ret = stage2_set_pud_huge(kvm, memcache, fault_ipa, &new_pud);
>> +} else if (hugetlb && vma_pagesize == PMD_SIZE) {
>>  pmd_t new_pmd = kvm_pfn_pmd(pfn, mem_type);
>>  new_pmd = kvm_pmd_mkhuge(new_pmd);
>>
>
>
> Suzuki
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm