Re: [PATCH v8 5/8] KVM: x86/mmu: Don't pass FOLL_GET to __kvm_follow_pfn

2023-08-24 Thread Mika Penttilä

On 8/24/23 11:04, David Stevens wrote:

From: David Stevens 

Stop passing FOLL_GET to __kvm_follow_pfn. This allows the host to map
memory into the guest that is backed by un-refcounted struct pages - for
example, the tail pages of higher order non-compound pages allocated by
the amdgpu driver via ttm_pool_alloc_page.

The bulk of this change is tracking the is_refcounted_page flag so that
non-refcounted pages don't trigger page_count() == 0 warnings. This is
done by storing the flag in an unused bit in the SPTEs. This bit is not
available in PAE SPTEs, so FOLL_GET is only omitted for TDP on x86-64.

Signed-off-by: David Stevens 
---
  arch/x86/kvm/mmu/mmu.c  | 55 +++--
  arch/x86/kvm/mmu/mmu_internal.h |  1 +
  arch/x86/kvm/mmu/paging_tmpl.h  |  8 +++--
  arch/x86/kvm/mmu/spte.c |  4 ++-
  arch/x86/kvm/mmu/spte.h | 12 ++-
  arch/x86/kvm/mmu/tdp_mmu.c  | 22 +++--
  include/linux/kvm_host.h|  3 ++
  virt/kvm/kvm_main.c |  6 ++--
  8 files changed, 79 insertions(+), 32 deletions(-)
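
The changelog's mechanism reduces to one spare software bit per SPTE plus
a predicate over it. A minimal sketch of that shape (the bit number below
is a placeholder assumption; the patch's real definitions live in the
spte.h hunk of this series):

#define SPTE_MMU_PAGE_REFCOUNTED	BIT_ULL(59)	/* placeholder bit, sketch only */

static inline bool is_refcounted_page_pte(u64 spte)
{
	return !!(spte & SPTE_MMU_PAGE_REFCOUNTED);
}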

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index dabae67f198b..4f5d33e95c6e 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -553,12 +553,14 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte)
 
 	if (is_accessed_spte(old_spte) && !is_accessed_spte(new_spte)) {
 		flush = true;
-		kvm_set_pfn_accessed(spte_to_pfn(old_spte));
+		if (is_refcounted_page_pte(old_spte))
+			kvm_set_page_accessed(pfn_to_page(spte_to_pfn(old_spte)));
 	}
 
 	if (is_dirty_spte(old_spte) && !is_dirty_spte(new_spte)) {
 		flush = true;
-		kvm_set_pfn_dirty(spte_to_pfn(old_spte));
+		if (is_refcounted_page_pte(old_spte))
+			kvm_set_page_dirty(pfn_to_page(spte_to_pfn(old_spte)));
 	}
 
 	return flush;

@@ -596,14 +598,18 @@ static u64 mmu_spte_clear_track_bits(struct kvm *kvm, u64 *sptep)
 	 * before they are reclaimed.  Sanity check that, if the pfn is backed
 	 * by a refcounted page, the refcount is elevated.
 	 */
-	page = kvm_pfn_to_refcounted_page(pfn);
-	WARN_ON(page && !page_count(page));
+	if (is_refcounted_page_pte(old_spte)) {
+		page = kvm_pfn_to_refcounted_page(pfn);
+		WARN_ON(!page || !page_count(page));
+	}
 
-	if (is_accessed_spte(old_spte))
-		kvm_set_pfn_accessed(pfn);
+	if (is_refcounted_page_pte(old_spte)) {
+		if (is_accessed_spte(old_spte))
+			kvm_set_page_accessed(pfn_to_page(pfn));
 
-	if (is_dirty_spte(old_spte))
-		kvm_set_pfn_dirty(pfn);
+		if (is_dirty_spte(old_spte))
+			kvm_set_page_dirty(pfn_to_page(pfn));
+	}
 
 	return old_spte;
 }
@@ -639,8 +645,8 @@ static bool mmu_spte_age(u64 *sptep)
 		 * Capture the dirty status of the page, so that it doesn't get
 		 * lost when the SPTE is marked for access tracking.
 		 */
-		if (is_writable_pte(spte))
-			kvm_set_pfn_dirty(spte_to_pfn(spte));
+		if (is_writable_pte(spte) && is_refcounted_page_pte(spte))
+			kvm_set_page_dirty(pfn_to_page(spte_to_pfn(spte)));
 
 		spte = mark_spte_for_access_track(spte);
 		mmu_spte_update_no_track(sptep, spte);
@@ -1278,8 +1284,8 @@ static bool spte_wrprot_for_clear_dirty(u64 *sptep)
 {
 	bool was_writable = test_and_clear_bit(PT_WRITABLE_SHIFT,
 					       (unsigned long *)sptep);
-	if (was_writable && !spte_ad_enabled(*sptep))
-		kvm_set_pfn_dirty(spte_to_pfn(*sptep));
+	if (was_writable && !spte_ad_enabled(*sptep) && is_refcounted_page_pte(*sptep))
+		kvm_set_page_dirty(pfn_to_page(spte_to_pfn(*sptep)));
 
 	return was_writable;
 }
@@ -2937,6 +2943,11 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
 	bool host_writable = !fault || fault->map_writable;
 	bool prefetch = !fault || fault->prefetch;
 	bool write_fault = fault && fault->write;
+	/*
+	 * Prefetching uses gfn_to_page_many_atomic, which never gets
+	 * non-refcounted pages.
+	 */
+	bool is_refcounted = !fault || fault->is_refcounted_page;
 
 	pgprintk("%s: spte %llx write_fault %d gfn %llx\n", __func__,
 		 *sptep, write_fault, gfn);
@@ -2969,7 +2980,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
 	}
 
 	wrprot = make_spte(vcpu, sp, slot, pte_access, gfn, pfn, *sptep, prefetch,
-			   true, host_writable, &spte);
+			   true, host_writable, is_refcounted, &spte);
 
 	if (*sptep == spte) {
 		ret = RET_PF_SPURIOUS;
@@ -4296,11 +4307,19 @@ vo

Re: [PATCH] hmm-tests: Fix migrate_dirty_page test

2022-09-13 Thread Mika Penttilä




On 13.9.2022 8.22, Alistair Popple wrote:

As noted by John Hubbard the original test relied on side effects of the
implementation of migrate_vma_setup() to detect if pages had been
swapped to disk or not. This is subject to change in future so
explicitly check for swap entries via pagemap instead. Fix a spelling
mistake while we're at it.

Signed-off-by: Alistair Popple 
Fixes: 5cc88e844e87 ("selftests/hmm-tests: add test for dirty bits")
---
  tools/testing/selftests/vm/hmm-tests.c | 50 +++---
  1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 70fdb49b59ed..b5f6a7dc1f12 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -1261,9 +1261,47 @@ static int destroy_cgroup(void)
 	return 0;
 }
  
+static uint64_t get_pfn(int fd, uint64_t ptr)
+{
+	uint64_t pfn;
+	int ret;
+
+	ret = pread(fd, &pfn, sizeof(ptr),
+		    (uint64_t) ptr / getpagesize() * sizeof(ptr));
+	if (ret != sizeof(ptr))
+		return 0;
+
+	return pfn;
+}
+
+#define PAGEMAP_SWAPPED (1ULL << 62)
+
+/* Returns true if at least one page in the range is on swap */
+static bool pages_swapped(void *ptr, unsigned long pages)
+{
+	uint64_t pfn;
+	int fd = open("/proc/self/pagemap", O_RDONLY);
+	unsigned long i;
+
+	if (fd < 0)
+		return false;
+
+	for (i = 0; i < pages; i++) {
+		pfn = get_pfn(fd, (uint64_t) ptr + i * getpagesize());
+
+		if (pfn & PAGEMAP_SWAPPED) {
+			close(fd);
+			return true;
+		}
+	}
+
+	close(fd);
+	return false;
+}
+
 /*
  * Try and migrate a dirty page that has previously been swapped to disk. This
- * checks that we don't loose dirty bits.
+ * checks that we don't lose dirty bits.
  */
 TEST_F(hmm, migrate_dirty_page)
 {
@@ -1300,6 +1338,10 @@ TEST_F(hmm, migrate_dirty_page)
 
 	ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
 
+	/* Make sure at least some pages got paged to disk. */
+	if (!pages_swapped(buffer->ptr, npages))
+		SKIP(return, "Pages weren't swapped when they should have been");
+
 	/* Fault pages back in from swap as clean pages */
 	for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
 		tmp += ptr[i];
@@ -1309,10 +1351,10 @@ TEST_F(hmm, migrate_dirty_page)
 		ptr[i] = i;
 
 	/*
-	 * Attempt to migrate memory to device, which should fail because
-	 * hopefully some pages are backed by swap storage.
+	 * Attempt to migrate memory to device. This might fail if some pages
+	 * are/were backed by swap but that's ok.
 	 */
-	ASSERT_TRUE(hmm_migrate_sys_to_dev(self->fd, buffer, npages));
+	hmm_migrate_sys_to_dev(self->fd, buffer, npages);
 
 	ASSERT_FALSE(write_cgroup_param(cgroup, "memory.reclaim", 1UL<<30));
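
For reference, the new helper relies on the fixed pagemap entry layout
documented in Documentation/admin-guide/mm/pagemap.rst; a minimal
standalone sketch of the relevant bits:

#include <stdint.h>

/* 64-bit /proc/pid/pagemap entry layout (per pagemap.rst). */
#define PM_PFN_MASK	((1ULL << 55) - 1)	/* bits 0-54: PFN when present */
#define PM_SOFT_DIRTY	(1ULL << 55)		/* bit 55: pte is soft-dirty */
#define PM_SWAPPED	(1ULL << 62)		/* bit 62: page is swapped */
#define PM_PRESENT	(1ULL << 63)		/* bit 63: page present in RAM */

/*
 * When PM_SWAPPED is set, bits 0-4 hold the swap type and bits 5-54 the
 * swap offset, so the low bits must not be read as a PFN.
 */
static inline int pagemap_entry_swapped(uint64_t entry)
{
	return (entry & PM_SWAPPED) != 0;
}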
  



Reviewed-by: Mika Penttilä 



Re: [PATCH v4 11/11] KVM: x86: First attempt at converting nested virtual APIC page to gpc

2021-11-20 Thread Mika Penttilä




On 20.11.2021 18.21, David Woodhouse wrote:

On Sat, 2021-11-20 at 17:48 +0200, Mika Penttilä wrote:

@@ -9785,6 +9787,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	local_irq_disable();
 	vcpu->mode = IN_GUEST_MODE;
 
+	/*
+	 * If the guest requires direct access to mapped L1 pages, check
+	 * the caches are valid. Will raise KVM_REQ_GET_NESTED_STATE_PAGES
+	 * to go and revalidate them, if necessary.
+	 */
+	if (is_guest_mode(vcpu) && kvm_x86_ops.nested_ops->check_guest_maps)
+		kvm_x86_ops.nested_ops->check_guest_maps(vcpu);

But KVM_REQ_GET_NESTED_STATE_PAGES is not checked until the next
vcpu_enter_guest() entry?

Sure, but that's why this call to ->check_guest_maps() comes just a few
lines *before* the 'if (kvm_cpu_exit_request(vcpu))' that will bounce
us back out so that we go through vcpu_enter_guest() from the start
again?

Yes, I had forgotten that...

Thanks,
Mika
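
The ordering discussed above, sketched against vcpu_enter_guest() with the
proposed hook applied (simplified, not verbatim kernel code; the upstream
helper is spelled kvm_vcpu_exit_request()):

	local_irq_disable();
	vcpu->mode = IN_GUEST_MODE;

	/* May raise KVM_REQ_GET_NESTED_STATE_PAGES if a cache went stale. */
	if (is_guest_mode(vcpu) && kvm_x86_ops.nested_ops->check_guest_maps)
		kvm_x86_ops.nested_ops->check_guest_maps(vcpu);

	/*
	 * Any pending request bounces us out here, so vcpu_run() re-enters
	 * vcpu_enter_guest() from the top and services the request before
	 * the guest actually runs.
	 */
	if (kvm_vcpu_exit_request(vcpu)) {
		vcpu->mode = OUTSIDE_GUEST_MODE;
		local_irq_enable();
		return 1;	/* retry entry */
	}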



Re: [PATCH v4 11/11] KVM: x86: First attempt at converting nested virtual APIC page to gpc

2021-11-20 Thread Mika Penttilä

Hi,

On 20.11.2021 12.28, David Woodhouse wrote:

From: David Woodhouse 

This is what evolved during the discussion at
https://lore.kernel.org/kvm/960e233f-ec0b-4fb5-ba2e-c8d2ccb38...@infradead.org/T/#m11d75fcfe2da357ec1dabba0d0e3abb91fd13665

As discussed, an alternative approach might be to augment
kvm_arch_memslots_updated() to raise KVM_REQ_GET_NESTED_STATE_PAGES to
each vCPU (and make that req only do anything on a given vCPU if that
vCPU is actually in L2 guest mode).

That would mean the reload gets actively triggered even on memslot
changes rather than only on MMU notifiers as is the case now. It could
*potentially* mean we can drop the new 'check_guest_maps' function.

The 'check_guest_maps' function could be a lot simpler than it is,
though. It only really needs to get kvm->memslots->generation, then
check each gpc->generation against that, and each gpc->valid.
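
Roughly, as a sketch only (field and helper names are assumptions based on
the gpc API proposed in this series and the usual x86 accessors; locking
elided):

static void nested_vmx_check_guest_maps(struct kvm_vcpu *vcpu)
{
	struct gfn_to_pfn_cache *gpc = &to_vmx(vcpu)->nested.virtual_apic_cache;
	u64 gen = kvm_memslots(vcpu->kvm)->generation;

	/* Stale or never-initialized cache: revalidate before entry. */
	if (!gpc->valid || gpc->generation != gen)
		kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
}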

Also I suspect we *shouldn't* destroy the virtual_apic_cache in
nested_vmx_vmexit(). We can just leave it there for next time the
vCPU enters guest mode. If it happens to get invalidated in the
meantime, that's fine and we'll refresh it on the way back in.
We probably *would* want to actively do something on memslot changes
in that case though, to ensure that even if the vCPU isn't in guest
mode any more, we *release* the cached page.

Signed-off-by: David Woodhouse 
---
  arch/x86/include/asm/kvm_host.h |  1 +
  arch/x86/kvm/vmx/nested.c   | 50 -
  arch/x86/kvm/vmx/vmx.c  | 12 +---
  arch/x86/kvm/vmx/vmx.h  |  2 +-
  arch/x86/kvm/x86.c  | 10 +++
  5 files changed, 63 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6ea2446ab851..24f6f3e2de47 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1511,6 +1511,7 @@ struct kvm_x86_nested_ops {
 	int (*enable_evmcs)(struct kvm_vcpu *vcpu,
 			    uint16_t *vmcs_version);
 	uint16_t (*get_evmcs_version)(struct kvm_vcpu *vcpu);
+	void (*check_guest_maps)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_x86_init_ops {

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 1e2f66951566..01bfabcfbbce 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -309,7 +309,7 @@ static void free_nested(struct kvm_vcpu *vcpu)
 		kvm_release_page_clean(vmx->nested.apic_access_page);
 		vmx->nested.apic_access_page = NULL;
 	}
-	kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
+	kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vmx->nested.virtual_apic_cache);
 	kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
 	vmx->nested.pi_desc = NULL;
  
@@ -3179,10 +3179,12 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
 	}
 
 	if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
-		map = &vmx->nested.virtual_apic_map;
+		struct gfn_to_pfn_cache *gpc = &vmx->nested.virtual_apic_cache;
 
-		if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->virtual_apic_page_addr), map)) {
-			vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(map->pfn));
+		if (!kvm_gfn_to_pfn_cache_init(vcpu->kvm, gpc, vcpu, true, true,
+					       vmcs12->virtual_apic_page_addr,
+					       PAGE_SIZE, true)) {
+			vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(gpc->pfn));
 		} else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) &&
 			   nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) &&
 			   !nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
@@ -3207,6 +3209,9 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
 	if (nested_cpu_has_posted_intr(vmcs12)) {
 		map = &vmx->nested.pi_desc_map;
 
+		if (kvm_vcpu_mapped(map))
+			kvm_vcpu_unmap(vcpu, map, true);
+
 		if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->posted_intr_desc_addr), map)) {
 			vmx->nested.pi_desc =
 				(struct pi_desc *)(((void *)map->hva) +
@@ -3251,6 +3256,29 @@ static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu)
 	return true;
 }
  
+static void nested_vmx_check_guest_maps(struct kvm_vcpu *vcpu)
+{
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct gfn_to_pfn_cache *gpc;
+
+	int valid;
+
+	if (nested_cpu_has_posted_intr(vmcs12)) {
+		gpc = &vmx->nested.virtual_apic_cache;
+
+		read_lock(&gpc->lock);
+		valid = kvm_gfn_to_pfn_cache_check(vcpu->kvm, gpc,
+						   vmcs12->virtual_apic_page_addr,
+						   PAGE_SIZE)

Re: [PATCH] ASoC: fsl_ssi: Fix mode setting when changing channel number

2018-04-09 Thread Mika Penttilä
On 04/08/2018 07:40 AM, Nicolin Chen wrote:
> This is a partial revert (in a cleaner way) of commit ebf08ae3bc90
> ("ASoC: fsl_ssi: Keep ssi->i2s_net updated") to fix a regression
> at test cases when switching between mono and stereo audio.
> 
> The problem is that ssi->i2s_net is initialized in set_dai_fmt()
> only, while this set_dai_fmt() is only called during the dai-link
> probe(). The original patch assumed set_dai_fmt() would be called
> during every playback instance, so it failed at the overriding use
> cases.
> 
> This patch adds the local variable i2s_net back to let regular use
> cases still follow the mode settings from the set_dai_fmt().
> 
> Meanwhile, the original commit of keeping ssi->i2s_net updated was
> to make set_tdm_slot() clean by checking the ssi->i2s_net directly
> instead of reading SCR register. However, the change itself is not
> necessary (or even harmful) because the set_tdm_slot() might fail
> to check the slot number for Normal-Mode-None-Net settings while
> mono audio cases still need 2 slots. So this patch can also fix it.
> And it adds an extra line of comments to declare ssi->i2s_net does
> not reflect the register value but merely the initial setting from
> the set_dai_fmt().
> 
> Reported-by: Mika Penttilä 
> Signed-off-by: Nicolin Chen 
> Cc: Mika Penttilä 
> ---
>  sound/soc/fsl/fsl_ssi.c | 14 +++---
>  1 file changed, 11 insertions(+), 3 deletions(-)
> 
> diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c
> index 0823b08..89df2d9 100644
> --- a/sound/soc/fsl/fsl_ssi.c
> +++ b/sound/soc/fsl/fsl_ssi.c
> @@ -217,6 +217,7 @@ struct fsl_ssi_soc_data {
>   * @dai_fmt: DAI configuration this device is currently used with
>   * @streams: Mask of current active streams: BIT(TX) and BIT(RX)
>   * @i2s_net: I2S and Network mode configurations of SCR register
> + *           (this is the initial settings based on the DAI format)
>   * @synchronous: Use synchronous mode - both of TX and RX use STCK and SFCK
>   * @use_dma: DMA is used or FIQ with stream filter
>   * @use_dual_fifo: DMA with support for dual FIFO mode
> @@ -829,16 +830,23 @@ static int fsl_ssi_hw_params(struct snd_pcm_substream *substream,
>  	}
>  
>  	if (!fsl_ssi_is_ac97(ssi)) {
> +		/*
> +		 * Keep the ssi->i2s_net intact while having a local variable
> +		 * to override settings for special use cases. Otherwise, the
> +		 * ssi->i2s_net will lose the settings for regular use cases.
> +		 */
> +		u8 i2s_net = ssi->i2s_net;
> +
>  		/* Normal + Network mode to send 16-bit data in 32-bit frames */
>  		if (fsl_ssi_is_i2s_cbm_cfs(ssi) && sample_size == 16)
> -			ssi->i2s_net = SSI_SCR_I2S_MODE_NORMAL | SSI_SCR_NET;
> +			i2s_net = SSI_SCR_I2S_MODE_NORMAL | SSI_SCR_NET;
>  
>  		/* Use Normal mode to send mono data at 1st slot of 2 slots */
>  		if (channels == 1)
> -			ssi->i2s_net = SSI_SCR_I2S_MODE_NORMAL;
> +			i2s_net = SSI_SCR_I2S_MODE_NORMAL;
>  
>  		regmap_update_bits(regs, REG_SSI_SCR,
> -				   SSI_SCR_I2S_NET_MASK, ssi->i2s_net);
> +				   SSI_SCR_I2S_NET_MASK, i2s_net);
>  	}
>  
>  	/* In synchronous mode, the SSI uses STCCR for capture */
> 

This patch fixes my problems, so: 

Tested-by: Mika Penttilä 


--Mika