Re: [PATCH v2 05/10] Revert "tcg/cputlb: remove other-cpu capability from TLB flushing"

2025-02-25 Thread Igor Mammedov
On Tue, 25 Feb 2025 18:19:03 +0100
Igor Mammedov  wrote:

> On Tue, 25 Feb 2025 12:42:24 +
> Alex Bennée  wrote:
> 
> > Igor Mammedov  writes:
> >   
> > > 1)
> > > This reverts commit 30933c4fb4f3df95ae44c4c3c86a5df049852c01.
> > >   ("tcg/cputlb: remove other-cpu capability from TLB flushing")
> > >
> > > The commit caused a regression which went unnoticed due to
> > > affected being disabled by default (DEBUG_TLB_GATE 0)
> > > Previous patch switched to using tcg_debug_assert() so that
> > > at least on debug builds assert_cpu_is_self() path would be exercised.
> > >
> > > And that lead to exposing regression introduced by [1] with abort during 
> > > tests.
> > > to reproduce:
> > >   $ configure  --target-list=x86_64-softmmu --enable-debug
> > >   $ make && ./qemu-system-x86_64
> > >
> > >   accel/tcg/cputlb.c:419: tlb_flush_by_mmuidx:
> > > Assertion `!(cpu)->created || qemu_cpu_is_self(cpu)' failed.
> > >
> > > which is triggered by usage outside of cpu thread:
> > > x86_cpu_new -> ... ->
> > >   x86_cpu_realizefn -> cpu_reset -> ... ->
> > >   tcg_cpu_reset_hold
> > 
> > If the reset case is the only one I don't think we need to revert the
> > rest of the changes as only tlb_flush needs calling. How about something
> > like:
> > 
> > --8<---cut here---start->8---
> > cputlb: introduce tlb_flush_other_cpu for reset use  
> 
> while that works for my reproducer it's not sufficient,
> 'make check' is some tests fails anyway
> 
> ex:
> 
> 10/378 qemu:qtest+qtest-x86_64 / qtest-x86_64/ahci-test ERROR 13.47s killed 
> by signal 6 SIGABRT
> stderr:
> qemu-system-x86_64: qemu/accel/tcg/cputlb.c:419: tlb_flush_by_mmuidx: 
> Assertion `qemu_cpu_is_self(cpu)' failed.
> Broken pipe
> qemu/tests/qtest/libqtest.c:208: kill_qemu() detected QEMU death from signal 
> 6 (Aborted) (core dumped)

here is v3 rebased on top of the patch

https://gitlab.com/imammedo/qemu/-/commits/qemu_cpu_cond_v3?ref_type=heads

it seems that reset path is not the only one that relied on 'cpu->created' 
workaround


> 
> 
> 
> > 
> > The commit 30933c4fb4 (tcg/cputlb: remove other-cpu capability from
> > TLB flushing) introduced a regression that only shows up when
> > --enable-debug-tcg is used. The main use case of tlb_flush outside of
> > the current_cpu context is for handling reset and CPU creation. Rather
> > than revert the commit introduce a new helper and tweak the
> > documentation to make it clear where it should be used.
> > 
> > Signed-off-by: Alex Bennée 
> > 
> > 3 files changed, 26 insertions(+), 5 deletions(-)
> > include/exec/exec-all.h   | 20 
> > accel/tcg/cputlb.c|  9 +
> > accel/tcg/tcg-accel-ops.c |  2 +-
> > 
> > modified   include/exec/exec-all.h
> > @@ -64,12 +64,24 @@ void tlb_flush_page_all_cpus_synced(CPUState *src, 
> > vaddr addr);
> >   * tlb_flush:
> >   * @cpu: CPU whose TLB should be flushed
> >   *
> > - * Flush the entire TLB for the specified CPU. Most CPU architectures
> > - * allow the implementation to drop entries from the TLB at any time
> > - * so this is generally safe. If more selective flushing is required
> > - * use one of the other functions for efficiency.
> > + * Flush the entire TLB for the specified current CPU.
> > + *
> > + * Most CPU architectures allow the implementation to drop entries
> > + * from the TLB at any time so this is generally safe. If more
> > + * selective flushing is required use one of the other functions for
> > + * efficiency.
> >   */
> >  void tlb_flush(CPUState *cpu);
> > +/**
> > + * tlb_flush_other_cpu:
> > + * @cpu: CPU whose TLB should be flushed
> > + *
> > + * Flush the entire TLB for a specified CPU. For cross vCPU flushes
> > + * you shuld be using a more selective function. This is really only
> > + * used for flushing CPUs being reset from outside their current
> > + * context.
> > + */
> > +void tlb_flush_other_cpu(CPUState *cpu);
> >  /**
> >   * tlb_flush_all_cpus_synced:
> >   * @cpu: src CPU of the flush
> > modified   accel/tcg/cputlb.c
> > @@ -414,6 +414,15 @@ void tlb_flush(CPUState *cpu)
> >  tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
> >  }
> >  
> > +void tlb_flush_other_cpu(CPUState *cpu)
> > +{
> > +g_assert(!qemu_cpu_is_self(cpu));
> > +
> > +async_run_on_cpu(cpu,
> > + tlb_flush_by_mmuidx_async_work,
> > + RUN_ON_CPU_HOST_INT(ALL_MMUIDX_BITS));
> > +}
> > +
> >  void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t 
> > idxmap)
> >  {
> >  const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
> > modified   accel/tcg/tcg-accel-ops.c
> > @@ -85,7 +85,7 @@ static void tcg_cpu_reset_hold(CPUState *cpu)
> >  {
> >  tcg_flush_jmp_cache(cpu);
> >  
> > -tlb_flush(cpu);
> > +tlb_flush_other_cpu(cpu);
> >  }
> >  
> > --8<---cut here---end--->8---
> >   
> 




Re: [PATCH v2 05/10] Revert "tcg/cputlb: remove other-cpu capability from TLB flushing"

2025-02-25 Thread Igor Mammedov
On Tue, 25 Feb 2025 12:42:24 +
Alex Bennée  wrote:

> Igor Mammedov  writes:
> 
> > 1)
> > This reverts commit 30933c4fb4f3df95ae44c4c3c86a5df049852c01.
> >   ("tcg/cputlb: remove other-cpu capability from TLB flushing")
> >
> > The commit caused a regression which went unnoticed due to
> > affected being disabled by default (DEBUG_TLB_GATE 0)
> > Previous patch switched to using tcg_debug_assert() so that
> > at least on debug builds assert_cpu_is_self() path would be exercised.
> >
> > And that lead to exposing regression introduced by [1] with abort during 
> > tests.
> > to reproduce:
> >   $ configure  --target-list=x86_64-softmmu --enable-debug
> >   $ make && ./qemu-system-x86_64
> >
> >   accel/tcg/cputlb.c:419: tlb_flush_by_mmuidx:
> > Assertion `!(cpu)->created || qemu_cpu_is_self(cpu)' failed.
> >
> > which is triggered by usage outside of cpu thread:
> > x86_cpu_new -> ... ->
> >   x86_cpu_realizefn -> cpu_reset -> ... ->
> >   tcg_cpu_reset_hold  
> 
> If the reset case is the only one I don't think we need to revert the
> rest of the changes as only tlb_flush needs calling. How about something
> like:
> 
> --8<---cut here---start->8---
> cputlb: introduce tlb_flush_other_cpu for reset use

while that works for my reproducer it's not sufficient,
'make check' is some tests fails anyway

ex:

10/378 qemu:qtest+qtest-x86_64 / qtest-x86_64/ahci-test ERROR 13.47s killed by 
signal 6 SIGABRT
stderr:
qemu-system-x86_64: qemu/accel/tcg/cputlb.c:419: tlb_flush_by_mmuidx: Assertion 
`qemu_cpu_is_self(cpu)' failed.
Broken pipe
qemu/tests/qtest/libqtest.c:208: kill_qemu() detected QEMU death from signal 6 
(Aborted) (core dumped)



> 
> The commit 30933c4fb4 (tcg/cputlb: remove other-cpu capability from
> TLB flushing) introduced a regression that only shows up when
> --enable-debug-tcg is used. The main use case of tlb_flush outside of
> the current_cpu context is for handling reset and CPU creation. Rather
> than revert the commit introduce a new helper and tweak the
> documentation to make it clear where it should be used.
> 
> Signed-off-by: Alex Bennée 
> 
> 3 files changed, 26 insertions(+), 5 deletions(-)
> include/exec/exec-all.h   | 20 
> accel/tcg/cputlb.c|  9 +
> accel/tcg/tcg-accel-ops.c |  2 +-
> 
> modified   include/exec/exec-all.h
> @@ -64,12 +64,24 @@ void tlb_flush_page_all_cpus_synced(CPUState *src, vaddr 
> addr);
>   * tlb_flush:
>   * @cpu: CPU whose TLB should be flushed
>   *
> - * Flush the entire TLB for the specified CPU. Most CPU architectures
> - * allow the implementation to drop entries from the TLB at any time
> - * so this is generally safe. If more selective flushing is required
> - * use one of the other functions for efficiency.
> + * Flush the entire TLB for the specified current CPU.
> + *
> + * Most CPU architectures allow the implementation to drop entries
> + * from the TLB at any time so this is generally safe. If more
> + * selective flushing is required use one of the other functions for
> + * efficiency.
>   */
>  void tlb_flush(CPUState *cpu);
> +/**
> + * tlb_flush_other_cpu:
> + * @cpu: CPU whose TLB should be flushed
> + *
> + * Flush the entire TLB for a specified CPU. For cross vCPU flushes
> + * you shuld be using a more selective function. This is really only
> + * used for flushing CPUs being reset from outside their current
> + * context.
> + */
> +void tlb_flush_other_cpu(CPUState *cpu);
>  /**
>   * tlb_flush_all_cpus_synced:
>   * @cpu: src CPU of the flush
> modified   accel/tcg/cputlb.c
> @@ -414,6 +414,15 @@ void tlb_flush(CPUState *cpu)
>  tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
>  }
>  
> +void tlb_flush_other_cpu(CPUState *cpu)
> +{
> +g_assert(!qemu_cpu_is_self(cpu));
> +
> +async_run_on_cpu(cpu,
> + tlb_flush_by_mmuidx_async_work,
> + RUN_ON_CPU_HOST_INT(ALL_MMUIDX_BITS));
> +}
> +
>  void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
>  {
>  const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
> modified   accel/tcg/tcg-accel-ops.c
> @@ -85,7 +85,7 @@ static void tcg_cpu_reset_hold(CPUState *cpu)
>  {
>  tcg_flush_jmp_cache(cpu);
>  
> -tlb_flush(cpu);
> +tlb_flush_other_cpu(cpu);
>  }
>  
> --8<---cut here---end--->8---
> 




Re: [PATCH v2 05/10] Revert "tcg/cputlb: remove other-cpu capability from TLB flushing"

2025-02-25 Thread Alex Bennée
Igor Mammedov  writes:

> 1)
> This reverts commit 30933c4fb4f3df95ae44c4c3c86a5df049852c01.
>   ("tcg/cputlb: remove other-cpu capability from TLB flushing")
>
> The commit caused a regression which went unnoticed due to
> affected being disabled by default (DEBUG_TLB_GATE 0)
> Previous patch switched to using tcg_debug_assert() so that
> at least on debug builds assert_cpu_is_self() path would be exercised.
>
> And that lead to exposing regression introduced by [1] with abort during 
> tests.
> to reproduce:
>   $ configure  --target-list=x86_64-softmmu --enable-debug
>   $ make && ./qemu-system-x86_64
>
>   accel/tcg/cputlb.c:419: tlb_flush_by_mmuidx:
> Assertion `!(cpu)->created || qemu_cpu_is_self(cpu)' failed.
>
> which is triggered by usage outside of cpu thread:
> x86_cpu_new -> ... ->
>   x86_cpu_realizefn -> cpu_reset -> ... ->
>   tcg_cpu_reset_hold

If the reset case is the only one I don't think we need to revert the
rest of the changes as only tlb_flush needs calling. How about something
like:

--8<---cut here---start->8---
cputlb: introduce tlb_flush_other_cpu for reset use

The commit 30933c4fb4 (tcg/cputlb: remove other-cpu capability from
TLB flushing) introduced a regression that only shows up when
--enable-debug-tcg is used. The main use case of tlb_flush outside of
the current_cpu context is for handling reset and CPU creation. Rather
than revert the commit introduce a new helper and tweak the
documentation to make it clear where it should be used.

Signed-off-by: Alex Bennée 

3 files changed, 26 insertions(+), 5 deletions(-)
include/exec/exec-all.h   | 20 
accel/tcg/cputlb.c|  9 +
accel/tcg/tcg-accel-ops.c |  2 +-

modified   include/exec/exec-all.h
@@ -64,12 +64,24 @@ void tlb_flush_page_all_cpus_synced(CPUState *src, vaddr 
addr);
  * tlb_flush:
  * @cpu: CPU whose TLB should be flushed
  *
- * Flush the entire TLB for the specified CPU. Most CPU architectures
- * allow the implementation to drop entries from the TLB at any time
- * so this is generally safe. If more selective flushing is required
- * use one of the other functions for efficiency.
+ * Flush the entire TLB for the specified current CPU.
+ *
+ * Most CPU architectures allow the implementation to drop entries
+ * from the TLB at any time so this is generally safe. If more
+ * selective flushing is required use one of the other functions for
+ * efficiency.
  */
 void tlb_flush(CPUState *cpu);
+/**
+ * tlb_flush_other_cpu:
+ * @cpu: CPU whose TLB should be flushed
+ *
+ * Flush the entire TLB for a specified CPU. For cross vCPU flushes
+ * you shuld be using a more selective function. This is really only
+ * used for flushing CPUs being reset from outside their current
+ * context.
+ */
+void tlb_flush_other_cpu(CPUState *cpu);
 /**
  * tlb_flush_all_cpus_synced:
  * @cpu: src CPU of the flush
modified   accel/tcg/cputlb.c
@@ -414,6 +414,15 @@ void tlb_flush(CPUState *cpu)
 tlb_flush_by_mmuidx(cpu, ALL_MMUIDX_BITS);
 }
 
+void tlb_flush_other_cpu(CPUState *cpu)
+{
+g_assert(!qemu_cpu_is_self(cpu));
+
+async_run_on_cpu(cpu,
+ tlb_flush_by_mmuidx_async_work,
+ RUN_ON_CPU_HOST_INT(ALL_MMUIDX_BITS));
+}
+
 void tlb_flush_by_mmuidx_all_cpus_synced(CPUState *src_cpu, uint16_t idxmap)
 {
 const run_on_cpu_func fn = tlb_flush_by_mmuidx_async_work;
modified   accel/tcg/tcg-accel-ops.c
@@ -85,7 +85,7 @@ static void tcg_cpu_reset_hold(CPUState *cpu)
 {
 tcg_flush_jmp_cache(cpu);
 
-tlb_flush(cpu);
+tlb_flush_other_cpu(cpu);
 }
 
--8<---cut here---end--->8---

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro



[PATCH v2 05/10] Revert "tcg/cputlb: remove other-cpu capability from TLB flushing"

2025-02-07 Thread Igor Mammedov
1)
This reverts commit 30933c4fb4f3df95ae44c4c3c86a5df049852c01.
  ("tcg/cputlb: remove other-cpu capability from TLB flushing")

The commit caused a regression which went unnoticed due to
affected being disabled by default (DEBUG_TLB_GATE 0)
Previous patch switched to using tcg_debug_assert() so that
at least on debug builds assert_cpu_is_self() path would be exercised.

And that lead to exposing regression introduced by [1] with abort during tests.
to reproduce:
  $ configure  --target-list=x86_64-softmmu --enable-debug
  $ make && ./qemu-system-x86_64

  accel/tcg/cputlb.c:419: tlb_flush_by_mmuidx:
Assertion `!(cpu)->created || qemu_cpu_is_self(cpu)' failed.

which is triggered by usage outside of cpu thread:
x86_cpu_new -> ... ->
  x86_cpu_realizefn -> cpu_reset -> ... ->
  tcg_cpu_reset_hold

Drop offending commit for now, until a propper fix that doesn't break
'make check' is available.

PS:
fixup g_memdup() checkpatch error s/g_memdup/g_memdup2/

Signed-off-by: Igor Mammedov 
---
I'll leave it upto TCG folz to fix it up propperly.

CC: [email protected]
CC: BALATON Zoltan 

---
 accel/tcg/cputlb.c | 42 +-
 1 file changed, 33 insertions(+), 9 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 7380b29da3..3d1d7d2409 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -416,9 +416,12 @@ void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
 {
 tlb_debug("mmu_idx: 0x%" PRIx16 "\n", idxmap);
 
-assert_cpu_is_self(cpu);
-
-tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
+if (cpu->created && !qemu_cpu_is_self(cpu)) {
+async_run_on_cpu(cpu, tlb_flush_by_mmuidx_async_work,
+ RUN_ON_CPU_HOST_INT(idxmap));
+} else {
+tlb_flush_by_mmuidx_async_work(cpu, RUN_ON_CPU_HOST_INT(idxmap));
+}
 }
 
 void tlb_flush(CPUState *cpu)
@@ -607,12 +610,28 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, vaddr addr, 
uint16_t idxmap)
 {
 tlb_debug("addr: %016" VADDR_PRIx " mmu_idx:%" PRIx16 "\n", addr, idxmap);
 
-assert_cpu_is_self(cpu);
-
 /* This should already be page aligned */
 addr &= TARGET_PAGE_MASK;
 
-tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
+if (qemu_cpu_is_self(cpu)) {
+tlb_flush_page_by_mmuidx_async_0(cpu, addr, idxmap);
+} else if (idxmap < TARGET_PAGE_SIZE) {
+/*
+ * Most targets have only a few mmu_idx.  In the case where
+ * we can stuff idxmap into the low TARGET_PAGE_BITS, avoid
+ * allocating memory for this operation.
+ */
+async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_1,
+ RUN_ON_CPU_TARGET_PTR(addr | idxmap));
+} else {
+TLBFlushPageByMMUIdxData *d = g_new(TLBFlushPageByMMUIdxData, 1);
+
+/* Otherwise allocate a structure, freed by the worker.  */
+d->addr = addr;
+d->idxmap = idxmap;
+async_run_on_cpu(cpu, tlb_flush_page_by_mmuidx_async_2,
+ RUN_ON_CPU_HOST_PTR(d));
+}
 }
 
 void tlb_flush_page(CPUState *cpu, vaddr addr)
@@ -775,8 +794,6 @@ void tlb_flush_range_by_mmuidx(CPUState *cpu, vaddr addr,
 {
 TLBFlushRangeData d;
 
-assert_cpu_is_self(cpu);
-
 /*
  * If all bits are significant, and len is small,
  * this devolves to tlb_flush_page.
@@ -797,7 +814,14 @@ void tlb_flush_range_by_mmuidx(CPUState *cpu, vaddr addr,
 d.idxmap = idxmap;
 d.bits = bits;
 
-tlb_flush_range_by_mmuidx_async_0(cpu, d);
+if (qemu_cpu_is_self(cpu)) {
+tlb_flush_range_by_mmuidx_async_0(cpu, d);
+} else {
+/* Otherwise allocate a structure, freed by the worker.  */
+TLBFlushRangeData *p = g_memdup2(&d, sizeof(d));
+async_run_on_cpu(cpu, tlb_flush_range_by_mmuidx_async_1,
+ RUN_ON_CPU_HOST_PTR(p));
+}
 }
 
 void tlb_flush_page_bits_by_mmuidx(CPUState *cpu, vaddr addr,
-- 
2.43.0