Use the new incoherent icache (incoherent TB) feature in the ppc target.

Performance problems with notdirty write accesses have been encountered
in two places now. One is where a large number of executable pages have
been freed (typically in KVM when a guest exits) and are being cleared
for reuse: most stores in a page will take the notdirty slowpath, which
can cause such a slowdown that the OS reports lockups. The other case is
PowerVM boot firmware, whose real-mode interrupt handler code stores to
memory in the same page-sized region as that interrupt handler code,
which causes significant slowdowns.

ppc implements TARGET_HAS_LAZY_ICACHE by calling tb_flush_incoherent()
from the ICBI instruction, which should conform to the ISA's CMODX (aka
SMC) requirement.
---
 target/ppc/cpu.h        | 16 ++++++++++++++++
 target/ppc/mem_helper.c |  2 ++
 target/ppc/translate.c  |  1 +
 3 files changed, 19 insertions(+)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 74ed28c8dac..de274d29637 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -34,6 +34,22 @@
 #define TARGET_PAGE_BITS_64K 16
 #define TARGET_PAGE_BITS_16M 24
 
+/* icaches are not kept coherent with dcaches. target is to call
+ * tb_flush_incoherent() to bring them into coherency */
+#define TARGET_HAS_LAZY_ICACHE
+/*
+ * Note that this does not model implementation specific behaviour of all
+ * CPUs, notably recent Power CPUs do keep i/d coherent, and only require
+ * context synchronization after code modification to ensure CPU pipeline
+ * is coherent. The ISA and User Manuals do say that icbi (to any address) ;
+ * isync should be used even for these CPUs, so tb_flush_incoherent() in
+ * icbi should work reasonably. The ppc target should continue to work without
+ * TARGET_HAS_LAZY_ICACHE, but some performance corner cases benefit (e.g.,
+ * KVM when clearing a lot of memory freed from a guest that has a lot of exec
+ * pages; PowerVM PFW/boot firmware that stores to globals in the same page as
+ * it executes from).
+ */
+
 #if defined(TARGET_PPC64)
 #define PPC_ELF_MACHINE     EM_PPC64
 #else
diff --git a/target/ppc/mem_helper.c b/target/ppc/mem_helper.c
index 51b137febd6..647d37195dd 100644
--- a/target/ppc/mem_helper.c
+++ b/target/ppc/mem_helper.c
@@ -24,6 +24,7 @@
 #include "exec/helper-proto.h"
 #include "helper_regs.h"
 #include "exec/cpu_ldst.h"
+#include "exec/tb-flush.h"
 #include "internal.h"
 #include "qemu/atomic128.h"
 
@@ -335,6 +336,7 @@ void helper_icbi(CPUPPCState *env, target_ulong addr)
      * do the load "by hand".
      */
     cpu_ldl_data_ra(env, addr, GETPC());
+    tb_flush_incoherent(env_cpu(env));
 }
 
 void helper_icbiep(CPUPPCState *env, target_ulong addr)
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 7f933537aaa..5e610bf29a5 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -4565,6 +4565,7 @@ static void gen_dss(DisasContext *ctx)
 static void gen_icbi(DisasContext *ctx)
 {
     TCGv t0;
+    translator_io_start(&ctx->base);
     gen_set_access_type(ctx, ACCESS_CACHE);
     t0 = tcg_temp_new();
     gen_addr_reg_index(ctx, t0);
-- 
2.47.1


Reply via email to