Re: [PATCH v2 41/45] target/hppa: Implement CF_PCREL

2024-05-14 Thread Helge Deller
* Richard Henderson :
> Now that the groundwork has been laid, enabling CF_PCREL within the
> translator proper is a simple matter of updating copy_iaoq_entry
> and install_iaq_entries.
> 
> We also need to modify the unwind info, since we no longer have
> absolute addresses to install.
> 
> As expected, this reduces the runtime overhead of compilation when
> running a Linux kernel with address space randomization enabled.

Ah! I was wondering why you tried to convert to CF_PCREL at all.
So, that's the overall reason.

> Signed-off-by: Richard Henderson 

Reviewed-by: Helge Deller 



[PATCH v2 41/45] target/hppa: Implement CF_PCREL

2024-05-13 Thread Richard Henderson
Now that the groundwork has been laid, enabling CF_PCREL within the
translator proper is a simple matter of updating copy_iaoq_entry
and install_iaq_entries.

We also need to modify the unwind info, since we no longer have
absolute addresses to install.

As expected, this reduces the runtime overhead of compilation when
running a Linux kernel with address space randomization enabled.

Signed-off-by: Richard Henderson 
---
 target/hppa/cpu.c   | 19 ++--
 target/hppa/translate.c | 68 -
 2 files changed, 55 insertions(+), 32 deletions(-)

diff --git a/target/hppa/cpu.c b/target/hppa/cpu.c
index 5f0df0697a..f0507874ce 100644
--- a/target/hppa/cpu.c
+++ b/target/hppa/cpu.c
@@ -62,10 +62,6 @@ void cpu_get_tb_cpu_state(CPUHPPAState *env, vaddr *pc,
 *pc = hppa_cpu_get_pc(env_cpu(env));
 flags |= (env->iaoq_f & 3) << TB_FLAG_PRIV_SHIFT;
 
-if (hppa_is_pa20(env)) {
-cs_base = env->iaoq_f & MAKE_64BIT_MASK(32, 32);
-}
-
 /*
  * The only really interesting case is if IAQ_Back is on the same page
  * as IAQ_Front, so that we can use goto_tb between the blocks.  In all
@@ -113,19 +109,19 @@ static void hppa_restore_state_to_opc(CPUState *cs,
   const TranslationBlock *tb,
   const uint64_t *data)
 {
-HPPACPU *cpu = HPPA_CPU(cs);
+CPUHPPAState *env = cpu_env(cs);
 
-cpu->env.iaoq_f = data[0];
-if (data[1] != (target_ulong)-1) {
-cpu->env.iaoq_b = data[1];
+env->iaoq_f = (env->iaoq_f & TARGET_PAGE_MASK) | data[0];
+if (data[1] != INT32_MIN) {
+env->iaoq_b = env->iaoq_f + data[1];
 }
-cpu->env.unwind_breg = data[2];
+env->unwind_breg = data[2];
 /*
  * Since we were executing the instruction at IAOQ_F, and took some
  * sort of action that provoked the cpu_restore_state, we can infer
  * that the instruction was not nullified.
  */
-cpu->env.psw_n = 0;
+env->psw_n = 0;
 }
 
 static bool hppa_cpu_has_work(CPUState *cs)
@@ -191,6 +187,9 @@ static void hppa_cpu_realizefn(DeviceState *dev, Error **errp)
 hppa_ptlbe(&cpu->env);
 }
 #endif
+
+/* Use pc-relative instructions always to simplify the translator. */
+tcg_cflags_set(cs, CF_PCREL);
 }
 
 static void hppa_cpu_initfn(Object *obj)
diff --git a/target/hppa/translate.c b/target/hppa/translate.c
index fa79116d5b..79e29d722f 100644
--- a/target/hppa/translate.c
+++ b/target/hppa/translate.c
@@ -47,7 +47,7 @@ typedef struct DisasIAQE {
 TCGv_i64 space;
 /* IAOQ base; may be null for relative address. */
 TCGv_i64 base;
-/* IAOQ addend; if base is null, relative to ctx->iaoq_first. */
+/* IAOQ addend; if base is null, relative to cpu_iaoq_f. */
 int64_t disp;
 } DisasIAQE;
 
@@ -664,11 +664,7 @@ static DisasIAQE iaqe_next_absv(DisasContext *ctx, TCGv_i64 var)
 static void copy_iaoq_entry(DisasContext *ctx, TCGv_i64 dest,
 const DisasIAQE *src)
 {
-if (src->base == NULL) {
-tcg_gen_movi_i64(dest, ctx->iaoq_first + src->disp);
-} else {
-tcg_gen_addi_i64(dest, src->base, src->disp);
-}
+tcg_gen_addi_i64(dest, src->base ? : cpu_iaoq_f, src->disp);
 }
 
 static void install_iaq_entries(DisasContext *ctx, const DisasIAQE *f,
@@ -680,8 +676,28 @@ static void install_iaq_entries(DisasContext *ctx, const DisasIAQE *f,
 b_next = iaqe_incr(f, 4);
 b = &b_next;
 }
-copy_iaoq_entry(ctx, cpu_iaoq_f, f);
-copy_iaoq_entry(ctx, cpu_iaoq_b, b);
+
+/*
+ * There is an edge case
+ *bv   r0(rN)
+ *b,l  disp,r0
+ * for which F will use cpu_iaoq_b (from the indirect branch),
+ * and B will use cpu_iaoq_f (from the direct branch).
+ * In this case we need an extra temporary.
+ */
+if (f->base != cpu_iaoq_b) {
+copy_iaoq_entry(ctx, cpu_iaoq_b, b);
+copy_iaoq_entry(ctx, cpu_iaoq_f, f);
+} else if (f->base == b->base) {
+copy_iaoq_entry(ctx, cpu_iaoq_f, f);
+tcg_gen_addi_i64(cpu_iaoq_b, cpu_iaoq_f, b->disp - f->disp);
+} else {
+TCGv_i64 tmp = tcg_temp_new_i64();
+copy_iaoq_entry(ctx, tmp, b);
+copy_iaoq_entry(ctx, cpu_iaoq_f, f);
+tcg_gen_mov_i64(cpu_iaoq_b, tmp);
+}
+
 if (f->space) {
 tcg_gen_mov_i64(cpu_iasq_f, f->space);
 }
@@ -3979,9 +3995,8 @@ static bool trans_b_gate(DisasContext *ctx, arg_b_gate *a)
 /* Adjust the dest offset for the privilege change from the PTE. */
 TCGv_i64 off = tcg_temp_new_i64();
 
-gen_helper_b_gate_priv(off, tcg_env,
-   tcg_constant_i64(ctx->iaoq_first
-+ ctx->iaq_f.disp));
+copy_iaoq_entry(ctx, off, &ctx->iaq_f);
+gen_helper_b_gate_priv(off, tcg_env, off);
 
 ctx->iaq_j.base = off;
 ctx->iaq_j.disp = disp + 8;
@@ -4602,7 +4617,7 @@ static bool