Re: [PATCH v5] target/riscv: Implement dynamic establishment of custom decoder

2024-05-13 Thread Alistair Francis
On Mon, May 6, 2024 at 12:37 PM Huang Tao  wrote:
>
> In this patch, we modify the decoder to be a freely composable data
> structure instead of a hardcoded one. It can be dynamically builded up
> according to the extensions.
> This approach has several benefits:
> 1. Provides support for heterogeneous cpu architectures. As we add decoder in
>RISCVCPU, each cpu can have their own decoder, and the decoders can be
>different due to cpu's features.
> 2. Improve the decoding efficiency. We run the guard_func to see if the 
> decoder
>can be added to the dynamic_decoder when building up the decoder. 
> Therefore,
>there is no need to run the guard_func when decoding each instruction. It 
> can
>improve the decoding efficiency
> 3. For vendor or dynamic cpus, it allows them to customize their own decoder
>functions to improve decoding efficiency, especially when vendor-defined
>instruction sets increase. Because of dynamic building up, it can skip the 
> other
>decoder guard functions when decoding.
> 4. Pre patch for allowing adding a vendor decoder before decode_insn32() with 
> minimal
>overhead for users that don't need this particular vendor decoder.
>
> Signed-off-by: Huang Tao 
> Suggested-by: Christoph Muellner 
> Co-authored-by: LIU Zhiwei 
> Reviewed-by: Richard Henderson 
> Reviewed-by: Alistair Francis 

Thanks!

Applied to riscv-to-apply.next

Alistair

> ---
> Changes in v5:
> - rebase on https://github.com/alistair23/qemu/tree/riscv-to-apply.next
>
> Changes in v4:
> - fix typo
> - rename function
> - add 'if tcg_enable()'
> - move function to tcg-cpu.c and declarations to tcg-cpu.h
>
> Changes in v3:
> - use GPtrArray to save decode function poionter list.
> ---
>  target/riscv/cpu.c |  1 +
>  target/riscv/cpu.h |  1 +
>  target/riscv/tcg/tcg-cpu.c | 15 +++
>  target/riscv/tcg/tcg-cpu.h | 15 +++
>  target/riscv/translate.c   | 31 +++
>  5 files changed, 47 insertions(+), 16 deletions(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index a74f0eb29c..2cb145121d 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -1134,6 +1134,7 @@ void riscv_cpu_finalize_features(RISCVCPU *cpu, Error 
> **errp)
>  error_propagate(errp, local_err);
>  return;
>  }
> +riscv_tcg_cpu_finalize_dynamic_decoder(cpu);
>  } else if (kvm_enabled()) {
>  riscv_kvm_cpu_finalize_features(cpu, _err);
>  if (local_err != NULL) {
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index e0dd1828b5..2838d9640b 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -455,6 +455,7 @@ struct ArchCPU {
>  uint32_t pmu_avail_ctrs;
>  /* Mapping of events to counters */
>  GHashTable *pmu_event_ctr_map;
> +const GPtrArray *decoders;
>  };
>
>  /**
> diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
> index 4ebebebe09..3a4ec3662a 100644
> --- a/target/riscv/tcg/tcg-cpu.c
> +++ b/target/riscv/tcg/tcg-cpu.c
> @@ -863,6 +863,21 @@ void riscv_tcg_cpu_finalize_features(RISCVCPU *cpu, 
> Error **errp)
>  }
>  }
>
> +void riscv_tcg_cpu_finalize_dynamic_decoder(RISCVCPU *cpu)
> +{
> +GPtrArray *dynamic_decoders;
> +dynamic_decoders = g_ptr_array_sized_new(decoder_table_size);
> +for (size_t i = 0; i < decoder_table_size; ++i) {
> +if (decoder_table[i].guard_func &&
> +decoder_table[i].guard_func(>cfg)) {
> +g_ptr_array_add(dynamic_decoders,
> +(gpointer)decoder_table[i].riscv_cpu_decode_fn);
> +}
> +}
> +
> +cpu->decoders = dynamic_decoders;
> +}
> +
>  bool riscv_cpu_tcg_compatible(RISCVCPU *cpu)
>  {
>  return object_dynamic_cast(OBJECT(cpu), TYPE_RISCV_CPU_HOST) == NULL;
> diff --git a/target/riscv/tcg/tcg-cpu.h b/target/riscv/tcg/tcg-cpu.h
> index f7b32417f8..ce94253fe4 100644
> --- a/target/riscv/tcg/tcg-cpu.h
> +++ b/target/riscv/tcg/tcg-cpu.h
> @@ -26,4 +26,19 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, 
> Error **errp);
>  void riscv_tcg_cpu_finalize_features(RISCVCPU *cpu, Error **errp);
>  bool riscv_cpu_tcg_compatible(RISCVCPU *cpu);
>
> +struct DisasContext;
> +struct RISCVCPUConfig;
> +typedef struct RISCVDecoder {
> +bool (*guard_func)(const struct RISCVCPUConfig *);
> +bool (*riscv_cpu_decode_fn)(struct DisasContext *, uint32_t);
> +} RISCVDecoder;
> +
> +typedef bool (*riscv_cpu_decode_fn)(struct DisasContext *, uint32_t);
> +
> +extern const size_t decoder_table_size;
> +
> +extern const RISCVDecoder decoder_table[];
> +
> +void riscv_tcg_cpu_finalize_dynamic_decoder(RISCVCPU *cpu);
> +
>  #endif
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index 9ff09ebdb6..15e7123a68 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -37,6 +37,8 @@
>  #include "exec/helper-info.c.inc"
>  #undef  HELPER_H
>
> +#include "tcg/tcg-cpu.h"
> +
>  

[PATCH v5] target/riscv: Implement dynamic establishment of custom decoder

2024-05-05 Thread Huang Tao
In this patch, we modify the decoder to be a freely composable data
structure instead of a hardcoded one. It can be dynamically builded up
according to the extensions.
This approach has several benefits:
1. Provides support for heterogeneous cpu architectures. As we add decoder in
   RISCVCPU, each cpu can have their own decoder, and the decoders can be
   different due to cpu's features.
2. Improve the decoding efficiency. We run the guard_func to see if the decoder
   can be added to the dynamic_decoder when building up the decoder. Therefore,
   there is no need to run the guard_func when decoding each instruction. It can
   improve the decoding efficiency
3. For vendor or dynamic cpus, it allows them to customize their own decoder
   functions to improve decoding efficiency, especially when vendor-defined
   instruction sets increase. Because of dynamic building up, it can skip the 
other
   decoder guard functions when decoding.
4. Pre patch for allowing adding a vendor decoder before decode_insn32() with 
minimal
   overhead for users that don't need this particular vendor decoder.

Signed-off-by: Huang Tao 
Suggested-by: Christoph Muellner 
Co-authored-by: LIU Zhiwei 
Reviewed-by: Richard Henderson 
Reviewed-by: Alistair Francis 
---
Changes in v5:
- rebase on https://github.com/alistair23/qemu/tree/riscv-to-apply.next

Changes in v4:
- fix typo
- rename function
- add 'if tcg_enable()'
- move function to tcg-cpu.c and declarations to tcg-cpu.h

Changes in v3:
- use GPtrArray to save decode function poionter list.
---
 target/riscv/cpu.c |  1 +
 target/riscv/cpu.h |  1 +
 target/riscv/tcg/tcg-cpu.c | 15 +++
 target/riscv/tcg/tcg-cpu.h | 15 +++
 target/riscv/translate.c   | 31 +++
 5 files changed, 47 insertions(+), 16 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index a74f0eb29c..2cb145121d 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -1134,6 +1134,7 @@ void riscv_cpu_finalize_features(RISCVCPU *cpu, Error 
**errp)
 error_propagate(errp, local_err);
 return;
 }
+riscv_tcg_cpu_finalize_dynamic_decoder(cpu);
 } else if (kvm_enabled()) {
 riscv_kvm_cpu_finalize_features(cpu, _err);
 if (local_err != NULL) {
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index e0dd1828b5..2838d9640b 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -455,6 +455,7 @@ struct ArchCPU {
 uint32_t pmu_avail_ctrs;
 /* Mapping of events to counters */
 GHashTable *pmu_event_ctr_map;
+const GPtrArray *decoders;
 };
 
 /**
diff --git a/target/riscv/tcg/tcg-cpu.c b/target/riscv/tcg/tcg-cpu.c
index 4ebebebe09..3a4ec3662a 100644
--- a/target/riscv/tcg/tcg-cpu.c
+++ b/target/riscv/tcg/tcg-cpu.c
@@ -863,6 +863,21 @@ void riscv_tcg_cpu_finalize_features(RISCVCPU *cpu, Error 
**errp)
 }
 }
 
+void riscv_tcg_cpu_finalize_dynamic_decoder(RISCVCPU *cpu)
+{
+GPtrArray *dynamic_decoders;
+dynamic_decoders = g_ptr_array_sized_new(decoder_table_size);
+for (size_t i = 0; i < decoder_table_size; ++i) {
+if (decoder_table[i].guard_func &&
+decoder_table[i].guard_func(>cfg)) {
+g_ptr_array_add(dynamic_decoders,
+(gpointer)decoder_table[i].riscv_cpu_decode_fn);
+}
+}
+
+cpu->decoders = dynamic_decoders;
+}
+
 bool riscv_cpu_tcg_compatible(RISCVCPU *cpu)
 {
 return object_dynamic_cast(OBJECT(cpu), TYPE_RISCV_CPU_HOST) == NULL;
diff --git a/target/riscv/tcg/tcg-cpu.h b/target/riscv/tcg/tcg-cpu.h
index f7b32417f8..ce94253fe4 100644
--- a/target/riscv/tcg/tcg-cpu.h
+++ b/target/riscv/tcg/tcg-cpu.h
@@ -26,4 +26,19 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error 
**errp);
 void riscv_tcg_cpu_finalize_features(RISCVCPU *cpu, Error **errp);
 bool riscv_cpu_tcg_compatible(RISCVCPU *cpu);
 
+struct DisasContext;
+struct RISCVCPUConfig;
+typedef struct RISCVDecoder {
+bool (*guard_func)(const struct RISCVCPUConfig *);
+bool (*riscv_cpu_decode_fn)(struct DisasContext *, uint32_t);
+} RISCVDecoder;
+
+typedef bool (*riscv_cpu_decode_fn)(struct DisasContext *, uint32_t);
+
+extern const size_t decoder_table_size;
+
+extern const RISCVDecoder decoder_table[];
+
+void riscv_tcg_cpu_finalize_dynamic_decoder(RISCVCPU *cpu);
+
 #endif
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 9ff09ebdb6..15e7123a68 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -37,6 +37,8 @@
 #include "exec/helper-info.c.inc"
 #undef  HELPER_H
 
+#include "tcg/tcg-cpu.h"
+
 /* global register indices */
 static TCGv cpu_gpr[32], cpu_gprh[32], cpu_pc, cpu_vl, cpu_vstart;
 static TCGv_i64 cpu_fpr[32]; /* assume F and D extensions */
@@ -116,6 +118,7 @@ typedef struct DisasContext {
 /* FRM is known to contain a valid value. */
 bool frm_valid;
 bool insn_start_updated;
+const GPtrArray *decoders;
 } DisasContext;