Re: [PATCH] target/riscv: Use check for relationship between Zdinx/Zhinx{min} and Zfinx

2023-04-10 Thread Alistair Francis
On Sun, Apr 9, 2023 at 12:00 AM Weiwei Li  wrote:
>
> Zdinx/Zhinx{min} require Zfinx. Such "require" relationships are
> currently enforced by explicit checks.
>
> Signed-off-by: Weiwei Li 
> Signed-off-by: Junqiang Wang 

Acked-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index 1a5099382c..35bee8ff42 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -930,8 +930,9 @@ static void riscv_cpu_validate_set_extensions(RISCVCPU 
> *cpu, Error **errp)
>  cpu->cfg.ext_zhinxmin = true;
>  }
>
> -if (cpu->cfg.ext_zdinx || cpu->cfg.ext_zhinxmin) {
> -cpu->cfg.ext_zfinx = true;
> +if ((cpu->cfg.ext_zdinx || cpu->cfg.ext_zhinxmin) && 
> !cpu->cfg.ext_zfinx) {
> +error_setg(errp, "Zdinx/Zhinx/Zhinxmin extensions require Zfinx");
> +return;
>  }
>
>  if (cpu->cfg.ext_zfinx) {
> --
> 2.25.1
>
>
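
For illustration: with this change, a CPU configuration that enables
Zdinx without Zfinx is rejected at startup instead of being silently
fixed up. A hypothetical invocation (assuming the zdinx/zfinx CPU
property names; the error string is the one added by the patch):

    $ qemu-system-riscv64 -M virt -cpu rv64,zdinx=true,zfinx=false
    qemu-system-riscv64: Zdinx/Zhinx/Zhinxmin extensions require Zfinx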



Re: [PATCH v6 00/25] target/riscv: MSTATUS_SUM + cleanups

2023-04-10 Thread Alistair Francis
On Sat, Mar 25, 2023 at 9:58 PM Richard Henderson
 wrote:
>
> This builds on Fei and Zhiwei's SUM and TB_FLAGS changes.
>
>   * Reclaim 5 TB_FLAGS bits, since we nearly ran out.
>
>   * Using cpu_mmu_index(env, true) is insufficient to implement
> HLVX properly.  While that chooses the correct mmu_idx, it
> does not perform the read with execute permission.
> I add a new tcg interface to perform a read-for-execute with
> an arbitrary mmu_idx.  This is still not 100% compliant, but
> it's closer.
>
>   * Handle mstatus.MPV in cpu_mmu_index.
>   * Use vsstatus.SUM when required for MMUIdx_S_SUM.
>   * Cleanups for get_physical_address.
>
> While this passes check-avocado, I'm sure that's insufficient.
> Please have a close look.
>
>
> r~
>
>
> Fei Wu (2):
>   target/riscv: Separate priv from mmu_idx
>   target/riscv: Reduce overhead of MSTATUS_SUM change
>
> LIU Zhiwei (4):
>   target/riscv: Extract virt enabled state from tb flags
>   target/riscv: Add a general status enum for extensions
>   target/riscv: Encode the FS and VS on a normal way for tb flags
>   target/riscv: Add a tb flags field for vstart
>
> Richard Henderson (19):
>   target/riscv: Remove mstatus_hs_{fs,vs} from tb_flags
>   accel/tcg: Add cpu_ld*_code_mmu
>   target/riscv: Use cpu_ld*_code_mmu for HLVX
>   target/riscv: Handle HLV, HSV via helpers
>   target/riscv: Rename MMU_HYP_ACCESS_BIT to MMU_2STAGE_BIT
>   target/riscv: Introduce mmuidx_sum
>   target/riscv: Introduce mmuidx_priv
>   target/riscv: Introduce mmuidx_2stage
>   target/riscv: Move hstatus.spvp check to check_access_hlsv
>   target/riscv: Set MMU_2STAGE_BIT in riscv_cpu_mmu_index
>   target/riscv: Check SUM in the correct register
>   target/riscv: Hoist second stage mode change to callers
>   target/riscv: Hoist pbmte and hade out of the level loop
>   target/riscv: Move leaf pte processing out of level loop
>   target/riscv: Suppress pte update with is_debug
>   target/riscv: Don't modify SUM with is_debug
>   target/riscv: Merge checks for reserved pte flags
>   target/riscv: Reorg access check in get_physical_address
>   target/riscv: Reorg sum check in get_physical_address

Thanks for the patches!

This has been reviewed and tested. Do you mind sending a v7 rebased on
https://github.com/alistair23/qemu/tree/riscv-to-apply.next?

Alistair

>
>  include/exec/cpu_ldst.h   |   9 +
>  target/riscv/cpu.h|  47 ++-
>  target/riscv/cpu_bits.h   |  12 +-
>  target/riscv/helper.h |  12 +-
>  target/riscv/internals.h  |  35 ++
>  accel/tcg/cputlb.c|  48 +++
>  accel/tcg/user-exec.c |  58 +++
>  target/riscv/cpu.c|   2 +-
>  target/riscv/cpu_helper.c | 393 +-
>  target/riscv/csr.c|  21 +-
>  target/riscv/op_helper.c  | 113 -
>  target/riscv/translate.c  |  72 ++--
>  .../riscv/insn_trans/trans_privileged.c.inc   |   2 +-
>  target/riscv/insn_trans/trans_rvf.c.inc   |   2 +-
>  target/riscv/insn_trans/trans_rvh.c.inc   | 135 +++---
>  target/riscv/insn_trans/trans_rvv.c.inc   |  22 +-
>  target/riscv/insn_trans/trans_xthead.c.inc|   7 +-
>  17 files changed, 595 insertions(+), 395 deletions(-)
>
> --
> 2.34.1
>
>



Re: [PATCH v6 25/25] target/riscv: Reorg sum check in get_physical_address

2023-04-10 Thread Alistair Francis
On Sat, Mar 25, 2023 at 10:56 PM Richard Henderson
 wrote:
>
> Implement this by adjusting prot, which reduces the set of
> checks required.  This prevents exec from being set for U pages
> in MMUIdx_S_SUM.  While it had been technically incorrect,
> it did not manifest as a bug, because we will never attempt
> to execute from MMUIdx_S_SUM.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu_helper.c | 22 +++---
>  1 file changed, 11 insertions(+), 11 deletions(-)
>
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 725ca45106..7336d1273b 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -800,7 +800,7 @@ static int get_physical_address(CPURISCVState *env, 
> hwaddr *physical,
>  *ret_prot = 0;
>
>  hwaddr base;
> -int levels, ptidxbits, ptesize, vm, sum, widened;
> +int levels, ptidxbits, ptesize, vm, widened;
>
>  if (first_stage == true) {
>  if (use_background) {
> @@ -831,7 +831,7 @@ static int get_physical_address(CPURISCVState *env, 
> hwaddr *physical,
>  }
>  widened = 2;
>  }
> -sum = mmuidx_sum(mmu_idx);
> +
>  switch (vm) {
>  case VM_1_10_SV32:
>levels = 2; ptidxbits = 10; ptesize = 4; break;
> @@ -999,15 +999,15 @@ restart:
>  prot |= PAGE_EXEC;
>  }
>
> -if ((pte & PTE_U) &&
> -((mode != PRV_U) && (!sum || access_type == MMU_INST_FETCH))) {
> -/*
> - * User PTE flags when not U mode and mstatus.SUM is not set,
> - * or the access type is an instruction fetch.
> - */
> -return TRANSLATE_FAIL;
> -}
> -if (!(pte & PTE_U) && (mode != PRV_S)) {
> +if (pte & PTE_U) {
> +if (mode != PRV_U) {
> +if (!mmuidx_sum(mmu_idx)) {
> +return TRANSLATE_FAIL;
> +}
> +/* SUM allows only read+write, not execute. */
> +prot &= PAGE_READ | PAGE_WRITE;
> +}
> +} else if (mode != PRV_S) {
>  /* Supervisor PTE flags when not S mode */
>  return TRANSLATE_FAIL;
>  }
> --
> 2.34.1
>
>
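
For context on the last sentence: instruction fetches can never select
MMUIdx_S_SUM, because riscv_cpu_mmu_index only applies the SUM
adjustment to data accesses. A condensed sketch of that logic, taken
from patches 16/17 of this series:

    /* Sketch, condensed from riscv_cpu_mmu_index (patches 16-17):
     * the SUM adjustment is only made when !ifetch, so an instruction
     * fetch never ends up with mmu_idx == MMUIdx_S_SUM. */
    if (!ifetch) {
        if (mode == PRV_S && get_field(status, MSTATUS_SUM)) {
            mode = MMUIdx_S_SUM;
        }
    }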



Re: [PATCH v3 0/3] target/riscv: Fix mstatus.MPP related support

2023-04-10 Thread Alistair Francis
On Fri, Apr 7, 2023 at 11:49 AM Weiwei Li  wrote:
>
> This patchset fixes some problems in the current implementation of
> mstatus.MPP.
>
> The port is available here:
> https://github.com/plctlab/plct-qemu/tree/plct-mpp-fix-v3
>
> v3:
> * add patch 2 to remove PRV_H, and use PRV_RESERVED instead in some cases
> * improve legalize_mpp and assert error message in patch 3
>
> v2:
> * Modify commit message and add a comment in patch 2 to specify that the
> MPP field becomes a WARL field since priv version 1.11
> * rebase on riscv-to-apply.next
>
> Weiwei Li (3):
>   target/riscv: Fix the mstatus.MPP value after executing MRET
>   target/riscv: Use PRV_RESERVED instead of PRV_H
>   target/riscv: Legalize MPP value in write_mstatus

Thanks!

Applied to riscv-to-apply.next

Alistair

>
>  target/riscv/cpu.h|  2 +-
>  target/riscv/cpu_bits.h   |  2 +-
>  target/riscv/cpu_helper.c |  8 ++--
>  target/riscv/csr.c| 32 
>  target/riscv/gdbstub.c|  2 +-
>  target/riscv/op_helper.c  |  5 +++--
>  6 files changed, 40 insertions(+), 11 deletions(-)
>
> --
> 2.25.1
>
>
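
For readers without the series at hand, a minimal sketch of what the
WARL legalization in patch 3 can look like (the helper name
legalize_mpp comes from the cover letter; the body below is an
illustrative assumption, not the actual patch):

    /* Illustrative only: clamp a written MPP value to a legal privilege
     * level; PRV_RESERVED (the old PRV_H encoding) is never accepted. */
    static target_ulong legalize_mpp(target_ulong old_mpp, target_ulong val)
    {
        switch (val) {
        case PRV_M:
        case PRV_S:
        case PRV_U:
            return val;
        default:
            return old_mpp;  /* WARL: an illegal write leaves the field as-is */
        }
    }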



Re: [PATCH v6 24/25] target/riscv: Reorg access check in get_physical_address

2023-04-10 Thread Alistair Francis
On Sat, Mar 25, 2023 at 9:51 PM Richard Henderson
 wrote:
>
> We were effectively computing the protection bits twice,
> once while performing access checks and once while returning
> the valid bits to the caller.  Reorg so we do this once.
>
> Move the computation of mxr close to its single use.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu_helper.c | 69 ---
>  1 file changed, 36 insertions(+), 33 deletions(-)
>
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 82a7c5f9dd..725ca45106 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -762,7 +762,7 @@ static int get_physical_address_pmp(CPURISCVState *env, 
> int *prot,
>   * @is_debug: Is this access from a debugger or the monitor?
>   */
>  static int get_physical_address(CPURISCVState *env, hwaddr *physical,
> -int *prot, target_ulong addr,
> +int *ret_prot, target_ulong addr,
>  target_ulong *fault_pte_addr,
>  int access_type, int mmu_idx,
>  bool first_stage, bool two_stage,
> @@ -793,20 +793,14 @@ static int get_physical_address(CPURISCVState *env, 
> hwaddr *physical,
>
>  if (mode == PRV_M || !riscv_cpu_cfg(env)->mmu) {
>  *physical = addr;
> -*prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
> +*ret_prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
>  return TRANSLATE_SUCCESS;
>  }
>
> -*prot = 0;
> +*ret_prot = 0;
>
>  hwaddr base;
> -int levels, ptidxbits, ptesize, vm, sum, mxr, widened;
> -
> -if (first_stage == true) {
> -mxr = get_field(env->mstatus, MSTATUS_MXR);
> -} else {
> -mxr = get_field(env->vsstatus, MSTATUS_MXR);
> -}
> +int levels, ptidxbits, ptesize, vm, sum, widened;
>
>  if (first_stage == true) {
>  if (use_background) {
> @@ -849,7 +843,7 @@ static int get_physical_address(CPURISCVState *env, 
> hwaddr *physical,
>levels = 5; ptidxbits = 9; ptesize = 8; break;
>  case VM_1_10_MBARE:
>  *physical = addr;
> -*prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
> +*ret_prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
>  return TRANSLATE_SUCCESS;
>  default:
>g_assert_not_reached();
> @@ -984,6 +978,27 @@ restart:
>  return TRANSLATE_FAIL;
>  }
>
> +int prot = 0;
> +if (pte & PTE_R) {
> +prot |= PAGE_READ;
> +}
> +if (pte & PTE_W) {
> +prot |= PAGE_WRITE;
> +}
> +if (pte & PTE_X) {
> +bool mxr;
> +
> +if (first_stage == true) {
> +mxr = get_field(env->mstatus, MSTATUS_MXR);
> +} else {
> +mxr = get_field(env->vsstatus, MSTATUS_MXR);
> +}
> +if (mxr) {
> +prot |= PAGE_READ;
> +}
> +prot |= PAGE_EXEC;
> +}
> +
>  if ((pte & PTE_U) &&
>  ((mode != PRV_U) && (!sum || access_type == MMU_INST_FETCH))) {
>  /*
> @@ -996,17 +1011,9 @@ restart:
>  /* Supervisor PTE flags when not S mode */
>  return TRANSLATE_FAIL;
>  }
> -if (access_type == MMU_DATA_LOAD &&
> -!((pte & PTE_R) || ((pte & PTE_X) && mxr))) {
> -/* Read access check failed */
> -return TRANSLATE_FAIL;
> -}
> -if (access_type == MMU_DATA_STORE && !(pte & PTE_W)) {
> -/* Write access check failed */
> -return TRANSLATE_FAIL;
> -}
> -if (access_type == MMU_INST_FETCH && !(pte & PTE_X)) {
> -/* Fetch access check failed */
> +
> +if (!((prot >> access_type) & 1)) {
> +/* Access check failed */
>  return TRANSLATE_FAIL;
>  }
>
> @@ -1071,20 +1078,16 @@ restart:
>(vpn & (((target_ulong)1 << ptshift) - 1))
>   ) << PGSHIFT) | (addr & ~TARGET_PAGE_MASK);
>
> -/* set permissions on the TLB entry */
> -if ((pte & PTE_R) || ((pte & PTE_X) && mxr)) {
> -*prot |= PAGE_READ;
> -}
> -if (pte & PTE_X) {
> -*prot |= PAGE_EXEC;
> -}
>  /*
> - * Add write permission on stores or if the page is already dirty,
> - * so that we TLB miss on later writes to update the dirty bit.
> + * Remove write permission unless this is a store, or the page is
> + * already dirty, so that we TLB miss on later writes to update
> + * the dirty bit.
>   */
> -if ((pte & PTE_W) && (access_type == MMU_DATA_STORE || (pte & PTE_D))) {
> -*prot |= PAGE_WRITE;
> +if (access_type != MMU_DATA_STORE && !(pte & PTE_D)) {
> +prot &= ~PAGE_WRITE;
>  }
> +*ret_prot = prot;
> +
>  return TRANSLATE_SUCCESS;
>  }
>
> --
> 2.34.1
>
>
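
A note on the (prot >> access_type) & 1 test above: it relies on QEMU's
flag and enum values lining up, which is what makes the single test
equivalent to the three per-access-type checks it replaces:

    /* MMU_DATA_LOAD == 0, MMU_DATA_STORE == 1, MMU_INST_FETCH == 2, and
     * PAGE_READ == 1 << 0, PAGE_WRITE == 1 << 1, PAGE_EXEC == 1 << 2,
     * so shifting prot right by the access type isolates the matching bit. */
    QEMU_BUILD_BUG_ON(PAGE_READ  != (1 << MMU_DATA_LOAD));
    QEMU_BUILD_BUG_ON(PAGE_WRITE != (1 << MMU_DATA_STORE));
    QEMU_BUILD_BUG_ON(PAGE_EXEC  != (1 << MMU_INST_FETCH));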



Re: [PATCH v6 23/25] target/riscv: Merge checks for reserved pte flags

2023-04-10 Thread Alistair Francis
On Sat, Mar 25, 2023 at 10:35 PM Richard Henderson
 wrote:
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu_helper.c | 12 ++--
>  1 file changed, 6 insertions(+), 6 deletions(-)
>
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 850817edfd..82a7c5f9dd 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -976,14 +976,14 @@ restart:
>  /* Reserved without Svpbmt. */
>  return TRANSLATE_FAIL;
>  }
> -if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) {
> -/* Reserved leaf PTE flags: PTE_W */
> -return TRANSLATE_FAIL;
> -}
> -if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) {
> -/* Reserved leaf PTE flags: PTE_W + PTE_X */
> +
> +/* Check for reserved combinations of RWX flags. */
> +switch (pte & (PTE_R | PTE_W | PTE_X)) {
> +case PTE_W:
> +case PTE_W | PTE_X:
>  return TRANSLATE_FAIL;
>  }
> +
>  if ((pte & PTE_U) &&
>  ((mode != PRV_U) && (!sum || access_type == MMU_INST_FETCH))) {
>  /*
> --
> 2.34.1
>
>
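
For reference, the two cases folded into the switch above are exactly
the leaf R/W/X encodings the RISC-V privileged spec reserves; a
self-contained restatement (illustrative helper, not part of the patch):

    /* Reserved leaf encodings: write-only (W) and write+execute without
     * read (W|X); every other R/W/X combination is a legal leaf. */
    static bool pte_rwx_reserved(target_ulong pte)
    {
        switch (pte & (PTE_R | PTE_W | PTE_X)) {
        case PTE_W:
        case PTE_W | PTE_X:
            return true;
        default:
            return false;
        }
    }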



Re: [PATCH v6 21/25] target/riscv: Suppress pte update with is_debug

2023-04-10 Thread Alistair Francis
On Sat, Mar 25, 2023 at 10:56 PM Richard Henderson
 wrote:
>
> The debugger should not modify PTE_A or PTE_D.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu_helper.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index ce12dcec1d..b26840e46c 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -1015,7 +1015,7 @@ restart:
>  (access_type == MMU_DATA_STORE ? PTE_D : 0);
>
>  /* Page table updates need to be atomic with MTTCG enabled */
> -if (updated_pte != pte) {
> +if (updated_pte != pte && !is_debug) {
>  if (!hade) {
>  return TRANSLATE_FAIL;
>  }
> --
> 2.34.1
>
>



Re: [PATCH v6 22/25] target/riscv: Don't modify SUM with is_debug

2023-04-10 Thread Alistair Francis
On Sat, Mar 25, 2023 at 9:55 PM Richard Henderson
 wrote:
>
> If we want to give the debugger a greater view of memory than
> the cpu, we should disable the access check entirely,
> not merely for this one corner case.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu_helper.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index b26840e46c..850817edfd 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -837,7 +837,7 @@ static int get_physical_address(CPURISCVState *env, 
> hwaddr *physical,
>  }
>  widened = 2;
>  }
> -sum = mmuidx_sum(mmu_idx) || is_debug;
> +sum = mmuidx_sum(mmu_idx);
>  switch (vm) {
>  case VM_1_10_SV32:
>levels = 2; ptidxbits = 10; ptesize = 4; break;
> --
> 2.34.1
>
>



Re: [PATCH v6 20/25] target/riscv: Move leaf pte processing out of level loop

2023-04-10 Thread Alistair Francis
On Sun, Mar 26, 2023 at 2:03 AM Richard Henderson
 wrote:
>
> Move the code that never loops outside of the loop.
> Unchain the if-return-else statements.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu_helper.c | 234 +-
>  1 file changed, 127 insertions(+), 107 deletions(-)
>
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 00f70a3dd5..ce12dcec1d 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -879,6 +879,8 @@ static int get_physical_address(CPURISCVState *env, 
> hwaddr *physical,
>  }
>
>  int ptshift = (levels - 1) * ptidxbits;
> +target_ulong pte;
> +hwaddr pte_addr;
>  int i;
>
>  #if !TCG_OVERSIZED_GUEST
> @@ -895,7 +897,6 @@ restart:
>  }
>
>  /* check that physical address of PTE is legal */
> -hwaddr pte_addr;
>
>  if (two_stage && first_stage) {
>  int vbase_prot;
> @@ -927,7 +928,6 @@ restart:
>  return TRANSLATE_PMP_FAIL;
>  }
>
> -target_ulong pte;
>  if (riscv_cpu_mxl(env) == MXL_RV32) {
>  pte = address_space_ldl(cs->as, pte_addr, attrs, &res);
>  } else {
> @@ -952,120 +952,140 @@ restart:
>  if (!(pte & PTE_V)) {
>  /* Invalid PTE */
>  return TRANSLATE_FAIL;
> -} else if (!pbmte && (pte & PTE_PBMT)) {
> +}
> +if (pte & (PTE_R | PTE_W | PTE_X)) {
> +goto leaf;
> +}
> +
> +/* Inner PTE, continue walking */
> +if (pte & (PTE_D | PTE_A | PTE_U | PTE_ATTR)) {
>  return TRANSLATE_FAIL;
> -} else if (!(pte & (PTE_R | PTE_W | PTE_X))) {
> -/* Inner PTE, continue walking */
> -if (pte & (PTE_D | PTE_A | PTE_U | PTE_ATTR)) {
> -return TRANSLATE_FAIL;
> -}
> -base = ppn << PGSHIFT;
> -} else if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) {
> -/* Reserved leaf PTE flags: PTE_W */
> -return TRANSLATE_FAIL;
> -} else if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) {
> -/* Reserved leaf PTE flags: PTE_W + PTE_X */
> -return TRANSLATE_FAIL;
> -} else if ((pte & PTE_U) && ((mode != PRV_U) &&
> -   (!sum || access_type == MMU_INST_FETCH))) {
> -/* User PTE flags when not U mode and mstatus.SUM is not set,
> -   or the access type is an instruction fetch */
> -return TRANSLATE_FAIL;
> -} else if (!(pte & PTE_U) && (mode != PRV_S)) {
> -/* Supervisor PTE flags when not S mode */
> -return TRANSLATE_FAIL;
> -} else if (ppn & ((1ULL << ptshift) - 1)) {
> -/* Misaligned PPN */
> -return TRANSLATE_FAIL;
> -} else if (access_type == MMU_DATA_LOAD && !((pte & PTE_R) ||
> -   ((pte & PTE_X) && mxr))) {
> -/* Read access check failed */
> -return TRANSLATE_FAIL;
> -} else if (access_type == MMU_DATA_STORE && !(pte & PTE_W)) {
> -/* Write access check failed */
> -return TRANSLATE_FAIL;
> -} else if (access_type == MMU_INST_FETCH && !(pte & PTE_X)) {
> -/* Fetch access check failed */
> -return TRANSLATE_FAIL;
> -} else {
> -/* if necessary, set accessed and dirty bits. */
> -target_ulong updated_pte = pte | PTE_A |
> +}
> +base = ppn << PGSHIFT;
> +}
> +
> +/* No leaf pte at any translation level. */
> +return TRANSLATE_FAIL;
> +
> + leaf:
> +if (ppn & ((1ULL << ptshift) - 1)) {
> +/* Misaligned PPN */
> +return TRANSLATE_FAIL;
> +}
> +if (!pbmte && (pte & PTE_PBMT)) {
> +/* Reserved without Svpbmt. */
> +return TRANSLATE_FAIL;
> +}
> +if ((pte & (PTE_R | PTE_W | PTE_X)) == PTE_W) {
> +/* Reserved leaf PTE flags: PTE_W */
> +return TRANSLATE_FAIL;
> +}
> +if ((pte & (PTE_R | PTE_W | PTE_X)) == (PTE_W | PTE_X)) {
> +/* Reserved leaf PTE flags: PTE_W + PTE_X */
> +return TRANSLATE_FAIL;
> +}
> +if ((pte & PTE_U) &&
> +((mode != PRV_U) && (!sum || access_type == MMU_INST_FETCH))) {
> +/*
> + * User PTE flags when not U mode and mstatus.SUM is not set,
> + * or the access type is an instruction fetch.
> + */
> +return TRANSLATE_FAIL;
> +}
> +if (!(pte & PTE_U) && (mode != PRV_S)) {
> +/* Supervisor PTE flags when not S mode */
> +return TRANSLATE_FAIL;
> +}
> +if (access_type == MMU_DATA_LOAD &&
> +!((pte & PTE_R) || ((pte & PTE_X) && mxr))) {
> +/* Read access check failed */
> +return TRANSLATE_FAIL;
> +}
> +if (access_type == MMU_DATA_STORE && !(pte & PTE_W)) {
> +/* Write access check failed */
> +   

Re: [PATCH v6 19/25] target/riscv: Hoist pbmte and hade out of the level loop

2023-04-10 Thread Alistair Francis
On Sat, Mar 25, 2023 at 9:56 PM Richard Henderson
 wrote:
>
> These values are constant for every level of pte lookup.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu_helper.c | 16 
>  1 file changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 833ea6d3fa..00f70a3dd5 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -870,6 +870,14 @@ static int get_physical_address(CPURISCVState *env, 
> hwaddr *physical,
>  return TRANSLATE_FAIL;
>  }
>
> +bool pbmte = env->menvcfg & MENVCFG_PBMTE;
> +bool hade = env->menvcfg & MENVCFG_HADE;
> +
> +if (first_stage && two_stage && riscv_cpu_virt_enabled(env)) {
> +pbmte = pbmte && (env->henvcfg & HENVCFG_PBMTE);
> +hade = hade && (env->henvcfg & HENVCFG_HADE);
> +}
> +
>  int ptshift = (levels - 1) * ptidxbits;
>  int i;
>
> @@ -930,14 +938,6 @@ restart:
>  return TRANSLATE_FAIL;
>  }
>
> -bool pbmte = env->menvcfg & MENVCFG_PBMTE;
> -bool hade = env->menvcfg & MENVCFG_HADE;
> -
> -if (first_stage && two_stage && riscv_cpu_virt_enabled(env)) {
> -pbmte = pbmte && (env->henvcfg & HENVCFG_PBMTE);
> -hade = hade && (env->henvcfg & HENVCFG_HADE);
> -}
> -
>  if (riscv_cpu_sxl(env) == MXL_RV32) {
>  ppn = pte >> PTE_PPN_SHIFT;
>  } else if (pbmte || cpu->cfg.ext_svnapot) {
> --
> 2.34.1
>
>



Re: [PATCH v6 18/25] target/riscv: Hoist second stage mode change to callers

2023-04-10 Thread Alistair Francis
On Sat, Mar 25, 2023 at 10:29 PM Richard Henderson
 wrote:
>
> Move the check from the top of get_physical_address to
> the two callers, where passing mmu_idx makes no sense.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu_helper.c | 10 ++
>  1 file changed, 2 insertions(+), 8 deletions(-)
>
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 0017ecbf37..833ea6d3fa 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -791,12 +791,6 @@ static int get_physical_address(CPURISCVState *env, 
> hwaddr *physical,
>  use_background = true;
>  }
>
> -if (first_stage == false) {
> -/* We are in stage 2 translation, this is similar to stage 1. */
> -/* Stage 2 is always taken as U-mode */
> -mode = PRV_U;
> -}
> -
>  if (mode == PRV_M || !riscv_cpu_cfg(env)->mmu) {
>  *physical = addr;
>  *prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
> @@ -902,7 +896,7 @@ restart:
>  /* Do the second stage translation on the base PTE address. */
>  int vbase_ret = get_physical_address(env, &vbase, &vbase_prot,
>   base, NULL, MMU_DATA_LOAD,
> - mmu_idx, false, true,
> + MMUIdx_U, false, true,
>   is_debug);
>
>  if (vbase_ret != TRANSLATE_SUCCESS) {
> @@ -1274,7 +1268,7 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, 
> int size,
>  im_address = pa;
>
>  ret = get_physical_address(env, &pa, &prot2, im_address, NULL,
> -   access_type, mmu_idx, false, true,
> +   access_type, MMUIdx_U, false, true,
> false);
>
>  qemu_log_mask(CPU_LOG_MMU,
> --
> 2.34.1
>
>



Re: [PATCH v6 17/25] target/riscv: Check SUM in the correct register

2023-04-10 Thread Alistair Francis
On Sat, Mar 25, 2023 at 10:42 PM Richard Henderson
 wrote:
>
> Table 9.5 "Effect of MPRV..." specifies that MPV=1 uses VS-level
> vsstatus.SUM instead of HS-level sstatus.SUM.
>
> For HLV/HSV instructions, the HS-level register does not apply, but
> the VS-level register presumably does, though this is not mentioned
> explicitly in the manual.  However, it matches the behavior for MPV.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu_helper.c | 12 
>  target/riscv/op_helper.c  |  6 +-
>  2 files changed, 13 insertions(+), 5 deletions(-)
>
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 6c42f9c6fd..0017ecbf37 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -42,11 +42,16 @@ int riscv_cpu_mmu_index(CPURISCVState *env, bool ifetch)
>
>  /* All priv -> mmu_idx mapping are here */
>  if (!ifetch) {
> -if (mode == PRV_M && get_field(env->mstatus, MSTATUS_MPRV)) {
> +uint64_t status = env->mstatus;
> +
> +if (mode == PRV_M && get_field(status, MSTATUS_MPRV)) {
>  mode = get_field(env->mstatus, MSTATUS_MPP);
>  virt = get_field(env->mstatus, MSTATUS_MPV);
> +if (virt) {
> +status = env->vsstatus;
> +}
>  }
> -if (mode == PRV_S && get_field(env->mstatus, MSTATUS_SUM)) {
> +if (mode == PRV_S && get_field(status, MSTATUS_SUM)) {
>  mode = MMUIdx_S_SUM;
>  }
>  }
> @@ -838,8 +843,7 @@ static int get_physical_address(CPURISCVState *env, 
> hwaddr *physical,
>  }
>  widened = 2;
>  }
> -/* status.SUM will be ignored if execute on background */
> -sum = mmuidx_sum(mmu_idx) || use_background || is_debug;
> +sum = mmuidx_sum(mmu_idx) || is_debug;
>  switch (vm) {
>  case VM_1_10_SV32:
>levels = 2; ptidxbits = 10; ptesize = 4; break;
> diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c
> index db7252e09d..93d4ae8b3e 100644
> --- a/target/riscv/op_helper.c
> +++ b/target/riscv/op_helper.c
> @@ -437,7 +437,11 @@ static int check_access_hlsv(CPURISCVState *env, bool x, 
> uintptr_t ra)
>  riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, ra);
>  }
>
> -return get_field(env->hstatus, HSTATUS_SPVP) | MMU_2STAGE_BIT;
> +int mode = get_field(env->hstatus, HSTATUS_SPVP);
> +if (!x && mode == PRV_S && get_field(env->vsstatus, MSTATUS_SUM)) {
> +mode = MMUIdx_S_SUM;
> +}
> +return mode | MMU_2STAGE_BIT;
>  }
>
>  target_ulong helper_hyp_hlv_bu(CPURISCVState *env, target_ulong addr)
> --
> 2.34.1
>
>



Re: [PATCH 0/5] Cleanup [h_enter|spapr_exit]_nested routines

2023-04-10 Thread Harsh Prateek Bora

Hi Nick, Fabiano,

Any review comments, please?

regards,
Harsh

On 3/31/23 16:36, Daniel Henrique Barboza wrote:



On 3/31/23 07:39, Cédric Le Goater wrote:

On 3/31/23 08:53, Harsh Prateek Bora wrote:

This patchset introduces helper routines and uses them to clean up the
h_enter_nested() and spapr_exit_nested() routines in the existing API for
nested virtualization on Power/SPAPR, for better code readability and
maintainability. No functional changes are intended with this patchset.


Adding Nick since he did most of this work.



And also Fabiano.


Daniel



C.




Harsh Prateek Bora (5):
   ppc: spapr: cleanup cr get/store with helper routines.
   ppc: spapr: cleanup h_enter_nested() with helper routines.
   ppc: spapr: assert early rather late in h_enter_nested()
   ppc: spapr: cleanup spapr_exit_nested() with helper routines.
   MAINTAINERS: Adding myself in the list for ppc/spapr

  MAINTAINERS  |   1 +
  hw/ppc/spapr_hcall.c | 251 ---
  target/ppc/cpu.c |  17 +++
  target/ppc/cpu.h |   2 +
  4 files changed, 161 insertions(+), 110 deletions(-)







[PATCH] physmem: use PR_SET_VMA_ANON_NAME to set ram block name

2023-04-10 Thread Eiichi Tsukata
Use the Linux-specific PR_SET_VMA_ANON_NAME prctl (introduced in v5.17)
to set the RAM block name in the kernel. This makes each RAM block
distinguishable and helps with debugging and inspection. The names of
RAM blocks are shown in /proc/pid/maps like this:

  7f00e940-7f00f140 rw-p  00:00 0  [anon:pc.ram]
  7f011520-7f0115201000 rw-p  00:00 0  [anon:/rom@etc/acpi/rsdp]
  7f011540-7f011541 rw-p  00:00 0  [anon:/rom@etc/table-loader]
  7f011560-7f011580 rw-p  00:00 0  [anon:/rom@etc/acpi/tables]
  7f0115a0-7f0115a4 rw-p  00:00 0  [anon:e1000.rom]
  ...

Signed-off-by: Eiichi Tsukata 
---
 meson.build   | 2 ++
 softmmu/physmem.c | 9 +
 2 files changed, 11 insertions(+)

diff --git a/meson.build b/meson.build
index 29f8644d6d..aca889f5f0 100644
--- a/meson.build
+++ b/meson.build
@@ -2040,6 +2040,8 @@ config_host_data.set('HAVE_OPTRESET',
  cc.has_header_symbol('getopt.h', 'optreset'))
 config_host_data.set('HAVE_IPPROTO_MPTCP',
  cc.has_header_symbol('netinet/in.h', 'IPPROTO_MPTCP'))
+config_host_data.set('CONFIG_PRCTL_PR_SET_VMA_ANON_NAME',
+ cc.has_header_symbol('sys/prctl.h', 
'PR_SET_VMA_ANON_NAME'))
 
 # has_member
 config_host_data.set('HAVE_SIGEV_NOTIFY_THREAD_ID',
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index e35061bba4..4fe3e14193 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -80,6 +80,10 @@
 #include 
 #endif
 
+#ifdef CONFIG_PRCTL_PR_SET_VMA_ANON_NAME
+#include 
+#endif
+
 //#define DEBUG_SUBPAGE
 
 /* ram_list is read under rcu_read_lock()/rcu_read_unlock().  Writes
@@ -1810,6 +1814,11 @@ static void ram_block_add(RAMBlock *new_block, Error 
**errp)
 return;
 }
 memory_try_enable_merging(new_block->host, new_block->max_length);
+#ifdef CONFIG_PRCTL_PR_SET_VMA_ANON_NAME
+prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME,
+  (unsigned long) new_block->host, new_block->max_length,
+  (unsigned long) new_block->mr->name);
+#endif
 }
 }
 
-- 
2.39.2
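
A quick way to see the effect on a running guest (illustrative; assumes
a qemu-system-x86_64 started with this patch applied on a >= 5.17 kernel):

    $ grep 'anon:' /proc/"$(pidof qemu-system-x86_64)"/maps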




Re: [PATCH v6 16/25] target/riscv: Set MMU_2STAGE_BIT in riscv_cpu_mmu_index

2023-04-10 Thread Alistair Francis
On Sat, Mar 25, 2023 at 9:58 PM Richard Henderson
 wrote:
>
> Incorporate the virt_enabled and MPV checks into the cpu_mmu_index
> function, so we don't have to keep doing it within tlb_fill and
> subroutines.  This also elides a flush on changes to MPV.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu_helper.c | 50 +--
>  target/riscv/csr.c|  6 +
>  2 files changed, 18 insertions(+), 38 deletions(-)
>
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 0adfd4a12b..6c42f9c6fd 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -37,19 +37,21 @@ int riscv_cpu_mmu_index(CPURISCVState *env, bool ifetch)
>  #ifdef CONFIG_USER_ONLY
>  return 0;
>  #else
> -if (ifetch) {
> -return env->priv;
> -}
> +bool virt = riscv_cpu_virt_enabled(env);
> +int mode = env->priv;
>
>  /* All priv -> mmu_idx mapping are here */
> -int mode = env->priv;
> -if (mode == PRV_M && get_field(env->mstatus, MSTATUS_MPRV)) {
> -mode = get_field(env->mstatus, MSTATUS_MPP);
> +if (!ifetch) {
> +if (mode == PRV_M && get_field(env->mstatus, MSTATUS_MPRV)) {
> +mode = get_field(env->mstatus, MSTATUS_MPP);
> +virt = get_field(env->mstatus, MSTATUS_MPV);
> +}
> +if (mode == PRV_S && get_field(env->mstatus, MSTATUS_SUM)) {
> +mode = MMUIdx_S_SUM;
> +}
>  }
> -if (mode == PRV_S && get_field(env->mstatus, MSTATUS_SUM)) {
> -return MMUIdx_S_SUM;
> -}
> -return mode;
> +
> +return mode | (virt ? MMU_2STAGE_BIT : 0);
>  #endif
>  }
>
> @@ -1165,8 +1167,7 @@ void riscv_cpu_do_transaction_failed(CPUState *cs, 
> hwaddr physaddr,
>  }
>
>  env->badaddr = addr;
> -env->two_stage_lookup = riscv_cpu_virt_enabled(env) ||
> -mmuidx_2stage(mmu_idx);
> +env->two_stage_lookup = mmuidx_2stage(mmu_idx);
>  env->two_stage_indirect_lookup = false;
>  cpu_loop_exit_restore(cs, retaddr);
>  }
> @@ -1191,8 +1192,7 @@ void riscv_cpu_do_unaligned_access(CPUState *cs, vaddr 
> addr,
>  g_assert_not_reached();
>  }
>  env->badaddr = addr;
> -env->two_stage_lookup = riscv_cpu_virt_enabled(env) ||
> -mmuidx_2stage(mmu_idx);
> +env->two_stage_lookup = mmuidx_2stage(mmu_idx);
>  env->two_stage_indirect_lookup = false;
>  cpu_loop_exit_restore(cs, retaddr);
>  }
> @@ -1230,7 +1230,7 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, 
> int size,
>  int prot, prot2, prot_pmp;
>  bool pmp_violation = false;
>  bool first_stage_error = true;
> -bool two_stage_lookup = false;
> +bool two_stage_lookup = mmuidx_2stage(mmu_idx);
>  bool two_stage_indirect_error = false;
>  int ret = TRANSLATE_FAIL;
>  int mode = mmu_idx;
> @@ -1242,22 +1242,8 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, 
> int size,
>  qemu_log_mask(CPU_LOG_MMU, "%s ad %" VADDR_PRIx " rw %d mmu_idx %d\n",
>__func__, address, access_type, mmu_idx);
>
> -/* MPRV does not affect the virtual-machine load/store
> -   instructions, HLV, HLVX, and HSV. */
> -if (mmuidx_2stage(mmu_idx)) {
> -;
> -} else if (mode == PRV_M && access_type != MMU_INST_FETCH &&
> -   get_field(env->mstatus, MSTATUS_MPRV)) {
> -mode = get_field(env->mstatus, MSTATUS_MPP);
> -if (riscv_has_ext(env, RVH) && get_field(env->mstatus, MSTATUS_MPV)) 
> {
> -two_stage_lookup = true;
> -}
> -}
> -
>  pmu_tlb_fill_incr_ctr(cpu, access_type);
> -if (riscv_cpu_virt_enabled(env) ||
> -((mmuidx_2stage(mmu_idx) || two_stage_lookup) &&
> - access_type != MMU_INST_FETCH)) {
> +if (two_stage_lookup) {
>  /* Two stage lookup */
>  ret = get_physical_address(env, &pa, &prot, address,
> &env->guest_phys_fault_addr, access_type,
> @@ -1352,9 +1338,7 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, 
> int size,
>  return false;
>  } else {
>  raise_mmu_exception(env, address, access_type, pmp_violation,
> -first_stage_error,
> -riscv_cpu_virt_enabled(env) ||
> -mmuidx_2stage(mmu_idx),
> +first_stage_error, two_stage_lookup,
>  two_stage_indirect_error);
>  cpu_loop_exit_restore(cs, retaddr);
>  }
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index b79758a606..1b635373c6 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -1246,7 +1246,7 @@ static RISCVException write_mstatus(CPURISCVState *env, 
> int csrno,
>  RISCVMXL xl = riscv_cpu_mxl(env);
>
>  /* flush tlb on mstatus fields that affect VM */
> -if ((val ^ mstatus) & 

Re: [PATCH v6 15/25] target/riscv: Move hstatus.spvp check to check_access_hlsv

2023-04-10 Thread Alistair Francis
On Sat, Mar 25, 2023 at 9:57 PM Richard Henderson
 wrote:
>
> The current cpu_mmu_index value is really irrelevant to
> the HLV/HSV lookup.  Provide the correct priv level directly.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu_helper.c | 8 +---
>  target/riscv/op_helper.c  | 2 +-
>  2 files changed, 2 insertions(+), 8 deletions(-)
>
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 8a124888cd..0adfd4a12b 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -784,12 +784,6 @@ static int get_physical_address(CPURISCVState *env, 
> hwaddr *physical,
>  use_background = true;
>  }
>
> -/* MPRV does not affect the virtual-machine load/store
> -   instructions, HLV, HLVX, and HSV. */
> -if (mmuidx_2stage(mmu_idx)) {
> -mode = get_field(env->hstatus, HSTATUS_SPVP);
> -}
> -
>  if (first_stage == false) {
>  /* We are in stage 2 translation, this is similar to stage 1. */
>  /* Stage 2 is always taken as U-mode */
> @@ -1251,7 +1245,7 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, 
> int size,
>  /* MPRV does not affect the virtual-machine load/store
> instructions, HLV, HLVX, and HSV. */
>  if (mmuidx_2stage(mmu_idx)) {
> -mode = get_field(env->hstatus, HSTATUS_SPVP);
> +;
>  } else if (mode == PRV_M && access_type != MMU_INST_FETCH &&
> get_field(env->mstatus, MSTATUS_MPRV)) {
>  mode = get_field(env->mstatus, MSTATUS_MPP);
> diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c
> index 81362537b6..db7252e09d 100644
> --- a/target/riscv/op_helper.c
> +++ b/target/riscv/op_helper.c
> @@ -437,7 +437,7 @@ static int check_access_hlsv(CPURISCVState *env, bool x, 
> uintptr_t ra)
>  riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, ra);
>  }
>
> -return cpu_mmu_index(env, x) | MMU_2STAGE_BIT;
> +return get_field(env->hstatus, HSTATUS_SPVP) | MMU_2STAGE_BIT;
>  }
>
>  target_ulong helper_hyp_hlv_bu(CPURISCVState *env, target_ulong addr)
> --
> 2.34.1
>
>



Re: [PATCH v6 14/25] target/riscv: Introduce mmuidx_2stage

2023-04-10 Thread Alistair Francis
On Sat, Mar 25, 2023 at 9:53 PM Richard Henderson
 wrote:
>
> Move and rename riscv_cpu_two_stage_lookup, to match
> the other mmuidx_* functions.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu.h|  1 -
>  target/riscv/internals.h  |  5 +
>  target/riscv/cpu_helper.c | 17 ++---
>  3 files changed, 11 insertions(+), 12 deletions(-)
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index f03ff1f10c..b6bcfb3834 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -586,7 +586,6 @@ void riscv_cpu_set_geilen(CPURISCVState *env, 
> target_ulong geilen);
>  bool riscv_cpu_vector_enabled(CPURISCVState *env);
>  bool riscv_cpu_virt_enabled(CPURISCVState *env);
>  void riscv_cpu_set_virt_enabled(CPURISCVState *env, bool enable);
> -bool riscv_cpu_two_stage_lookup(int mmu_idx);
>  int riscv_cpu_mmu_index(CPURISCVState *env, bool ifetch);
>  G_NORETURN void  riscv_cpu_do_unaligned_access(CPUState *cs, vaddr addr,
> MMUAccessType access_type, 
> int mmu_idx,
> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
> index 4aa1cb409f..b5f823c7ec 100644
> --- a/target/riscv/internals.h
> +++ b/target/riscv/internals.h
> @@ -51,6 +51,11 @@ static inline bool mmuidx_sum(int mmu_idx)
>  return (mmu_idx & 3) == MMUIdx_S_SUM;
>  }
>
> +static inline bool mmuidx_2stage(int mmu_idx)
> +{
> +return mmu_idx & MMU_2STAGE_BIT;
> +}
> +
>  /* share data between vector helpers and decode code */
>  FIELD(VDATA, VM, 0, 1)
>  FIELD(VDATA, LMUL, 1, 3)
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index cb260b88ea..8a124888cd 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -603,11 +603,6 @@ void riscv_cpu_set_virt_enabled(CPURISCVState *env, bool 
> enable)
>  }
>  }
>
> -bool riscv_cpu_two_stage_lookup(int mmu_idx)
> -{
> -return mmu_idx & MMU_2STAGE_BIT;
> -}
> -
>  int riscv_cpu_claim_interrupts(RISCVCPU *cpu, uint64_t interrupts)
>  {
>  CPURISCVState *env = &cpu->env;
> @@ -791,7 +786,7 @@ static int get_physical_address(CPURISCVState *env, 
> hwaddr *physical,
>
>  /* MPRV does not affect the virtual-machine load/store
> instructions, HLV, HLVX, and HSV. */
> -if (riscv_cpu_two_stage_lookup(mmu_idx)) {
> +if (mmuidx_2stage(mmu_idx)) {
>  mode = get_field(env->hstatus, HSTATUS_SPVP);
>  }
>
> @@ -1177,7 +1172,7 @@ void riscv_cpu_do_transaction_failed(CPUState *cs, 
> hwaddr physaddr,
>
>  env->badaddr = addr;
>  env->two_stage_lookup = riscv_cpu_virt_enabled(env) ||
> -riscv_cpu_two_stage_lookup(mmu_idx);
> +mmuidx_2stage(mmu_idx);
>  env->two_stage_indirect_lookup = false;
>  cpu_loop_exit_restore(cs, retaddr);
>  }
> @@ -1203,7 +1198,7 @@ void riscv_cpu_do_unaligned_access(CPUState *cs, vaddr 
> addr,
>  }
>  env->badaddr = addr;
>  env->two_stage_lookup = riscv_cpu_virt_enabled(env) ||
> -riscv_cpu_two_stage_lookup(mmu_idx);
> +mmuidx_2stage(mmu_idx);
>  env->two_stage_indirect_lookup = false;
>  cpu_loop_exit_restore(cs, retaddr);
>  }
> @@ -1255,7 +1250,7 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, 
> int size,
>
>  /* MPRV does not affect the virtual-machine load/store
> instructions, HLV, HLVX, and HSV. */
> -if (riscv_cpu_two_stage_lookup(mmu_idx)) {
> +if (mmuidx_2stage(mmu_idx)) {
>  mode = get_field(env->hstatus, HSTATUS_SPVP);
>  } else if (mode == PRV_M && access_type != MMU_INST_FETCH &&
> get_field(env->mstatus, MSTATUS_MPRV)) {
> @@ -1267,7 +1262,7 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, 
> int size,
>
>  pmu_tlb_fill_incr_ctr(cpu, access_type);
>  if (riscv_cpu_virt_enabled(env) ||
> -((riscv_cpu_two_stage_lookup(mmu_idx) || two_stage_lookup) &&
> +((mmuidx_2stage(mmu_idx) || two_stage_lookup) &&
>   access_type != MMU_INST_FETCH)) {
>  /* Two stage lookup */
>  ret = get_physical_address(env, , , address,
> @@ -1365,7 +1360,7 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, 
> int size,
>  raise_mmu_exception(env, address, access_type, pmp_violation,
>  first_stage_error,
>  riscv_cpu_virt_enabled(env) ||
> -riscv_cpu_two_stage_lookup(mmu_idx),
> +mmuidx_2stage(mmu_idx),
>  two_stage_indirect_error);
>  cpu_loop_exit_restore(cs, retaddr);
>  }
> --
> 2.34.1
>
>



Re: [PATCH v6 13/25] target/riscv: Introduce mmuidx_priv

2023-04-10 Thread Alistair Francis
On Sat, Mar 25, 2023 at 9:53 PM Richard Henderson
 wrote:
>
> Use the priv level encoded into the mmu_idx, rather than
> starting from env->priv.  We have already checked MPRV+MPP
> in riscv_cpu_mmu_index -- no need to repeat that.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/internals.h  | 9 +
>  target/riscv/cpu_helper.c | 6 +-
>  2 files changed, 10 insertions(+), 5 deletions(-)
>
> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
> index 0b61f337dd..4aa1cb409f 100644
> --- a/target/riscv/internals.h
> +++ b/target/riscv/internals.h
> @@ -37,6 +37,15 @@
>  #define MMUIdx_M3
>  #define MMU_2STAGE_BIT  (1 << 2)
>
> +static inline int mmuidx_priv(int mmu_idx)
> +{
> +int ret = mmu_idx & 3;
> +if (ret == MMUIdx_S_SUM) {
> +ret = PRV_S;
> +}
> +return ret;
> +}
> +
>  static inline bool mmuidx_sum(int mmu_idx)
>  {
>  return (mmu_idx & 3) == MMUIdx_S_SUM;
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 7e6cd8e0fd..cb260b88ea 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -771,7 +771,7 @@ static int get_physical_address(CPURISCVState *env, 
> hwaddr *physical,
>   * (riscv_cpu_do_interrupt) is correct */
>  MemTxResult res;
>  MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED;
> -int mode = env->priv;
> +int mode = mmuidx_priv(mmu_idx);
>  bool use_background = false;
>  hwaddr ppn;
>  RISCVCPU *cpu = env_archcpu(env);
> @@ -793,10 +793,6 @@ static int get_physical_address(CPURISCVState *env, 
> hwaddr *physical,
> instructions, HLV, HLVX, and HSV. */
>  if (riscv_cpu_two_stage_lookup(mmu_idx)) {
>  mode = get_field(env->hstatus, HSTATUS_SPVP);
> -} else if (mode == PRV_M && access_type != MMU_INST_FETCH) {
> -if (get_field(env->mstatus, MSTATUS_MPRV)) {
> -mode = get_field(env->mstatus, MSTATUS_MPP);
> -}
>  }
>
>  if (first_stage == false) {
> --
> 2.34.1
>
>



Re: riscv: g_assert for NULL predicate?

2023-04-10 Thread Bin Meng
On Wed, Apr 5, 2023 at 2:07 PM Alistair Francis  wrote:
>
> On Mon, Apr 3, 2023 at 11:43 PM Wu, Fei  wrote:
> >
> > > Recent commit 0ee342256af92 switched the predicate() NULL check from
> > > returning RISCV_EXCP_ILLEGAL_INST to a g_assert(). QEMU doesn't install
> > > a predicate() for unallocated CSRs, so a buggy userspace application
> > > reading a CSR such as 0x4 causes QEMU to exit, which I don't think is
> > > expected.
>
> Hm, that's not good. Userspace shouldn't be able to crash QEMU. I
> think we want to revert that patch then.
>
> @Bin Meng any thoughts?
>

Agree, I will send a patch for this.

Regards,
Bin
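
For reference, the pre-0ee342256af92 pattern being discussed, i.e.
treating a missing predicate as a guest error rather than a QEMU bug (a
minimal sketch against the csr_ops table in target/riscv/csr.c):

    /* Sketch: a CSR number with no registered predicate should raise an
     * illegal-instruction exception back to the guest, not abort QEMU. */
    if (!csr_ops[csrno].predicate) {
        return RISCV_EXCP_ILLEGAL_INST;
    }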



Re: [PATCH v6 12/25] target/riscv: Introduce mmuidx_sum

2023-04-10 Thread Alistair Francis
On Sat, Mar 25, 2023 at 9:55 PM Richard Henderson
 wrote:
>
> In get_physical_address, we should use the setting passed
> via mmu_idx rather than checking env->mstatus directly.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/internals.h  | 5 +
>  target/riscv/cpu_helper.c | 2 +-
>  2 files changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
> index 7b63c0f1b6..0b61f337dd 100644
> --- a/target/riscv/internals.h
> +++ b/target/riscv/internals.h
> @@ -37,6 +37,11 @@
>  #define MMUIdx_M3
>  #define MMU_2STAGE_BIT  (1 << 2)
>
> +static inline bool mmuidx_sum(int mmu_idx)
> +{
> +return (mmu_idx & 3) == MMUIdx_S_SUM;
> +}
> +
>  /* share data between vector helpers and decode code */
>  FIELD(VDATA, VM, 0, 1)
>  FIELD(VDATA, LMUL, 1, 3)
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 888f7ae0ef..7e6cd8e0fd 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -852,7 +852,7 @@ static int get_physical_address(CPURISCVState *env, 
> hwaddr *physical,
>  widened = 2;
>  }
>  /* status.SUM will be ignored if execute on background */
> -sum = get_field(env->mstatus, MSTATUS_SUM) || use_background || is_debug;
> +sum = mmuidx_sum(mmu_idx) || use_background || is_debug;
>  switch (vm) {
>  case VM_1_10_SV32:
>levels = 2; ptidxbits = 10; ptesize = 4; break;
> --
> 2.34.1
>
>



Re: [PATCH v6 11/25] target/riscv: Rename MMU_HYP_ACCESS_BIT to MMU_2STAGE_BIT

2023-04-10 Thread Alistair Francis
On Sat, Mar 25, 2023 at 9:57 PM Richard Henderson
 wrote:
>
> We will enable more uses of this bit in the future.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/internals.h  | 6 --
>  target/riscv/cpu_helper.c | 2 +-
>  target/riscv/op_helper.c  | 2 +-
>  3 files changed, 6 insertions(+), 4 deletions(-)
>
> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
> index b55152a7dc..7b63c0f1b6 100644
> --- a/target/riscv/internals.h
> +++ b/target/riscv/internals.h
> @@ -27,13 +27,15 @@
>   *  - S 0b001
>   *  - S+SUM 0b010
>   *  - M 0b011
> - *  - HLV/HLVX/HSV adds 0b100
> + *  - U+2STAGE  0b100
> + *  - S+2STAGE  0b101
> + *  - S+SUM+2STAGE  0b110
>   */
>  #define MMUIdx_U0
>  #define MMUIdx_S1
>  #define MMUIdx_S_SUM2
>  #define MMUIdx_M3
> -#define MMU_HYP_ACCESS_BIT  (1 << 2)
> +#define MMU_2STAGE_BIT  (1 << 2)
>
>  /* share data between vector helpers and decode code */
>  FIELD(VDATA, VM, 0, 1)
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 9bb84be4e1..888f7ae0ef 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -605,7 +605,7 @@ void riscv_cpu_set_virt_enabled(CPURISCVState *env, bool 
> enable)
>
>  bool riscv_cpu_two_stage_lookup(int mmu_idx)
>  {
> -return mmu_idx & MMU_HYP_ACCESS_BIT;
> +return mmu_idx & MMU_2STAGE_BIT;
>  }
>
>  int riscv_cpu_claim_interrupts(RISCVCPU *cpu, uint64_t interrupts)
> diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c
> index 0f81645adf..81362537b6 100644
> --- a/target/riscv/op_helper.c
> +++ b/target/riscv/op_helper.c
> @@ -437,7 +437,7 @@ static int check_access_hlsv(CPURISCVState *env, bool x, 
> uintptr_t ra)
>  riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, ra);
>  }
>
> -return cpu_mmu_index(env, x) | MMU_HYP_ACCESS_BIT;
> +return cpu_mmu_index(env, x) | MMU_2STAGE_BIT;
>  }
>
>  target_ulong helper_hyp_hlv_bu(CPURISCVState *env, target_ulong addr)
> --
> 2.34.1
>
>
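
Putting the new encoding together with the helpers introduced in
patches 12-14, an index decodes as follows (a sketch using the values
from the comment above):

    /* 0b110 == S+SUM+2STAGE in the table above. */
    int mmu_idx = MMUIdx_S_SUM | MMU_2STAGE_BIT;

    assert(mmuidx_priv(mmu_idx) == PRV_S);  /* S_SUM maps back to PRV_S */
    assert(mmuidx_sum(mmu_idx));            /* SUM is in effect */
    assert(mmuidx_2stage(mmu_idx));         /* two-stage translation */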



Re: [PATCH v6 10/25] target/riscv: Handle HLV, HSV via helpers

2023-04-10 Thread Alistair Francis
On Sat, Mar 25, 2023 at 9:53 PM Richard Henderson
 wrote:
>
> Implement these instructions via helpers, in expectation
> of determining the mmu_idx to use at runtime.  This allows
> the permission check to also be moved out of line, which
> allows HLSX to be removed from TB_FLAGS.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu.h  |   6 +-
>  target/riscv/helper.h   |  12 ++-
>  target/riscv/cpu_helper.c   |  26 ++---
>  target/riscv/op_helper.c|  99 +++--
>  target/riscv/translate.c|   2 -
>  target/riscv/insn_trans/trans_rvh.c.inc | 135 ++--
>  6 files changed, 169 insertions(+), 111 deletions(-)
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 5e589db106..f03ff1f10c 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -641,8 +641,7 @@ FIELD(TB_FLAGS, LMUL, 7, 3)
>  FIELD(TB_FLAGS, SEW, 10, 3)
>  FIELD(TB_FLAGS, VL_EQ_VLMAX, 13, 1)
>  FIELD(TB_FLAGS, VILL, 14, 1)
> -/* Is a Hypervisor instruction load/store allowed? */
> -FIELD(TB_FLAGS, HLSX, 15, 1)
> +FIELD(TB_FLAGS, VSTART_EQ_ZERO, 15, 1)
>  /* The combination of MXL/SXL/UXL that applies to the current cpu mode. */
>  FIELD(TB_FLAGS, XL, 16, 2)
>  /* If PointerMasking should be applied */
> @@ -654,8 +653,7 @@ FIELD(TB_FLAGS, VMA, 21, 1)
>  FIELD(TB_FLAGS, ITRIGGER, 22, 1)
>  /* Virtual mode enabled */
>  FIELD(TB_FLAGS, VIRT_ENABLED, 23, 1)
> -FIELD(TB_FLAGS, VSTART_EQ_ZERO, 24, 1)
> -FIELD(TB_FLAGS, PRIV, 25, 2)
> +FIELD(TB_FLAGS, PRIV, 24, 2)
>
>  #ifdef TARGET_RISCV32
>  #define riscv_cpu_mxl(env)  ((void)(env), MXL_RV32)
> diff --git a/target/riscv/helper.h b/target/riscv/helper.h
> index 37b54e0991..be60bd1525 100644
> --- a/target/riscv/helper.h
> +++ b/target/riscv/helper.h
> @@ -123,8 +123,16 @@ DEF_HELPER_1(itrigger_match, void, env)
>  #ifndef CONFIG_USER_ONLY
>  DEF_HELPER_1(hyp_tlb_flush, void, env)
>  DEF_HELPER_1(hyp_gvma_tlb_flush, void, env)
> -DEF_HELPER_2(hyp_hlvx_hu, tl, env, tl)
> -DEF_HELPER_2(hyp_hlvx_wu, tl, env, tl)
> +DEF_HELPER_FLAGS_2(hyp_hlv_bu, TCG_CALL_NO_WG, tl, env, tl)
> +DEF_HELPER_FLAGS_2(hyp_hlv_hu, TCG_CALL_NO_WG, tl, env, tl)
> +DEF_HELPER_FLAGS_2(hyp_hlv_wu, TCG_CALL_NO_WG, tl, env, tl)
> +DEF_HELPER_FLAGS_2(hyp_hlv_d, TCG_CALL_NO_WG, tl, env, tl)
> +DEF_HELPER_FLAGS_2(hyp_hlvx_hu, TCG_CALL_NO_WG, tl, env, tl)
> +DEF_HELPER_FLAGS_2(hyp_hlvx_wu, TCG_CALL_NO_WG, tl, env, tl)
> +DEF_HELPER_FLAGS_3(hyp_hsv_b, TCG_CALL_NO_WG, void, env, tl, tl)
> +DEF_HELPER_FLAGS_3(hyp_hsv_h, TCG_CALL_NO_WG, void, env, tl, tl)
> +DEF_HELPER_FLAGS_3(hyp_hsv_w, TCG_CALL_NO_WG, void, env, tl, tl)
> +DEF_HELPER_FLAGS_3(hyp_hsv_d, TCG_CALL_NO_WG, void, env, tl, tl)
>  #endif
>
>  /* Vector functions */
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 052fdd2d9d..9bb84be4e1 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -102,24 +102,16 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, 
> target_ulong *pc,
>  fs = get_field(env->mstatus, MSTATUS_FS);
>  vs = get_field(env->mstatus, MSTATUS_VS);
>
> -if (riscv_has_ext(env, RVH)) {
> -if (env->priv == PRV_M ||
> -(env->priv == PRV_S && !riscv_cpu_virt_enabled(env)) ||
> -(env->priv == PRV_U && !riscv_cpu_virt_enabled(env) &&
> -get_field(env->hstatus, HSTATUS_HU))) {
> -flags = FIELD_DP32(flags, TB_FLAGS, HLSX, 1);
> -}
> -
> -if (riscv_cpu_virt_enabled(env)) {
> -flags = FIELD_DP32(flags, TB_FLAGS, VIRT_ENABLED, 1);
> -/*
> - * Merge DISABLED and !DIRTY states using MIN.
> - * We will set both fields when dirtying.
> - */
> -fs = MIN(fs, get_field(env->mstatus_hs, MSTATUS_FS));
> -vs = MIN(vs, get_field(env->mstatus_hs, MSTATUS_VS));
> -}
> +if (riscv_cpu_virt_enabled(env)) {
> +flags = FIELD_DP32(flags, TB_FLAGS, VIRT_ENABLED, 1);
> +/*
> + * Merge DISABLED and !DIRTY states using MIN.
> + * We will set both fields when dirtying.
> + */
> +fs = MIN(fs, get_field(env->mstatus_hs, MSTATUS_FS));
> +vs = MIN(vs, get_field(env->mstatus_hs, MSTATUS_VS));
>  }
> +
>  if (cpu->cfg.debug && !icount_enabled()) {
>  flags = FIELD_DP32(flags, TB_FLAGS, ITRIGGER, env->itrigger_enabled);
>  }
> diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c
> index b2169a99ff..0f81645adf 100644
> --- a/target/riscv/op_helper.c
> +++ b/target/riscv/op_helper.c
> @@ -427,6 +427,91 @@ void helper_hyp_gvma_tlb_flush(CPURISCVState *env)
>  helper_hyp_tlb_flush(env);
>  }
>
> +static int check_access_hlsv(CPURISCVState *env, bool x, uintptr_t ra)
> +{
> +if (env->priv == PRV_M) {
> +/* always allowed */
> +} else if (riscv_cpu_virt_enabled(env)) {
> +

Re: [PATCH v6 09/25] target/riscv: Use cpu_ld*_code_mmu for HLVX

2023-04-10 Thread Alistair Francis
On Sat, Mar 25, 2023 at 9:53 PM Richard Henderson
 wrote:
>
> Use the new functions to properly check execute permission
> for the read rather than read permission.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/op_helper.c | 13 +++--
>  1 file changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c
> index 962a061228..b2169a99ff 100644
> --- a/target/riscv/op_helper.c
> +++ b/target/riscv/op_helper.c
> @@ -427,18 +427,27 @@ void helper_hyp_gvma_tlb_flush(CPURISCVState *env)
>  helper_hyp_tlb_flush(env);
>  }
>
> +/*
> + * TODO: These implementations are not quite correct.  They perform the
> + * access using execute permission just fine, but the final PMP check
> + * is supposed to have read permission as well.  Without replicating
> + * a fair fraction of cputlb.c, fixing this requires adding new mmu_idx
> + * which would imply that exact check in tlb_fill.
> + */
>  target_ulong helper_hyp_hlvx_hu(CPURISCVState *env, target_ulong address)
>  {
>  int mmu_idx = cpu_mmu_index(env, true) | MMU_HYP_ACCESS_BIT;
> +MemOpIdx oi = make_memop_idx(MO_TEUW, mmu_idx);
>
> -return cpu_lduw_mmuidx_ra(env, address, mmu_idx, GETPC());
> +return cpu_ldw_code_mmu(env, address, oi, GETPC());
>  }
>
>  target_ulong helper_hyp_hlvx_wu(CPURISCVState *env, target_ulong address)
>  {
>  int mmu_idx = cpu_mmu_index(env, true) | MMU_HYP_ACCESS_BIT;
> +MemOpIdx oi = make_memop_idx(MO_TEUL, mmu_idx);
>
> -return cpu_ldl_mmuidx_ra(env, address, mmu_idx, GETPC());
> +return cpu_ldl_code_mmu(env, address, oi, GETPC());
>  }
>
>  #endif /* !CONFIG_USER_ONLY */
> --
> 2.34.1
>
>



Re: [PATCH v6 08/25] accel/tcg: Add cpu_ld*_code_mmu

2023-04-10 Thread Alistair Francis
On Sat, Mar 25, 2023 at 9:52 PM Richard Henderson
 wrote:
>
> At least RISC-V has the need to be able to perform a read
> using execute permissions, outside of translation.
> Add helpers to facilitate this.
>
> Signed-off-by: Richard Henderson 

Acked-by: Alistair Francis 

Alistair

> ---
>  include/exec/cpu_ldst.h |  9 +++
>  accel/tcg/cputlb.c  | 48 ++
>  accel/tcg/user-exec.c   | 58 +
>  3 files changed, 115 insertions(+)
>
> diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
> index 09b55cc0ee..c141f0394f 100644
> --- a/include/exec/cpu_ldst.h
> +++ b/include/exec/cpu_ldst.h
> @@ -445,6 +445,15 @@ static inline CPUTLBEntry *tlb_entry(CPUArchState *env, 
> uintptr_t mmu_idx,
>  # define cpu_stq_mmu  cpu_stq_le_mmu
>  #endif
>
> +uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
> + MemOpIdx oi, uintptr_t ra);
> +uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
> +  MemOpIdx oi, uintptr_t ra);
> +uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
> +  MemOpIdx oi, uintptr_t ra);
> +uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
> +  MemOpIdx oi, uintptr_t ra);
> +
>  uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr addr);
>  uint32_t cpu_lduw_code(CPUArchState *env, abi_ptr addr);
>  uint32_t cpu_ldl_code(CPUArchState *env, abi_ptr addr);
> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> index e984a98dc4..e62c8f3c3f 100644
> --- a/accel/tcg/cputlb.c
> +++ b/accel/tcg/cputlb.c
> @@ -2768,3 +2768,51 @@ uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr addr)
>  MemOpIdx oi = make_memop_idx(MO_TEUQ, cpu_mmu_index(env, true));
>  return full_ldq_code(env, addr, oi, 0);
>  }
> +
> +uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
> + MemOpIdx oi, uintptr_t retaddr)
> +{
> +return full_ldub_code(env, addr, oi, retaddr);
> +}
> +
> +uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
> +  MemOpIdx oi, uintptr_t retaddr)
> +{
> +MemOp mop = get_memop(oi);
> +int idx = get_mmuidx(oi);
> +uint16_t ret;
> +
> +ret = full_lduw_code(env, addr, make_memop_idx(MO_TEUW, idx), retaddr);
> +if ((mop & MO_BSWAP) != MO_TE) {
> +ret = bswap16(ret);
> +}
> +return ret;
> +}
> +
> +uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
> +  MemOpIdx oi, uintptr_t retaddr)
> +{
> +MemOp mop = get_memop(oi);
> +int idx = get_mmuidx(oi);
> +uint32_t ret;
> +
> +ret = full_ldl_code(env, addr, make_memop_idx(MO_TEUL, idx), retaddr);
> +if ((mop & MO_BSWAP) != MO_TE) {
> +ret = bswap32(ret);
> +}
> +return ret;
> +}
> +
> +uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
> +  MemOpIdx oi, uintptr_t retaddr)
> +{
> +MemOp mop = get_memop(oi);
> +int idx = get_mmuidx(oi);
> +uint64_t ret;
> +
> +ret = full_ldq_code(env, addr, make_memop_idx(MO_TEUQ, idx), retaddr);
> +if ((mop & MO_BSWAP) != MO_TE) {
> +ret = bswap64(ret);
> +}
> +return ret;
> +}
> diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
> index 7b37fd229e..44e0ea55ba 100644
> --- a/accel/tcg/user-exec.c
> +++ b/accel/tcg/user-exec.c
> @@ -1222,6 +1222,64 @@ uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr ptr)
>  return ret;
>  }
>
> +uint8_t cpu_ldb_code_mmu(CPUArchState *env, abi_ptr addr,
> + MemOpIdx oi, uintptr_t ra)
> +{
> +void *haddr;
> +uint8_t ret;
> +
> +haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
> +ret = ldub_p(haddr);
> +clear_helper_retaddr();
> +return ret;
> +}
> +
> +uint16_t cpu_ldw_code_mmu(CPUArchState *env, abi_ptr addr,
> +  MemOpIdx oi, uintptr_t ra)
> +{
> +void *haddr;
> +uint16_t ret;
> +
> +haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
> +ret = lduw_p(haddr);
> +clear_helper_retaddr();
> +if (get_memop(oi) & MO_BSWAP) {
> +ret = bswap16(ret);
> +}
> +return ret;
> +}
> +
> +uint32_t cpu_ldl_code_mmu(CPUArchState *env, abi_ptr addr,
> +  MemOpIdx oi, uintptr_t ra)
> +{
> +void *haddr;
> +uint32_t ret;
> +
> +haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_INST_FETCH);
> +ret = ldl_p(haddr);
> +clear_helper_retaddr();
> +if (get_memop(oi) & MO_BSWAP) {
> +ret = bswap32(ret);
> +}
> +return ret;
> +}
> +
> +uint64_t cpu_ldq_code_mmu(CPUArchState *env, abi_ptr addr,
> +  MemOpIdx oi, uintptr_t ra)
> +{
> +void *haddr;
> +uint64_t ret;
> +
> +validate_memop(oi, MO_BEUQ);
> +haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
> +ret = ldq_p(haddr);
> +clear_helper_retaddr();
> +if (get_memop(oi) & 

Re: [PATCH v2 18/19] bsd-user: Update system call list

2023-04-10 Thread Warner Losh
On Mon, Apr 10, 2023 at 7:37 PM Richard Henderson <richard.hender...@linaro.org> wrote:

> On 4/10/23 11:20, Warner Losh wrote:
> > Update the system call list. We have one hokey thing in here for swapoff
> > that depends on the version number (so this is not completely generated
> > at the moment). For this, we need to include sys/param.h. The method of
> > generation has changed, so this diff looks way bigger than it needs to
> > be to add the few lines of code for the new system calls.
> >
> > Signed-off-by: Warner Losh
> > ---
> >   bsd-user/freebsd/os-syscall.h |2 +
> >   bsd-user/freebsd/syscall_nr.h | 1035 +
> >   2 files changed, 529 insertions(+), 508 deletions(-)
>
> What is the method of generation?
>
> If it's complicated, it should be in scripts/.
> If it's trivial, e.g.
>
>  sed 's/xxx/yyy/' < in.h > out.h
>
> it is worth including the command in the commit message.
>

I'll add it to the commit message... but I'm also contemplating generating
it on the fly if it's not too hard... Thanks for the suggestion... It's
usually a sed, but someone (likely me) edited it directly for an unwise
hack that I need to unwind first...


> Anyway,
> Acked-by: Richard Henderson 
>

Warner


RE: [PATCH] Hexagon (target/hexagon) Add overrides for cache/sync/barrier instructions

2023-04-10 Thread Taylor Simpson


> -Original Message-
> From: Richard Henderson 
> Sent: Monday, April 10, 2023 8:30 PM
> To: Taylor Simpson ; qemu-devel@nongnu.org
> Cc: phi...@linaro.org; a...@rev.ng; a...@rev.ng; Brian Cain
> ; Matheus Bernardino (QUIC)
> 
> Subject: Re: [PATCH] Hexagon (target/hexagon) Add overrides for
> cache/sync/barrier instructions
> 
> On 4/10/23 13:24, Taylor Simpson wrote:
> > Most of these are not modelled in QEMU, so save the overhead of
> > calling a helper.
> >
> > The only exception is dczeroa.  It assigns to hex_dczero_addr, which
> > is handled during packet commit.
> >
> > Signed-off-by: Taylor Simpson
> > ---
> >   target/hexagon/gen_tcg.h | 24 
> >   target/hexagon/macros.h  | 18 --
> >   2 files changed, 28 insertions(+), 14 deletions(-)
> 
> Reviewed-by: Richard Henderson 
> 
> Something to look at in the future: I believe quite a lot of these variables 
> like
> dczero_addr are not "real" architectural state, in that they do not persist
> beyond the lifetime of the packet.  There are others, e.g. pkt_has_store_s1.

That's correct.

> These variables could be moved to DisasContext and allocated on demand.
> Even recently this was tedious, because of TCG temporary lifetime issues,
> but no longer.

I'll work on this.  The obvious advantage is to allow the TCG optimizer more 
opportunity to fold copies and propagate constants.

Any other advantage?

Thanks,
Taylor



Re: Reducing vdpa migration downtime because of memory pin / maps

2023-04-10 Thread Jason Wang
On Mon, Apr 10, 2023 at 5:05 PM Eugenio Perez Martin
 wrote:
>
> On Mon, Apr 10, 2023 at 5:22 AM Jason Wang  wrote:
> >
> > On Mon, Apr 10, 2023 at 11:17 AM Longpeng (Mike, Cloud Infrastructure
> > Service Product Dept.)  wrote:
> > >
> > >
> > >
> > > On 2023/4/10 10:14, Jason Wang wrote:
> > > > On Wed, Apr 5, 2023 at 7:38 PM Eugenio Perez Martin 
> > > >  wrote:
> > > >>
> > > >> Hi!
> > > >>
> > > >> As mentioned in the last upstream virtio-networking meeting, one of
> > > >> the factors that adds more downtime to migration is the handling of
> > > >> the guest memory (pin, map, etc). At this moment this handling is
> > > >> bound to the virtio life cycle (DRIVER_OK, RESET). In that sense, the
> > > >> destination device waits until all the guest memory / state is
> > > >> migrated to start pinning all the memory.
> > > >>
> > > >> The proposal is to bind it to the char device life cycle (open vs
> > > >> close), so all the guest memory can be pinned for all the guest / qemu
> > > >> lifecycle.
> > > >>
> > > >> This has two main problems:
> > > >> * At this moment the reset semantics forces the vdpa device to unmap
> > > >> all the memory. So this change needs a vhost vdpa feature flag.
> > > >
> > > > Is this true? I didn't find any codes to unmap the memory in
> > > > vhost_vdpa_set_status().
> > > >
> > >
> > > It could depend on the vendor driver, for example, the vdpasim would do
> > > something like that.
> > >
> > > vhost_vdpa_set_status->vdpa_reset->vdpasim_reset->vdpasim_do_reset->vhost_iotlb_reset
> >
> > This looks like a bug. Or I wonder if any user space depends on this
> > behaviour, if yes, we really need a new flag then.
> >
>
> My understanding was that we depend on this for cases like qemu
> crashes. We don't do an unmap(-1ULL) or anything like that to make
> sure the device is clean when we bind a second qemu to the same
> device. That's why I think that close() should clean them.

In vhost_vdpa_release() we do:

vhost_vdpa_release()
vhost_vdpa_cleanup()
for_each_as()
vhost_vdpa_remove_as()
vhost_vdpa_iotlb_unmap(0ULL, 0ULL - 1)
vhost_vdpa_free_domain()

Anything wrong here?

Conceptually, the address mapping is not a part of the abstraction for
a virtio device now. So resetting the memory mapping during virtio
device reset seems wrong.

Thanks

> Or maybe
> even open().
>
> The only other option I see is to remove the whole vhost-vdpa device
> every time, or am I missing something?
>
> Thanks!
>




Re: [PATCH v6 07/25] target/riscv: Reduce overhead of MSTATUS_SUM change

2023-04-10 Thread Alistair Francis
On Sat, Mar 25, 2023 at 9:57 PM Richard Henderson
 wrote:
>
> From: Fei Wu 
>
> The kernel needs to access user mode memory, e.g. during syscalls. The
> window is usually opened up for a very limited time through MSTATUS.SUM,
> and the overhead is too high if tlb_flush() gets called for every SUM
> change.
>
> This patch creates a separate MMU index for S+SUM, so that it's not
> necessary to flush tlb anymore when SUM changes. This is similar to how
> ARM handles Privileged Access Never (PAN).
>
> Result of 'pipe 10' from unixbench boosts from 223656 to 1705006. Many
> other syscalls benefit a lot from this too.
>
> Reviewed-by: Richard Henderson 
> Signed-off-by: Fei Wu 
> Message-Id: <20230324054154.414846-3-fei2...@intel.com>

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu.h  |  2 --
>  target/riscv/internals.h| 14 ++
>  target/riscv/cpu_helper.c   | 17 +++--
>  target/riscv/csr.c  |  3 +--
>  target/riscv/op_helper.c|  5 +++--
>  target/riscv/insn_trans/trans_rvh.c.inc |  4 ++--
>  6 files changed, 35 insertions(+), 10 deletions(-)
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 3e59dbb3fd..5e589db106 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -631,8 +631,6 @@ G_NORETURN void riscv_raise_exception(CPURISCVState *env,
>  target_ulong riscv_cpu_get_fflags(CPURISCVState *env);
>  void riscv_cpu_set_fflags(CPURISCVState *env, target_ulong);
>
> -#define TB_FLAGS_PRIV_HYP_ACCESS_MASK   (1 << 2)
> -
>  #include "exec/cpu-all.h"
>
>  FIELD(TB_FLAGS, MEM_IDX, 0, 3)
> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
> index 5620fbffb6..b55152a7dc 100644
> --- a/target/riscv/internals.h
> +++ b/target/riscv/internals.h
> @@ -21,6 +21,20 @@
>
>  #include "hw/registerfields.h"
>
> +/*
> + * The current MMU Modes are:
> + *  - U 0b000
> + *  - S 0b001
> + *  - S+SUM 0b010
> + *  - M 0b011
> + *  - HLV/HLVX/HSV adds 0b100
> + */
> +#define MMUIdx_U0
> +#define MMUIdx_S1
> +#define MMUIdx_S_SUM2
> +#define MMUIdx_M3
> +#define MMU_HYP_ACCESS_BIT  (1 << 2)
> +
>  /* share data between vector helpers and decode code */
>  FIELD(VDATA, VM, 0, 1)
>  FIELD(VDATA, LMUL, 1, 3)
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 5753126c7a..052fdd2d9d 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -21,6 +21,7 @@
>  #include "qemu/log.h"
>  #include "qemu/main-loop.h"
>  #include "cpu.h"
> +#include "internals.h"
>  #include "pmu.h"
>  #include "exec/exec-all.h"
>  #include "instmap.h"
> @@ -36,7 +37,19 @@ int riscv_cpu_mmu_index(CPURISCVState *env, bool ifetch)
>  #ifdef CONFIG_USER_ONLY
>  return 0;
>  #else
> -return env->priv;
> +if (ifetch) {
> +return env->priv;
> +}
> +
> +/* All priv -> mmu_idx mapping are here */
> +int mode = env->priv;
> +if (mode == PRV_M && get_field(env->mstatus, MSTATUS_MPRV)) {
> +mode = get_field(env->mstatus, MSTATUS_MPP);
> +}
> +if (mode == PRV_S && get_field(env->mstatus, MSTATUS_SUM)) {
> +return MMUIdx_S_SUM;
> +}
> +return mode;
>  #endif
>  }
>
> @@ -600,7 +613,7 @@ void riscv_cpu_set_virt_enabled(CPURISCVState *env, bool 
> enable)
>
>  bool riscv_cpu_two_stage_lookup(int mmu_idx)
>  {
> -return mmu_idx & TB_FLAGS_PRIV_HYP_ACCESS_MASK;
> +return mmu_idx & MMU_HYP_ACCESS_BIT;
>  }
>
>  int riscv_cpu_claim_interrupts(RISCVCPU *cpu, uint64_t interrupts)
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index abea7b749e..b79758a606 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -1246,8 +1246,7 @@ static RISCVException write_mstatus(CPURISCVState *env, 
> int csrno,
>  RISCVMXL xl = riscv_cpu_mxl(env);
>
>  /* flush tlb on mstatus fields that affect VM */
> -if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | MSTATUS_MPV |
> -MSTATUS_MPRV | MSTATUS_SUM)) {
> +if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPV)) {
>  tlb_flush(env_cpu(env));
>  }
>  mask = MSTATUS_SIE | MSTATUS_SPIE | MSTATUS_MIE | MSTATUS_MPIE |
> diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c
> index 84ee018f7d..962a061228 100644
> --- a/target/riscv/op_helper.c
> +++ b/target/riscv/op_helper.c
> @@ -20,6 +20,7 @@
>
>  #include "qemu/osdep.h"
>  #include "cpu.h"
> +#include "internals.h"
>  #include "qemu/main-loop.h"
>  #include "exec/exec-all.h"
>  #include "exec/helper-proto.h"
> @@ -428,14 +429,14 @@ void helper_hyp_gvma_tlb_flush(CPURISCVState *env)
>
>  target_ulong helper_hyp_hlvx_hu(CPURISCVState *env, target_ulong address)
>  {
> -int mmu_idx = cpu_mmu_index(env, true) | TB_FLAGS_PRIV_HYP_ACCESS_MASK;
> +int mmu_idx = cpu_mmu_index(env, true) | MMU_HYP_ACCESS_BIT;
>
>  return cpu_lduw_mmuidx_ra(env, address, 

Re: [PATCH v6 06/25] target/riscv: Separate priv from mmu_idx

2023-04-10 Thread Alistair Francis
On Sat, Mar 25, 2023 at 10:01 PM Richard Henderson
 wrote:
>
> From: Fei Wu 
>
> Currently it's assumed that the 2 low bits of mmu_idx map to the privilege
> mode. This assumption won't last as we are about to add more mmu_idx
> values, so an individual priv field is added to TB_FLAGS.
>
> Reviewed-by: Richard Henderson 
> Signed-off-by: Fei Wu 
> Message-Id: <20230324054154.414846-2-fei2...@intel.com>

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu.h | 2 +-
>  target/riscv/cpu_helper.c  | 4 +++-
>  target/riscv/translate.c   | 2 ++
>  target/riscv/insn_trans/trans_privileged.c.inc | 2 +-
>  target/riscv/insn_trans/trans_xthead.c.inc | 7 +--
>  5 files changed, 8 insertions(+), 9 deletions(-)
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 86a82e25dc..3e59dbb3fd 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -631,7 +631,6 @@ G_NORETURN void riscv_raise_exception(CPURISCVState *env,
>  target_ulong riscv_cpu_get_fflags(CPURISCVState *env);
>  void riscv_cpu_set_fflags(CPURISCVState *env, target_ulong);
>
> -#define TB_FLAGS_PRIV_MMU_MASK3
>  #define TB_FLAGS_PRIV_HYP_ACCESS_MASK   (1 << 2)
>
>  #include "exec/cpu-all.h"
> @@ -658,6 +657,7 @@ FIELD(TB_FLAGS, ITRIGGER, 22, 1)
>  /* Virtual mode enabled */
>  FIELD(TB_FLAGS, VIRT_ENABLED, 23, 1)
>  FIELD(TB_FLAGS, VSTART_EQ_ZERO, 24, 1)
> +FIELD(TB_FLAGS, PRIV, 25, 2)
>
>  #ifdef TARGET_RISCV32
>  #define riscv_cpu_mxl(env)  ((void)(env), MXL_RV32)
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 4f0999d50b..5753126c7a 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -83,6 +83,8 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong 
> *pc,
>  fs = EXT_STATUS_DIRTY;
>  vs = EXT_STATUS_DIRTY;
>  #else
> +flags = FIELD_DP32(flags, TB_FLAGS, PRIV, env->priv);
> +
>  flags |= cpu_mmu_index(env, 0);
>  fs = get_field(env->mstatus, MSTATUS_FS);
>  vs = get_field(env->mstatus, MSTATUS_VS);
> @@ -764,7 +766,7 @@ static int get_physical_address(CPURISCVState *env, 
> hwaddr *physical,
>   * (riscv_cpu_do_interrupt) is correct */
>  MemTxResult res;
>  MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED;
> -int mode = mmu_idx & TB_FLAGS_PRIV_MMU_MASK;
> +int mode = env->priv;
>  bool use_background = false;
>  hwaddr ppn;
>  RISCVCPU *cpu = env_archcpu(env);
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index f8c077525c..abfc152553 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -67,6 +67,7 @@ typedef struct DisasContext {
>  RISCVExtStatus mstatus_fs;
>  RISCVExtStatus mstatus_vs;
>  uint32_t mem_idx;
> +uint32_t priv;
>  /* Remember the rounding mode encoded in the previous fp instruction,
> which we have already installed into env->fp_status.  Or -1 for
> no previous fp instruction.  Note that we exit the TB when writing
> @@ -1140,6 +1141,7 @@ static void 
> riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
>  uint32_t tb_flags = ctx->base.tb->flags;
>
>  ctx->pc_succ_insn = ctx->base.pc_first;
> +ctx->priv = FIELD_EX32(tb_flags, TB_FLAGS, PRIV);
>  ctx->mem_idx = FIELD_EX32(tb_flags, TB_FLAGS, MEM_IDX);
>  ctx->mstatus_fs = FIELD_EX32(tb_flags, TB_FLAGS, FS);
>  ctx->mstatus_vs = FIELD_EX32(tb_flags, TB_FLAGS, VS);
> diff --git a/target/riscv/insn_trans/trans_privileged.c.inc 
> b/target/riscv/insn_trans/trans_privileged.c.inc
> index 59501b2780..9305b18299 100644
> --- a/target/riscv/insn_trans/trans_privileged.c.inc
> +++ b/target/riscv/insn_trans/trans_privileged.c.inc
> @@ -52,7 +52,7 @@ static bool trans_ebreak(DisasContext *ctx, arg_ebreak *a)
>   * that no exception will be raised when fetching them.
>   */
>
> -if (semihosting_enabled(ctx->mem_idx < PRV_S) &&
> +if (semihosting_enabled(ctx->priv < PRV_S) &&
>  (pre_addr & TARGET_PAGE_MASK) == (post_addr & TARGET_PAGE_MASK)) {
>  pre= opcode_at(&ctx->base, pre_addr);
>  ebreak = opcode_at(&ctx->base, ebreak_addr);
> diff --git a/target/riscv/insn_trans/trans_xthead.c.inc 
> b/target/riscv/insn_trans/trans_xthead.c.inc
> index df504c3f2c..adfb53cb4c 100644
> --- a/target/riscv/insn_trans/trans_xthead.c.inc
> +++ b/target/riscv/insn_trans/trans_xthead.c.inc
> @@ -265,12 +265,7 @@ static bool trans_th_tst(DisasContext *ctx, arg_th_tst 
> *a)
>
>  static inline int priv_level(DisasContext *ctx)
>  {
> -#ifdef CONFIG_USER_ONLY
> -return PRV_U;
> -#else
> - /* Priv level is part of mem_idx. */
> -return ctx->mem_idx & TB_FLAGS_PRIV_MMU_MASK;
> -#endif
> +return ctx->priv;
>  }
>
>  /* Test if priv level is M, S, or U (cannot fail). */
> --
> 2.34.1
>
>



Re: [PATCH v6 05/25] target/riscv: Add a tb flags field for vstart

2023-04-10 Thread Alistair Francis
On Sat, Mar 25, 2023 at 9:52 PM Richard Henderson
 wrote:
>
> From: LIU Zhiwei 
>
> Previously we took vstart directly from env->vstart. As env->vstart is
> not a constant, we should record it in the tb flags if we want to use
> it in translation.
>
> Reported-by: Richard Henderson 
> Reviewed-by: Richard Henderson 
> Signed-off-by: LIU Zhiwei 
> Reviewed-by: Weiwei Li 
> Message-Id: <20230324143031.1093-5-zhiwei_...@linux.alibaba.com>

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu.h  |  1 +
>  target/riscv/cpu_helper.c   |  1 +
>  target/riscv/translate.c|  4 ++--
>  target/riscv/insn_trans/trans_rvv.c.inc | 14 +++---
>  4 files changed, 11 insertions(+), 9 deletions(-)
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index d9e0eaaf9b..86a82e25dc 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -657,6 +657,7 @@ FIELD(TB_FLAGS, VMA, 21, 1)
>  FIELD(TB_FLAGS, ITRIGGER, 22, 1)
>  /* Virtual mode enabled */
>  FIELD(TB_FLAGS, VIRT_ENABLED, 23, 1)
> +FIELD(TB_FLAGS, VSTART_EQ_ZERO, 24, 1)
>
>  #ifdef TARGET_RISCV32
>  #define riscv_cpu_mxl(env)  ((void)(env), MXL_RV32)
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 4fdd6fe021..4f0999d50b 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -74,6 +74,7 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong 
> *pc,
>  FIELD_EX64(env->vtype, VTYPE, VTA));
>  flags = FIELD_DP32(flags, TB_FLAGS, VMA,
>  FIELD_EX64(env->vtype, VTYPE, VMA));
> +flags = FIELD_DP32(flags, TB_FLAGS, VSTART_EQ_ZERO, env->vstart == 
> 0);
>  } else {
>  flags = FIELD_DP32(flags, TB_FLAGS, VILL, 1);
>  }
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index 74d0b9889d..f8c077525c 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -97,7 +97,7 @@ typedef struct DisasContext {
>  uint8_t vta;
>  uint8_t vma;
>  bool cfg_vta_all_1s;
> -target_ulong vstart;
> +bool vstart_eq_zero;
>  bool vl_eq_vlmax;
>  CPUState *cs;
>  TCGv zero;
> @@ -1155,7 +1155,7 @@ static void 
> riscv_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cs)
>  ctx->vta = FIELD_EX32(tb_flags, TB_FLAGS, VTA) && cpu->cfg.rvv_ta_all_1s;
>  ctx->vma = FIELD_EX32(tb_flags, TB_FLAGS, VMA) && cpu->cfg.rvv_ma_all_1s;
>  ctx->cfg_vta_all_1s = cpu->cfg.rvv_ta_all_1s;
> -ctx->vstart = env->vstart;
> +ctx->vstart_eq_zero = FIELD_EX32(tb_flags, TB_FLAGS, VSTART_EQ_ZERO);
>  ctx->vl_eq_vlmax = FIELD_EX32(tb_flags, TB_FLAGS, VL_EQ_VLMAX);
>  ctx->misa_mxl_max = env->misa_mxl_max;
>  ctx->xl = FIELD_EX32(tb_flags, TB_FLAGS, XL);
> diff --git a/target/riscv/insn_trans/trans_rvv.c.inc 
> b/target/riscv/insn_trans/trans_rvv.c.inc
> index 6297c3b50d..32b3b9a8e5 100644
> --- a/target/riscv/insn_trans/trans_rvv.c.inc
> +++ b/target/riscv/insn_trans/trans_rvv.c.inc
> @@ -547,7 +547,7 @@ static bool vext_check_sds(DisasContext *s, int vd, int 
> vs1, int vs2, int vm)
>   */
>  static bool vext_check_reduction(DisasContext *s, int vs2)
>  {
> -return require_align(vs2, s->lmul) && (s->vstart == 0);
> +return require_align(vs2, s->lmul) && s->vstart_eq_zero;
>  }
>
>  /*
> @@ -3083,7 +3083,7 @@ static bool trans_vcpop_m(DisasContext *s, arg_rmr *a)
>  {
>  if (require_rvv(s) &&
>  vext_check_isa_ill(s) &&
> -s->vstart == 0) {
> +s->vstart_eq_zero) {
>  TCGv_ptr src2, mask;
>  TCGv dst;
>  TCGv_i32 desc;
> @@ -3112,7 +3112,7 @@ static bool trans_vfirst_m(DisasContext *s, arg_rmr *a)
>  {
>  if (require_rvv(s) &&
>  vext_check_isa_ill(s) &&
> -s->vstart == 0) {
> +s->vstart_eq_zero) {
>  TCGv_ptr src2, mask;
>  TCGv dst;
>  TCGv_i32 desc;
> @@ -3146,7 +3146,7 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a)   
>\
>  vext_check_isa_ill(s) &&   \
>  require_vm(a->vm, a->rd) &&\
>  (a->rd != a->rs2) &&   \
> -(s->vstart == 0)) {\
> +s->vstart_eq_zero) {   \
>  uint32_t data = 0; \
>  gen_helper_gvec_3_ptr *fn = gen_helper_##NAME; \
>  TCGLabel *over = gen_new_label();  \
> @@ -3187,7 +3187,7 @@ static bool trans_viota_m(DisasContext *s, arg_viota_m 
> *a)
>  !is_overlapped(a->rd, 1 << MAX(s->lmul, 0), a->rs2, 1) &&
>  require_vm(a->vm, a->rd) &&
>  require_align(a->rd, s->lmul) &&
> -(s->vstart == 0)) {
> +s->vstart_eq_zero) {
>  uint32_t data = 0;
>  TCGLabel *over = gen_new_label();
>  

Re: [PATCH v6 02/25] target/riscv: Add a general status enum for extensions

2023-04-10 Thread Alistair Francis
On Sat, Mar 25, 2023 at 9:58 PM Richard Henderson
 wrote:
>
> From: LIU Zhiwei 
>
> Pointer masking is the only extension that directly uses the status.
> The vector and float extensions use the status in an indirect way.
>
> Replace the pointer masking extension special status fields with
> the general status.
>
> Reviewed-by: Richard Henderson 
> Signed-off-by: LIU Zhiwei 
> Message-Id: <20230324143031.1093-3-zhiwei_...@linux.alibaba.com>
> [rth: Add a typedef for the enum]
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu.h  |  8 
>  target/riscv/cpu_bits.h | 12 
>  target/riscv/cpu.c  |  2 +-
>  target/riscv/csr.c  | 14 +++---
>  4 files changed, 20 insertions(+), 16 deletions(-)
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 12fe8d8546..30d9828d59 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -99,6 +99,14 @@ enum {
>  TRANSLATE_G_STAGE_FAIL
>  };
>
> +/* Extension context status */
> +typedef enum {
> +EXT_STATUS_DISABLED = 0,
> +EXT_STATUS_INITIAL,
> +EXT_STATUS_CLEAN,
> +EXT_STATUS_DIRTY,
> +} RISCVExtStatus;
> +
>  #define MMU_USER_IDX 3
>
>  #define MAX_RISCV_PMPS (16)
> diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
> index fca7ef0cef..b84f62f8d6 100644
> --- a/target/riscv/cpu_bits.h
> +++ b/target/riscv/cpu_bits.h
> @@ -9,6 +9,9 @@
>   (((uint64_t)(val) * ((mask) & ~((mask) << 1))) & \
>   (uint64_t)(mask)))
>
> +/* Extension context status mask */
> +#define EXT_STATUS_MASK 0x3ULL
> +
>  /* Floating point round mode */
>  #define FSR_RD_SHIFT5
>  #define FSR_RD  (0x7 << FSR_RD_SHIFT)
> @@ -734,13 +737,6 @@ typedef enum RISCVException {
>  #define PM_ENABLE   0x0001ULL
>  #define PM_CURRENT  0x0002ULL
>  #define PM_INSN 0x0004ULL
> -#define PM_XS_MASK  0x0003ULL
> -
> -/* PointerMasking XS bits values */
> -#define PM_EXT_DISABLE  0xULL
> -#define PM_EXT_INITIAL  0x0001ULL
> -#define PM_EXT_CLEAN0x0002ULL
> -#define PM_EXT_DIRTY0x0003ULL
>
>  /* Execution enviornment configuration bits */
>  #define MENVCFG_FIOM   BIT(0)
> @@ -780,7 +776,7 @@ typedef enum RISCVException {
>  #define S_OFFSET 5ULL
>  #define M_OFFSET 8ULL
>
> -#define PM_XS_BITS   (PM_XS_MASK << XS_OFFSET)
> +#define PM_XS_BITS   (EXT_STATUS_MASK << XS_OFFSET)
>  #define U_PM_ENABLE  (PM_ENABLE  << U_OFFSET)
>  #define U_PM_CURRENT (PM_CURRENT << U_OFFSET)
>  #define U_PM_INSN(PM_INSN<< U_OFFSET)
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index 1e97473af2..1135106b3e 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -764,7 +764,7 @@ static void riscv_cpu_reset_hold(Object *obj)
>  i++;
>  }
>  /* mmte is supposed to have pm.current hardwired to 1 */
> -env->mmte |= (PM_EXT_INITIAL | MMTE_M_PM_CURRENT);
> +env->mmte |= (EXT_STATUS_INITIAL | MMTE_M_PM_CURRENT);
>  #endif
>  env->xl = riscv_cpu_mxl(env);
>  riscv_cpu_update_mask(env);
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index d522efc0b6..abea7b749e 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -3513,7 +3513,7 @@ static RISCVException write_mmte(CPURISCVState *env, 
> int csrno,
>
>  /* hardwiring pm.instruction bit to 0, since it's not supported yet */
>  wpri_val &= ~(MMTE_M_PM_INSN | MMTE_S_PM_INSN | MMTE_U_PM_INSN);
> -env->mmte = wpri_val | PM_EXT_DIRTY;
> +env->mmte = wpri_val | EXT_STATUS_DIRTY;
>  riscv_cpu_update_mask(env);
>
>  /* Set XS and SD bits, since PM CSRs are dirty */
> @@ -3593,7 +3593,7 @@ static RISCVException write_mpmmask(CPURISCVState *env, 
> int csrno,
>  if ((env->priv == PRV_M) && (env->mmte & M_PM_ENABLE)) {
>  env->cur_pmmask = val;
>  }
> -env->mmte |= PM_EXT_DIRTY;
> +env->mmte |= EXT_STATUS_DIRTY;
>
>  /* Set XS and SD bits, since PM CSRs are dirty */
>  mstatus = env->mstatus | MSTATUS_XS;
> @@ -3621,7 +3621,7 @@ static RISCVException write_spmmask(CPURISCVState *env, 
> int csrno,
>  if ((env->priv == PRV_S) && (env->mmte & S_PM_ENABLE)) {
>  env->cur_pmmask = val;
>  }
> -env->mmte |= PM_EXT_DIRTY;
> +env->mmte |= EXT_STATUS_DIRTY;
>
>  /* Set XS and SD bits, since PM CSRs are dirty */
>  mstatus = env->mstatus | MSTATUS_XS;
> @@ -3649,7 +3649,7 @@ static RISCVException write_upmmask(CPURISCVState *env, 
> int csrno,
>  if ((env->priv == PRV_U) && (env->mmte & U_PM_ENABLE)) {
>  env->cur_pmmask = val;
>  }
> -env->mmte |= PM_EXT_DIRTY;
> +env->mmte |= EXT_STATUS_DIRTY;
>
>  /* Set XS and SD bits, since PM CSRs are dirty */
>  mstatus = env->mstatus | MSTATUS_XS;
> @@ -3673,7 +3673,7 @@ static RISCVException write_mpmbase(CPURISCVState *env, 
> int csrno,
>  if ((env->priv == PRV_M) && (env->mmte & 

Re: [PATCH v2 2/4] target/riscv: add query-cpu-definitions support

2023-04-10 Thread Richard Henderson

On 4/10/23 09:52, Daniel Henrique Barboza wrote:

This command is used by tooling like libvirt to retrieve a list of
supported CPUs. Each entry returns a CpuDefinitionInfo object that
contains more information about each CPU.

This initial support includes only the name of the CPU and its typename.
Here's what the command produces for the riscv64 target:

$ ./build/qemu-system-riscv64 -S -M virt -display none -qmp stdio
{"QMP": {"version": (...)}
{"execute": "qmp_capabilities", "arguments": {"enable": ["oob"]}}
{"return": {}}
{"execute": "query-cpu-definitions"}
{"return": [
{"name": "rv64", "typename": "rv64-riscv-cpu", "static": false, "deprecated": 
false},
{"name": "sifive-e51", "typename": "sifive-e51-riscv-cpu", "static": false, 
"deprecated": false},
{"name": "any", "typename": "any-riscv-cpu", "static": false, "deprecated": 
false},
{"name": "x-rv128", "typename": "x-rv128-riscv-cpu", "static": false, 
"deprecated": false},
{"name": "shakti-c", "typename": "shakti-c-riscv-cpu", "static": false, 
"deprecated": false},
{"name": "thead-c906", "typename": "thead-c906-riscv-cpu", "static": false, 
"deprecated": false},
{"name": "sifive-u54", "typename": "sifive-u54-riscv-cpu", "static": false, 
"deprecated": false}]
}

Next patches will implement the 'static' attribute of CpuDefinitionInfo.

Signed-off-by: Daniel Henrique Barboza
---
  qapi/machine-target.json  |  6 ++--
  target/riscv/meson.build  |  3 +-
  target/riscv/riscv-qmp-cmds.c | 53 +++
  3 files changed, 59 insertions(+), 3 deletions(-)
  create mode 100644 target/riscv/riscv-qmp-cmds.c


Reviewed-by: Richard Henderson 

r~



Re: [PATCH v2 3/4] target/riscv: add 'static' attribute of query-cpu-definitions

2023-04-10 Thread Richard Henderson

On 4/10/23 09:52, Daniel Henrique Barboza wrote:

'static' is defined in the QMP doc as:

"whether a CPU definition is static and will not change depending on
QEMU version, machine type, machine options and accelerator options. A
static model is always migration-safe."

For RISC-V we'll consider all named CPUs as static since their
extensions can't be changed by user input. Generic CPUs will be
considered non-static.

We aren't ready to make the change for generic CPUs yet because we're
using the same class init for every CPU. We'll deal with it next.

Signed-off-by: Daniel Henrique Barboza
---
  target/riscv/cpu-qom.h| 3 +++
  target/riscv/cpu.c| 6 ++
  target/riscv/riscv-qmp-cmds.c | 2 ++
  3 files changed, 11 insertions(+)


Is 'static = true' really what you want as default?
Perhaps 'dynamic = false' (considering zero initialization) would be better?
Do you want an attribute that can be changed at all?

You could plausibly implement this via class inheritance instead.
E.g.

static const TypeInfo dynamic_cpu_type_info = {
.name = TYPE_RISCV_DYN_CPU,
.parent = TYPE_RISCV_CPU,
.abstract = true,
...
};

and then the dynamic cpus inherit from that.  Your dynamic attribute becomes 
object_dynamic_cast(OBJECT(cpu), TYPE_RISCV_DYN_CPU) != NULL.
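
For what it's worth, the query side could then derive the attribute from
the hierarchy, e.g. (helper name made up, untested):

static bool riscv_cpu_is_static(RISCVCPU *cpu)
{
    /* Named CPUs would not inherit from TYPE_RISCV_DYN_CPU. */
    return object_dynamic_cast(OBJECT(cpu), TYPE_RISCV_DYN_CPU) == NULL;
}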



r~



Re: [PATCH v6 04/25] target/riscv: Remove mstatus_hs_{fs, vs} from tb_flags

2023-04-10 Thread Alistair Francis
On Sat, Mar 25, 2023 at 10:36 PM Richard Henderson
 wrote:
>
> Merge with mstatus_{fs,vs}.  We might perform a redundant
> assignment to one or the other field, but it's trivial
> and saves 4 bits from TB_FLAGS.
>
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu.h| 16 +++-
>  target/riscv/cpu_helper.c | 34 --
>  target/riscv/translate.c  | 32 ++--
>  3 files changed, 33 insertions(+), 49 deletions(-)
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index f787145a21..d9e0eaaf9b 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -646,19 +646,17 @@ FIELD(TB_FLAGS, VL_EQ_VLMAX, 13, 1)
>  FIELD(TB_FLAGS, VILL, 14, 1)
>  /* Is a Hypervisor instruction load/store allowed? */
>  FIELD(TB_FLAGS, HLSX, 15, 1)
> -FIELD(TB_FLAGS, MSTATUS_HS_FS, 16, 2)
> -FIELD(TB_FLAGS, MSTATUS_HS_VS, 18, 2)
>  /* The combination of MXL/SXL/UXL that applies to the current cpu mode. */
> -FIELD(TB_FLAGS, XL, 20, 2)
> +FIELD(TB_FLAGS, XL, 16, 2)
>  /* If PointerMasking should be applied */
> -FIELD(TB_FLAGS, PM_MASK_ENABLED, 22, 1)
> -FIELD(TB_FLAGS, PM_BASE_ENABLED, 23, 1)
> -FIELD(TB_FLAGS, VTA, 24, 1)
> -FIELD(TB_FLAGS, VMA, 25, 1)
> +FIELD(TB_FLAGS, PM_MASK_ENABLED, 18, 1)
> +FIELD(TB_FLAGS, PM_BASE_ENABLED, 19, 1)
> +FIELD(TB_FLAGS, VTA, 20, 1)
> +FIELD(TB_FLAGS, VMA, 21, 1)
>  /* Native debug itrigger */
> -FIELD(TB_FLAGS, ITRIGGER, 26, 1)
> +FIELD(TB_FLAGS, ITRIGGER, 22, 1)
>  /* Virtual mode enabled */
> -FIELD(TB_FLAGS, VIRT_ENABLED, 27, 1)
> +FIELD(TB_FLAGS, VIRT_ENABLED, 23, 1)
>
>  #ifdef TARGET_RISCV32
>  #define riscv_cpu_mxl(env)  ((void)(env), MXL_RV32)
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 1e7ee9aa30..4fdd6fe021 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -45,7 +45,7 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, target_ulong 
> *pc,
>  {
>  CPUState *cs = env_cpu(env);
>  RISCVCPU *cpu = RISCV_CPU(cs);
> -
> +RISCVExtStatus fs, vs;
>  uint32_t flags = 0;
>
>  *pc = env->xl == MXL_RV32 ? env->pc & UINT32_MAX : env->pc;
> @@ -79,18 +79,12 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, 
> target_ulong *pc,
>  }
>
>  #ifdef CONFIG_USER_ONLY
> -flags = FIELD_DP32(flags, TB_FLAGS, FS, EXT_STATUS_DIRTY);
> -flags = FIELD_DP32(flags, TB_FLAGS, VS, EXT_STATUS_DIRTY);
> +fs = EXT_STATUS_DIRTY;
> +vs = EXT_STATUS_DIRTY;
>  #else
>  flags |= cpu_mmu_index(env, 0);
> -if (riscv_cpu_fp_enabled(env)) {
> -flags =  FIELD_DP32(flags, TB_FLAGS, FS,
> -get_field(env->mstatus,  MSTATUS_FS));
> -}
> -if (riscv_cpu_vector_enabled(env)) {
> -flags =  FIELD_DP32(flags, TB_FLAGS, VS,
> -get_field(env->mstatus, MSTATUS_VS));
> -}
> +fs = get_field(env->mstatus, MSTATUS_FS);
> +vs = get_field(env->mstatus, MSTATUS_VS);
>
>  if (riscv_has_ext(env, RVH)) {
>  if (env->priv == PRV_M ||
> @@ -100,19 +94,23 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, 
> target_ulong *pc,
>  flags = FIELD_DP32(flags, TB_FLAGS, HLSX, 1);
>  }
>
> -flags = FIELD_DP32(flags, TB_FLAGS, MSTATUS_HS_FS,
> -   get_field(env->mstatus_hs, MSTATUS_FS));
> -
> -flags = FIELD_DP32(flags, TB_FLAGS, MSTATUS_HS_VS,
> -   get_field(env->mstatus_hs, MSTATUS_VS));
> -flags = FIELD_DP32(flags, TB_FLAGS, VIRT_ENABLED,
> -   get_field(env->virt, VIRT_ONOFF));
> +if (riscv_cpu_virt_enabled(env)) {
> +flags = FIELD_DP32(flags, TB_FLAGS, VIRT_ENABLED, 1);
> +/*
> + * Merge DISABLED and !DIRTY states using MIN.
> + * We will set both fields when dirtying.
> + */
> +fs = MIN(fs, get_field(env->mstatus_hs, MSTATUS_FS));
> +vs = MIN(vs, get_field(env->mstatus_hs, MSTATUS_VS));
> +}
>  }
>  if (cpu->cfg.debug && !icount_enabled()) {
>  flags = FIELD_DP32(flags, TB_FLAGS, ITRIGGER, env->itrigger_enabled);
>  }
>  #endif
>
> +flags = FIELD_DP32(flags, TB_FLAGS, FS, fs);
> +flags = FIELD_DP32(flags, TB_FLAGS, VS, vs);
>  flags = FIELD_DP32(flags, TB_FLAGS, XL, env->xl);
>  if (env->cur_pmmask < (env->xl == MXL_RV32 ? UINT32_MAX : UINT64_MAX)) {
>  flags = FIELD_DP32(flags, TB_FLAGS, PM_MASK_ENABLED, 1);
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index b897bf6006..74d0b9889d 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -66,8 +66,6 @@ typedef struct DisasContext {
>  uint32_t opcode;
>  RISCVExtStatus mstatus_fs;
>  RISCVExtStatus mstatus_vs;
> -RISCVExtStatus mstatus_hs_fs;
> -RISCVExtStatus mstatus_hs_vs;
>  uint32_t mem_idx;
>  /* Remember the rounding mode encoded 

Re: [PATCH v6 03/25] target/riscv: Encode the FS and VS on a normal way for tb flags

2023-04-10 Thread Alistair Francis
On Sat, Mar 25, 2023 at 10:01 PM Richard Henderson
 wrote:
>
> From: LIU Zhiwei 
>
> Reusing MSTATUS_FS and MSTATUS_VS for the tb flags positions is not a
> normal approach.
>
> It makes it hard to change the tb flags layout, and even worse if we
> want to keep the tb flags for the same extension together without a hole.
>
> Reviewed-by: Richard Henderson 
> Signed-off-by: LIU Zhiwei 
> Reviewed-by: Weiwei Li 
> Message-Id: <20230324143031.1093-4-zhiwei_...@linux.alibaba.com>
> [rth: Adjust trans_rvf.c.inc as well; use the typedef]
> Signed-off-by: Richard Henderson 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu.h  | 15 +--
>  target/riscv/cpu_helper.c   | 11 
>  target/riscv/translate.c| 34 -
>  target/riscv/insn_trans/trans_rvf.c.inc |  2 +-
>  target/riscv/insn_trans/trans_rvv.c.inc |  8 +++---
>  5 files changed, 34 insertions(+), 36 deletions(-)
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 30d9828d59..f787145a21 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -633,18 +633,17 @@ void riscv_cpu_set_fflags(CPURISCVState *env, 
> target_ulong);
>
>  #define TB_FLAGS_PRIV_MMU_MASK3
>  #define TB_FLAGS_PRIV_HYP_ACCESS_MASK   (1 << 2)
> -#define TB_FLAGS_MSTATUS_FS MSTATUS_FS
> -#define TB_FLAGS_MSTATUS_VS MSTATUS_VS
>
>  #include "exec/cpu-all.h"
>
>  FIELD(TB_FLAGS, MEM_IDX, 0, 3)
> -FIELD(TB_FLAGS, LMUL, 3, 3)
> -FIELD(TB_FLAGS, SEW, 6, 3)
> -/* Skip MSTATUS_VS (0x600) bits */
> -FIELD(TB_FLAGS, VL_EQ_VLMAX, 11, 1)
> -FIELD(TB_FLAGS, VILL, 12, 1)
> -/* Skip MSTATUS_FS (0x6000) bits */
> +FIELD(TB_FLAGS, FS, 3, 2)
> +/* Vector flags */
> +FIELD(TB_FLAGS, VS, 5, 2)
> +FIELD(TB_FLAGS, LMUL, 7, 3)
> +FIELD(TB_FLAGS, SEW, 10, 3)
> +FIELD(TB_FLAGS, VL_EQ_VLMAX, 13, 1)
> +FIELD(TB_FLAGS, VILL, 14, 1)
>  /* Is a Hypervisor instruction load/store allowed? */
>  FIELD(TB_FLAGS, HLSX, 15, 1)
>  FIELD(TB_FLAGS, MSTATUS_HS_FS, 16, 2)
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 9d50e7bbb6..1e7ee9aa30 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -79,16 +79,17 @@ void cpu_get_tb_cpu_state(CPURISCVState *env, 
> target_ulong *pc,
>  }
>
>  #ifdef CONFIG_USER_ONLY
> -flags |= TB_FLAGS_MSTATUS_FS;
> -flags |= TB_FLAGS_MSTATUS_VS;
> +flags = FIELD_DP32(flags, TB_FLAGS, FS, EXT_STATUS_DIRTY);
> +flags = FIELD_DP32(flags, TB_FLAGS, VS, EXT_STATUS_DIRTY);
>  #else
>  flags |= cpu_mmu_index(env, 0);
>  if (riscv_cpu_fp_enabled(env)) {
> -flags |= env->mstatus & MSTATUS_FS;
> +flags =  FIELD_DP32(flags, TB_FLAGS, FS,
> +get_field(env->mstatus,  MSTATUS_FS));
>  }
> -
>  if (riscv_cpu_vector_enabled(env)) {
> -flags |= env->mstatus & MSTATUS_VS;
> +flags =  FIELD_DP32(flags, TB_FLAGS, VS,
> +get_field(env->mstatus, MSTATUS_VS));
>  }
>
>  if (riscv_has_ext(env, RVH)) {
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index 880f6318aa..b897bf6006 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -64,10 +64,10 @@ typedef struct DisasContext {
>  RISCVMXL xl;
>  uint32_t misa_ext;
>  uint32_t opcode;
> -uint32_t mstatus_fs;
> -uint32_t mstatus_vs;
> -uint32_t mstatus_hs_fs;
> -uint32_t mstatus_hs_vs;
> +RISCVExtStatus mstatus_fs;
> +RISCVExtStatus mstatus_vs;
> +RISCVExtStatus mstatus_hs_fs;
> +RISCVExtStatus mstatus_hs_vs;
>  uint32_t mem_idx;
>  /* Remember the rounding mode encoded in the previous fp instruction,
> which we have already installed into env->fp_status.  Or -1 for
> @@ -598,8 +598,7 @@ static TCGv get_address_indexed(DisasContext *ctx, int 
> rs1, TCGv offs)
>  }
>
>  #ifndef CONFIG_USER_ONLY
> -/* The states of mstatus_fs are:
> - * 0 = disabled, 1 = initial, 2 = clean, 3 = dirty
> +/*
>   * We will have already diagnosed disabled state,
>   * and need to turn initial/clean into dirty.
>   */
> @@ -611,9 +610,9 @@ static void mark_fs_dirty(DisasContext *ctx)
>  return;
>  }
>
> -if (ctx->mstatus_fs != MSTATUS_FS) {
> +if (ctx->mstatus_fs != EXT_STATUS_DIRTY) {
>  /* Remember the state change for the rest of the TB. */
> -ctx->mstatus_fs = MSTATUS_FS;
> +ctx->mstatus_fs = EXT_STATUS_DIRTY;
>
>  tmp = tcg_temp_new();
>  tcg_gen_ld_tl(tmp, cpu_env, offsetof(CPURISCVState, mstatus));
> @@ -621,9 +620,9 @@ static void mark_fs_dirty(DisasContext *ctx)
>  tcg_gen_st_tl(tmp, cpu_env, offsetof(CPURISCVState, mstatus));
>  }
>
> -if (ctx->virt_enabled && ctx->mstatus_hs_fs != MSTATUS_FS) {
> +if (ctx->virt_enabled && ctx->mstatus_hs_fs != EXT_STATUS_DIRTY) {
>  /* Remember the stage change for the rest of the TB. */
> -ctx->mstatus_hs_fs = MSTATUS_FS;
> +

Re: [PATCH v2 1/4] target/riscv: add CPU QOM header

2023-04-10 Thread Richard Henderson

On 4/10/23 09:52, Daniel Henrique Barboza wrote:

QMP CPU commands are usually implemented by a separate file,
<arch>-qmp-cmds.c, to allow them to be built only for softmmu targets.
This file uses a CPU QOM header with basic QOM declarations for the
arch.

We'll introduce query-cpu-definitions for RISC-V CPUs in the next patch,
but first we need a cpu-qom.h header with the definitions of
TYPE_RISCV_CPU and RISCVCPUClass declarations. These were moved from
cpu.h to the new file, and cpu.h now includes "cpu-qom.h".

Signed-off-by: Daniel Henrique Barboza
---
  target/riscv/cpu-qom.h | 70 ++
  target/riscv/cpu.h | 46 +--
  2 files changed, 71 insertions(+), 45 deletions(-)
  create mode 100644 target/riscv/cpu-qom.h


Reviewed-by: Richard Henderson 

r~



Re: [RFC PATCH 3/4] target/riscv: check smstateen fcsr flag

2023-04-10 Thread Richard Henderson

On 4/10/23 07:13, Mayuresh Chitale wrote:

+#ifndef CONFIG_USER_ONLY
+#define smstateen_fcsr_check(ctx) do { \
+if (!ctx->smstateen_fcsr_ok) { \
+if (ctx->virt_enabled) { \
+generate_exception(ctx, RISCV_EXCP_VIRT_INSTRUCTION_FAULT); \
+} else { \
+generate_exception(ctx, RISCV_EXCP_ILLEGAL_INST); \
+} \
+return true; \
+} \
+} while (0)
+#else
+#define smstateen_fcsr_check(ctx)
+#endif
+
+#define REQUIRE_ZFINX_OR_F(ctx) do { \
+if (!has_ext(ctx, RVF)) { \
+if (!ctx->cfg_ptr->ext_zfinx) { \
+return false; \
+} \
+smstateen_fcsr_check(ctx); \
  } \
  } while (0)


As a matter of style, I strongly object to a *nested* macro returning from the calling 
function.  These should all be changed to normal functions of the form


if (!require_xyz(ctx) || !require_abc(ctx)) {
return something;
}

etc.  insn_trans/trans_rvv.c.inc is much much cleaner in this respect.
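
A rough sketch of that shape for this patch, keeping the exception logic
but making every return explicit in the caller (names follow the macros
above; untested):

static bool require_zfinx_or_f(DisasContext *ctx)
{
    return has_ext(ctx, RVF) || ctx->cfg_ptr->ext_zfinx;
}

static bool check_smstateen_fcsr(DisasContext *ctx)
{
#ifndef CONFIG_USER_ONLY
    if (!ctx->smstateen_fcsr_ok) {
        generate_exception(ctx, ctx->virt_enabled
                                ? RISCV_EXCP_VIRT_INSTRUCTION_FAULT
                                : RISCV_EXCP_ILLEGAL_INST);
        return false;
    }
#endif
    return true;
}

/* In a trans_* function: */
if (!require_zfinx_or_f(ctx)) {
    return false;   /* decode fails */
}
if (!check_smstateen_fcsr(ctx)) {
    return true;    /* exception raised; instruction handled */
}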


r~



Re: [PATCH v3 3/3] target/riscv: Legalize MPP value in write_mstatus

2023-04-10 Thread Alistair Francis
On Fri, Apr 7, 2023 at 11:49 AM Weiwei Li  wrote:
>
> The mstatus.MPP field is a WARL field since priv version 1.11, so we
> leave it unchanged if an invalid value is written into it. And after
> this, the reserved value (PRV_RESERVED) shouldn't be passed to
> riscv_cpu_set_mode().
>
> Signed-off-by: Weiwei Li 
> Signed-off-by: Junqiang Wang 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu_helper.c |  8 ++--
>  target/riscv/csr.c| 32 
>  2 files changed, 34 insertions(+), 6 deletions(-)
>
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index 2310c7905f..433ea529b0 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -647,12 +647,8 @@ void riscv_cpu_set_aia_ireg_rmw_fn(CPURISCVState *env, 
> uint32_t priv,
>
>  void riscv_cpu_set_mode(CPURISCVState *env, target_ulong newpriv)
>  {
> -if (newpriv > PRV_M) {
> -g_assert_not_reached();
> -}
> -if (newpriv == PRV_H) {
> -newpriv = PRV_U;
> -}
> +g_assert(newpriv <= PRV_M && newpriv != PRV_RESERVED);
> +
>  if (icount_enabled() && newpriv != env->priv) {
>  riscv_itrigger_update_priv(env);
>  }
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index e0b871f6dc..f4d2dcfdc8 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -1230,6 +1230,32 @@ static bool validate_vm(CPURISCVState *env, 
> target_ulong vm)
> satp_mode_max_from_map(riscv_cpu_cfg(env)->satp_mode.map);
>  }
>
> +static target_ulong legalize_mpp(CPURISCVState *env, target_ulong old_mpp,
> + target_ulong val)
> +{
> +bool valid = false;
> +target_ulong new_mpp = get_field(val, MSTATUS_MPP);
> +
> +switch (new_mpp) {
> +case PRV_M:
> +valid = true;
> +break;
> +case PRV_S:
> +valid = riscv_has_ext(env, RVS);
> +break;
> +case PRV_U:
> +valid = riscv_has_ext(env, RVU);
> +break;
> +}
> +
> +/* Keep the field unchanged if the new_mpp value is invalid */
> +if (!valid) {
> +val = set_field(val, MSTATUS_MPP, old_mpp);
> +}
> +
> +return val;
> +}
> +
>  static RISCVException write_mstatus(CPURISCVState *env, int csrno,
>  target_ulong val)
>  {
> @@ -1237,6 +1263,12 @@ static RISCVException write_mstatus(CPURISCVState 
> *env, int csrno,
>  uint64_t mask = 0;
>  RISCVMXL xl = riscv_cpu_mxl(env);
>
> +/*
> + * MPP field have been made WARL since priv version 1.11. However,
> + * legalization for it will not break any software running on 1.10.
> + */
> +val = legalize_mpp(env, get_field(mstatus, MSTATUS_MPP), val);
> +
>  /* flush tlb on mstatus fields that affect VM */
>  if ((val ^ mstatus) & (MSTATUS_MXR | MSTATUS_MPP | MSTATUS_MPV |
>  MSTATUS_MPRV | MSTATUS_SUM)) {
> --
> 2.25.1
>
>



Re: [PATCH v3 2/3] target/riscv: Use PRV_RESERVED instead of PRV_H

2023-04-10 Thread Alistair Francis
On Fri, Apr 7, 2023 at 11:49 AM Weiwei Li  wrote:
>
> PRV_H has no real meaning; it is just a reserved privilege mode currently.
>
> Signed-off-by: Weiwei Li 
> Signed-off-by: Junqiang Wang 

Reviewed-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu.h   | 2 +-
>  target/riscv/cpu_bits.h  | 2 +-
>  target/riscv/gdbstub.c   | 2 +-
>  target/riscv/op_helper.c | 2 +-
>  4 files changed, 4 insertions(+), 4 deletions(-)
>
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index cbf3de2708..4af8ebc558 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -699,7 +699,7 @@ static inline RISCVMXL cpu_recompute_xl(CPURISCVState 
> *env)
>  case PRV_U:
>  xl = get_field(env->mstatus, MSTATUS64_UXL);
>  break;
> -default: /* PRV_S | PRV_H */
> +default: /* PRV_S */
>  xl = get_field(env->mstatus, MSTATUS64_SXL);
>  break;
>  }
> diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
> index 101702cb4a..a16bfaf43f 100644
> --- a/target/riscv/cpu_bits.h
> +++ b/target/riscv/cpu_bits.h
> @@ -608,7 +608,7 @@ typedef enum {
>  /* Privilege modes */
>  #define PRV_U 0
>  #define PRV_S 1
> -#define PRV_H 2 /* Reserved */
> +#define PRV_RESERVED 2
>  #define PRV_M 3
>
>  /* RV32 satp CSR field masks */
> diff --git a/target/riscv/gdbstub.c b/target/riscv/gdbstub.c
> index fa537aed74..524bede865 100644
> --- a/target/riscv/gdbstub.c
> +++ b/target/riscv/gdbstub.c
> @@ -203,7 +203,7 @@ static int riscv_gdb_set_virtual(CPURISCVState *cs, 
> uint8_t *mem_buf, int n)
>  if (n == 0) {
>  #ifndef CONFIG_USER_ONLY
>  cs->priv = ldtul_p(mem_buf) & 0x3;
> -if (cs->priv == PRV_H) {
> +if (cs->priv == PRV_RESERVED) {
>  cs->priv = PRV_S;
>  }
>  #endif
> diff --git a/target/riscv/op_helper.c b/target/riscv/op_helper.c
> index b8a03afebb..bd21c6eeef 100644
> --- a/target/riscv/op_helper.c
> +++ b/target/riscv/op_helper.c
> @@ -141,7 +141,7 @@ static void check_zicbo_envcfg(CPURISCVState *env, 
> target_ulong envbits,
>  }
>
>  if (env->virt_enabled &&
> -(((env->priv < PRV_H) && !get_field(env->henvcfg, envbits)) ||
> +(((env->priv <= PRV_S) && !get_field(env->henvcfg, envbits)) ||
>   ((env->priv < PRV_S) && !get_field(env->senvcfg, envbits {
>  riscv_raise_exception(env, RISCV_EXCP_VIRT_INSTRUCTION_FAULT, ra);
>  }
> --
> 2.25.1
>
>



Re: [RFC PATCH 2/4] target/riscv: Add fcsr field in tb->flags

2023-04-10 Thread Richard Henderson

On 4/10/23 07:13, Mayuresh Chitale wrote:

The state of the smstateen0.FCSR bit impacts the execution of floating point
instructions when misa.F==0. Add a field in the tb->flags which stores
the current state of smstateen0.fcsr and will be used by floating point
translation routines.


Are you certain that you require a new bit?

Could the same effect be achieved by forcing one or more of the existing 
TB_FLAGS.{FS,HS_FS} fields to 0 within cpu_get_tb_cpu_state?  I.e. for the purposes of 
translation, pretend the FS state is DISABLED?
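
i.e. something along these lines in cpu_get_tb_cpu_state, where the
predicate is a placeholder for whatever condition smstateen0.FCSR
imposes (untested):

    /* smstateen_fcsr_allowed() is a hypothetical predicate. */
    if (!riscv_has_ext(env, RVF) && !smstateen_fcsr_allowed(env)) {
        fs = EXT_STATUS_DISABLED;   /* pretend FP is entirely off */
    }
    flags = FIELD_DP32(flags, TB_FLAGS, FS, fs);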


These bits are scarce, and we are nearly out.


r~




Re: [PATCH v4 00/20] remove MISA ext_N flags from cpu->cfg

2023-04-10 Thread Alistair Francis
On Fri, Apr 7, 2023 at 4:06 AM Daniel Henrique Barboza
 wrote:
>
> Hi,
>
> This new version was rebased on top of Alistair's riscv-to-apply.next @
> 9c60ca583cb ("hw/riscv: Add signature dump function ...").
>
> No other changes made.
>
> Changes from v3:
> - rebased with riscv-to-apply.next @ 9c60ca583cb
> - v3 link: https://lists.gnu.org/archive/html/qemu-devel/2023-03/msg06911.html
>
> Daniel Henrique Barboza (20):
>   target/riscv: sync env->misa_ext* with cpu->cfg in realize()
>   target/riscv: remove MISA properties from isa_edata_arr[]
>   target/riscv/cpu.c: remove 'multi_letter' from isa_ext_data
>   target/riscv: introduce riscv_cpu_add_misa_properties()
>   target/riscv: remove cpu->cfg.ext_a
>   target/riscv: remove cpu->cfg.ext_c
>   target/riscv: remove cpu->cfg.ext_d
>   target/riscv: remove cpu->cfg.ext_f
>   target/riscv: remove cpu->cfg.ext_i
>   target/riscv: remove cpu->cfg.ext_e
>   target/riscv: remove cpu->cfg.ext_m
>   target/riscv: remove cpu->cfg.ext_s
>   target/riscv: remove cpu->cfg.ext_u
>   target/riscv: remove cpu->cfg.ext_h
>   target/riscv: remove cpu->cfg.ext_j
>   target/riscv: remove cpu->cfg.ext_v
>   target/riscv: remove riscv_cpu_sync_misa_cfg()
>   target/riscv: remove cfg.ext_g setup from rv64_thead_c906_cpu_init()
>   target/riscv: add RVG and remove cpu->cfg.ext_g
>   target/riscv/cpu.c: redesign register_cpu_props()

Thanks!

Applied to riscv-to-apply.next

Alistair

>
>  target/riscv/cpu.c| 412 +++---
>  target/riscv/cpu.h|  19 +-
>  target/riscv/insn_trans/trans_rvzce.c.inc |   2 +-
>  3 files changed, 216 insertions(+), 217 deletions(-)
>
> --
> 2.39.2
>
>



Re: [PATCH v2 18/19] bsd-user: Update system call list

2023-04-10 Thread Richard Henderson

On 4/10/23 11:20, Warner Losh wrote:

Update the system call list. We have one hokey thing in here for swapoff
that depends on the version number (so this is not completely generated
at the moment). For this, we need to include sys/param.h. The method of
generation has changed, so this diff looks way bigger than it needs to
be to add the few lines of code for the new system calls.

Signed-off-by: Warner Losh
---
  bsd-user/freebsd/os-syscall.h |2 +
  bsd-user/freebsd/syscall_nr.h | 1035 +
  2 files changed, 529 insertions(+), 508 deletions(-)


What is the method of generation?

If it's complicated, it should be in scripts/.
If it's trivial, e.g.

sed 's/xxx/yyy/' < in.h > out.h

it is worth including the command in the commit message.

Anyway,
Acked-by: Richard Henderson 


r~



Re: [PATCH v2 19/19] bsd-user: Eliminate USE_ELF_CORE_DUMP

2023-04-10 Thread Richard Henderson

On 4/10/23 11:20, Warner Losh wrote:

It's enabled on all platforms (even in the fork), so we can remove it
from here.

Signed-off-by: Warner Losh
---
  bsd-user/arm/target_arch_elf.h| 1 -
  bsd-user/elfcore.c| 3 ---
  bsd-user/elfload.c| 5 -
  bsd-user/i386/target_arch_elf.h   | 1 -
  bsd-user/x86_64/target_arch_elf.h | 1 -
  5 files changed, 11 deletions(-)


Thanks,

Reviewed-by: Richard Henderson 

r~



Re: [RFC PATCH v3 2/2] selftests: restrictedmem: Check hugepage-ness of shmem file backing restrictedmem fd

2023-04-10 Thread Ackerley Tng

David Hildenbrand  writes:


On 01.04.23 01:50, Ackerley Tng wrote:

For memfd_restricted() calls without a userspace mount, the backing
file should be the shmem mount in the kernel, and the size of backing
pages should be as defined by system-wide shmem configuration.



If a userspace mount is provided, the size of backing pages should be
as defined in the mount.



Also includes negative tests for invalid inputs, including fds
representing read-only superblocks/mounts.




When you talk about "hugepage" in this patch, do you mean THP or
hugetlb? I suspect thp, so it might be better to spell that out. IIRC,
there are plans to support actual huge pages in the future, at which
point "hugepage" terminology could be misleading.



Thanks for pointing this out! I've replaced references to hugepage with
thp, please see RFC v4 at
https://lore.kernel.org/lkml/cover.1681176340.git.ackerley...@google.com/T/


Signed-off-by: Ackerley Tng 
---
   tools/testing/selftests/Makefile  |   1 +
   .../selftests/restrictedmem/.gitignore|   3 +
   .../testing/selftests/restrictedmem/Makefile  |  15 +
   .../testing/selftests/restrictedmem/common.c  |   9 +
   .../testing/selftests/restrictedmem/common.h  |   8 +
   .../restrictedmem_hugepage_test.c | 486 ++
   6 files changed, 522 insertions(+)
   create mode 100644 tools/testing/selftests/restrictedmem/.gitignore
   create mode 100644 tools/testing/selftests/restrictedmem/Makefile
   create mode 100644 tools/testing/selftests/restrictedmem/common.c
   create mode 100644 tools/testing/selftests/restrictedmem/common.h
   create mode 100644  
tools/testing/selftests/restrictedmem/restrictedmem_hugepage_test.c



...





Re: [PATCH v2 17/19] bsd-user: Remove host-os.h

2023-04-10 Thread Richard Henderson

On 4/10/23 11:20, Warner Losh wrote:

It only defines the default system call scheme to use. However, that
feature was removed in a941a16f6f52.

Signed-off-by: Warner Losh
---
  bsd-user/freebsd/host-os.h | 25 -
  bsd-user/main.c|  1 -
  bsd-user/netbsd/host-os.h  | 25 -
  bsd-user/openbsd/host-os.h | 25 -
  4 files changed, 76 deletions(-)
  delete mode 100644 bsd-user/freebsd/host-os.h
  delete mode 100644 bsd-user/netbsd/host-os.h
  delete mode 100644 bsd-user/openbsd/host-os.h


Reviewed-by: Richard Henderson 

r~



Re: [PATCH v2 02/19] bsd-user: Ifdef a few MAP_ constants for NetBSD / OpenBSD.

2023-04-10 Thread Richard Henderson

On 4/10/23 11:20, Warner Losh wrote:

MAP_GUARD, MAP_EXCL, and MAP_NOCORE are FreeBSD only. Define them to be
0 if they aren't defined, and rely on the compiler to optimize away
sections not relevant. Added only to the top of mmap.c since that's the
only place we need this.

Signed-off-by: Warner Losh
---
  bsd-user/mmap.c | 14 ++
  1 file changed, 14 insertions(+)
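
Presumably the additions are the usual guard pattern, along these lines
(a sketch from the commit message, not the actual hunk):

#ifndef MAP_GUARD
#define MAP_GUARD 0
#endif
#ifndef MAP_EXCL
#define MAP_EXCL 0
#endif
#ifndef MAP_NOCORE
#define MAP_NOCORE 0
#endif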


Reviewed-by: Richard Henderson 

r~



[RFC PATCH v4 0/2] Providing mount in memfd_restricted() syscall

2023-04-10 Thread Ackerley Tng
Hello,

This patchset builds upon the memfd_restricted() system call that was
discussed in the 'KVM: mm: fd-based approach for supporting KVM' patch
series, at
https://lore.kernel.org/lkml/20221202061347.1070246-1-chao.p.p...@linux.intel.com/T/

The tree can be found at:
https://github.com/googleprodkernel/linux-cc/tree/restrictedmem-provide-mount-fd-rfc-v4

In this patchset, a modification to the memfd_restricted() syscall is
proposed, which allows userspace to provide a mount on which the
restrictedmem file will be created and returned from
memfd_restricted().

Allowing userspace to provide a mount allows userspace to control
various memory binding policies via tmpfs mount options, such as
Transparent HugePage memory allocation policy through
'huge=always/never' and NUMA memory allocation policy through
'mpol=local/bind:*'.

Changes since RFCv3:
+ Added check to ensure that bind mounts must be bind mounts of the
  whole filesystem
+ Removed inappropriate check on fd’s permissions as Christian
  suggested
+ Renamed RMFD_USERMNT to MEMFD_RSTD_USERMNT as David suggested
+ Added selftest to check that bind mounts must be bind mounts of the
  whole filesystem

Changes since RFCv2:
+ Tightened semantics to accept only fds of the root of a tmpfs mount,
  as Christian suggested
+ Added permissions check on the inode represented by the fd to guard
  against creation of restrictedmem files on read-only tmpfs
  filesystems or mounts
+ Renamed RMFD_TMPFILE to RMFD_USERMNT to better represent providing a
  userspace mount to create a restrictedmem file on
+ Updated selftests for tighter semantics and added selftests to check
  for permissions

Changes since RFCv1:
+ Use fd to represent mount instead of path string, as Kirill
  suggested. I believe using fds makes this syscall interface more
  aligned with the other syscalls like fsopen(), fsconfig(), and
  fsmount() in terms of using and passing around fds
+ Remove unused variable char *orig_shmem_enabled from selftests

Dependencies:
+ Chao’s work on UPM, at
  https://github.com/chao-p/linux/commits/privmem-v11.5

Links to earlier patch series:
+ RFC v3: 
https://lore.kernel.org/lkml/cover.1680306489.git.ackerley...@google.com/T/
+ RFC v2: 
https://lore.kernel.org/lkml/cover.1679428901.git.ackerley...@google.com/T/
+ RFC v1: 
https://lore.kernel.org/lkml/cover.1676507663.git.ackerley...@google.com/T/

Ackerley Tng (2):
  mm: restrictedmem: Allow userspace to specify mount for
memfd_restricted
  selftests: restrictedmem: Check memfd_restricted()'s handling of
provided userspace mount

 include/linux/syscalls.h  |   2 +-
 include/uapi/linux/restrictedmem.h|   8 +
 mm/restrictedmem.c|  73 ++-
 tools/testing/selftests/mm/.gitignore |   1 +
 tools/testing/selftests/mm/Makefile   |   1 +
 .../selftests/mm/memfd_restricted_usermnt.c   | 529 ++
 tools/testing/selftests/mm/run_vmtests.sh |   3 +
 7 files changed, 611 insertions(+), 6 deletions(-)
 create mode 100644 include/uapi/linux/restrictedmem.h
 create mode 100644 tools/testing/selftests/mm/memfd_restricted_usermnt.c

--
2.40.0.577.gac1e443424-goog



Re: [PATCH] Hexagon (target/hexagon) Add overrides for cache/sync/barrier instructions

2023-04-10 Thread Richard Henderson

On 4/10/23 13:24, Taylor Simpson wrote:

Most of these are not modelled in QEMU, so save the overhead of
calling a helper.

The only exception is dczeroa.  It assigns to hex_dczero_addr, which
is handled during packet commit.

Signed-off-by: Taylor Simpson
---
  target/hexagon/gen_tcg.h | 24 
  target/hexagon/macros.h  | 18 --
  2 files changed, 28 insertions(+), 14 deletions(-)


Reviewed-by: Richard Henderson 

Something to look at in the future: I believe quite a lot of these variables like 
dczero_addr are not "real" architectural state, in that they do not persist beyond the 
lifetime of the packet.  There are others, e.g. pkt_has_store_s1.


These variables could be moved to DisasContext and allocated on demand.  Even recently 
this was tedious, because of TCG temporary lifetime issues, but no longer.


Just a thought.
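
A minimal sketch of the shape that might take (the field and helper are
made up, untested):

/* In DisasContext: TCGv dczero_addr, NULL until a packet first needs it. */
static TCGv get_dczero_addr(DisasContext *ctx)
{
    if (ctx->dczero_addr == NULL) {
        ctx->dczero_addr = tcg_temp_new();
    }
    return ctx->dczero_addr;
}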


r~



[RFC PATCH v4 2/2] selftests: restrictedmem: Check memfd_restricted()'s handling of provided userspace mount

2023-04-10 Thread Ackerley Tng
For memfd_restricted() calls without a userspace mount, the backing
file should be the shmem mount in the kernel, and the size of backing
pages should be as defined by system-wide shmem configuration.

If a userspace mount is provided, the size of backing pages should be
as defined in the mount.

Also includes negative tests for invalid inputs, including fds
representing read-only superblocks/mounts.

Signed-off-by: Ackerley Tng 
---
 tools/testing/selftests/mm/.gitignore |   1 +
 tools/testing/selftests/mm/Makefile   |   1 +
 .../selftests/mm/memfd_restricted_usermnt.c   | 529 ++
 tools/testing/selftests/mm/run_vmtests.sh |   3 +
 4 files changed, 534 insertions(+)
 create mode 100644 tools/testing/selftests/mm/memfd_restricted_usermnt.c

diff --git a/tools/testing/selftests/mm/.gitignore 
b/tools/testing/selftests/mm/.gitignore
index fb6e4233374d..dba320c8151a 100644
--- a/tools/testing/selftests/mm/.gitignore
+++ b/tools/testing/selftests/mm/.gitignore
@@ -31,6 +31,7 @@ map_fixed_noreplace
 write_to_hugetlbfs
 hmm-tests
 memfd_restricted
+memfd_restricted_usermnt
 memfd_secret
 soft-dirty
 split_huge_page_test
diff --git a/tools/testing/selftests/mm/Makefile 
b/tools/testing/selftests/mm/Makefile
index 5ec338ea1fed..2f5df7a12ea5 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -46,6 +46,7 @@ TEST_GEN_FILES += map_fixed_noreplace
 TEST_GEN_FILES += map_hugetlb
 TEST_GEN_FILES += map_populate
 TEST_GEN_FILES += memfd_restricted
+TEST_GEN_FILES += memfd_restricted_usermnt
 TEST_GEN_FILES += memfd_secret
 TEST_GEN_FILES += migration
 TEST_GEN_FILES += mlock-random-test
diff --git a/tools/testing/selftests/mm/memfd_restricted_usermnt.c 
b/tools/testing/selftests/mm/memfd_restricted_usermnt.c
new file mode 100644
index ..0be04e3d714d
--- /dev/null
+++ b/tools/testing/selftests/mm/memfd_restricted_usermnt.c
@@ -0,0 +1,529 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define _GNU_SOURCE /* for O_PATH */
+#define _POSIX_C_SOURCE /* for PATH_MAX */
+#include 
+#include 
+#include 
+#include 
+
+#include "linux/restrictedmem.h"
+
+#include "../kselftest_harness.h"
+
+static int memfd_restricted(unsigned int flags, int fd)
+{
+   return syscall(__NR_memfd_restricted, flags, fd);
+}
+
+static int get_hpage_pmd_size(void)
+{
+   FILE *fp;
+   char buf[100];
+   char *ret;
+   int size;
+
+   fp = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r");
+   if (!fp)
+   return -1;
+
+   ret = fgets(buf, 100, fp);
+   if (ret != buf) {
+   size = -1;
+   goto out;
+   }
+
+   if (sscanf(buf, "%d\n", &size) != 1)
+   size = -1;
+
+out:
+   fclose(fp);
+
+   return size;
+}
+
+static int write_string_to_file(const char *path, const char *string)
+{
+   FILE *fp;
+   size_t len = strlen(string);
+   int ret = -1;
+
+   fp = fopen(path, "w");
+   if (!fp)
+   return ret;
+
+   if (fwrite(string, 1, len, fp) != len)
+   goto out;
+
+   ret = 0;
+
+out:
+   fclose(fp);
+   return ret;
+}
+
+/*
+ * Expect shmem thp policy to be one of always, within_size, advise, never,
+ * deny, force
+ */
+#define POLICY_BUF_SIZE 12
+
+static bool is_valid_shmem_thp_policy(char *policy)
+{
+   if (strcmp(policy, "always") == 0)
+   return true;
+   if (strcmp(policy, "within_size") == 0)
+   return true;
+   if (strcmp(policy, "advise") == 0)
+   return true;
+   if (strcmp(policy, "never") == 0)
+   return true;
+   if (strcmp(policy, "deny") == 0)
+   return true;
+   if (strcmp(policy, "force") == 0)
+   return true;
+
+   return false;
+}
+
+static int get_shmem_thp_policy(char *policy)
+{
+   FILE *fp;
+   char buf[100];
+   char *left = NULL;
+   char *right = NULL;
+   int ret = -1;
+
+   fp = fopen("/sys/kernel/mm/transparent_hugepage/shmem_enabled", "r");
+   if (!fp)
+   return -1;
+
+   if (fgets(buf, 100, fp) != buf)
+   goto out;
+
+   /*
+* Expect shmem_enabled to be of format like "always within_size advise
+* [never] deny force"
+*/
+   left = memchr(buf, '[', 100);
+   if (!left)
+   goto out;
+
+   right = memchr(buf, ']', 100);
+   if (!right)
+   goto out;
+
+   memcpy(policy, left + 1, right - left - 1);
+
+   ret = !is_valid_shmem_thp_policy(policy);
+
+out:
+   fclose(fp);
+   return ret;
+}
+
+static int set_shmem_thp_policy(char *policy)
+{
+   int ret = -1;
+   /* +1 for newline */
+   char to_write[POLICY_BUF_SIZE + 1] = { 0 };
+
+   if (!is_valid_shmem_thp_policy(policy))
+   return ret;
+
+   ret = snprintf(to_write, POLICY_BUF_SIZE + 1, "%s\n", policy);
+   if (ret != strlen(policy) + 

[RFC PATCH v4 1/2] mm: restrictedmem: Allow userspace to specify mount for memfd_restricted

2023-04-10 Thread Ackerley Tng
By default, the backing shmem file for a restrictedmem fd is created
on shmem's kernel space mount.

With this patch, an optional tmpfs mount can be specified via an fd,
which will be used as the mountpoint for backing the shmem file
associated with a restrictedmem fd.

This will help restrictedmem fds inherit the properties of the
provided tmpfs mounts, for example, hugepage (THP) allocation hints,
NUMA binding hints, etc.

Permissions for the fd passed to memfd_restricted() are modeled after
the openat() syscall, since both of these allow creation of a file
upon a mount/directory.

Permission to reference the mount the fd represents is checked upon fd
creation by other syscalls (e.g. fsmount(), open(), or open_tree(),
etc) and any process that can present memfd_restricted() with a valid
fd is expected to have obtained permission to use the mount
represented by the fd. This behavior is intended to parallel that of
the openat() syscall.

memfd_restricted() will check that the tmpfs superblock is
writable, and that the mount is also writable, before attempting to
create a restrictedmem file on the mount.
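
As a rough userspace sketch of the intended flow (not part of the patch;
the mount path, the header locations, and __NR_memfd_restricted being
visible are assumptions, and error handling is elided):

    #define _GNU_SOURCE                 /* for O_PATH */
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <linux/restrictedmem.h>    /* MEMFD_RSTD_USERMNT */

    static int create_rmem_on(const char *tmpfs_path)
    {
        /* An O_PATH fd is enough; fdget_raw() accepts it. */
        int mnt_fd = open(tmpfs_path, O_PATH);

        /* The backing file is created on the given tmpfs mount and
         * inherits its huge=/NUMA mount options. */
        int mfd = syscall(__NR_memfd_restricted,
                          MEMFD_RSTD_USERMNT, mnt_fd);

        close(mnt_fd);  /* the new file holds its own mount reference */
        return mfd;
    }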

Signed-off-by: Ackerley Tng 
---
 include/linux/syscalls.h   |  2 +-
 include/uapi/linux/restrictedmem.h |  8 
 mm/restrictedmem.c | 73 --
 3 files changed, 77 insertions(+), 6 deletions(-)
 create mode 100644 include/uapi/linux/restrictedmem.h

diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 660be0bf89d5..90c73b9e14e5 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1058,7 +1058,7 @@ asmlinkage long sys_memfd_secret(unsigned int flags);
 asmlinkage long sys_set_mempolicy_home_node(unsigned long start, unsigned long 
len,
unsigned long home_node,
unsigned long flags);
-asmlinkage long sys_memfd_restricted(unsigned int flags);
+asmlinkage long sys_memfd_restricted(unsigned int flags, int mount_fd);
 
 /*
  * Architecture-specific system calls
diff --git a/include/uapi/linux/restrictedmem.h 
b/include/uapi/linux/restrictedmem.h
new file mode 100644
index ..73e31bce73dc
--- /dev/null
+++ b/include/uapi/linux/restrictedmem.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_RESTRICTEDMEM_H
+#define _UAPI_LINUX_RESTRICTEDMEM_H
+
+/* flags for memfd_restricted */
+#define MEMFD_RSTD_USERMNT 0x0001U
+
+#endif /* _UAPI_LINUX_RESTRICTEDMEM_H */
diff --git a/mm/restrictedmem.c b/mm/restrictedmem.c
index 55e99e6c09a1..032ad1f15138 100644
--- a/mm/restrictedmem.c
+++ b/mm/restrictedmem.c
@@ -6,6 +6,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 struct restrictedmem {
@@ -250,19 +251,20 @@ static struct address_space_operations restricted_aops = {
 #endif
 };
 
-SYSCALL_DEFINE1(memfd_restricted, unsigned int, flags)
+static int restrictedmem_create(struct vfsmount *mount)
 {
struct file *file, *restricted_file;
int fd, err;
 
-   if (flags)
-   return -EINVAL;
-
fd = get_unused_fd_flags(0);
if (fd < 0)
return fd;
 
-   file = shmem_file_setup("memfd:restrictedmem", 0, VM_NORESERVE);
+   if (mount)
+   file = shmem_file_setup_with_mnt(mount, "memfd:restrictedmem", 
0, VM_NORESERVE);
+   else
+   file = shmem_file_setup("memfd:restrictedmem", 0, VM_NORESERVE);
+
if (IS_ERR(file)) {
err = PTR_ERR(file);
goto err_fd;
@@ -286,6 +288,67 @@ SYSCALL_DEFINE1(memfd_restricted, unsigned int, flags)
return err;
 }
 
+static struct vfsmount *restrictedmem_get_user_mount(struct file *file)
+{
+   int ret;
+   struct vfsmount *mnt;
+   struct path *path;
+
+   path = &file->f_path;
+   if (path->dentry != path->mnt->mnt_root)
+   return ERR_PTR(-EINVAL);
+
+   /*
+* Disallow bind-mounts that aren't bind-mounts of the whole
+* filesystem
+*/
+   mnt = path->mnt;
+   if (mnt->mnt_root != mnt->mnt_sb->s_root)
+   return ERR_PTR(-EINVAL);
+
+   if (mnt->mnt_sb->s_magic != TMPFS_MAGIC)
+   return ERR_PTR(-EINVAL);
+
+   ret = mnt_want_write(mnt);
+   if (ret)
+   return ERR_PTR(ret);
+
+   return mnt;
+}
+
+SYSCALL_DEFINE2(memfd_restricted, unsigned int, flags, int, mount_fd)
+{
+   int ret;
+   struct fd f = {};
+   struct vfsmount *mnt = NULL;
+
+   if (flags & ~MEMFD_RSTD_USERMNT)
+   return -EINVAL;
+
+   if (flags & MEMFD_RSTD_USERMNT) {
+   f = fdget_raw(mount_fd);
+   if (!f.file)
+   return -EBADF;
+
+   mnt = restrictedmem_get_user_mount(f.file);
+   if (IS_ERR(mnt)) {
+   ret = PTR_ERR(mnt);
+   goto out;
+   }
+   }
+
+   ret = 

Re: [RFC PATCH v2] riscv: Add support for the Zfa extension

2023-04-10 Thread Richard Henderson

On 3/31/23 11:28, Christoph Muellner wrote:

+/*
+ * Implement float64 to int32_t conversion without saturation;
+ * the result is supplied modulo 2^32.
+ * Rounding mode is RTZ.
+ * Flag behaviour identical to fcvt.w.d (see F specification).
+ *
+ * Similar conversion of this function can be found in
+ * target/arm/vfp_helper.c (fjcvtzs): f64->i32 with other fflag behaviour, and
+ * target/alpha/fpu_helper.c (do_cvttq): f64->i64 with support for several
+ * rounding modes and different fflag behaviour.
+ */
+uint64_t helper_fcvtmod_w_d(CPURISCVState *env, uint64_t value)


I am still of the opinion this should be moved to fpu/softfloat-parts.c.
The "other fflag" behaviour is very likely a bug in one or more of the three 
implementations.
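
For reference, the value semantics under discussion can be sketched in
portable C (a sketch only: the fflags are omitted, since those are the
contested part, and the NaN/Inf-to-zero behaviour follows the Zfa draft):

    #include <math.h>
    #include <stdint.h>

    /* fcvtmod.w.d value semantics: truncate toward zero (RTZ),
     * then take the result modulo 2^32. */
    static int32_t fcvtmod_value(double d)
    {
        if (isnan(d) || isinf(d)) {
            return 0;
        }
        d = trunc(d);                   /* RTZ */
        d = fmod(d, 4294967296.0);      /* modulo 2^32, sign of d kept */
        if (d < 0) {
            d += 4294967296.0;          /* fold into [0, 2^32) */
        }
        /* assumes two's-complement wrap on the final conversion */
        return (int32_t)(uint64_t)d;
    }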


r~



Re: [PATCH for-8.0] docs: Fix typo (wphx => whpx)

2023-04-10 Thread Richard Henderson

On 4/9/23 13:10, Stefan Weil wrote:

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1529
Signed-off-by: Stefan Weil
---

I suggest applying the patch for 8.0 because it fixes documentation.


Reviewed-by: Richard Henderson 

r~



[PATCH v2 11/54] tcg/mips: Conditionalize tcg_out_exts_i32_i64

2023-04-10 Thread Richard Henderson
Since TCG_TYPE_I32 values are kept sign-extended in registers, we need not
extend if the register matches.  This is already relied upon by comparisons.

Signed-off-by: Richard Henderson 
---
 tcg/mips/tcg-target.c.inc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index df36bec5c0..2bc885e00e 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -582,7 +582,9 @@ static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg 
rs)
 
 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rs)
 {
-tcg_out_ext32s(s, rd, rs);
+if (rd != rs) {
+tcg_out_ext32s(s, rd, rs);
+}
 }
 
 static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
-- 
2.34.1




Re: [PATCH for-8.0] docs/cxl: Fix sentence

2023-04-10 Thread Richard Henderson

On 4/9/23 13:18, Stefan Weil wrote:

Signed-off-by: Stefan Weil 
---

If my change is okay, I suggest applying the patch for 8.0
because it fixes documentation.

Regards,
Stefan W.

  docs/system/devices/cxl.rst | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst
index f25783a4ec..4c38223069 100644
--- a/docs/system/devices/cxl.rst
+++ b/docs/system/devices/cxl.rst
@@ -111,7 +111,7 @@ Interfaces provided include:
  
  CXL Root Ports (CXL RP)

  ~~~
-A CXL Root Port servers te same purpose as a PCIe Root Port.
+A CXL Root Port serves the same purpose as a PCIe Root Port.
  There are a number of CXL specific Designated Vendor Specific
  Extended Capabilities (DVSEC) in PCIe Configuration Space
  and associated component register access via PCI bars.


Reviewed-by: Richard Henderson 

r~



[PATCH v2 40/54] tcg/loongarch64: Convert tcg_out_qemu_{ld, st}_slow_path

2023-04-10 Thread Richard Henderson
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
and tcg_out_st_helper_args.

Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 37 ++--
 1 file changed, 11 insertions(+), 26 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 0daefa18fc..5ecae7cef0 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -893,51 +893,36 @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, 
MemOpIdx oi,
 label->label_ptr[0] = label_ptr[0];
 }
 
+static const TCGLdstHelperParam ldst_helper_param = {
+.ntmp = 1, .tmp = { TCG_REG_TMP0 }
+};
+
 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 {
-MemOpIdx oi = l->oi;
-MemOp opc = get_memop(oi);
-MemOp size = opc & MO_SIZE;
+MemOp opc = get_memop(l->oi);
 
 /* resolve label address */
 if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
 return false;
 }
 
-/* call load helper */
-tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
-tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg);
-tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A2, oi);
-tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, (tcg_target_long)l->raddr);
-
-tcg_out_call_int(s, qemu_ld_helpers[size], false);
-
-tcg_out_movext(s, l->type, l->datalo_reg,
-   TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_A0);
+tcg_out_ld_helper_args(s, l, &ldst_helper_param);
+tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE], false);
+tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param);
 return tcg_out_goto(s, l->raddr);
 }
 
 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 {
-MemOpIdx oi = l->oi;
-MemOp opc = get_memop(oi);
-MemOp size = opc & MO_SIZE;
+MemOp opc = get_memop(l->oi);
 
 /* resolve label address */
 if (!reloc_br_sk16(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
 return false;
 }
 
-/* call store helper */
-tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A0, TCG_AREG0);
-tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_A1, l->addrlo_reg);
-tcg_out_movext(s, size == MO_64 ? TCG_TYPE_I32 : TCG_TYPE_I32, TCG_REG_A2,
-   l->type, size, l->datalo_reg);
-tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A3, oi);
-tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A4, (tcg_target_long)l->raddr);
-
-tcg_out_call_int(s, qemu_st_helpers[size], false);
-
+tcg_out_st_helper_args(s, l, &ldst_helper_param);
+tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
 return tcg_out_goto(s, l->raddr);
 }
 #else
-- 
2.34.1




[PATCH v2 19/54] tcg: Clear TCGLabelQemuLdst on allocation

2023-04-10 Thread Richard Henderson
Signed-off-by: Richard Henderson 
---
 tcg/tcg-ldst.c.inc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tcg/tcg-ldst.c.inc b/tcg/tcg-ldst.c.inc
index 6c6848d034..403cbb0f06 100644
--- a/tcg/tcg-ldst.c.inc
+++ b/tcg/tcg-ldst.c.inc
@@ -72,6 +72,7 @@ static inline TCGLabelQemuLdst *new_ldst_label(TCGContext *s)
 {
 TCGLabelQemuLdst *l = tcg_malloc(sizeof(*l));
 
+memset(l, 0, sizeof(*l));
 QSIMPLEQ_INSERT_TAIL(&s->ldst_labels, l, next);
 
 return l;
-- 
2.34.1




[PATCH v2 31/54] tcg: Move TCGLabelQemuLdst to tcg.c

2023-04-10 Thread Richard Henderson
This will shortly be used by sparc64 without also using
TCG_TARGET_NEED_LDST_LABELS.

Signed-off-by: Richard Henderson 
---
 tcg/tcg.c  | 13 +
 tcg/tcg-ldst.c.inc | 14 --
 2 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index cfd3262a4a..6f5daaee5f 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -94,6 +94,19 @@ typedef struct QEMU_PACKED {
 DebugFrameFDEHeader fde;
 } DebugFrameHeader;
 
+typedef struct TCGLabelQemuLdst {
+bool is_ld; /* qemu_ld: true, qemu_st: false */
+MemOpIdx oi;
+TCGType type;   /* result type of a load */
+TCGReg addrlo_reg;  /* reg index for low word of guest virtual addr */
+TCGReg addrhi_reg;  /* reg index for high word of guest virtual addr */
+TCGReg datalo_reg;  /* reg index for low word to be loaded or stored */
+TCGReg datahi_reg;  /* reg index for high word to be loaded or stored 
*/
+const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
+tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
+QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
+} TCGLabelQemuLdst;
+
 static void tcg_register_jit_int(const void *buf, size_t size,
  const void *debug_frame,
  size_t debug_frame_size)
diff --git a/tcg/tcg-ldst.c.inc b/tcg/tcg-ldst.c.inc
index 403cbb0f06..ffada04af0 100644
--- a/tcg/tcg-ldst.c.inc
+++ b/tcg/tcg-ldst.c.inc
@@ -20,20 +20,6 @@
  * THE SOFTWARE.
  */
 
-typedef struct TCGLabelQemuLdst {
-bool is_ld; /* qemu_ld: true, qemu_st: false */
-MemOpIdx oi;
-TCGType type;   /* result type of a load */
-TCGReg addrlo_reg;  /* reg index for low word of guest virtual addr */
-TCGReg addrhi_reg;  /* reg index for high word of guest virtual addr */
-TCGReg datalo_reg;  /* reg index for low word to be loaded or stored */
-TCGReg datahi_reg;  /* reg index for high word to be loaded or stored 
*/
-const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
-tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
-QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
-} TCGLabelQemuLdst;
-
-
 /*
  * Generate TB finalization at the end of block
  */
-- 
2.34.1




[PATCH v2 10/54] tcg/loongarch64: Conditionalize tcg_out_exts_i32_i64

2023-04-10 Thread Richard Henderson
Since TCG_TYPE_I32 values are kept sign-extended in registers,
via ".w" instructions, we need not extend if the register matches.
This is already relied upon by comparisons.

Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 989632e08a..b2146988be 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -458,7 +458,9 @@ static void tcg_out_ext32s(TCGContext *s, TCGReg ret, 
TCGReg arg)
 
 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
 {
-tcg_out_ext32s(s, ret, arg);
+if (ret != arg) {
+tcg_out_ext32s(s, ret, arg);
+}
 }
 
 static void tcg_out_clzctz(TCGContext *s, LoongArchInsn opc,
-- 
2.34.1




[PATCH v2 45/54] tcg/loongarch64: Simplify constraints on qemu_ld/st

2023-04-10 Thread Richard Henderson
The softmmu tlb uses TCG_REG_TMP[0-2], not any of the normally available
registers.  Now that we handle overlap between inputs and helper arguments,
we can allow any allocatable reg.

Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target-con-set.h |  2 --
 tcg/loongarch64/tcg-target-con-str.h |  1 -
 tcg/loongarch64/tcg-target.c.inc | 23 ---
 3 files changed, 4 insertions(+), 22 deletions(-)

diff --git a/tcg/loongarch64/tcg-target-con-set.h 
b/tcg/loongarch64/tcg-target-con-set.h
index 172c107289..c2bde44613 100644
--- a/tcg/loongarch64/tcg-target-con-set.h
+++ b/tcg/loongarch64/tcg-target-con-set.h
@@ -17,9 +17,7 @@
 C_O0_I1(r)
 C_O0_I2(rZ, r)
 C_O0_I2(rZ, rZ)
-C_O0_I2(LZ, L)
 C_O1_I1(r, r)
-C_O1_I1(r, L)
 C_O1_I2(r, r, rC)
 C_O1_I2(r, r, ri)
 C_O1_I2(r, r, rI)
diff --git a/tcg/loongarch64/tcg-target-con-str.h 
b/tcg/loongarch64/tcg-target-con-str.h
index 541ff47fa9..6e9ccca3ad 100644
--- a/tcg/loongarch64/tcg-target-con-str.h
+++ b/tcg/loongarch64/tcg-target-con-str.h
@@ -14,7 +14,6 @@
  * REGS(letter, register_mask)
  */
 REGS('r', ALL_GENERAL_REGS)
-REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
 
 /*
  * Define constraint letters for constants:
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 5ecae7cef0..23a8dbde5f 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -133,18 +133,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind 
kind, int slot)
 #define TCG_CT_CONST_C12   0x1000
 #define TCG_CT_CONST_WSZ   0x2000
 
-#define ALL_GENERAL_REGS  MAKE_64BIT_MASK(0, 32)
-/*
- * For softmmu, we need to avoid conflicts with the first 5
- * argument registers to call the helper.  Some of these are
- * also used for the tlb lookup.
- */
-#ifdef CONFIG_SOFTMMU
-#define SOFTMMU_RESERVE_REGS  MAKE_64BIT_MASK(TCG_REG_A0, 5)
-#else
-#define SOFTMMU_RESERVE_REGS  0
-#endif
-
+#define ALL_GENERAL_REGS   MAKE_64BIT_MASK(0, 32)
 
 static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len)
 {
@@ -1583,16 +1572,14 @@ static TCGConstraintSetIndex 
tcg_target_op_def(TCGOpcode op)
 case INDEX_op_st32_i64:
 case INDEX_op_st_i32:
 case INDEX_op_st_i64:
+case INDEX_op_qemu_st_i32:
+case INDEX_op_qemu_st_i64:
 return C_O0_I2(rZ, r);
 
 case INDEX_op_brcond_i32:
 case INDEX_op_brcond_i64:
 return C_O0_I2(rZ, rZ);
 
-case INDEX_op_qemu_st_i32:
-case INDEX_op_qemu_st_i64:
-return C_O0_I2(LZ, L);
-
 case INDEX_op_ext8s_i32:
 case INDEX_op_ext8s_i64:
 case INDEX_op_ext8u_i32:
@@ -1628,11 +1615,9 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_ld32u_i64:
 case INDEX_op_ld_i32:
 case INDEX_op_ld_i64:
-return C_O1_I1(r, r);
-
 case INDEX_op_qemu_ld_i32:
 case INDEX_op_qemu_ld_i64:
-return C_O1_I1(r, L);
+return C_O1_I1(r, r);
 
 case INDEX_op_andc_i32:
 case INDEX_op_andc_i64:
-- 
2.34.1




[PATCH v2 17/54] tcg: Introduce tcg_out_xchg

2023-04-10 Thread Richard Henderson
We will want a backend interface for register swapping.
This is only properly defined for x86; all others get a
stub version that always indicates failure.
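
A hypothetical caller, to illustrate the contract (TCG_REG_TMP stands in
for whatever scratch register the backend reserves; the three-move
fallback is a sketch, not code from this series):

    /* Swap r1/r2 natively if the backend can, else go through a temp. */
    if (!tcg_out_xchg(s, type, r1, r2)) {
        tcg_out_mov(s, type, TCG_REG_TMP, r1);
        tcg_out_mov(s, type, r1, r2);
        tcg_out_mov(s, type, r2, TCG_REG_TMP);
    }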

Signed-off-by: Richard Henderson 
---
 tcg/tcg.c| 2 ++
 tcg/aarch64/tcg-target.c.inc | 5 +
 tcg/arm/tcg-target.c.inc | 5 +
 tcg/i386/tcg-target.c.inc| 8 
 tcg/loongarch64/tcg-target.c.inc | 5 +
 tcg/mips/tcg-target.c.inc| 5 +
 tcg/ppc/tcg-target.c.inc | 5 +
 tcg/riscv/tcg-target.c.inc   | 5 +
 tcg/s390x/tcg-target.c.inc   | 5 +
 tcg/sparc64/tcg-target.c.inc | 5 +
 tcg/tci/tcg-target.c.inc | 5 +
 11 files changed, 55 insertions(+)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 328e018a80..fde5ccc57c 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -115,6 +115,8 @@ static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, 
TCGReg arg);
 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
+static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
+__attribute__((unused));
 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
 static void tcg_out_goto_tb(TCGContext *s, int which);
 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 29bc97ed1c..4ec3cf3172 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -1106,6 +1106,11 @@ static void tcg_out_movi(TCGContext *s, TCGType type, 
TCGReg rd,
 tcg_out_insn(s, 3305, LDR, 0, rd);
 }
 
+static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
+{
+return false;
+}
+
 static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
  tcg_target_long imm)
 {
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index f865294861..4a5d57a41c 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -2607,6 +2607,11 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
 tcg_out_movi32(s, COND_AL, ret, arg);
 }
 
+static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
+{
+return false;
+}
+
 static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
  tcg_target_long imm)
 {
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 4847da7e1a..ce87f8fbc9 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -460,6 +460,7 @@ static bool tcg_target_const_match(int64_t val, TCGType 
type, int ct)
 #define OPC_VPTERNLOGQ  (0x25 | P_EXT3A | P_DATA16 | P_VEXW | P_EVEX)
 #define OPC_VZEROUPPER  (0x77 | P_EXT)
 #define OPC_XCHG_ax_r32(0x90)
+#define OPC_XCHG_EvGv   (0x87)
 
 #define OPC_GRP3_Eb (0xf6)
 #define OPC_GRP3_Ev (0xf7)
@@ -1078,6 +1079,13 @@ static void tcg_out_movi(TCGContext *s, TCGType type,
 }
 }
 
+static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
+{
+int rexw = type == TCG_TYPE_I32 ? 0 : P_REXW;
+tcg_out_modrm(s, OPC_XCHG_EvGv + rexw, r1, r2);
+return true;
+}
+
 static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
  tcg_target_long imm)
 {
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index fc98b9b31b..0940788c6f 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -419,6 +419,11 @@ static void tcg_out_addi(TCGContext *s, TCGType type, 
TCGReg rd,
 }
 }
 
+static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
+{
+return false;
+}
+
 static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
  tcg_target_long imm)
 {
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index f103cdb4e6..a83ebe8729 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -597,6 +597,11 @@ static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg 
rd, TCGReg rs)
 tcg_out_ext32s(s, rd, rs);
 }
 
+static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
+{
+return false;
+}
+
 static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
  tcg_target_long imm)
 {
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index b1d9c0bbe4..77abb7d20c 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -1154,6 +1154,11 @@ static void tcg_out_movi(TCGContext *s, TCGType type, 
TCGReg ret,
 }
 }
 
+static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2)
+{
+return false;
+}
+
 static void tcg_out_addi_ptr(TCGContext *s, TCGReg rd, TCGReg rs,
  tcg_target_long imm)
 {
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index 

[PATCH v2 46/54] tcg/mips: Remove MO_BSWAP handling

2023-04-10 Thread Richard Henderson
While performing the load in the delay slot of the call to the common
bswap helper function is cute, it is not worth the added complexity.
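
With TCG_TARGET_HAS_MEMORY_BSWAP set to 0, the middle-end performs the
swap before the backend ever sees the operation; roughly, for the 32-bit
case (a sketch of the generic expansion, not code from this patch):

    /* A byte-swapped 32-bit guest load decays to a plain load
     * followed by an explicit swap. */
    tcg_gen_qemu_ld_i32(val, addr, idx, memop & ~MO_BSWAP);
    tcg_gen_bswap32_i32(val, val);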

Signed-off-by: Richard Henderson 
---
 tcg/mips/tcg-target.h |   4 +-
 tcg/mips/tcg-target.c.inc | 284 ++
 2 files changed, 48 insertions(+), 240 deletions(-)

diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index 2431fc5353..42bd7fff01 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -204,8 +204,8 @@ extern bool use_mips32r2_instructions;
 #define TCG_TARGET_HAS_ext16u_i64   0 /* andi rt, rs, 0x */
 #endif
 
-#define TCG_TARGET_DEFAULT_MO (0)
-#define TCG_TARGET_HAS_MEMORY_BSWAP 1
+#define TCG_TARGET_DEFAULT_MO   0
+#define TCG_TARGET_HAS_MEMORY_BSWAP 0
 
 #define TCG_TARGET_NEED_LDST_LABELS
 
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 9f7c9cd688..b6db8c6884 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -1088,31 +1088,35 @@ static void tcg_out_call(TCGContext *s, const 
tcg_insn_unit *arg,
 }
 
 #if defined(CONFIG_SOFTMMU)
-static void * const qemu_ld_helpers[(MO_SSIZE | MO_BSWAP) + 1] = {
+static void * const qemu_ld_helpers[MO_SSIZE + 1] = {
 [MO_UB]   = helper_ret_ldub_mmu,
 [MO_SB]   = helper_ret_ldsb_mmu,
-[MO_LEUW] = helper_le_lduw_mmu,
-[MO_LESW] = helper_le_ldsw_mmu,
-[MO_LEUL] = helper_le_ldul_mmu,
-[MO_LEUQ] = helper_le_ldq_mmu,
-[MO_BEUW] = helper_be_lduw_mmu,
-[MO_BESW] = helper_be_ldsw_mmu,
-[MO_BEUL] = helper_be_ldul_mmu,
-[MO_BEUQ] = helper_be_ldq_mmu,
-#if TCG_TARGET_REG_BITS == 64
-[MO_LESL] = helper_le_ldsl_mmu,
-[MO_BESL] = helper_be_ldsl_mmu,
+#if HOST_BIG_ENDIAN
+[MO_UW] = helper_be_lduw_mmu,
+[MO_SW] = helper_be_ldsw_mmu,
+[MO_UL] = helper_be_ldul_mmu,
+[MO_SL] = helper_be_ldsl_mmu,
+[MO_UQ] = helper_be_ldq_mmu,
+#else
+[MO_UW] = helper_le_lduw_mmu,
+[MO_SW] = helper_le_ldsw_mmu,
+[MO_UL] = helper_le_ldul_mmu,
+[MO_UQ] = helper_le_ldq_mmu,
+[MO_SL] = helper_le_ldsl_mmu,
 #endif
 };
 
-static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
+static void * const qemu_st_helpers[MO_SIZE + 1] = {
 [MO_UB]   = helper_ret_stb_mmu,
-[MO_LEUW] = helper_le_stw_mmu,
-[MO_LEUL] = helper_le_stl_mmu,
-[MO_LEUQ] = helper_le_stq_mmu,
-[MO_BEUW] = helper_be_stw_mmu,
-[MO_BEUL] = helper_be_stl_mmu,
-[MO_BEUQ] = helper_be_stq_mmu,
+#if HOST_BIG_ENDIAN
+[MO_UW] = helper_be_stw_mmu,
+[MO_UL] = helper_be_stl_mmu,
+[MO_UQ] = helper_be_stq_mmu,
+#else
+[MO_UW] = helper_le_stw_mmu,
+[MO_UL] = helper_le_stl_mmu,
+[MO_UQ] = helper_le_stq_mmu,
+#endif
 };
 
 /* We expect to use a 16-bit negative offset from ENV.  */
@@ -1248,7 +1252,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, 
TCGLabelQemuLdst *l)
 
 tcg_out_ld_helper_args(s, l, &ldst_helper_param);
 
-tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)], false);
+tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false);
 /* delay slot */
 tcg_out_nop(s);
 
@@ -1278,7 +1282,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, 
TCGLabelQemuLdst *l)
 
 tcg_out_st_helper_args(s, l, &ldst_helper_param);
 
-tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], false);
+tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
 /* delay slot */
 tcg_out_nop(s);
 
@@ -1371,52 +1375,19 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, 
TCGLabelQemuLdst *l)
 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
TCGReg base, MemOp opc, TCGType type)
 {
-switch (opc & (MO_SSIZE | MO_BSWAP)) {
+switch (opc & MO_SSIZE) {
 case MO_UB:
 tcg_out_opc_imm(s, OPC_LBU, lo, base, 0);
 break;
 case MO_SB:
 tcg_out_opc_imm(s, OPC_LB, lo, base, 0);
 break;
-case MO_UW | MO_BSWAP:
-tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
-tcg_out_bswap16(s, lo, TCG_TMP1, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
-break;
 case MO_UW:
 tcg_out_opc_imm(s, OPC_LHU, lo, base, 0);
 break;
-case MO_SW | MO_BSWAP:
-tcg_out_opc_imm(s, OPC_LHU, TCG_TMP1, base, 0);
-tcg_out_bswap16(s, lo, TCG_TMP1, TCG_BSWAP_IZ | TCG_BSWAP_OS);
-break;
 case MO_SW:
 tcg_out_opc_imm(s, OPC_LH, lo, base, 0);
 break;
-case MO_UL | MO_BSWAP:
-if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
-if (use_mips32r2_instructions) {
-tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
-tcg_out_bswap32(s, lo, lo, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
-} else {
-tcg_out_bswap_subr(s, bswap32u_addr);
-/* delay slot */
-tcg_out_opc_imm(s, OPC_LWU, TCG_TMP0, base, 0);
-tcg_out_mov(s, TCG_TYPE_I64, lo, TCG_TMP3);
-  

[PATCH v2 33/54] tcg: Introduce arg_slot_stk_ofs

2023-04-10 Thread Richard Henderson
Unify all computation of argument stack offset in one function.
This requires that we adjust ref_slot to be in the same units,
by adding max_reg_slots during init_call_layout.
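
For example (assuming a host with 8 integer argument registers and 8-byte
stack slots): arg_slot 10 fails arg_slot_reg_p(), so stk_slot is
10 - 8 = 2 and the store lands at TCG_TARGET_CALL_STACK_OFFSET + 2 * 8.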

Signed-off-by: Richard Henderson 
---
 tcg/tcg.c | 29 +
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index fa28db0188..057423c121 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -816,6 +816,15 @@ static inline bool arg_slot_reg_p(unsigned arg_slot)
 return arg_slot < nreg;
 }
 
+static inline int arg_slot_stk_ofs(unsigned arg_slot)
+{
+unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
+unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
+
+tcg_debug_assert(stk_slot < max);
+return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
+}
+
 typedef struct TCGCumulativeArgs {
 int arg_idx;/* tcg_gen_callN args[] */
 int info_in_idx;/* TCGHelperInfo in[] */
@@ -1055,6 +1064,7 @@ static void init_call_layout(TCGHelperInfo *info)
 }
 }
 assert(ref_base + cum.ref_slot <= max_stk_slots);
+ref_base += max_reg_slots;
 
 if (ref_base != 0) {
 for (int i = cum.info_in_idx - 1; i >= 0; --i) {
@@ -4826,7 +4836,7 @@ static void load_arg_reg(TCGContext *s, TCGReg reg, 
TCGTemp *ts,
 }
 }
 
-static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts,
+static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
  TCGRegSet allocated_regs)
 {
 /*
@@ -4836,8 +4846,7 @@ static void load_arg_stk(TCGContext *s, int stk_slot, 
TCGTemp *ts,
  */
 temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
 tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
-   TCG_TARGET_CALL_STACK_OFFSET +
-   stk_slot * sizeof(tcg_target_long));
+   arg_slot_stk_ofs(arg_slot));
 }
 
 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
@@ -4848,18 +4857,16 @@ static void load_arg_normal(TCGContext *s, const 
TCGCallArgumentLoc *l,
 load_arg_reg(s, reg, ts, *allocated_regs);
 tcg_regset_set_reg(*allocated_regs, reg);
 } else {
-load_arg_stk(s, l->arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs),
- ts, *allocated_regs);
+load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
 }
 }
 
-static void load_arg_ref(TCGContext *s, int arg_slot, TCGReg ref_base,
+static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
  intptr_t ref_off, TCGRegSet *allocated_regs)
 {
 TCGReg reg;
-int stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
 
-if (stk_slot < 0) {
+if (arg_slot_reg_p(arg_slot)) {
 reg = tcg_target_call_iarg_regs[arg_slot];
 tcg_reg_free(s, reg, *allocated_regs);
 tcg_out_addi_ptr(s, reg, ref_base, ref_off);
@@ -4869,8 +4876,7 @@ static void load_arg_ref(TCGContext *s, int arg_slot, 
TCGReg ref_base,
 *allocated_regs, 0, false);
 tcg_out_addi_ptr(s, reg, ref_base, ref_off);
 tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
-   TCG_TARGET_CALL_STACK_OFFSET
-   + stk_slot * sizeof(tcg_target_long));
+   arg_slot_stk_ofs(arg_slot));
 }
 }
 
@@ -4900,8 +4906,7 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
 case TCG_CALL_ARG_BY_REF:
 load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
 load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
- TCG_TARGET_CALL_STACK_OFFSET
- + loc->ref_slot * sizeof(tcg_target_long),
+ arg_slot_stk_ofs(loc->ref_slot),
  &allocated_regs);
 break;
 case TCG_CALL_ARG_BY_REF_N:
-- 
2.34.1




[PATCH v2 08/54] tcg: Split out tcg_out_ext32u

2023-04-10 Thread Richard Henderson
We will need a backend interface for performing 32-bit zero-extend.
Use it in tcg_reg_alloc_op in the meantime.

Signed-off-by: Richard Henderson 
---
 tcg/tcg.c|  4 
 tcg/aarch64/tcg-target.c.inc |  9 +++--
 tcg/arm/tcg-target.c.inc |  5 +
 tcg/i386/tcg-target.c.inc|  4 ++--
 tcg/loongarch64/tcg-target.c.inc |  2 +-
 tcg/mips/tcg-target.c.inc|  3 ++-
 tcg/ppc/tcg-target.c.inc |  4 +++-
 tcg/riscv/tcg-target.c.inc   |  2 +-
 tcg/s390x/tcg-target.c.inc   | 20 ++--
 tcg/sparc64/tcg-target.c.inc | 17 +++--
 tcg/tci/tcg-target.c.inc |  9 -
 11 files changed, 54 insertions(+), 25 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 84aa8d639e..a182771c01 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -110,6 +110,7 @@ static void tcg_out_ext16s(TCGContext *s, TCGType type, 
TCGReg ret, TCGReg arg);
 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
+static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
 static void tcg_out_goto_tb(TCGContext *s, int which);
@@ -4525,6 +4526,9 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp 
*op)
 case INDEX_op_ext32s_i64:
 tcg_out_ext32s(s, new_args[0], new_args[1]);
 break;
+case INDEX_op_ext32u_i64:
+tcg_out_ext32u(s, new_args[0], new_args[1]);
+break;
 default:
 if (def->flags & TCG_OPF_VECTOR) {
 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index d7964734c3..bca5f03dfb 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -1452,6 +1452,11 @@ static void tcg_out_ext16u(TCGContext *s, TCGReg rd, 
TCGReg rn)
 tcg_out_uxt(s, MO_16, rd, rn);
 }
 
+static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
+{
+tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
+}
+
 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
 TCGReg rn, int64_t aimm)
 {
@@ -2259,8 +2264,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
 break;
 case INDEX_op_extu_i32_i64:
-case INDEX_op_ext32u_i64:
-tcg_out_movr(s, TCG_TYPE_I32, a0, a1);
+tcg_out_ext32u(s, a0, a1);
 break;
 
 case INDEX_op_deposit_i64:
@@ -2327,6 +2331,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 case INDEX_op_ext16u_i64:
 case INDEX_op_ext16u_i32:
 case INDEX_op_ext32s_i64:
+case INDEX_op_ext32u_i64:
 default:
 g_assert_not_reached();
 }
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 401769bdd6..5c48b92f83 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -998,6 +998,11 @@ static void tcg_out_ext32s(TCGContext *s, TCGReg rd, 
TCGReg rn)
 g_assert_not_reached();
 }
 
+static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
+{
+g_assert_not_reached();
+}
+
 static void tcg_out_bswap16(TCGContext *s, ARMCond cond,
 TCGReg rd, TCGReg rn, int flags)
 {
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index f4ac877aba..7d63403693 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -1287,7 +1287,7 @@ static void tcg_out_ext16s(TCGContext *s, TCGType type, 
TCGReg dest, TCGReg src)
 tcg_out_modrm(s, OPC_MOVSWL + rexw, dest, src);
 }
 
-static inline void tcg_out_ext32u(TCGContext *s, int dest, int src)
+static void tcg_out_ext32u(TCGContext *s, TCGReg dest, TCGReg src)
 {
 /* 32-bit mov zero extends.  */
 tcg_out_modrm(s, OPC_MOVL_GvEv, dest, src);
@@ -2754,7 +2754,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode 
opc,
 tcg_out_bswap64(s, a0);
 break;
 case INDEX_op_extu_i32_i64:
-case INDEX_op_ext32u_i64:
 case INDEX_op_extrl_i64_i32:
 tcg_out_ext32u(s, a0, a1);
 break;
@@ -2838,6 +2837,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode 
opc,
 case INDEX_op_ext16u_i32:
 case INDEX_op_ext16u_i64:
 case INDEX_op_ext32s_i64:
+case INDEX_op_ext32u_i64:
 default:
 g_assert_not_reached();
 }
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 037474510c..d2511eda7a 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1246,7 +1246,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 tcg_out_brcond(s, a2, a0, a1, arg_label(args[3]));
 break;
 
-case INDEX_op_ext32u_i64:
 case INDEX_op_extu_i32_i64:
 tcg_out_ext32u(s, 

[PATCH v2 43/54] tcg/riscv: Convert tcg_out_qemu_{ld,st}_slow_path

2023-04-10 Thread Richard Henderson
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
and tcg_out_st_helper_args.

Signed-off-by: Richard Henderson 
---
 tcg/riscv/tcg-target.c.inc | 37 ++---
 1 file changed, 10 insertions(+), 27 deletions(-)

diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index d4134bc86f..425ea8902e 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -994,14 +994,14 @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, 
MemOpIdx oi,
 label->label_ptr[0] = label_ptr[0];
 }
 
+/* We have three temps, we might as well expose them. */
+static const TCGLdstHelperParam ldst_helper_param = {
+.ntmp = 3, .tmp = { TCG_REG_TMP0, TCG_REG_TMP1, TCG_REG_TMP2 }
+};
+
 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 {
-MemOpIdx oi = l->oi;
-MemOp opc = get_memop(oi);
-TCGReg a0 = tcg_target_call_iarg_regs[0];
-TCGReg a1 = tcg_target_call_iarg_regs[1];
-TCGReg a2 = tcg_target_call_iarg_regs[2];
-TCGReg a3 = tcg_target_call_iarg_regs[3];
+MemOp opc = get_memop(l->oi);
 
 /* resolve label address */
 if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
@@ -1009,13 +1009,9 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, 
TCGLabelQemuLdst *l)
 }
 
 /* call load helper */
-tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0);
-tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg);
-tcg_out_movi(s, TCG_TYPE_PTR, a2, oi);
-tcg_out_movi(s, TCG_TYPE_PTR, a3, (tcg_target_long)l->raddr);
-
+tcg_out_ld_helper_args(s, l, &ldst_helper_param);
 tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SSIZE], false);
-tcg_out_mov(s, (opc & MO_SIZE) == MO_64, l->datalo_reg, a0);
+tcg_out_ld_helper_ret(s, l, true, &ldst_helper_param);
 
 tcg_out_goto(s, l->raddr);
 return true;
@@ -1023,14 +1019,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, 
TCGLabelQemuLdst *l)
 
 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 {
-MemOpIdx oi = l->oi;
-MemOp opc = get_memop(oi);
-MemOp s_bits = opc & MO_SIZE;
-TCGReg a0 = tcg_target_call_iarg_regs[0];
-TCGReg a1 = tcg_target_call_iarg_regs[1];
-TCGReg a2 = tcg_target_call_iarg_regs[2];
-TCGReg a3 = tcg_target_call_iarg_regs[3];
-TCGReg a4 = tcg_target_call_iarg_regs[4];
+MemOp opc = get_memop(l->oi);
 
 /* resolve label address */
 if (!reloc_sbimm12(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
@@ -1038,13 +1027,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, 
TCGLabelQemuLdst *l)
 }
 
 /* call store helper */
-tcg_out_mov(s, TCG_TYPE_PTR, a0, TCG_AREG0);
-tcg_out_mov(s, TCG_TYPE_PTR, a1, l->addrlo_reg);
-tcg_out_movext(s, s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32, a2,
-   l->type, s_bits, l->datalo_reg);
-tcg_out_movi(s, TCG_TYPE_PTR, a3, oi);
-tcg_out_movi(s, TCG_TYPE_PTR, a4, (tcg_target_long)l->raddr);
-
+tcg_out_st_helper_args(s, l, &ldst_helper_param);
 tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE], false);
 
 tcg_out_goto(s, l->raddr);
-- 
2.34.1




[PATCH v2 42/54] tcg/ppc: Convert tcg_out_qemu_{ld,st}_slow_path

2023-04-10 Thread Richard Henderson
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
and tcg_out_st_helper_args.

Signed-off-by: Richard Henderson 
---
 tcg/ppc/tcg-target.c.inc | 88 
 1 file changed, 26 insertions(+), 62 deletions(-)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 90093a6509..1b60166d2f 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -2137,44 +2137,38 @@ static void add_qemu_ldst_label(TCGContext *s, bool 
is_ld,
 label->label_ptr[0] = lptr;
 }
 
+static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
+{
+if (arg < 0) {
+arg = TCG_REG_TMP1;
+}
+tcg_out32(s, MFSPR | RT(arg) | LR);
+return arg;
+}
+
+/*
+ * For the purposes of ppc32 sorting 4 input registers into 4 argument
+ * registers, there is an outside chance we would require 3 temps.
+ * Because of constraints, no inputs are in r3, and env will not be
+ * placed into r3 until after the sorting is done, and is thus free.
+ */
+static const TCGLdstHelperParam ldst_helper_param = {
+.ra_gen = ldst_ra_gen,
+.ntmp = 3,
+.tmp = { TCG_REG_TMP1, TCG_REG_R0, TCG_REG_R3 }
+};
+
 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
-MemOpIdx oi = lb->oi;
-MemOp opc = get_memop(oi);
-TCGReg hi, lo, arg = TCG_REG_R3;
+MemOp opc = get_memop(lb->oi);
 
 if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
 return false;
 }
 
-tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
-
-lo = lb->addrlo_reg;
-hi = lb->addrhi_reg;
-if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
-arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
-tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
-tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
-} else {
-/* If the address needed to be zero-extended, we'll have already
-   placed it in R4.  The only remaining case is 64-bit guest.  */
-tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
-}
-
-tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
-tcg_out32(s, MFSPR | RT(arg) | LR);
-
+tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
 tcg_out_call_int(s, LK, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
-
-lo = lb->datalo_reg;
-hi = lb->datahi_reg;
-if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
-tcg_out_mov(s, TCG_TYPE_I32, lo, TCG_REG_R4);
-tcg_out_mov(s, TCG_TYPE_I32, hi, TCG_REG_R3);
-} else {
-tcg_out_movext(s, lb->type, lo,
-   TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_R3);
-}
+tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
 
 tcg_out_b(s, 0, lb->raddr);
 return true;
@@ -2182,43 +2176,13 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb)
 
 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
-MemOpIdx oi = lb->oi;
-MemOp opc = get_memop(oi);
-MemOp s_bits = opc & MO_SIZE;
-TCGReg hi, lo, arg = TCG_REG_R3;
+MemOp opc = get_memop(lb->oi);
 
 if (!reloc_pc14(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
 return false;
 }
 
-tcg_out_mov(s, TCG_TYPE_PTR, arg++, TCG_AREG0);
-
-lo = lb->addrlo_reg;
-hi = lb->addrhi_reg;
-if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
-arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
-tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
-tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
-} else {
-/* If the address needed to be zero-extended, we'll have already
-   placed it in R4.  The only remaining case is 64-bit guest.  */
-tcg_out_mov(s, TCG_TYPE_TL, arg++, lo);
-}
-
-lo = lb->datalo_reg;
-hi = lb->datahi_reg;
-if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
-arg |= (TCG_TARGET_CALL_ARG_I64 == TCG_CALL_ARG_EVEN);
-tcg_out_mov(s, TCG_TYPE_I32, arg++, hi);
-tcg_out_mov(s, TCG_TYPE_I32, arg++, lo);
-} else {
-tcg_out_movext(s, s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
-   arg++, lb->type, s_bits, lo);
-}
-
-tcg_out_movi(s, TCG_TYPE_I32, arg++, oi);
-tcg_out32(s, MFSPR | RT(arg) | LR);
-
+tcg_out_st_helper_args(s, lb, &ldst_helper_param);
 tcg_out_call_int(s, LK, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
 
 tcg_out_b(s, 0, lb->raddr);
-- 
2.34.1




[PATCH v2 21/54] tcg/aarch64: Rationalize args to tcg_out_qemu_{ld, st}

2023-04-10 Thread Richard Henderson
Mark the argument registers const, because they must be passed to
add_qemu_ldst_label unmodified.  Rename the 'ext' parameter to 'data_type'
to make the use clearer; pass it to tcg_out_qemu_st as well to even out the
interfaces.  Rename the 'otype' local to 'addr_type' to make the use clearer.

Signed-off-by: Richard Henderson 
---
 tcg/aarch64/tcg-target.c.inc | 42 ++--
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 4ec3cf3172..251464ae6f 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -1850,23 +1850,23 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp 
memop,
 }
 }
 
-static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
-MemOpIdx oi, TCGType ext)
+static void tcg_out_qemu_ld(TCGContext *s, const TCGReg data_reg,
+const TCGReg addr_reg, const MemOpIdx oi,
+TCGType data_type)
 {
 MemOp memop = get_memop(oi);
-const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
+TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
 
 /* Byte swapping is left to middle-end expansion. */
 tcg_debug_assert((memop & MO_BSWAP) == 0);
 
 #ifdef CONFIG_SOFTMMU
-unsigned mem_index = get_mmuidx(oi);
 tcg_insn_unit *label_ptr;
 
-tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 1);
-tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
-   TCG_REG_X1, otype, addr_reg);
-add_qemu_ldst_label(s, true, oi, ext, data_reg, addr_reg,
+tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, get_mmuidx(oi), 1);
+tcg_out_qemu_ld_direct(s, memop, data_type, data_reg,
+   TCG_REG_X1, addr_type, addr_reg);
+add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
 s->code_ptr, label_ptr);
 #else /* !CONFIG_SOFTMMU */
 unsigned a_bits = get_alignment_bits(memop);
@@ -1874,33 +1874,33 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg 
data_reg, TCGReg addr_reg,
 tcg_out_test_alignment(s, true, addr_reg, a_bits);
 }
 if (USE_GUEST_BASE) {
-tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
-   TCG_REG_GUEST_BASE, otype, addr_reg);
+tcg_out_qemu_ld_direct(s, memop, data_type, data_reg,
+   TCG_REG_GUEST_BASE, addr_type, addr_reg);
 } else {
-tcg_out_qemu_ld_direct(s, memop, ext, data_reg,
+tcg_out_qemu_ld_direct(s, memop, data_type, data_reg,
addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
 }
 #endif /* CONFIG_SOFTMMU */
 }
 
-static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
-MemOpIdx oi)
+static void tcg_out_qemu_st(TCGContext *s, const TCGReg data_reg,
+const TCGReg addr_reg, const MemOpIdx oi,
+TCGType data_type)
 {
 MemOp memop = get_memop(oi);
-const TCGType otype = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
+TCGType addr_type = TARGET_LONG_BITS == 64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
 
 /* Byte swapping is left to middle-end expansion. */
 tcg_debug_assert((memop & MO_BSWAP) == 0);
 
 #ifdef CONFIG_SOFTMMU
-unsigned mem_index = get_mmuidx(oi);
 tcg_insn_unit *label_ptr;
 
-tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, mem_index, 0);
+tcg_out_tlb_read(s, addr_reg, memop, &label_ptr, get_mmuidx(oi), 0);
 tcg_out_qemu_st_direct(s, memop, data_reg,
-   TCG_REG_X1, otype, addr_reg);
-add_qemu_ldst_label(s, false, oi, (memop & MO_SIZE)== MO_64,
-data_reg, addr_reg, s->code_ptr, label_ptr);
+   TCG_REG_X1, addr_type, addr_reg);
+add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
+s->code_ptr, label_ptr);
 #else /* !CONFIG_SOFTMMU */
 unsigned a_bits = get_alignment_bits(memop);
 if (a_bits) {
@@ -1908,7 +1908,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg 
data_reg, TCGReg addr_reg,
 }
 if (USE_GUEST_BASE) {
 tcg_out_qemu_st_direct(s, memop, data_reg,
-   TCG_REG_GUEST_BASE, otype, addr_reg);
+   TCG_REG_GUEST_BASE, addr_type, addr_reg);
 } else {
 tcg_out_qemu_st_direct(s, memop, data_reg,
addr_reg, TCG_TYPE_I64, TCG_REG_XZR);
@@ -2249,7 +2249,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 break;
 case INDEX_op_qemu_st_i32:
 case INDEX_op_qemu_st_i64:
-tcg_out_qemu_st(s, REG0(0), a1, a2);
+tcg_out_qemu_st(s, REG0(0), a1, a2, ext);
 break;
 
 case INDEX_op_bswap64_i64:
-- 
2.34.1




[PATCH v2 51/54] tcg/ppc: Remove unused constraints A, B, C, D

2023-04-10 Thread Richard Henderson
These constraints have not been used for quite some time.

Fixes: 77b73de67632 ("Use rem/div[u]_i32 drop div[u]2_i32")
Signed-off-by: Richard Henderson 
---
 tcg/ppc/tcg-target-con-str.h | 4 
 1 file changed, 4 deletions(-)

diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h
index f3bf030bc3..9dcbc3df50 100644
--- a/tcg/ppc/tcg-target-con-str.h
+++ b/tcg/ppc/tcg-target-con-str.h
@@ -10,10 +10,6 @@
  */
 REGS('r', ALL_GENERAL_REGS)
 REGS('v', ALL_VECTOR_REGS)
-REGS('A', 1u << TCG_REG_R3)
-REGS('B', 1u << TCG_REG_R4)
-REGS('C', 1u << TCG_REG_R5)
-REGS('D', 1u << TCG_REG_R6)
 
 /*
  * Define constraint letters for constants:
-- 
2.34.1




[PATCH v2 37/54] tcg/i386: Convert tcg_out_qemu_st_slow_path

2023-04-10 Thread Richard Henderson
Use tcg_out_st_helper_args.  This eliminates the use of
a tail call to the store helper.  This may or may not be
an improvement, depending on the call/return branch
prediction of the host microarchitecture.

Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target.c.inc | 55 +++
 1 file changed, 4 insertions(+), 51 deletions(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 0b3d7db14c..f05755b20e 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -1962,11 +1962,8 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, 
TCGLabelQemuLdst *l)
  */
 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 {
-MemOpIdx oi = l->oi;
-MemOp opc = get_memop(oi);
-MemOp s_bits = opc & MO_SIZE;
+MemOp opc = get_memop(l->oi);
 tcg_insn_unit **label_ptr = &l->label_ptr[0];
-TCGReg retaddr;
 
 /* resolve label address */
 tcg_patch32(label_ptr[0], s->code_ptr - label_ptr[0] - 4);
@@ -1974,55 +1971,11 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, 
TCGLabelQemuLdst *l)
 tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
 }
 
-if (TCG_TARGET_REG_BITS == 32) {
-int ofs = 0;
+tcg_out_st_helper_args(s, l, &ldst_helper_param);
 
-tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
-ofs += 4;
+tcg_out_branch(s, 1, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
 
-tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
-ofs += 4;
-
-if (TARGET_LONG_BITS == 64) {
-tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
-ofs += 4;
-}
-
-tcg_out_st(s, TCG_TYPE_I32, l->datalo_reg, TCG_REG_ESP, ofs);
-ofs += 4;
-
-if (s_bits == MO_64) {
-tcg_out_st(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_ESP, ofs);
-ofs += 4;
-}
-
-tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
-ofs += 4;
-
-retaddr = TCG_REG_EAX;
-tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
-tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP, ofs);
-} else {
-tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
-/* The second argument is already loaded with addrlo.  */
-tcg_out_mov(s, (s_bits == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
-tcg_target_call_iarg_regs[2], l->datalo_reg);
-tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], oi);
-
-if (ARRAY_SIZE(tcg_target_call_iarg_regs) > 4) {
-retaddr = tcg_target_call_iarg_regs[4];
-tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
-} else {
-retaddr = TCG_REG_RAX;
-tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
-tcg_out_st(s, TCG_TYPE_PTR, retaddr, TCG_REG_ESP,
-   TCG_TARGET_CALL_STACK_OFFSET);
-}
-}
-
-/* "Tail call" to the helper, with the return address back inline.  */
-tcg_out_push(s, retaddr);
-tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
+tcg_out_jmp(s, l->raddr);
 return true;
 }
 #else
-- 
2.34.1




[PATCH v2 54/54] tcg/s390x: Simplify constraints on qemu_ld/st

2023-04-10 Thread Richard Henderson
Adjust the softmmu tlb to use R0+R1, not any of the normally available
registers.  Since we handle overlap between inputs and helper arguments,
we can allow any allocatable reg.

Signed-off-by: Richard Henderson 
---
 tcg/s390x/tcg-target-con-set.h |  2 --
 tcg/s390x/tcg-target-con-str.h |  1 -
 tcg/s390x/tcg-target.c.inc | 36 --
 3 files changed, 12 insertions(+), 27 deletions(-)

diff --git a/tcg/s390x/tcg-target-con-set.h b/tcg/s390x/tcg-target-con-set.h
index 15f1c55103..ecc079bb6d 100644
--- a/tcg/s390x/tcg-target-con-set.h
+++ b/tcg/s390x/tcg-target-con-set.h
@@ -10,12 +10,10 @@
  * tcg-target-con-str.h; the constraint combination is inclusive or.
  */
 C_O0_I1(r)
-C_O0_I2(L, L)
 C_O0_I2(r, r)
 C_O0_I2(r, ri)
 C_O0_I2(r, rA)
 C_O0_I2(v, r)
-C_O1_I1(r, L)
 C_O1_I1(r, r)
 C_O1_I1(v, r)
 C_O1_I1(v, v)
diff --git a/tcg/s390x/tcg-target-con-str.h b/tcg/s390x/tcg-target-con-str.h
index 6fa64a1ed6..25675b449e 100644
--- a/tcg/s390x/tcg-target-con-str.h
+++ b/tcg/s390x/tcg-target-con-str.h
@@ -9,7 +9,6 @@
  * REGS(letter, register_mask)
  */
 REGS('r', ALL_GENERAL_REGS)
-REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
 REGS('v', ALL_VECTOR_REGS)
 REGS('o', 0x) /* odd numbered general regs */
 
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index 42d3e13e08..a380982f86 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -44,18 +44,6 @@
 #define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 16)
 #define ALL_VECTOR_REGS  MAKE_64BIT_MASK(32, 32)
 
-/*
- * For softmmu, we need to avoid conflicts with the first 3
- * argument registers to perform the tlb lookup, and to call
- * the helper function.
- */
-#ifdef CONFIG_SOFTMMU
-#define SOFTMMU_RESERVE_REGS MAKE_64BIT_MASK(TCG_REG_R2, 3)
-#else
-#define SOFTMMU_RESERVE_REGS 0
-#endif
-
-
 /* Several places within the instruction set 0 means "no register"
rather than TCG_REG_R0.  */
 #define TCG_REG_NONE0
@@ -1734,10 +1722,10 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg 
addr_reg, MemOp opc,
 int ofs, a_off;
 uint64_t tlb_mask;
 
-tcg_out_sh64(s, RSY_SRLG, TCG_REG_R2, addr_reg, TCG_REG_NONE,
+tcg_out_sh64(s, RSY_SRLG, TCG_TMP0, addr_reg, TCG_REG_NONE,
  TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
-tcg_out_insn(s, RXY, NG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, mask_off);
-tcg_out_insn(s, RXY, AG, TCG_REG_R2, TCG_AREG0, TCG_REG_NONE, table_off);
+tcg_out_insn(s, RXY, NG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, mask_off);
+tcg_out_insn(s, RXY, AG, TCG_TMP0, TCG_AREG0, TCG_REG_NONE, table_off);
 
 /* For aligned accesses, we check the first byte and include the alignment
bits within the address.  For unaligned access, we check that we don't
@@ -1745,10 +1733,10 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg 
addr_reg, MemOp opc,
 a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
 tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;
 if (a_off == 0) {
-tgen_andi_risbg(s, TCG_REG_R3, addr_reg, tlb_mask);
+tgen_andi_risbg(s, TCG_REG_R0, addr_reg, tlb_mask);
 } else {
-tcg_out_insn(s, RX, LA, TCG_REG_R3, addr_reg, TCG_REG_NONE, a_off);
-tgen_andi(s, TCG_TYPE_TL, TCG_REG_R3, tlb_mask);
+tcg_out_insn(s, RX, LA, TCG_REG_R0, addr_reg, TCG_REG_NONE, a_off);
+tgen_andi(s, TCG_TYPE_TL, TCG_REG_R0, tlb_mask);
 }
 
 if (is_ld) {
@@ -1757,14 +1745,14 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg 
addr_reg, MemOp opc,
 ofs = offsetof(CPUTLBEntry, addr_write);
 }
 if (TARGET_LONG_BITS == 32) {
-tcg_out_insn(s, RX, C, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
+tcg_out_insn(s, RX, C, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
 } else {
-tcg_out_insn(s, RXY, CG, TCG_REG_R3, TCG_REG_R2, TCG_REG_NONE, ofs);
+tcg_out_insn(s, RXY, CG, TCG_REG_R0, TCG_TMP0, TCG_REG_NONE, ofs);
 }
 
-tcg_out_insn(s, RXY, LG, TCG_REG_R2, TCG_REG_R2, TCG_REG_NONE,
+tcg_out_insn(s, RXY, LG, TCG_TMP0, TCG_TMP0, TCG_REG_NONE,
  offsetof(CPUTLBEntry, addend));
-return TCG_REG_R2;
+return TCG_TMP0;
 }
 
 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
@@ -3185,10 +3173,10 @@ static TCGConstraintSetIndex 
tcg_target_op_def(TCGOpcode op)
 
 case INDEX_op_qemu_ld_i32:
 case INDEX_op_qemu_ld_i64:
-return C_O1_I1(r, L);
+return C_O1_I1(r, r);
 case INDEX_op_qemu_st_i64:
 case INDEX_op_qemu_st_i32:
-return C_O0_I2(L, L);
+return C_O0_I2(r, r);
 
 case INDEX_op_deposit_i32:
 case INDEX_op_deposit_i64:
-- 
2.34.1




[PATCH v2 48/54] tcg/mips: Simplify constraints on qemu_ld/st

2023-04-10 Thread Richard Henderson
The softmmu tlb uses TCG_REG_TMP[0-3], not any of the normally available
registers.  Now that we handle overlap between inputs and helper arguments,
we can allow any allocatable reg.

Signed-off-by: Richard Henderson 
---
 tcg/mips/tcg-target-con-set.h | 13 +
 tcg/mips/tcg-target-con-str.h |  2 --
 tcg/mips/tcg-target.c.inc | 30 --
 3 files changed, 13 insertions(+), 32 deletions(-)

diff --git a/tcg/mips/tcg-target-con-set.h b/tcg/mips/tcg-target-con-set.h
index fe3e868a2f..864034f468 100644
--- a/tcg/mips/tcg-target-con-set.h
+++ b/tcg/mips/tcg-target-con-set.h
@@ -12,15 +12,13 @@
 C_O0_I1(r)
 C_O0_I2(rZ, r)
 C_O0_I2(rZ, rZ)
-C_O0_I2(SZ, S)
-C_O0_I3(SZ, S, S)
-C_O0_I3(SZ, SZ, S)
+C_O0_I3(rZ, r, r)
+C_O0_I3(rZ, rZ, r)
 C_O0_I4(rZ, rZ, rZ, rZ)
-C_O0_I4(SZ, SZ, S, S)
-C_O1_I1(r, L)
+C_O0_I4(rZ, rZ, r, r)
 C_O1_I1(r, r)
 C_O1_I2(r, 0, rZ)
-C_O1_I2(r, L, L)
+C_O1_I2(r, r, r)
 C_O1_I2(r, r, ri)
 C_O1_I2(r, r, rI)
 C_O1_I2(r, r, rIK)
@@ -30,7 +28,6 @@ C_O1_I2(r, rZ, rN)
 C_O1_I2(r, rZ, rZ)
 C_O1_I4(r, rZ, rZ, rZ, 0)
 C_O1_I4(r, rZ, rZ, rZ, rZ)
-C_O2_I1(r, r, L)
-C_O2_I2(r, r, L, L)
+C_O2_I1(r, r, r)
 C_O2_I2(r, r, r, r)
 C_O2_I4(r, r, rZ, rZ, rN, rN)
diff --git a/tcg/mips/tcg-target-con-str.h b/tcg/mips/tcg-target-con-str.h
index e4b2965c72..413c280a7a 100644
--- a/tcg/mips/tcg-target-con-str.h
+++ b/tcg/mips/tcg-target-con-str.h
@@ -9,8 +9,6 @@
  * REGS(letter, register_mask)
  */
 REGS('r', ALL_GENERAL_REGS)
-REGS('L', ALL_QLOAD_REGS)
-REGS('S', ALL_QSTORE_REGS)
 
 /*
  * Define constraint letters for constants:
diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index 2a6376cd0a..08ef62f567 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -176,20 +176,6 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
 #define TCG_CT_CONST_WSZ  0x2000   /* word size */
 
 #define ALL_GENERAL_REGS  0xu
-#define NOA0_REGS (ALL_GENERAL_REGS & ~(1 << TCG_REG_A0))
-
-#ifdef CONFIG_SOFTMMU
-#define ALL_QLOAD_REGS \
-(NOA0_REGS & ~((TCG_TARGET_REG_BITS < TARGET_LONG_BITS) << TCG_REG_A2))
-#define ALL_QSTORE_REGS \
-(NOA0_REGS & ~(TCG_TARGET_REG_BITS < TARGET_LONG_BITS   \
-   ? (1 << TCG_REG_A2) | (1 << TCG_REG_A3)  \
-   : (1 << TCG_REG_A1)))
-#else
-#define ALL_QLOAD_REGS   NOA0_REGS
-#define ALL_QSTORE_REGS  NOA0_REGS
-#endif
-
 
 static bool is_p2m1(tcg_target_long val)
 {
@@ -2293,18 +2279,18 @@ static TCGConstraintSetIndex 
tcg_target_op_def(TCGOpcode op)
 
 case INDEX_op_qemu_ld_i32:
 return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
-? C_O1_I1(r, L) : C_O1_I2(r, L, L));
+? C_O1_I1(r, r) : C_O1_I2(r, r, r));
 case INDEX_op_qemu_st_i32:
 return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
-? C_O0_I2(SZ, S) : C_O0_I3(SZ, S, S));
+? C_O0_I2(rZ, r) : C_O0_I3(rZ, r, r));
 case INDEX_op_qemu_ld_i64:
-return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
-: TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, L)
-: C_O2_I2(r, r, L, L));
+return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
+: TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, r)
+: C_O2_I2(r, r, r, r));
 case INDEX_op_qemu_st_i64:
-return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(SZ, S)
-: TARGET_LONG_BITS == 32 ? C_O0_I3(SZ, SZ, S)
-: C_O0_I4(SZ, SZ, S, S));
+return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(rZ, r)
+: TARGET_LONG_BITS == 32 ? C_O0_I3(rZ, rZ, r)
+: C_O0_I4(rZ, rZ, r, r));
 
 default:
 g_assert_not_reached();
-- 
2.34.1




[PATCH v2 52/54] tcg/riscv: Simplify constraints on qemu_ld/st

2023-04-10 Thread Richard Henderson
The softmmu tlb uses TCG_REG_TMP[0-2], not any of the normally available
registers.  Now that we handle overlap between inputs and helper arguments,
we can allow any allocatable reg.

Signed-off-by: Richard Henderson 
---
 tcg/riscv/tcg-target-con-set.h |  2 --
 tcg/riscv/tcg-target-con-str.h |  1 -
 tcg/riscv/tcg-target.c.inc | 16 +++-
 3 files changed, 3 insertions(+), 16 deletions(-)

diff --git a/tcg/riscv/tcg-target-con-set.h b/tcg/riscv/tcg-target-con-set.h
index c11710d117..1a8b8e9f2b 100644
--- a/tcg/riscv/tcg-target-con-set.h
+++ b/tcg/riscv/tcg-target-con-set.h
@@ -10,11 +10,9 @@
  * tcg-target-con-str.h; the constraint combination is inclusive or.
  */
 C_O0_I1(r)
-C_O0_I2(LZ, L)
 C_O0_I2(rZ, r)
 C_O0_I2(rZ, rZ)
 C_O0_I4(rZ, rZ, rZ, rZ)
-C_O1_I1(r, L)
 C_O1_I1(r, r)
 C_O1_I2(r, r, ri)
 C_O1_I2(r, r, rI)
diff --git a/tcg/riscv/tcg-target-con-str.h b/tcg/riscv/tcg-target-con-str.h
index 8d8afaee53..6f1cfb976c 100644
--- a/tcg/riscv/tcg-target-con-str.h
+++ b/tcg/riscv/tcg-target-con-str.h
@@ -9,7 +9,6 @@
  * REGS(letter, register_mask)
  */
 REGS('r', ALL_GENERAL_REGS)
-REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
 
 /*
  * Define constraint letters for constants:
diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index 425ea8902e..35f04ddda9 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -125,17 +125,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot)
 #define TCG_CT_CONST_N12   0x400
 #define TCG_CT_CONST_M12   0x800
 
-#define ALL_GENERAL_REGS  MAKE_64BIT_MASK(0, 32)
-/*
- * For softmmu, we need to avoid conflicts with the first 5
- * argument registers to call the helper.  Some of these are
- * also used for the tlb lookup.
- */
-#ifdef CONFIG_SOFTMMU
-#define SOFTMMU_RESERVE_REGS  MAKE_64BIT_MASK(TCG_REG_A0, 5)
-#else
-#define SOFTMMU_RESERVE_REGS  0
-#endif
+#define ALL_GENERAL_REGS   MAKE_64BIT_MASK(0, 32)
 
 #define sextreg  sextract64
 
@@ -1653,10 +1643,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 
 case INDEX_op_qemu_ld_i32:
 case INDEX_op_qemu_ld_i64:
-return C_O1_I1(r, L);
+return C_O1_I1(r, r);
 case INDEX_op_qemu_st_i32:
 case INDEX_op_qemu_st_i64:
-return C_O0_I2(LZ, L);
+return C_O0_I2(rZ, r);
 
 default:
 g_assert_not_reached();
-- 
2.34.1




[PATCH v2 49/54] tcg/ppc: Reorg tcg_out_tlb_read

2023-04-10 Thread Richard Henderson
Allocate TCG_REG_TMP2.  Use R0, TMP1, TMP2 instead of any of
the normally allocated registers for the tlb load.
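
For orientation, a rough C equivalent of the fast-path lookup that
tcg_out_tlb_read emits (a sketch of the logic only; mask and table are
the per-mmu-idx CPUTLBDescFast values loaded from env, and the real
work is the emitted assembly in the diff below):

    /* Index the TLB: the mask already excludes the entry-size bits. */
    uintptr_t off = (addr >> (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)) & mask;
    CPUTLBEntry *entry = (CPUTLBEntry *)(table + off);
    /* Compare the page+alignment bits of addr against the entry's
       comparator; on a hit, host_addr = addr + entry->addend. */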

Signed-off-by: Richard Henderson 
---
 tcg/ppc/tcg-target.c.inc | 84 +++-
 1 file changed, 49 insertions(+), 35 deletions(-)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 1b60166d2f..613cd73583 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -68,6 +68,7 @@
 #else
 # define TCG_REG_TMP1   TCG_REG_R12
 #endif
+#define TCG_REG_TMP2TCG_REG_R11
 
 #define TCG_VEC_TMP1TCG_REG_V0
 #define TCG_VEC_TMP2TCG_REG_V1
@@ -2007,10 +2008,11 @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
 
-/* Perform the TLB load and compare.  Places the result of the comparison
-   in CR7, loads the addend of the TLB into R3, and returns the register
-   containing the guest address (zero-extended into R4).  Clobbers R0 and R2. */
-
+/*
+ * Perform the TLB load and compare.  Places the result of the comparison
+ * in CR7, loads the addend of the TLB into TMP1, and returns the register
+ * containing the guest address (zero-extended into TMP2).  Clobbers R0.
+ */
 static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc,
TCGReg addrlo, TCGReg addrhi,
int mem_index, bool is_read)
@@ -2026,40 +2028,44 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc,
 unsigned a_bits = get_alignment_bits(opc);
 
 /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
-tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0, mask_off);
-tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, TCG_AREG0, table_off);
+tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, mask_off);
+tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_AREG0, table_off);
 
 /* Extract the page index, shifted into place for tlb index.  */
 if (TCG_TARGET_REG_BITS == 32) {
-tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
+tcg_out_shri32(s, TCG_REG_R0, addrlo,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
 } else {
-tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
+tcg_out_shri64(s, TCG_REG_R0, addrlo,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
 }
-tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));
+tcg_out32(s, AND | SAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_R0));
 
-/* Load the TLB comparator.  */
+/* Load the (low part) TLB comparator into TMP2. */
 if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
 uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
 ? LWZUX : LDUX);
-tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
+tcg_out32(s, lxu | TAB(TCG_REG_TMP2, TCG_REG_TMP1, TCG_REG_TMP2));
 } else {
-tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
+tcg_out32(s, ADD | TAB(TCG_REG_TMP1, TCG_REG_TMP1, TCG_REG_TMP2));
 if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
-tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
-tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
+tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP2,
+   TCG_REG_TMP1, cmp_off + 4);
 } else {
-tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
+tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP2, TCG_REG_TMP1, cmp_off);
 }
 }
 
-/* Load the TLB addend for use on the fast path.  Do this asap
-   to minimize any load use delay.  */
-tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3,
-   offsetof(CPUTLBEntry, addend));
+/*
+ * Load the TLB addend for use on the fast path.
+ * Do this asap to minimize any load use delay.
+ */
+if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
+tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1,
+   offsetof(CPUTLBEntry, addend));
+}
 
-/* Clear the non-page, non-alignment bits from the address */
+/* Clear the non-page, non-alignment bits from the address into R0. */
 if (TCG_TARGET_REG_BITS == 32) {
 /* We don't support unaligned accesses on 32-bits.
  * Preserve the bottom bits and thus trigger a comparison
@@ -2090,9 +2096,6 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc,
 if (TARGET_LONG_BITS == 32) {
 tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
 (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
-/* Zero-extend the address for use in the final address.  */
-tcg_out_ext32u(s, TCG_REG_R4, addrlo);
-addrlo = TCG_REG_R4;
 } else if (a_bits == 0) {
 tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
 } else {
@@ -2102,16 +2105,28 @@ 

[PATCH v2 13/54] tcg: Split out tcg_out_extu_i32_i64

2023-04-10 Thread Richard Henderson
We will need a backend interface for type extension with zero.
Use it in tcg_reg_alloc_op in the meantime.
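
The operation itself is just a 32-to-64-bit zero extension; as a C
sketch of the value semantics:

    static uint64_t extu_i32_i64(uint32_t v)
    {
        return v;   /* implicit zero-extension to 64 bits */
    }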

Signed-off-by: Richard Henderson 
---
 tcg/tcg.c|  4 
 tcg/aarch64/tcg-target.c.inc | 10 ++
 tcg/arm/tcg-target.c.inc |  5 +
 tcg/i386/tcg-target.c.inc|  7 ++-
 tcg/loongarch64/tcg-target.c.inc | 10 ++
 tcg/mips/tcg-target.c.inc|  9 ++---
 tcg/ppc/tcg-target.c.inc | 10 ++
 tcg/riscv/tcg-target.c.inc   | 10 ++
 tcg/s390x/tcg-target.c.inc   | 10 ++
 tcg/sparc64/tcg-target.c.inc |  9 ++---
 tcg/tci/tcg-target.c.inc |  7 ++-
 11 files changed, 63 insertions(+), 28 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index b0498170ea..17bd6d4581 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -112,6 +112,7 @@ static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
+static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
 static void tcg_out_goto_tb(TCGContext *s, int which);
@@ -4533,6 +4534,9 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
 case INDEX_op_ext_i32_i64:
 tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
 break;
+case INDEX_op_extu_i32_i64:
+tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
+break;
 default:
 if (def->flags & TCG_OPF_VECTOR) {
 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 58596eaa4b..ca8b25865b 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -1462,6 +1462,11 @@ static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
 tcg_out_movr(s, TCG_TYPE_I32, rd, rn);
 }
 
+static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
+{
+tcg_out_ext32u(s, rd, rn);
+}
+
 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
 TCGReg rn, int64_t aimm)
 {
@@ -2265,10 +2270,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 }
 break;
 
-case INDEX_op_extu_i32_i64:
-tcg_out_ext32u(s, a0, a1);
-break;
-
 case INDEX_op_deposit_i64:
 case INDEX_op_deposit_i32:
 tcg_out_dep(s, ext, a0, REG0(2), args[3], args[4]);
@@ -2335,6 +2336,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 case INDEX_op_ext32s_i64:
 case INDEX_op_ext32u_i64:
 case INDEX_op_ext_i32_i64:
+case INDEX_op_extu_i32_i64:
 default:
 g_assert_not_reached();
 }
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 2ca25a3d81..2135616e12 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -1008,6 +1008,11 @@ static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
 g_assert_not_reached();
 }
 
+static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
+{
+g_assert_not_reached();
+}
+
 static void tcg_out_bswap16(TCGContext *s, ARMCond cond,
 TCGReg rd, TCGReg rn, int flags)
 {
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index fd4c4e20c8..40d661072b 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -1304,6 +1304,11 @@ static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
 tcg_out_ext32s(s, dest, src);
 }
 
+static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
+{
+tcg_out_ext32u(s, dest, src);
+}
+
 static inline void tcg_out_bswap64(TCGContext *s, int reg)
 {
 tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
@@ -2758,7 +2763,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 case INDEX_op_bswap64_i64:
 tcg_out_bswap64(s, a0);
 break;
-case INDEX_op_extu_i32_i64:
 case INDEX_op_extrl_i64_i32:
 tcg_out_ext32u(s, a0, a1);
 break;
@@ -2841,6 +2845,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 case INDEX_op_ext32s_i64:
 case INDEX_op_ext32u_i64:
 case INDEX_op_ext_i32_i64:
+case INDEX_op_extu_i32_i64:
 default:
 g_assert_not_reached();
 }
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index b2146988be..d83bd9de49 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -463,6 +463,11 @@ static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
 }
 }
 
+static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+tcg_out_ext32u(s, ret, arg);
+}
+
 

[PATCH v2 26/54] tcg/s390x: Pass TCGType to tcg_out_qemu_{ld,st}

2023-04-10 Thread Richard Henderson
We need to set this in TCGLabelQemuLdst, so plumb this
all the way through from tcg_out_op.
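
For reference, a sketch of the TCGLabelQemuLdst fields being filled in
below (reconstructed from the uses in this series -- the queue linkage
is from memory; see tcg/tcg-ldst.c.inc for the authoritative layout):

    typedef struct TCGLabelQemuLdst {
        bool is_ld;                  /* qemu_ld: true, qemu_st: false */
        MemOpIdx oi;
        TCGType type;                /* result type of a load */
        TCGReg addrlo_reg;           /* low word of guest virtual addr */
        TCGReg addrhi_reg;           /* high word of guest virtual addr */
        TCGReg datalo_reg;           /* low word to be loaded or stored */
        TCGReg datahi_reg;           /* high word to be loaded or stored */
        const tcg_insn_unit *raddr;  /* addr of the next IR of qemu_ld/st */
        tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
        QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
    } TCGLabelQemuLdst;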

Signed-off-by: Richard Henderson 
---
 tcg/s390x/tcg-target.c.inc | 22 ++
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index b399798664..d610fe4fbb 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -1770,13 +1770,14 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
 }
 
 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
-TCGReg data, TCGReg addr,
+TCGType type, TCGReg data, TCGReg addr,
 tcg_insn_unit *raddr, tcg_insn_unit *label_ptr)
 {
 TCGLabelQemuLdst *label = new_ldst_label(s);
 
 label->is_ld = is_ld;
 label->oi = oi;
+label->type = type;
 label->datalo_reg = data;
 label->addrlo_reg = addr;
 label->raddr = tcg_splitwx_to_rx(raddr);
@@ -1900,7 +1901,7 @@ static void tcg_prepare_user_ldst(TCGContext *s, TCGReg *addr_reg,
 #endif /* CONFIG_SOFTMMU */
 
 static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
-MemOpIdx oi)
+MemOpIdx oi, TCGType data_type)
 {
 MemOp opc = get_memop(oi);
 #ifdef CONFIG_SOFTMMU
@@ -1916,7 +1917,8 @@ static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
 
 tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
 
-add_qemu_ldst_label(s, 1, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
+add_qemu_ldst_label(s, 1, oi, data_type, data_reg, addr_reg,
+s->code_ptr, label_ptr);
 #else
 TCGReg index_reg;
 tcg_target_long disp;
@@ -1931,7 +1933,7 @@ static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
 }
 
 static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
-MemOpIdx oi)
+MemOpIdx oi, TCGType data_type)
 {
 MemOp opc = get_memop(oi);
 #ifdef CONFIG_SOFTMMU
@@ -1947,7 +1949,8 @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
 
 tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
 
-add_qemu_ldst_label(s, 0, oi, data_reg, addr_reg, s->code_ptr, label_ptr);
+add_qemu_ldst_label(s, 0, oi, data_type, data_reg, addr_reg,
+s->code_ptr, label_ptr);
 #else
 TCGReg index_reg;
 tcg_target_long disp;
@@ -2307,13 +2310,16 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 break;
 
 case INDEX_op_qemu_ld_i32:
-/* ??? Technically we can use a non-extending instruction.  */
+tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I32);
+break;
 case INDEX_op_qemu_ld_i64:
-tcg_out_qemu_ld(s, args[0], args[1], args[2]);
+tcg_out_qemu_ld(s, args[0], args[1], args[2], TCG_TYPE_I64);
 break;
 case INDEX_op_qemu_st_i32:
+tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I32);
+break;
 case INDEX_op_qemu_st_i64:
-tcg_out_qemu_st(s, args[0], args[1], args[2]);
+tcg_out_qemu_st(s, args[0], args[1], args[2], TCG_TYPE_I64);
 break;
 
 case INDEX_op_ld16s_i64:
-- 
2.34.1




[PATCH v2 24/54] tcg/loongarch64: Rationalize args to tcg_out_qemu_{ld, st}

2023-04-10 Thread Richard Henderson
Interpret the variable argument placement in the caller.
Mark the argument registers const, because they must be passed to
add_qemu_ldst_label unmodified.  Shift some code around slightly
to share more between softmmu and user-only.
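
The shape of the change in miniature (a sketch; the real before/after
is in the diff below):

    /* Before: each backend decoded the flat TCGArg array itself. */
    data_reg = *args++;
    addr_reg = *args++;
    oi       = *args++;
    /* After: tcg_out_op decodes once and passes typed arguments. */
    tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);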

Signed-off-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 102 +--
 1 file changed, 44 insertions(+), 58 deletions(-)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 0940788c6f..0daefa18fc 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1049,39 +1049,32 @@ static void tcg_out_qemu_ld_indexed(TCGContext *s, TCGReg rd, TCGReg rj,
 }
 }
 
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGType type)
+static void tcg_out_qemu_ld(TCGContext *s, const TCGReg data_reg,
+const TCGReg addr_reg, const MemOpIdx oi,
+TCGType data_type)
 {
-TCGReg addr_regl;
-TCGReg data_regl;
-MemOpIdx oi;
-MemOp opc;
-#if defined(CONFIG_SOFTMMU)
+MemOp opc = get_memop(oi);
+TCGReg base, index;
+
+#ifdef CONFIG_SOFTMMU
 tcg_insn_unit *label_ptr[1];
-#else
-unsigned a_bits;
-#endif
-TCGReg base;
 
-data_regl = *args++;
-addr_regl = *args++;
-oi = *args++;
-opc = get_memop(oi);
-
-#if defined(CONFIG_SOFTMMU)
-tcg_out_tlb_load(s, addr_regl, oi, label_ptr, 1);
-base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0);
-tcg_out_qemu_ld_indexed(s, data_regl, base, TCG_REG_TMP2, opc, type);
-add_qemu_ldst_label(s, 1, oi, type,
-data_regl, addr_regl,
-s->code_ptr, label_ptr);
+tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 1);
+index = TCG_REG_TMP2;
 #else
-a_bits = get_alignment_bits(opc);
+unsigned a_bits = get_alignment_bits(opc);
 if (a_bits) {
-tcg_out_test_alignment(s, true, addr_regl, a_bits);
+tcg_out_test_alignment(s, true, addr_reg, a_bits);
 }
-base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0);
-TCGReg guest_base_reg = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
-tcg_out_qemu_ld_indexed(s, data_regl, base, guest_base_reg, opc, type);
+index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
+#endif
+
+base = tcg_out_zext_addr_if_32_bit(s, addr_reg, TCG_REG_TMP0);
+tcg_out_qemu_ld_indexed(s, data_reg, base, index, opc, data_type);
+
+#ifdef CONFIG_SOFTMMU
+add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
+s->code_ptr, label_ptr);
 #endif
 }
 
@@ -1109,39 +1102,32 @@ static void tcg_out_qemu_st_indexed(TCGContext *s, TCGReg data,
 }
 }
 
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, TCGType type)
+static void tcg_out_qemu_st(TCGContext *s, const TCGReg data_reg,
+const TCGReg addr_reg, const MemOpIdx oi,
+TCGType data_type)
 {
-TCGReg addr_regl;
-TCGReg data_regl;
-MemOpIdx oi;
-MemOp opc;
-#if defined(CONFIG_SOFTMMU)
+MemOp opc = get_memop(oi);
+TCGReg base, index;
+
+#ifdef CONFIG_SOFTMMU
 tcg_insn_unit *label_ptr[1];
-#else
-unsigned a_bits;
-#endif
-TCGReg base;
 
-data_regl = *args++;
-addr_regl = *args++;
-oi = *args++;
-opc = get_memop(oi);
-
-#if defined(CONFIG_SOFTMMU)
-tcg_out_tlb_load(s, addr_regl, oi, label_ptr, 0);
-base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0);
-tcg_out_qemu_st_indexed(s, data_regl, base, TCG_REG_TMP2, opc);
-add_qemu_ldst_label(s, 0, oi, type,
-data_regl, addr_regl,
-s->code_ptr, label_ptr);
+tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 0);
+index = TCG_REG_TMP2;
 #else
-a_bits = get_alignment_bits(opc);
+unsigned a_bits = get_alignment_bits(opc);
 if (a_bits) {
-tcg_out_test_alignment(s, false, addr_regl, a_bits);
+tcg_out_test_alignment(s, false, addr_reg, a_bits);
 }
-base = tcg_out_zext_addr_if_32_bit(s, addr_regl, TCG_REG_TMP0);
-TCGReg guest_base_reg = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
-tcg_out_qemu_st_indexed(s, data_regl, base, guest_base_reg, opc);
+index = USE_GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_ZERO;
+#endif
+
+base = tcg_out_zext_addr_if_32_bit(s, addr_reg, TCG_REG_TMP0);
+tcg_out_qemu_st_indexed(s, data_reg, base, index, opc);
+
+#ifdef CONFIG_SOFTMMU
+add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
+s->code_ptr, label_ptr);
 #endif
 }
 
@@ -1564,16 +1550,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 break;
 
 case INDEX_op_qemu_ld_i32:
-tcg_out_qemu_ld(s, args, TCG_TYPE_I32);
+tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
 break;
 case INDEX_op_qemu_ld_i64:
-tcg_out_qemu_ld(s, args, TCG_TYPE_I64);
+

[PATCH v2 50/54] tcg/ppc: Adjust constraints on qemu_ld/st

2023-04-10 Thread Richard Henderson
The softmmu tlb uses TCG_REG_{TMP1,TMP2,R0}, not any of the normally
available registers.  Now that we handle overlap between inputs and
helper arguments, we can allow any allocatable reg.

Signed-off-by: Richard Henderson 
---
 tcg/ppc/tcg-target-con-set.h | 11 ---
 tcg/ppc/tcg-target-con-str.h |  2 --
 tcg/ppc/tcg-target.c.inc | 32 ++--
 3 files changed, 14 insertions(+), 31 deletions(-)

diff --git a/tcg/ppc/tcg-target-con-set.h b/tcg/ppc/tcg-target-con-set.h
index a1a345883d..f206b29205 100644
--- a/tcg/ppc/tcg-target-con-set.h
+++ b/tcg/ppc/tcg-target-con-set.h
@@ -12,18 +12,15 @@
 C_O0_I1(r)
 C_O0_I2(r, r)
 C_O0_I2(r, ri)
-C_O0_I2(S, S)
 C_O0_I2(v, r)
-C_O0_I3(S, S, S)
+C_O0_I3(r, r, r)
 C_O0_I4(r, r, ri, ri)
-C_O0_I4(S, S, S, S)
-C_O1_I1(r, L)
+C_O0_I4(r, r, r, r)
 C_O1_I1(r, r)
 C_O1_I1(v, r)
 C_O1_I1(v, v)
 C_O1_I1(v, vr)
 C_O1_I2(r, 0, rZ)
-C_O1_I2(r, L, L)
 C_O1_I2(r, rI, ri)
 C_O1_I2(r, rI, rT)
 C_O1_I2(r, r, r)
@@ -36,7 +33,7 @@ C_O1_I2(v, v, v)
 C_O1_I3(v, v, v, v)
 C_O1_I4(r, r, ri, rZ, rZ)
 C_O1_I4(r, r, r, ri, ri)
-C_O2_I1(L, L, L)
-C_O2_I2(L, L, L, L)
+C_O2_I1(r, r, r)
+C_O2_I2(r, r, r, r)
 C_O2_I4(r, r, rI, rZM, r, r)
 C_O2_I4(r, r, r, r, rI, rZM)
diff --git a/tcg/ppc/tcg-target-con-str.h b/tcg/ppc/tcg-target-con-str.h
index 298ca20d5b..f3bf030bc3 100644
--- a/tcg/ppc/tcg-target-con-str.h
+++ b/tcg/ppc/tcg-target-con-str.h
@@ -14,8 +14,6 @@ REGS('A', 1u << TCG_REG_R3)
 REGS('B', 1u << TCG_REG_R4)
 REGS('C', 1u << TCG_REG_R5)
 REGS('D', 1u << TCG_REG_R6)
-REGS('L', ALL_QLOAD_REGS)
-REGS('S', ALL_QSTORE_REGS)
 
 /*
  * Define constraint letters for constants:
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 613cd73583..e94f3131a3 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -93,18 +93,6 @@
 #define ALL_GENERAL_REGS  0xu
 #define ALL_VECTOR_REGS   0xull
 
-#ifdef CONFIG_SOFTMMU
-#define ALL_QLOAD_REGS \
-(ALL_GENERAL_REGS & \
- ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | (1 << TCG_REG_R5)))
-#define ALL_QSTORE_REGS \
-(ALL_GENERAL_REGS & ~((1 << TCG_REG_R3) | (1 << TCG_REG_R4) | \
-  (1 << TCG_REG_R5) | (1 << TCG_REG_R6)))
-#else
-#define ALL_QLOAD_REGS  (ALL_GENERAL_REGS & ~(1 << TCG_REG_R3))
-#define ALL_QSTORE_REGS ALL_QLOAD_REGS
-#endif
-
 TCGPowerISA have_isa;
 static bool have_isel;
 bool have_altivec;
@@ -3791,23 +3779,23 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 
 case INDEX_op_qemu_ld_i32:
 return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
-? C_O1_I1(r, L)
-: C_O1_I2(r, L, L));
+? C_O1_I1(r, r)
+: C_O1_I2(r, r, r));
 
 case INDEX_op_qemu_st_i32:
 return (TCG_TARGET_REG_BITS == 64 || TARGET_LONG_BITS == 32
-? C_O0_I2(S, S)
-: C_O0_I3(S, S, S));
+? C_O0_I2(r, r)
+: C_O0_I3(r, r, r));
 
 case INDEX_op_qemu_ld_i64:
-return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, L)
-: TARGET_LONG_BITS == 32 ? C_O2_I1(L, L, L)
-: C_O2_I2(L, L, L, L));
+return (TCG_TARGET_REG_BITS == 64 ? C_O1_I1(r, r)
+: TARGET_LONG_BITS == 32 ? C_O2_I1(r, r, r)
+: C_O2_I2(r, r, r, r));
 
 case INDEX_op_qemu_st_i64:
-return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(S, S)
-: TARGET_LONG_BITS == 32 ? C_O0_I3(S, S, S)
-: C_O0_I4(S, S, S, S));
+return (TCG_TARGET_REG_BITS == 64 ? C_O0_I2(r, r)
+: TARGET_LONG_BITS == 32 ? C_O0_I3(r, r, r)
+: C_O0_I4(r, r, r, r));
 
 case INDEX_op_add_vec:
 case INDEX_op_sub_vec:
-- 
2.34.1




[PATCH v2 53/54] tcg/s390x: Use ALGFR in constructing host address for qemu_ld/st

2023-04-10 Thread Richard Henderson
Rather than zero-extend the guest address into a register,
use an add instruction which zero-extends the second input.
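
ALGFR is a 64-bit add whose second operand is a 32-bit register value
zero-extended, so the extension is folded into the address arithmetic.
A C sketch of what the single instruction computes, for a 32-bit guest:

    static uint64_t compose_host_addr(uint64_t tlb_addend, uint32_t guest_addr)
    {
        return tlb_addend + (uint64_t)guest_addr;   /* one ALGFR */
    }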

Signed-off-by: Richard Henderson 
---
 tcg/s390x/tcg-target.c.inc | 38 ++
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index 6d7b056931..42d3e13e08 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -149,6 +149,7 @@ typedef enum S390Opcode {
 RRE_ALGR= 0xb90a,
 RRE_ALCR= 0xb998,
 RRE_ALCGR   = 0xb988,
+RRE_ALGFR   = 0xb91a,
 RRE_CGR = 0xb920,
 RRE_CLGR= 0xb921,
 RRE_DLGR= 0xb987,
@@ -1716,8 +1717,10 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg data,
 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 19));
 
-/* Load and compare a TLB entry, leaving the flags set.  Loads the TLB
-   addend into R2.  Returns a register with the santitized guest address.  */
+/*
+ * Load and compare a TLB entry, leaving the flags set.
+ * Loads the TLB addend and returns the register.
+ */
 static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
int mem_index, bool is_ld)
 {
@@ -1761,12 +1764,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, MemOp opc,
 
 tcg_out_insn(s, RXY, LG, TCG_REG_R2, TCG_REG_R2, TCG_REG_NONE,
  offsetof(CPUTLBEntry, addend));
-
-if (TARGET_LONG_BITS == 32) {
-tcg_out_ext32u(s, TCG_REG_R3, addr_reg);
-return TCG_REG_R3;
-}
-return addr_reg;
+return TCG_REG_R2;
 }
 
 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
@@ -1892,16 +1890,20 @@ static void tcg_out_qemu_ld(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
 #ifdef CONFIG_SOFTMMU
 unsigned mem_index = get_mmuidx(oi);
 tcg_insn_unit *label_ptr;
-TCGReg base_reg;
+TCGReg addend;
 
-base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
+addend = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 1);
 
 tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
 label_ptr = s->code_ptr;
 s->code_ptr += 1;
 
-tcg_out_qemu_ld_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
-
+if (TARGET_LONG_BITS == 32) {
+tcg_out_insn(s, RRE, ALGFR, addend, addr_reg);
+tcg_out_qemu_ld_direct(s, opc, data_reg, addend, TCG_REG_NONE, 0);
+} else {
+tcg_out_qemu_ld_direct(s, opc, data_reg, addend, addr_reg, 0);
+}
 add_qemu_ldst_label(s, 1, oi, data_type, data_reg, addr_reg,
 s->code_ptr, label_ptr);
 #else
@@ -1924,16 +1926,20 @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg,
 #ifdef CONFIG_SOFTMMU
 unsigned mem_index = get_mmuidx(oi);
 tcg_insn_unit *label_ptr;
-TCGReg base_reg;
+TCGReg addend;
 
-base_reg = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
+addend = tcg_out_tlb_read(s, addr_reg, opc, mem_index, 0);
 
 tcg_out16(s, RI_BRC | (S390_CC_NE << 4));
 label_ptr = s->code_ptr;
 s->code_ptr += 1;
 
-tcg_out_qemu_st_direct(s, opc, data_reg, base_reg, TCG_REG_R2, 0);
-
+if (TARGET_LONG_BITS == 32) {
+tcg_out_insn(s, RRE, ALGFR, addend, addr_reg);
+tcg_out_qemu_st_direct(s, opc, data_reg, addend, TCG_REG_NONE, 0);
+} else {
+tcg_out_qemu_st_direct(s, opc, data_reg, addend, addr_reg, 0);
+}
 add_qemu_ldst_label(s, 0, oi, data_type, data_reg, addr_reg,
 s->code_ptr, label_ptr);
 #else
-- 
2.34.1




[PATCH v2 44/54] tcg/s390x: Convert tcg_out_qemu_{ld,st}_slow_path

2023-04-10 Thread Richard Henderson
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
and tcg_out_st_helper_args.
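
The ldst_helper_param added below tells the common helper-args code
which scratch registers this backend can spare.  A sketch of the
parameter struct, inferred from its uses in this series (the ra_gen
hook appears in the i386 conversion; the array bound is an assumption):

    typedef struct TCGLdstHelperParam {
        /* Optional: materialize the return address, into arg if >= 0. */
        TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg);
        unsigned ntmp;   /* number of usable scratch registers */
        TCGReg tmp[3];   /* the scratch registers themselves */
    } TCGLdstHelperParam;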

Signed-off-by: Richard Henderson 
---
 tcg/s390x/tcg-target.c.inc | 35 ++-
 1 file changed, 10 insertions(+), 25 deletions(-)

diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index d610fe4fbb..6d7b056931 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -1784,26 +1784,22 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
 label->label_ptr[0] = label_ptr;
 }
 
+static const TCGLdstHelperParam ldst_helper_param = {
+.ntmp = 1, .tmp = { TCG_TMP0 }
+};
+
 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
-TCGReg addr_reg = lb->addrlo_reg;
-TCGReg data_reg = lb->datalo_reg;
-MemOpIdx oi = lb->oi;
-MemOp opc = get_memop(oi);
+MemOp opc = get_memop(lb->oi);
 
 if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
  (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
 return false;
 }
 
-tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
-if (TARGET_LONG_BITS == 64) {
-tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
-}
-tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, oi);
-tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R5, (uintptr_t)lb->raddr);
-tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)]);
-tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_R2);
+tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
+tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
+tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
 
 tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
 return true;
@@ -1811,25 +1807,14 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 
 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
-TCGReg addr_reg = lb->addrlo_reg;
-TCGReg data_reg = lb->datalo_reg;
-MemOpIdx oi = lb->oi;
-MemOp opc = get_memop(oi);
-MemOp size = opc & MO_SIZE;
+MemOp opc = get_memop(lb->oi);
 
 if (!patch_reloc(lb->label_ptr[0], R_390_PC16DBL,
  (intptr_t)tcg_splitwx_to_rx(s->code_ptr), 2)) {
 return false;
 }
 
-tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R2, TCG_AREG0);
-if (TARGET_LONG_BITS == 64) {
-tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, addr_reg);
-}
-tcg_out_movext(s, size == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
-   TCG_REG_R4, lb->type, size, data_reg);
-tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, oi);
-tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, (uintptr_t)lb->raddr);
+tcg_out_st_helper_args(s, lb, &ldst_helper_param);
 tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
 
 tgen_gotoi(s, S390_CC_ALWAYS, lb->raddr);
-- 
2.34.1




[PATCH v2 09/54] tcg: Split out tcg_out_exts_i32_i64

2023-04-10 Thread Richard Henderson
We will need a backend interface for type extension with sign.
Use it in tcg_reg_alloc_op in the meantime.

Signed-off-by: Richard Henderson 
---
 tcg/tcg.c| 4 
 tcg/aarch64/tcg-target.c.inc | 9 ++---
 tcg/arm/tcg-target.c.inc | 5 +
 tcg/i386/tcg-target.c.inc| 9 ++---
 tcg/loongarch64/tcg-target.c.inc | 7 ++-
 tcg/mips/tcg-target.c.inc| 7 ++-
 tcg/ppc/tcg-target.c.inc | 9 ++---
 tcg/riscv/tcg-target.c.inc   | 7 ++-
 tcg/s390x/tcg-target.c.inc   | 9 ++---
 tcg/sparc64/tcg-target.c.inc | 9 ++---
 tcg/tci/tcg-target.c.inc | 7 ++-
 11 files changed, 63 insertions(+), 19 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index a182771c01..b0498170ea 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -111,6 +111,7 @@ static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
+static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
 static void tcg_out_goto_tb(TCGContext *s, int which);
@@ -4529,6 +4530,9 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
 case INDEX_op_ext32u_i64:
 tcg_out_ext32u(s, new_args[0], new_args[1]);
 break;
+case INDEX_op_ext_i32_i64:
+tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
+break;
 default:
 if (def->flags & TCG_OPF_VECTOR) {
 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index bca5f03dfb..58596eaa4b 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -1434,6 +1434,11 @@ static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, rd, rn);
 }
 
+static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
+{
+tcg_out_ext32s(s, rd, rn);
+}
+
 static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
TCGReg rd, TCGReg rn)
 {
@@ -2260,9 +2265,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 }
 break;
 
-case INDEX_op_ext_i32_i64:
-tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
-break;
 case INDEX_op_extu_i32_i64:
 tcg_out_ext32u(s, a0, a1);
 break;
@@ -2332,6 +2334,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 case INDEX_op_ext16u_i32:
 case INDEX_op_ext32s_i64:
 case INDEX_op_ext32u_i64:
+case INDEX_op_ext_i32_i64:
 default:
 g_assert_not_reached();
 }
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 5c48b92f83..2ca25a3d81 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -1003,6 +1003,11 @@ static void tcg_out_ext32u(TCGContext *s, TCGReg rd, TCGReg rn)
 g_assert_not_reached();
 }
 
+static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg rd, TCGReg rn)
+{
+g_assert_not_reached();
+}
+
 static void tcg_out_bswap16(TCGContext *s, ARMCond cond,
 TCGReg rd, TCGReg rn, int flags)
 {
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 7d63403693..fd4c4e20c8 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -1299,6 +1299,11 @@ static void tcg_out_ext32s(TCGContext *s, TCGReg dest, TCGReg src)
 tcg_out_modrm(s, OPC_MOVSLQ, dest, src);
 }
 
+static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
+{
+tcg_out_ext32s(s, dest, src);
+}
+
 static inline void tcg_out_bswap64(TCGContext *s, int reg)
 {
 tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
@@ -2757,9 +2762,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 case INDEX_op_extrl_i64_i32:
 tcg_out_ext32u(s, a0, a1);
 break;
-case INDEX_op_ext_i32_i64:
-tcg_out_ext32s(s, a0, a1);
-break;
 case INDEX_op_extrh_i64_i32:
 tcg_out_shifti(s, SHIFT_SHR + P_REXW, a0, 32);
 break;
@@ -2838,6 +2840,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
 case INDEX_op_ext16u_i64:
 case INDEX_op_ext32s_i64:
 case INDEX_op_ext32u_i64:
+case INDEX_op_ext_i32_i64:
 default:
 g_assert_not_reached();
 }
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index d2511eda7a..989632e08a 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -456,6 +456,11 @@ static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg)
 tcg_out_opc_addi_w(s, ret, arg, 0);
 }
 
+static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+

[PATCH v2 25/54] tcg/ppc: Rationalize args to tcg_out_qemu_{ld,st}

2023-04-10 Thread Richard Henderson
Interpret the variable argument placement in the caller.
Mark the argument registers const, because they must be passed to
add_qemu_ldst_label unmodified.  This requires a bit of local
variable renaming, because addrlo was being modified.

Pass data_type instead of is64 -- there are several places where
we already convert back from bool to type.  Clean things up by
using type throughout.
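
The bool-to-type round trip being removed looks like this in miniature
(sketch):

    /* Before: callers passed is_64 and use sites reconstructed: */
    TCGType type = is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
    /* After: callers pass TCG_TYPE_I32 / TCG_TYPE_I64 directly and
       derive a bool only where one is genuinely needed: */
    bool is_64 = (type == TCG_TYPE_I64);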

Signed-off-by: Richard Henderson 
---
 tcg/ppc/tcg-target.c.inc | 164 +--
 1 file changed, 89 insertions(+), 75 deletions(-)

diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index 77abb7d20c..90093a6509 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -2118,7 +2118,8 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, MemOp opc,
 /* Record the context of a call to the out of line helper code for the slow
path for a load or store, so that we can later generate the correct
helper code.  */
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
+TCGType type, MemOpIdx oi,
 TCGReg datalo_reg, TCGReg datahi_reg,
 TCGReg addrlo_reg, TCGReg addrhi_reg,
 tcg_insn_unit *raddr, tcg_insn_unit *lptr)
@@ -2126,6 +2127,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
 TCGLabelQemuLdst *label = new_ldst_label(s);
 
 label->is_ld = is_ld;
+label->type = type;
 label->oi = oi;
 label->datalo_reg = datalo_reg;
 label->datahi_reg = datahi_reg;
@@ -2288,30 +2290,19 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 
 #endif /* SOFTMMU */
 
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
+static void tcg_out_qemu_ld(TCGContext *s,
+const TCGReg datalo, const TCGReg datahi,
+const TCGReg addrlo, const TCGReg addrhi,
+const MemOpIdx oi, TCGType data_type)
 {
-TCGReg datalo, datahi, addrlo, rbase;
-TCGReg addrhi __attribute__((unused));
-MemOpIdx oi;
-MemOp opc, s_bits;
+MemOp opc = get_memop(oi);
+MemOp s_bits = opc & MO_SIZE;
+TCGReg rbase, index;
+
 #ifdef CONFIG_SOFTMMU
-int mem_index;
 tcg_insn_unit *label_ptr;
-#else
-unsigned a_bits;
-#endif
 
-datalo = *args++;
-datahi = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
-addrlo = *args++;
-addrhi = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
-oi = *args++;
-opc = get_memop(oi);
-s_bits = opc & MO_SIZE;
-
-#ifdef CONFIG_SOFTMMU
-mem_index = get_mmuidx(oi);
-addrlo = tcg_out_tlb_read(s, opc, addrlo, addrhi, mem_index, true);
+index = tcg_out_tlb_read(s, opc, addrlo, addrhi, get_mmuidx(oi), true);
 
 /* Load a pointer into the current opcode w/conditional branch-link. */
 label_ptr = s->code_ptr;
@@ -2319,80 +2310,71 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
 
 rbase = TCG_REG_R3;
 #else  /* !CONFIG_SOFTMMU */
-a_bits = get_alignment_bits(opc);
+unsigned a_bits = get_alignment_bits(opc);
 if (a_bits) {
 tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
 }
 rbase = guest_base ? TCG_GUEST_BASE_REG : 0;
 if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
 tcg_out_ext32u(s, TCG_REG_TMP1, addrlo);
-addrlo = TCG_REG_TMP1;
+index = TCG_REG_TMP1;
+} else {
+index = addrlo;
 }
 #endif
 
 if (TCG_TARGET_REG_BITS == 32 && s_bits == MO_64) {
 if (opc & MO_BSWAP) {
-tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
-tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
+tcg_out32(s, ADDI | TAI(TCG_REG_R0, index, 4));
+tcg_out32(s, LWBRX | TAB(datalo, rbase, index));
 tcg_out32(s, LWBRX | TAB(datahi, rbase, TCG_REG_R0));
 } else if (rbase != 0) {
-tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, 4));
-tcg_out32(s, LWZX | TAB(datahi, rbase, addrlo));
+tcg_out32(s, ADDI | TAI(TCG_REG_R0, index, 4));
+tcg_out32(s, LWZX | TAB(datahi, rbase, index));
 tcg_out32(s, LWZX | TAB(datalo, rbase, TCG_REG_R0));
-} else if (addrlo == datahi) {
-tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
-tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
+} else if (index == datahi) {
+tcg_out32(s, LWZ | TAI(datalo, index, 4));
+tcg_out32(s, LWZ | TAI(datahi, index, 0));
 } else {
-tcg_out32(s, LWZ | TAI(datahi, addrlo, 0));
-tcg_out32(s, LWZ | TAI(datalo, addrlo, 4));
+tcg_out32(s, LWZ | TAI(datahi, index, 0));
+tcg_out32(s, LWZ | TAI(datalo, index, 4));
 }
 } else {
 uint32_t insn = 

[PATCH v2 03/54] tcg: Split out tcg_out_ext8s

2023-04-10 Thread Richard Henderson
We will need a backend interface for performing 8-bit sign-extend.
Use it in tcg_reg_alloc_op in the meantime.
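
Semantically the new hook is an 8-bit sign extension whose result width
is selected by the TCGType argument; as a C sketch:

    static int32_t ext8s_i32(int8_t v) { return v; }  /* TCG_TYPE_I32 */
    static int64_t ext8s_i64(int8_t v) { return v; }  /* TCG_TYPE_I64 */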

Signed-off-by: Richard Henderson 
---
 tcg/tcg.c| 21 -
 tcg/aarch64/tcg-target.c.inc | 11 +++
 tcg/arm/tcg-target.c.inc | 10 --
 tcg/i386/tcg-target.c.inc| 10 +-
 tcg/loongarch64/tcg-target.c.inc | 11 ---
 tcg/mips/tcg-target.c.inc| 12 
 tcg/ppc/tcg-target.c.inc | 10 --
 tcg/riscv/tcg-target.c.inc   |  9 +++--
 tcg/s390x/tcg-target.c.inc   | 10 +++---
 tcg/sparc64/tcg-target.c.inc |  7 +++
 tcg/tci/tcg-target.c.inc | 21 -
 11 files changed, 81 insertions(+), 51 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index c3a8578951..76ba3e28cd 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -105,6 +105,7 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
 static void tcg_out_movi(TCGContext *s, TCGType type,
  TCGReg ret, tcg_target_long arg);
+static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
 static void tcg_out_goto_tb(TCGContext *s, int which);
@@ -4496,11 +4497,21 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
 }
 
 /* emit instruction */
-if (def->flags & TCG_OPF_VECTOR) {
-tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
-   new_args, const_args);
-} else {
-tcg_out_op(s, op->opc, new_args, const_args);
+switch (op->opc) {
+case INDEX_op_ext8s_i32:
+tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
+break;
+case INDEX_op_ext8s_i64:
+tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
+break;
+default:
+if (def->flags & TCG_OPF_VECTOR) {
+tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
+   new_args, const_args);
+} else {
+tcg_out_op(s, op->opc, new_args, const_args);
+}
+break;
 }
 
 /* move the outputs in the correct register if needed */
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 1315cb92ab..4f4f814293 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -1419,6 +1419,11 @@ static inline void tcg_out_sxt(TCGContext *s, TCGType ext, MemOp s_bits,
 tcg_out_sbfm(s, ext, rd, rn, 0, bits);
 }
 
+static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
+{
+tcg_out_sxt(s, type, MO_8, rd, rn);
+}
+
 static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
TCGReg rd, TCGReg rn)
 {
@@ -2230,10 +2235,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 }
 break;
 
-case INDEX_op_ext8s_i64:
-case INDEX_op_ext8s_i32:
-tcg_out_sxt(s, ext, MO_8, a0, a1);
-break;
 case INDEX_op_ext16s_i64:
 case INDEX_op_ext16s_i32:
 tcg_out_sxt(s, ext, MO_16, a0, a1);
@@ -2310,6 +2311,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 case INDEX_op_call: /* Always emitted via tcg_out_call.  */
 case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
 case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
+case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
+case INDEX_op_ext8s_i64:
 default:
 g_assert_not_reached();
 }
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index b4daa97e7a..04a860897f 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -958,10 +958,10 @@ static void tcg_out_udiv(TCGContext *s, ARMCond cond,
 tcg_out32(s, 0x0730f010 | (cond << 28) | (rd << 16) | rn | (rm << 8));
 }
 
-static void tcg_out_ext8s(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
+static void tcg_out_ext8s(TCGContext *s, TCGType t, TCGReg rd, TCGReg rn)
 {
 /* sxtb */
-tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | rn);
+tcg_out32(s, 0x06af0070 | (COND_AL << 28) | (rd << 12) | rn);
 }
 
 static void __attribute__((unused))
@@ -1533,7 +1533,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 datahi = lb->datahi_reg;
 switch (opc & MO_SSIZE) {
 case MO_SB:
-tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0);
+tcg_out_ext8s(s, TCG_TYPE_I32, datalo, TCG_REG_R0);
 break;
 case MO_SW:
 tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0);
@@ -2244,9 +2244,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 tcg_out_bswap32(s, COND_AL, args[0], args[1]);
 break;
 
-case INDEX_op_ext8s_i32:
-

[PATCH v2 23/54] tcg/mips: Rationalize args to tcg_out_qemu_{ld,st}

2023-04-10 Thread Richard Henderson
Interpret the variable argument placement in the caller.
Mark the argument registers const, because they must be passed to
add_qemu_ldst_label unmodified.  There are several places where we
already convert back from bool to type.  Clean things up by using
type throughout.

Signed-off-by: Richard Henderson 
---
 tcg/mips/tcg-target.c.inc | 188 --
 1 file changed, 97 insertions(+), 91 deletions(-)

diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index a83ebe8729..ee5826c2b5 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -1479,7 +1479,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 #endif /* SOFTMMU */
 
 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
-   TCGReg base, MemOp opc, bool is_64)
+   TCGReg base, MemOp opc, TCGType type)
 {
 switch (opc & (MO_SSIZE | MO_BSWAP)) {
 case MO_UB:
@@ -1503,7 +1503,7 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
 tcg_out_opc_imm(s, OPC_LH, lo, base, 0);
 break;
 case MO_UL | MO_BSWAP:
-if (TCG_TARGET_REG_BITS == 64 && is_64) {
+if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
 if (use_mips32r2_instructions) {
 tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
 tcg_out_bswap32(s, lo, lo, TCG_BSWAP_IZ | TCG_BSWAP_OZ);
@@ -1528,7 +1528,7 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
 }
 break;
 case MO_UL:
-if (TCG_TARGET_REG_BITS == 64 && is_64) {
+if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64) {
 tcg_out_opc_imm(s, OPC_LWU, lo, base, 0);
 break;
 }
@@ -1583,7 +1583,7 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi,
 }
 
 static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
-TCGReg base, MemOp opc, bool is_64)
+TCGReg base, MemOp opc, TCGType type)
 {
 const MIPSInsn lw1 = MIPS_BE ? OPC_LWL : OPC_LWR;
 const MIPSInsn lw2 = MIPS_BE ? OPC_LWR : OPC_LWL;
@@ -1623,7 +1623,7 @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
 case MO_UL:
 tcg_out_opc_imm(s, lw1, lo, base, 0);
 tcg_out_opc_imm(s, lw2, lo, base, 3);
-if (TCG_TARGET_REG_BITS == 64 && is_64 && !sgn) {
+if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64 && !sgn) {
 tcg_out_ext32u(s, lo, lo);
 }
 break;
@@ -1634,18 +1634,18 @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
 tcg_out_opc_imm(s, lw1, lo, base, 0);
 tcg_out_opc_imm(s, lw2, lo, base, 3);
 tcg_out_bswap32(s, lo, lo,
-TCG_TARGET_REG_BITS == 64 && is_64
+TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64
 ? (sgn ? TCG_BSWAP_OS : TCG_BSWAP_OZ) : 0);
 } else {
 const tcg_insn_unit *subr =
-(TCG_TARGET_REG_BITS == 64 && is_64 && !sgn
+(TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I64 && !sgn
  ? bswap32u_addr : bswap32_addr);
 
 tcg_out_opc_imm(s, lw1, TCG_TMP0, base, 0);
 tcg_out_bswap_subr(s, subr);
 /* delay slot */
 tcg_out_opc_imm(s, lw2, TCG_TMP0, base, 3);
-tcg_out_mov(s, is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32, lo, TCG_TMP3);
+tcg_out_mov(s, type, lo, TCG_TMP3);
 }
 break;
 
@@ -1702,68 +1702,60 @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi,
 }
 }
 
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
+static void tcg_out_qemu_ld(TCGContext *s,
+const TCGReg datalo, const TCGReg datahi,
+const TCGReg addrlo, const TCGReg addrhi,
+const MemOpIdx oi, TCGType data_type)
 {
-TCGReg addr_regl, addr_regh __attribute__((unused));
-TCGReg data_regl, data_regh;
-MemOpIdx oi;
-MemOp opc;
-#if defined(CONFIG_SOFTMMU)
-tcg_insn_unit *label_ptr[2];
-#else
-#endif
-unsigned a_bits, s_bits;
-TCGReg base = TCG_REG_A0;
-
-data_regl = *args++;
-data_regh = (TCG_TARGET_REG_BITS == 32 && is_64 ? *args++ : 0);
-addr_regl = *args++;
-addr_regh = (TCG_TARGET_REG_BITS < TARGET_LONG_BITS ? *args++ : 0);
-oi = *args++;
-opc = get_memop(oi);
-a_bits = get_alignment_bits(opc);
-s_bits = opc & MO_SIZE;
+MemOp opc = get_memop(oi);
+unsigned a_bits = get_alignment_bits(opc);
+unsigned s_bits = opc & MO_SIZE;
+TCGReg base;
 
 /*
  * R6 removes the left/right instructions but requires the
  * system to support misaligned memory 

[PATCH v2 47/54] tcg/mips: Reorg tcg_out_tlb_load

2023-04-10 Thread Richard Henderson
Compare the address vs the tlb entry with sign-extended values.
This simplifies the page+alignment mask constant, and the
generation of the last byte address for the misaligned test.

Move the tlb addend load up, and the zero-extension down.

This frees up a register, which allows us to drop the 'base'
parameter, with which the caller was giving us a 5th temporary.
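
A C sketch of the comparator value the fast path now computes (a_mask
and s_mask are the alignment and size masks from the diff below):

    /* Aligned enough (a_mask >= s_mask): mask the address directly.
       Underaligned: check the last byte of the access instead, so an
       access crossing a page boundary also fails the compare. */
    target_ulong last = addr + (a_mask < s_mask ? s_mask - a_mask : 0);
    target_ulong cmp  = last & (TARGET_PAGE_MASK | a_mask);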

Signed-off-by: Richard Henderson 
---
 tcg/mips/tcg-target.c.inc | 51 ++-
 1 file changed, 24 insertions(+), 27 deletions(-)

diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index b6db8c6884..2a6376cd0a 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -370,6 +370,8 @@ typedef enum {
 ALIAS_PADDI= sizeof(void *) == 4 ? OPC_ADDIU : OPC_DADDIU,
 ALIAS_TSRL = TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32
  ? OPC_SRL : OPC_DSRL,
+ALIAS_TADDI= TARGET_LONG_BITS == 32 || TCG_TARGET_REG_BITS == 32
+ ? OPC_ADDIU : OPC_DADDIU,
 } MIPSInsn;
 
 /*
@@ -1125,12 +1127,12 @@ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
 
 /*
  * Perform the tlb comparison operation.
- * The complete host address is placed in BASE.
  * Clobbers TMP0, TMP1, TMP2, TMP3.
+ * Returns the register containing the complete host address.
  */
-static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
- TCGReg addrh, MemOpIdx oi,
- tcg_insn_unit *label_ptr[2], bool is_load)
+static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addrl, TCGReg addrh,
+   MemOpIdx oi, bool is_load,
+   tcg_insn_unit *label_ptr[2])
 {
 MemOp opc = get_memop(oi);
 unsigned a_bits = get_alignment_bits(opc);
@@ -1144,7 +1146,6 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
 int add_off = offsetof(CPUTLBEntry, addend);
 int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
: offsetof(CPUTLBEntry, addr_write));
-target_ulong tlb_mask;
 
 /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
 tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_AREG0, mask_off);
@@ -1162,15 +1163,12 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
 if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
 tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + LO_OFF);
 } else {
-tcg_out_ldst(s, (TARGET_LONG_BITS == 64 ? OPC_LD
- : TCG_TARGET_REG_BITS == 64 ? OPC_LWU : OPC_LW),
- TCG_TMP0, TCG_TMP3, cmp_off);
+tcg_out_ld(s, TCG_TYPE_TL, TCG_TMP0, TCG_TMP3, cmp_off);
 }
 
-/* Zero extend a 32-bit guest address for a 64-bit host. */
-if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
-tcg_out_ext32u(s, base, addrl);
-addrl = base;
+if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
+/* Load the tlb addend for the fast path.  */
+tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
 }
 
 /*
@@ -1178,18 +1176,18 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
  * For unaligned accesses, compare against the end of the access to
  * verify that it does not cross a page boundary.
  */
-tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
-tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1, tlb_mask);
-if (a_mask >= s_mask) {
-tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl);
-} else {
-tcg_out_opc_imm(s, ALIAS_PADDI, TCG_TMP2, addrl, s_mask - a_mask);
+tcg_out_movi(s, TCG_TYPE_TL, TCG_TMP1, TARGET_PAGE_MASK | a_mask);
+if (a_mask < s_mask) {
+tcg_out_opc_imm(s, ALIAS_TADDI, TCG_TMP2, addrl, s_mask - a_mask);
 tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, TCG_TMP2);
+} else {
+tcg_out_opc_reg(s, OPC_AND, TCG_TMP1, TCG_TMP1, addrl);
 }
 
-if (TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
-/* Load the tlb addend for the fast path.  */
-tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
+/* Zero extend a 32-bit guest address for a 64-bit host. */
+if (TCG_TARGET_REG_BITS > TARGET_LONG_BITS) {
+tcg_out_ext32u(s, TCG_TMP2, addrl);
+addrl = TCG_TMP2;
 }
 
 label_ptr[0] = s->code_ptr;
@@ -1201,14 +1199,15 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
 tcg_out_ldst(s, OPC_LW, TCG_TMP0, TCG_TMP3, cmp_off + HI_OFF);
 
 /* Load the tlb addend for the fast path.  */
-tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP2, TCG_TMP3, add_off);
+tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP3, TCG_TMP3, add_off);
 
 label_ptr[1] = s->code_ptr;
 tcg_out_opc_br(s, OPC_BNE, addrh, TCG_TMP0);
 }
 
 /* delay slot */
-tcg_out_opc_reg(s, ALIAS_PADD, base, TCG_TMP2, addrl);
+tcg_out_opc_reg(s, ALIAS_PADD, TCG_TMP3, TCG_TMP3, addrl);
+return TCG_TMP3;
 }
 

[PATCH v2 36/54] tcg/i386: Convert tcg_out_qemu_ld_slow_path

2023-04-10 Thread Richard Henderson
Use tcg_out_ld_helper_args and tcg_out_ld_helper_ret.

Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target.c.inc | 69 ---
 1 file changed, 28 insertions(+), 41 deletions(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 2b2759d696..0b3d7db14c 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -1909,13 +1909,37 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
 }
 }
 
+/*
+ * Because i686 has no register parameters and because x86_64 has xchg
+ * to handle addr/data register overlap, we have placed all input arguments
+ * before we might need a scratch reg.
+ *
+ * Even then, a scratch is only needed for l->raddr.  Rather than expose
+ * a general-purpose scratch when we don't actually know it's available,
+ * use the ra_gen hook to load into RAX if needed.
+ */
+#if TCG_TARGET_REG_BITS == 64
+static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
+{
+if (arg < 0) {
+arg = TCG_REG_RAX;
+}
+tcg_out_movi(s, TCG_TYPE_PTR, arg, (uintptr_t)l->raddr);
+return arg;
+}
+static const TCGLdstHelperParam ldst_helper_param = {
+.ra_gen = ldst_ra_gen
+};
+#else
+static const TCGLdstHelperParam ldst_helper_param = { };
+#endif
+
 /*
  * Generate code for the slow path for a load at the end of block
  */
 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 {
-MemOpIdx oi = l->oi;
-MemOp opc = get_memop(oi);
+MemOp opc = get_memop(l->oi);
tcg_insn_unit **label_ptr = &l->label_ptr[0];
 
 /* resolve label address */
@@ -1924,48 +1948,11 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 tcg_patch32(label_ptr[1], s->code_ptr - label_ptr[1] - 4);
 }
 
-if (TCG_TARGET_REG_BITS == 32) {
-int ofs = 0;
-
-tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
-ofs += 4;
-
-tcg_out_st(s, TCG_TYPE_I32, l->addrlo_reg, TCG_REG_ESP, ofs);
-ofs += 4;
-
-if (TARGET_LONG_BITS == 64) {
-tcg_out_st(s, TCG_TYPE_I32, l->addrhi_reg, TCG_REG_ESP, ofs);
-ofs += 4;
-}
-
-tcg_out_sti(s, TCG_TYPE_I32, oi, TCG_REG_ESP, ofs);
-ofs += 4;
-
-tcg_out_sti(s, TCG_TYPE_PTR, (uintptr_t)l->raddr, TCG_REG_ESP, ofs);
-} else {
-tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
-/* The second argument is already loaded with addrlo.  */
-tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], oi);
-tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
- (uintptr_t)l->raddr);
-}
+tcg_out_ld_helper_args(s, l, &ldst_helper_param);
 
 tcg_out_branch(s, 1, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
 
-if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
-TCGMovExtend ext[2] = {
-{ .dst = l->datalo_reg, .dst_type = TCG_TYPE_I32,
-  .src = TCG_REG_EAX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
-{ .dst = l->datahi_reg, .dst_type = TCG_TYPE_I32,
-  .src = TCG_REG_EDX, .src_type = TCG_TYPE_I32, .src_ext = MO_UL },
-};
-tcg_out_movext2(s, &ext[0], &ext[1], -1);
-} else {
-tcg_out_movext(s, l->type, l->datalo_reg,
-   TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_EAX);
-}
-
-/* Jump to the code corresponding to next IR of qemu_st */
+tcg_out_ld_helper_ret(s, l, false, &ldst_helper_param);
 tcg_out_jmp(s, l->raddr);
 return true;
 }
-- 
2.34.1




[PATCH v2 32/54] tcg: Replace REG_P with arg_loc_reg_p

2023-04-10 Thread Richard Henderson
An inline function is safer than a macro, and REG_P
was rather too generic.

Signed-off-by: Richard Henderson 
---
 tcg/tcg-internal.h |  4 
 tcg/tcg.c  | 16 +---
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/tcg/tcg-internal.h b/tcg/tcg-internal.h
index e542a4e9b7..0f1ba01a9a 100644
--- a/tcg/tcg-internal.h
+++ b/tcg/tcg-internal.h
@@ -58,10 +58,6 @@ typedef struct TCGCallArgumentLoc {
 unsigned tmp_subindex   : 2;
 } TCGCallArgumentLoc;
 
-/* Avoid "unsigned < 0 is always false" Werror, when iarg_regs is empty. */
-#define REG_P(L) \
-((int)(L)->arg_slot < (int)ARRAY_SIZE(tcg_target_call_iarg_regs))
-
 typedef struct TCGHelperInfo {
 void *func;
 const char *name;
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 6f5daaee5f..fa28db0188 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -806,6 +806,16 @@ static void init_ffi_layouts(void)
 }
 #endif /* CONFIG_TCG_INTERPRETER */
 
+static inline bool arg_slot_reg_p(unsigned arg_slot)
+{
+/*
+ * Split the sizeof away from the comparison to avoid Werror from
+ * "unsigned < 0 is always false", when iarg_regs is empty.
+ */
+unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
+return arg_slot < nreg;
+}
+
 typedef struct TCGCumulativeArgs {
 int arg_idx;/* tcg_gen_callN args[] */
 int info_in_idx;/* TCGHelperInfo in[] */
@@ -3231,7 +3241,7 @@ liveness_pass_1(TCGContext *s)
 case TCG_CALL_ARG_NORMAL:
 case TCG_CALL_ARG_EXTEND_U:
 case TCG_CALL_ARG_EXTEND_S:
-if (REG_P(loc)) {
+if (arg_slot_reg_p(loc->arg_slot)) {
 *la_temp_pref(ts) = 0;
 break;
 }
@@ -3258,7 +3268,7 @@ liveness_pass_1(TCGContext *s)
 case TCG_CALL_ARG_NORMAL:
 case TCG_CALL_ARG_EXTEND_U:
 case TCG_CALL_ARG_EXTEND_S:
-if (REG_P(loc)) {
+if (arg_slot_reg_p(loc->arg_slot)) {
 tcg_regset_set_reg(*la_temp_pref(ts),
 tcg_target_call_iarg_regs[loc->arg_slot]);
 }
@@ -4833,7 +4843,7 @@ static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts,
 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
 TCGTemp *ts, TCGRegSet *allocated_regs)
 {
-if (REG_P(l)) {
+if (arg_slot_reg_p(l->arg_slot)) {
 TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
 load_arg_reg(s, reg, ts, *allocated_regs);
 tcg_regset_set_reg(*allocated_regs, reg);
-- 
2.34.1
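
A stand-alone illustration of the warning being worked around (hypothetical
code, not from the patch; NR_IARG_REGS stands in for an empty
tcg_target_call_iarg_regs):

    #include <stdbool.h>
    #include <stdio.h>

    #define NR_IARG_REGS 0   /* host passing no integer args in registers */

    static inline bool slot_is_reg(unsigned slot)
    {
        /*
         * Writing "slot < NR_IARG_REGS" directly trips
         * -Werror=type-limits ("comparison of unsigned expression in
         * '< 0' is always false").  Splitting the constant out into a
         * variable sidesteps the warning, and the inline function keeps
         * the operands properly typed, where the old REG_P macro had to
         * cast both sides to int.
         */
        unsigned nreg = NR_IARG_REGS;
        return slot < nreg;
    }

    int main(void)
    {
        printf("%d\n", slot_is_reg(0));   /* 0: no register slots */
        return 0;
    }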




[PATCH v2 20/54] tcg/i386: Rationalize args to tcg_out_qemu_{ld,st}

2023-04-10 Thread Richard Henderson
Interpret the variable argument placement in the caller.
Mark the argument registers const, because they must be passed to
add_qemu_ldst_label unmodified.

Pass data_type instead of is64 -- there are several places where
we already convert back from bool to type.  Clean things up by
using type throughout.

Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target.c.inc | 113 ++
 1 file changed, 52 insertions(+), 61 deletions(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 238a75b17e..2b2759d696 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -1886,8 +1886,8 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg 
addrlo, TCGReg addrhi,
  * Record the context of a call to the out of line helper code for the slow 
path
  * for a load or store, so that we can later generate the correct helper code
  */
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld, bool is_64,
-MemOpIdx oi,
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
+TCGType type, MemOpIdx oi,
 TCGReg datalo, TCGReg datahi,
 TCGReg addrlo, TCGReg addrhi,
 tcg_insn_unit *raddr,
@@ -1897,7 +1897,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool 
is_ld, bool is_64,
 
 label->is_ld = is_ld;
 label->oi = oi;
-label->type = is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
+label->type = type;
 label->datalo_reg = datalo;
 label->datahi_reg = datahi;
 label->addrlo_reg = addrlo;
@@ -2154,11 +2154,10 @@ static inline int setup_guest_base_seg(void)
 
 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
TCGReg base, int index, intptr_t ofs,
-   int seg, bool is64, MemOp memop)
+   int seg, TCGType type, MemOp memop)
 {
-TCGType type = is64 ? TCG_TYPE_I64 : TCG_TYPE_I32;
 bool use_movbe = false;
-int rexw = is64 * P_REXW;
+int rexw = (type == TCG_TYPE_I32 ? 0 : P_REXW);
 int movop = OPC_MOVL_GvEv;
 
 /* Do big-endian loads with movbe.  */
@@ -2248,50 +2247,35 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, 
TCGReg datalo, TCGReg datahi,
 }
 }
 
-/* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
-   EAX. It will be useful once fixed registers globals are less
-   common. */
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
+static void tcg_out_qemu_ld(TCGContext *s,
+const TCGReg datalo, const TCGReg datahi,
+const TCGReg addrlo, const TCGReg addrhi,
+const MemOpIdx oi, TCGType data_type)
 {
-TCGReg datalo, datahi, addrlo;
-TCGReg addrhi __attribute__((unused));
-MemOpIdx oi;
-MemOp opc;
+MemOp opc = get_memop(oi);
+
 #if defined(CONFIG_SOFTMMU)
-int mem_index;
 tcg_insn_unit *label_ptr[2];
-#else
-unsigned a_bits;
-#endif
 
-datalo = *args++;
-datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
-addrlo = *args++;
-addrhi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS ? *args++ : 0);
-oi = *args++;
-opc = get_memop(oi);
-
-#if defined(CONFIG_SOFTMMU)
-mem_index = get_mmuidx(oi);
-
-tcg_out_tlb_load(s, addrlo, addrhi, mem_index, opc,
+tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), opc,
  label_ptr, offsetof(CPUTLBEntry, addr_read));
 
 /* TLB Hit.  */
-tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, -1, 0, 0, is64, opc);
+tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1,
+   -1, 0, 0, data_type, opc);
 
 /* Record the current context of a load into ldst label */
-add_qemu_ldst_label(s, true, is64, oi, datalo, datahi, addrlo, addrhi,
-s->code_ptr, label_ptr);
+add_qemu_ldst_label(s, true, data_type, oi, datalo, datahi,
+addrlo, addrhi, s->code_ptr, label_ptr);
 #else
-a_bits = get_alignment_bits(opc);
+unsigned a_bits = get_alignment_bits(opc);
 if (a_bits) {
 tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
 }
 
 tcg_out_qemu_ld_direct(s, datalo, datahi, addrlo, x86_guest_base_index,
x86_guest_base_offset, x86_guest_base_seg,
-   is64, opc);
+   data_type, opc);
 #endif
 }
 
@@ -2347,40 +2331,27 @@ static void tcg_out_qemu_st_direct(TCGContext *s, 
TCGReg datalo, TCGReg datahi,
 }
 }
 
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
+static void tcg_out_qemu_st(TCGContext *s,
+const TCGReg datalo, const TCGReg datahi,
+const TCGReg addrlo, const TCGReg addrhi,
+
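
The shape of this recurring refactor, condensed (illustrative C only, not
QEMU code -- the real decode also depends on TCG_TARGET_REG_BITS and
TARGET_LONG_BITS, as the deleted lines above show):

    #include <stdint.h>

    typedef uintptr_t Arg;   /* stand-in for TCGArg, sketch only */

    /* Before: every backend decoded the flat args[] array itself. */
    static void qemu_ld_old(const Arg *args, int is64)
    {
        Arg datalo = *args++;
        Arg datahi = is64 ? *args++ : 0;   /* 32-bit hosts only */
        Arg addrlo = *args++;
        Arg oi     = *args++;
        /* ... emit code ... */
    }

    /* After: the dispatcher decodes once; the backend takes typed
     * parameters, and an explicit value type replaces the is64 flag. */
    static void qemu_ld_new(Arg datalo, Arg datahi, Arg addrlo,
                            Arg oi, int data_type)
    {
        /* ... emit code ... */
    }

    int main(void)
    {
        Arg args[4] = { 1, 2, 3, 4 };
        qemu_ld_old(args, 1);
        qemu_ld_new(1, 2, 3, 4, 1);
        return 0;
    }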

[PATCH v2 14/54] tcg/i386: Conditionalize tcg_out_extu_i32_i64

2023-04-10 Thread Richard Henderson
Since TCG_TYPE_I32 values are kept zero-extended in registers, via
omission of the REXW bit, we need not extend if the register matches.
This is already relied upon by qemu_{ld,st}.

Signed-off-by: Richard Henderson 
---
 tcg/i386/tcg-target.c.inc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 40d661072b..a156929477 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -1306,7 +1306,9 @@ static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg 
dest, TCGReg src)
 
 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg dest, TCGReg src)
 {
-tcg_out_ext32u(s, dest, src);
+if (dest != src) {
+tcg_out_ext32u(s, dest, src);
+}
 }
 
 static inline void tcg_out_bswap64(TCGContext *s, int reg)
-- 
2.34.1
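
Background on the invariant (a general x86-64 fact, not specific to this
patch): any instruction that writes a 32-bit register implicitly zeroes
bits 63:32 of the full register, e.g. "mov %esi, %esi" leaves %rsi equal
to the zero-extension of %esi.  Since TCG emits i32 operations without
the REXW prefix, every live i32 value already satisfies this, and a
same-register extension is a pure no-op.  A sketch with a stub emitter:

    #include <stdio.h>

    static void emit_mov32(int dest, int src)   /* stub emitter */
    {
        printf("mov r%d.32, r%d.32\n", dest, src);
    }

    static void extu_i32_i64(int dest, int src)
    {
        if (dest != src) {
            emit_mov32(dest, src);   /* 32-bit mov zero-extends */
        }
        /* dest == src: high bits are already clear, emit nothing */
    }

    int main(void)
    {
        extu_i32_i64(0, 1);   /* emits a mov */
        extu_i32_i64(2, 2);   /* emits nothing */
        return 0;
    }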




[PATCH v2 01/54] tcg: Replace if + tcg_abort with tcg_debug_assert

2023-04-10 Thread Richard Henderson
Signed-off-by: Richard Henderson 
---
 tcg/tcg.c | 4 +---
 tcg/i386/tcg-target.c.inc | 8 +++-
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index bb52bc060b..100f81edb2 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1174,9 +1174,7 @@ static TCGTemp *tcg_global_reg_new_internal(TCGContext 
*s, TCGType type,
 {
 TCGTemp *ts;
 
-if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
-tcg_abort();
-}
+tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
 
 ts = tcg_global_alloc(s);
 ts->base_type = type;
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 5a151fe64a..dfd41c7bf1 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -1369,8 +1369,8 @@ static void tcg_out_addi(TCGContext *s, int reg, 
tcg_target_long val)
 }
 }
 
-/* Use SMALL != 0 to force a short forward branch.  */
-static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, int small)
+/* Set SMALL to force a short forward branch.  */
+static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel *l, bool small)
 {
 int32_t val, val1;
 
@@ -1385,9 +1385,7 @@ static void tcg_out_jxx(TCGContext *s, int opc, TCGLabel 
*l, int small)
 }
 tcg_out8(s, val1);
 } else {
-if (small) {
-tcg_abort();
-}
+tcg_debug_assert(!small);
 if (opc == -1) {
 tcg_out8(s, OPC_JMP_long);
 tcg_out32(s, val - 5);
-- 
2.34.1
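
One point worth keeping in mind about this conversion: it is not
behavior-preserving in release builds.  Paraphrasing the two macros'
shapes from this era (see include/tcg/tcg.h for the authoritative
definitions; GCC/Clang assumed for __builtin_unreachable):

    #include <assert.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* tcg_abort() always fired, in every build: */
    #define tcg_abort_() \
        do { \
            fprintf(stderr, "%s:%d: tcg fatal error\n", \
                    __FILE__, __LINE__); \
            abort(); \
        } while (0)

    /* tcg_debug_assert() checks only under CONFIG_DEBUG_TCG; otherwise
     * it becomes an optimization hint and the check disappears: */
    #ifdef CONFIG_DEBUG_TCG
    # define tcg_debug_assert_(X) assert(X)
    #else
    # define tcg_debug_assert_(X) \
        do { if (!(X)) { __builtin_unreachable(); } } while (0)
    #endif

    int main(void)
    {
        tcg_debug_assert_(1 + 1 == 2);
        return 0;
    }

So conditions that previously aborted production builds are now assumed
to hold there.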




[PATCH v2 28/54] tcg/riscv: Rationalize args to tcg_out_qemu_{ld,st}

2023-04-10 Thread Richard Henderson
Interpret the variable argument placement in the caller.
Mark the argument registers const, because they must be passed to
add_qemu_ldst_label unmodified.

Pass data_type instead of is64 -- there are several places where
we already convert back from bool to type.  Clean things up by
using type throughout.

Signed-off-by: Richard Henderson 
---
 tcg/riscv/tcg-target.c.inc | 68 +++---
 1 file changed, 26 insertions(+), 42 deletions(-)

diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index 1edc3b1c4d..d4134bc86f 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -1101,7 +1101,7 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, 
TCGLabelQemuLdst *l)
 #endif /* CONFIG_SOFTMMU */
 
 static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg val,
-   TCGReg base, MemOp opc, bool is_64)
+   TCGReg base, MemOp opc, TCGType type)
 {
 /* Byte swapping is left to middle-end expansion. */
 tcg_debug_assert((opc & MO_BSWAP) == 0);
@@ -1120,7 +1120,7 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg 
val,
 tcg_out_opc_imm(s, OPC_LH, val, base, 0);
 break;
 case MO_UL:
-if (is_64) {
+if (type == TCG_TYPE_I64) {
 tcg_out_opc_imm(s, OPC_LWU, val, base, 0);
 break;
 }
@@ -1136,30 +1136,22 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, 
TCGReg val,
 }
 }
 
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is_64)
+static void tcg_out_qemu_ld(TCGContext *s, const TCGReg data_reg,
+const TCGReg addr_reg, const MemOpIdx oi,
+TCGType data_type)
 {
-TCGReg addr_reg, data_reg;
-MemOpIdx oi;
-MemOp opc;
-#if defined(CONFIG_SOFTMMU)
-tcg_insn_unit *label_ptr[1];
-#else
-unsigned a_bits;
-#endif
+MemOp opc = get_memop(oi);
 TCGReg base;
 
-data_reg = *args++;
-addr_reg = *args++;
-oi = *args++;
-opc = get_memop(oi);
-
 #if defined(CONFIG_SOFTMMU)
+tcg_insn_unit *label_ptr[1];
+
 base = tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 1);
-tcg_out_qemu_ld_direct(s, data_reg, base, opc, is_64);
-add_qemu_ldst_label(s, 1, oi, (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
-data_reg, addr_reg, s->code_ptr, label_ptr);
+tcg_out_qemu_ld_direct(s, data_reg, base, opc, data_type);
+add_qemu_ldst_label(s, true, oi, data_type, data_reg, addr_reg,
+s->code_ptr, label_ptr);
 #else
-a_bits = get_alignment_bits(opc);
+unsigned a_bits = get_alignment_bits(opc);
 if (a_bits) {
 tcg_out_test_alignment(s, true, addr_reg, a_bits);
 }
@@ -1172,7 +1164,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
*args, bool is_64)
 tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base);
 base = TCG_REG_TMP0;
 }
-tcg_out_qemu_ld_direct(s, data_reg, base, opc, is_64);
+tcg_out_qemu_ld_direct(s, data_reg, base, opc, data_type);
 #endif
 }
 
@@ -1200,30 +1192,22 @@ static void tcg_out_qemu_st_direct(TCGContext *s, 
TCGReg val,
 }
 }
 
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
+static void tcg_out_qemu_st(TCGContext *s, const TCGReg data_reg,
+const TCGReg addr_reg, const MemOpIdx oi,
+TCGType data_type)
 {
-TCGReg addr_reg, data_reg;
-MemOpIdx oi;
-MemOp opc;
-#if defined(CONFIG_SOFTMMU)
-tcg_insn_unit *label_ptr[1];
-#else
-unsigned a_bits;
-#endif
+MemOp opc = get_memop(oi);
 TCGReg base;
 
-data_reg = *args++;
-addr_reg = *args++;
-oi = *args++;
-opc = get_memop(oi);
-
 #if defined(CONFIG_SOFTMMU)
+tcg_insn_unit *label_ptr[1];
+
 base = tcg_out_tlb_load(s, addr_reg, oi, label_ptr, 0);
 tcg_out_qemu_st_direct(s, data_reg, base, opc);
-add_qemu_ldst_label(s, 0, oi, (is_64 ? TCG_TYPE_I64 : TCG_TYPE_I32),
-data_reg, addr_reg, s->code_ptr, label_ptr);
+add_qemu_ldst_label(s, false, oi, data_type, data_reg, addr_reg,
+s->code_ptr, label_ptr);
 #else
-a_bits = get_alignment_bits(opc);
+unsigned a_bits = get_alignment_bits(opc);
 if (a_bits) {
 tcg_out_test_alignment(s, false, addr_reg, a_bits);
 }
@@ -1528,16 +1512,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 break;
 
 case INDEX_op_qemu_ld_i32:
-tcg_out_qemu_ld(s, args, false);
+tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
 break;
 case INDEX_op_qemu_ld_i64:
-tcg_out_qemu_ld(s, args, true);
+tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
 break;
 case INDEX_op_qemu_st_i32:
-tcg_out_qemu_st(s, args, false);
+tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
 break;
 case INDEX_op_qemu_st_i64:
-
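
The MO_UL hunk above is where the new type parameter earns its keep.
RV64 provides two 32-bit loads: lw sign-extends the loaded word into the
64-bit register, lwu zero-extends it.  A sketch of the selection (stub
emitter and simplified enums, not QEMU's encodings):

    #include <stdio.h>

    enum { MO_UL, MO_SL };
    enum { TYPE_I32, TYPE_I64 };

    static void emit(const char *insn) { puts(insn); }

    static void load_32bit(int memop, int type)
    {
        switch (memop) {
        case MO_UL:
            if (type == TYPE_I64) {
                emit("lwu");   /* genuine zero-extension required */
                break;
            }
            /* fall through: i32 temps are kept sign-extended on
             * RISC-V (see patch 12 in this series), so lw is already
             * the canonical form */
        case MO_SL:
            emit("lw");
            break;
        }
    }

    int main(void)
    {
        load_32bit(MO_UL, TYPE_I64);   /* lwu */
        load_32bit(MO_UL, TYPE_I32);   /* lw  */
        return 0;
    }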

[PATCH v2 30/54] tcg/sparc64: Pass TCGType to tcg_out_qemu_{ld,st}

2023-04-10 Thread Richard Henderson
We need to set this in TCGLabelQemuLdst, so plumb this
all the way through from tcg_out_op.

Signed-off-by: Richard Henderson 
---
 tcg/sparc64/tcg-target.c.inc | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
index f3e5e856d6..7e6466d3b6 100644
--- a/tcg/sparc64/tcg-target.c.inc
+++ b/tcg/sparc64/tcg-target.c.inc
@@ -1178,7 +1178,7 @@ static const int qemu_st_opc[(MO_SIZE | MO_BSWAP) + 1] = {
 };
 
 static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr,
-MemOpIdx oi, bool is_64)
+MemOpIdx oi, TCGType data_type)
 {
 MemOp memop = get_memop(oi);
 tcg_insn_unit *label_ptr;
@@ -1324,7 +1324,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, 
TCGReg addr,
 }
 
 static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr,
-MemOpIdx oi, bool is64)
+MemOpIdx oi, TCGType data_type)
 {
 MemOp memop = get_memop(oi);
 tcg_insn_unit *label_ptr;
@@ -1351,8 +1351,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, 
TCGReg addr,
 
 tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz);
 tcg_out_movext(s, (memop & MO_SIZE) == MO_64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
-   TCG_REG_O2, is64 ? TCG_TYPE_I64 : TCG_TYPE_I32,
-   memop & MO_SIZE, data);
+   TCG_REG_O2, data_type, memop & MO_SIZE, data);
 
 func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)];
 tcg_debug_assert(func != NULL);
@@ -1637,16 +1636,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 break;
 
 case INDEX_op_qemu_ld_i32:
-tcg_out_qemu_ld(s, a0, a1, a2, false);
+tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32);
 break;
 case INDEX_op_qemu_ld_i64:
-tcg_out_qemu_ld(s, a0, a1, a2, true);
+tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I64);
 break;
 case INDEX_op_qemu_st_i32:
-tcg_out_qemu_st(s, a0, a1, a2, false);
+tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I32);
 break;
 case INDEX_op_qemu_st_i64:
-tcg_out_qemu_st(s, a0, a1, a2, true);
+tcg_out_qemu_st(s, a0, a1, a2, TCG_TYPE_I64);
 break;
 
 case INDEX_op_ld32s_i64:
-- 
2.34.1
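
Why the label must record the type: the slow path gets the loaded data
back in the ABI return register and has to widen it into the destination
temp, which requires knowing whether that temp is a 32- or 64-bit value.
The aarch64 slow path (before its conversion to the generic helpers
later in this archive) shows the consumer:

    tcg_out_movext(s, lb->type, lb->datalo_reg,
                   TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_X0);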




[PATCH v2 22/54] tcg/arm: Rationalize args to tcg_out_qemu_{ld,st}

2023-04-10 Thread Richard Henderson
Interpret the variable argument placement in the caller.
Mark the argument registers const, because they must be passed to
add_qemu_ldst_label unmodified.

Pass data_type instead of is_64.  We need to set this in
TCGLabelQemuLdst, so plumb this all the way through from tcg_out_op.

Signed-off-by: Richard Henderson 
---
 tcg/arm/tcg-target.c.inc | 115 ---
 1 file changed, 58 insertions(+), 57 deletions(-)

diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 83c818a58b..3706a3b93e 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -1526,15 +1526,18 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg 
addrlo, TCGReg addrhi,
 /* Record the context of a call to the out of line helper code for the slow
path for a load or store, so that we can later generate the correct
helper code.  */
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld, MemOpIdx oi,
-TCGReg datalo, TCGReg datahi, TCGReg addrlo,
-TCGReg addrhi, tcg_insn_unit *raddr,
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
+MemOpIdx oi, TCGType type,
+TCGReg datalo, TCGReg datahi,
+TCGReg addrlo, TCGReg addrhi,
+tcg_insn_unit *raddr,
 tcg_insn_unit *label_ptr)
 {
 TCGLabelQemuLdst *label = new_ldst_label(s);
 
 label->is_ld = is_ld;
 label->oi = oi;
+label->type = type;
 label->datalo_reg = datalo;
 label->datahi_reg = datahi;
 label->addrlo_reg = addrlo;
@@ -1796,41 +1799,29 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp 
opc, TCGReg datalo,
 }
 #endif
 
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
+static void tcg_out_qemu_ld(TCGContext *s,
+const TCGReg datalo, const TCGReg datahi,
+const TCGReg addrlo, const TCGReg addrhi,
+const MemOpIdx oi, TCGType data_type)
 {
-TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
-MemOpIdx oi;
-MemOp opc;
-#ifdef CONFIG_SOFTMMU
-int mem_index;
-TCGReg addend;
-tcg_insn_unit *label_ptr;
-#else
-unsigned a_bits;
-#endif
-
-datalo = *args++;
-datahi = (is64 ? *args++ : 0);
-addrlo = *args++;
-addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
-oi = *args++;
-opc = get_memop(oi);
+MemOp opc = get_memop(oi);
 
 #ifdef CONFIG_SOFTMMU
-mem_index = get_mmuidx(oi);
-addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 1);
+TCGReg addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 1);
 
-/* This a conditional BL only to load a pointer within this opcode into LR
-   for the slow path.  We will not be using the value for a tail call.  */
-label_ptr = s->code_ptr;
+/*
+ * This a conditional BL only to load a pointer within this opcode into
+ * LR for the slow path.  We will not be using the value for a tail call.
+ */
+tcg_insn_unit *label_ptr = s->code_ptr;
 tcg_out_bl_imm(s, COND_NE, 0);
 
 tcg_out_qemu_ld_index(s, opc, datalo, datahi, addrlo, addend, true);
 
-add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
-s->code_ptr, label_ptr);
+add_qemu_ldst_label(s, true, oi, data_type, datalo, datahi,
+addrlo, addrhi, s->code_ptr, label_ptr);
 #else /* !CONFIG_SOFTMMU */
-a_bits = get_alignment_bits(opc);
+unsigned a_bits = get_alignment_bits(opc);
 if (a_bits) {
 tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
 }
@@ -1918,41 +1909,27 @@ static void tcg_out_qemu_st_direct(TCGContext *s, MemOp 
opc, TCGReg datalo,
 }
 #endif
 
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
+static void tcg_out_qemu_st(TCGContext *s,
+const TCGReg datalo, const TCGReg datahi,
+const TCGReg addrlo, const TCGReg addrhi,
+const MemOpIdx oi, TCGType data_type)
 {
-TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
-MemOpIdx oi;
-MemOp opc;
-#ifdef CONFIG_SOFTMMU
-int mem_index;
-TCGReg addend;
-tcg_insn_unit *label_ptr;
-#else
-unsigned a_bits;
-#endif
-
-datalo = *args++;
-datahi = (is64 ? *args++ : 0);
-addrlo = *args++;
-addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
-oi = *args++;
-opc = get_memop(oi);
+MemOp opc = get_memop(oi);
 
 #ifdef CONFIG_SOFTMMU
-mem_index = get_mmuidx(oi);
-addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 0);
+TCGReg addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 
0);
 
 tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi,
   addrlo, addend, true);
 
 /* 

[PATCH v2 41/54] tcg/mips: Convert tcg_out_qemu_{ld,st}_slow_path

2023-04-10 Thread Richard Henderson
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
and tcg_out_st_helper_args.  This allows our local
tcg_out_arg_* infrastructure to be removed.

We are no longer filling the call or return branch
delay slots, nor are we tail-calling for the store,
but this seems a small price to pay.

Signed-off-by: Richard Henderson 
---
 tcg/mips/tcg-target.c.inc | 156 ++
 1 file changed, 23 insertions(+), 133 deletions(-)

diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
index ee5826c2b5..9f7c9cd688 100644
--- a/tcg/mips/tcg-target.c.inc
+++ b/tcg/mips/tcg-target.c.inc
@@ -1115,72 +1115,6 @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) 
+ 1] = {
 [MO_BEUQ] = helper_be_stq_mmu,
 };
 
-/* Helper routines for marshalling helper function arguments into
- * the correct registers and stack.
- * I is where we want to put this argument, and is updated and returned
- * for the next call. ARG is the argument itself.
- *
- * We provide routines for arguments which are: immediate, 32 bit
- * value in register, 16 and 8 bit values in register (which must be zero
- * extended before use) and 64 bit value in a lo:hi register pair.
- */
-
-static int tcg_out_call_iarg_reg(TCGContext *s, int i, TCGReg arg)
-{
-if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
-tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[i], arg);
-} else {
-/* For N32 and N64, the initial offset is different.  But there
-   we also have 8 argument register so we don't run out here.  */
-tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
-tcg_out_st(s, TCG_TYPE_REG, arg, TCG_REG_SP, 4 * i);
-}
-return i + 1;
-}
-
-static int tcg_out_call_iarg_reg8(TCGContext *s, int i, TCGReg arg)
-{
-TCGReg tmp = TCG_TMP0;
-if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
-tmp = tcg_target_call_iarg_regs[i];
-}
-tcg_out_ext8u(s, tmp, arg);
-return tcg_out_call_iarg_reg(s, i, tmp);
-}
-
-static int tcg_out_call_iarg_reg16(TCGContext *s, int i, TCGReg arg)
-{
-TCGReg tmp = TCG_TMP0;
-if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
-tmp = tcg_target_call_iarg_regs[i];
-}
-tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0x);
-return tcg_out_call_iarg_reg(s, i, tmp);
-}
-
-static int tcg_out_call_iarg_imm(TCGContext *s, int i, TCGArg arg)
-{
-TCGReg tmp = TCG_TMP0;
-if (arg == 0) {
-tmp = TCG_REG_ZERO;
-} else {
-if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
-tmp = tcg_target_call_iarg_regs[i];
-}
-tcg_out_movi(s, TCG_TYPE_REG, tmp, arg);
-}
-return tcg_out_call_iarg_reg(s, i, tmp);
-}
-
-static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah)
-{
-tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
-i = (i + 1) & ~1;
-i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? ah : al));
-i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? al : ah));
-return i;
-}
-
 /* We expect to use a 16-bit negative offset from ENV.  */
 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
 QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -32768);
@@ -1295,13 +1229,15 @@ static void add_qemu_ldst_label(TCGContext *s, int 
is_ld, MemOpIdx oi,
 }
 }
 
+/* We have four temps, we might as well expose three of them. */
+static const TCGLdstHelperParam ldst_helper_param = {
+.ntmp = 3, .tmp = { TCG_TMP0, TCG_TMP1, TCG_TMP2 }
+};
+
 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 {
 const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr);
-MemOpIdx oi = l->oi;
-MemOp opc = get_memop(oi);
-TCGReg v0;
-int i;
+MemOp opc = get_memop(l->oi);
 
 /* resolve label address */
 if (!reloc_pc16(l->label_ptr[0], tgt_rx)
@@ -1310,29 +1246,13 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, 
TCGLabelQemuLdst *l)
 return false;
 }
 
-i = 1;
-if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
-i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg);
-} else {
-i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg);
-}
-i = tcg_out_call_iarg_imm(s, i, oi);
-i = tcg_out_call_iarg_imm(s, i, (intptr_t)l->raddr);
+tcg_out_ld_helper_args(s, l, &ldst_helper_param);
+
 tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)], false);
 /* delay slot */
-tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+tcg_out_nop(s);
 
-v0 = l->datalo_reg;
-if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
-/* We eliminated V0 from the possible output registers, so it
-   cannot be clobbered here.  So we must move V1 first.  */
-if (MIPS_BE) {
-tcg_out_mov(s, TCG_TYPE_I32, v0, TCG_REG_V1);
-v0 = l->datahi_reg;
-} else {
-tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_V1);
-}
-}
+tcg_out_ld_helper_ret(s, l, true, &ldst_helper_param);
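
For readers less familiar with MIPS: the instruction textually following
a branch or call sits in the branch delay slot and executes before
control transfers, which is why the old code could park the first
argument move after the jal.  With the argument moves now emitted up
front by generic code, nothing useful is left for the slot, hence the
nop.  A stub-emitter sketch of the ordering (register names
illustrative):

    #include <stdio.h>

    static void emit(const char *s) { puts(s); }

    static void slow_path_before(void)
    {
        emit("jal  helper_ld");
        emit("move a0, env   # delay slot: still executed");
    }

    static void slow_path_after(void)
    {
        emit("move a0, env   # emitted by tcg_out_ld_helper_args");
        emit("jal  helper_ld");
        emit("nop            # delay slot left empty");
    }

    int main(void)
    {
        slow_path_before();
        slow_path_after();
        return 0;
    }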
 
 

[PATCH v2 12/54] tcg/riscv: Conditionalize tcg_out_exts_i32_i64

2023-04-10 Thread Richard Henderson
Since TCG_TYPE_I32 values are kept sign-extended in registers,
via "w" instructions, we need not extend if the register matches.
This is already relied upon by comparisons.

Signed-off-by: Richard Henderson 
---
 tcg/riscv/tcg-target.c.inc | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index 7bd3b421ad..2b9aab29ec 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -604,7 +604,9 @@ static void tcg_out_ext32s(TCGContext *s, TCGReg ret, 
TCGReg arg)
 
 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg)
 {
-tcg_out_ext32s(s, ret, arg);
+if (ret != arg) {
+tcg_out_ext32s(s, ret, arg);
+}
 }
 
 static void tcg_out_ldst(TCGContext *s, RISCVInsn opc, TCGReg data,
-- 
2.34.1
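
This is the RV64 counterpart of the x86-64 zero-extension invariant
(patch 14/54, earlier in this archive): every *w instruction (addw,
subw, lw, ...) writes its 32-bit result sign-extended to 64 bits, so an
i32 temp's host register always equals the sign-extension of its low 32
bits.  A small demonstration of what "kept sign-extended" means:

    #include <assert.h>
    #include <stdint.h>

    /* the canonical form an RV64 "w" instruction produces */
    static int64_t canonical_i32(int32_t v) { return v; }

    int main(void)
    {
        int64_t reg = canonical_i32(-5);   /* e.g. result of addw */
        /* ext_i32_i64 with dest == src would recompute this value: */
        assert(reg == (int64_t)(int32_t)reg);
        return 0;
    }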




[PATCH v2 39/54] tcg/arm: Convert tcg_out_qemu_{ld,st}_slow_path

2023-04-10 Thread Richard Henderson
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
and tcg_out_st_helper_args.  This allows our local
tcg_out_arg_* infrastructure to be removed.

Signed-off-by: Richard Henderson 
---
 tcg/arm/tcg-target.c.inc | 146 ++-
 1 file changed, 21 insertions(+), 125 deletions(-)

diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 3706a3b93e..57319674e5 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -690,8 +690,8 @@ tcg_out_ldrd_rwb(TCGContext *s, ARMCond cond, TCGReg rt, 
TCGReg rn, TCGReg rm)
 tcg_out_memop_r(s, cond, INSN_LDRD_REG, rt, rn, rm, 1, 1, 1);
 }
 
-static void tcg_out_strd_8(TCGContext *s, ARMCond cond, TCGReg rt,
-   TCGReg rn, int imm8)
+static void __attribute__((unused))
+tcg_out_strd_8(TCGContext *s, ARMCond cond, TCGReg rt, TCGReg rn, int imm8)
 {
 tcg_out_memop_8(s, cond, INSN_STRD_IMM, rt, rn, imm8, 1, 0);
 }
@@ -969,28 +969,16 @@ static void tcg_out_ext8u(TCGContext *s, TCGReg rd, 
TCGReg rn)
 tcg_out_dat_imm(s, COND_AL, ARITH_AND, rd, rn, 0xff);
 }
 
-static void __attribute__((unused))
-tcg_out_ext8u_cond(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
-{
-tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
-}
-
 static void tcg_out_ext16s(TCGContext *s, TCGType t, TCGReg rd, TCGReg rn)
 {
 /* sxth */
 tcg_out32(s, 0x06bf0070 | (COND_AL << 28) | (rd << 12) | rn);
 }
 
-static void tcg_out_ext16u_cond(TCGContext *s, ARMCond cond,
-TCGReg rd, TCGReg rn)
-{
-/* uxth */
-tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | rn);
-}
-
 static void tcg_out_ext16u(TCGContext *s, TCGReg rd, TCGReg rn)
 {
-tcg_out_ext16u_cond(s, COND_AL, rd, rn);
+/* uxth */
+tcg_out32(s, 0x06ff0070 | (COND_AL << 28) | (rd << 12) | rn);
 }
 
 static void tcg_out_ext32s(TCGContext *s, TCGReg rd, TCGReg rn)
@@ -1375,58 +1363,6 @@ static void * const qemu_st_helpers[MO_SIZE + 1] = {
 #endif
 };
 
-/* Helper routines for marshalling helper function arguments into
- * the correct registers and stack.
- * argreg is where we want to put this argument, arg is the argument itself.
- * Return value is the updated argreg ready for the next call.
- * Note that argreg 0..3 is real registers, 4+ on stack.
- *
- * We provide routines for arguments which are: immediate, 32 bit
- * value in register, 16 and 8 bit values in register (which must be zero
- * extended before use) and 64 bit value in a lo:hi register pair.
- */
-#define DEFINE_TCG_OUT_ARG(NAME, ARGTYPE, MOV_ARG, EXT_ARG)\
-static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE arg)  \
-{  \
-if (argreg < 4) {  \
-MOV_ARG(s, COND_AL, argreg, arg);  \
-} else {   \
-int ofs = (argreg - 4) * 4;\
-EXT_ARG;   \
-tcg_debug_assert(ofs + 4 <= TCG_STATIC_CALL_ARGS_SIZE);\
-tcg_out_st32_12(s, COND_AL, arg, TCG_REG_CALL_STACK, ofs); \
-}  \
-return argreg + 1; \
-}
-
-DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
-(tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
-DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u_cond,
-(tcg_out_ext8u_cond(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
-DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u_cond,
-(tcg_out_ext16u_cond(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
-DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
-
-static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
-TCGReg arglo, TCGReg arghi)
-{
-/* 64 bit arguments must go in even/odd register pairs
- * and in 8-aligned stack slots.
- */
-if (argreg & 1) {
-argreg++;
-}
-if (argreg >= 4 && (arglo & 1) == 0 && arghi == arglo + 1) {
-tcg_out_strd_8(s, COND_AL, arglo,
-   TCG_REG_CALL_STACK, (argreg - 4) * 4);
-return argreg + 2;
-} else {
-argreg = tcg_out_arg_reg32(s, argreg, arglo);
-argreg = tcg_out_arg_reg32(s, argreg, arghi);
-return argreg;
-}
-}
-
 #define TLB_SHIFT  (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
 
 /* We expect to use an 9-bit sign-magnitude negative offset from ENV.  */
@@ -1546,40 +1482,29 @@ static void add_qemu_ldst_label(TCGContext *s, bool 
is_ld,
 label->label_ptr[0] = label_ptr;
 }
 
+static TCGReg ldst_ra_gen(TCGContext *s, const TCGLabelQemuLdst *l, int arg)
+{
+/* We arrive at the slow path via "BLNE", so R14 

[PATCH v2 15/54] tcg: Split out tcg_out_extrl_i64_i32

2023-04-10 Thread Richard Henderson
We will need a backend interface for type truncation.  For those backends
that did not enable TCG_TARGET_HAS_extrl_i64_i32, use tcg_out_mov.
Use it in tcg_reg_alloc_op in the meantime.

Signed-off-by: Richard Henderson 
---
 tcg/tcg.c|  4 
 tcg/aarch64/tcg-target.c.inc |  6 ++
 tcg/arm/tcg-target.c.inc |  5 +
 tcg/i386/tcg-target.c.inc|  9 ++---
 tcg/loongarch64/tcg-target.c.inc | 10 ++
 tcg/mips/tcg-target.c.inc|  9 ++---
 tcg/ppc/tcg-target.c.inc |  7 +++
 tcg/riscv/tcg-target.c.inc   | 10 ++
 tcg/s390x/tcg-target.c.inc   |  6 ++
 tcg/sparc64/tcg-target.c.inc |  9 ++---
 tcg/tci/tcg-target.c.inc |  7 +++
 11 files changed, 65 insertions(+), 17 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 17bd6d4581..0188152c37 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -113,6 +113,7 @@ static void tcg_out_ext32s(TCGContext *s, TCGReg ret, 
TCGReg arg);
 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
+static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
 static void tcg_out_goto_tb(TCGContext *s, int which);
@@ -4537,6 +4538,9 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp 
*op)
 case INDEX_op_extu_i32_i64:
 tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
 break;
+case INDEX_op_extrl_i64_i32:
+tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
+break;
 default:
 if (def->flags & TCG_OPF_VECTOR) {
 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index ca8b25865b..bd1fab193e 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -1467,6 +1467,11 @@ static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg 
rd, TCGReg rn)
 tcg_out_ext32u(s, rd, rn);
 }
 
+static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
+{
+tcg_out_mov(s, TCG_TYPE_I32, rd, rn);
+}
+
 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
 TCGReg rn, int64_t aimm)
 {
@@ -2337,6 +2342,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 case INDEX_op_ext32u_i64:
 case INDEX_op_ext_i32_i64:
 case INDEX_op_extu_i32_i64:
+case INDEX_op_extrl_i64_i32:
 default:
 g_assert_not_reached();
 }
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 2135616e12..1820655ee3 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -1013,6 +1013,11 @@ static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg 
rd, TCGReg rn)
 g_assert_not_reached();
 }
 
+static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg rd, TCGReg rn)
+{
+g_assert_not_reached();
+}
+
 static void tcg_out_bswap16(TCGContext *s, ARMCond cond,
 TCGReg rd, TCGReg rn, int flags)
 {
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index a156929477..a166a195c4 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -1311,6 +1311,11 @@ static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg 
dest, TCGReg src)
 }
 }
 
+static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg dest, TCGReg src)
+{
+tcg_out_ext32u(s, dest, src);
+}
+
 static inline void tcg_out_bswap64(TCGContext *s, int reg)
 {
 tcg_out_opc(s, OPC_BSWAP + P_REXW + LOWREGMASK(reg), 0, reg, 0);
@@ -2765,9 +2770,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode 
opc,
 case INDEX_op_bswap64_i64:
 tcg_out_bswap64(s, a0);
 break;
-case INDEX_op_extrl_i64_i32:
-tcg_out_ext32u(s, a0, a1);
-break;
 case INDEX_op_extrh_i64_i32:
 tcg_out_shifti(s, SHIFT_SHR + P_REXW, a0, 32);
 break;
@@ -2848,6 +2850,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode 
opc,
 case INDEX_op_ext32u_i64:
 case INDEX_op_ext_i32_i64:
 case INDEX_op_extu_i32_i64:
+case INDEX_op_extrl_i64_i32:
 default:
 g_assert_not_reached();
 }
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index d83bd9de49..b0e076c462 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -468,6 +468,11 @@ static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg 
ret, TCGReg arg)
 tcg_out_ext32u(s, ret, arg);
 }
 
+static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+tcg_out_ext32s(s, ret, arg);
+}
+
 static void tcg_out_clzctz(TCGContext *s, LoongArchInsn opc,
TCGReg a0, TCGReg a1, TCGReg a2,
bool 
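
For reference, extrl_i64_i32 is TCG's "extract low half": it takes the
low 32 bits of a 64-bit value as an i32.  The per-host bodies above
differ only in which instruction re-establishes that host's canonical
i32 form -- a plain mov on aarch64, ext32u on x86-64 (zero-extended
i32s), ext32s on loongarch64 and riscv64 (sign-extended i32s).  The
value-level semantics are just:

    #include <assert.h>
    #include <stdint.h>

    static uint32_t extrl_i64_i32(uint64_t v) { return (uint32_t)v; }

    int main(void)
    {
        assert(extrl_i64_i32(0x123456789abcdef0ull) == 0x9abcdef0u);
        return 0;
    }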

[PATCH v2 29/54] tcg/sparc64: Drop is_64 test from tcg_out_qemu_ld data return

2023-04-10 Thread Richard Henderson
In tcg_canonicalize_memop, we remove MO_SIGN from MO_32 operations
with TCG_TYPE_I32.  Thus MO_SL never occurs here without is_64 also
set.  We already have an identical test just above which does not
include is_64.

Signed-off-by: Richard Henderson 
---
 tcg/sparc64/tcg-target.c.inc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
index 086981f097..f3e5e856d6 100644
--- a/tcg/sparc64/tcg-target.c.inc
+++ b/tcg/sparc64/tcg-target.c.inc
@@ -1220,7 +1220,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, 
TCGReg addr,
 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O2, oi);
 
 /* We let the helper sign-extend SB and SW, but leave SL for here.  */
-if (is_64 && (memop & MO_SSIZE) == MO_SL) {
+if ((memop & MO_SSIZE) == MO_SL) {
 tcg_out_ext32s(s, data, TCG_REG_O0);
 } else {
 tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0);
-- 
2.34.1
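
The canonicalization referred to happens on the generic side; a
paraphrase (simplified flag values, not QEMU's actual encodings -- see
tcg_canonicalize_memop() in tcg/tcg-op.c for the real code):

    #include <assert.h>

    enum { MO_32 = 2, MO_SIZE = 3, MO_SIGN = 4, MO_SL = MO_32 | MO_SIGN };

    static int canonicalize(int memop, int is64)
    {
        /* Sign-extending a 32-bit value into a 32-bit temp is the
         * identity, so the flag is dropped for i32 destinations. */
        if ((memop & MO_SIZE) == MO_32 && !is64) {
            memop &= ~MO_SIGN;
        }
        return memop;
    }

    int main(void)
    {
        assert(canonicalize(MO_SL, 0) != MO_SL);  /* never reaches backend */
        assert(canonicalize(MO_SL, 1) == MO_SL);  /* only with is_64 true */
        return 0;
    }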




[PATCH v2 04/54] tcg: Split out tcg_out_ext8u

2023-04-10 Thread Richard Henderson
We will need a backend interface for performing 8-bit zero-extend.
Use it in tcg_reg_alloc_op in the meantime.

Signed-off-by: Richard Henderson 
---
 tcg/tcg.c|  5 +
 tcg/aarch64/tcg-target.c.inc | 11 +++
 tcg/arm/tcg-target.c.inc | 12 +---
 tcg/i386/tcg-target.c.inc|  7 +++
 tcg/loongarch64/tcg-target.c.inc |  7 ++-
 tcg/mips/tcg-target.c.inc|  9 -
 tcg/ppc/tcg-target.c.inc |  7 +++
 tcg/riscv/tcg-target.c.inc   |  7 ++-
 tcg/s390x/tcg-target.c.inc   | 14 +-
 tcg/sparc64/tcg-target.c.inc |  9 -
 tcg/tci/tcg-target.c.inc | 14 +-
 11 files changed, 69 insertions(+), 33 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 76ba3e28cd..b02ffc5679 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -106,6 +106,7 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg 
ret, TCGReg arg);
 static void tcg_out_movi(TCGContext *s, TCGType type,
  TCGReg ret, tcg_target_long arg);
 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
+static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
 static void tcg_out_goto_tb(TCGContext *s, int which);
@@ -4504,6 +4505,10 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp 
*op)
 case INDEX_op_ext8s_i64:
 tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
 break;
+case INDEX_op_ext8u_i32:
+case INDEX_op_ext8u_i64:
+tcg_out_ext8u(s, new_args[0], new_args[1]);
+break;
 default:
 if (def->flags & TCG_OPF_VECTOR) {
 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 4f4f814293..cca91363ce 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -1432,6 +1432,11 @@ static inline void tcg_out_uxt(TCGContext *s, MemOp 
s_bits,
 tcg_out_ubfm(s, 0, rd, rn, 0, bits);
 }
 
+static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
+{
+tcg_out_uxt(s, MO_8, rd, rn);
+}
+
 static void tcg_out_addsubi(TCGContext *s, int ext, TCGReg rd,
 TCGReg rn, int64_t aimm)
 {
@@ -2243,10 +2248,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 case INDEX_op_ext32s_i64:
 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
 break;
-case INDEX_op_ext8u_i64:
-case INDEX_op_ext8u_i32:
-tcg_out_uxt(s, MO_8, a0, a1);
-break;
 case INDEX_op_ext16u_i64:
 case INDEX_op_ext16u_i32:
 tcg_out_uxt(s, MO_16, a0, a1);
@@ -2313,6 +2314,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
 case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
 case INDEX_op_ext8s_i64:
+case INDEX_op_ext8u_i32:
+case INDEX_op_ext8u_i64:
 default:
 g_assert_not_reached();
 }
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 04a860897f..b99f08a54b 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -964,8 +964,13 @@ static void tcg_out_ext8s(TCGContext *s, TCGType t, TCGReg 
rd, TCGReg rn)
 tcg_out32(s, 0x06af0070 | (COND_AL << 28) | (rd << 12) | rn);
 }
 
+static void tcg_out_ext8u(TCGContext *s, TCGReg rd, TCGReg rn)
+{
+tcg_out_dat_imm(s, COND_AL, ARITH_AND, rd, rn, 0xff);
+}
+
 static void __attribute__((unused))
-tcg_out_ext8u(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
+tcg_out_ext8u_cond(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
 {
 tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
 }
@@ -1365,8 +1370,8 @@ static TCGReg NAME(TCGContext *s, TCGReg argreg, ARGTYPE 
arg)  \
 
 DEFINE_TCG_OUT_ARG(tcg_out_arg_imm32, uint32_t, tcg_out_movi32,
 (tcg_out_movi32(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
-DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u,
-(tcg_out_ext8u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
+DEFINE_TCG_OUT_ARG(tcg_out_arg_reg8, TCGReg, tcg_out_ext8u_cond,
+(tcg_out_ext8u_cond(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg16, TCGReg, tcg_out_ext16u,
 (tcg_out_ext16u(s, COND_AL, TCG_REG_TMP, arg), arg = TCG_REG_TMP))
 DEFINE_TCG_OUT_ARG(tcg_out_arg_reg32, TCGReg, tcg_out_mov_reg, )
@@ -2299,6 +2304,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 case INDEX_op_exit_tb:  /* Always emitted via tcg_out_exit_tb.  */
 case INDEX_op_goto_tb:  /* Always emitted via tcg_out_goto_tb.  */
 case INDEX_op_ext8s_i32:  /* Always emitted via tcg_reg_alloc_op.  */
+case INDEX_op_ext8u_i32:
 default:
 g_assert_not_reached();
 }
diff --git a/tcg/i386/tcg-target.c.inc 

[PATCH v2 38/54] tcg/aarch64: Convert tcg_out_qemu_{ld,st}_slow_path

2023-04-10 Thread Richard Henderson
Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
and tcg_out_st_helper_args.

Signed-off-by: Richard Henderson 
---
 tcg/aarch64/tcg-target.c.inc | 40 +++-
 1 file changed, 16 insertions(+), 24 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 251464ae6f..ed0968133c 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -1580,13 +1580,6 @@ static void tcg_out_cltz(TCGContext *s, TCGType ext, 
TCGReg d,
 }
 }
 
-static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
-{
-ptrdiff_t offset = tcg_pcrel_diff(s, target);
-tcg_debug_assert(offset == sextract64(offset, 0, 21));
-tcg_out_insn(s, 3406, ADR, rd, offset);
-}
-
 #ifdef CONFIG_SOFTMMU
 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
  * MemOpIdx oi, uintptr_t ra)
@@ -1621,42 +1614,34 @@ static void * const qemu_st_helpers[MO_SIZE + 1] = {
 #endif
 };
 
+static const TCGLdstHelperParam ldst_helper_param = {
+.ntmp = 1, .tmp = { TCG_REG_TMP }
+};
+
 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
-MemOpIdx oi = lb->oi;
-MemOp opc = get_memop(oi);
+MemOp opc = get_memop(lb->oi);
 
 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
 return false;
 }
 
-tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
-tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
-tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X2, oi);
-tcg_out_adr(s, TCG_REG_X3, lb->raddr);
+tcg_out_ld_helper_args(s, lb, &ldst_helper_param);
 tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
-
-tcg_out_movext(s, lb->type, lb->datalo_reg,
-   TCG_TYPE_REG, opc & MO_SSIZE, TCG_REG_X0);
+tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
 tcg_out_goto(s, lb->raddr);
 return true;
 }
 
 static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
-MemOpIdx oi = lb->oi;
-MemOp opc = get_memop(oi);
-MemOp size = opc & MO_SIZE;
+MemOp opc = get_memop(lb->oi);
 
 if (!reloc_pc19(lb->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
 return false;
 }
 
-tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_X0, TCG_AREG0);
-tcg_out_mov(s, TARGET_LONG_BITS == 64, TCG_REG_X1, lb->addrlo_reg);
-tcg_out_mov(s, size == MO_64, TCG_REG_X2, lb->datalo_reg);
-tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_X3, oi);
-tcg_out_adr(s, TCG_REG_X4, lb->raddr);
+tcg_out_st_helper_args(s, lb, &ldst_helper_param);
 tcg_out_call_int(s, qemu_st_helpers[opc & MO_SIZE]);
 tcg_out_goto(s, lb->raddr);
 return true;
@@ -1768,6 +1753,13 @@ static void tcg_out_test_alignment(TCGContext *s, bool 
is_ld, TCGReg addr_reg,
 label->raddr = tcg_splitwx_to_rx(s->code_ptr);
 }
 
+static void tcg_out_adr(TCGContext *s, TCGReg rd, const void *target)
+{
+ptrdiff_t offset = tcg_pcrel_diff(s, target);
+tcg_debug_assert(offset == sextract64(offset, 0, 21));
+tcg_out_insn(s, 3406, ADR, rd, offset);
+}
+
 static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
 {
 if (!reloc_pc19(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
-- 
2.34.1
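
Condensing the hunks above, every converted backend now follows the same
three-call shape (QEMU-internal code, summarized from this patch rather
than compilable on its own; relocation handling elided): declare which
scratch registers generic code may clobber, then let the generic
routines do the marshalling:

    static const TCGLdstHelperParam ldst_helper_param = {
        .ntmp = 1, .tmp = { TCG_REG_TMP },  /* scratch for generic code */
    };

    static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
    {
        MemOp opc = get_memop(lb->oi);

        tcg_out_ld_helper_args(s, lb, &ldst_helper_param); /* env, addr, oi, ra */
        tcg_out_call_int(s, qemu_ld_helpers[opc & MO_SIZE]);
        tcg_out_ld_helper_ret(s, lb, false, &ldst_helper_param);
        tcg_out_goto(s, lb->raddr);
        return true;
    }

The routines themselves are introduced by patch 35/54, which follows
below.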




[PATCH v2 35/54] tcg: Add routines for calling slow-path helpers

2023-04-10 Thread Richard Henderson
Add tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
and tcg_out_st_helper_args.  These and their subroutines
use the existing knowledge of the host function call ABI
to load the function call arguments and return results.

These will be used to simplify the backends in turn.

Signed-off-by: Richard Henderson 
---
 tcg/tcg.c | 461 +-
 1 file changed, 458 insertions(+), 3 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 057423c121..610df88626 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -181,6 +181,22 @@ static bool tcg_target_const_match(int64_t val, TCGType 
type, int ct);
 static int tcg_out_ldst_finalize(TCGContext *s);
 #endif
 
+typedef struct TCGLdstHelperParam {
+TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
+unsigned ntmp;
+int tmp[3];
+} TCGLdstHelperParam;
+
+static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
+   const TCGLdstHelperParam *p)
+__attribute__((unused));
+static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
+  bool load_sign, const TCGLdstHelperParam *p)
+__attribute__((unused));
+static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
+   const TCGLdstHelperParam *p)
+__attribute__((unused));
+
 TCGContext tcg_init_ctx;
 __thread TCGContext *tcg_ctx;
 
@@ -459,9 +475,8 @@ static void tcg_out_movext1(TCGContext *s, const 
TCGMovExtend *i)
  * between the sources and destinations.
  */
 
-static void __attribute__((unused))
-tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
-const TCGMovExtend *i2, int scratch)
+static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
+const TCGMovExtend *i2, int scratch)
 {
 TCGReg src1 = i1->src;
 TCGReg src2 = i2->src;
@@ -715,6 +730,50 @@ static TCGHelperInfo all_helpers[] = {
 };
 static GHashTable *helper_table;
 
+#if TCG_TARGET_REG_BITS == 32
+# define dh_typecode_ttl  dh_typecode_i32
+#else
+# define dh_typecode_ttl  dh_typecode_i64
+#endif
+
+static TCGHelperInfo info_helper_ld32_mmu = {
+.flags = TCG_CALL_NO_WG,
+.typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
+  | dh_typemask(env, 1)
+  | dh_typemask(tl, 2)   /* target_ulong addr */
+  | dh_typemask(i32, 3)  /* unsigned oi */
+  | dh_typemask(ptr, 4)  /* uintptr_t ra */
+};
+
+static TCGHelperInfo info_helper_ld64_mmu = {
+.flags = TCG_CALL_NO_WG,
+.typemask = dh_typemask(i64, 0)  /* return uint64_t */
+  | dh_typemask(env, 1)
+  | dh_typemask(tl, 2)   /* target_ulong addr */
+  | dh_typemask(i32, 3)  /* unsigned oi */
+  | dh_typemask(ptr, 4)  /* uintptr_t ra */
+};
+
+static TCGHelperInfo info_helper_st32_mmu = {
+.flags = TCG_CALL_NO_WG,
+.typemask = dh_typemask(void, 0)
+  | dh_typemask(env, 1)
+  | dh_typemask(tl, 2)   /* target_ulong addr */
+  | dh_typemask(i32, 3)  /* uint32_t data */
+  | dh_typemask(i32, 4)  /* unsigned oi */
+  | dh_typemask(ptr, 5)  /* uintptr_t ra */
+};
+
+static TCGHelperInfo info_helper_st64_mmu = {
+.flags = TCG_CALL_NO_WG,
+.typemask = dh_typemask(void, 0)
+  | dh_typemask(env, 1)
+  | dh_typemask(tl, 2)   /* target_ulong addr */
+  | dh_typemask(i64, 3)  /* uint64_t data */
+  | dh_typemask(i32, 4)  /* unsigned oi */
+  | dh_typemask(ptr, 5)  /* uintptr_t ra */
+};
+
 #ifdef CONFIG_TCG_INTERPRETER
 static ffi_type *typecode_to_ffi(int argmask)
 {
@@ -1126,6 +1185,11 @@ static void tcg_context_init(unsigned max_cpus)
 (gpointer)&all_helpers[i]);
 }
 
+init_call_layout(&info_helper_ld32_mmu);
+init_call_layout(&info_helper_ld64_mmu);
+init_call_layout(&info_helper_st32_mmu);
+init_call_layout(&info_helper_st64_mmu);
+
 #ifdef CONFIG_TCG_INTERPRETER
 init_ffi_layouts();
 #endif
@@ -5011,6 +5075,397 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
 }
 }
 
+/*
+ * Similarly for qemu_ld/st slow path helpers.
+ * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
+ * using only the provided backend tcg_out_* functions.
+ */
+
+static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
+{
+int ofs = arg_slot_stk_ofs(slot);
+
+/*
+ * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
+ * require extension to uint64_t, adjust the address for uint32_t.
+ */
+if (HOST_BIG_ENDIAN &&
+TCG_TARGET_REG_BITS == 64 &&
+type == TCG_TYPE_I32) {
+ofs += 4;
+}
+return ofs;
+}
+
+static void tcg_out_helper_load_regs(TCGContext *s,
+ unsigned nmov, TCGMovExtend *mov,
+ unsigned ntmp, 

[PATCH v2 05/54] tcg: Split out tcg_out_ext16s

2023-04-10 Thread Richard Henderson
We will need a backend interface for performing 16-bit sign-extend.
Use it in tcg_reg_alloc_op in the meantime.

Signed-off-by: Richard Henderson 
---
 tcg/tcg.c|  7 +++
 tcg/aarch64/tcg-target.c.inc | 13 -
 tcg/arm/tcg-target.c.inc | 10 --
 tcg/i386/tcg-target.c.inc| 16 
 tcg/loongarch64/tcg-target.c.inc | 13 +
 tcg/mips/tcg-target.c.inc| 11 ---
 tcg/ppc/tcg-target.c.inc | 12 +---
 tcg/riscv/tcg-target.c.inc   |  9 +++--
 tcg/s390x/tcg-target.c.inc   | 12 
 tcg/sparc64/tcg-target.c.inc |  7 +++
 tcg/tci/tcg-target.c.inc | 21 -
 11 files changed, 79 insertions(+), 52 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index b02ffc5679..739f92c2ee 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -106,6 +106,7 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg 
ret, TCGReg arg);
 static void tcg_out_movi(TCGContext *s, TCGType type,
  TCGReg ret, tcg_target_long arg);
 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
+static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg 
arg);
 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
@@ -4509,6 +4510,12 @@ static void tcg_reg_alloc_op(TCGContext *s, const TCGOp 
*op)
 case INDEX_op_ext8u_i64:
 tcg_out_ext8u(s, new_args[0], new_args[1]);
 break;
+case INDEX_op_ext16s_i32:
+tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
+break;
+case INDEX_op_ext16s_i64:
+tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
+break;
 default:
 if (def->flags & TCG_OPF_VECTOR) {
 tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index cca91363ce..3527c14d04 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -1424,6 +1424,11 @@ static void tcg_out_ext8s(TCGContext *s, TCGType type, 
TCGReg rd, TCGReg rn)
 tcg_out_sxt(s, type, MO_8, rd, rn);
 }
 
+static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rn)
+{
+tcg_out_sxt(s, type, MO_16, rd, rn);
+}
+
 static inline void tcg_out_uxt(TCGContext *s, MemOp s_bits,
TCGReg rd, TCGReg rn)
 {
@@ -2233,17 +2238,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 tcg_out_rev(s, TCG_TYPE_I32, MO_16, a0, a1);
 if (a2 & TCG_BSWAP_OS) {
 /* Output must be sign-extended. */
-tcg_out_sxt(s, ext, MO_16, a0, a0);
+tcg_out_ext16s(s, ext, a0, a0);
 } else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
 /* Output must be zero-extended, but input isn't. */
 tcg_out_uxt(s, MO_16, a0, a0);
 }
 break;
 
-case INDEX_op_ext16s_i64:
-case INDEX_op_ext16s_i32:
-tcg_out_sxt(s, ext, MO_16, a0, a1);
-break;
 case INDEX_op_ext_i32_i64:
 case INDEX_op_ext32s_i64:
 tcg_out_sxt(s, TCG_TYPE_I64, MO_32, a0, a1);
@@ -2316,6 +2317,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 case INDEX_op_ext8s_i64:
 case INDEX_op_ext8u_i32:
 case INDEX_op_ext8u_i64:
+case INDEX_op_ext16s_i64:
+case INDEX_op_ext16s_i32:
 default:
 g_assert_not_reached();
 }
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index b99f08a54b..cddf977a58 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -975,10 +975,10 @@ tcg_out_ext8u_cond(TCGContext *s, ARMCond cond, TCGReg 
rd, TCGReg rn)
 tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn, 0xff);
 }
 
-static void tcg_out_ext16s(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
+static void tcg_out_ext16s(TCGContext *s, TCGType t, TCGReg rd, TCGReg rn)
 {
 /* sxth */
-tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | rn);
+tcg_out32(s, 0x06bf0070 | (COND_AL << 28) | (rd << 12) | rn);
 }
 
 static void tcg_out_ext16u(TCGContext *s, ARMCond cond, TCGReg rd, TCGReg rn)
@@ -1541,7 +1541,7 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb)
 tcg_out_ext8s(s, TCG_TYPE_I32, datalo, TCG_REG_R0);
 break;
 case MO_SW:
-tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0);
+tcg_out_ext16s(s, TCG_TYPE_I32, datalo, TCG_REG_R0);
 break;
 default:
 tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
@@ -2249,9 +2249,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 tcg_out_bswap32(s, COND_AL, args[0], args[1]);
 break;
 
-case INDEX_op_ext16s_i32:
-tcg_out_ext16s(s, COND_AL, args[0], args[1]);
-break;
 case 
