Re: [PATCH] nvdimm: release the correct device list

2021-09-21 Thread Michael S. Tsirkin
On Fri, Sep 17, 2021 at 02:59:33PM +0200, Igor Mammedov wrote:
> On Mon, 13 Sep 2021 06:40:01 +
> "lizhij...@fujitsu.com"  wrote:
> 
> > ping again
> 
> Michael,
> 
> can you include this in your next pull req, please?

ok

> > 
> > 
> > 
> > On 30/08/2021 09:04, Li Zhijian wrote:
> > > ping
> > >
> > >
> > > On 03/08/2021 12:00, Li, Zhijian wrote:  
> > >> ping
> > >>
> > >> Any body could help to review/queue this patch ?
> > >>
> > >>
> > >>
> > >> On 2021/6/29 22:05, Igor Mammedov wrote:  
> > >>> On Thu, 24 Jun 2021 19:04:15 +0800
> > >>> Li Zhijian  wrote:
> > >>>  
> >  Signed-off-by: Li Zhijian   
> > >>> Reviewed-by: Igor Mammedov 
> > >>>  
> >  ---
> >    hw/acpi/nvdimm.c | 12 ++--
> >    1 file changed, 6 insertions(+), 6 deletions(-)
> > 
> >  diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
> >  index e3d5fe19392..ff317263e85 100644
> >  --- a/hw/acpi/nvdimm.c
> >  +++ b/hw/acpi/nvdimm.c
> >  @@ -355,10 +355,10 @@ nvdimm_build_structure_caps(GArray *structures, 
> >  uint32_t capabilities)
> >      static GArray *nvdimm_build_device_structure(NVDIMMState *state)
> >    {
> >  -    GSList *device_list = nvdimm_get_device_list();
> >  +    GSList *device_list, *list = nvdimm_get_device_list();
> >    GArray *structures = g_array_new(false, true /* clear */, 1);
> >    -    for (; device_list; device_list = device_list->next) {
> >  +    for (device_list = list; device_list; device_list = 
> >  device_list->next) {
> >    DeviceState *dev = device_list->data;
> >      /* build System Physical Address Range Structure. */
> >  @@ -373,7 +373,7 @@ static GArray 
> >  *nvdimm_build_device_structure(NVDIMMState *state)
> >    /* build NVDIMM Control Region Structure. */
> >    nvdimm_build_structure_dcr(structures, dev);
> >    }
> >  -    g_slist_free(device_list);
> >  +    g_slist_free(list);
> >      if (state->persistence) {
> >    nvdimm_build_structure_caps(structures, state->persistence);
> >  @@ -1339,9 +1339,9 @@ static void nvdimm_build_ssdt(GArray 
> >  *table_offsets, GArray *table_data,
> >      void nvdimm_build_srat(GArray *table_data)
> >    {
> >  -    GSList *device_list = nvdimm_get_device_list();
> >  +    GSList *device_list, *list = nvdimm_get_device_list();
> >    -    for (; device_list; device_list = device_list->next) {
> >  +    for (device_list = list; device_list; device_list = 
> >  device_list->next) {
> >    AcpiSratMemoryAffinity *numamem = NULL;
> >    DeviceState *dev = device_list->data;
> >    Object *obj = OBJECT(dev);
> >  @@ -1356,7 +1356,7 @@ void nvdimm_build_srat(GArray *table_data)
> >    build_srat_memory(numamem, addr, size, node,
> >      MEM_AFFINITY_ENABLED | 
> >  MEM_AFFINITY_NON_VOLATILE);
> >    }
> >  -    g_slist_free(device_list);
> >  +    g_slist_free(list);
> >    }
> >      void nvdimm_build_acpi(GArray *table_offsets, GArray *table_data,  
> > >>>
> > >>>  
> > >>
> > >>
> > >>  
> > >  




[PATCH 11/14] bsd-user/sysarch: Move to using do_freebsd_arch_sysarch interface

2021-09-21 Thread Warner Losh
Convert the #ifdef'd i386 code to calling the i386 sysarch code we have
living in i386,x86_64/target_arch_sysarch.h do_freebsd_arch_sysarch
rather than having a separate copy. This is in preparation to remove it
entirely.

Signed-Off-By: Warner Losh 
---
 bsd-user/syscall.c | 45 +
 1 file changed, 1 insertion(+), 44 deletions(-)

diff --git a/bsd-user/syscall.c b/bsd-user/syscall.c
index a579d52ede..9bc72501b2 100644
--- a/bsd-user/syscall.c
+++ b/bsd-user/syscall.c
@@ -91,50 +91,7 @@ static abi_long do_obreak(abi_ulong new_brk)
 #if defined(TARGET_I386)
 static abi_long do_freebsd_sysarch(CPUX86State *env, int op, abi_ulong parms)
 {
-abi_long ret = 0;
-abi_ulong val;
-int idx;
-
-switch (op) {
-#ifdef TARGET_ABI32
-case TARGET_FREEBSD_I386_SET_GSBASE:
-case TARGET_FREEBSD_I386_SET_FSBASE:
-if (op == TARGET_FREEBSD_I386_SET_GSBASE)
-#else
-case TARGET_FREEBSD_AMD64_SET_GSBASE:
-case TARGET_FREEBSD_AMD64_SET_FSBASE:
-if (op == TARGET_FREEBSD_AMD64_SET_GSBASE)
-#endif
-idx = R_GS;
-else
-idx = R_FS;
-if (get_user(val, parms, abi_ulong))
-return -TARGET_EFAULT;
-cpu_x86_load_seg(env, idx, 0);
-env->segs[idx].base = val;
-break;
-#ifdef TARGET_ABI32
-case TARGET_FREEBSD_I386_GET_GSBASE:
-case TARGET_FREEBSD_I386_GET_FSBASE:
-if (op == TARGET_FREEBSD_I386_GET_GSBASE)
-#else
-case TARGET_FREEBSD_AMD64_GET_GSBASE:
-case TARGET_FREEBSD_AMD64_GET_FSBASE:
-if (op == TARGET_FREEBSD_AMD64_GET_GSBASE)
-#endif
-idx = R_GS;
-else
-idx = R_FS;
-val = env->segs[idx].base;
-if (put_user(val, parms, abi_ulong))
-return -TARGET_EFAULT;
-break;
-/* XXX handle the others... */
-default:
-ret = -TARGET_EINVAL;
-break;
-}
-return ret;
+do_freebsd_arch_sysarch(env, op, parms);
 }
 #endif
 
-- 
2.32.0




[PATCH 10/14] bsd-user: Add stop_all_tasks

2021-09-21 Thread Warner Losh
Similar to the same function in linux-user: this stops all the current tasks.

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
---
 bsd-user/main.c | 9 +
 bsd-user/qemu.h | 1 +
 2 files changed, 10 insertions(+)

diff --git a/bsd-user/main.c b/bsd-user/main.c
index ee84554854..cb5ea40236 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -195,6 +195,15 @@ static void usage(void)
 
 __thread CPUState *thread_cpu;
 
+void stop_all_tasks(void)
+{
+/*
+ * We trust when using NPTL (pthreads) start_exclusive() handles thread
+ * stopping correctly.
+ */
+start_exclusive();
+}
+
 bool qemu_cpu_is_self(CPUState *cpu)
 {
 return thread_cpu == cpu;
diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
index 431c5cfc03..4ee57b91f0 100644
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -103,6 +103,7 @@ typedef struct TaskState {
 } __attribute__((aligned(16))) TaskState;
 
 void init_task_state(TaskState *ts);
+void stop_all_tasks(void);
 extern const char *qemu_uname_release;
 
 /*
-- 
2.32.0




[PATCH 14/14] bsd-user/signal: Create a dummy signal queueing function

2021-09-21 Thread Warner Losh
Create dummy signal queueing function so we can start to integrate other
architectures (at the cost of signals remaining broken) to tame the
dependency graph a bit and to bring in signals in a more controlled
fashion.

Signed-off-by: Warner Losh 
---
 bsd-user/qemu.h   | 1 +
 bsd-user/signal.c | 8 
 2 files changed, 9 insertions(+)

diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
index 5a2fd87e44..85d1f8fd2a 100644
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -209,6 +209,7 @@ void process_pending_signals(CPUArchState *cpu_env);
 void signal_init(void);
 long do_sigreturn(CPUArchState *env);
 long do_rt_sigreturn(CPUArchState *env);
+int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info);
 abi_long do_sigaltstack(abi_ulong uss_addr, abi_ulong uoss_addr, abi_ulong sp);
 
 /* mmap.c */
diff --git a/bsd-user/signal.c b/bsd-user/signal.c
index ad6d935569..4e7f618944 100644
--- a/bsd-user/signal.c
+++ b/bsd-user/signal.c
@@ -19,6 +19,14 @@
 #include "qemu/osdep.h"
 
 #include "qemu.h"
+/*
+ * Queue a signal so that it will be send to the virtual CPU as soon as
+ * possible.
+ */
+int queue_signal(CPUArchState *env, int sig, target_siginfo_t *info)
+{
+return 1;
+}
 
 void signal_init(void)
 {
-- 
2.32.0




[PATCH 09/14] bsd-user: Remove used from TaskState

2021-09-21 Thread Warner Losh
The used field of TaskState is write only. Eliminate it.

Signed-off-by: Warner Losh 
---
 bsd-user/main.c | 1 -
 bsd-user/qemu.h | 1 -
 2 files changed, 2 deletions(-)

diff --git a/bsd-user/main.c b/bsd-user/main.c
index 48643eeabc..ee84554854 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -210,7 +210,6 @@ void init_task_state(TaskState *ts)
 {
 int i;
 
-ts->used = 1;
 ts->first_free = ts->sigqueue_table;
 for (i = 0; i < MAX_SIGQUEUE_SIZE - 1; i++) {
 ts->sigqueue_table[i].next = &ts->sigqueue_table[i + 1];
diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
index 22fc3a6c30..431c5cfc03 100644
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -92,7 +92,6 @@ typedef struct TaskState {
 
 struct TaskState *next;
 struct bsd_binprm *bprm;
-int used; /* non zero if used */
 struct image_info *info;
 
 struct emulated_sigtable sigtab[TARGET_NSIG];
-- 
2.32.0




[PATCH 04/14] bsd-user: export get_errno and is_error from syscall.c

2021-09-21 Thread Warner Losh
Make get_errno and is_error global so files other than syscall.c can use
them.

Signed-off-by: Warner Losh 
---
 bsd-user/qemu.h|  4 
 bsd-user/syscall.c | 10 +-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
index 522d6c4031..22fc3a6c30 100644
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -235,6 +235,10 @@ extern unsigned long target_dflssiz;
 extern unsigned long target_maxssiz;
 extern unsigned long target_sgrowsiz;
 
+/* syscall.c */
+abi_long get_errno(abi_long ret);
+int is_error(abi_long ret);
+
 /* user access */
 
 #define VERIFY_READ  PAGE_READ
diff --git a/bsd-user/syscall.c b/bsd-user/syscall.c
index 372836d44d..a579d52ede 100644
--- a/bsd-user/syscall.c
+++ b/bsd-user/syscall.c
@@ -33,18 +33,18 @@
 static abi_ulong target_brk;
 static abi_ulong target_original_brk;
 
-static inline abi_long get_errno(abi_long ret)
+abi_long get_errno(abi_long ret)
 {
-if (ret == -1)
+if (ret == -1) {
 /* XXX need to translate host -> target errnos here */
 return -(errno);
-else
-return ret;
+}
+return ret;
 }
 
 #define target_to_host_bitmask(x, tbl) (x)
 
-static inline int is_error(abi_long ret)
+int is_error(abi_long ret)
 {
 return (abi_ulong)ret >= (abi_ulong)(-4096);
 }
-- 
2.32.0




[PATCH 08/14] bsd-user/target_os_elf: If ELF_HWCAP2 is defined, publish it

2021-09-21 Thread Warner Losh
Some architectures publish AT_HWCAP2 as well as AT_HWCAP. Those
architectures will define this in their target_arch_elf.h files.  If it
is defined, then publish it.

Signed-off-by: Warner Losh 
---
 bsd-user/freebsd/target_os_elf.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/bsd-user/freebsd/target_os_elf.h b/bsd-user/freebsd/target_os_elf.h
index adcffd1ddb..e5ac8e8e50 100644
--- a/bsd-user/freebsd/target_os_elf.h
+++ b/bsd-user/freebsd/target_os_elf.h
@@ -112,6 +112,10 @@ static abi_ulong target_create_elf_tables(abi_ulong p, int 
argc, int envc,
 NEW_AUX_ENT(AT_ENTRY, load_bias + exec->e_entry);
 features = ELF_HWCAP;
 NEW_AUX_ENT(FREEBSD_AT_HWCAP, features);
+#ifdef ELF_HWCAP2
+features = ELF_HWCAP2;
+NEW_AUX_ENT(FREEBSD_AT_HWCAP2, features);
+#endif
 NEW_AUX_ENT(AT_UID, (abi_ulong)getuid());
 NEW_AUX_ENT(AT_EUID, (abi_ulong)geteuid());
 NEW_AUX_ENT(AT_GID, (abi_ulong)getgid());
-- 
2.32.0




Re: [PATCH v6 00/21] Add LoongArch linux-user emulation support

2021-09-21 Thread Song Gao
Hi, Richard.

On 09/21/2021 05:17 AM, Richard Henderson wrote:
> On 9/17/21 1:12 AM, Song Gao wrote:
>> The 'o32' code has been deleted at the latest kernel [1]. This series only 
>> support
>> linux-user emulation.
> 
> I have now reviewed all but the linux-user/ portion.
> 
Thank you!
> I see that kernel upstreaming is in progress,
> 
> https://lore.kernel.org/linux-kernel/20210917035736.3934017-1-chenhua...@loongson.cn/
> 
> so hopefully this will be resolved soon.
> 
> Have you started working on system mode support for LoongArch, so that one 
> may run that kernel?
> Yes. We already support running the old kernel, but we don't support running 
> the latest kernel yet.

Song Gao
thanks
> 
> r~




[PATCH 03/14] bsd-user: TARGET_RESET define is unused, remove it

2021-09-21 Thread Warner Losh
Signed-off-by: Warner Losh 
---
 bsd-user/i386/target_arch_cpu.h   | 2 --
 bsd-user/x86_64/target_arch_cpu.h | 2 --
 2 files changed, 4 deletions(-)

diff --git a/bsd-user/i386/target_arch_cpu.h b/bsd-user/i386/target_arch_cpu.h
index 978e8066af..b28602adbb 100644
--- a/bsd-user/i386/target_arch_cpu.h
+++ b/bsd-user/i386/target_arch_cpu.h
@@ -23,8 +23,6 @@
 
 #define TARGET_DEFAULT_CPU_MODEL "qemu32"
 
-#define TARGET_CPU_RESET(cpu)
-
 static inline void target_cpu_init(CPUX86State *env,
 struct target_pt_regs *regs)
 {
diff --git a/bsd-user/x86_64/target_arch_cpu.h 
b/bsd-user/x86_64/target_arch_cpu.h
index 5f5ee602f9..5172b230f0 100644
--- a/bsd-user/x86_64/target_arch_cpu.h
+++ b/bsd-user/x86_64/target_arch_cpu.h
@@ -23,8 +23,6 @@
 
 #define TARGET_DEFAULT_CPU_MODEL "qemu64"
 
-#define TARGET_CPU_RESET(cpu)
-
 static inline void target_cpu_init(CPUX86State *env,
 struct target_pt_regs *regs)
 {
-- 
2.32.0




[PATCH 07/14] bsd-user/target_os_elf.h: Remove fallback ELF_HWCAP and reorder

2021-09-21 Thread Warner Losh
All architectures have a ELF_HWCAP, so remove the fallback ifdef.
Place ELF_HWCAP in the same order as on native FreeBSD.

Signed-off-by: Warner Losh 
---
 bsd-user/freebsd/target_os_elf.h | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/bsd-user/freebsd/target_os_elf.h b/bsd-user/freebsd/target_os_elf.h
index 2d03a883aa..adcffd1ddb 100644
--- a/bsd-user/freebsd/target_os_elf.h
+++ b/bsd-user/freebsd/target_os_elf.h
@@ -38,10 +38,6 @@
 #define ELF_PLATFORM (NULL)
 #endif
 
-#ifndef ELF_HWCAP
-#define ELF_HWCAP 0
-#endif
-
 /* XXX Look at the other conflicting AT_* values. */
 #define FREEBSD_AT_NCPUS 19
 #define FREEBSD_AT_HWCAP 25
@@ -114,12 +110,12 @@ static abi_ulong target_create_elf_tables(abi_ulong p, 
int argc, int envc,
 NEW_AUX_ENT(AT_FLAGS, (abi_ulong)0);
 NEW_AUX_ENT(FREEBSD_AT_NCPUS, (abi_ulong)bsd_get_ncpu());
 NEW_AUX_ENT(AT_ENTRY, load_bias + exec->e_entry);
+features = ELF_HWCAP;
+NEW_AUX_ENT(FREEBSD_AT_HWCAP, features);
 NEW_AUX_ENT(AT_UID, (abi_ulong)getuid());
 NEW_AUX_ENT(AT_EUID, (abi_ulong)geteuid());
 NEW_AUX_ENT(AT_GID, (abi_ulong)getgid());
 NEW_AUX_ENT(AT_EGID, (abi_ulong)getegid());
-features = ELF_HWCAP;
-NEW_AUX_ENT(FREEBSD_AT_HWCAP, features);
 target_auxents = sp; /* Note where the aux entries are in the target */
 #ifdef ARCH_DLINFO
 /*
-- 
2.32.0




[PATCH 06/14] bsd-user: move TARGET_MC_GET_CLEAR_RET to target_os_signal.h

2021-09-21 Thread Warner Losh
Move TARGET_MC_GET_CLEAR_RET to freebsd/target_os_signal.h since it's
FreeBSD-wide.

Signed-off-by: Warner Losh 
---
 bsd-user/freebsd/target_os_signal.h  | 3 +++
 bsd-user/i386/target_arch_signal.h   | 2 --
 bsd-user/x86_64/target_arch_signal.h | 2 --
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/bsd-user/freebsd/target_os_signal.h 
b/bsd-user/freebsd/target_os_signal.h
index 3ed454e086..9fcdfce003 100644
--- a/bsd-user/freebsd/target_os_signal.h
+++ b/bsd-user/freebsd/target_os_signal.h
@@ -1,6 +1,9 @@
 #ifndef _TARGET_OS_SIGNAL_H_
 #define _TARGET_OS_SIGNAL_H_
 
+/* FreeBSD's sys/ucontex.h defines this */
+#define TARGET_MC_GET_CLEAR_RET 0x0001
+
 #include "target_os_siginfo.h"
 #include "target_arch_signal.h"
 
diff --git a/bsd-user/i386/target_arch_signal.h 
b/bsd-user/i386/target_arch_signal.h
index 9812c6b034..a90750d602 100644
--- a/bsd-user/i386/target_arch_signal.h
+++ b/bsd-user/i386/target_arch_signal.h
@@ -27,8 +27,6 @@
 #define TARGET_MINSIGSTKSZ  (512 * 4)   /* min sig stack size */
 #define TARGET_SIGSTKSZ (MINSIGSTKSZ + 32768)   /* recommended size */
 
-#define TARGET_MC_GET_CLEAR_RET 0x0001
-
 struct target_sigcontext {
 /* to be added */
 };
diff --git a/bsd-user/x86_64/target_arch_signal.h 
b/bsd-user/x86_64/target_arch_signal.h
index 4c1ff0e5ba..4bb753b08b 100644
--- a/bsd-user/x86_64/target_arch_signal.h
+++ b/bsd-user/x86_64/target_arch_signal.h
@@ -27,8 +27,6 @@
 #define TARGET_MINSIGSTKSZ  (512 * 4)   /* min sig stack size */
 #define TARGET_SIGSTKSZ (MINSIGSTKSZ + 32768)   /* recommended size */
 
-#define TARGET_MC_GET_CLEAR_RET 0x0001
-
 struct target_sigcontext {
 /* to be added */
 };
-- 
2.32.0




[PATCH 01/14] bsd-user/target_os-user.h: Remove support for FreeBSD older than 12.0

2021-09-21 Thread Warner Losh
Signed-off-by: Warner Losh 
---
 bsd-user/freebsd/target_os_user.h | 100 +-
 1 file changed, 1 insertion(+), 99 deletions(-)

diff --git a/bsd-user/freebsd/target_os_user.h 
b/bsd-user/freebsd/target_os_user.h
index 95b1fa9f99..19892c5071 100644
--- a/bsd-user/freebsd/target_os_user.h
+++ b/bsd-user/freebsd/target_os_user.h
@@ -61,15 +61,7 @@ struct target_sockaddr_storage {
 /*
  * from sys/user.h
  */
-#if defined(__FreeBSD_version) && __FreeBSD_version >= 1200031
 #define TARGET_KI_NSPARE_INT2
-#elif defined(__FreeBSD_version) && __FreeBSD_version >= 110
-#define TARGET_KI_NSPARE_INT4
-#elif defined(__FreeBSD_version) && __FreeBSD_version >= 100
-#define TARGET_KI_NSPARE_INT7
-#else
-#define TARGET_KI_NSPARE_INT9
-#endif /* ! __FreeBSD_version >= 100 */
 #define TARGET_KI_NSPARE_LONG   12
 #define TARGET_KI_NSPARE_PTR6
 
@@ -116,11 +108,7 @@ struct target_kinfo_proc {
 int32_t ki_tsid;/* Terminal session ID */
 int16_t ki_jobc;/* job control counter */
 int16_t ki_spare_short1;/* unused (just here for alignment) */
-#if defined(__FreeBSD_version) && __FreeBSD_version >= 1200031
 int32_t ki_tdev__freebsd11; /* controlling tty dev */
-#else
-int32_t ki_tdev;/* controlling tty dev */
-#endif
 target_sigset_t ki_siglist; /* Signals arrived but not delivered */
 target_sigset_t ki_sigmask; /* Current signal mask */
 target_sigset_t ki_sigignore;   /* Signals being ignored */
@@ -164,45 +152,24 @@ struct target_kinfo_proc {
 int8_t  ki_nice;/* Process "nice" value */
 charki_lock;/* Process lock (prevent swap) count */
 charki_rqindex; /* Run queue index */
-#if defined(__FreeBSD_version) && __FreeBSD_version >= 110
 u_char  ki_oncpu_old;   /* Which cpu we are on (legacy) */
 u_char  ki_lastcpu_old; /* Last cpu we were on (legacy) */
-#else
-u_char  ki_oncpu;   /* Which cpu we are on */
-u_char  ki_lastcpu; /* Last cpu we were on */
-#endif /* ! __FreeBSD_version >= 110 */
-#if defined(__FreeBSD_version) && __FreeBSD_version >= 90
 charki_tdname[TARGET_TDNAMLEN + 1];  /* thread name */
-#else
-charki_ocomm[TARGET_TDNAMLEN + 1];   /* thread name */
-#endif /* ! __FreeBSD_version >= 90 */
 charki_wmesg[TARGET_WMESGLEN + 1];   /* wchan message */
 charki_login[TARGET_LOGNAMELEN + 1]; /* setlogin name */
 charki_lockname[TARGET_LOCKNAMELEN + 1]; /* lock name */
 charki_comm[TARGET_COMMLEN + 1]; /* command name */
 charki_emul[TARGET_KI_EMULNAMELEN + 1];  /* emulation name */
-#if defined(__FreeBSD_version) && __FreeBSD_version >= 90
 charki_loginclass[TARGET_LOGINCLASSLEN + 1]; /* login class */
-#endif /* ! __FreeBSD_version >= 90 */
 
-#if defined(__FreeBSD_version) && __FreeBSD_version >= 90
 charki_sparestrings[50];/* spare string space */
-#else
-charki_sparestrings[68];/* spare string space */
-#endif /* ! __FreeBSD_version >= 90 */
 int32_t ki_spareints[TARGET_KI_NSPARE_INT]; /* spare room for growth */
-#if defined(__FreeBSD_version) && __FreeBSD_version >= 1200031
- uint64_t ki_tdev;  /* controlling tty dev */
-#endif
-#if defined(__FreeBSD_version) && __FreeBSD_version >= 110
+uint64_t ki_tdev;  /* controlling tty dev */
 int32_t ki_oncpu;   /* Which cpu we are on */
 int32_t ki_lastcpu; /* Last cpu we were on */
 int32_t ki_tracer;  /* Pid of tracing process */
-#endif /* __FreeBSD_version >= 110 */
-#if defined(__FreeBSD_version) && __FreeBSD_version >= 90
 int32_t ki_flag2;   /* P2_* flags */
 int32_t ki_fibnum;  /* Default FIB number */
-#endif /* ! __FreeBSD_version >= 90 */
 uint32_tki_cr_flags;/* Credential flags */
 int32_t ki_jid; /* Process jail ID */
 int32_t ki_numthreads;  /* XXXKSE number of threads in total */
@@ -234,18 +201,8 @@ struct target_kinfo_file {
 int32_t  kf_flags;  /* Flags. */
 int32_t  kf_pad0;  /* Round to 64 bit alignment. */
 int64_t  kf_offset;  /* Seek location. */
-#if defined(__FreeBSD_version) && __FreeBSD_version < 1200031
-int32_t  kf_vnode_type;  /* Vnode type. */
-int32_t  kf_sock_domain;  /* Socket domain. */
-int32_t  kf_sock_type;  /* Socket type. */
-int32_t  kf_sock_protocol; /* Socket protocol. */
-struct target_sockaddr_storage kf_sa_local; /* Socket address. */
-struct target_sockaddr_storage kf_sa_peer; /* Peer address. */
-#endif
-#if defined(__FreeBSD_version) && __FreeBSD_version >= 90
 union {
 struct {
-#if defined(__FreeBSD_version) && __FreeBSD_version >= 1200031
 uint32_

[PATCH 13/14] bsd-user: Rename sigqueue to qemu_sigqueue

2021-09-21 Thread Warner Losh
To avoid a name clash with FreeBSD's sigqueue data structure in
signalvar.h, rename sigqueue to qemu_sigqueue. This structure
is currently defined, but unused.

Signed-off-by: Warner Losh 
---
 bsd-user/qemu.h | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
index 3dde381d5d..5a2fd87e44 100644
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -73,15 +73,15 @@ struct image_info {
 
 #define MAX_SIGQUEUE_SIZE 1024
 
-struct sigqueue {
-struct sigqueue *next;
+struct qemu_sigqueue {
+struct qemu_sigqueue *next;
+target_siginfo_t info;
 };
 
 struct emulated_sigtable {
 int pending; /* true if signal is pending */
-struct sigqueue *first;
-/* in order to always have memory for the first signal, we put it here */
-struct sigqueue info;
+struct qemu_sigqueue *first;
+struct qemu_sigqueue info; /* Put first signal info here */
 };
 
 /*
@@ -95,8 +95,8 @@ typedef struct TaskState {
 struct image_info *info;
 
 struct emulated_sigtable sigtab[TARGET_NSIG];
-struct sigqueue sigqueue_table[MAX_SIGQUEUE_SIZE]; /* siginfo queue */
-struct sigqueue *first_free; /* first free siginfo queue entry */
+struct qemu_sigqueue sigqueue_table[MAX_SIGQUEUE_SIZE]; /* siginfo queue */
+struct qemu_sigqueue *first_free; /* first free siginfo queue entry */
 int signal_pending; /* non zero if a signal may be pending */
 
 uint8_t stack[];
-- 
2.32.0




[PATCH 05/14] bsd-user/errno_defs.h: Add internal error numbers

2021-09-21 Thread Warner Losh
From: Stacey Son 

To emulate signals and interrupted system calls, we need to have the
same mechanisms we have in the kernel, including these errno values.

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
---
 bsd-user/errno_defs.h | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/bsd-user/errno_defs.h b/bsd-user/errno_defs.h
index 1efa502a12..b538dd93da 100644
--- a/bsd-user/errno_defs.h
+++ b/bsd-user/errno_defs.h
@@ -1,6 +1,3 @@
-/*  $OpenBSD: errno.h,v 1.20 2007/09/03 14:37:52 millert Exp $  */
-/*  $NetBSD: errno.h,v 1.10 1996/01/20 01:33:53 jtc Exp $   */
-
 /*
  * Copyright (c) 1982, 1986, 1989, 1993
  *  The Regents of the University of California.  All rights reserved.
@@ -37,6 +34,9 @@
  *  @(#)errno.h 8.5 (Berkeley) 1/21/94
  */
 
+#ifndef _ERRNO_DEFS_H_
+#define _ERRNO_DEFS_H_
+
 #define TARGET_EPERM1   /* Operation not permitted */
 #define TARGET_ENOENT   2   /* No such file or directory */
 #define TARGET_ESRCH3   /* No such process */
@@ -147,3 +147,11 @@
 #define TARGET_EIDRM89  /* Identifier removed */
 #define TARGET_ENOMSG   90  /* No message of desired type 
*/
 #define TARGET_ELAST90  /* Must be equal largest errno 
*/
+
+/* Internal errors: */
+#define TARGET_EJUSTRETURN  254 /* Just return without
+   modifing regs */
+#define TARGET_ERESTART 255 /* Restart syscall */
+#define TARGET_ERESTARTSYS  TARGET_ERESTART /* Linux compat */
+
+#endif /* !  _ERRNO_DEFS_H_ */
-- 
2.32.0




[PATCH 00/14] bsd-user: misc cleanup for aarch64 import

2021-09-21 Thread Warner Losh
Prepare for aarch64 support (the next architecture to be upstreamed). As the
aarch64 emulation is more complete, it relies on a number of different items.
In some cases, I've pulled in the full support from bsd-user fork. In other
cases I've created a simple stub (as is the case for signals, which have
independent changes pending, so I wanted to be as minimal as possible.  Since
all pre-12.2 support was purged from the bsd-user fork, go ahead and remove it
here. FreeBSD 11.x goes out of support at the end of the month. Remove what
little multi-version support that's in upstream.

The aarch64 patch set will be published soon after some of the dust settles.

Stacey Son (1):
  bsd-user/errno_defs.h: Add internal error numbers

Warner Losh (13):
  bsd-user/target_os-user.h: Remove support for FreeBSD older than 12.0
  bsd-user/strace.list: Remove support for FreeBSD versions older than
12.0
  bsd-user: TARGET_RESET define is unused, remove it
  bsd-user: export get_errno and is_error from syscall.c
  bsd-user: move TARGET_MC_GET_CLEAR_RET to target_os_signal.h
  bsd-user/target_os_elf.h: Remove fallback ELF_HWCAP and reorder
  bsd-user/target_os_elf: If ELF_HWCAP2 is defined, publish it
  bsd-user: Remove used from TaskState
  bsd-user: Add stop_all_tasks
  bsd-user/sysarch: Move to using do_freebsd_arch_sysarch interface
  bsd-user/sysarch: Provide a per-arch framework for sysarch syscall
  bsd-user: Rename sigqueue to qemu_sigqueue
  bsd-user/signal: Create a dummy signal queueing function

 bsd-user/errno_defs.h|  14 +++-
 bsd-user/freebsd/meson.build |   3 +
 bsd-user/freebsd/os-sys.c|  28 
 bsd-user/freebsd/strace.list |  11 ---
 bsd-user/freebsd/target_os_elf.h |  12 ++--
 bsd-user/freebsd/target_os_signal.h  |   3 +
 bsd-user/freebsd/target_os_user.h| 100 +--
 bsd-user/i386/target_arch_cpu.h  |   2 -
 bsd-user/i386/target_arch_signal.h   |   2 -
 bsd-user/main.c  |  10 ++-
 bsd-user/meson.build |   6 ++
 bsd-user/qemu.h  |  24 ---
 bsd-user/signal.c|   8 +++
 bsd-user/syscall.c   |  60 ++--
 bsd-user/x86_64/target_arch_cpu.h|   2 -
 bsd-user/x86_64/target_arch_signal.h |   2 -
 16 files changed, 96 insertions(+), 191 deletions(-)
 create mode 100644 bsd-user/freebsd/meson.build
 create mode 100644 bsd-user/freebsd/os-sys.c

-- 
2.32.0




[PATCH 02/14] bsd-user/strace.list: Remove support for FreeBSD versions older than 12.0

2021-09-21 Thread Warner Losh
Signed-off-by: Warner Losh 
---
 bsd-user/freebsd/strace.list | 11 ---
 1 file changed, 11 deletions(-)

diff --git a/bsd-user/freebsd/strace.list b/bsd-user/freebsd/strace.list
index b01b5f36e8..275d2dbe27 100644
--- a/bsd-user/freebsd/strace.list
+++ b/bsd-user/freebsd/strace.list
@@ -33,10 +33,6 @@
 { TARGET_FREEBSD_NR___syscall, "__syscall", NULL, NULL, NULL },
 { TARGET_FREEBSD_NR___sysctl, "__sysctl", NULL, print_sysctl, NULL },
 { TARGET_FREEBSD_NR__umtx_op, "_umtx_op", "%s(%#x, %d, %d, %#x, %#x)", NULL, 
NULL },
-#if defined(__FreeBSD_version) && __FreeBSD_version < 100
-{ TARGET_FREEBSD_NR__umtx_lock, "__umtx_lock", NULL, NULL, NULL },
-{ TARGET_FREEBSD_NR__umtx_unlock, "__umtx_unlock", NULL, NULL, NULL },
-#endif
 { TARGET_FREEBSD_NR_accept, "accept", "%s(%d,%#x,%#x)", NULL, NULL },
 { TARGET_FREEBSD_NR_accept4, "accept4", "%s(%d,%d,%#x,%#x)", NULL, NULL },
 { TARGET_FREEBSD_NR_access, "access", "%s(\"%s\",%#o)", NULL, NULL },
@@ -49,10 +45,6 @@
 { TARGET_FREEBSD_NR_cap_fcntls_get, "cap_fcntls_get", NULL, NULL, NULL },
 { TARGET_FREEBSD_NR_cap_fcntls_limit, "cap_fcntls_limit", NULL, NULL, NULL },
 { TARGET_FREEBSD_NR_cap_getmode, "cap_getmode", NULL, NULL, NULL },
-#if defined(__FreeBSD_version) && __FreeBSD_version < 100
-{ TARGET_FREEBSD_NR_cap_getrights, "cap_getrights", NULL, NULL, NULL },
-{ TARGET_FREEBSD_NR_cap_new, "cap_new", NULL, NULL, NULL },
-#endif
 { TARGET_FREEBSD_NR_cap_ioctls_get, "cap_ioctls_get", NULL, NULL, NULL },
 { TARGET_FREEBSD_NR_cap_ioctls_limit, "cap_ioctls_limit", NULL, NULL, NULL },
 { TARGET_FREEBSD_NR_cap_rights_limit, "cap_rights_limit", NULL, NULL, NULL },
@@ -146,9 +138,6 @@
 { TARGET_FREEBSD_NR_freebsd11_kevent, "freebsd11_kevent", NULL, NULL, NULL },
 { TARGET_FREEBSD_NR_kevent, "kevent", NULL, NULL, NULL },
 { TARGET_FREEBSD_NR_kill, "kill", NULL, NULL, NULL },
-#if defined(__FreeBSD_version) && __FreeBSD_version < 100
-{ TARGET_FREEBSD_NR_killpg, "killpg", NULL, NULL, NULL },
-#endif
 { TARGET_FREEBSD_NR_kqueue, "kqueue", NULL, NULL, NULL },
 { TARGET_FREEBSD_NR_ktrace, "ktrace", NULL, NULL, NULL },
 { TARGET_FREEBSD_NR_lchown, "lchown", NULL, NULL, NULL },
-- 
2.32.0




[PATCH 12/14] bsd-user/sysarch: Provide a per-arch framework for sysarch syscall

2021-09-21 Thread Warner Losh
Add the missing glue to pull in do_freebsd_sysarch to call
do_freebsd_arch_sysarch. Put it in os-sys.c, which will be used for
sysctl and sysarch system calls because they are mostly arch specific.

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
---
 bsd-user/freebsd/meson.build |  3 +++
 bsd-user/freebsd/os-sys.c| 28 
 bsd-user/meson.build |  6 ++
 bsd-user/qemu.h  |  3 +++
 bsd-user/syscall.c   |  7 ---
 5 files changed, 40 insertions(+), 7 deletions(-)
 create mode 100644 bsd-user/freebsd/meson.build
 create mode 100644 bsd-user/freebsd/os-sys.c

diff --git a/bsd-user/freebsd/meson.build b/bsd-user/freebsd/meson.build
new file mode 100644
index 00..4b69cca7b9
--- /dev/null
+++ b/bsd-user/freebsd/meson.build
@@ -0,0 +1,3 @@
+bsd_user_ss.add(files(
+  'os-sys.c',
+))
diff --git a/bsd-user/freebsd/os-sys.c b/bsd-user/freebsd/os-sys.c
new file mode 100644
index 00..756b024305
--- /dev/null
+++ b/bsd-user/freebsd/os-sys.c
@@ -0,0 +1,28 @@
+/*
+ *  FreeBSD sysctl() and sysarch() system call emulation
+ *
+ *  Copyright (c) 2013-15 Stacey D. Son
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see .
+ */
+
+#include "qemu.h"
+#include "target_arch_sysarch.h"
+
+/* sysarch() is architecture dependent. */
+abi_long do_freebsd_sysarch(void *cpu_env, abi_long arg1, abi_long arg2)
+{
+
+return do_freebsd_arch_sysarch(cpu_env, arg1, arg2);
+}
diff --git a/bsd-user/meson.build b/bsd-user/meson.build
index 0369549340..561913de05 100644
--- a/bsd-user/meson.build
+++ b/bsd-user/meson.build
@@ -8,3 +8,9 @@ bsd_user_ss.add(files(
   'syscall.c',
   'uaccess.c',
 ))
+
+# Pull in the OS-specific build glue, if any
+if fs.exists(targetos)
+   subdir(targetos)
+endif
+
diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
index 4ee57b91f0..3dde381d5d 100644
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -239,6 +239,9 @@ extern unsigned long target_sgrowsiz;
 abi_long get_errno(abi_long ret);
 int is_error(abi_long ret);
 
+/* os-sys.c */
+abi_long do_freebsd_sysarch(void *cpu_env, abi_long arg1, abi_long arg2);
+
 /* user access */
 
 #define VERIFY_READ  PAGE_READ
diff --git a/bsd-user/syscall.c b/bsd-user/syscall.c
index 9bc72501b2..9f51563abd 100644
--- a/bsd-user/syscall.c
+++ b/bsd-user/syscall.c
@@ -88,13 +88,6 @@ static abi_long do_obreak(abi_ulong new_brk)
 return 0;
 }
 
-#if defined(TARGET_I386)
-static abi_long do_freebsd_sysarch(CPUX86State *env, int op, abi_ulong parms)
-{
-do_freebsd_arch_sysarch(env, op, parms);
-}
-#endif
-
 #ifdef __FreeBSD__
 /*
  * XXX this uses the undocumented oidfmt interface to find the kind of
-- 
2.32.0




[PATCH v2 2/3] QIOChannelSocket: Implement io_async_write & io_async_flush

2021-09-21 Thread Leonardo Bras
Implement the new optional callbacks io_async_write and io_async_flush on
QIOChannelSocket, but enables it only when MSG_ZEROCOPY feature is
available in the host kernel, and TCP sockets are used.

qio_channel_socket_writev() contents were moved to a helper function
__qio_channel_socket_writev() which accepts an extra 'flag' argument.
This helper function is used to implement qio_channel_socket_writev(), with
flags = 0, keeping its behavior unchanged, and
qio_channel_socket_async_writev() with flags = MSG_ZEROCOPY.

qio_channel_socket_async_flush() was implemented by reading the socket's error
queue, which will have information on MSG_ZEROCOPY send completion.
There is no need to worry about re-sending packets in case any error happens, as
MSG_ZEROCOPY only works with TCP and it will re-transmit if any error occurs.

Notes on using async_write():
- As MSG_ZEROCOPY tells the kernel to use the same user buffer to avoid copying,
some caution is necessary to avoid overwriting any buffer before it's sent.
If something like this happens, a newer version of the buffer may be sent 
instead.
- If this is a problem, it's recommended to use async_flush() before freeing or
re-using the buffer.

.
Signed-off-by: Leonardo Bras 
---
 include/io/channel-socket.h |   2 +
 io/channel-socket.c | 145 ++--
 2 files changed, 140 insertions(+), 7 deletions(-)

diff --git a/include/io/channel-socket.h b/include/io/channel-socket.h
index e747e63514..4d1be0637a 100644
--- a/include/io/channel-socket.h
+++ b/include/io/channel-socket.h
@@ -47,6 +47,8 @@ struct QIOChannelSocket {
 socklen_t localAddrLen;
 struct sockaddr_storage remoteAddr;
 socklen_t remoteAddrLen;
+ssize_t async_queued;
+ssize_t async_sent;
 };
 
 
diff --git a/io/channel-socket.c b/io/channel-socket.c
index 606ec97cf7..128fab4cd2 100644
--- a/io/channel-socket.c
+++ b/io/channel-socket.c
@@ -26,9 +26,23 @@
 #include "io/channel-watch.h"
 #include "trace.h"
 #include "qapi/clone-visitor.h"
+#ifdef CONFIG_LINUX
+#include 
+#include 
+#endif
 
 #define SOCKET_MAX_FDS 16
 
+static ssize_t qio_channel_socket_async_writev(QIOChannel *ioc,
+   const struct iovec *iov,
+   size_t niov,
+   int *fds,
+   size_t nfds,
+   Error **errp);
+
+static void qio_channel_socket_async_flush(QIOChannel *ioc,
+   Error **errp);
+
 SocketAddress *
 qio_channel_socket_get_local_address(QIOChannelSocket *ioc,
  Error **errp)
@@ -55,6 +69,8 @@ qio_channel_socket_new(void)
 
 sioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET));
 sioc->fd = -1;
+sioc->async_queued = 0;
+sioc->async_sent = 0;
 
 ioc = QIO_CHANNEL(sioc);
 qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN);
@@ -140,6 +156,7 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc,
 Error **errp)
 {
 int fd;
+int ret, v = 1;
 
 trace_qio_channel_socket_connect_sync(ioc, addr);
 fd = socket_connect(addr, errp);
@@ -154,6 +171,19 @@ int qio_channel_socket_connect_sync(QIOChannelSocket *ioc,
 return -1;
 }
 
+#ifdef CONFIG_LINUX
+if (addr->type != SOCKET_ADDRESS_TYPE_INET) {
+return 0;
+}
+
+ret = qemu_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &v, sizeof(v));
+if (ret >= 0) {
+QIOChannelClass *klass = QIO_CHANNEL_GET_CLASS(ioc);
+klass->io_async_writev = qio_channel_socket_async_writev;
+klass->io_async_flush = qio_channel_socket_async_flush;
+}
+#endif
+
 return 0;
 }
 
@@ -520,12 +550,13 @@ static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
 return ret;
 }
 
-static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
- const struct iovec *iov,
- size_t niov,
- int *fds,
- size_t nfds,
- Error **errp)
+static ssize_t __qio_channel_socket_writev(QIOChannel *ioc,
+   const struct iovec *iov,
+   size_t niov,
+   int *fds,
+   size_t nfds,
+   int flags,
+   Error **errp)
 {
 QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
 ssize_t ret;
@@ -558,7 +589,7 @@ static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
 }
 
  retry:
-ret = sendmsg(sioc->fd, &msg, 0);
+ret = sendmsg(sioc->

[PATCH v2 3/3] multifd: Send using asynchronous write on nocomp to send RAM pages.

2021-09-21 Thread Leonardo Bras
Change multifd nocomp version to use asynchronous write for RAM pages, and
benefit of MSG_ZEROCOPY when it's available.

The asynchronous flush happens on cleanup only, before destroying the 
QIOChannel.

This will work fine on RAM migration because the RAM pages are not usually 
freed,
and there is no problem on changing the pages content between async_send() and
the actual sending of the buffer, because this change will dirty the page and
cause it to be re-sent on a next iteration anyway.

Signed-off-by: Leonardo Bras 
---
 migration/multifd.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/migration/multifd.c b/migration/multifd.c
index 377da78f5b..d247207a0a 100644
--- a/migration/multifd.c
+++ b/migration/multifd.c
@@ -105,7 +105,7 @@ static int nocomp_send_prepare(MultiFDSendParams *p, 
uint32_t used,
  */
 static int nocomp_send_write(MultiFDSendParams *p, uint32_t used, Error **errp)
 {
-return qio_channel_writev_all(p->c, p->pages->iov, used, errp);
+return qio_channel_async_writev_all(p->c, p->pages->iov, used, errp);
 }
 
 /**
@@ -546,6 +546,7 @@ void multifd_save_cleanup(void)
 MultiFDSendParams *p = &multifd_send_state->params[i];
 Error *local_err = NULL;
 
+qio_channel_async_flush(p->c, NULL);
 socket_send_channel_destroy(p->c);
 p->c = NULL;
 qemu_mutex_destroy(&p->mutex);
-- 
2.33.0




[PATCH v2 0/3] QIOChannel async_write & async_flush + MSG_ZEROCOPY + multifd

2021-09-21 Thread Leonardo Bras
This patch series intends to enable MSG_ZEROCOPY in QIOChannel, and make
use of it for multifd migration performance improvement.

Patch #1 creates new callbacks for QIOChannel, allowing the implementation
of asynchronous writing.

Patch #2 implements async_write and async_flush on QIOChannelSocket,

Patch #3 Makes use of async_write + async_flush to enable MSG_ZEROCOPY
for migration using multifd nocomp. 


Results:
So far, the resource usage of __sys_sendmsg() reduced 15 times, and the
overall migration took 13-18% less time, based on a synthetic workload.

The objective is to reduce migration time in hosts with heavy cpu usage.

---
Changes since v1:
- Reimplemented the patchset using async_write + async_flush approach.
- Implemented a flush to be able to tell whenever all data was written.

Leonardo Bras (3):
  QIOCHannel: Add io_async_writev & io_async_flush callbacks
  QIOChannelSocket: Implement io_async_write & io_async_flush
  multifd: Send using asynchronous write on nocomp to send RAM pages.

 include/io/channel-socket.h |   2 +
 include/io/channel.h|  93 +++
 io/channel-socket.c | 145 ++--
 io/channel.c|  66 
 migration/multifd.c |   3 +-
 5 files changed, 271 insertions(+), 38 deletions(-)

-- 
2.33.0




[PATCH v2 1/3] QIOCHannel: Add io_async_writev & io_async_flush callbacks

2021-09-21 Thread Leonardo Bras
Adds io_async_writev and io_async_flush as optional callback to QIOChannelClass,
allowing the implementation of asynchronous writes by subclasses.

How to use them:
- Write data using qio_channel_async_writev(),
- Wait write completion with qio_channel_async_flush().

Notes:
Some asynchronous implementations may benefit from zerocopy mechanisms, so it's
recommended to keep the write buffer untouched until the return of
qio_channel_async_flush().

As the new callbacks are optional, if a subclass does not implement them
there will be a fallback to the mandatory synchronous implementation:
- io_async_writev will fallback to io_writev,
- io_async_flush will return without changing anything.
This makes it simpler for the user to make use of the asynchronous implementation.

Also, some functions like qio_channel_writev_full_all() were adapted to
offer an async version, and make better use of the new callbacks.

Signed-off-by: Leonardo Bras 
---
 include/io/channel.h | 93 +---
 io/channel.c | 66 ---
 2 files changed, 129 insertions(+), 30 deletions(-)

diff --git a/include/io/channel.h b/include/io/channel.h
index 88988979f8..74f2e3ae8a 100644
--- a/include/io/channel.h
+++ b/include/io/channel.h
@@ -136,6 +136,14 @@ struct QIOChannelClass {
   IOHandler *io_read,
   IOHandler *io_write,
   void *opaque);
+ssize_t (*io_async_writev)(QIOChannel *ioc,
+   const struct iovec *iov,
+   size_t niov,
+   int *fds,
+   size_t nfds,
+   Error **errp);
+   void (*io_async_flush)(QIOChannel *ioc,
+  Error **errp);
 };
 
 /* General I/O handling functions */
@@ -255,12 +263,17 @@ ssize_t qio_channel_readv_full(QIOChannel *ioc,
  * or QIO_CHANNEL_ERR_BLOCK if no data is can be sent
  * and the channel is non-blocking
  */
-ssize_t qio_channel_writev_full(QIOChannel *ioc,
-const struct iovec *iov,
-size_t niov,
-int *fds,
-size_t nfds,
-Error **errp);
+ssize_t __qio_channel_writev_full(QIOChannel *ioc,
+  const struct iovec *iov,
+  size_t niov,
+  int *fds,
+  size_t nfds,
+  bool async,
+  Error **errp);
+#define qio_channel_writev_full(ioc, iov, niov, fds, nfds, errp) \
+__qio_channel_writev_full(ioc, iov, niov, fds, nfds, false, errp)
+#define qio_channel_async_writev_full(ioc, iov, niov, fds, nfds, errp) \
+__qio_channel_writev_full(ioc, iov, niov, fds, nfds, true, errp)
 
 /**
  * qio_channel_readv_all_eof:
@@ -339,10 +352,15 @@ int qio_channel_readv_all(QIOChannel *ioc,
  *
  * Returns: 0 if all bytes were written, or -1 on error
  */
-int qio_channel_writev_all(QIOChannel *ioc,
-   const struct iovec *iov,
-   size_t niov,
-   Error **erp);
+int __qio_channel_writev_all(QIOChannel *ioc,
+ const struct iovec *iov,
+ size_t niov,
+ bool async,
+ Error **erp);
+#define qio_channel_writev_all(ioc, iov, niov, erp) \
+__qio_channel_writev_all(ioc, iov, niov, false, erp)
+#define qio_channel_async_writev_all(ioc, iov, niov, erp) \
+__qio_channel_writev_all(ioc, iov, niov, true, erp)
 
 /**
  * qio_channel_readv:
@@ -849,10 +867,55 @@ int qio_channel_readv_full_all(QIOChannel *ioc,
  * Returns: 0 if all bytes were written, or -1 on error
  */
 
-int qio_channel_writev_full_all(QIOChannel *ioc,
-const struct iovec *iov,
-size_t niov,
-int *fds, size_t nfds,
-Error **errp);
+int __qio_channel_writev_full_all(QIOChannel *ioc,
+  const struct iovec *iov,
+  size_t niov,
+  int *fds, size_t nfds,
+  bool async, Error **errp);
+#define qio_channel_writev_full_all(ioc, iov, niov, fds, nfds, errp) \
+__qio_channel_writev_full_all(ioc, iov, niov, fds, nfds, false, errp)
+#define qio_channel_async_writev_full_all(ioc, iov, niov, fds, nfds, errp) \
+__qio_channel_writev_full_all(ioc, iov, niov, fds, nfds, true, errp)
+
+/**
+ * qio_channel_async_writev:
+ * @ioc: the channel object
+ * @iov: the array of memory regions to write data from
+ * @niov: the length of the @iov array
+ * @fds: an array of file ha

[PATCH v2 7/9] bsd-user/mmap.c: Don't mmap fd == -1 independently from MAP_ANON flag

2021-09-21 Thread Warner Losh
From: Guy Yur 

Switch checks for !(flags & MAP_ANONYMOUS) with checks for fd != -1.
MAP_STACK and MAP_GUARD both require fd == -1 and don't require mapping
the fd either.

Signed-off-by: Guy Yur 
[ partially merged before, finishing the job and documenting origin]
Signed-off-by: Warner Losh 
---
 bsd-user/mmap.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c
index 8b763fffc3..347d314aa9 100644
--- a/bsd-user/mmap.c
+++ b/bsd-user/mmap.c
@@ -154,7 +154,7 @@ static int mmap_frag(abi_ulong real_start,
 if (prot1 == 0) {
 /* no page was there, so we allocate one */
 void *p = mmap(host_start, qemu_host_page_size, prot,
-   flags | MAP_ANON, -1, 0);
+   flags | ((fd != -1) ? MAP_ANON : 0), -1, 0);
 if (p == MAP_FAILED)
 return -1;
 prot1 = prot;
@@ -162,7 +162,7 @@ static int mmap_frag(abi_ulong real_start,
 prot1 &= PAGE_BITS;
 
 prot_new = prot | prot1;
-if (!(flags & MAP_ANON)) {
+if (fd != -1) {
 /* msync() won't work here, so we return an error if write is
possible while it is a shared mapping */
 if ((flags & TARGET_BSD_MAP_FLAGMASK) == MAP_SHARED &&
@@ -571,7 +571,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int 
prot,
  * worst case: we cannot map the file because the offset is not
  * aligned, so we read it
  */
-if (!(flags & MAP_ANON) &&
+if (fd != -1 &&
 (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) 
{
 /*
  * msync() won't work here, so we return an error if write is
-- 
2.32.0




[PATCH v2 8/9] bsd-user/mmap.c: Implement MAP_EXCL, required by jemalloc in head

2021-09-21 Thread Warner Losh
From: Kyle Evans 

jemalloc requires a working MAP_EXCL. Ensure that no page is double
mapped when specified.

Signed-off-by: Kyle Evans 
Signed-off-by: Warner Losh 
---
 bsd-user/mmap.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c
index 347d314aa9..792ff00548 100644
--- a/bsd-user/mmap.c
+++ b/bsd-user/mmap.c
@@ -387,7 +387,7 @@ abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size)
 abi_long target_mmap(abi_ulong start, abi_ulong len, int prot,
  int flags, int fd, off_t offset)
 {
-abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
+abi_ulong addr, ret, end, real_start, real_end, retaddr, host_offset, 
host_len;
 
 mmap_lock();
 #ifdef DEBUG_MMAP
@@ -599,6 +599,14 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int 
prot,
 goto the_end;
 }
 
+/* Reject the mapping if any page within the range is mapped */
+if (flags & MAP_EXCL) {
+for (addr = start; addr < end; addr++) {
+if (page_get_flags(addr) != 0)
+goto fail;
+}
+}
+
 /* handle the start of the mapping */
 if (start > real_start) {
 if (real_end == real_start + qemu_host_page_size) {
-- 
2.32.0




[PATCH v2 5/9] bsd-user/mmap.c: mmap prefer MAP_ANON for BSD

2021-09-21 Thread Warner Losh
MAP_ANON and MAP_ANONYMOUS are identical. Prefer MAP_ANON for BSD since
the file is now a confusing mix of the two.

Signed-off-by: Warner Losh 
Reviewed-by: Philippe Mathieu-Daudé 
---
 bsd-user/mmap.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c
index 0acc2db712..bafbdacd31 100644
--- a/bsd-user/mmap.c
+++ b/bsd-user/mmap.c
@@ -284,7 +284,7 @@ static abi_ulong mmap_find_vma_aligned(abi_ulong start, 
abi_ulong size,
 addr = start;
 wrapped = repeat = 0;
 prev = 0;
-flags = MAP_ANONYMOUS | MAP_PRIVATE;
+flags = MAP_ANON | MAP_PRIVATE;
 if (alignment != 0) {
 flags |= MAP_ALIGNED(alignment);
 }
@@ -408,7 +408,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int 
prot,
 if (flags & MAP_FIXED) {
 printf("MAP_FIXED ");
 }
-if (flags & MAP_ANONYMOUS) {
+if (flags & MAP_ANON) {
 printf("MAP_ANON ");
 }
 if (flags & MAP_EXCL) {
@@ -430,7 +430,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int 
prot,
 }
 #endif
 
-if ((flags & MAP_ANONYMOUS) && fd != -1) {
+if ((flags & MAP_ANON) && fd != -1) {
 errno = EINVAL;
 goto fail;
 }
@@ -532,7 +532,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int 
prot,
  * qemu_real_host_page_size
  */
 p = mmap(g2h_untagged(start), host_len, prot,
- flags | MAP_FIXED | ((fd != -1) ? MAP_ANONYMOUS : 0), -1, 0);
+ flags | MAP_FIXED | ((fd != -1) ? MAP_ANON : 0), -1, 0);
 if (p == MAP_FAILED)
 goto fail;
 /* update start so that it points to the file position at 'offset' */
@@ -694,8 +694,7 @@ static void mmap_reserve(abi_ulong start, abi_ulong size)
 }
 if (real_start != real_end) {
 mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE,
- MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE,
- -1, 0);
+ MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
 }
 }
 
-- 
2.32.0




[PATCH v2 6/9] bsd-user/mmap.c: line wrap change

2021-09-21 Thread Warner Losh
Keep the shifted expression on one line. It's the same number of lines
and easier to read like this.

Signed-off-by: Warner Losh 
---
 bsd-user/mmap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c
index bafbdacd31..8b763fffc3 100644
--- a/bsd-user/mmap.c
+++ b/bsd-user/mmap.c
@@ -399,8 +399,8 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int 
prot,
prot & PROT_WRITE ? 'w' : '-',
prot & PROT_EXEC ? 'x' : '-');
 if (flags & MAP_ALIGNMENT_MASK) {
-printf("MAP_ALIGNED(%u) ", (flags & MAP_ALIGNMENT_MASK)
->> MAP_ALIGNMENT_SHIFT);
+printf("MAP_ALIGNED(%u) ",
+   (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT);
 }
 if (flags & MAP_GUARD) {
 printf("MAP_GUARD ");
-- 
2.32.0




[PATCH v2 9/9] bsd-user/mmap.c: assert that target_mprotect cannot fail

2021-09-21 Thread Warner Losh
Similar to the equivalent linux-user change 86abac06c14. All error
conditions that target_mprotect checks are also checked by target_mmap.
EACCES cannot happen because we are just removing PROT_WRITE.  ENOMEM
should not happen because we are modifying a whole VMA (and we have
bigger problems anyway if it happens).

Fixes a Coverity false positive, where Coverity complains about
target_mprotect's return value being passed to tb_invalidate_phys_range.

Signed-off-by: Mikaël Urankar 
Signed-off-by: Warner Losh 
---
 bsd-user/mmap.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c
index 792ff00548..4ddbd50b62 100644
--- a/bsd-user/mmap.c
+++ b/bsd-user/mmap.c
@@ -591,10 +591,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int 
prot,
 goto fail;
 if (!(prot & PROT_WRITE)) {
 ret = target_mprotect(start, len, prot);
-if (ret != 0) {
-start = ret;
-goto the_end;
-}
+assert(ret == 0);
 }
 goto the_end;
 }
-- 
2.32.0




[PATCH v2 1/9] bsd-user/mmap.c: Always zero MAP_ANONYMOUS memory in mmap_frag()

2021-09-21 Thread Warner Losh
From: Mikaël Urankar 

Similar to the equivalent linux-user commit e6deac9cf99

When mapping MAP_ANONYMOUS memory fragments, we still need to take care to
zero them, or it will cause issues.

Signed-off-by: Mikaël Urankar 
Signed-off-by: Warner Losh 
---
 bsd-user/mmap.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c
index b40ab9045f..fc3c1480f5 100644
--- a/bsd-user/mmap.c
+++ b/bsd-user/mmap.c
@@ -180,10 +180,12 @@ static int mmap_frag(abi_ulong real_start,
 if (prot_new != (prot1 | PROT_WRITE))
 mprotect(host_start, qemu_host_page_size, prot_new);
 } else {
-/* just update the protection */
 if (prot_new != prot1) {
 mprotect(host_start, qemu_host_page_size, prot_new);
 }
+if (prot_new & PROT_WRITE) {
+memset(g2h_untagged(start), 0, end - start);
+}
 }
 return 0;
 }
-- 
2.32.0




[PATCH v2 4/9] bsd-user/mmap.c: mmap return ENOMEM on overflow

2021-09-21 Thread Warner Losh
mmap should return ENOMEM on len overflow rather than EINVAL. Return
EINVAL when len == 0 and ENOMEM when the rounded to a page length is 0.
Found by make check-tcg.

Signed-off-by: Warner Losh 
---
 bsd-user/mmap.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c
index c40059d7fc..0acc2db712 100644
--- a/bsd-user/mmap.c
+++ b/bsd-user/mmap.c
@@ -454,11 +454,18 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int 
prot,
 goto fail;
 }
 
-len = TARGET_PAGE_ALIGN(len);
 if (len == 0) {
 errno = EINVAL;
 goto fail;
 }
+
+/* Check for overflows */
+len = TARGET_PAGE_ALIGN(len);
+if (len == 0) {
+errno = ENOMEM;
+goto fail;
+}
+
 real_start = start & qemu_host_page_mask;
 host_offset = offset & qemu_host_page_mask;
 
-- 
2.32.0




[PATCH v2 2/9] bsd-user/mmap.c: check pread's return value to fix warnings with _FORTIFY_SOURCE

2021-09-21 Thread Warner Losh
From: Mikaël Urankar 

Similar to the equivalent linux-user commit fb7e378cf9c, which added
checking to pread's return value.

Signed-off-by: Mikaël Urankar 
Signed-off-by: Warner Losh 
---
 bsd-user/mmap.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c
index fc3c1480f5..90b6313161 100644
--- a/bsd-user/mmap.c
+++ b/bsd-user/mmap.c
@@ -174,7 +174,8 @@ static int mmap_frag(abi_ulong real_start,
 mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
 
 /* read the corresponding file data */
-pread(fd, g2h_untagged(start), end - start, offset);
+if (pread(fd, g2h_untagged(start), end - start, offset) == -1)
+return -1;
 
 /* put final protection */
 if (prot_new != (prot1 | PROT_WRITE))
@@ -593,7 +594,8 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int 
prot,
   -1, 0);
 if (retaddr == -1)
 goto fail;
-pread(fd, g2h_untagged(start), len, offset);
+if (pread(fd, g2h_untagged(start), len, offset) == -1)
+goto fail;
 if (!(prot & PROT_WRITE)) {
 ret = target_mprotect(start, len, prot);
 if (ret != 0) {
-- 
2.32.0




[PATCH v2 0/9] bsd-user mmap fixes

2021-09-21 Thread Warner Losh
This series synchronizes mmap.c with the bsd-user fork. This is a mix of old bug
fixes pulled in from linux-user, as well as some newer fixes to adress bugs
found in check-tcg and recent FreeBSD developments. There are also a couple of
style commits.

v2: do the cherry-picks from linux-user in qemu-style.

Guy Yur (1):
  bsd-user/mmap.c: Don't mmap fd == -1 independently from MAP_ANON flag

Kyle Evans (1):
  bsd-user/mmap.c: Implement MAP_EXCL, required by jemalloc in head

Mikaël Urankar (2):
  bsd-user/mmap.c: Always zero MAP_ANONYMOUS memory in mmap_frag()
  bsd-user/mmap.c: check pread's return value to fix warnings with
_FORTIFY_SOURCE

Warner Losh (5):
  bsd-user/mmap.c: MAP_ symbols are defined, so no need for ifdefs
  bsd-user/mmap.c: mmap return ENOMEM on overflow
  bsd-user/mmap.c: mmap prefer MAP_ANON for BSD
  bsd-user/mmap.c: line wrap change
  bsd-user/mmap.c: assert that target_mprotect cannot fail

 bsd-user/mmap.c | 69 +
 1 file changed, 35 insertions(+), 34 deletions(-)

-- 
2.32.0




[PATCH v2 3/9] bsd-user/mmap.c: MAP_ symbols are defined, so no need for ifdefs

2021-09-21 Thread Warner Losh
All these MAP_ symbols are always defined on supported FreeBSD versions
(12.2 and newer), so remove the #ifdefs since they aren't needed.

Signed-off-by: Warner Losh 
Reviewed-by: Philippe Mathieu-Daudé 
---
 bsd-user/mmap.c | 14 --
 1 file changed, 14 deletions(-)

diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c
index 90b6313161..c40059d7fc 100644
--- a/bsd-user/mmap.c
+++ b/bsd-user/mmap.c
@@ -285,13 +285,9 @@ static abi_ulong mmap_find_vma_aligned(abi_ulong start, 
abi_ulong size,
 wrapped = repeat = 0;
 prev = 0;
 flags = MAP_ANONYMOUS | MAP_PRIVATE;
-#ifdef MAP_ALIGNED
 if (alignment != 0) {
 flags |= MAP_ALIGNED(alignment);
 }
-#else
-/* XXX TODO */
-#endif
 
 for (;; prev = ptr) {
 /*
@@ -406,22 +402,18 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int 
prot,
 printf("MAP_ALIGNED(%u) ", (flags & MAP_ALIGNMENT_MASK)
 >> MAP_ALIGNMENT_SHIFT);
 }
-#if MAP_GUARD
 if (flags & MAP_GUARD) {
 printf("MAP_GUARD ");
 }
-#endif
 if (flags & MAP_FIXED) {
 printf("MAP_FIXED ");
 }
 if (flags & MAP_ANONYMOUS) {
 printf("MAP_ANON ");
 }
-#ifdef MAP_EXCL
 if (flags & MAP_EXCL) {
 printf("MAP_EXCL ");
 }
-#endif
 if (flags & MAP_PRIVATE) {
 printf("MAP_PRIVATE ");
 }
@@ -431,11 +423,9 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int 
prot,
 if (flags & MAP_NOCORE) {
 printf("MAP_NOCORE ");
 }
-#ifdef MAP_STACK
 if (flags & MAP_STACK) {
 printf("MAP_STACK ");
 }
-#endif
 printf("fd=%d offset=0x%llx\n", fd, offset);
 }
 #endif
@@ -444,7 +434,6 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int 
prot,
 errno = EINVAL;
 goto fail;
 }
-#ifdef MAP_STACK
 if (flags & MAP_STACK) {
 if ((fd != -1) || ((prot & (PROT_READ | PROT_WRITE)) !=
 (PROT_READ | PROT_WRITE))) {
@@ -452,8 +441,6 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int 
prot,
 goto fail;
 }
 }
-#endif /* MAP_STACK */
-#ifdef MAP_GUARD
 if ((flags & MAP_GUARD) && (prot != PROT_NONE || fd != -1 ||
 offset != 0 || (flags & (MAP_SHARED | MAP_PRIVATE |
 /* MAP_PREFAULT | */ /* MAP_PREFAULT not in mman.h */
@@ -461,7 +448,6 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int 
prot,
 errno = EINVAL;
 goto fail;
 }
-#endif
 
 if (offset & ~TARGET_PAGE_MASK) {
 errno = EINVAL;
-- 
2.32.0




Re: ensuring a machine's buses have unique names

2021-09-21 Thread Markus Armbruster
Peter Maydell  writes:

[...]

> I'm not sure how best to sort this tangle out. We could:
>  * make controller devices pass in NULL as bus name; this
>means that some bus names will change, which is an annoying
>breakage but for these minor bus types we can probably
>get away with it. This brings these buses into line with
>how we've been handling uniqueness for ide and scsi.
>  * drop the 'name' argument for buses like ide that don't
>actually have any callsites that need to pass a name
>  * split into foo_bus_new() and foo_bus_new_named() so that
>the "easy default" doesn't pass a name, and there's at least
>a place to put a doc comment explaining that the name passed
>into the _named() version should be unique ??
>  * something else ?

A possible work-around for non-unique bus IDs is QOM paths.  Precedence,
kind of:

commit 6287d827d494b5850049584c3f7fb1a589dbb1de
Author: Daniel P. Berrangé 
Date:   Fri Sep 11 13:33:56 2015 +0100

monitor: allow device_del to accept QOM paths

Currently device_del requires that the client provide the
device short ID. device_add allows devices to be created
without giving an ID, at which point there is no way to
delete them with device_del. The QOM object path, however,
provides an alternative way to identify the devices.

Allowing device_del to accept an object path ensures all
devices are deletable regardless of whether they have an
ID.

 (qemu) device_add usb-mouse
 (qemu) qom-list /machine/peripheral-anon
 device[0] (child)
 type (string)
 (qemu) device_del /machine/peripheral-anon/device[0]

Devices are required to be marked as hotpluggable
otherwise an error is raised

 (qemu) device_del /machine/unattached/device[4]
 Device 'PIIX3' does not support hotplugging

Signed-off-by: Daniel P. Berrange 
Message-Id: <1441974836-17476-1-git-send-email-berra...@redhat.com>
Reviewed-by: Eric Blake 
[Commit message touched up, accidental white-space change dropped]
Signed-off-by: Markus Armbruster 

Their length makes QOM paths inconvenient for humans, but machines won't
care.

However, we already burned /-separated paths for paths within the qdev
tree (the thing info qtree shows).  Friends don't let friends use them
(I should be able to dig up a critique if you're curious).

Without that, it could be made to work like

-device virtio-scsi,id=vscsi
-device scsi-cd,bus=/machine/peripheral/vscsi/virtio-backend/vscsi.0

We should consult with libvirt developers before we go down this route.




Re: ensuring a machine's buses have unique names

2021-09-21 Thread Markus Armbruster
BALATON Zoltan  writes:

> On Tue, 21 Sep 2021, Peter Maydell wrote:
>> On Wed, 15 Sept 2021 at 05:28, Markus Armbruster  wrote:
>>>
>>> Peter Maydell  writes:
 I'm not sure how best to sort this tangle out. We could:
  * make controller devices pass in NULL as bus name; this
means that some bus names will change, which is an annoying
breakage but for these minor bus types we can probably
get away with it. This brings these buses into line with
how we've been handling uniqueness for ide and scsi.
>>>
>>> To gauge the breakage, we need a list of the affected bus names.
>>
>> Looking through, there are a few single-use or special
>> purpose buses I'm going to ignore for now (eg vmbus, or
>> the s390 ones). The four big bus types where controllers
>> often specify a bus name and override the 'autogenerate
>> unique name' handling are pci, ssi, sd, and i2c. (pci mostly
>> gets away with it I expect by machines only having one pci
>> bus.) Of those, I've gone through i2c. These are all the
>> places where we create a specifically-named i2c bus (via
>> i2c_init_bus()), together with the affected boards:
>>
>>   hw/arm/pxa2xx.c
>>- the PXA SoC code creates both the intended-for-use
>>  i2c buses (which get auto-names) and also several i2c
>>  buses intended for internal board-code use only which
>>  are all given the same name "dummy".
>>  Boards: connex, verdex, tosa, mainstone, akita, spitz,
>>  borzoi, terrier, z2
>>   hw/arm/stellaris.c
>>- The i2c controller names its bus "i2c". There is only one i2c
>>  controller on these boards, so no name conflicts.
>>  Boards: lm3s811evb, lm3s6965evb
>>   hw/display/ati.c
>>- The ATI VGA device has an on-board i2c controller which it
>>  connects the DDC that holds the EDID information. The bus is
>>  always named "ati-vga.ddc", so if you have multiple of this
>>  PCI device in the system the buses have the same names.
>>   hw/display/sm501.c
>>- Same as ATI, but the bus name is "sm501.i2c"
>>   hw/i2c/aspeed_i2c.c
>>- This I2C controller has either 14 or 16 (!) different i2c
>>  buses, and it assigns them names "aspeed.i2c.N" for N = 0,1,2,...
>>  The board code mostly seems to use these to wire up various
>>  on-board i2c devices.
>>  Boards: palmetto-bmc, supermicrox11-bmc, ast2500-evb, romulus-bmc,
>>  swift-bmc, sonorapass-bmc, witherspoon-bmc, ast2600-evb,
>>  tacoma-bmc, g220a-bmc, quanta-q71l-bmc, rainier-bmc
>>   hw/i2c/bitbang_i2c.c
>>- the "GPIO to I2C bridge" device always names its bus "i2c".
>>  Used only on musicpal, which only creates one of these buses.
>>  Boards: musicpal
>>   hw/i2c/exynos4210_i2c.c
>>- This i2c controller always names its bus "i2c". There are 9
>>  of these controllers on the board, so they all have clashing
>>  names.
>>  Boards: nuri, smdkc210
>>   hw/i2c/i2c_mux_pca954x.c
>>- This is an i2c multiplexer. All the child buses are named
>>  "i2c-bus". The multiplexer is used by the aspeed and npcm7xx
>>  boards. (There's a programmable way to get at individual
>>  downstream i2c buses despite the name clash; none of the boards
>>  using this multiplexer actually connect any devices downstream of
>>  it yet.)
>>  Boards: palmetto-bmc, supermicrox11-bmc, ast2500-evb, romulus-bmc,
>>  swift-bmc, sonorapass-bmc, witherspoon-bmc, ast2600-evb,
>>  tacoma-bmc, g220a-bmc, quanta-q71l-bmc, rainier-bmc,
>>  npcm750-evb, quanta-gsj, quanta-gbs-bmc, kudo-bmc
>>   hw/i2c/mpc_i2c.c
>>- This controller always names its bus "i2c". There is only one
>>  of these controllers in the machine.
>>  Boards: ppce500, mpc8544ds
>>   hw/i2c/npcm7xx_smbus.c
>>- This controller always names its bus "i2c-bus". There are multiple
>>  controllers on the boards. The name also clashes with the one used
>>  by the pca954x muxes on these boards (see above).
>>  Boards: npcm750-evb, quanta-gsj, quanta-gbs-bmc, kudo-bmc
>>   hw/i2c/pm_smbus.c
>>- This is the PC SMBUS implementation (it is not a QOM device...)
>>  The bus is always called "i2c".
>>  Boards: haven't worked through; at least all the x86 PC-like
>>  boards, I guess
>>   hw/i2c/ppc4xx_i2c.c
>>- This controller always names its bus "i2c". The taihu and
>>  ref405ep have only one controller, but sam460ex has two which
>>  will have non-unique names.
>>  Boards: taihu, ref405ep, sam460ex
>>   hw/i2c/versatile_i2c.c
>>- This controller always names its bus "i2c". The MPS boards all
>>  have multiples of this controller with clashing names; the others
>>  have only one controller.
>>  Boards: mps2-an385, mps2-an386, mps2-an500, mps2-an511,
>>  mps2-an505, mps2-an521, mps3-an524, mps3-an547,
>>  realview-eb, realview-eb-mpcore, realview-pb-a8, realview-pbx-a9,
>>  versatileab, versatilepb, vexpress-a9, vexpress-a15
>>
>> In a lot of these 

Re: [PATCH v2 12/30] tcg/loongarch64: Implement not/and/or/xor/nor/andc/orc/eqv ops

2021-09-21 Thread Richard Henderson

On 9/21/21 1:18 PM, WANG Xuerui wrote:

+case INDEX_op_eqv_i32:
+case INDEX_op_eqv_i64:
+if (c2) {
+/* guaranteed to fit due to constraint */
+tcg_out_opc_xori(s, a0, a1, ~a2);
+} else {
+tcg_out_opc_nor(s, a0, a2, TCG_REG_ZERO);
+tcg_out_opc_xor(s, a0, a1, a0);
+}
+break;


You don't actually have eqv (xnor), so don't pretend that you do.  The middle-end will 
expand this as xor + not on its own.


Otherwise,
Reviewed-by: Richard Henderson 


r~



Re: [PATCH v2 03/30] tcg/loongarch64: Add the tcg-target.h file

2021-09-21 Thread WANG Xuerui

Hi Richard,

On 9/22/21 11:55, Richard Henderson wrote:

On 9/21/21 1:18 PM, WANG Xuerui wrote:

Signed-off-by: WANG Xuerui
---
  tcg/loongarch64/tcg-target.h | 180 +++
  1 file changed, 180 insertions(+)
  create mode 100644 tcg/loongarch64/tcg-target.h


Reviewed-by: Richard Henderson 

However...



+#define TCG_TARGET_HAS_sub2_i32 0
+#define TCG_TARGET_HAS_mulu2_i32    0
+#define TCG_TARGET_HAS_muls2_i32    0
+#define TCG_TARGET_HAS_muluh_i32    1
+#define TCG_TARGET_HAS_mulsh_i32    1
+#define TCG_TARGET_HAS_ext8s_i32    1
+#define TCG_TARGET_HAS_ext16s_i32   1
+#define TCG_TARGET_HAS_ext8u_i32    1
+#define TCG_TARGET_HAS_ext16u_i32   1
+#define TCG_TARGET_HAS_bswap16_i32  0


It might be better to enable these with the patch that adds the insns.
Sounds like a good idea, I'll re-organize to enable these individually 
in each commit in v3.


r~




Re: [PATCH v2 09/30] tcg/loongarch64: Implement tcg_out_mov and tcg_out_movi

2021-09-21 Thread Richard Henderson

On 9/21/21 1:18 PM, WANG Xuerui wrote:

+/* Test for PC-relative values that can be loaded faster.  */
+intptr_t pc_offset = val - (uintptr_t)s->code_ptr;


This isn't quite right for split r^x code buffer.
You should have seen this with --enable-debug-tcg...

You need pc_offset = tcg_pcrel_diff(s, (void *)val).


+if (pc_offset == (int32_t)pc_offset) {
+tcg_target_long lo = sextreg(pc_offset, 0, 12);
+tcg_target_long hi = pc_offset - lo;
+tcg_out_opc_pcaddu12i(s, rd, hi >> 12);


And... this doesn't quite work, right at the edges.  If lo is negative, hi can overflow 
out of range.  There are a number of ways to fix this.  One is to extract the pieces and 
re-assemble to see if it matches.  Another is to rearrange the arithmetic just a little 
and use PCALAU12I.



+tcg_target_long upper = (val >> 12) & 0xfffff;
+tcg_target_long higher = (val >> 32) & 0x3fffffff;


Better to use extract64(val, 12, 20) and extract64(val, 32, 30).


r~



Re: [PATCH v2 07/30] tcg/loongarch64: Implement necessary relocation operations

2021-09-21 Thread Richard Henderson

On 9/21/21 1:18 PM, WANG Xuerui wrote:

Signed-off-by: WANG Xuerui
---
  tcg/loongarch64/tcg-target.c.inc | 66 
  1 file changed, 66 insertions(+)


Reviewed-by: Richard Henderson 

r~



Re: [PATCH v2 08/30] tcg/loongarch64: Implement the memory barrier op

2021-09-21 Thread Richard Henderson

On 9/21/21 1:18 PM, WANG Xuerui wrote:

Signed-off-by: WANG Xuerui
---
  tcg/loongarch64/tcg-target.c.inc | 32 
  1 file changed, 32 insertions(+)


Reviewed-by: Richard Henderson 

r~



Re: [PATCH] spapr/xive: Fix kvm_xive_source_reset trace event

2021-09-21 Thread David Gibson
On Tue, Sep 21, 2021 at 09:13:54AM +0200, Greg Kurz wrote:
> On Tue, 21 Sep 2021 08:56:52 +0200
> Cédric Le Goater  wrote:
> 
> > Signed-off-by: Cédric Le Goater 
> > ---
> 
> Maybe add ?
> 
> Fixes: 4e960974d4ee ("xive: Add trace events")

That would be helpful.

and... this really needs at least *some* commit message.

> 
> anyway
> 
> Reviewed-by: Greg Kurz 
> 
> >  hw/intc/spapr_xive_kvm.c | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> > 
> > diff --git a/hw/intc/spapr_xive_kvm.c b/hw/intc/spapr_xive_kvm.c
> > index 53731d158625..7ac2502cc013 100644
> > --- a/hw/intc/spapr_xive_kvm.c
> > +++ b/hw/intc/spapr_xive_kvm.c
> > @@ -236,6 +236,8 @@ int kvmppc_xive_source_reset_one(XiveSource *xsrc, int 
> > srcno, Error **errp)
> >  SpaprXive *xive = SPAPR_XIVE(xsrc->xive);
> >  uint64_t state = 0;
> >  
> > +trace_kvm_xive_source_reset(srcno);
> > +
> >  assert(xive->fd != -1);
> >  
> >  if (xive_source_irq_is_lsi(xsrc, srcno)) {
> > @@ -323,8 +325,6 @@ uint64_t kvmppc_xive_esb_rw(XiveSource *xsrc, int 
> > srcno, uint32_t offset,
> >  return xive_esb_rw(xsrc, srcno, offset, data, 1);
> >  }
> >  
> > -trace_kvm_xive_source_reset(srcno);
> > -
> >  /*
> >   * Special Load EOI handling for LSI sources. Q bit is never set
> >   * and the interrupt should be re-triggered if the level is still
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [PATCH v2 06/30] tcg/loongarch64: Define the operand constraints

2021-09-21 Thread Richard Henderson

On 9/21/21 1:18 PM, WANG Xuerui wrote:

Signed-off-by: WANG Xuerui
---
  tcg/loongarch64/tcg-target-con-str.h | 28 +++
  tcg/loongarch64/tcg-target.c.inc | 52 
  2 files changed, 80 insertions(+)
  create mode 100644 tcg/loongarch64/tcg-target-con-str.h


Reviewed-by: Richard Henderson 

r~



Re: [PATCH] spapr_numa.c: fixes in spapr_numa_FORM2_write_rtas_tables()

2021-09-21 Thread David Gibson
On Tue, Sep 21, 2021 at 04:43:47PM -0300, Daniel Henrique Barboza wrote:
> This patch has a handful of modifications for the recent added
> FORM2 support:
> 
> - there is no particular reason for both 'lookup_index_table' and
> 'distance_table' to be allocated in the heap, since their sizes are
> known right at the start of the function. Use static allocation in
> them to spare a couple of g_new0() calls;
> 
> - to not allocate more than the necessary size in 'distance_table'. At
> this moment the array is oversized due to allocating uint32_t for all
> elements, when most of them fits in an uint8_t;
> 
> - create a NUMA_LOCAL_DISTANCE macro to avoid hardcoding the local
> distance value.
> 
> Signed-off-by: Daniel Henrique Barboza 

Applied to ppc-for-6.2, thanks.

> ---
>  hw/ppc/spapr_numa.c | 35 +++
>  1 file changed, 19 insertions(+), 16 deletions(-)
> 
> diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c
> index 58d5dc7084..039a0439c6 100644
> --- a/hw/ppc/spapr_numa.c
> +++ b/hw/ppc/spapr_numa.c
> @@ -19,6 +19,9 @@
>  /* Moved from hw/ppc/spapr_pci_nvlink2.c */
>  #define SPAPR_GPU_NUMA_ID   (cpu_to_be32(1))
>  
> +/* Macro to avoid hardcoding the local distance value */
> +#define NUMA_LOCAL_DISTANCE 10
> +
>  /*
>   * Retrieves max_dist_ref_points of the current NUMA affinity.
>   */
> @@ -500,17 +503,21 @@ static void 
> spapr_numa_FORM2_write_rtas_tables(SpaprMachineState *spapr,
>  MachineState *ms = MACHINE(spapr);
>  NodeInfo *numa_info = ms->numa_state->nodes;
>  int nb_numa_nodes = ms->numa_state->num_nodes;
> +/* Lookup index table has an extra uint32_t with its length */
> +uint32_t lookup_index_table[nb_numa_nodes + 1];
>  int distance_table_entries = nb_numa_nodes * nb_numa_nodes;
> -g_autofree uint32_t *lookup_index_table = NULL;
> -g_autofree uint32_t *distance_table = NULL;
> -int src, dst, i, distance_table_size;
> -uint8_t *node_distances;
> +/*
> + * Distance table is an uint8_t array with a leading uint32_t
> + * containing its length.
> + */
> +uint8_t distance_table[distance_table_entries + 4];
> +uint32_t *distance_table_length;
> +int src, dst, i;
>  
>  /*
>   * ibm,numa-lookup-index-table: array with length and a
>   * list of NUMA ids present in the guest.
>   */
> -lookup_index_table = g_new0(uint32_t, nb_numa_nodes + 1);
>  lookup_index_table[0] = cpu_to_be32(nb_numa_nodes);
>  
>  for (i = 0; i < nb_numa_nodes; i++) {
> @@ -518,8 +525,7 @@ static void 
> spapr_numa_FORM2_write_rtas_tables(SpaprMachineState *spapr,
>  }
>  
>  _FDT(fdt_setprop(fdt, rtas, "ibm,numa-lookup-index-table",
> - lookup_index_table,
> - (nb_numa_nodes + 1) * sizeof(uint32_t)));
> + lookup_index_table, sizeof(lookup_index_table)));
>  
>  /*
>   * ibm,numa-distance-table: contains all node distances. First
> @@ -531,11 +537,10 @@ static void 
> spapr_numa_FORM2_write_rtas_tables(SpaprMachineState *spapr,
>   * array because NUMA ids can be sparse (node 0 is the first,
>   * node 8 is the second ...).
>   */
> -distance_table = g_new0(uint32_t, distance_table_entries + 1);
> -distance_table[0] = cpu_to_be32(distance_table_entries);
> +distance_table_length = (uint32_t *)distance_table;
> +distance_table_length[0] = cpu_to_be32(distance_table_entries);
>  
> -node_distances = (uint8_t *)&distance_table[1];
> -i = 0;
> +i = 4;
>  
>  for (src = 0; src < nb_numa_nodes; src++) {
>  for (dst = 0; dst < nb_numa_nodes; dst++) {
> @@ -546,18 +551,16 @@ static void 
> spapr_numa_FORM2_write_rtas_tables(SpaprMachineState *spapr,
>   * adding the numa_info to retrieve distance info from.
>   */
>  if (src == dst) {
> -node_distances[i++] = 10;
> +distance_table[i++] = NUMA_LOCAL_DISTANCE;
>  continue;
>  }
>  
> -node_distances[i++] = numa_info[src].distance[dst];
> +distance_table[i++] = numa_info[src].distance[dst];
>  }
>  }
>  
> -distance_table_size = distance_table_entries * sizeof(uint8_t) +
> -  sizeof(uint32_t);
>  _FDT(fdt_setprop(fdt, rtas, "ibm,numa-distance-table",
> - distance_table, distance_table_size));
> + distance_table, sizeof(distance_table)));
>  }
>  
>  /*

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [PATCH v2 03/30] tcg/loongarch64: Add the tcg-target.h file

2021-09-21 Thread Richard Henderson

On 9/21/21 1:18 PM, WANG Xuerui wrote:

Signed-off-by: WANG Xuerui
---
  tcg/loongarch64/tcg-target.h | 180 +++
  1 file changed, 180 insertions(+)
  create mode 100644 tcg/loongarch64/tcg-target.h


Reviewed-by: Richard Henderson 

However...



+#define TCG_TARGET_HAS_sub2_i32 0
+#define TCG_TARGET_HAS_mulu2_i32    0
+#define TCG_TARGET_HAS_muls2_i32    0
+#define TCG_TARGET_HAS_muluh_i32    1
+#define TCG_TARGET_HAS_mulsh_i32    1
+#define TCG_TARGET_HAS_ext8s_i32    1
+#define TCG_TARGET_HAS_ext16s_i32   1
+#define TCG_TARGET_HAS_ext8u_i32    1
+#define TCG_TARGET_HAS_ext16u_i32   1
+#define TCG_TARGET_HAS_bswap16_i32  0


It might be better to enable these with the patch that adds the insns.

r~



Re: [PATCH 5/5] [RFC] target/arm: Advertise MVE to gdb when present

2021-09-21 Thread Richard Henderson

On 9/21/21 9:29 AM, Peter Maydell wrote:

Cortex-M CPUs with MVE should advertise this fact to gdb, using the
org.gnu.gdb.arm.m-profile-mve XML feature, which defines the VPR
register.  Presence of this feature also tells gdb to create
pseudo-registers Q0..Q7, so we do not need to tell gdb about them
separately.

Note that unless you have a very recent GDB that includes this fix:
http://patches-tcwg.linaro.org/patch/58133/  gdb will mis-print the
individual fields of the VPR register as zero (but showing the whole
thing as hex, eg with "print /x $vpr" will give the correct value).

NB: the gdb patches to implement this have not yet landed in
gdb upstream, so this patch is RFC status only until that
happens and the XML is finalized.

Signed-off-by: Peter Maydell
---
  configs/targets/aarch64-softmmu.mak  |  2 +-
  configs/targets/arm-linux-user.mak   |  2 +-
  configs/targets/arm-softmmu.mak  |  2 +-
  configs/targets/armeb-linux-user.mak |  2 +-
  target/arm/gdbstub.c | 25 +
  gdb-xml/arm-m-profile-mve.xml| 19 +++
  6 files changed, 48 insertions(+), 4 deletions(-)
  create mode 100644 gdb-xml/arm-m-profile-mve.xml


Looks like it'll do what's advertised.
Reviewed-by: Richard Henderson 

r~



Re: [PATCH 4/5] target/arm: Don't put FPEXC and FPSID in org.gnu.gdb.arm.vfp XML

2021-09-21 Thread Richard Henderson

On 9/21/21 9:29 AM, Peter Maydell wrote:

Currently we send VFP XML which includes D0..D15 or D0..D31, plus
FPSID, FPSCR and FPEXC.  The upstream GDB tolerates this, but its
definition of this XML feature does not include FPSID or FPEXC.  In
particular, for M-profile cores there are no FPSID or FPEXC
registers, so advertising those is wrong.

Move FPSID and FPEXC into their own bit of XML which we only send for
A and R profile cores.  This brings our definition of the XML
org.gnu.gdb.arm.vfp feature into line with GDB's own (at least for
non-Neon cores...) and means we don't claim to have FPSID and FPEXC
on M-profile.

(It seems unlikely to me that any gdbstub users really care about
being able to look at FPEXC and FPSID; but we've supplied them to gdb
for a decade and it's not hard to keep doing so.)

Signed-off-by: Peter Maydell
---


Reviewed-by: Richard Henderson 

r~



Re: [PATCH 3/5] target/arm: Move gdbstub related code out of helper.c

2021-09-21 Thread Richard Henderson

On 9/21/21 9:28 AM, Peter Maydell wrote:

Currently helper.c includes some code which is part of the arm
target's gdbstub support.  This code has a better home: in gdbstub.c
and gdbstub64.c.  Move it there.

Because aarch64_fpu_gdb_get_reg() and aarch64_fpu_gdb_set_reg() move
into gdbstub64.c, this means that they're now compiled only for
TARGET_AARCH64 rather than always.  That is the only case when they
would ever be used, but it does mean that the ifdef in
arm_cpu_register_gdb_regs_for_features() needs to be adjusted to
match.

Signed-off-by: Peter Maydell
---
  target/arm/internals.h |   7 ++
  target/arm/gdbstub.c   | 130 
  target/arm/gdbstub64.c | 140 +
  target/arm/helper.c| 271 -
  4 files changed, 277 insertions(+), 271 deletions(-)


Reviewed-by: Richard Henderson 

r~



Re: [PATCH 2/5] target/arm: Fix coding style issues in gdbstub code in helper.c

2021-09-21 Thread Richard Henderson

On 9/21/21 9:28 AM, Peter Maydell wrote:

We're going to move this code to a different file; fix the coding
style first so checkpatch doesn't complain.  This includes deleting
the spurious 'break' statements after returns in the
vfp_gdb_get_reg() function.

Signed-off-by: Peter Maydell
---
  target/arm/helper.c | 23 ---
  1 file changed, 16 insertions(+), 7 deletions(-)


Reviewed-by: Richard Henderson 

r~



Re: [PATCH 1/5] configs: Don't include 32-bit-only GDB XML in aarch64 linux configs

2021-09-21 Thread Richard Henderson

On 9/21/21 9:28 AM, Peter Maydell wrote:

The aarch64-linux QEMU usermode binaries can never run 32-bit
code, so they do not need to include the GDB XML for it.
(arm_cpu_register_gdb_regs_for_features() will not use these
XML files if the CPU has ARM_FEATURE_AARCH64, so we will not
advertise to gdb that we have them.)

Signed-off-by: Peter Maydell
---
  configs/targets/aarch64-linux-user.mak| 2 +-
  configs/targets/aarch64_be-linux-user.mak | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)


Reviewed-by: Richard Henderson 

r~



[PULL 00/10] tcg patch queue, v3

2021-09-21 Thread Richard Henderson
Rebase and resolve minor conflict.

r~


The following changes since commit 2c3e83f92d93fbab071b8a96b8ab769b01902475:

  Merge remote-tracking branch 
'remotes/alistair23/tags/pull-riscv-to-apply-20210921' into staging (2021-09-21 
10:57:48 -0700)

are available in the Git repository at:

  https://gitlab.com/rth7680/qemu.git tags/pull-tcg-20210921

for you to fetch changes up to 81c65ee223ba759c15c11068f9b292a59a900451:

  tcg/riscv: Remove add with zero on user-only memory access (2021-09-21 
19:36:44 -0700)


Move cpu_signal_handler declaration.
Restrict cpu_handle_halt to sysemu.
Make do_unaligned_access noreturn.
Misc tcg/mips cleanup
Misc tcg/sparc cleanup
Misc tcg/riscv cleanup


Philippe Mathieu-Daudé (1):
  accel/tcg: Restrict cpu_handle_halt() to sysemu

Richard Henderson (9):
  include/exec: Move cpu_signal_handler declaration
  tcg/mips: Drop inline markers
  tcg/mips: Allow JAL to be out of range in tcg_out_bswap_subr
  tcg/mips: Unset TCG_TARGET_HAS_direct_jump
  tcg/mips: Drop special alignment for code_gen_buffer
  tcg/sparc: Drop inline markers
  tcg/sparc: Introduce tcg_out_mov_delay
  hw/core: Make do_unaligned_access noreturn
  tcg/riscv: Remove add with zero on user-only memory access

 include/exec/exec-all.h|  13 +
 include/hw/core/tcg-cpu-ops.h  |   3 +-
 target/alpha/cpu.h |  10 +---
 target/arm/cpu.h   |   7 ---
 target/arm/internals.h |   2 +-
 target/avr/cpu.h   |   2 -
 target/cris/cpu.h  |   8 
 target/hexagon/cpu.h   |   3 --
 target/hppa/cpu.h  |   3 --
 target/i386/cpu.h  |   7 ---
 target/m68k/cpu.h  |   8 
 target/microblaze/cpu.h|   9 +---
 target/mips/cpu.h  |   3 --
 target/mips/internal.h |   2 -
 target/mips/tcg/tcg-internal.h |   4 +-
 target/nios2/cpu.h |   6 +--
 target/openrisc/cpu.h  |   2 -
 target/ppc/cpu.h   |   7 ---
 target/ppc/internal.h  |   4 +-
 target/riscv/cpu.h |   4 +-
 target/rx/cpu.h|   4 --
 target/s390x/cpu.h |   7 ---
 target/s390x/s390x-internal.h  |   4 +-
 target/sh4/cpu.h   |   7 +--
 target/sparc/cpu.h |   2 -
 target/tricore/cpu.h   |   2 -
 target/xtensa/cpu.h|   6 +--
 tcg/mips/tcg-target.h  |  12 ++---
 accel/tcg/cpu-exec.c   |   6 ++-
 target/hppa/cpu.c  |   7 +--
 tcg/region.c   |  91 ---
 tcg/mips/tcg-target.c.inc  | 105 ++---
 tcg/riscv/tcg-target.c.inc |  10 +---
 tcg/sparc/tcg-target.c.inc |  64 ++---
 34 files changed, 119 insertions(+), 315 deletions(-)



unknown keycodes `empty+aliases(qwerty)'

2021-09-21 Thread Dominik Wrona
MacOS Mojave Macbook Air, running a VM via ssh-ing into a linux kernel 4.15.
My 'e' acts as delete, t as w, y as q, u as d in the login screen.

Unfortunate.




[RFC v7] virtio/vsock: add two more queues for datagram types

2021-09-21 Thread Jiang Wang
Datagram sockets are connectionless and unreliable.
The sender does not know the capacity of the receiver
and may send more packets than the receiver can handle.

Add two more dedicate virtqueues for datagram sockets,
so that it will not unfairly steal resources from
stream and future connection-oriented sockets.

The two new virtqueues are enabled by default and will
be removed if the guest does not support. This will help
migration work.

btw: enable_dgram argument in vhost_vsock_common_realize
is redundant for now, but will be used later when we
want to disable DGRAM feature bit for old versions.

Signed-off-by: Jiang Wang 
---
v1 -> v2: use qemu cmd option to control number of queues,
removed configuration settings for dgram.
v2 -> v3: use ioctl to get features and decide number of
virt queues, instead of qemu cmd option.
v3 -> v4: change DGRAM feature bit value to 2. Add an argument
in vhost_vsock_common_realize to indicate dgram is supported or not.
v4 -> v5: don't open dev to get vhostfd. Removed leftover definition of
enable_dgram
v5 -> v6: fix style errors. Imporve error handling of
vhost_vsock_dgram_supported. Rename MAX_VQS_WITH_DGRAM and another one.
v6 -> v7: Always enable dgram for vhost-user and vhost kernel.
Delete unused virtqueues at the beginning of 
vhost_vsock_common_start for migration. Otherwise, migration will fail.

 hw/virtio/vhost-user-vsock.c  |  2 +-
 hw/virtio/vhost-vsock-common.c| 32 +--
 hw/virtio/vhost-vsock.c   |  6 +++-
 include/hw/virtio/vhost-vsock-common.h|  6 ++--
 include/hw/virtio/vhost-vsock.h   |  3 ++
 include/standard-headers/linux/virtio_vsock.h |  1 +
 6 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/hw/virtio/vhost-user-vsock.c b/hw/virtio/vhost-user-vsock.c
index 6095ed7349..9823a2f3bd 100644
--- a/hw/virtio/vhost-user-vsock.c
+++ b/hw/virtio/vhost-user-vsock.c
@@ -105,7 +105,7 @@ static void vuv_device_realize(DeviceState *dev, Error 
**errp)
 return;
 }
 
-vhost_vsock_common_realize(vdev, "vhost-user-vsock");
+vhost_vsock_common_realize(vdev, "vhost-user-vsock", true);
 
 vhost_dev_set_config_notifier(&vvc->vhost_dev, &vsock_ops);
 
diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c
index 4ad6e234ad..7d89b4d242 100644
--- a/hw/virtio/vhost-vsock-common.c
+++ b/hw/virtio/vhost-vsock-common.c
@@ -26,6 +26,18 @@ int vhost_vsock_common_start(VirtIODevice *vdev)
 int ret;
 int i;
 
+if (!virtio_has_feature(vdev->guest_features, VIRTIO_VSOCK_F_DGRAM)) {
+struct vhost_virtqueue *vqs;
+virtio_delete_queue(vvc->dgram_recv_vq);
+virtio_delete_queue(vvc->dgram_trans_vq);
+
+vqs = vvc->vhost_dev.vqs;
+vvc->vhost_dev.nvqs = MAX_VQS_WITHOUT_DGRAM;
+vvc->vhost_dev.vqs = g_new0(struct vhost_virtqueue,
+   vvc->vhost_dev.nvqs);
+g_free(vqs);
+}
+
 if (!k->set_guest_notifiers) {
 error_report("binding does not support guest notifiers");
 return -ENOSYS;
@@ -196,9 +208,11 @@ int vhost_vsock_common_post_load(void *opaque, int 
version_id)
 return 0;
 }
 
-void vhost_vsock_common_realize(VirtIODevice *vdev, const char *name)
+void vhost_vsock_common_realize(VirtIODevice *vdev, const char *name,
+   bool enable_dgram)
 {
 VHostVSockCommon *vvc = VHOST_VSOCK_COMMON(vdev);
+int nvqs = MAX_VQS_WITH_DGRAM;
 
 virtio_init(vdev, name, VIRTIO_ID_VSOCK,
 sizeof(struct virtio_vsock_config));
@@ -209,12 +223,17 @@ void vhost_vsock_common_realize(VirtIODevice *vdev, const 
char *name)
 vvc->trans_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE,
vhost_vsock_common_handle_output);
 
+vvc->dgram_recv_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE,
+  vhost_vsock_common_handle_output);
+vvc->dgram_trans_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE,
+  vhost_vsock_common_handle_output);
+
 /* The event queue belongs to QEMU */
 vvc->event_vq = virtio_add_queue(vdev, VHOST_VSOCK_QUEUE_SIZE,
vhost_vsock_common_handle_output);
 
-vvc->vhost_dev.nvqs = ARRAY_SIZE(vvc->vhost_vqs);
-vvc->vhost_dev.vqs = vvc->vhost_vqs;
+vvc->vhost_dev.nvqs = nvqs;
+vvc->vhost_dev.vqs = g_new0(struct vhost_virtqueue, vvc->vhost_dev.nvqs);
 
 vvc->post_load_timer = NULL;
 }
@@ -227,6 +246,13 @@ void vhost_vsock_common_unrealize(VirtIODevice *vdev)
 
 virtio_delete_queue(vvc->recv_vq);
 virtio_delete_queue(vvc->trans_vq);
+if (vvc->vhost_dev.nvqs == MAX_VQS_WITH_DGRAM) {
+virtio_delete_queue(vvc->dgram_recv_vq);
+virtio_delete_queue(vvc->dgram_trans_vq);
+}
+
+g_free(vvc->vhost_dev.vqs);
+

Re: [PATCH] hw/loader: Remove unused rom_add_file_as() definition

2021-09-21 Thread Alistair Francis
On Sat, Sep 18, 2021 at 5:02 AM Philippe Mathieu-Daudé
 wrote:
>
> rom_add_file_as() is not used anywhere, remove it.
>
> Signed-off-by: Philippe Mathieu-Daudé 

Looks fine, although I'm not convinced removing this gets us anything.
It might be used in the future and doesn't add too much cruft.

Reviewed-by: Alistair Francis 

Alistair

> ---
>  include/hw/loader.h | 2 --
>  1 file changed, 2 deletions(-)
>
> diff --git a/include/hw/loader.h b/include/hw/loader.h
> index cbfc1848737..c52a1df8b68 100644
> --- a/include/hw/loader.h
> +++ b/include/hw/loader.h
> @@ -329,8 +329,6 @@ void hmp_info_roms(Monitor *mon, const QDict *qdict);
>  rom_add_blob(_f, _b, _l, _l, _a, NULL, NULL, NULL, NULL, true)
>  #define rom_add_file_mr(_f, _mr, _i)\
>  rom_add_file(_f, NULL, 0, _i, false, _mr, NULL)
> -#define rom_add_file_as(_f, _as, _i)\
> -rom_add_file(_f, NULL, 0, _i, false, NULL, _as)
>  #define rom_add_file_fixed_as(_f, _a, _i, _as)  \
>  rom_add_file(_f, NULL, _a, _i, false, NULL, _as)
>  #define rom_add_blob_fixed_as(_f, _b, _l, _a, _as)  \
> --
> 2.31.1
>
>



Re: [PATCH v3 05/15] target/ppc: PMU: add instruction counting

2021-09-21 Thread Daniel Henrique Barboza




On 9/6/21 22:57, David Gibson wrote:

On Fri, Sep 03, 2021 at 05:31:06PM -0300, Daniel Henrique Barboza wrote:

The PMU is already counting cycles by calculating time elapsed in
nanoseconds. Counting instructions is a different matter and requires
another approach.

This patch adds the capability of counting completed instructions
(Perf event PM_INST_CMPL) by counting the amount of instructions
translated in each translation block right before exiting it.

A new pmu_count_insns() helper in translation.c was added to do that.
After verifying that the PMU is running (MMCR0_FC bit not set), call
helper_insns_inc(). This new helper from power8_pmu.c will add the
instructions to the relevant counters. It'll also be responsible for
triggering counter negative overflows later on.

Signed-off-by: Daniel Henrique Barboza 
---
  target/ppc/cpu.h |  1 +
  target/ppc/helper.h  |  1 +
  target/ppc/helper_regs.c |  3 ++
  target/ppc/power8_pmu.c  | 70 
  target/ppc/translate.c   | 46 ++
  5 files changed, 114 insertions(+), 7 deletions(-)

diff --git a/target/ppc/cpu.h b/target/ppc/cpu.h
index 74698a3600..4d4886ac74 100644
--- a/target/ppc/cpu.h
+++ b/target/ppc/cpu.h
@@ -628,6 +628,7 @@ enum {
  HFLAGS_FP = 13,  /* MSR_FP */
  HFLAGS_PR = 14,  /* MSR_PR */
  HFLAGS_PMCCCLEAR = 15, /* PMU MMCR0 PMCC equal to 0b00 */
+HFLAGS_MMCR0FC = 16, /* MMCR0 FC bit */
  HFLAGS_VSX = 23, /* MSR_VSX if cpu has VSX */
  HFLAGS_VR = 25,  /* MSR_VR if cpu has VRE */
  
diff --git a/target/ppc/helper.h b/target/ppc/helper.h

index 5122632784..47dbbe6da1 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -21,6 +21,7 @@ DEF_HELPER_1(hrfid, void, env)
  DEF_HELPER_2(store_lpcr, void, env, tl)
  DEF_HELPER_2(store_pcr, void, env, tl)
  DEF_HELPER_2(store_mmcr0, void, env, tl)
+DEF_HELPER_2(insns_inc, void, env, i32)
  #endif
  DEF_HELPER_1(check_tlb_flush_local, void, env)
  DEF_HELPER_1(check_tlb_flush_global, void, env)
diff --git a/target/ppc/helper_regs.c b/target/ppc/helper_regs.c
index 4c1d9575ac..27d139edd8 100644
--- a/target/ppc/helper_regs.c
+++ b/target/ppc/helper_regs.c
@@ -109,6 +109,9 @@ static uint32_t hreg_compute_hflags_value(CPUPPCState *env)
  if (((env->spr[SPR_POWER_MMCR0] & MMCR0_PMCC) >> 18) == 0) {
  hflags |= 1 << HFLAGS_PMCCCLEAR;
  }
+if (env->spr[SPR_POWER_MMCR0] & MMCR0_FC) {
+hflags |= 1 << HFLAGS_MMCR0FC;
+}
  
  #ifndef CONFIG_USER_ONLY

  if (!env->has_hv_mode || (msr & (1ull << MSR_HV))) {
diff --git a/target/ppc/power8_pmu.c b/target/ppc/power8_pmu.c
index 3f7b305f4f..9769c0ff35 100644
--- a/target/ppc/power8_pmu.c
+++ b/target/ppc/power8_pmu.c
@@ -31,10 +31,13 @@ static void update_PMC_PM_CYC(CPUPPCState *env, int sprn,
  env->spr[sprn] += time_delta;
  }
  
-static void update_programmable_PMC_reg(CPUPPCState *env, int sprn,

-uint64_t time_delta)
+static uint8_t get_PMC_event(CPUPPCState *env, int sprn)


I like the idea of splitting out a helper to get the selected event
(might even make sense to move that to the earlier patch).  What would
be even nicer is if it also included handling of the fact that some
events are specific to particular PMCs (like 0xF0 for PMC1).  That
means that all the event selection logic will be here, rather than
having to check the PMC number again in the caller.  Obviously to do
that you'll need some special "bad event" return value, which might
mean changing the return type.


The initial idea of this function was to be a simple event extractor
returning 0 if MMCR1 is blank. In the end of the series there are 3 callers
of this function that will execute a specific action based on the event
returned by it.

I suppose that we can use the return value 0 as a 'bad value' based on the
PMC events we're going to support, but that will not prevent the callers
from doing a 'switch()' like logic, rechecking the PMC, to see which action
is supposed to be taken.

IIUC, your idea would require an additional layer of abstraction, e.g. a
PMCEvent object, that would tie together PMC + event. Then get_PMC_event()
would return a PMCEvent object and the caller wouldn't need to re-check the
PMC again.

I'll see how hard it would be to introduce this new concept in this existing
series. If it ends up being too much rework I'll suggest to do this in a
follow-up.



Thanks,


Daniel




  {
-uint8_t event, evt_extr;
+uint8_t evt_extr = 0;
+
+if (env->spr[SPR_POWER_MMCR1] == 0) {
+return 0;
+}
  
  switch (sprn) {

  case SPR_POWER_PMC1:
@@ -50,10 +53,16 @@ static void update_programmable_PMC_reg(CPUPPCState *env, 
int sprn,
  evt_extr = MMCR1_PMC4EVT_EXTR;
  break;
  default:
-return;
+return 0;
  }
  
-event = extract64(env->spr[SPR_POWER_MMCR1], evt_extr, MMCR1_EVT_SIZE);

+return extract64(env->spr[SPR_POWER_MMCR1], evt_extr, MMCR1_EVT_SIZE);

Change TCG cache size?

2021-09-21 Thread Kenneth Adam Miller
Hello all,

I just want to ask this one question: if I change the qemu tcg cache size (
TB_JMP_CACHE_SIZE), will that force any errors at run time?


[PATCH v2 27/30] tcg/loongarch64: Register the JIT

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
Reviewed-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 44 
 1 file changed, 44 insertions(+)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 240e40374a..d599de3b17 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1568,3 +1568,47 @@ static void tcg_target_init(TCGContext *s)
 tcg_regset_set_reg(s->reserved_regs, TCG_REG_TP);
 tcg_regset_set_reg(s->reserved_regs, TCG_REG_RESERVED);
 }
+
+typedef struct {
+DebugFrameHeader h;
+uint8_t fde_def_cfa[4];
+uint8_t fde_reg_ofs[ARRAY_SIZE(tcg_target_callee_save_regs) * 2];
+} DebugFrame;
+
+#define ELF_HOST_MACHINE EM_LOONGARCH
+
+static const DebugFrame debug_frame = {
+.h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
+.h.cie.id = -1,
+.h.cie.version = 1,
+.h.cie.code_align = 1,
+.h.cie.data_align = -(TCG_TARGET_REG_BITS / 8) & 0x7f, /* sleb128 */
+.h.cie.return_column = TCG_REG_RA,
+
+/* Total FDE size does not include the "len" member.  */
+.h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),
+
+.fde_def_cfa = {
+12, TCG_REG_SP, /* DW_CFA_def_cfa sp, ...  */
+(FRAME_SIZE & 0x7f) | 0x80, /* ... uleb128 FRAME_SIZE */
+(FRAME_SIZE >> 7)
+},
+.fde_reg_ofs = {
+0x80 + 23, 11,  /* DW_CFA_offset, s0, -88 */
+0x80 + 24, 10,  /* DW_CFA_offset, s1, -80 */
+0x80 + 25, 9,   /* DW_CFA_offset, s2, -72 */
+0x80 + 26, 8,   /* DW_CFA_offset, s3, -64 */
+0x80 + 27, 7,   /* DW_CFA_offset, s4, -56 */
+0x80 + 28, 6,   /* DW_CFA_offset, s5, -48 */
+0x80 + 29, 5,   /* DW_CFA_offset, s6, -40 */
+0x80 + 30, 4,   /* DW_CFA_offset, s7, -32 */
+0x80 + 31, 3,   /* DW_CFA_offset, s8, -24 */
+0x80 + 22, 2,   /* DW_CFA_offset, s9, -16 */
+0x80 + 1 , 1,   /* DW_CFA_offset, ra, -8 */
+}
+};
+
+void tcg_register_jit(const void *buf, size_t buf_size)
+{
+tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
+}
-- 
2.33.0




[PATCH v2 30/30] configure, meson.build: Mark support for loongarch64 hosts

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
---
 configure   | 7 ++-
 meson.build | 4 +++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/configure b/configure
index 1043ccce4f..3a9035385d 100755
--- a/configure
+++ b/configure
@@ -659,6 +659,8 @@ elif check_define __arm__ ; then
   cpu="arm"
 elif check_define __aarch64__ ; then
   cpu="aarch64"
+elif check_define __loongarch64 ; then
+  cpu="loongarch64"
 else
   cpu=$(uname -m)
 fi
@@ -667,7 +669,7 @@ ARCH=
 # Normalise host CPU name and set ARCH.
 # Note that this case should only have supported host CPUs, not guests.
 case "$cpu" in
-  ppc|ppc64|s390x|sparc64|x32|riscv32|riscv64)
+  ppc|ppc64|s390x|sparc64|x32|riscv32|riscv64|loongarch64)
   ;;
   ppc64le)
 ARCH="ppc64"
@@ -4969,6 +4971,9 @@ if test "$linux" = "yes" ; then
   aarch64)
 linux_arch=arm64
 ;;
+  loongarch*)
+linux_arch=loongarch
+;;
   mips64)
 linux_arch=mips
 ;;
diff --git a/meson.build b/meson.build
index 2711cbb789..10e527423a 100644
--- a/meson.build
+++ b/meson.build
@@ -57,7 +57,7 @@ python = import('python').find_installation()
 
 supported_oses = ['windows', 'freebsd', 'netbsd', 'openbsd', 'darwin', 
'sunos', 'linux']
 supported_cpus = ['ppc', 'ppc64', 's390x', 'riscv32', 'riscv64', 'x86', 
'x86_64',
-  'arm', 'aarch64', 'mips', 'mips64', 'sparc', 'sparc64']
+  'arm', 'aarch64', 'loongarch64', 'mips', 'mips64', 'sparc', 'sparc64']
 
 cpu = host_machine.cpu_family()
 targetos = host_machine.system()
@@ -269,6 +269,8 @@ if not get_option('tcg').disabled()
 tcg_arch = 's390'
   elif config_host['ARCH'] in ['x86_64', 'x32']
 tcg_arch = 'i386'
+  elif config_host['ARCH'] == 'loongarch64'
+tcg_arch = 'loongarch64'
   elif config_host['ARCH'] == 'ppc64'
 tcg_arch = 'ppc'
   elif config_host['ARCH'] in ['riscv32', 'riscv64']
-- 
2.33.0




Re: [PULL v2 00/21] riscv-to-apply queue

2021-09-21 Thread Richard Henderson

On 9/20/21 11:53 PM, Alistair Francis wrote:

From: Alistair Francis 

The following changes since commit 326ff8dd09556fc2e257196c49f35009700794ac:

   Merge remote-tracking branch 'remotes/jasowang/tags/net-pull-request' into 
staging (2021-09-20 16:17:05 +0100)

are available in the Git repository at:

   g...@github.com:alistair23/qemu.git tags/pull-riscv-to-apply-20210921

for you to fetch changes up to ed481d9837250aa682f5156528bc923e1b214f76:

   hw/riscv: opentitan: Correct the USB Dev address (2021-09-21 12:10:47 +1000)


Second RISC-V PR for QEMU 6.2

  - ePMP CSR address updates
  - Convert internal interrupts to use QEMU GPIO lines
  - SiFive PWM support
  - Support for RISC-V ACLINT
  - SiFive PDMA fixes
  - Update to u-boot instructions for sifive_u
  - mstatus.SD bug fix for hypervisor extensions
  - OpenTitan fix for USB dev address


Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/6.2
for any user-visible changes.

r~



Re: plugins: Missing Store Exclusive Memory Accesses

2021-09-21 Thread Aaron Lindsay via
On Sep 17 12:05, Alex Bennée wrote:
> Aaron Lindsay  writes:
> > I recently noticed that the plugin interface does not appear to be
> > emitting callbacks to functions registered via
> > `qemu_plugin_register_vcpu_mem_cb` for AArch64 store exclusives. This
> > would include instructions like `stxp  w16, x2, x3, [x4]` (encoding:
> > 0xc8300c82). Seeing as how I'm only running with a single CPU, I don't
> > see how this could be due to losing exclusivity after the preceding
> > `ldxp`.
> 
> The exclusive handling is a bit special due to the need to emulate it's
> behaviour using cmpxchg primitives.
> 
> >
> > In looking at QEMU's source, I *think* this is because the
> > `gen_store_exclusive` function in translate-a64.c is not making the same
> > calls to `plugin_gen_mem_callbacks` & company that are being made by
> > "normal" stores handled by functions like `tcg_gen_qemu_st_i64` (at
> > least in my case; I do see some code paths under `gen_store_exclusive`
> > call down into `tcg_gen_qemu_st_i64` eventually, but it appears not all
> > of them do?).
> 
> The key TCG operation is the cmpxchg which does the effective store. For
> -smp 1 we should use normal ld and st tcg ops. For > 1 it eventually
> falls to tcg_gen_atomic_cmpxchg_XX which is a helper. That eventually
> ends up at:
> 
>   atomic_trace_rmw_post
> 
> which should be where things are hooked.

When I open this up in gdb, I see that I'm getting the following call
graph for the `stxp` instruction in question (for -smp 1):

gen_store_exclusive -> gen_helper_paired_cmpxchg64_le

In other words, I'm taking the `s->be_data == MO_LE` else/if clause.

I do not see where the helper behind that (defined in helper-a64.c as
`uint64_t HELPER(paired_cmpxchg64_le)...`) is calling in to generate
plugin callbacks in this case. Am I missing something?

-Aaron



[PATCH v2 17/30] tcg/loongarch64: Implement add/sub ops

2021-09-21 Thread WANG Xuerui
The neg_i{32,64} ops is fully expressible with sub, so omitted for
simplicity.

Signed-off-by: WANG Xuerui 
---
 tcg/loongarch64/tcg-target-con-set.h |  2 ++
 tcg/loongarch64/tcg-target.c.inc | 38 
 2 files changed, 40 insertions(+)

diff --git a/tcg/loongarch64/tcg-target-con-set.h 
b/tcg/loongarch64/tcg-target-con-set.h
index 42f8e28741..4b8ce85897 100644
--- a/tcg/loongarch64/tcg-target-con-set.h
+++ b/tcg/loongarch64/tcg-target-con-set.h
@@ -18,6 +18,8 @@ C_O0_I1(r)
 C_O1_I1(r, r)
 C_O1_I2(r, r, rC)
 C_O1_I2(r, r, ri)
+C_O1_I2(r, r, rI)
 C_O1_I2(r, r, rU)
 C_O1_I2(r, r, rW)
 C_O1_I2(r, 0, rZ)
+C_O1_I2(r, rZ, rN)
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index f06c61ee2b..e1b333120d 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -640,6 +640,36 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 }
 break;
 
+case INDEX_op_add_i32:
+if (c2) {
+tcg_out_opc_addi_w(s, a0, a1, a2);
+} else {
+tcg_out_opc_add_w(s, a0, a1, a2);
+}
+break;
+case INDEX_op_add_i64:
+if (c2) {
+tcg_out_opc_addi_d(s, a0, a1, a2);
+} else {
+tcg_out_opc_add_d(s, a0, a1, a2);
+}
+break;
+
+case INDEX_op_sub_i32:
+if (c2) {
+tcg_out_opc_addi_w(s, a0, a1, -a2);
+} else {
+tcg_out_opc_sub_w(s, a0, a1, a2);
+}
+break;
+case INDEX_op_sub_i64:
+if (c2) {
+tcg_out_opc_addi_d(s, a0, a1, -a2);
+} else {
+tcg_out_opc_sub_d(s, a0, a1, a2);
+}
+break;
+
 case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
 case INDEX_op_mov_i64:
 default:
@@ -701,6 +731,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_rotr_i64:
 return C_O1_I2(r, r, ri);
 
+case INDEX_op_add_i32:
+case INDEX_op_add_i64:
+return C_O1_I2(r, r, rI);
+
 case INDEX_op_and_i32:
 case INDEX_op_and_i64:
 case INDEX_op_nor_i32:
@@ -723,6 +757,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 /* Must deposit into the same register as input */
 return C_O1_I2(r, 0, rZ);
 
+case INDEX_op_sub_i32:
+case INDEX_op_sub_i64:
+return C_O1_I2(r, rZ, rN);
+
 default:
 g_assert_not_reached();
 }
-- 
2.33.0




Re: [PATCH v2 24/30] linux-user/openrisc: Use force_sig_fault, force_sigsegv_for_addr

2021-09-21 Thread Stafford Horne
On Sun, Sep 19, 2021 at 10:49:26AM -0700, Richard Henderson wrote:
> On 8/24/21 10:17 AM, Peter Maydell wrote:
> > I think that EXCP_RANGE should for us be unreachable in user-only
> > mode (because it can only happen if the relevant bits in SR are
> > set, and SR is writeable only in supervisor mode, and its starting
> > value doesn't set these bits). So we could just delete the EXCP_RANGE
> > handling and let it hit the default g_assert_not_reached() case.
> 
> If I also disable the SR case from gdbstub.
> 
> > EXCP_FPE is more tricky -- this happens for FP exceptions, where
> > the enabling bit is in the FPCSR, which does appear to be writeable
> > from user mode. So either:
> >   * our mtspr is wrong and should either be not allowing writes
> > to FPCSR in usermode (or at least sanitizing them)
> >   * the Linux kernel for openrisc is wrong, because a userspace
> > program that sets FPCSR.FPEE can make it run into unhandled_exception()
> > and die(), and it should be doing something else, like delivering
> > a suitable SIGFPE
> 
> I believe the kernel to be buggy.  But it also points to the fact that no one
> has written fenv.h for or1k for musl, so no one has tried to use those bits.

Hi,

*On User Accessible FPCSR*

As per the spec FPCSR should not be accessible in user space. But...

I am currently working on the OpenRISC port for glibc, and at first I was
planning for FPU support but this was one thing that slowed me down.

For that reason I proposed an architecture change to allow setting fpcsr in user
space, it seems that is allowed by almost all other architectures:

 https://openrisc.io/proposals/p17-user-mode-fpcsr

I think I could also simulate it in the kernel by catching the mtspr failure and
then performing it on behalf of the user if its for MTSPR.

At the moment I am going with softfpu until I can spend time on sorting out the
FPCSR issue.

*On QEMU*

When I started to develop the glibc FPU code, I put a patch into qemu to allow
for using mtspr and mfspr in user space:

 branch:
  https://github.com/stffrdhrn/qemu/commits/or1k-glibc

 commit:
   
https://github.com/stffrdhrn/qemu/commit/dfa5331bf43f71535847c585a6b3f5779a422b13

User space access is not allowed as per trans_l_mtspr, trans_l_mfspr.  I did not
post this upstream as it's not as per spec.


I hope it helps a bit.

-Stafford



[PATCH v2 15/30] tcg/loongarch64: Implement clz/ctz ops

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
---
 tcg/loongarch64/tcg-target-con-set.h |  1 +
 tcg/loongarch64/tcg-target.c.inc | 42 
 2 files changed, 43 insertions(+)

diff --git a/tcg/loongarch64/tcg-target-con-set.h 
b/tcg/loongarch64/tcg-target-con-set.h
index d958183020..2975e03127 100644
--- a/tcg/loongarch64/tcg-target-con-set.h
+++ b/tcg/loongarch64/tcg-target-con-set.h
@@ -18,4 +18,5 @@ C_O0_I1(r)
 C_O1_I1(r, r)
 C_O1_I2(r, r, rC)
 C_O1_I2(r, r, rU)
+C_O1_I2(r, r, rW)
 C_O1_I2(r, 0, rZ)
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index e7b5f2c5ab..65545f7636 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -364,6 +364,28 @@ static void tcg_out_ext32s(TCGContext *s, TCGReg ret, 
TCGReg arg)
 tcg_out_opc_addi_w(s, ret, arg, 0);
 }
 
+static void tcg_out_clzctz(TCGContext *s, LoongArchInsn opc,
+   TCGReg a0, TCGReg a1, TCGReg a2,
+   bool c2, bool is_32bit)
+{
+if (c2) {
+/*
+ * Fast path: semantics already satisfied due to constraint and
+ * insn behavior, single instruction is enough.
+ */
+tcg_debug_assert(a2 == (is_32bit ? 32 : 64));
+/* all clz/ctz insns belong to DJ-format */
+tcg_out32(s, encode_dj_insn(opc, a0, a1));
+return;
+}
+
+tcg_out32(s, encode_dj_insn(opc, TCG_REG_TMP0, a1));
+/* a0 = a1 ? REG_TMP0 : a2 */
+tcg_out_opc_maskeqz(s, TCG_REG_TMP0, TCG_REG_TMP0, a1);
+tcg_out_opc_masknez(s, a0, a2, a1);
+tcg_out_opc_or(s, a0, TCG_REG_TMP0, a0);
+}
+
 /*
  * Entry-points
  */
@@ -525,6 +547,20 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 tcg_out_opc_revb_d(s, a0, a1);
 break;
 
+case INDEX_op_clz_i32:
+tcg_out_clzctz(s, OPC_CLZ_W, a0, a1, a2, c2, true);
+break;
+case INDEX_op_clz_i64:
+tcg_out_clzctz(s, OPC_CLZ_D, a0, a1, a2, c2, false);
+break;
+
+case INDEX_op_ctz_i32:
+tcg_out_clzctz(s, OPC_CTZ_W, a0, a1, a2, c2, true);
+break;
+case INDEX_op_ctz_i64:
+tcg_out_clzctz(s, OPC_CTZ_D, a0, a1, a2, c2, false);
+break;
+
 case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
 case INDEX_op_mov_i64:
 default:
@@ -585,6 +621,12 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 /* LoongArch reg-imm bitops have their imms ZERO-extended */
 return C_O1_I2(r, r, rU);
 
+case INDEX_op_clz_i32:
+case INDEX_op_clz_i64:
+case INDEX_op_ctz_i32:
+case INDEX_op_ctz_i64:
+return C_O1_I2(r, r, rW);
+
 case INDEX_op_deposit_i32:
 case INDEX_op_deposit_i64:
 /* Must deposit into the same register as input */
-- 
2.33.0




[PATCH v2 28/30] linux-user: Add safe syscall handling for loongarch64 hosts

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
---
 linux-user/host/loongarch64/hostdep.h | 34 
 .../host/loongarch64/safe-syscall.inc.S   | 80 +++
 2 files changed, 114 insertions(+)
 create mode 100644 linux-user/host/loongarch64/hostdep.h
 create mode 100644 linux-user/host/loongarch64/safe-syscall.inc.S

diff --git a/linux-user/host/loongarch64/hostdep.h 
b/linux-user/host/loongarch64/hostdep.h
new file mode 100644
index 00..e3d5fa703f
--- /dev/null
+++ b/linux-user/host/loongarch64/hostdep.h
@@ -0,0 +1,34 @@
+/*
+ * hostdep.h : things which are dependent on the host architecture
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef LOONGARCH64_HOSTDEP_H
+#define LOONGARCH64_HOSTDEP_H
+
+/* We have a safe-syscall.inc.S */
+#define HAVE_SAFE_SYSCALL
+
+#ifndef __ASSEMBLER__
+
+/* These are defined by the safe-syscall.inc.S file */
+extern char safe_syscall_start[];
+extern char safe_syscall_end[];
+
+/* Adjust the signal context to rewind out of safe-syscall if we're in it */
+static inline void rewind_if_in_safe_syscall(void *puc)
+{
+ucontext_t *uc = puc;
+unsigned long long *pcreg = &uc->uc_mcontext.__pc;
+
+if (*pcreg > (uintptr_t)safe_syscall_start
+&& *pcreg < (uintptr_t)safe_syscall_end) {
+*pcreg = (uintptr_t)safe_syscall_start;
+}
+}
+
+#endif /* __ASSEMBLER__ */
+
+#endif
diff --git a/linux-user/host/loongarch64/safe-syscall.inc.S 
b/linux-user/host/loongarch64/safe-syscall.inc.S
new file mode 100644
index 00..bb530248b3
--- /dev/null
+++ b/linux-user/host/loongarch64/safe-syscall.inc.S
@@ -0,0 +1,80 @@
+/*
+ * safe-syscall.inc.S : host-specific assembly fragment
+ * to handle signals occurring at the same time as system calls.
+ * This is intended to be included by linux-user/safe-syscall.S
+ *
+ * Ported to LoongArch by WANG Xuerui 
+ *
+ * Based on safe-syscall.inc.S code for every other architecture,
+ * originally written by Richard Henderson 
+ * Copyright (C) 2018 Linaro, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+   .global safe_syscall_base
+   .global safe_syscall_start
+   .global safe_syscall_end
+   .type   safe_syscall_base, @function
+   .type   safe_syscall_start, @function
+   .type   safe_syscall_end, @function
+
+   /*
+* This is the entry point for making a system call. The calling
+* convention here is that of a C varargs function with the
+* first argument an 'int *' to the signal_pending flag, the
+* second one the system call number (as a 'long'), and all further
+* arguments being syscall arguments (also 'long').
+* We return a long which is the syscall's return value, which
+* may be negative-errno on failure. Conversion to the
+* -1-and-errno-set convention is done by the calling wrapper.
+*/
+safe_syscall_base:
+   .cfi_startproc
+   /*
+* The syscall calling convention is nearly the same as C:
+* we enter with a0 == *signal_pending
+*   a1 == syscall number
+*   a2 ... a7 == syscall arguments
+*   and return the result in a0
+* and the syscall instruction needs
+*   a7 == syscall number
+*   a0 ... a5 == syscall arguments
+*   and returns the result in a0
+* Shuffle everything around appropriately.
+*/
+   move$t0, $a0/* signal_pending pointer */
+   move$t1, $a1/* syscall number */
+   move$a0, $a2/* syscall arguments */
+   move$a1, $a3
+   move$a2, $a4
+   move$a3, $a5
+   move$a4, $a6
+   move$a5, $a7
+   move$a7, $t1
+
+   /*
+* This next sequence of code works in conjunction with the
+* rewind_if_safe_syscall_function(). If a signal is taken
+* and the interrupted PC is anywhere between 'safe_syscall_start'
+* and 'safe_syscall_end' then we rewind it to 'safe_syscall_start'.
+* The code sequence must therefore be able to cope with this, and
+* the syscall instruction must be the final one in the sequence.
+*/
+safe_syscall_start:
+   /* If signal_pending is non-zero, don't do the call */
+   ld.w$t1, $t0, 0
+   bnez$t1, 0f
+   syscall 0
+safe_syscall_end:
+   /* code path for having successfully executed the syscall */
+   jr  $ra
+
+0:
+   /* code path when we didn't execute the syscall */
+   li.w$a0, -TARGET_ERESTARTSYS
+   jr  $ra
+   .cfi_endproc
+
+   .size   safe_syscall_base, .-safe_syscall_base
-- 
2.33.0




[PATCH v2 29/30] accel/tcg/user-exec: Implement CPU-specific signal handler for loongarch64 hosts

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
---
 accel/tcg/user-exec.c | 78 +++
 1 file changed, 78 insertions(+)

diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index 8fed542622..87660903b2 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -878,6 +878,84 @@ int cpu_signal_handler(int host_signum, void *pinfo,
 return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask);
 }
 
+#elif defined(__loongarch64)
+
+int cpu_signal_handler(int host_signum, void *pinfo,
+   void *puc)
+{
+siginfo_t *info = pinfo;
+ucontext_t *uc = puc;
+greg_t pc = uc->uc_mcontext.__pc;
+uint32_t insn = *(uint32_t *)pc;
+int is_write = 0;
+
+/* Detect store by reading the instruction at the program counter.  */
+switch ((insn >> 26) & 0b111111) {
+case 0b001000: /* {ll,sc}.[wd] */
+switch ((insn >> 24) & 0b11) {
+case 0b01: /* sc.w */
+case 0b11: /* sc.d */
+is_write = 1;
+break;
+}
+break;
+case 0b001001: /* {ld,st}ox4.[wd] ({ld,st}ptr.[wd]) */
+switch ((insn >> 24) & 0b11) {
+case 0b01: /* stox4.w (stptr.w) */
+case 0b11: /* stox4.d (stptr.d) */
+is_write = 1;
+break;
+}
+break;
+case 0b001010: /* {ld,st}.* family */
+switch ((insn >> 22) & 0b1111) {
+case 0b0100: /* st.b */
+case 0b0101: /* st.h */
+case 0b0110: /* st.w */
+case 0b0111: /* st.d */
+case 0b1101: /* fst.s */
+case 0b1111: /* fst.d */
+is_write = 1;
+break;
+}
+break;
+case 0b001110: /* indexed, atomic, bounds-checking memory operations */
+uint32_t sel = (insn >> 15) & 0b11111111111;
+
+switch (sel) {
+case 0b00000100000: /* stx.b */
+case 0b00000101000: /* stx.h */
+case 0b00000110000: /* stx.w */
+case 0b00000111000: /* stx.d */
+case 0b00001110000: /* fstx.s */
+case 0b00001111000: /* fstx.d */
+case 0b00011101100: /* fstgt.s */
+case 0b00011101101: /* fstgt.d */
+case 0b00011101110: /* fstle.s */
+case 0b00011101111: /* fstle.d */
+case 0b00011111000: /* stgt.b */
+case 0b00011111001: /* stgt.h */
+case 0b00011111010: /* stgt.w */
+case 0b00011111011: /* stgt.d */
+case 0b00011111100: /* stle.b */
+case 0b00011111101: /* stle.h */
+case 0b00011111110: /* stle.w */
+case 0b00011111111: /* stle.d */
+is_write = 1;
+break;
+default:
+/* test for am* instruction range */
+if (0b00011000000 <= sel && sel <= 0b00011100011) {
+is_write = 1;
+}
+break;
+}
+break;
+}
+
+return handle_cpu_signal(pc, info, is_write, &uc->uc_sigmask);
+}
+
 #else
 
 #error host CPU specific signal handler needed
-- 
2.33.0




[PATCH v2 13/30] tcg/loongarch64: Implement deposit/extract ops

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
Reviewed-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target-con-set.h |  1 +
 tcg/loongarch64/tcg-target.c.inc | 21 +
 2 files changed, 22 insertions(+)

diff --git a/tcg/loongarch64/tcg-target-con-set.h 
b/tcg/loongarch64/tcg-target-con-set.h
index 9ac24b8ad0..d958183020 100644
--- a/tcg/loongarch64/tcg-target-con-set.h
+++ b/tcg/loongarch64/tcg-target-con-set.h
@@ -18,3 +18,4 @@ C_O0_I1(r)
 C_O1_I1(r, r)
 C_O1_I2(r, r, rC)
 C_O1_I2(r, r, rU)
+C_O1_I2(r, 0, rZ)
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 89fdb6d7c3..27066960cf 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -494,6 +494,20 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 }
 break;
 
+case INDEX_op_extract_i32:
+tcg_out_opc_bstrpick_w(s, a0, a1, a2, a2 + args[3] - 1);
+break;
+case INDEX_op_extract_i64:
+tcg_out_opc_bstrpick_d(s, a0, a1, a2, a2 + args[3] - 1);
+break;
+
+case INDEX_op_deposit_i32:
+tcg_out_opc_bstrins_w(s, a0, a2, args[3], args[3] + args[4] - 1);
+break;
+case INDEX_op_deposit_i64:
+tcg_out_opc_bstrins_d(s, a0, a2, args[3], args[3] + args[4] - 1);
+break;
+
 case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
 case INDEX_op_mov_i64:
 default:
@@ -523,6 +537,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 case INDEX_op_ext_i32_i64:
 case INDEX_op_not_i32:
 case INDEX_op_not_i64:
+case INDEX_op_extract_i32:
+case INDEX_op_extract_i64:
 return C_O1_I1(r, r);
 
 case INDEX_op_andc_i32:
@@ -549,6 +565,11 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 /* LoongArch reg-imm bitops have their imms ZERO-extended */
 return C_O1_I2(r, r, rU);
 
+case INDEX_op_deposit_i32:
+case INDEX_op_deposit_i64:
+/* Must deposit into the same register as input */
+return C_O1_I2(r, 0, rZ);
+
 default:
 g_assert_not_reached();
 }
-- 
2.33.0




[PATCH v2 26/30] tcg/loongarch64: Implement tcg_target_init

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
Reviewed-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 9cdb283942..240e40374a 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -1541,3 +1541,30 @@ static void tcg_target_qemu_prologue(TCGContext *s)
 tcg_out_opc_addi_d(s, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE);
 tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_RA, 0);
 }
+
+static void tcg_target_init(TCGContext *s)
+{
+tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS;
+tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS;
+
+tcg_target_call_clobber_regs = ALL_GENERAL_REGS;
+tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S0);
+tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S1);
+tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S2);
+tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S3);
+tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S4);
+tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S5);
+tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S6);
+tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S7);
+tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S8);
+tcg_regset_reset_reg(tcg_target_call_clobber_regs, TCG_REG_S9);
+
+s->reserved_regs = 0;
+tcg_regset_set_reg(s->reserved_regs, TCG_REG_ZERO);
+tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP0);
+tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP1);
+tcg_regset_set_reg(s->reserved_regs, TCG_REG_TMP2);
+tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);
+tcg_regset_set_reg(s->reserved_regs, TCG_REG_TP);
+tcg_regset_set_reg(s->reserved_regs, TCG_REG_RESERVED);
+}
-- 
2.33.0




[PATCH v2 24/30] tcg/loongarch64: Implement tcg_target_qemu_prologue

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
Reviewed-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 68 
 1 file changed, 68 insertions(+)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index bbb6b7f47d..0c413c4c95 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -906,6 +906,8 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg 
*args)
  * Entry-points
  */
 
+static const tcg_insn_unit *tb_ret_addr;
+
 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
const TCGArg args[TCG_MAX_OP_ARGS],
const int const_args[TCG_MAX_OP_ARGS])
@@ -1454,3 +1456,69 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 g_assert_not_reached();
 }
 }
+
+static const int tcg_target_callee_save_regs[] = {
+TCG_REG_S0, /* used for the global env (TCG_AREG0) */
+TCG_REG_S1,
+TCG_REG_S2,
+TCG_REG_S3,
+TCG_REG_S4,
+TCG_REG_S5,
+TCG_REG_S6,
+TCG_REG_S7,
+TCG_REG_S8,
+TCG_REG_S9,
+TCG_REG_RA, /* should be last for ABI compliance */
+};
+
+/* Stack frame parameters.  */
+#define REG_SIZE   (TCG_TARGET_REG_BITS / 8)
+#define SAVE_SIZE  ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * REG_SIZE)
+#define TEMP_SIZE  (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
+#define FRAME_SIZE ((TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE + SAVE_SIZE \
+ + TCG_TARGET_STACK_ALIGN - 1) \
+& -TCG_TARGET_STACK_ALIGN)
+#define SAVE_OFS   (TCG_STATIC_CALL_ARGS_SIZE + TEMP_SIZE)
+
+/* We're expecting to be able to use an immediate for frame allocation.  */
+QEMU_BUILD_BUG_ON(FRAME_SIZE > 0x7ff);
+
+/* Generate global QEMU prologue and epilogue code */
+static void tcg_target_qemu_prologue(TCGContext *s)
+{
+int i;
+
+tcg_set_frame(s, TCG_REG_SP, TCG_STATIC_CALL_ARGS_SIZE, TEMP_SIZE);
+
+/* TB prologue */
+tcg_out_opc_addi_d(s, TCG_REG_SP, TCG_REG_SP, -FRAME_SIZE);
+for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+tcg_out_st(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
+   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
+}
+
+#if !defined(CONFIG_SOFTMMU)
+if (USE_GUEST_BASE) {
+tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base);
+tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+}
+#endif
+
+/* Call generated code */
+tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
+tcg_out_opc_jirl(s, TCG_REG_ZERO, tcg_target_call_iarg_regs[1], 0);
+
+/* Return path for goto_ptr. Set return value to 0 */
+tcg_code_gen_epilogue = tcg_splitwx_to_rx(s->code_ptr);
+tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_A0, TCG_REG_ZERO);
+
+/* TB epilogue */
+tb_ret_addr = tcg_splitwx_to_rx(s->code_ptr);
+for (i = 0; i < ARRAY_SIZE(tcg_target_callee_save_regs); i++) {
+tcg_out_ld(s, TCG_TYPE_REG, tcg_target_callee_save_regs[i],
+   TCG_REG_SP, SAVE_OFS + i * REG_SIZE);
+}
+
+tcg_out_opc_addi_d(s, TCG_REG_SP, TCG_REG_SP, FRAME_SIZE);
+tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_RA, 0);
+}
-- 
2.33.0




[PATCH v2 11/30] tcg/loongarch64: Implement sign-/zero-extension ops

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
Reviewed-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target-con-set.h |  1 +
 tcg/loongarch64/tcg-target.c.inc | 82 
 2 files changed, 83 insertions(+)

diff --git a/tcg/loongarch64/tcg-target-con-set.h 
b/tcg/loongarch64/tcg-target-con-set.h
index 5cc4407367..7e459490ea 100644
--- a/tcg/loongarch64/tcg-target-con-set.h
+++ b/tcg/loongarch64/tcg-target-con-set.h
@@ -15,3 +15,4 @@
  * tcg-target-con-str.h; the constraint combination is inclusive or.
  */
 C_O0_I1(r)
+C_O1_I1(r, r)
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index e789a592c6..e000a31a06 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -334,6 +334,36 @@ static void tcg_out_movi(TCGContext *s, TCGType type, 
TCGReg rd,
 tcg_out_opc_cu52i_d(s, rd, rd, top);
 }
 
+static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+tcg_out_opc_andi(s, ret, arg, 0xff);
+}
+
+static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+tcg_out_opc_bstrpick_w(s, ret, arg, 0, 15);
+}
+
+static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+tcg_out_opc_bstrpick_d(s, ret, arg, 0, 31);
+}
+
+static void tcg_out_ext8s(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+tcg_out_opc_sext_b(s, ret, arg);
+}
+
+static void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+tcg_out_opc_sext_h(s, ret, arg);
+}
+
+static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg)
+{
+tcg_out_opc_addi_w(s, ret, arg, 0);
+}
+
 /*
  * Entry-points
  */
@@ -343,6 +373,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
const int const_args[TCG_MAX_OP_ARGS])
 {
 TCGArg a0 = args[0];
+TCGArg a1 = args[1];
 
 switch (opc) {
 case INDEX_op_mb:
@@ -353,6 +384,41 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 tcg_out_opc_jirl(s, TCG_REG_ZERO, a0, 0);
 break;
 
+case INDEX_op_ext8s_i32:
+case INDEX_op_ext8s_i64:
+tcg_out_ext8s(s, a0, a1);
+break;
+
+case INDEX_op_ext8u_i32:
+case INDEX_op_ext8u_i64:
+tcg_out_ext8u(s, a0, a1);
+break;
+
+case INDEX_op_ext16s_i32:
+case INDEX_op_ext16s_i64:
+tcg_out_ext16s(s, a0, a1);
+break;
+
+case INDEX_op_ext16u_i32:
+case INDEX_op_ext16u_i64:
+tcg_out_ext16u(s, a0, a1);
+break;
+
+case INDEX_op_ext32u_i64:
+case INDEX_op_extu_i32_i64:
+tcg_out_ext32u(s, a0, a1);
+break;
+
+case INDEX_op_ext32s_i64:
+case INDEX_op_extrl_i64_i32:
+case INDEX_op_ext_i32_i64:
+tcg_out_ext32s(s, a0, a1);
+break;
+
+case INDEX_op_extrh_i64_i32:
+tcg_out_opc_srai_d(s, a0, a1, 32);
+break;
+
 case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
 case INDEX_op_mov_i64:
 default:
@@ -366,6 +432,22 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_goto_ptr:
 return C_O0_I1(r);
 
+case INDEX_op_ext8s_i32:
+case INDEX_op_ext8s_i64:
+case INDEX_op_ext8u_i32:
+case INDEX_op_ext8u_i64:
+case INDEX_op_ext16s_i32:
+case INDEX_op_ext16s_i64:
+case INDEX_op_ext16u_i32:
+case INDEX_op_ext16u_i64:
+case INDEX_op_ext32s_i64:
+case INDEX_op_ext32u_i64:
+case INDEX_op_extu_i32_i64:
+case INDEX_op_extrl_i64_i32:
+case INDEX_op_extrh_i64_i32:
+case INDEX_op_ext_i32_i64:
+return C_O1_I1(r, r);
+
 default:
 g_assert_not_reached();
 }
-- 
2.33.0




[PATCH v2 25/30] tcg/loongarch64: Implement exit_tb/goto_tb

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
Reviewed-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 0c413c4c95..9cdb283942 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -918,6 +918,25 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 int c2 = const_args[2];
 
 switch (opc) {
+case INDEX_op_exit_tb:
+/* Reuse the zeroing that exists for goto_ptr.  */
+if (a0 == 0) {
+tcg_out_call_int(s, tcg_code_gen_epilogue, true);
+} else {
+tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0);
+tcg_out_call_int(s, tb_ret_addr, true);
+}
+break;
+
+case INDEX_op_goto_tb:
+assert(s->tb_jmp_insn_offset == 0);
+/* indirect jump method */
+tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
+   (uintptr_t)(s->tb_jmp_target_addr + a0));
+tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0);
+set_jmp_reset_offset(s, a0);
+break;
+
 case INDEX_op_mb:
 tcg_out_mb(s, a0);
 break;
-- 
2.33.0




[PATCH v2 20/30] tcg/loongarch64: Implement setcond ops

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
---
 tcg/loongarch64/tcg-target.c.inc | 74 
 1 file changed, 74 insertions(+)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index b0600a3dbd..08e6541dcf 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -386,6 +386,73 @@ static void tcg_out_clzctz(TCGContext *s, LoongArchInsn 
opc,
 tcg_out_opc_or(s, a0, TCG_REG_TMP0, a0);
 }
 
+static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
+TCGReg arg1, TCGReg arg2, bool c1, bool c2)
+{
+TCGReg tmp;
+
+if (c1) {
+tcg_debug_assert(arg1 == 0);
+}
+if (c2) {
+tcg_debug_assert(arg2 == 0);
+}
+
+switch (cond) {
+case TCG_COND_EQ:
+if (c1) {
+tmp = arg2;
+} else if (c2) {
+tmp = arg1;
+} else {
+tcg_out_opc_sub_d(s, ret, arg1, arg2);
+tmp = ret;
+}
+tcg_out_opc_sltui(s, ret, tmp, 1);
+break;
+case TCG_COND_NE:
+if (c1) {
+tmp = arg2;
+} else if (c2) {
+tmp = arg1;
+} else {
+tcg_out_opc_sub_d(s, ret, arg1, arg2);
+tmp = ret;
+}
+tcg_out_opc_sltu(s, ret, TCG_REG_ZERO, tmp);
+break;
+case TCG_COND_LT:
+tcg_out_opc_slt(s, ret, arg1, arg2);
+break;
+case TCG_COND_GE:
+tcg_out_opc_slt(s, ret, arg1, arg2);
+tcg_out_opc_xori(s, ret, ret, 1);
+break;
+case TCG_COND_LE:
+tcg_out_setcond(s, TCG_COND_GE, ret, arg2, arg1, c2, c1);
+break;
+case TCG_COND_GT:
+tcg_out_setcond(s, TCG_COND_LT, ret, arg2, arg1, c2, c1);
+break;
+case TCG_COND_LTU:
+tcg_out_opc_sltu(s, ret, arg1, arg2);
+break;
+case TCG_COND_GEU:
+tcg_out_opc_sltu(s, ret, arg1, arg2);
+tcg_out_opc_xori(s, ret, ret, 1);
+break;
+case TCG_COND_LEU:
+tcg_out_setcond(s, TCG_COND_GEU, ret, arg2, arg1, c2, c1);
+break;
+case TCG_COND_GTU:
+tcg_out_setcond(s, TCG_COND_LTU, ret, arg2, arg1, c2, c1);
+break;
+default:
+g_assert_not_reached();
+break;
+}
+}
+
 /*
  * Branch helpers
  */
@@ -768,6 +835,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 tcg_out_opc_mod_du(s, a0, a1, a2);
 break;
 
+case INDEX_op_setcond_i32:
+case INDEX_op_setcond_i64:
+tcg_out_setcond(s, args[3], a0, a1, a2, const_args[1], c2);
+break;
+
 case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
 case INDEX_op_mov_i64:
 default:
@@ -877,6 +949,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 case INDEX_op_rem_i64:
 case INDEX_op_remu_i32:
 case INDEX_op_remu_i64:
+case INDEX_op_setcond_i32:
+case INDEX_op_setcond_i64:
 return C_O1_I2(r, rZ, rZ);
 
 default:
-- 
2.33.0




[PATCH v2 23/30] tcg/loongarch64: Add softmmu load/store helpers, implement qemu_ld/qemu_st ops

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
---
 tcg/loongarch64/tcg-target-con-set.h |   2 +
 tcg/loongarch64/tcg-target.c.inc | 332 +++
 2 files changed, 334 insertions(+)

diff --git a/tcg/loongarch64/tcg-target-con-set.h 
b/tcg/loongarch64/tcg-target-con-set.h
index 3ab0416d9f..8fd3a2f4a1 100644
--- a/tcg/loongarch64/tcg-target-con-set.h
+++ b/tcg/loongarch64/tcg-target-con-set.h
@@ -17,7 +17,9 @@
 C_O0_I1(r)
 C_O0_I2(rZ, r)
 C_O0_I2(rZ, rZ)
+C_O0_I2(LZ, L)
 C_O1_I1(r, r)
+C_O1_I1(r, L)
 C_O1_I2(r, r, rC)
 C_O1_I2(r, r, ri)
 C_O1_I2(r, r, rI)
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 18b2473d9c..bbb6b7f47d 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -117,6 +117,11 @@ static const int tcg_target_call_oarg_regs[] = {
 TCG_REG_A1,
 };
 
+#ifndef CONFIG_SOFTMMU
+#define USE_GUEST_BASE (guest_base != 0)
+#define TCG_GUEST_BASE_REG TCG_REG_S1
+#endif
+
 #define TCG_CT_CONST_ZERO  0x100
 #define TCG_CT_CONST_S12   0x200
 #define TCG_CT_CONST_N12   0x400
@@ -591,6 +596,312 @@ static bool tcg_out_sti(TCGContext *s, TCGType type, 
TCGArg val,
 return false;
 }
 
+/*
+ * Load/store helpers for SoftMMU, and qemu_ld/st implementations
+ */
+
+#if defined(CONFIG_SOFTMMU)
+#include "../tcg-ldst.c.inc"
+
+/*
+ * helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
+ * TCGMemOpIdx oi, uintptr_t ra)
+ */
+static void * const qemu_ld_helpers[4] = {
+[MO_8]  = helper_ret_ldub_mmu,
+[MO_16] = helper_le_lduw_mmu,
+[MO_32] = helper_le_ldul_mmu,
+[MO_64] = helper_le_ldq_mmu,
+};
+
+/*
+ * helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
+ * uintxx_t val, TCGMemOpIdx oi,
+ * uintptr_t ra)
+ */
+static void * const qemu_st_helpers[4] = {
+[MO_8]  = helper_ret_stb_mmu,
+[MO_16] = helper_le_stw_mmu,
+[MO_32] = helper_le_stl_mmu,
+[MO_64] = helper_le_stq_mmu,
+};
+
+/* We expect to use a 12-bit negative offset from ENV.  */
+QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
+QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -(1 << 11));
+
+static bool tcg_out_goto(TCGContext *s, const tcg_insn_unit *target)
+{
+tcg_out_opc_b(s, 0);
+return reloc_br_sd10k16(s->code_ptr - 1, target);
+}
+
+static void tcg_out_tlb_load(TCGContext *s, TCGReg addrl, TCGMemOpIdx oi,
+ tcg_insn_unit **label_ptr, bool is_load)
+{
+MemOp opc = get_memop(oi);
+unsigned s_bits = opc & MO_SIZE;
+unsigned a_bits = get_alignment_bits(opc);
+tcg_target_long compare_mask;
+int mem_index = get_mmuidx(oi);
+int fast_ofs = TLB_MASK_TABLE_OFS(mem_index);
+int mask_ofs = fast_ofs + offsetof(CPUTLBDescFast, mask);
+int table_ofs = fast_ofs + offsetof(CPUTLBDescFast, table);
+
+tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_AREG0, mask_ofs);
+tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_AREG0, table_ofs);
+
+tcg_out_opc_srli_d(s, TCG_REG_TMP2, addrl,
+TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+tcg_out_opc_and(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP0);
+tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, TCG_REG_TMP1);
+
+/* Load the tlb comparator and the addend.  */
+tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP0, TCG_REG_TMP2,
+   is_load ? offsetof(CPUTLBEntry, addr_read)
+   : offsetof(CPUTLBEntry, addr_write));
+tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP2, TCG_REG_TMP2,
+   offsetof(CPUTLBEntry, addend));
+
+/* We don't support unaligned accesses.  */
+if (a_bits < s_bits) {
+a_bits = s_bits;
+}
+/* Clear the non-page, non-alignment bits from the address.  */
+compare_mask = (tcg_target_long)TARGET_PAGE_MASK | ((1 << a_bits) - 1);
+tcg_out_movi(s, TCG_TYPE_TL, TCG_REG_TMP1, compare_mask);
+tcg_out_opc_and(s, TCG_REG_TMP1, TCG_REG_TMP1, addrl);
+
+/* Compare masked address with the TLB entry.  */
+label_ptr[0] = s->code_ptr;
+tcg_out_opc_bne(s, TCG_REG_TMP0, TCG_REG_TMP1, 0);
+
+/* TLB Hit - translate address using addend.  */
+tcg_out_opc_add_d(s, TCG_REG_TMP0, TCG_REG_TMP2, addrl);
+}
+
+static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOpIdx oi,
+TCGReg datalo, TCGReg addrlo,
+void *raddr, tcg_insn_unit **label_ptr)
+{
+TCGLabelQemuLdst *label = new_ldst_label(s);
+
+label->is_ld = is_ld;
+label->oi = oi;
+label->type = 0;
+label->datalo_reg = datalo;
+label->datahi_reg = 0;
+label->addrlo_reg = addrlo;
+label->addrhi_reg = 0;
+label->raddr = tcg_splitwx_to_rx(raddr);
+label->label_ptr[0] = label_ptr[0];
+}
+
+static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+TCGMemOpIdx oi = l->oi;
+MemOp opc = get_memop(oi);
+MemOp size = opc & MO_SIZE;

[PATCH v2 19/30] tcg/loongarch64: Implement br/brcond ops

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
Reviewed-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target-con-set.h |  1 +
 tcg/loongarch64/tcg-target.c.inc | 53 
 2 files changed, 54 insertions(+)

diff --git a/tcg/loongarch64/tcg-target-con-set.h 
b/tcg/loongarch64/tcg-target-con-set.h
index fb56f3a295..367689c2e2 100644
--- a/tcg/loongarch64/tcg-target-con-set.h
+++ b/tcg/loongarch64/tcg-target-con-set.h
@@ -15,6 +15,7 @@
  * tcg-target-con-str.h; the constraint combination is inclusive or.
  */
 C_O0_I1(r)
+C_O0_I2(rZ, rZ)
 C_O1_I1(r, r)
 C_O1_I2(r, r, rC)
 C_O1_I2(r, r, ri)
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 802066035b..b0600a3dbd 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -386,6 +386,44 @@ static void tcg_out_clzctz(TCGContext *s, LoongArchInsn 
opc,
 tcg_out_opc_or(s, a0, TCG_REG_TMP0, a0);
 }
 
+/*
+ * Branch helpers
+ */
+
+static const struct {
+LoongArchInsn op;
+bool swap;
+} tcg_brcond_to_loongarch[] = {
+[TCG_COND_EQ] =  { OPC_BEQ,  false },
+[TCG_COND_NE] =  { OPC_BNE,  false },
+[TCG_COND_LT] =  { OPC_BGT,  true  },
+[TCG_COND_GE] =  { OPC_BLE,  true  },
+[TCG_COND_LE] =  { OPC_BLE,  false },
+[TCG_COND_GT] =  { OPC_BGT,  false },
+[TCG_COND_LTU] = { OPC_BGTU, true  },
+[TCG_COND_GEU] = { OPC_BLEU, true  },
+[TCG_COND_LEU] = { OPC_BLEU, false },
+[TCG_COND_GTU] = { OPC_BGTU, false }
+};
+
+static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
+   TCGReg arg2, TCGLabel *l)
+{
+LoongArchInsn op = tcg_brcond_to_loongarch[cond].op;
+
+tcg_debug_assert(op != 0);
+
+if (tcg_brcond_to_loongarch[cond].swap) {
+TCGReg t = arg1;
+arg1 = arg2;
+arg2 = t;
+}
+
+/* all conditional branch insns belong to DJSk16-format */
+tcg_out_reloc(s, s->code_ptr, R_LOONGARCH_BR_SK16, l, 0);
+tcg_out32(s, encode_djsk16_insn(op, arg1, arg2, 0));
+}
+
 /*
  * Entry-points
  */
@@ -408,6 +446,17 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 tcg_out_opc_jirl(s, TCG_REG_ZERO, a0, 0);
 break;
 
+case INDEX_op_br:
+tcg_out_reloc(s, s->code_ptr, R_LOONGARCH_BR_SD10K16, arg_label(a0),
+  0);
+tcg_out_opc_b(s, 0);
+break;
+
+case INDEX_op_brcond_i32:
+case INDEX_op_brcond_i64:
+tcg_out_brcond(s, a2, a0, a1, arg_label(args[3]));
+break;
+
 case INDEX_op_ext8s_i32:
 case INDEX_op_ext8s_i64:
 tcg_out_ext8s(s, a0, a1);
@@ -732,6 +781,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_goto_ptr:
 return C_O0_I1(r);
 
+case INDEX_op_brcond_i32:
+case INDEX_op_brcond_i64:
+return C_O0_I2(rZ, rZ);
+
 case INDEX_op_ext8s_i32:
 case INDEX_op_ext8s_i64:
 case INDEX_op_ext8u_i32:
-- 
2.33.0




[PATCH v2 10/30] tcg/loongarch64: Implement goto_ptr

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
Reviewed-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target-con-set.h | 17 +
 tcg/loongarch64/tcg-target.c.inc | 15 +++
 2 files changed, 32 insertions(+)
 create mode 100644 tcg/loongarch64/tcg-target-con-set.h

diff --git a/tcg/loongarch64/tcg-target-con-set.h 
b/tcg/loongarch64/tcg-target-con-set.h
new file mode 100644
index 00..5cc4407367
--- /dev/null
+++ b/tcg/loongarch64/tcg-target-con-set.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define LoongArch target-specific constraint sets.
+ *
+ * Copyright (c) 2021 WANG Xuerui 
+ *
+ * Based on tcg/riscv/tcg-target-con-set.h
+ *
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * C_On_Im(...) defines a constraint set with <n> outputs and <m> inputs.
+ * Each operand should be a sequence of constraint letters as defined by
+ * tcg-target-con-str.h; the constraint combination is inclusive or.
+ */
+C_O0_I1(r)
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index e4e7e5e903..e789a592c6 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -349,9 +349,24 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 tcg_out_mb(s, a0);
 break;
 
+case INDEX_op_goto_ptr:
+tcg_out_opc_jirl(s, TCG_REG_ZERO, a0, 0);
+break;
+
 case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
 case INDEX_op_mov_i64:
 default:
 g_assert_not_reached();
 }
 }
+
+static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
+{
+switch (op) {
+case INDEX_op_goto_ptr:
+return C_O0_I1(r);
+
+default:
+g_assert_not_reached();
+}
+}
-- 
2.33.0




[PATCH v2 22/30] tcg/loongarch64: Implement simple load/store ops

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
Reviewed-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target-con-set.h |   1 +
 tcg/loongarch64/tcg-target.c.inc | 131 +++
 2 files changed, 132 insertions(+)

diff --git a/tcg/loongarch64/tcg-target-con-set.h 
b/tcg/loongarch64/tcg-target-con-set.h
index 367689c2e2..3ab0416d9f 100644
--- a/tcg/loongarch64/tcg-target-con-set.h
+++ b/tcg/loongarch64/tcg-target-con-set.h
@@ -15,6 +15,7 @@
  * tcg-target-con-str.h; the constraint combination is inclusive or.
  */
 C_O0_I1(r)
+C_O0_I2(rZ, r)
 C_O0_I2(rZ, rZ)
 C_O1_I1(r, r)
 C_O1_I2(r, r, rC)
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 44532ee1e4..18b2473d9c 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -524,6 +524,73 @@ static void tcg_out_call(TCGContext *s, const 
tcg_insn_unit *arg)
 tcg_out_call_int(s, arg, false);
 }
 
+/*
+ * Load/store helpers
+ */
+
+static void tcg_out_ldst(TCGContext *s, LoongArchInsn opc, TCGReg data,
+ TCGReg addr, intptr_t offset)
+{
+intptr_t imm12 = sextreg(offset, 0, 12);
+
+if (offset != imm12) {
+intptr_t diff = offset - (uintptr_t)s->code_ptr;
+
+if (addr == TCG_REG_ZERO && diff == (int32_t)diff) {
+imm12 = sextreg(diff, 0, 12);
+tcg_out_opc_pcaddu12i(s, TCG_REG_TMP2, (diff - imm12) >> 12);
+} else {
+tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP2, offset - imm12);
+if (addr != TCG_REG_ZERO) {
+tcg_out_opc_add_d(s, TCG_REG_TMP2, TCG_REG_TMP2, addr);
+}
+}
+addr = TCG_REG_TMP2;
+}
+
+switch (opc) {
+case OPC_LD_B:
+case OPC_LD_BU:
+case OPC_LD_H:
+case OPC_LD_HU:
+case OPC_LD_W:
+case OPC_LD_WU:
+case OPC_LD_D:
+case OPC_ST_B:
+case OPC_ST_H:
+case OPC_ST_W:
+case OPC_ST_D:
+tcg_out32(s, encode_djsk12_insn(opc, data, addr, imm12));
+break;
+default:
+g_assert_not_reached();
+}
+}
+
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg arg,
+   TCGReg arg1, intptr_t arg2)
+{
+bool is_32bit = type == TCG_TYPE_I32;
+tcg_out_ldst(s, is_32bit ? OPC_LD_W : OPC_LD_D, arg, arg1, arg2);
+}
+
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
+   TCGReg arg1, intptr_t arg2)
+{
+bool is_32bit = type == TCG_TYPE_I32;
+tcg_out_ldst(s, is_32bit ? OPC_ST_W : OPC_ST_D, arg, arg1, arg2);
+}
+
+static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
+TCGReg base, intptr_t ofs)
+{
+if (val == 0) {
+tcg_out_st(s, type, TCG_REG_ZERO, base, ofs);
+return true;
+}
+return false;
+}
+
 /*
  * Entry-points
  */
@@ -873,6 +940,49 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 tcg_out_setcond(s, args[3], a0, a1, a2, const_args[1], c2);
 break;
 
+case INDEX_op_ld8s_i32:
+case INDEX_op_ld8s_i64:
+tcg_out_ldst(s, OPC_LD_B, a0, a1, a2);
+break;
+case INDEX_op_ld8u_i32:
+case INDEX_op_ld8u_i64:
+tcg_out_ldst(s, OPC_LD_BU, a0, a1, a2);
+break;
+case INDEX_op_ld16s_i32:
+case INDEX_op_ld16s_i64:
+tcg_out_ldst(s, OPC_LD_H, a0, a1, a2);
+break;
+case INDEX_op_ld16u_i32:
+case INDEX_op_ld16u_i64:
+tcg_out_ldst(s, OPC_LD_HU, a0, a1, a2);
+break;
+case INDEX_op_ld_i32:
+case INDEX_op_ld32s_i64:
+tcg_out_ldst(s, OPC_LD_W, a0, a1, a2);
+break;
+case INDEX_op_ld32u_i64:
+tcg_out_ldst(s, OPC_LD_WU, a0, a1, a2);
+break;
+case INDEX_op_ld_i64:
+tcg_out_ldst(s, OPC_LD_D, a0, a1, a2);
+break;
+
+case INDEX_op_st8_i32:
+case INDEX_op_st8_i64:
+tcg_out_ldst(s, OPC_ST_B, a0, a1, a2);
+break;
+case INDEX_op_st16_i32:
+case INDEX_op_st16_i64:
+tcg_out_ldst(s, OPC_ST_H, a0, a1, a2);
+break;
+case INDEX_op_st_i32:
+case INDEX_op_st32_i64:
+tcg_out_ldst(s, OPC_ST_W, a0, a1, a2);
+break;
+case INDEX_op_st_i64:
+tcg_out_ldst(s, OPC_ST_D, a0, a1, a2);
+break;
+
 case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
 case INDEX_op_mov_i64:
 case INDEX_op_call: /* Always emitted via tcg_out_call.  */
@@ -887,6 +997,15 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_goto_ptr:
 return C_O0_I1(r);
 
+case INDEX_op_st8_i32:
+case INDEX_op_st8_i64:
+case INDEX_op_st16_i32:
+case INDEX_op_st16_i64:
+case INDEX_op_st32_i64:
+case INDEX_op_st_i32:
+case INDEX_op_st_i64:
+return C_O0_I2(rZ, r);
+
 case INDEX_op_brcond_i32:
 case INDEX_op_brcond_i64:
 return C_O0_I2(rZ, rZ);
@@ -912,6 +1031,18 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_bswap32_i32:
 case 

[PATCH v2 21/30] tcg/loongarch64: Implement tcg_out_call

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
---
 tcg/loongarch64/tcg-target.c.inc | 34 
 1 file changed, 34 insertions(+)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 08e6541dcf..44532ee1e4 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -491,6 +491,39 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, 
TCGReg arg1,
 tcg_out32(s, encode_djsk16_insn(op, arg1, arg2, 0));
 }
 
+static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool 
tail)
+{
+TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA;
+ptrdiff_t offset = tcg_pcrel_diff(s, arg);
+
+tcg_debug_assert((offset & 3) == 0);
+if (offset == sextreg(offset, 0, 28)) {
+/* short jump: +/- 256MiB */
+if (tail) {
+tcg_out_opc_b(s, offset >> 2);
+} else {
+tcg_out_opc_bl(s, offset >> 2);
+}
+} else if (offset == sextreg(offset, 0, 38)) {
+/* long jump: +/- 256GiB */
+tcg_target_long lo = sextreg(offset, 0, 18);
+tcg_target_long hi = offset - lo;
+tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, hi >> 18);
+tcg_out_opc_jirl(s, link, TCG_REG_TMP0, lo >> 2);
+} else {
+/* far jump: 64-bit */
+tcg_target_long lo = sextreg((tcg_target_long)arg, 0, 18);
+tcg_target_long hi = (tcg_target_long)arg - lo;
+tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, hi);
+tcg_out_opc_jirl(s, link, TCG_REG_TMP0, lo >> 2);
+}
+}
+
+static void tcg_out_call(TCGContext *s, const tcg_insn_unit *arg)
+{
+tcg_out_call_int(s, arg, false);
+}
+
 /*
  * Entry-points
  */
@@ -842,6 +875,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 
 case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
 case INDEX_op_mov_i64:
+case INDEX_op_call: /* Always emitted via tcg_out_call.  */
 default:
 g_assert_not_reached();
 }
-- 
2.33.0




[PATCH v2 08/30] tcg/loongarch64: Implement the memory barrier op

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
---
 tcg/loongarch64/tcg-target.c.inc | 32 
 1 file changed, 32 insertions(+)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 69e882ba5d..338b772732 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -234,3 +234,35 @@ static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
 g_assert_not_reached();
 }
 }
+
+#include "tcg-insn-defs.c.inc"
+
+/*
+ * TCG intrinsics
+ */
+
+static void tcg_out_mb(TCGContext *s, TCGArg a0)
+{
+/* Baseline LoongArch only has the full barrier, unfortunately.  */
+tcg_out_opc_dbar(s, 0);
+}
+
+/*
+ * Entry-points
+ */
+
+static void tcg_out_op(TCGContext *s, TCGOpcode opc,
+   const TCGArg args[TCG_MAX_OP_ARGS],
+   const int const_args[TCG_MAX_OP_ARGS])
+{
+TCGArg a0 = args[0];
+
+switch (opc) {
+case INDEX_op_mb:
+tcg_out_mb(s, a0);
+break;
+
+default:
+g_assert_not_reached();
+}
+}
-- 
2.33.0




[PATCH v2 18/30] tcg/loongarch64: Implement mul/mulsh/muluh/div/divu/rem/remu ops

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
Reviewed-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target-con-set.h |  1 +
 tcg/loongarch64/tcg-target.c.inc | 65 
 2 files changed, 66 insertions(+)

diff --git a/tcg/loongarch64/tcg-target-con-set.h 
b/tcg/loongarch64/tcg-target-con-set.h
index 4b8ce85897..fb56f3a295 100644
--- a/tcg/loongarch64/tcg-target-con-set.h
+++ b/tcg/loongarch64/tcg-target-con-set.h
@@ -23,3 +23,4 @@ C_O1_I2(r, r, rU)
 C_O1_I2(r, r, rW)
 C_O1_I2(r, 0, rZ)
 C_O1_I2(r, rZ, rN)
+C_O1_I2(r, rZ, rZ)
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index e1b333120d..802066035b 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -670,6 +670,55 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 }
 break;
 
+case INDEX_op_mul_i32:
+tcg_out_opc_mul_w(s, a0, a1, a2);
+break;
+case INDEX_op_mul_i64:
+tcg_out_opc_mul_d(s, a0, a1, a2);
+break;
+
+case INDEX_op_mulsh_i32:
+tcg_out_opc_mulh_w(s, a0, a1, a2);
+break;
+case INDEX_op_mulsh_i64:
+tcg_out_opc_mulh_d(s, a0, a1, a2);
+break;
+
+case INDEX_op_muluh_i32:
+tcg_out_opc_mulh_wu(s, a0, a1, a2);
+break;
+case INDEX_op_muluh_i64:
+tcg_out_opc_mulh_du(s, a0, a1, a2);
+break;
+
+case INDEX_op_div_i32:
+tcg_out_opc_div_w(s, a0, a1, a2);
+break;
+case INDEX_op_div_i64:
+tcg_out_opc_div_d(s, a0, a1, a2);
+break;
+
+case INDEX_op_divu_i32:
+tcg_out_opc_div_wu(s, a0, a1, a2);
+break;
+case INDEX_op_divu_i64:
+tcg_out_opc_div_du(s, a0, a1, a2);
+break;
+
+case INDEX_op_rem_i32:
+tcg_out_opc_mod_w(s, a0, a1, a2);
+break;
+case INDEX_op_rem_i64:
+tcg_out_opc_mod_d(s, a0, a1, a2);
+break;
+
+case INDEX_op_remu_i32:
+tcg_out_opc_mod_wu(s, a0, a1, a2);
+break;
+case INDEX_op_remu_i64:
+tcg_out_opc_mod_du(s, a0, a1, a2);
+break;
+
 case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
 case INDEX_op_mov_i64:
 default:
@@ -761,6 +810,22 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_sub_i64:
 return C_O1_I2(r, rZ, rN);
 
+case INDEX_op_mul_i32:
+case INDEX_op_mul_i64:
+case INDEX_op_mulsh_i32:
+case INDEX_op_mulsh_i64:
+case INDEX_op_muluh_i32:
+case INDEX_op_muluh_i64:
+case INDEX_op_div_i32:
+case INDEX_op_div_i64:
+case INDEX_op_divu_i32:
+case INDEX_op_divu_i64:
+case INDEX_op_rem_i32:
+case INDEX_op_rem_i64:
+case INDEX_op_remu_i32:
+case INDEX_op_remu_i64:
+return C_O1_I2(r, rZ, rZ);
+
 default:
 g_assert_not_reached();
 }
-- 
2.33.0




[PATCH v2 12/30] tcg/loongarch64: Implement not/and/or/xor/nor/andc/orc/eqv ops

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
---
 tcg/loongarch64/tcg-target-con-set.h |   2 +
 tcg/loongarch64/tcg-target.c.inc | 101 +++
 2 files changed, 103 insertions(+)

diff --git a/tcg/loongarch64/tcg-target-con-set.h 
b/tcg/loongarch64/tcg-target-con-set.h
index 7e459490ea..9ac24b8ad0 100644
--- a/tcg/loongarch64/tcg-target-con-set.h
+++ b/tcg/loongarch64/tcg-target-con-set.h
@@ -16,3 +16,5 @@
  */
 C_O0_I1(r)
 C_O1_I1(r, r)
+C_O1_I2(r, r, rC)
+C_O1_I2(r, r, rU)
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index e000a31a06..89fdb6d7c3 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -374,6 +374,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 {
 TCGArg a0 = args[0];
 TCGArg a1 = args[1];
+TCGArg a2 = args[2];
+int c2 = const_args[2];
 
 switch (opc) {
 case INDEX_op_mb:
@@ -419,6 +421,79 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 tcg_out_opc_srai_d(s, a0, a1, 32);
 break;
 
+case INDEX_op_not_i32:
+case INDEX_op_not_i64:
+tcg_out_opc_nor(s, a0, a1, TCG_REG_ZERO);
+break;
+
+case INDEX_op_nor_i32:
+case INDEX_op_nor_i64:
+if (c2) {
+tcg_out_opc_ori(s, a0, a1, a2);
+tcg_out_opc_nor(s, a0, a0, TCG_REG_ZERO);
+} else {
+tcg_out_opc_nor(s, a0, a1, a2);
+}
+break;
+
+case INDEX_op_andc_i32:
+case INDEX_op_andc_i64:
+if (c2) {
+/* guaranteed to fit due to constraint */
+tcg_out_opc_andi(s, a0, a1, ~a2);
+} else {
+tcg_out_opc_andn(s, a0, a1, a2);
+}
+break;
+
+case INDEX_op_orc_i32:
+case INDEX_op_orc_i64:
+if (c2) {
+/* guaranteed to fit due to constraint */
+tcg_out_opc_ori(s, a0, a1, ~a2);
+} else {
+tcg_out_opc_orn(s, a0, a1, a2);
+}
+break;
+
+case INDEX_op_eqv_i32:
+case INDEX_op_eqv_i64:
+if (c2) {
+/* guaranteed to fit due to constraint */
+tcg_out_opc_xori(s, a0, a1, ~a2);
+} else {
+tcg_out_opc_nor(s, a0, a2, TCG_REG_ZERO);
+tcg_out_opc_xor(s, a0, a1, a0);
+}
+break;
+
+case INDEX_op_and_i32:
+case INDEX_op_and_i64:
+if (c2) {
+tcg_out_opc_andi(s, a0, a1, a2);
+} else {
+tcg_out_opc_and(s, a0, a1, a2);
+}
+break;
+
+case INDEX_op_or_i32:
+case INDEX_op_or_i64:
+if (c2) {
+tcg_out_opc_ori(s, a0, a1, a2);
+} else {
+tcg_out_opc_or(s, a0, a1, a2);
+}
+break;
+
+case INDEX_op_xor_i32:
+case INDEX_op_xor_i64:
+if (c2) {
+tcg_out_opc_xori(s, a0, a1, a2);
+} else {
+tcg_out_opc_xor(s, a0, a1, a2);
+}
+break;
+
 case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
 case INDEX_op_mov_i64:
 default:
@@ -446,8 +521,34 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
 case INDEX_op_extrl_i64_i32:
 case INDEX_op_extrh_i64_i32:
 case INDEX_op_ext_i32_i64:
+case INDEX_op_not_i32:
+case INDEX_op_not_i64:
 return C_O1_I1(r, r);
 
+case INDEX_op_andc_i32:
+case INDEX_op_andc_i64:
+case INDEX_op_eqv_i32:
+case INDEX_op_eqv_i64:
+case INDEX_op_orc_i32:
+case INDEX_op_orc_i64:
+/*
+ * LoongArch insns for these ops don't have reg-imm forms, but we
+ * can express using andi/ori/xori if ~constant satisfies
+ * TCG_CT_CONST_U12.
+ */
+return C_O1_I2(r, r, rC);
+
+case INDEX_op_and_i32:
+case INDEX_op_and_i64:
+case INDEX_op_nor_i32:
+case INDEX_op_nor_i64:
+case INDEX_op_or_i32:
+case INDEX_op_or_i64:
+case INDEX_op_xor_i32:
+case INDEX_op_xor_i64:
+/* LoongArch reg-imm bitops have their imms ZERO-extended */
+return C_O1_I2(r, r, rU);
+
 default:
 g_assert_not_reached();
 }
-- 
2.33.0




[PATCH v2 06/30] tcg/loongarch64: Define the operand constraints

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
---
 tcg/loongarch64/tcg-target-con-str.h | 28 +++
 tcg/loongarch64/tcg-target.c.inc | 52 
 2 files changed, 80 insertions(+)
 create mode 100644 tcg/loongarch64/tcg-target-con-str.h

diff --git a/tcg/loongarch64/tcg-target-con-str.h 
b/tcg/loongarch64/tcg-target-con-str.h
new file mode 100644
index 00..c3986a4fd4
--- /dev/null
+++ b/tcg/loongarch64/tcg-target-con-str.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Define LoongArch target-specific operand constraints.
+ *
+ * Copyright (c) 2021 WANG Xuerui 
+ *
+ * Based on tcg/riscv/tcg-target-con-str.h
+ *
+ * Copyright (c) 2021 Linaro
+ */
+
+/*
+ * Define constraint letters for register sets:
+ * REGS(letter, register_mask)
+ */
+REGS('r', ALL_GENERAL_REGS)
+REGS('L', ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS)
+
+/*
+ * Define constraint letters for constants:
+ * CONST(letter, TCG_CT_CONST_* bit set)
+ */
+CONST('I', TCG_CT_CONST_S12)
+CONST('N', TCG_CT_CONST_N12)
+CONST('U', TCG_CT_CONST_U12)
+CONST('Z', TCG_CT_CONST_ZERO)
+CONST('C', TCG_CT_CONST_C12)
+CONST('W', TCG_CT_CONST_WSZ)
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 42eebef78e..f0930f77ef 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -116,3 +116,55 @@ static const int tcg_target_call_oarg_regs[] = {
 TCG_REG_A0,
 TCG_REG_A1,
 };
+
+#define TCG_CT_CONST_ZERO  0x100
+#define TCG_CT_CONST_S12   0x200
+#define TCG_CT_CONST_N12   0x400
+#define TCG_CT_CONST_U12   0x800
+#define TCG_CT_CONST_C12   0x1000
+#define TCG_CT_CONST_WSZ   0x2000
+
+#define ALL_GENERAL_REGS  MAKE_64BIT_MASK(0, 32)
+/*
+ * For softmmu, we need to avoid conflicts with the first 5
+ * argument registers to call the helper.  Some of these are
+ * also used for the tlb lookup.
+ */
+#ifdef CONFIG_SOFTMMU
+#define SOFTMMU_RESERVE_REGS  MAKE_64BIT_MASK(TCG_REG_A0, 5)
+#else
+#define SOFTMMU_RESERVE_REGS  0
+#endif
+
+
+static inline tcg_target_long sextreg(tcg_target_long val, int pos, int len)
+{
+return sextract64(val, pos, len);
+}
+
+/* test if a constant matches the constraint */
+static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
+{
+if (ct & TCG_CT_CONST) {
+return 1;
+}
+if ((ct & TCG_CT_CONST_ZERO) && val == 0) {
+return 1;
+}
+if ((ct & TCG_CT_CONST_S12) && val == sextreg(val, 0, 12)) {
+return 1;
+}
+if ((ct & TCG_CT_CONST_N12) && -val == sextreg(-val, 0, 12)) {
+return 1;
+}
+if ((ct & TCG_CT_CONST_U12) && val >= 0 && val <= 0xfff) {
+return 1;
+}
+if ((ct & TCG_CT_CONST_C12) && ~val >= 0 && ~val <= 0xfff) {
+return 1;
+}
+if ((ct & TCG_CT_CONST_WSZ) && val == (type == TCG_TYPE_I32 ? 32 : 64)) {
+return 1;
+}
+return 0;
+}
-- 
2.33.0




[PATCH v2 16/30] tcg/loongarch64: Implement shl/shr/sar/rotl/rotr ops

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
---
 tcg/loongarch64/tcg-target-con-set.h |  1 +
 tcg/loongarch64/tcg-target.c.inc | 91 
 2 files changed, 92 insertions(+)

diff --git a/tcg/loongarch64/tcg-target-con-set.h 
b/tcg/loongarch64/tcg-target-con-set.h
index 2975e03127..42f8e28741 100644
--- a/tcg/loongarch64/tcg-target-con-set.h
+++ b/tcg/loongarch64/tcg-target-con-set.h
@@ -17,6 +17,7 @@
 C_O0_I1(r)
 C_O1_I1(r, r)
 C_O1_I2(r, r, rC)
+C_O1_I2(r, r, ri)
 C_O1_I2(r, r, rU)
 C_O1_I2(r, r, rW)
 C_O1_I2(r, 0, rZ)
diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 65545f7636..f06c61ee2b 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -561,6 +561,85 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 tcg_out_clzctz(s, OPC_CTZ_D, a0, a1, a2, c2, false);
 break;
 
+case INDEX_op_shl_i32:
+if (c2) {
+tcg_out_opc_slli_w(s, a0, a1, a2 & 0x1f);
+} else {
+tcg_out_opc_sll_w(s, a0, a1, a2);
+}
+break;
+case INDEX_op_shl_i64:
+if (c2) {
+tcg_out_opc_slli_d(s, a0, a1, a2 & 0x3f);
+} else {
+tcg_out_opc_sll_d(s, a0, a1, a2);
+}
+break;
+
+case INDEX_op_shr_i32:
+if (c2) {
+tcg_out_opc_srli_w(s, a0, a1, a2 & 0x1f);
+} else {
+tcg_out_opc_srl_w(s, a0, a1, a2);
+}
+break;
+case INDEX_op_shr_i64:
+if (c2) {
+tcg_out_opc_srli_d(s, a0, a1, a2 & 0x3f);
+} else {
+tcg_out_opc_srl_d(s, a0, a1, a2);
+}
+break;
+
+case INDEX_op_sar_i32:
+if (c2) {
+tcg_out_opc_srai_w(s, a0, a1, a2 & 0x1f);
+} else {
+tcg_out_opc_sra_w(s, a0, a1, a2);
+}
+break;
+case INDEX_op_sar_i64:
+if (c2) {
+tcg_out_opc_srai_d(s, a0, a1, a2 & 0x3f);
+} else {
+tcg_out_opc_sra_d(s, a0, a1, a2);
+}
+break;
+
+case INDEX_op_rotl_i32:
+/* transform into equivalent rotr/rotri */
+if (c2) {
+tcg_out_opc_rotri_w(s, a0, a1, (32 - a2) & 0x1f);
+} else {
+tcg_out_opc_sub_w(s, TCG_REG_TMP0, TCG_REG_ZERO, a2);
+tcg_out_opc_rotr_w(s, a0, a1, TCG_REG_TMP0);
+}
+break;
+case INDEX_op_rotl_i64:
+/* transform into equivalent rotr/rotri */
+if (c2) {
+tcg_out_opc_rotri_d(s, a0, a1, (64 - a2) & 0x3f);
+} else {
+tcg_out_opc_sub_w(s, TCG_REG_TMP0, TCG_REG_ZERO, a2);
+tcg_out_opc_rotr_d(s, a0, a1, TCG_REG_TMP0);
+}
+break;
+
+case INDEX_op_rotr_i32:
+if (c2) {
+tcg_out_opc_rotri_w(s, a0, a1, a2 & 0x1f);
+} else {
+tcg_out_opc_rotr_w(s, a0, a1, a2);
+}
+break;
+case INDEX_op_rotr_i64:
+if (c2) {
+tcg_out_opc_rotri_d(s, a0, a1, a2 & 0x3f);
+} else {
+tcg_out_opc_rotr_d(s, a0, a1, a2);
+}
+break;
+
 case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
 case INDEX_op_mov_i64:
 default:
@@ -610,6 +689,18 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode 
op)
  */
 return C_O1_I2(r, r, rC);
 
+case INDEX_op_shl_i32:
+case INDEX_op_shl_i64:
+case INDEX_op_shr_i32:
+case INDEX_op_shr_i64:
+case INDEX_op_sar_i32:
+case INDEX_op_sar_i64:
+case INDEX_op_rotl_i32:
+case INDEX_op_rotl_i64:
+case INDEX_op_rotr_i32:
+case INDEX_op_rotr_i64:
+return C_O1_I2(r, r, ri);
+
 case INDEX_op_and_i32:
 case INDEX_op_and_i64:
 case INDEX_op_nor_i32:
-- 
2.33.0




[PATCH v2 14/30] tcg/loongarch64: Implement bswap32_i32/bswap32_i64/bswap64_i64

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
---
 tcg/loongarch64/tcg-target.c.inc | 20 
 1 file changed, 20 insertions(+)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 27066960cf..e7b5f2c5ab 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -508,6 +508,23 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 tcg_out_opc_bstrins_d(s, a0, a2, args[3], args[3] + args[4] - 1);
 break;
 
+case INDEX_op_bswap32_i32:
+/* All 32-bit values are computed sign-extended in the register.  */
+a2 = TCG_BSWAP_OS;
+/* fallthrough */
+case INDEX_op_bswap32_i64:
+tcg_out_opc_revb_2w(s, a0, a1);
+if (a2 & TCG_BSWAP_OS) {
+tcg_out_ext32s(s, a0, a0);
+} else if ((a2 & (TCG_BSWAP_IZ | TCG_BSWAP_OZ)) == TCG_BSWAP_OZ) {
+tcg_out_ext32u(s, a0, a0);
+}
+break;
+
+case INDEX_op_bswap64_i64:
+tcg_out_opc_revb_d(s, a0, a1);
+break;
+
 case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
 case INDEX_op_mov_i64:
 default:
@@ -539,6 +556,9 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
 case INDEX_op_not_i64:
 case INDEX_op_extract_i32:
 case INDEX_op_extract_i64:
+case INDEX_op_bswap32_i32:
+case INDEX_op_bswap32_i64:
+case INDEX_op_bswap64_i64:
 return C_O1_I1(r, r);
 
 case INDEX_op_andc_i32:
-- 
2.33.0




[PATCH v2 02/30] MAINTAINERS: Add tcg/loongarch64 entry with myself as maintainer

2021-09-21 Thread WANG Xuerui
I ported the initial code, so I should maintain it of course.

Signed-off-by: WANG Xuerui 
Reviewed-by: Richard Henderson 
---
 MAINTAINERS | 5 +
 1 file changed, 5 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 6c20634d63..66d1a17ca3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3109,6 +3109,11 @@ S: Maintained
 F: tcg/i386/
 F: disas/i386.c
 
+LoongArch64 TCG target
+M: WANG Xuerui 
+S: Maintained
+F: tcg/loongarch64/
+
 MIPS TCG target
 M: Philippe Mathieu-Daudé 
 R: Aurelien Jarno 
-- 
2.33.0




[PATCH v2 05/30] tcg/loongarch64: Add register names, allocation order and input/output sets

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
Reviewed-by: Richard Henderson 
---
 tcg/loongarch64/tcg-target.c.inc | 118 +++
 1 file changed, 118 insertions(+)
 create mode 100644 tcg/loongarch64/tcg-target.c.inc

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
new file mode 100644
index 00..42eebef78e
--- /dev/null
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -0,0 +1,118 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2021 WANG Xuerui 
+ *
+ * Based on tcg/riscv/tcg-target.c.inc
+ *
+ * Copyright (c) 2018 SiFive, Inc
+ * Copyright (c) 2008-2009 Arnaud Patard 
+ * Copyright (c) 2009 Aurelien Jarno 
+ * Copyright (c) 2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifdef CONFIG_DEBUG_TCG
+static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
+"zero",
+"ra",
+"tp",
+"sp",
+"a0",
+"a1",
+"a2",
+"a3",
+"a4",
+"a5",
+"a6",
+"a7",
+"t0",
+"t1",
+"t2",
+"t3",
+"t4",
+"t5",
+"t6",
+"t7",
+"t8",
+"r21", /* reserved in the LP64 ABI, hence no ABI name */
+"s9",
+"s0",
+"s1",
+"s2",
+"s3",
+"s4",
+"s5",
+"s6",
+"s7",
+"s8"
+};
+#endif
+
+static const int tcg_target_reg_alloc_order[] = {
+/* Registers preserved across calls */
+/* TCG_REG_S0 reserved for TCG_AREG0 */
+TCG_REG_S1,
+TCG_REG_S2,
+TCG_REG_S3,
+TCG_REG_S4,
+TCG_REG_S5,
+TCG_REG_S6,
+TCG_REG_S7,
+TCG_REG_S8,
+TCG_REG_S9,
+
+/* Registers (potentially) clobbered across calls */
+TCG_REG_T0,
+TCG_REG_T1,
+TCG_REG_T2,
+TCG_REG_T3,
+TCG_REG_T4,
+TCG_REG_T5,
+TCG_REG_T6,
+TCG_REG_T7,
+TCG_REG_T8,
+
+/* Argument registers, opposite order of allocation.  */
+TCG_REG_A7,
+TCG_REG_A6,
+TCG_REG_A5,
+TCG_REG_A4,
+TCG_REG_A3,
+TCG_REG_A2,
+TCG_REG_A1,
+TCG_REG_A0,
+};
+
+static const int tcg_target_call_iarg_regs[] = {
+TCG_REG_A0,
+TCG_REG_A1,
+TCG_REG_A2,
+TCG_REG_A3,
+TCG_REG_A4,
+TCG_REG_A5,
+TCG_REG_A6,
+TCG_REG_A7,
+};
+
+static const int tcg_target_call_oarg_regs[] = {
+TCG_REG_A0,
+TCG_REG_A1,
+};
-- 
2.33.0




[PATCH v2 09/30] tcg/loongarch64: Implement tcg_out_mov and tcg_out_movi

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
---
 tcg/loongarch64/tcg-target.c.inc | 89 
 1 file changed, 89 insertions(+)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index 338b772732..e4e7e5e903 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -247,6 +247,93 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0)
 tcg_out_opc_dbar(s, 0);
 }
 
+static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg)
+{
+if (ret == arg) {
+return true;
+}
+switch (type) {
+case TCG_TYPE_I32:
+case TCG_TYPE_I64:
+/*
+ * Conventional register-register move used in LoongArch is
+ * `or dst, src, zero`.
+ */
+tcg_out_opc_or(s, ret, arg, TCG_REG_ZERO);
+break;
+default:
+g_assert_not_reached();
+}
+return true;
+}
+
+static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
+ tcg_target_long val)
+{
+if (type == TCG_TYPE_I32) {
+val = (int32_t)val;
+}
+
+/* Single-instruction cases.  */
+tcg_target_long low = sextreg(val, 0, 12);
+if (low == val) {
+/* val fits in simm12: addi.w rd, zero, val */
+tcg_out_opc_addi_w(s, rd, TCG_REG_ZERO, val);
+return;
+}
+if (0x800 <= val && val <= 0xfff) {
+/* val fits in uimm12: ori rd, zero, val */
+tcg_out_opc_ori(s, rd, TCG_REG_ZERO, val);
+return;
+}
+
+/* Test for PC-relative values that can be loaded faster.  */
+intptr_t pc_offset = val - (uintptr_t)s->code_ptr;
+if (pc_offset == sextreg(pc_offset, 0, 22) && (pc_offset & 3) == 0) {
+tcg_out_opc_pcaddu2i(s, rd, pc_offset >> 2);
+return;
+}
+if (pc_offset == (int32_t)pc_offset) {
+tcg_target_long lo = sextreg(pc_offset, 0, 12);
+tcg_target_long hi = pc_offset - lo;
+tcg_out_opc_pcaddu12i(s, rd, hi >> 12);
+tcg_out_opc_addi_d(s, rd, rd, lo);
+return;
+}
+
+/*
+ * Slow path: at most lu12i.w + ori + cu32i.d + cu52i.d.
+ *
+ * Chop upper bits into 3 immediate-field-sized segments respectively.
+ */
+tcg_target_long upper = (val >> 12) & 0xf;
+tcg_target_long higher = (val >> 32) & 0xf;
+tcg_target_long top = val >> 52;
+
+tcg_out_opc_lu12i_w(s, rd, upper);
+if (low != 0) {
+tcg_out_opc_ori(s, rd, rd, low & 0xfff);
+}
+
+if (sextreg(val, 0, 32) == val) {
+/*
+ * Fits in 32-bits, upper bits are already properly sign-extended by
+ * lu12i.w.
+ */
+return;
+}
+tcg_out_opc_cu32i_d(s, rd, higher);
+
+if (sextreg(val, 0, 52) == val) {
+/*
+ * Fits in 52-bits, upper bits are already properly sign-extended by
+ * cu32i.d.
+ */
+return;
+}
+tcg_out_opc_cu52i_d(s, rd, rd, top);
+}
+
 /*
  * Entry-points
  */
@@ -262,6 +349,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
 tcg_out_mb(s, a0);
 break;
 
+case INDEX_op_mov_i32:  /* Always emitted via tcg_out_mov.  */
+case INDEX_op_mov_i64:
 default:
 g_assert_not_reached();
 }
-- 
2.33.0




[PATCH v2 04/30] tcg/loongarch64: Add generated instruction opcodes and encoding helpers

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
Acked-by: Richard Henderson 
---
 tcg/loongarch64/tcg-insn-defs.c.inc | 873 
 1 file changed, 873 insertions(+)
 create mode 100644 tcg/loongarch64/tcg-insn-defs.c.inc

diff --git a/tcg/loongarch64/tcg-insn-defs.c.inc 
b/tcg/loongarch64/tcg-insn-defs.c.inc
new file mode 100644
index 00..b587e92c1b
--- /dev/null
+++ b/tcg/loongarch64/tcg-insn-defs.c.inc
@@ -0,0 +1,873 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * LoongArch instruction formats, opcodes, and encoders for TCG use.
+ *
+ * Code generated by genqemutcgdefs from
+ * https://github.com/loongson-community/loongarch-opcodes,
+ * from commit a6a0cd83c8dbf7d05dcad1554e32fd7ea496c78f.
+ * DO NOT EDIT.
+ */
+
+typedef enum {
+OPC_CLZ_W = 0x1400,
+OPC_CTZ_W = 0x1c00,
+OPC_CLZ_D = 0x2400,
+OPC_CTZ_D = 0x2c00,
+OPC_REVB_2W = 0x3800,
+OPC_REVB_D = 0x3c00,
+OPC_SEXT_H = 0x5800,
+OPC_SEXT_B = 0x5c00,
+OPC_ADD_W = 0x0010,
+OPC_ADD_D = 0x00108000,
+OPC_SUB_W = 0x0011,
+OPC_SUB_D = 0x00118000,
+OPC_SLT = 0x0012,
+OPC_SLTU = 0x00128000,
+OPC_MASKEQZ = 0x0013,
+OPC_MASKNEZ = 0x00138000,
+OPC_NOR = 0x0014,
+OPC_AND = 0x00148000,
+OPC_OR = 0x0015,
+OPC_XOR = 0x00158000,
+OPC_ORN = 0x0016,
+OPC_ANDN = 0x00168000,
+OPC_SLL_W = 0x0017,
+OPC_SRL_W = 0x00178000,
+OPC_SRA_W = 0x0018,
+OPC_SLL_D = 0x00188000,
+OPC_SRL_D = 0x0019,
+OPC_SRA_D = 0x00198000,
+OPC_ROTR_W = 0x001b,
+OPC_ROTR_D = 0x001b8000,
+OPC_MUL_W = 0x001c,
+OPC_MULH_W = 0x001c8000,
+OPC_MULH_WU = 0x001d,
+OPC_MUL_D = 0x001d8000,
+OPC_MULH_D = 0x001e,
+OPC_MULH_DU = 0x001e8000,
+OPC_DIV_W = 0x0020,
+OPC_MOD_W = 0x00208000,
+OPC_DIV_WU = 0x0021,
+OPC_MOD_WU = 0x00218000,
+OPC_DIV_D = 0x0022,
+OPC_MOD_D = 0x00228000,
+OPC_DIV_DU = 0x0023,
+OPC_MOD_DU = 0x00238000,
+OPC_SLLI_W = 0x00408000,
+OPC_SLLI_D = 0x0041,
+OPC_SRLI_W = 0x00448000,
+OPC_SRLI_D = 0x0045,
+OPC_SRAI_W = 0x00488000,
+OPC_SRAI_D = 0x0049,
+OPC_ROTRI_W = 0x004c8000,
+OPC_ROTRI_D = 0x004d,
+OPC_BSTRINS_W = 0x0060,
+OPC_BSTRPICK_W = 0x00608000,
+OPC_BSTRINS_D = 0x0080,
+OPC_BSTRPICK_D = 0x00c0,
+OPC_SLTI = 0x0200,
+OPC_SLTUI = 0x0240,
+OPC_ADDI_W = 0x0280,
+OPC_ADDI_D = 0x02c0,
+OPC_CU52I_D = 0x0300,
+OPC_ANDI = 0x0340,
+OPC_ORI = 0x0380,
+OPC_XORI = 0x03c0,
+OPC_LU12I_W = 0x1400,
+OPC_CU32I_D = 0x1600,
+OPC_PCADDU2I = 0x1800,
+OPC_PCADDU12I = 0x1c00,
+OPC_PCADDU18I = 0x1e00,
+OPC_LD_B = 0x2800,
+OPC_LD_H = 0x2840,
+OPC_LD_W = 0x2880,
+OPC_LD_D = 0x28c0,
+OPC_ST_B = 0x2900,
+OPC_ST_H = 0x2940,
+OPC_ST_W = 0x2980,
+OPC_ST_D = 0x29c0,
+OPC_LD_BU = 0x2a00,
+OPC_LD_HU = 0x2a40,
+OPC_LD_WU = 0x2a80,
+OPC_DBAR = 0x3872,
+OPC_JIRL = 0x4c00,
+OPC_B = 0x5000,
+OPC_BL = 0x5400,
+OPC_BEQ = 0x5800,
+OPC_BNE = 0x5c00,
+OPC_BGT = 0x6000,
+OPC_BLE = 0x6400,
+OPC_BGTU = 0x6800,
+OPC_BLEU = 0x6c00,
+} LoongArchInsn;
+
+static int32_t __attribute__((unused))
+encode_d_slot(LoongArchInsn opc, uint32_t d)
+{
+return opc | d;
+}
+
+static int32_t __attribute__((unused))
+encode_dj_slots(LoongArchInsn opc, uint32_t d, uint32_t j)
+{
+return opc | d | j << 5;
+}
+
+static int32_t __attribute__((unused))
+encode_djk_slots(LoongArchInsn opc, uint32_t d, uint32_t j, uint32_t k)
+{
+return opc | d | j << 5 | k << 10;
+}
+
+static int32_t __attribute__((unused))
+encode_djkm_slots(LoongArchInsn opc, uint32_t d, uint32_t j, uint32_t k,
+  uint32_t m)
+{
+return opc | d | j << 5 | k << 10 | m << 16;
+}
+
+static int32_t __attribute__((unused))
+encode_dk_slots(LoongArchInsn opc, uint32_t d, uint32_t k)
+{
+return opc | d | k << 10;
+}
+
+static int32_t __attribute__((unused))
+encode_dj_insn(LoongArchInsn opc, TCGReg d, TCGReg j)
+{
+tcg_debug_assert(d >= 0 && d <= 0x1f);
+tcg_debug_assert(j >= 0 && j <= 0x1f);
+return encode_dj_slots(opc, d, j);
+}
+
+static int32_t __attribute__((unused))
+encode_djk_insn(LoongArchInsn opc, TCGReg d, TCGReg j, TCGReg k)
+{
+tcg_debug_assert(d >= 0 && d <= 0x1f);
+tcg_debug_assert(j >= 0 && j <= 0x1f);
+tcg_debug_assert(k >= 0 && k <= 0x1f);
+return encode_djk_slots(opc, d, j, k);
+}
+
+static int32_t __attribute__((unused))
+encode_djsk12_insn(LoongArchInsn opc, TCGReg d, TCGReg j, int32_t sk12)
+{
+tcg_debug_assert(d >= 0 && d <= 0x1f);
+tcg_debug_assert(j >= 0 && j <= 0x1f);
+tcg_debug_assert(sk12 >= -0x800 && sk12 <= 0x7ff);
+return encode_djk_slots(opc, d, j, sk12 & 0xfff);
+}
+

[PATCH v2 00/30] LoongArch64 port of QEMU TCG

2021-09-21 Thread WANG Xuerui
Hi all,

This is a port of QEMU TCG to the brand-new CPU architecture LoongArch,
introduced by Loongson with their 3A5000 chips. Test suite all passed
except one timeout that is test-crypto-tlssession, but this particular
case runs well when relatively few targets are enabled, so it may be
just a case of low performance (4C4T 2.5GHz). I also boot-tested x86_64
(Debian and Gentoo installation CDs) and install-tested aarch64 (Debian
netboot installer), and ran riscv64 linux-user emulation with a chroot;
everything seems fine so far.

## About the series

Only the LP64 ABI is supported, as this is the only one fully
implemented and supported by Loongson. 32-bit support is incomplete from
outset, and removed from the very latest upstream submissions, so you
can't even configure for that.

The architecture's documentation is already translated into English;
it can be browsed at https://loongson.github.io/LoongArch-Documentation/.
The LoongArch ELF psABI doc (version 1.00) could be found at [1];
if anything is missing there, it's most likely the same as RISC-V, but
you can always raise an issue over their issue tracker at [2].

[1]: 
https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html
[2]: https://github.com/loongson/LoongArch-Documentation/issues

In this series I made use of generated instruction encodings and
emitters from https://github.com/loongson-community/loongarch-opcodes
(a community project started by myself, something I must admit), as the
LoongArch encoding is highly irregular even for a fixed 32-bit ISA, and
I want to minimize the maintenance burden for future collaboration. This
is something not seen in any of the other TCG ports out there, so I'd
like to see if this is acceptable practice (and also maybe bikeshed the
file name).

This series touches some of the same files as Song Gao's previous
submission of LoongArch *target* support, which is a bit unfortunate;
one of us will have to rebase after either series gets in. Actual
conflict should only happen on build system bits and include/elf.h,
though, as we're working on entirely different areas.

## How to build and test this

Upstream support for LoongArch is largely WIP for now, which means you
must apply a lot of patches if you want to even cross-build for this arch.
The main sources I used are as follows:

* binutils: https://github.com/xen0n/binutils-gdb/tree/for-gentoo-2.37-v2
  based on 
https://github.com/loongson/binutils-gdb/tree/loongarch/upstream_v6_a1d65b3
* gcc: https://github.com/xen0n/gcc/tree/for-gentoo-gcc-12-v2
  based on https://github.com/loongson/gcc/tree/loongarch_upstream
* glibc: https://github.com/xen0n/glibc/tree/for-gentoo-glibc-2.34
  based on https://github.com/loongson/glibc/tree/loongarch_2_34_for_upstream
* Linux: https://github.com/xen0n/linux/tree/loongarch-playground
  based on https://github.com/loongson/linux/tree/loongarch-next
* Gentoo overlay: https://github.com/xen0n/loongson-overlay

I have made ready-to-use Gentoo stage3 tarballs, but they're served with
CDN off my personal cloud account, and I don't want the link to be
exposed so that my bills skyrocket; you can reach me off-list to get the
links if you're interested.

As for the hardware availability, the boards can already be bought in
China on Taobao, and I think some people at Loongson might be able to
arrange for testing environments, if testing on real hardware other than
mine is required before merging; they have their in-house Debian spin-off
from the early days of this architecture. Their kernel is
ABI-incompatible with the version being upstreamed and used by me, but
QEMU should work there regardless.

Lastly, I'm new to QEMU development and this is my first patch series
here; apologies if I get anything wrong, and any help or suggestion is
certainly appreciated!

## Changelog

v1 -> v2:

- Addressed all review comments from v1
  - Use "loongarch64" everywhere, tcg directory renamed to "tcg/loongarch64"
  - Removed all redundant TCG_TARGET_REG_BITS conditional
  - Removed support for the neg op
  - Added support for eqv and bswap32_i64 ops
  - Added safe syscall handling for linux-user
  - Fixed everything else I could see
- Updated generated instruction definitions to latest
- Reordered the configure/meson.build changes to come last

v1: https://patchew.org/QEMU/20210920080451.408655-1-...@xen0n.name/

WANG Xuerui (30):
  elf: Add machine type value for LoongArch
  MAINTAINERS: Add tcg/loongarch64 entry with myself as maintainer
  tcg/loongarch64: Add the tcg-target.h file
  tcg/loongarch64: Add generated instruction opcodes and encoding
helpers
  tcg/loongarch64: Add register names, allocation order and input/output
sets
  tcg/loongarch64: Define the operand constraints
  tcg/loongarch64: Implement necessary relocation operations
  tcg/loongarch64: Implement the memory barrier op
  tcg/loongarch64: Implement tcg_out_mov and tcg_out_movi
  tcg/loongarch64: Implement goto_ptr
  tcg/loongarch64: Implement si

[PATCH v2 03/30] tcg/loongarch64: Add the tcg-target.h file

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
---
 tcg/loongarch64/tcg-target.h | 180 +++
 1 file changed, 180 insertions(+)
 create mode 100644 tcg/loongarch64/tcg-target.h

diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h
new file mode 100644
index 00..d9d320adac
--- /dev/null
+++ b/tcg/loongarch64/tcg-target.h
@@ -0,0 +1,180 @@
+/*
+ * Tiny Code Generator for QEMU
+ *
+ * Copyright (c) 2021 WANG Xuerui 
+ *
+ * Based on tcg/riscv/tcg-target.h
+ *
+ * Copyright (c) 2018 SiFive, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef LOONGARCH_TCG_TARGET_H
+#define LOONGARCH_TCG_TARGET_H
+
+/*
+ * Loongson removed the (incomplete) 32-bit support from kernel and toolchain
+ * for the initial upstreaming of this architecture, so don't bother and just
+ * support the LP64 ABI for now.
+ */
+#if defined(__loongarch64)
+# define TCG_TARGET_REG_BITS 64
+#else
+# error unsupported LoongArch register size
+#endif
+
+#define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_NB_REGS 32
+#define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
+
+typedef enum {
+TCG_REG_ZERO,
+TCG_REG_RA,
+TCG_REG_TP,
+TCG_REG_SP,
+TCG_REG_A0,
+TCG_REG_A1,
+TCG_REG_A2,
+TCG_REG_A3,
+TCG_REG_A4,
+TCG_REG_A5,
+TCG_REG_A6,
+TCG_REG_A7,
+TCG_REG_T0,
+TCG_REG_T1,
+TCG_REG_T2,
+TCG_REG_T3,
+TCG_REG_T4,
+TCG_REG_T5,
+TCG_REG_T6,
+TCG_REG_T7,
+TCG_REG_T8,
+TCG_REG_RESERVED,
+TCG_REG_S9,
+TCG_REG_S0,
+TCG_REG_S1,
+TCG_REG_S2,
+TCG_REG_S3,
+TCG_REG_S4,
+TCG_REG_S5,
+TCG_REG_S6,
+TCG_REG_S7,
+TCG_REG_S8,
+
+/* aliases */
+TCG_AREG0= TCG_REG_S0,
+TCG_REG_TMP0 = TCG_REG_T8,
+TCG_REG_TMP1 = TCG_REG_T7,
+TCG_REG_TMP2 = TCG_REG_T6,
+} TCGReg;
+
+/* used for function call generation */
+#define TCG_REG_CALL_STACK  TCG_REG_SP
+#define TCG_TARGET_STACK_ALIGN  16
+#define TCG_TARGET_CALL_ALIGN_ARGS  1
+#define TCG_TARGET_CALL_STACK_OFFSET0
+
+/* optional instructions */
+#define TCG_TARGET_HAS_movcond_i32  0
+#define TCG_TARGET_HAS_div_i32  1
+#define TCG_TARGET_HAS_rem_i32  1
+#define TCG_TARGET_HAS_div2_i32 0
+#define TCG_TARGET_HAS_rot_i32  1
+#define TCG_TARGET_HAS_deposit_i32  1
+#define TCG_TARGET_HAS_extract_i32  1
+#define TCG_TARGET_HAS_sextract_i32 0
+#define TCG_TARGET_HAS_extract2_i32 0
+#define TCG_TARGET_HAS_add2_i32 0
+#define TCG_TARGET_HAS_sub2_i32 0
+#define TCG_TARGET_HAS_mulu2_i320
+#define TCG_TARGET_HAS_muls2_i320
+#define TCG_TARGET_HAS_muluh_i321
+#define TCG_TARGET_HAS_mulsh_i321
+#define TCG_TARGET_HAS_ext8s_i321
+#define TCG_TARGET_HAS_ext16s_i32   1
+#define TCG_TARGET_HAS_ext8u_i321
+#define TCG_TARGET_HAS_ext16u_i32   1
+#define TCG_TARGET_HAS_bswap16_i32  0
+#define TCG_TARGET_HAS_bswap32_i32  1
+#define TCG_TARGET_HAS_not_i32  1
+#define TCG_TARGET_HAS_neg_i32  0
+#define TCG_TARGET_HAS_andc_i32 1
+#define TCG_TARGET_HAS_orc_i32  1
+#define TCG_TARGET_HAS_eqv_i32  1
+#define TCG_TARGET_HAS_nand_i32 0
+#define TCG_TARGET_HAS_nor_i32  1
+#define TCG_TARGET_HAS_clz_i32  1
+#define TCG_TARGET_HAS_ctz_i32  1
+#define TCG_TARGET_HAS_ctpop_i320
+#define TCG_TARGET_HAS_direct_jump  0
+#define TCG_TARGET_HAS_brcond2  0
+#define TCG_TARGET_HAS_setcond2 0
+#define TCG_TARGET_HAS_qemu_st8_i32 0
+
+/* 64-bit operations */
+#define TCG_TARGET_HAS_movcond_i64  0
+#define TCG_TARGET_HAS_div_i64  1
+#define TCG_TARGET_HAS_rem_i64  1
+#define TCG_TARGET_HAS_div2_i64 0
+#define TCG_TARGET_HAS_rot_i64  1
+#define TCG_TARGET_HAS_deposit_i64  1
+#define TCG_TARGET_HAS_extract_i64  1
+#define TCG_TARGET_HAS_sextract_i64 

[PATCH v2 01/30] elf: Add machine type value for LoongArch

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
---
 include/elf.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/elf.h b/include/elf.h
index 811bf4a1cb..3a4bcb646a 100644
--- a/include/elf.h
+++ b/include/elf.h
@@ -182,6 +182,8 @@ typedef struct mips_elf_abiflags_v0 {
 
 #define EM_NANOMIPS 249 /* Wave Computing nanoMIPS */
 
+#define EM_LOONGARCH258 /* LoongArch */
+
 /*
  * This is an interim value that we will use until the committee comes
  * up with a final number.
-- 
2.33.0




[PATCH v2 07/30] tcg/loongarch64: Implement necessary relocation operations

2021-09-21 Thread WANG Xuerui
Signed-off-by: WANG Xuerui 
---
 tcg/loongarch64/tcg-target.c.inc | 66 
 1 file changed, 66 insertions(+)

diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc
index f0930f77ef..69e882ba5d 100644
--- a/tcg/loongarch64/tcg-target.c.inc
+++ b/tcg/loongarch64/tcg-target.c.inc
@@ -168,3 +168,69 @@ static bool tcg_target_const_match(int64_t val, TCGType 
type, int ct)
 }
 return 0;
 }
+
+/*
+ * Relocations
+ */
+
+/*
+ * Relocation records defined in LoongArch ELF psABI v1.00 is way too
+ * complicated; a whopping stack machine is needed to stuff the fields, at
+ * the very least one SOP_PUSH and one SOP_POP (of the correct format) are
+ * needed.
+ *
+ * Hence, define our own simpler relocation types. Numbers are chosen as to
+ * not collide with potential future additions to the true ELF relocation
+ * type enum.
+ */
+
+/* Field Sk16, shifted right by 2; suitable for conditional jumps */
+#define R_LOONGARCH_BR_SK16 256
+/* Field Sd10k16, shifted right by 2; suitable for B and BL */
+#define R_LOONGARCH_BR_SD10K16  257
+
+static bool reloc_br_sk16(tcg_insn_unit *src_rw, const tcg_insn_unit *target)
+{
+const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+intptr_t offset = (intptr_t)target - (intptr_t)src_rx;
+
+tcg_debug_assert((offset & 3) == 0);
+offset >>= 2;
+if (offset == sextreg(offset, 0, 16)) {
+*src_rw |= (offset << 10) & 0x3fffc00;
+return true;
+}
+
+return false;
+}
+
+static bool reloc_br_sd10k16(tcg_insn_unit *src_rw,
+ const tcg_insn_unit *target)
+{
+const tcg_insn_unit *src_rx = tcg_splitwx_to_rx(src_rw);
+intptr_t offset = (intptr_t)target - (intptr_t)src_rx;
+
+tcg_debug_assert((offset & 3) == 0);
+offset >>= 2;
+if (offset == sextreg(offset, 0, 26)) {
+*src_rw |= (offset >> 16) & 0x3ff; /* slot d10 */
+*src_rw |= ((offset & 0x) << 10) & 0x3fffc00; /* slot k16 */
+return true;
+}
+
+return false;
+}
+
+static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
+intptr_t value, intptr_t addend)
+{
+tcg_debug_assert(addend == 0);
+switch (type) {
+case R_LOONGARCH_BR_SK16:
+return reloc_br_sk16(code_ptr, (tcg_insn_unit *)value);
+case R_LOONGARCH_BR_SD10K16:
+return reloc_br_sd10k16(code_ptr, (tcg_insn_unit *)value);
+default:
+g_assert_not_reached();
+}
+}
-- 
2.33.0




Re: [PATCH] nbd/client: Request larger block status by default

2021-09-21 Thread Eric Blake
On Tue, Sep 21, 2021 at 10:12:02PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> 21.09.2021 21:08, Eric Blake wrote:
> > On Tue, Sep 21, 2021 at 08:25:11PM +0300, Vladimir Sementsov-Ogievskiy 
> > wrote:
> > > 21.09.2021 19:17, Eric Blake wrote:
> > > > Now that commit 5a1cfd21 has clarified that a driver's block_status
> > > > can report larger *pnum than in the original request, we can take
> > > > advantage of that in the NBD driver.  Rather than limiting our request
> > > > to the server based on the maximum @bytes our caller mentioned, we
> > > > instead ask for as much status as possible (the minimum of our 4G
> > > > limit or the rest of the export); the server will still only give us
> > > > one extent in its answer (because we are using NBD_CMD_FLAG_REQ_ONE),
> > > > but now the block layer's caching of data areas can take advantage of
> > > > cases where the server gives us a large answer to avoid the need for
> > > > future NBD_CMD_BLOCK_STATUS calls.
> > > > 
> > > > Signed-off-by: Eric Blake 
> > > > ---
> > 
> > > 
> > > I remember we already discussed that, but can't find.
> > > 
> > > The problem is that it's not for free:
> > > 
> > > In server code in blockstatus_to_extents, we loop though the disk, trying 
> > > to merge extents of the same type.
> > > 
> > > With full allocated qcow2, we'll have to load all L2 tables and handle 
> > > them, to merge all block status into one big "allocated" extent.
> > > 
> > 
> > We don't have to loop that far.  The NBD protocol allows the server to
> > stop looping at whatever point makes sense, as long as it makes
> > progress.
> > 
> > > Maybe, we need some additional negotiation flag, to allow BLOCK_STATUS 
> > > command with NBD_CMD_FLAG_REQ_ONE flag to return an extent larger than 
> > > required when that information is available for free?

That's already the case when FLAG_REQ_ONE is not present.  The reason
that REQ_ONE clamps things at the requested limit is because older
qemu had a bug that it rejected the server sending extra information,
even when that info was free.

> > 
> > That's one possibility.  Another does not add anything to the NBD
> > protocol, but instead limits the code that tries to loop over block
> > status to determine a larger "allocated" answer to return to instead
> > stop looping after a finite number of extents have been merged
> > together.
> > 
> 
> In this case we should answer a question: when to stop looping? I'm not sure 
> we can simply drop the loop:
> 
> For example, for compressed clusters, bdrv_co_block_status() will return them 
> one-by-one, and sending them one by one to the wire, when user requested 
> large range would be inefficient.
> Or should we change block-status behavior for compressed clusters? And may be 
> add flag to block_status() that we are not interested in valid_offset, so it 
> can return an extent corresponding to the whole L2 table chunk (if all 
> entries are allocated, but not consecutive)?

Currently, bdrv_co_block_status() takes 'bool want_zero' that says
what the client wants.  Maybe it's worth expanding that into an enum
or bitmask to allow finer-grained client requests (the notion of
whether valid_offset matters to the caller IS relevant for deciding
when to clamp vs. loop).

> 
> 
> Hmm. So, if not update spec, we'll have to "fix" implementation. That means 
> actually, that we should update spec anyway, at least to note that: "clients 
> tend to request large regions in hope that server will not spend too much 
> time to serve them but instead return shorter answer"..

I'm really hoping we don't have to tweak the NBD spec on this one, but
rather improve the quality of implementation in qemu.

> 
> And you'll never have guarantee, that some another (non-qemu) NBD server will 
> not try to satisfy the whole request in one go.

That's true, but the NBD spec has always tried to encourage servers to
provide more information when it was free, but to give up early if it
gets too expensive.  It's a judgment call on where that line lies, and
may indeed be different between different servers.

> 
> 
> In other words:
> 
> 1. We want block_status of some region
> 2. If there some free information available about larger region we are happy 
> to cache it
> 
> With your solution, we just request a lot larger region, so we lose 
> information of [1]. That means that sever can't imagine, how much of 
> requested region is really needed, i.e. if we do some additional work to 
> return more information (still within boundaries of the request) will it be:
>  - good work to minimize network traffic
> OR
>  - extra work, waste server time, client will cache this information but 
> probably never use (or even lose it soon, as our cache is very simple)
> 
> With additional negotiation flag we don't lose [1], i.e how much client wants 
> now.
> 
> 
> So, for me, modifying the protocol looks nicer..
> 
> Another approach is do request without NBD_CMD_FLAG_REQ_ONE and handle 
> several extents.

_Thi

[PATCH] spapr_numa.c: fixes in spapr_numa_FORM2_write_rtas_tables()

2021-09-21 Thread Daniel Henrique Barboza
This patch has a handful of modifications for the recent added
FORM2 support:

- there is no particular reason for both 'lookup_index_table' and
'distance_table' to be allocated in the heap, since their sizes are
known right at the start of the function. Use static allocation in
them to spare a couple of g_new0() calls;

- to not allocate more than the necessary size in 'distance_table'. At
this moment the array is oversized due to allocating uint32_t for all
elements, when most of them fits in an uint8_t;

- create a NUMA_LOCAL_DISTANCE macro to avoid hardcoding the local
distance value.

Signed-off-by: Daniel Henrique Barboza 
---
 hw/ppc/spapr_numa.c | 35 +++
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/hw/ppc/spapr_numa.c b/hw/ppc/spapr_numa.c
index 58d5dc7084..039a0439c6 100644
--- a/hw/ppc/spapr_numa.c
+++ b/hw/ppc/spapr_numa.c
@@ -19,6 +19,9 @@
 /* Moved from hw/ppc/spapr_pci_nvlink2.c */
 #define SPAPR_GPU_NUMA_ID   (cpu_to_be32(1))
 
+/* Macro to avoid hardcoding the local distance value */
+#define NUMA_LOCAL_DISTANCE 10
+
 /*
  * Retrieves max_dist_ref_points of the current NUMA affinity.
  */
@@ -500,17 +503,21 @@ static void 
spapr_numa_FORM2_write_rtas_tables(SpaprMachineState *spapr,
 MachineState *ms = MACHINE(spapr);
 NodeInfo *numa_info = ms->numa_state->nodes;
 int nb_numa_nodes = ms->numa_state->num_nodes;
+/* Lookup index table has an extra uint32_t with its length */
+uint32_t lookup_index_table[nb_numa_nodes + 1];
 int distance_table_entries = nb_numa_nodes * nb_numa_nodes;
-g_autofree uint32_t *lookup_index_table = NULL;
-g_autofree uint32_t *distance_table = NULL;
-int src, dst, i, distance_table_size;
-uint8_t *node_distances;
+/*
+ * Distance table is an uint8_t array with a leading uint32_t
+ * containing its length.
+ */
+uint8_t distance_table[distance_table_entries + 4];
+uint32_t *distance_table_length;
+int src, dst, i;
 
 /*
  * ibm,numa-lookup-index-table: array with length and a
  * list of NUMA ids present in the guest.
  */
-lookup_index_table = g_new0(uint32_t, nb_numa_nodes + 1);
 lookup_index_table[0] = cpu_to_be32(nb_numa_nodes);
 
 for (i = 0; i < nb_numa_nodes; i++) {
@@ -518,8 +525,7 @@ static void 
spapr_numa_FORM2_write_rtas_tables(SpaprMachineState *spapr,
 }
 
 _FDT(fdt_setprop(fdt, rtas, "ibm,numa-lookup-index-table",
- lookup_index_table,
- (nb_numa_nodes + 1) * sizeof(uint32_t)));
+ lookup_index_table, sizeof(lookup_index_table)));
 
 /*
  * ibm,numa-distance-table: contains all node distances. First
@@ -531,11 +537,10 @@ static void 
spapr_numa_FORM2_write_rtas_tables(SpaprMachineState *spapr,
  * array because NUMA ids can be sparse (node 0 is the first,
  * node 8 is the second ...).
  */
-distance_table = g_new0(uint32_t, distance_table_entries + 1);
-distance_table[0] = cpu_to_be32(distance_table_entries);
+distance_table_length = (uint32_t *)distance_table;
+distance_table_length[0] = cpu_to_be32(distance_table_entries);
 
-node_distances = (uint8_t *)&distance_table[1];
-i = 0;
+i = 4;
 
 for (src = 0; src < nb_numa_nodes; src++) {
 for (dst = 0; dst < nb_numa_nodes; dst++) {
@@ -546,18 +551,16 @@ static void 
spapr_numa_FORM2_write_rtas_tables(SpaprMachineState *spapr,
  * adding the numa_info to retrieve distance info from.
  */
 if (src == dst) {
-node_distances[i++] = 10;
+distance_table[i++] = NUMA_LOCAL_DISTANCE;
 continue;
 }
 
-node_distances[i++] = numa_info[src].distance[dst];
+distance_table[i++] = numa_info[src].distance[dst];
 }
 }
 
-distance_table_size = distance_table_entries * sizeof(uint8_t) +
-  sizeof(uint32_t);
 _FDT(fdt_setprop(fdt, rtas, "ibm,numa-distance-table",
- distance_table, distance_table_size));
+ distance_table, sizeof(distance_table)));
 }
 
 /*
-- 
2.31.1




Re: [PATCH] nbd/client: Request larger block status by default

2021-09-21 Thread Vladimir Sementsov-Ogievskiy

21.09.2021 21:08, Eric Blake wrote:

On Tue, Sep 21, 2021 at 08:25:11PM +0300, Vladimir Sementsov-Ogievskiy wrote:

21.09.2021 19:17, Eric Blake wrote:

Now that commit 5a1cfd21 has clarified that a driver's block_status
can report larger *pnum than in the original request, we can take
advantage of that in the NBD driver.  Rather than limiting our request
to the server based on the maximum @bytes our caller mentioned, we
instead ask for as much status as possible (the minimum of our 4G
limit or the rest of the export); the server will still only give us
one extent in its answer (because we are using NBD_CMD_FLAG_REQ_ONE),
but now the block layer's caching of data areas can take advantage of
cases where the server gives us a large answer to avoid the need for
future NBD_CMD_BLOCK_STATUS calls.

Signed-off-by: Eric Blake 
---




I remember we already discussed that, but can't find.

The problem is that it's not for free:

In server code in blockstatus_to_extents, we loop though the disk, trying to 
merge extents of the same type.

With full allocated qcow2, we'll have to load all L2 tables and handle them, to merge all 
block status into one big "allocated" extent.



We don't have to loop that far.  The NBD protocol allows the server to
stop looping at whatever point makes sense, as long as it makes
progress.


Maybe, we need some additional negotiation flag, to allow BLOCK_STATUS command 
with NBD_CMD_FLAG_REQ_ONE flag to return an extent larger than required when 
that information is available for free?


That's one possibility.  Another does not add anything to the NBD
protocol, but instead limits the code that tries to loop over block
status to determine a larger "allocated" answer to return to instead
stop looping after a finite number of extents have been merged
together.



In this case we should answer a question: when to stop looping? I'm not sure we 
can simply drop the loop:

For example, for compressed clusters, bdrv_co_block_status() will return them 
one-by-one, and sending them one by one to the wire, when user requested large 
range would be inefficient.
Or should we change block-status behavior for compressed clusters? And may be 
add flag to block_status() that we are not interested in valid_offset, so it 
can return an extent corresponding to the whole L2 table chunk (if all entries 
are allocated, but not consecutive)?


Hmm. So, if not update spec, we'll have to "fix" implementation. That means actually, 
that we should update spec anyway, at least to note that: "clients tend to request large 
regions in hope that server will not spend too much time to serve them but instead return shorter 
answer"..

And you'll never have guarantee, that some another (non-qemu) NBD server will 
not try to satisfy the whole request in one go.


In other words:

1. We want block_status of some region
2. If there some free information available about larger region we are happy to 
cache it

With your solution, we just request a lot larger region, so we lose information 
of [1]. That means that sever can't imagine, how much of requested region is 
really needed, i.e. if we do some additional work to return more information 
(still within boundaries of the request) will it be:
 - good work to minimize network traffic
OR
 - extra work, waste server time, client will cache this information but 
probably never use (or even lose it soon, as our cache is very simple)

With additional negotiation flag we don't lose [1], i.e how much client wants 
now.


So, for me, modifying the protocol looks nicer..

Another approach is do request without NBD_CMD_FLAG_REQ_ONE and handle several 
extents.


Are you optimizing some concrete scenario?


--
Best regards,
Vladimir



[PATCH] linux-user/syscall: add support for CLONE_PIDFD

2021-09-21 Thread Andreas Schwab
Add basic support for CLONE_PIDFD, only fork-like clone without additional
flags.  This is enough to make Qt/forkfd working.

Signed-off-by: Andreas Schwab 
---
 linux-user/syscall.c | 52 ++--
 1 file changed, 50 insertions(+), 2 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 544f5b662f..8b40064e75 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -143,6 +143,9 @@
 #ifndef CLONE_IO
 #define CLONE_IO0x8000  /* Clone io context */
 #endif
+#ifndef CLONE_PIDFD
+#define CLONE_PIDFD 0x1000  /* set if a pidfd should be 
placed in parent */
+#endif
 
 /* We can't directly call the host clone syscall, because this will
  * badly confuse libc (breaking mutexes, for example). So we must
@@ -169,7 +172,8 @@
 /* Flags for fork which we can implement within QEMU itself */
 #define CLONE_OPTIONAL_FORK_FLAGS   \
 (CLONE_SETTLS | CLONE_PARENT_SETTID |   \
- CLONE_CHILD_CLEARTID | CLONE_CHILD_SETTID)
+ CLONE_CHILD_CLEARTID | CLONE_CHILD_SETTID | \
+ CLONE_PIDFD)
 
 /* Flags for thread creation which we can implement within QEMU itself */
 #define CLONE_OPTIONAL_THREAD_FLAGS \
@@ -494,6 +498,39 @@ _syscall4(int, sys_prlimit64, pid_t, pid, int, resource,
   struct host_rlimit64 *, old_limit)
 #endif
 
+#if defined __NR_clone2
+#define __NR_sys_clone2 __NR_clone2
+_syscall6(int, sys_clone2, int, flags, void *, child_stack, size_t, stack_size,
+  int *, ptid, int *, ctid, void *, newtls);
+#else
+#define __NR_sys_clone __NR_clone
+#if defined __cris__ || defined __s390x__
+_syscall5(int, sys_clone, void *, child_stack, int, flags, int *, ptid,
+  void *, newtls, int *, ctid);
+#elif defined __microblaze__
+_syscall6(int, sys_clone, int, flags, void *, child_stack, size_t, stack_size,
+  int *, ptid, void *, newtls, int *, ctid);
+#else
+/*
+ * Note: ctid and newtls are swapped on some architectures, but both are
+ * passed as NULL only for now.
+ */
+_syscall5(int, sys_clone, int, flags, void *, child_stack, int *, ptid,
+  int *, ctid, void *, newtls);
+#endif
+#endif
+static int sys_clone_pidfd(int flags, int *pidfd)
+{
+#ifdef __NR_clone2
+return sys_clone2(flags, NULL, 0, pidfd, NULL, NULL);
+#elif defined __cris__ || defined __s390x__
+return sys_clone(NULL, flags, pidfd, NULL, NULL);
+#elif defined __microblaze__
+return sys_clone(flags, NULL, 0, pidfd, NULL, NULL);
+#else
+return sys_clone(flags, NULL, pidfd, NULL, NULL);
+#endif
+}
 
 #if defined(TARGET_NR_timer_create)
 /* Maximum of 32 active POSIX timers allowed at any one time. */
@@ -6355,6 +6392,7 @@ static int do_fork(CPUArchState *env, unsigned int flags, 
abi_ulong newsp,
 CPUState *new_cpu;
 CPUArchState *new_env;
 sigset_t sigmask;
+int pidfd;
 
 flags &= ~CLONE_IGNORED_FLAGS;
 
@@ -6362,6 +6400,10 @@ static int do_fork(CPUArchState *env, unsigned int 
flags, abi_ulong newsp,
 if (flags & CLONE_VFORK)
 flags &= ~(CLONE_VFORK | CLONE_VM);
 
+/* Only basic fork-like clone is supported with CLONE_PIDFD for now. */
+if (flags & CLONE_PIDFD && flags & ~(CLONE_PIDFD|CSIGNAL))
+return -TARGET_EINVAL;
+
 if (flags & CLONE_VM) {
 TaskState *parent_ts = (TaskState *)cpu->opaque;
 new_thread_info info;
@@ -6460,7 +6502,11 @@ static int do_fork(CPUArchState *env, unsigned int 
flags, abi_ulong newsp,
 }
 
 fork_start();
-ret = fork();
+if (flags & CLONE_PIDFD) {
+ret = sys_clone_pidfd(flags, &pidfd);
+} else {
+ret = fork();
+}
 if (ret == 0) {
 /* Child Process.  */
 cpu_clone_regs_child(env, newsp, flags);
@@ -6483,6 +6529,8 @@ static int do_fork(CPUArchState *env, unsigned int flags, 
abi_ulong newsp,
 } else {
 cpu_clone_regs_parent(env, flags);
 fork_end(0);
+if (flags & CLONE_PIDFD)
+put_user_u32(pidfd, parent_tidptr);
 }
 }
 return ret;
-- 
2.33.0


-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."



Re: [PATCH 3/5] target/arm: Move gdbstub related code out of helper.c

2021-09-21 Thread Philippe Mathieu-Daudé

On 9/21/21 18:28, Peter Maydell wrote:

Currently helper.c includes some code which is part of the arm
target's gdbstub support.  This code has a better home: in gdbstub.c
and gdbstub64.c.  Move it there.

Because aarch64_fpu_gdb_get_reg() and aarch64_fpu_gdb_set_reg() move
into gdbstub64.c, this means that they're now compiled only for
TARGET_AARCH64 rather than always.  That is the only case when they
would ever be used, but it does mean that the ifdef in
arm_cpu_register_gdb_regs_for_features() needs to be adjusted to
match.

Signed-off-by: Peter Maydell 
---
  target/arm/internals.h |   7 ++
  target/arm/gdbstub.c   | 130 
  target/arm/gdbstub64.c | 140 +
  target/arm/helper.c| 271 -
  4 files changed, 277 insertions(+), 271 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé 



Re: [PATCH 2/5] target/arm: Fix coding style issues in gdbstub code in helper.c

2021-09-21 Thread Philippe Mathieu-Daudé

On 9/21/21 18:28, Peter Maydell wrote:

We're going to move this code to a different file; fix the coding
style first so checkpatch doesn't complain.  This includes deleting
the spurious 'break' statements after returns in the
vfp_gdb_get_reg() function.

Signed-off-by: Peter Maydell 
---
  target/arm/helper.c | 23 ---
  1 file changed, 16 insertions(+), 7 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé 



Re: [PATCH] nbd/client: Request larger block status by default

2021-09-21 Thread Eric Blake
On Tue, Sep 21, 2021 at 08:25:11PM +0300, Vladimir Sementsov-Ogievskiy wrote:
> 21.09.2021 19:17, Eric Blake wrote:
> > Now that commit 5a1cfd21 has clarified that a driver's block_status
> > can report larger *pnum than in the original request, we can take
> > advantage of that in the NBD driver.  Rather than limiting our request
> > to the server based on the maximum @bytes our caller mentioned, we
> > instead ask for as much status as possible (the minimum of our 4G
> > limit or the rest of the export); the server will still only give us
> > one extent in its answer (because we are using NBD_CMD_FLAG_REQ_ONE),
> > but now the block layer's caching of data areas can take advantage of
> > cases where the server gives us a large answer to avoid the need for
> > future NBD_CMD_BLOCK_STATUS calls.
> > 
> > Signed-off-by: Eric Blake 
> > ---

> 
> I remember we already discussed that, but can't find.
> 
> The problem is that it's not for free:
> 
> In server code in blockstatus_to_extents, we loop through the disk, trying to 
> merge extents of the same type.
> 
> With full allocated qcow2, we'll have to load all L2 tables and handle them, 
> to merge all block status into one big "allocated" extent.
> 

We don't have to loop that far.  The NBD protocol allows the server to
stop looping at whatever point makes sense, as long as it makes
progress.

> Maybe, we need some additional negotiation flag, to allow BLOCK_STATUS 
> command with NBD_CMD_FLAG_REQ_ONE flag to return an extent larger than 
> required when that information is available for free?

That's one possibility.  Another does not add anything to the NBD
protocol, but instead limits the code that tries to loop over block
status to determine a larger "allocated" answer to return, and instead
stop looping after a finite number of extents have been merged
together.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org




Re: [PATCH] nbd/client: Request larger block status by default

2021-09-21 Thread Vladimir Sementsov-Ogievskiy

21.09.2021 19:17, Eric Blake wrote:

Now that commit 5a1cfd21 has clarified that a driver's block_status
can report larger *pnum than in the original request, we can take
advantage of that in the NBD driver.  Rather than limiting our request
to the server based on the maximum @bytes our caller mentioned, we
instead ask for as much status as possible (the minimum of our 4G
limit or the rest of the export); the server will still only give us
one extent in its answer (because we are using NBD_CMD_FLAG_REQ_ONE),
but now the block layer's caching of data areas can take advantage of
cases where the server gives us a large answer to avoid the need for
future NBD_CMD_BLOCK_STATUS calls.

Signed-off-by: Eric Blake 
---
  block/nbd.c | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/block/nbd.c b/block/nbd.c
index f6ff1c4fb472..7c4ec058b0aa 100644
--- a/block/nbd.c
+++ b/block/nbd.c
@@ -1479,10 +1479,15 @@ static int coroutine_fn nbd_client_co_block_status(
  BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
  Error *local_err = NULL;

+/*
+ * No need to limit our over-the-wire request to @bytes; rather,
+ * ask the server for as much as it can send in one go, and the
+ * block layer will then cap things.
+ */
  NBDRequest request = {
  .type = NBD_CMD_BLOCK_STATUS,
  .from = offset,
  .len = MIN(QEMU_ALIGN_DOWN(INT_MAX, bs->bl.request_alignment),
-   MIN(bytes, s->info.size - offset)),
+   s->info.size - offset),
  .flags = NBD_CMD_FLAG_REQ_ONE,
  };




I remember we already discussed that, but can't find.

The problem is that it's not for free:

In server code in blockstatus_to_extents, we loop through the disk, trying to 
merge extents of the same type.

With full allocated qcow2, we'll have to load all L2 tables and handle them, to merge all 
block status into one big "allocated" extent.

Maybe, we need some additional negotiation flag, to allow BLOCK_STATUS command 
with NBD_CMD_FLAG_REQ_ONE flag to return an extent larger than required when 
that information is available for free?

--
Best regards,
Vladimir



Re: [PATCH 28/30] configure, meson.build: Mark support for 64-bit LoongArch hosts

2021-09-21 Thread Richard Henderson

On 9/21/21 9:09 AM, WANG Xuerui wrote:

I think cpu=loongarch64 but ARCH=loongarch should be okay...


Make it easier on yourself and keep them the same.


r~



Re: [PATCH v4 04/20] nubus: use bitmap to manage available slots

2021-09-21 Thread Mark Cave-Ayland

On 20/09/2021 20:48, Laurent Vivier wrote:


Le 17/09/2021 à 09:50, Mark Cave-Ayland a écrit :

Convert nubus_device_realize() to use a bitmap to manage available slots to 
allow
for future Nubus devices to be plugged into arbitrary slots from the command 
line.

Update mac_nubus_bridge_init() to only allow slots 0x9 to 0xe on Macintosh
machines as documented in "Designing Cards and Drivers for the Macintosh 
Family".


Perhaps we can also add "NuBus Specification" for the non mac-nubus part?

http://www.bitsavers.org/pdf/ti/nubus/2242825-0001_NuBus_Spec1983.pdf


I can add that but I'm wondering if it would be better to do this in patch 13 
("nubus-bridge: introduce separate NubusBridge structure") where the comment is 
updated to reflect the difference between Nubus and Macintosh-specific Nubus?



Signed-off-by: Mark Cave-Ayland 
---
  hw/nubus/mac-nubus-bridge.c |  3 +++
  hw/nubus/nubus-bus.c|  2 +-
  hw/nubus/nubus-device.c | 29 +
  include/hw/nubus/nubus.h|  4 ++--
  4 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/hw/nubus/mac-nubus-bridge.c b/hw/nubus/mac-nubus-bridge.c
index 7c329300b8..c1d77e2bc7 100644
--- a/hw/nubus/mac-nubus-bridge.c
+++ b/hw/nubus/mac-nubus-bridge.c
@@ -18,6 +18,9 @@ static void mac_nubus_bridge_init(Object *obj)
  
  s->bus = NUBUS_BUS(qbus_create(TYPE_NUBUS_BUS, DEVICE(s), NULL));
  
+/* Macintosh only has slots 0x9 to 0xe available */

+s->bus->slot_available_mask = MAKE_64BIT_MASK(9, 6);


Perhaps we can introduce MAC_NUBUS_FIRST_SLOT and MAC_NUBUS_LAST_SLOT

#define MAC_NUBUS_FIRST_SLOT 0x9
#define MAC_NUBUS_LAST_SLOT  0xe

MAKE_64BIT_MASK(MAC_NUBUS_FIRST_SLOT, MAC_NUBUS_LAST_SLOT - 
MAC_NUBUS_FIRST_SLOT + 1)


I wasn't so keen on the verbosity of the above approach, however since both yourself 
and Zoltan have suggested a similar thing then I will see how it looks for v5.



+
  sysbus_init_mmio(sbd, &s->bus->super_slot_io);
  sysbus_init_mmio(sbd, &s->bus->slot_io);
  }
diff --git a/hw/nubus/nubus-bus.c b/hw/nubus/nubus-bus.c
index 5c13452308..404c1032e0 100644
--- a/hw/nubus/nubus-bus.c
+++ b/hw/nubus/nubus-bus.c
@@ -84,7 +84,7 @@ static void nubus_init(Object *obj)
nubus, "nubus-slots",
NUBUS_SLOT_NB * NUBUS_SLOT_SIZE);
  
-nubus->current_slot = NUBUS_FIRST_SLOT;

+nubus->slot_available_mask = MAKE_64BIT_MASK(0, 16);


MAKE_64BIT_MASK(NUBUS_FIRST_SLOT, NUBUS_LAST_SLOT - NUBUS_FIRST_SLOT + 1) ?


Same here.


And we define 16 slots, but NUBUS_SLOT_NB (above) is 15. (I think it's the 
value for Mac as last
slot is 0xe)


If your suggested approach above works then I should be able to change NUBUS_SLOT_NB 
from 15 to 16 here with no other effect.



  }
  
  static void nubus_class_init(ObjectClass *oc, void *data)

diff --git a/hw/nubus/nubus-device.c b/hw/nubus/nubus-device.c
index c1832f73da..d91a1e4af3 100644
--- a/hw/nubus/nubus-device.c
+++ b/hw/nubus/nubus-device.c
@@ -160,14 +160,35 @@ static void nubus_device_realize(DeviceState *dev, Error 
**errp)
  NubusDevice *nd = NUBUS_DEVICE(dev);
  char *name;
  hwaddr slot_offset;
+uint16_t s;
+
+if (nd->slot == -1) {
+/* No slot specified, find first available free slot */
+s = ctz32(nubus->slot_available_mask);
+if (s != 32) {
+nd->slot = s;
+} else {
+error_setg(errp, "Cannot register nubus card, no free slot "
+ "available");
+return;
+}
+} else {
+/* Slot specified, make sure the slot is available */
+if (!(nubus->slot_available_mask & BIT(nd->slot))) {
+error_setg(errp, "Cannot register nubus card, slot %d is "
+ "unavailable or already occupied", nd->slot);
+return;
+}
+}
  
-if (nubus->current_slot < NUBUS_FIRST_SLOT ||

-nubus->current_slot > NUBUS_LAST_SLOT) {
-error_setg(errp, "Cannot register nubus card, not enough slots");
+if (nd->slot < NUBUS_FIRST_SLOT || nd->slot > NUBUS_LAST_SLOT) {
+error_setg(errp, "Cannot register nubus card, slot must be "
+ "between %d and %d", NUBUS_FIRST_SLOT,
+ NUBUS_LAST_SLOT);


Do we need this checking as we already checked the slot bit is available?
Moreover it would be more accurate to rely on the bitmap as the first available 
slot differs between
nubus and mac-nubus.


From the discussion of earlier versions of the patchset, the intention was really to 
keep the old restriction as a failsafe: I'm happy to remove this for v5.



  return;
  }
  
-nd->slot = nubus->current_slot++;

+nubus->slot_available_mask &= ~BIT(nd->slot);
  
  /* Super */

  slot_offset = (nd->slot - 6) * NUBUS_SUPER_SLOT_SIZE;
diff --git a/include/hw/nubus/nubus.h b/include/hw/nubus/nubus.h
index 357f621d15..8ff4736259 100644
--- a/include

Re: [PULL v2 00/27] target-arm queuea

2021-09-21 Thread Peter Maydell
On Tue, 21 Sept 2021 at 16:31, Peter Maydell  wrote:
>
> v2: added missing #include to fix osx/x86.
>
> The following changes since commit 7adb961995a3744f51396502b33ad04a56a317c3:
>
>   Merge remote-tracking branch 
> 'remotes/dgilbert-gitlab/tags/pull-virtiofs-20210916' into staging 
> (2021-09-19 18:53:29 +0100)
>
> are available in the Git repository at:
>
>   https://git.linaro.org/people/pmaydell/qemu-arm.git 
> tags/pull-target-arm-20210921
>
> for you to fetch changes up to 4b445c926add3fdec13958736e482e88857bcad8:
>
>   target/arm: Optimize MVE 1op-immediate insns (2021-09-21 16:28:27 +0100)
>
> 
> target-arm queue:
>  * Optimize codegen for MVE when predication not active
>  * hvf: Add Apple Silicon support
>  * hw/intc: Set GIC maintenance interrupt level to only 0 or 1
>  * Fix mishandling of MVE FPSCR.LTPSIZE reset for usermode emulator
>  * elf2dmp: Fix coverity nits


Applied, thanks.

Please update the changelog at https://wiki.qemu.org/ChangeLog/6.2
for any user-visible changes.

-- PMM



Re: [PATCH v4 03/20] nubus-device: add device slot parameter

2021-09-21 Thread Mark Cave-Ayland

On 20/09/2021 18:31, Laurent Vivier wrote:


Le 17/09/2021 à 09:50, Mark Cave-Ayland a écrit :

This prepares for allowing Nubus devices to be placed in a specific slot instead
of always being auto-allocated by the bus itself.

Signed-off-by: Mark Cave-Ayland 
---
  hw/nubus/nubus-device.c  | 6 ++
  include/hw/nubus/nubus.h | 2 +-
  2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/hw/nubus/nubus-device.c b/hw/nubus/nubus-device.c
index 36203848e5..c1832f73da 100644
--- a/hw/nubus/nubus-device.c
+++ b/hw/nubus/nubus-device.c
@@ -191,12 +191,18 @@ static void nubus_device_realize(DeviceState *dev, Error 
**errp)
  nubus_register_format_block(nd);
  }
  
+static Property nubus_device_properties[] = {

+DEFINE_PROP_INT32("slot", NubusDevice, slot, -1),
+DEFINE_PROP_END_OF_LIST()
+};
+
  static void nubus_device_class_init(ObjectClass *oc, void *data)
  {
  DeviceClass *dc = DEVICE_CLASS(oc);
  
  dc->realize = nubus_device_realize;

  dc->bus_type = TYPE_NUBUS_BUS;
+device_class_set_props(dc, nubus_device_properties);
  }
  
  static const TypeInfo nubus_device_type_info = {

diff --git a/include/hw/nubus/nubus.h b/include/hw/nubus/nubus.h
index 89b0976aaa..357f621d15 100644
--- a/include/hw/nubus/nubus.h
+++ b/include/hw/nubus/nubus.h
@@ -42,7 +42,7 @@ struct NubusBus {
  struct NubusDevice {
  DeviceState qdev;
  
-int slot;

+int32_t slot;
  MemoryRegion super_slot_mem;
  MemoryRegion slot_mem;
  



I think this patch should be merged with the following one because slot is 
simply ignored for the
moment as it is overwritten in nubus_device_realize().

Reviewed-by: Laurent Vivier 


Okay I can do that. My original thinking was to introduce the property first, then 
change the logic to make the follow-on patch easier to review but I don't mind either 
way.



ATB,

Mark.



Re: [PATCH v12 04/10] hvf: Add Apple Silicon support

2021-09-21 Thread Alexander Graf


On 21.09.21 17:30, Peter Maydell wrote:
> On Thu, 16 Sept 2021 at 16:54, Alexander Graf  wrote:
>> With Apple Silicon available to the masses, it's a good time to add support
>> for driving its virtualization extensions from QEMU.
>>
>> This patch adds all necessary architecture specific code to get basic VMs
>> working, including save/restore.
>>
>> Known limitations:
>>
>>   - WFI handling is missing (follows in later patch)
>>   - No watchpoint/breakpoint support
>>
>> Signed-off-by: Alexander Graf 
>> Reviewed-by: Roman Bolshakov 
>> Reviewed-by: Sergio Lopez 
>> Reviewed-by: Peter Maydell 
> This broke compilation on x86 osx:
>
>> --- a/target/i386/hvf/hvf.c
>> +++ b/target/i386/hvf/hvf.c
>> @@ -206,6 +206,11 @@ static inline bool apic_bus_freq_is_known(CPUX86State 
>> *env)
>>  return env->apic_bus_freq != 0;
>>  }
>>
>> +void hvf_kick_vcpu_thread(CPUState *cpu)
>> +{
>> +cpus_kick_thread(cpu);
>> +}
> This won't build without an extra #include "sysemu/cpus.h".
> I've folded in the fix.


Thanks a lot! I test compiled all the earlier versions of the patch set
on x86 as well, but no longer have easy access to an x86 macOS system
:(. They're just so insanely slow in comparison!


Alex




  1   2   >