[RFC] hw/nvme: Use irqfd to send interrupts

2022-07-08 Thread Jinhao Fan
Use irqfd to directly notify KVM to inject interrupts. This is done by
registering a virtual IRQ (virq) in KVM and associating the virq with an
irqfd, so that KVM can directly inject the interrupt when it receives
notification from the irqfd. This approach is supposed to improve 
performance because it bypasses QEMU's MSI interrupt emulation logic.

However, I did not see an obvious improvement of the emulation KIOPS:

QD  1   4  16  64 
QEMU   38 123 210 329
irqfd  40 129 219 328

I found this problem quite hard to diagnose since irqfd's workflow
involves both QEMU and the in-kernel KVM. 

Could you help me figure out the following questions:

1. How much performance improvement can I expect from using irqfd?
2. How can I debug this kind of cross QEMU-KVM problem?

Signed-off-by: Jinhao Fan 
---
 hw/nvme/ctrl.c | 67 -
 hw/nvme/nvme.h |  3 +++
 2 files changed, 68 insertions(+), 1 deletions(-)

diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c
index 4b75c5f549..59768c4586 100644
--- a/hw/nvme/ctrl.c
+++ b/hw/nvme/ctrl.c
@@ -159,6 +159,7 @@
 #include "sysemu/sysemu.h"
 #include "sysemu/block-backend.h"
 #include "sysemu/hostmem.h"
+#include "sysemu/kvm.h"
 #include "hw/pci/msix.h"
 #include "migration/vmstate.h"
 
@@ -484,12 +485,70 @@ static void nvme_irq_check(NvmeCtrl *n)
 }
 }
 
+static int nvme_kvm_msix_vector_use(NvmeCtrl *n,
+NvmeCQueue *cq,
+int vector)
+{
+int ret;
+
+KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state);
+ret = kvm_irqchip_add_msi_route(, vector, >parent_obj);
+if (ret < 0) {
+return ret;
+}
+kvm_irqchip_commit_route_changes();
+cq->virq = ret;
+return 0;
+}
+
+static int nvme_init_cq_irqfd(NvmeCQueue *cq)
+{
+int ret;
+
+ret = nvme_kvm_msix_vector_use(cq->ctrl, cq, (int)cq->vector);
+if (ret < 0) {
+goto fail;
+}
+
+ret = event_notifier_init(>irq_notifier, 0);
+if (ret < 0) {
+goto fail_notifier;
+}
+
+ret = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, >irq_notifier,
+  NULL, cq->virq);
+if (ret < 0) {
+goto fail_kvm;
+}
+
+return 0;
+
+fail_kvm:
+event_notifier_cleanup(>irq_notifier);
+fail_notifier:
+kvm_irqchip_release_virq(kvm_state, cq->virq);
+fail:
+return ret;
+}
+
 static void nvme_irq_assert(NvmeCtrl *n, NvmeCQueue *cq)
 {
 if (cq->irq_enabled) {
 if (msix_enabled(&(n->parent_obj))) {
+/* Initialize CQ irqfd */
+if (!cq->irqfd_enabled && n->params.ioeventfd && cq->cqid != 0) {
+int ret = nvme_init_cq_irqfd(cq);
+if (ret == 0) {
+cq->irqfd_enabled = true;
+}
+}
+
 trace_pci_nvme_irq_msix(cq->vector);
-msix_notify(&(n->parent_obj), cq->vector);
+if (cq->irqfd_enabled) {
+event_notifier_set(>irq_notifier);
+} else {
+msix_notify(&(n->parent_obj), cq->vector);
+}
 } else {
 trace_pci_nvme_irq_pin();
 assert(cq->vector < 32);
@@ -4670,6 +4729,12 @@ static void nvme_free_cq(NvmeCQueue *cq, NvmeCtrl *n)
 event_notifier_cleanup(>notifier);
 }
 if (msix_enabled(>parent_obj)) {
+if (cq->irqfd_enabled) {
+kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, >irq_notifier,
+  cq->virq);
+kvm_irqchip_release_virq(kvm_state, cq->virq);
+event_notifier_cleanup(>irq_notifier);
+}
 msix_vector_unuse(>parent_obj, cq->vector);
 }
 if (cq->cqid) {
diff --git a/hw/nvme/nvme.h b/hw/nvme/nvme.h
index 2a9beea0c8..84e5b00fe3 100644
--- a/hw/nvme/nvme.h
+++ b/hw/nvme/nvme.h
@@ -391,7 +391,10 @@ typedef struct NvmeCQueue {
 uint64_tei_addr;
 QEMUTimer   *timer;
 EventNotifier notifier;
+EventNotifier irq_notifier;
+int virq;
 boolioeventfd_enabled;
+boolirqfd_enabled;
 QTAILQ_HEAD(, NvmeSQueue) sq_list;
 QTAILQ_HEAD(, NvmeRequest) req_list;
 } NvmeCQueue;
-- 
2.25.1




Re: [PATCH 0/2] Hexagon (target/hexagon) bug fixes for mem_noshuf

2022-07-08 Thread Richard Henderson

On 7/8/22 02:35, Taylor Simpson wrote:

Recall that the semantics of a Hexagon mem_noshuf packet are that the
store effectively happens before the load.  There are two bug fixes
in this series.


Taylor Simpson (2):
   Hexagon (target/hexagon) fix store w/mem_noshuf & predicated load
   Hexagon (target/hexagon) fix bug in mem_noshuf load exception

  target/hexagon/gen_tcg.h |  10 +-
  target/hexagon/helper.h  |   1 +
  target/hexagon/macros.h  |  37 --
  target/hexagon/genptr.c  |   7 ++
  target/hexagon/op_helper.c   |  23 +++-
  tests/tcg/hexagon/mem_noshuf.c   | 122 ++-
  tests/tcg/hexagon/mem_noshuf_exception.c | 146 +++
  tests/tcg/hexagon/Makefile.target|   1 +
  8 files changed, 323 insertions(+), 24 deletions(-)
  create mode 100644 tests/tcg/hexagon/mem_noshuf_exception.c



Reviewed-by: Richard Henderson 

r~



Re: [PATCH v4] hw/nvme: Use ioeventfd to handle doorbell updates

2022-07-08 Thread Jinhao Fan
at 10:24 PM, Jinhao Fan  wrote:

> @@ -5793,6 +5891,7 @@ static uint16_t nvme_dbbuf_config(NvmeCtrl *n, const 
> NvmeRequest *req)
> uint64_t dbs_addr = le64_to_cpu(req->cmd.dptr.prp1);
> uint64_t eis_addr = le64_to_cpu(req->cmd.dptr.prp2);
> int i;
> +int ret;
> 

I just noticed this ret is unused. Could you help remove this line when
applying the patch?



Re: [PATCH RFC v2 0/2] arm: enable MTE for QEMU + kvm

2022-07-08 Thread Richard Henderson

On 7/7/22 21:46, Cornelia Huck wrote:

If I'm not misunderstanding things, we need a way to fault in a page together
with the tag; doing that in one go is probably the only way that we can be
sure that this is race-free on the QEMU side.


That's my understanding as well.


r~



Re: [PATCH] scsi/lsi53c895a: fix use-after-free in lsi_do_msgout (CVE-2022-0216)

2022-07-08 Thread Alexander Bulekov
On 220705 2205, Mauro Matteo Cascella wrote:
> Set current_req->req to NULL to prevent reusing a free'd buffer in case of
> repeated SCSI cancel requests. Thanks to Thomas Huth for suggesting the patch.
> 
> Fixes: CVE-2022-0216
> Resolves: https://gitlab.com/qemu-project/qemu/-/issues/972
> Signed-off-by: Mauro Matteo Cascella 
> ---
>  hw/scsi/lsi53c895a.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
> index c8773f73f7..99ea42d49b 100644
> --- a/hw/scsi/lsi53c895a.c
> +++ b/hw/scsi/lsi53c895a.c
> @@ -1028,8 +1028,9 @@ static void lsi_do_msgout(LSIState *s)
>  case 0x0d:
>  /* The ABORT TAG message clears the current I/O process only. */
>  trace_lsi_do_msgout_abort(current_tag);
> -if (current_req) {
> +if (current_req && current_req->req) {
>  scsi_req_cancel(current_req->req);
> +current_req->req = NULL;
>  }
>  lsi_disconnect(s);
>  break;
> -- 
> 2.35.3
> 
>

Hi Mauro,
https://gitlab.com/qemu-project/qemu/-/issues/972#note_1019851430
This reproducer crashes, with this patch applied. Maybe it is some
different bug though - I'm not sure.

With -trace lsi*

lsi_reg_write Write reg DSP1 0x2d = 0x00
lsi_reg_write Write reg DSP2 0x2e = 0x40
lsi_reg_write Write reg DSP3 0x2f = 0x36
lsi_execute_script SCRIPTS dsp=0x364001d0 opcode 0x5808 arg 0x0
lsi_execute_script_io_set Set ATN
lsi_execute_script SCRIPTS dsp=0x364001d8 opcode 0x2601 arg 0x5a41ae0d
lsi_do_msgout MSG out len=65536
lsi_do_msgout_busdevicereset MSG: BUS DEVICE RESET tag=0x0
lsi_do_msgout_select Select LUN 0
lsi_do_msgout_abort MSG: ABORT TAG tag=0x0

In busdevicereset, there are also scsi_req_cancel calls. Do they need
similar changes?

-Alex



Re: What to do with the nanomips disassembler (was: [PATCH] disas: Remove libvixl disassembler)

2022-07-08 Thread Thomas Huth

On 04/07/2022 14.22, Milica Lazarevic wrote:


On 09/06/2022 18.31, Vince Del Vecchio wrote:

...

Regardless, I think we can look at converting the existing disassembler from
C++ to C.  That would address the current concern, right?

> Right - if it's not too much of a hassle that would be great!

Hi everyone, I am interested in taking on this task.


Since nobody else replied, I assume nobody started working on this task yet, 
so I'd say: Please go ahead and convert that file to plain C!


 Thanks,
   Thomas




Re: [PATCH v9 05/21] job.c: add job_lock/unlock while keeping job.h intact

2022-07-08 Thread Vladimir Sementsov-Ogievskiy

On 7/6/22 23:15, Emanuele Giuseppe Esposito wrote:

With "intact" we mean that all job.h functions implicitly
take the lock. Therefore API callers are unmodified.

This means that:
- many static functions that will be always called with job lock held
   become _locked, and call _locked functions
- all public functions take the lock internally if needed, and call _locked
   functions
- all public functions called internally by other functions in job.c will have a
   _locked counterpart (sometimes public), to avoid deadlocks (job lock already 
taken).
   These functions are not used for now.
- some public functions called only from external files (not job.c) do not
   have _locked() counterpart and take the lock inside. Others won't need
   the lock at all because use fields only set at initialization and
   never modified.

job_{lock/unlock} is independent from real_job_{lock/unlock}.

Note: at this stage, job_{lock/unlock} and job lock guard macros
are *nop*

Signed-off-by: Emanuele Giuseppe Esposito 


Reviewed-by: Vladimir Sementsov-Ogievskiy 

I think we still lack some comments on the lock-related interface of functions, but that 
can be improved later.

[..]

  
-static int job_txn_apply(Job *job, int fn(Job *))

+/* Called with job_mutex held, but releases it temporarily. */


Hmm. Yes, it may release it temporarily when fn() releases it.. Not very clear 
but OK..


+static int job_txn_apply_locked(Job *job, int fn(Job *))
  {
  AioContext *inner_ctx;
  Job *other_job, *next;
@@ -170,7 +182,7 @@ static int job_txn_apply(Job *job, int fn(Job *))
   * we need to release it here to avoid holding the lock twice - which 
would
   * break AIO_WAIT_WHILE from within fn.
   */
-job_ref(job);
+job_ref_locked(job);
  aio_context_release(job->aio_context);
  


[..]


+
  static bool job_started(Job *job)


So we can call it both with mutex locked and without. Hope it never races with 
job_start.


  {
  return job->co;
  }
  
-static bool job_should_pause(Job *job)

+/* Called with job_mutex held. */


[..]

  
-/** Useful only as a type shim for aio_bh_schedule_oneshot. */

+/**
+ * Useful only as a type shim for aio_bh_schedule_oneshot.
+ * Called with job_mutex *not* held, but releases it temporarily.


", but releases it temporarily" is misleading to me. If called with mutex not held, 
then "releases it temporarily" is not part of function interface.. Many functions that 
take some mutex internally do release it temporarily and callers should not care about it.

So, better just "Called with job_mutex *not* held."


+ */
  static void job_exit(void *opaque)
  {
  Job *job = (Job *)opaque;
  AioContext *ctx;
+JOB_LOCK_GUARD();
  




--
Best regards,
Vladimir



Re: Intermittent meson failures on msys2

2022-07-08 Thread Thomas Huth

On 08/07/2022 15.13, Daniel P. Berrangé wrote:

On Fri, Jul 08, 2022 at 04:41:48PM +0400, Marc-André Lureau wrote:

Hi

On Mon, Jun 27, 2022 at 6:41 AM Richard Henderson <
richard.hender...@linaro.org> wrote:


Hi guys,

There's an occasional failure on msys2, where meson fails to capture the
output of a build
script.  E.g.

https://gitlab.com/qemu-project/qemu/-/jobs/2642051161

FAILED: ui/input-keymap-qcode-to-linux.c.inc
"C:/GitLab-Runner/builds/qemu-project/qemu/msys64/mingw64/bin/python3.exe"
"C:/GitLab-Runner/builds/qemu-project/qemu/meson/meson.py" "--internal"
"exe" "--capture"
"ui/input-keymap-qcode-to-linux.c.inc" "--"
"C:/GitLab-Runner/builds/qemu-project/qemu/msys64/mingw64/bin/python3.exe"
"../ui/keycodemapdb/tools/keymap-gen" "code-map" "--lang" "glib2"
"--varname"
"qemu_input_map_qcode_to_linux" "../ui/keycodemapdb/data/keymaps.csv"
"qcode" "linux"
[301/1665] Generating input-keymap-qcode-to-qnum.c.inc with a custom
command (wrapped by
meson to capture output)
ninja: build stopped: subcommand failed.


https://gitlab.com/qemu-project/qemu/-/jobs/2625836697

FAILED: ui/shader/texture-blit-frag.h
"C:/GitLab-Runner/builds/qemu-project/qemu/msys64/mingw64/bin/python3.exe"
"C:/GitLab-Runner/builds/qemu-project/qemu/meson/meson.py" "--internal"
"exe" "--capture"
"ui/shader/texture-blit-frag.h" "--" "perl"
"C:/GitLab-Runner/builds/qemu-project/qemu/scripts/shaderinclude.pl"
"../ui/shader/texture-blit.frag"
[313/1663] Generating texture-blit-vert.h with a custom command (wrapped
by meson to
capture output)
ninja: build stopped: subcommand failed.


Could you have a look please?




Ah, we don't have artifacts for msys2 builds it seems, that would perhaps
help. It would make sense to at least take meson-logs/*.txt. I'll work on a
patch.

My guess is that CI randomly fails with "too many opened files", as I have
seen that regularly on various projects with Windows runners. And here,
it's probably reaching limits when running python/perl scripts
simultaneously... I don't see an easy way to solve that if that's the issue.


There shouldn't be very much parallelism even taking place, because

https://docs.gitlab.com/ee/ci/runners/saas/windows_saas_runner.html

says  "Windows runners execute your CI/CD jobs on n1-standard-2
instances with 2 vCPUs and 7.5 GB RAM. "

unless ninja is setting a parallelism much higher than nCPUs ?


We're compiling with "make -j2" there, see .gitlab-ci.d/windows.yml ... so I 
don't think that it's about too many things going on in parallel. 
Additionally, the problem wasn't there a couple of weeks ago, so it's 
either something new that we merged recently, or the Windows containers or 
MSYS2 environment has been changed recently?


 Thomas




Re: [PATCH v3] hw: m25p80: Add Block Protect and Top Bottom bits for write protect

2022-07-08 Thread Francisco Iglesias
On [2022 Jul 08] Fri 09:45:52, Iris Chen wrote:
> Signed-off-by: Iris Chen 

Reviewed-by: Francisco Iglesias 

> ---
> Cosmetic suggestions addressed. 
> 
>  hw/block/m25p80.c | 102 --
>  1 file changed, 90 insertions(+), 12 deletions(-)
> 
> diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
> index 50b523e5b1..f3b401cf90 100644
> --- a/hw/block/m25p80.c
> +++ b/hw/block/m25p80.c
> @@ -38,21 +38,19 @@
>  #include "trace.h"
>  #include "qom/object.h"
>  
> -/* Fields for FlashPartInfo->flags */
> -
> -/* erase capabilities */
> -#define ER_4K 1
> -#define ER_32K 2
> -/* set to allow the page program command to write 0s back to 1. Useful for
> - * modelling EEPROM with SPI flash command set
> - */
> -#define EEPROM 0x100
> -
>  /* 16 MiB max in 3 byte address mode */
>  #define MAX_3BYTES_SIZE 0x100
> -
>  #define SPI_NOR_MAX_ID_LEN 6
>  
> +/* Fields for FlashPartInfo->flags */
> +enum spi_flash_option_flags {
> +ER_4K  = BIT(0),
> +ER_32K = BIT(1),
> +EEPROM = BIT(2),
> +HAS_SR_TB  = BIT(3),
> +HAS_SR_BP3_BIT6= BIT(4),
> +};
> +
>  typedef struct FlashPartInfo {
>  const char *part_name;
>  /*
> @@ -253,7 +251,8 @@ static const FlashPartInfo known_devices[] = {
>  { INFO("n25q512a11",  0x20bb20,  0,  64 << 10, 1024, ER_4K) },
>  { INFO("n25q512a13",  0x20ba20,  0,  64 << 10, 1024, ER_4K) },
>  { INFO("n25q128", 0x20ba18,  0,  64 << 10, 256, 0) },
> -{ INFO("n25q256a",0x20ba19,  0,  64 << 10, 512, ER_4K) },
> +{ INFO("n25q256a",0x20ba19,  0,  64 << 10, 512,
> +   ER_4K | HAS_SR_BP3_BIT6 | HAS_SR_TB) },
>  { INFO("n25q512a",0x20ba20,  0,  64 << 10, 1024, ER_4K) },
>  { INFO("n25q512ax3",  0x20ba20,  0x1000,  64 << 10, 1024, ER_4K) },
>  { INFO("mt25ql512ab", 0x20ba20, 0x1044, 64 << 10, 1024, ER_4K | ER_32K) 
> },
> @@ -480,6 +479,11 @@ struct Flash {
>  bool reset_enable;
>  bool quad_enable;
>  bool aai_enable;
> +bool block_protect0;
> +bool block_protect1;
> +bool block_protect2;
> +bool block_protect3;
> +bool top_bottom_bit;
>  bool status_register_write_disabled;
>  uint8_t ear;
>  
> @@ -625,12 +629,36 @@ void flash_write8(Flash *s, uint32_t addr, uint8_t data)
>  {
>  uint32_t page = addr / s->pi->page_size;
>  uint8_t prev = s->storage[s->cur_addr];
> +uint32_t block_protect_value = (s->block_protect3 << 3) |
> +   (s->block_protect2 << 2) |
> +   (s->block_protect1 << 1) |
> +   (s->block_protect0 << 0);
>  
>  if (!s->write_enable) {
>  qemu_log_mask(LOG_GUEST_ERROR, "M25P80: write with write 
> protect!\n");
>  return;
>  }
>  
> +if (block_protect_value > 0) {
> +uint32_t num_protected_sectors = 1 << (block_protect_value - 1);
> +uint32_t sector = addr / s->pi->sector_size;
> +
> +/* top_bottom_bit == 0 means TOP */
> +if (!s->top_bottom_bit) {
> +if (s->pi->n_sectors <= sector + num_protected_sectors) {
> +qemu_log_mask(LOG_GUEST_ERROR,
> +  "M25P80: write with write protect!\n");
> +return;
> +}
> +} else {
> +if (sector < num_protected_sectors) {
> +qemu_log_mask(LOG_GUEST_ERROR,
> +  "M25P80: write with write protect!\n");
> +return;
> +}
> +}
> +}
> +
>  if ((prev ^ data) & data) {
>  trace_m25p80_programming_zero_to_one(s, addr, prev, data);
>  }
> @@ -728,6 +756,15 @@ static void complete_collecting_data(Flash *s)
>  break;
>  case WRSR:
>  s->status_register_write_disabled = extract32(s->data[0], 7, 1);
> +s->block_protect0 = extract32(s->data[0], 2, 1);
> +s->block_protect1 = extract32(s->data[0], 3, 1);
> +s->block_protect2 = extract32(s->data[0], 4, 1);
> +if (s->pi->flags & HAS_SR_TB) {
> +s->top_bottom_bit = extract32(s->data[0], 5, 1);
> +}
> +if (s->pi->flags & HAS_SR_BP3_BIT6) {
> +s->block_protect3 = extract32(s->data[0], 6, 1);
> +}
>  
>  switch (get_man(s)) {
>  case MAN_SPANSION:
> @@ -1213,6 +1250,15 @@ static void decode_new_cmd(Flash *s, uint32_t value)
>  case RDSR:
>  s->data[0] = (!!s->write_enable) << 1;
>  s->data[0] |= (!!s->status_register_write_disabled) << 7;
> +s->data[0] |= (!!s->block_protect0) << 2;
> +s->data[0] |= (!!s->block_protect1) << 3;
> +s->data[0] |= (!!s->block_protect2) << 4;
> +if (s->pi->flags & HAS_SR_TB) {
> +s->data[0] |= (!!s->top_bottom_bit) << 5;
> +}
> +if (s->pi->flags & HAS_SR_BP3_BIT6) {
> +s->data[0] |= 

[PATCH v3] hw: m25p80: Add Block Protect and Top Bottom bits for write protect

2022-07-08 Thread Iris Chen
Signed-off-by: Iris Chen 
---
Cosmetic suggestions addressed. 

 hw/block/m25p80.c | 102 --
 1 file changed, 90 insertions(+), 12 deletions(-)

diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c
index 50b523e5b1..f3b401cf90 100644
--- a/hw/block/m25p80.c
+++ b/hw/block/m25p80.c
@@ -38,21 +38,19 @@
 #include "trace.h"
 #include "qom/object.h"
 
-/* Fields for FlashPartInfo->flags */
-
-/* erase capabilities */
-#define ER_4K 1
-#define ER_32K 2
-/* set to allow the page program command to write 0s back to 1. Useful for
- * modelling EEPROM with SPI flash command set
- */
-#define EEPROM 0x100
-
 /* 16 MiB max in 3 byte address mode */
 #define MAX_3BYTES_SIZE 0x100
-
 #define SPI_NOR_MAX_ID_LEN 6
 
+/* Fields for FlashPartInfo->flags */
+enum spi_flash_option_flags {
+ER_4K  = BIT(0),
+ER_32K = BIT(1),
+EEPROM = BIT(2),
+HAS_SR_TB  = BIT(3),
+HAS_SR_BP3_BIT6= BIT(4),
+};
+
 typedef struct FlashPartInfo {
 const char *part_name;
 /*
@@ -253,7 +251,8 @@ static const FlashPartInfo known_devices[] = {
 { INFO("n25q512a11",  0x20bb20,  0,  64 << 10, 1024, ER_4K) },
 { INFO("n25q512a13",  0x20ba20,  0,  64 << 10, 1024, ER_4K) },
 { INFO("n25q128", 0x20ba18,  0,  64 << 10, 256, 0) },
-{ INFO("n25q256a",0x20ba19,  0,  64 << 10, 512, ER_4K) },
+{ INFO("n25q256a",0x20ba19,  0,  64 << 10, 512,
+   ER_4K | HAS_SR_BP3_BIT6 | HAS_SR_TB) },
 { INFO("n25q512a",0x20ba20,  0,  64 << 10, 1024, ER_4K) },
 { INFO("n25q512ax3",  0x20ba20,  0x1000,  64 << 10, 1024, ER_4K) },
 { INFO("mt25ql512ab", 0x20ba20, 0x1044, 64 << 10, 1024, ER_4K | ER_32K) },
@@ -480,6 +479,11 @@ struct Flash {
 bool reset_enable;
 bool quad_enable;
 bool aai_enable;
+bool block_protect0;
+bool block_protect1;
+bool block_protect2;
+bool block_protect3;
+bool top_bottom_bit;
 bool status_register_write_disabled;
 uint8_t ear;
 
@@ -625,12 +629,36 @@ void flash_write8(Flash *s, uint32_t addr, uint8_t data)
 {
 uint32_t page = addr / s->pi->page_size;
 uint8_t prev = s->storage[s->cur_addr];
+uint32_t block_protect_value = (s->block_protect3 << 3) |
+   (s->block_protect2 << 2) |
+   (s->block_protect1 << 1) |
+   (s->block_protect0 << 0);
 
 if (!s->write_enable) {
 qemu_log_mask(LOG_GUEST_ERROR, "M25P80: write with write protect!\n");
 return;
 }
 
+if (block_protect_value > 0) {
+uint32_t num_protected_sectors = 1 << (block_protect_value - 1);
+uint32_t sector = addr / s->pi->sector_size;
+
+/* top_bottom_bit == 0 means TOP */
+if (!s->top_bottom_bit) {
+if (s->pi->n_sectors <= sector + num_protected_sectors) {
+qemu_log_mask(LOG_GUEST_ERROR,
+  "M25P80: write with write protect!\n");
+return;
+}
+} else {
+if (sector < num_protected_sectors) {
+qemu_log_mask(LOG_GUEST_ERROR,
+  "M25P80: write with write protect!\n");
+return;
+}
+}
+}
+
 if ((prev ^ data) & data) {
 trace_m25p80_programming_zero_to_one(s, addr, prev, data);
 }
@@ -728,6 +756,15 @@ static void complete_collecting_data(Flash *s)
 break;
 case WRSR:
 s->status_register_write_disabled = extract32(s->data[0], 7, 1);
+s->block_protect0 = extract32(s->data[0], 2, 1);
+s->block_protect1 = extract32(s->data[0], 3, 1);
+s->block_protect2 = extract32(s->data[0], 4, 1);
+if (s->pi->flags & HAS_SR_TB) {
+s->top_bottom_bit = extract32(s->data[0], 5, 1);
+}
+if (s->pi->flags & HAS_SR_BP3_BIT6) {
+s->block_protect3 = extract32(s->data[0], 6, 1);
+}
 
 switch (get_man(s)) {
 case MAN_SPANSION:
@@ -1213,6 +1250,15 @@ static void decode_new_cmd(Flash *s, uint32_t value)
 case RDSR:
 s->data[0] = (!!s->write_enable) << 1;
 s->data[0] |= (!!s->status_register_write_disabled) << 7;
+s->data[0] |= (!!s->block_protect0) << 2;
+s->data[0] |= (!!s->block_protect1) << 3;
+s->data[0] |= (!!s->block_protect2) << 4;
+if (s->pi->flags & HAS_SR_TB) {
+s->data[0] |= (!!s->top_bottom_bit) << 5;
+}
+if (s->pi->flags & HAS_SR_BP3_BIT6) {
+s->data[0] |= (!!s->block_protect3) << 6;
+}
 
 if (get_man(s) == MAN_MACRONIX || get_man(s) == MAN_ISSI) {
 s->data[0] |= (!!s->quad_enable) << 6;
@@ -1553,6 +1599,11 @@ static void m25p80_reset(DeviceState *d)
 
 s->wp_level = true;
 s->status_register_write_disabled = false;
+s->block_protect0 = false;
+

Re: [RFC 0/8] Introduce an extensible static analyzer

2022-07-08 Thread Alberto Faria
On Wed, Jul 6, 2022 at 11:15 AM Daniel P. Berrangé  wrote:
> For clang-tidy, I've been trying it out integrated into emacs
> via eglot and clangd. This means I get clang-tidy errors reported
> interactively as I write code, so wouldn't need to run a full
> tree analysis. Unfortunately, unless I'm missing something, there's
> no way to extend clangd to plugin extra checks.  So it would need
> to re-implement something equivalent to clangd for our custom checks,
> and then integrate that into eglot (or equiv for other editors).

That would be very handy. Still, running the script on the whole tree
would be necessary to ensure that changes to headers don't break
translation units that are not open in the editor.

Alberto




[RISU PATCH v4 29/29] aarch64: Trivial SME test

2022-07-08 Thread Richard Henderson
Signed-off-by: Richard Henderson 
---
 test_sme_aarch64.s | 55 ++
 1 file changed, 55 insertions(+)
 create mode 100644 test_sme_aarch64.s

diff --git a/test_sme_aarch64.s b/test_sme_aarch64.s
new file mode 100644
index 000..a5ef909
--- /dev/null
+++ b/test_sme_aarch64.s
@@ -0,0 +1,55 @@
+   .arch_extension sme
+
+   mov w0, #0
+   mov w1, #0
+   mov w2, #0
+   mov w3, #0
+   mov w4, #0
+   mov w5, #0
+   mov w6, #0
+   mov w7, #0
+   mov w8, #0
+   mov w9, #0
+   mov w10, #0
+   mov w11, #0
+   mov w12, #0
+   mov w13, #0
+   mov w14, #0
+   mov w15, #0
+   mov w16, #0
+   mov w17, #0
+   mov w18, #0
+   mov w19, #0
+   mov w20, #0
+   mov w21, #0
+   mov w22, #0
+   mov w23, #0
+   mov w24, #0
+   mov w25, #0
+   mov w26, #0
+   mov w27, #0
+   mov w28, #0
+   mov w29, #0
+   mov w30, #0
+
+   smstart
+
+   ptrue   p0.b
+   rdsvl   x12, #1
+
+0: subsw12, w12, #1
+   lsl w13, w12, #4
+   index   z0.b, w13, #1
+   movaza0h.b[w12, #0], p0/m, z0.b
+   b.ne0b
+
+   .inst 0x5af0/* compare */
+
+   rdsvl   x12, #1
+0: subsw12, w12, #1
+   lsl w13, w12, #4
+   index   z0.b, w13, #1
+   movaza0v.b[w12, #0], p0/m, z0.b
+   b.ne0b
+
+   .inst 0x5af1/* exit */
-- 
2.34.1




[RISU PATCH v4 28/29] aarch64: Add support for ZA storage

2022-07-08 Thread Richard Henderson
Require NVL == SVL on startup, to make it easier to manage reginfo.
Most of the time PSTATE.SM would be active with PSTATE.ZA anyway,
for any non-trivial SME testing.

Extend saved storage only when PSTATE.ZA is active.
Use a carefully reserved uint16_t for saving SVCR.

Signed-off-by: Richard Henderson 
---
 risu_reginfo_aarch64.h |  52 -
 risu_reginfo_aarch64.c | 161 -
 2 files changed, 191 insertions(+), 22 deletions(-)

diff --git a/risu_reginfo_aarch64.h b/risu_reginfo_aarch64.h
index 536c12b..097b7ad 100644
--- a/risu_reginfo_aarch64.h
+++ b/risu_reginfo_aarch64.h
@@ -21,6 +21,43 @@
 #define SVE_VQ_MAX 16
 
 #define ROUND_UP(SIZE, POW2)(((SIZE) + (POW2) - 1) & -(POW2))
+
+#ifdef ZA_MAGIC
+/* System headers have all Streaming SVE definitions. */
+typedef struct sve_context risu_sve_context;
+typedef struct za_context  risu_za_context;
+#else
+#define ZA_MAGIC 0x54366345
+#define SVE_SIG_FLAG_SM  1
+
+/* System headers missing flags field. */
+typedef struct {
+struct _aarch64_ctx head;
+uint16_t vl;
+uint16_t flags;
+uint16_t reserved[2];
+} risu_sve_context;
+
+typedef struct {
+struct _aarch64_ctx head;
+uint16_t vl;
+uint16_t reserved[3];
+} risu_za_context;
+
+#define ZA_SIG_REGS_OFFSET \
+ROUND_UP(sizeof(risu_za_context), SVE_VQ_BYTES)
+
+#define ZA_SIG_REGS_SIZE(vq) \
+((vq) * (vq) * SVE_VQ_BYTES * SVE_VQ_BYTES)
+
+#define ZA_SIG_ZAV_OFFSET(vq, n) \
+(ZA_SIG_REGS_OFFSET + (SVE_SIG_ZREG_SIZE(vq) * n))
+
+#define ZA_SIG_CONTEXT_SIZE(vq) \
+(ZA_SIG_REGS_OFFSET + ZA_SIG_REGS_SIZE(vq))
+
+#endif /* ZA_MAGIC */
+
 #define RISU_SVE_REGS_SIZE(VQ)  ROUND_UP(SVE_SIG_REGS_SIZE(VQ), 16)
 #define RISU_SIMD_REGS_SIZE (32 * 16)
 
@@ -36,12 +73,16 @@ struct reginfo {
 uint32_t fpsr;
 uint32_t fpcr;
 uint16_t sve_vl;
-uint16_t reserved;
+uint16_t svcr;
 
-char extra[RISU_SVE_REGS_SIZE(SVE_VQ_MAX)]
+char extra[RISU_SVE_REGS_SIZE(SVE_VQ_MAX) +
+   ZA_SIG_REGS_SIZE(SVE_VQ_MAX)]
 __attribute__((aligned(16)));
 };
 
+#define SVCR_SM  1
+#define SVCR_ZA  2
+
 static inline uint64_t *reginfo_vreg(struct reginfo *ri, int i)
 {
 return (uint64_t *)>extra[i * 16];
@@ -59,4 +100,11 @@ static inline uint16_t *reginfo_preg(struct reginfo *ri, 
int vq, int i)
   SVE_SIG_REGS_OFFSET];
 }
 
+static inline uint64_t *reginfo_zav(struct reginfo *ri, int vq, int i)
+{
+return (uint64_t *)>extra[RISU_SVE_REGS_SIZE(vq) +
+  ZA_SIG_ZAV_OFFSET(vq, i) -
+  ZA_SIG_REGS_OFFSET];
+}
+
 #endif /* RISU_REGINFO_AARCH64_H */
diff --git a/risu_reginfo_aarch64.c b/risu_reginfo_aarch64.c
index 9ea0acf..af620f3 100644
--- a/risu_reginfo_aarch64.c
+++ b/risu_reginfo_aarch64.c
@@ -25,25 +25,44 @@
 #include "risu.h"
 #include "risu_reginfo_aarch64.h"
 
+#ifndef PR_SME_SET_VL
+#define PR_SME_SET_VL 63
+#endif
+
 /* Should we test SVE register state */
 static int test_sve;
+static int test_za;
 static const struct option extra_opts[] = {
 {"test-sve", required_argument, NULL, FIRST_ARCH_OPT },
+{"test-za", required_argument, NULL, FIRST_ARCH_OPT + 1 },
 {0, 0, 0, 0}
 };
 
 const struct option * const arch_long_opts = _opts[0];
 const char * const arch_extra_help
-= "  --test-sve=Compare SVE registers with VQ\n";
+= "  --test-sve=Compare SVE registers with VQ\n"
+  "  --test-za= Compare ZA storage with VQ\n";
 
 void process_arch_opt(int opt, const char *arg)
 {
-assert(opt == FIRST_ARCH_OPT);
-test_sve = strtol(arg, 0, 10);
-
-if (test_sve <= 0 || test_sve > SVE_VQ_MAX) {
-fprintf(stderr, "Invalid value for VQ (1-%d)\n", SVE_VQ_MAX);
-exit(EXIT_FAILURE);
+switch (opt) {
+case FIRST_ARCH_OPT:
+test_sve = strtol(arg, 0, 10);
+if (test_sve <= 0 || test_sve > SVE_VQ_MAX) {
+fprintf(stderr, "Invalid value for SVE VQ (1-%d)\n", SVE_VQ_MAX);
+exit(EXIT_FAILURE);
+}
+break;
+case FIRST_ARCH_OPT + 1:
+test_za = strtol(arg, 0, 10);
+if (test_za <= 0 || test_za > SVE_VQ_MAX
+|| (test_za & (test_za - 1))) {
+fprintf(stderr, "Invalid value for ZA VQ (1-%d)\n", SVE_VQ_MAX);
+exit(EXIT_FAILURE);
+}
+break;
+default:
+abort();
 }
 }
 
@@ -51,6 +70,31 @@ void arch_init(void)
 {
 long want, got;
 
+if (test_za) {
+/*
+ * For now, reginfo requires NVL == SVL.
+ * There doesn't seem to be much advantage to differing.
+ */
+if (test_sve && test_sve != test_za) {
+fprintf(stderr, "Mismatched values for SVE and ZA VQ\n");
+exit(EXIT_FAILURE);
+}
+
+want = sve_vl_from_vq(test_za);
+got = prctl(PR_SME_SET_VL, want);
+if (want != got) {
+if (got >= 0) {
+

[RISU PATCH v4 25/29] Remove return value from reginfo_dump

2022-07-08 Thread Richard Henderson
No uses actually checked the error indication.  Even if we wanted
to check ferror on the stream, we should do that generically rather
than per arch.

Signed-off-by: Richard Henderson 
---
 risu.h | 4 ++--
 risu_reginfo_aarch64.c | 8 +++-
 risu_reginfo_arm.c | 6 ++
 risu_reginfo_i386.c| 6 ++
 risu_reginfo_m68k.c| 6 ++
 risu_reginfo_ppc64.c   | 6 ++
 6 files changed, 13 insertions(+), 23 deletions(-)

diff --git a/risu.h b/risu.h
index 99f0d8e..6eceb9f 100644
--- a/risu.h
+++ b/risu.h
@@ -120,8 +120,8 @@ void reginfo_init(struct reginfo *ri, ucontext_t *uc);
 /* return 1 if structs are equal, 0 otherwise. */
 int reginfo_is_eq(struct reginfo *r1, struct reginfo *r2);
 
-/* print reginfo state to a stream, returns 1 on success, 0 on failure */
-int reginfo_dump(struct reginfo *ri, FILE * f);
+/* print reginfo state to a stream */
+void reginfo_dump(struct reginfo *ri, FILE *f);
 
 /* reginfo_dump_mismatch: print mismatch details to a stream */
 void reginfo_dump_mismatch(struct reginfo *m, struct reginfo *a, FILE *f);
diff --git a/risu_reginfo_aarch64.c b/risu_reginfo_aarch64.c
index e0f80c0..b86864d 100644
--- a/risu_reginfo_aarch64.c
+++ b/risu_reginfo_aarch64.c
@@ -219,8 +219,8 @@ static void sve_dump_zreg_diff(FILE *f, int vq, const 
uint64_t *za,
 }
 }
 
-/* reginfo_dump: print state to a stream, returns nonzero on success */
-int reginfo_dump(struct reginfo *ri, FILE * f)
+/* reginfo_dump: print state to a stream */
+void reginfo_dump(struct reginfo *ri, FILE * f)
 {
 int i;
 fprintf(f, "  faulting insn %08x\n", ri->faulting_insn);
@@ -263,7 +263,7 @@ int reginfo_dump(struct reginfo *ri, FILE * f)
 sve_dump_preg(f, vq, p);
 fprintf(f, "\n");
 }
-return !ferror(f);
+return;
 }
 
 for (i = 0; i < 32; i++) {
@@ -271,8 +271,6 @@ int reginfo_dump(struct reginfo *ri, FILE * f)
 fprintf(f, "  V%-2d: %016" PRIx64 "%016" PRIx64 "\n",
 i, v[1], v[0]);
 }
-
-return !ferror(f);
 }
 
 void reginfo_dump_mismatch(struct reginfo *m, struct reginfo *a, FILE * f)
diff --git a/risu_reginfo_arm.c b/risu_reginfo_arm.c
index ba1035e..09813c4 100644
--- a/risu_reginfo_arm.c
+++ b/risu_reginfo_arm.c
@@ -161,8 +161,8 @@ int reginfo_is_eq(struct reginfo *r1, struct reginfo *r2)
 return memcmp(r1, r2, sizeof(*r1)) == 0;/* ok since we memset 0 */
 }
 
-/* reginfo_dump: print the state to a stream, returns nonzero on success */
-int reginfo_dump(struct reginfo *ri, FILE *f)
+/* reginfo_dump: print the state to a stream */
+void reginfo_dump(struct reginfo *ri, FILE *f)
 {
 int i;
 if (ri->faulting_insn_size == 2) {
@@ -179,8 +179,6 @@ int reginfo_dump(struct reginfo *ri, FILE *f)
 i, (unsigned long long) ri->fpregs[i]);
 }
 fprintf(f, "  fpscr: %08x\n", ri->fpscr);
-
-return !ferror(f);
 }
 
 void reginfo_dump_mismatch(struct reginfo *m, struct reginfo *a, FILE *f)
diff --git a/risu_reginfo_i386.c b/risu_reginfo_i386.c
index 57e4c00..37506fa 100644
--- a/risu_reginfo_i386.c
+++ b/risu_reginfo_i386.c
@@ -310,8 +310,8 @@ static char get_vecletter(uint64_t features)
 }
 }
 
-/* reginfo_dump: print state to a stream, returns nonzero on success */
-int reginfo_dump(struct reginfo *ri, FILE *f)
+/* reginfo_dump: print state to a stream */
+void reginfo_dump(struct reginfo *ri, FILE *f)
 {
 uint64_t features;
 int i, j, n, w;
@@ -345,8 +345,6 @@ int reginfo_dump(struct reginfo *ri, FILE *f)
 fprintf(f, "  k%-5d: %016" PRIx64 "\n", i, ri->kregs[i]);
 }
 }
-
-return !ferror(f);
 }
 
 void reginfo_dump_mismatch(struct reginfo *m, struct reginfo *a, FILE *f)
diff --git a/risu_reginfo_m68k.c b/risu_reginfo_m68k.c
index 29edce9..38d7dd3 100644
--- a/risu_reginfo_m68k.c
+++ b/risu_reginfo_m68k.c
@@ -92,8 +92,8 @@ int reginfo_is_eq(struct reginfo *m, struct reginfo *a)
 return 1;
 }
 
-/* reginfo_dump: print state to a stream, returns nonzero on success */
-int reginfo_dump(struct reginfo *ri, FILE *f)
+/* reginfo_dump: print state to a stream */
+void reginfo_dump(struct reginfo *ri, FILE *f)
 {
 int i;
 fprintf(f, "  pc\e[1;101;37m0x%08x\e[0m\n", ri->pc);
@@ -114,8 +114,6 @@ int reginfo_dump(struct reginfo *ri, FILE *f)
 }
 
 fprintf(f, "\n");
-
-return !ferror(f);
 }
 
 void reginfo_dump_mismatch(struct reginfo *m, struct reginfo *a, FILE *f)
diff --git a/risu_reginfo_ppc64.c b/risu_reginfo_ppc64.c
index e96dc48..134a152 100644
--- a/risu_reginfo_ppc64.c
+++ b/risu_reginfo_ppc64.c
@@ -112,8 +112,8 @@ int reginfo_is_eq(struct reginfo *m, struct reginfo *a)
 return 1;
 }
 
-/* reginfo_dump: print state to a stream, returns nonzero on success */
-int reginfo_dump(struct reginfo *ri, FILE * f)
+/* reginfo_dump: print state to a stream */
+void reginfo_dump(struct reginfo *ri, FILE * f)
 {
 int i;
 
@@ -152,8 +152,6 @@ int reginfo_dump(struct reginfo *ri, FILE * f)

[RISU PATCH v4 27/29] aarch64: Tidy reginfo dumping ahead of ZA state

2022-07-08 Thread Richard Henderson
Fix a misalignment in the sve_vl output, and add a bit more
space on the left for the ZA[n] field name.

Signed-off-by: Richard Henderson 
---
 risu_reginfo_aarch64.c | 29 ++---
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/risu_reginfo_aarch64.c b/risu_reginfo_aarch64.c
index b86864d..9ea0acf 100644
--- a/risu_reginfo_aarch64.c
+++ b/risu_reginfo_aarch64.c
@@ -183,6 +183,18 @@ static int sve_preg_is_eq(int vq, const void *p1, const 
void *p2)
 return memcmp(p1, p2, vq * 2) == 0;
 }
 
+static void sve_dump_zreg(FILE *f, int vq, const uint64_t *z)
+{
+const char *pad = "";
+int q;
+
+for (q = 0; q < vq; q++) {
+fprintf(f, "%s[%-2d] %016" PRIx64 "%016" PRIx64 "\n",
+pad, q, z[2 * q + 1], z[2 * q]);
+pad = "   "; /* 11 spaces */
+}
+}
+
 static void sve_dump_preg(FILE *f, int vq, const uint16_t *p)
 {
 int q;
@@ -211,10 +223,10 @@ static void sve_dump_zreg_diff(FILE *f, int vq, const 
uint64_t *za,
 uint64_t zb0 = zb[2 * q], zb1 = zb[2 * q + 1];
 
 if (za0 != zb0 || za1 != zb1) {
-fprintf(f, "%sq%-2d: %016" PRIx64 "%016" PRIx64
+fprintf(f, "%s[%-2d]: %016" PRIx64 "%016" PRIx64
 " vs %016" PRIx64 "%016" PRIx64"\n",
 pad, q, za1, za0, zb1, zb0);
-pad = "  ";
+pad = "   "; /* 11 spaces */
 }
 }
 }
@@ -237,19 +249,14 @@ void reginfo_dump(struct reginfo *ri, FILE * f)
 
 if (ri->sve_vl) {
 int vq = sve_vq_from_vl(ri->sve_vl);
-int q;
 
 fprintf(f, "  vl : %d\n", ri->sve_vl);
 
 for (i = 0; i < SVE_NUM_ZREGS; i++) {
 uint64_t *z = reginfo_zreg(ri, vq, i);
 
-fprintf(f, "  Z%-2d q%-2d: %016" PRIx64 "%016" PRIx64 "\n",
-i, 0, z[1], z[0]);
-for (q = 1; q < vq; ++q) {
-fprintf(f, "  q%-2d: %016" PRIx64 "%016" PRIx64 "\n",
-q, z[q * 2 + 1], z[q * 2]);
-}
+fprintf(f, "  Z%-2d: ", i);
+sve_dump_zreg(f, vq, z);
 }
 
 for (i = 0; i < SVE_NUM_PREGS + 1; i++) {
@@ -312,7 +319,7 @@ void reginfo_dump_mismatch(struct reginfo *m, struct 
reginfo *a, FILE * f)
 }
 
 if (m->sve_vl != a->sve_vl) {
-fprintf(f, "  vl: %d vs %d\n", m->sve_vl, a->sve_vl);
+fprintf(f, "  vl : %d vs %d\n", m->sve_vl, a->sve_vl);
 }
 
 if (m->sve_vl) {
@@ -323,7 +330,7 @@ void reginfo_dump_mismatch(struct reginfo *m, struct 
reginfo *a, FILE * f)
 uint64_t *za = reginfo_zreg(a, vq, i);
 
 if (!sve_zreg_is_eq(vq, zm, za)) {
-fprintf(f, "  Z%-2d ", i);
+fprintf(f, "  Z%-2d: ", i);
 sve_dump_zreg_diff(f, vq, zm, za);
 }
 }
-- 
2.34.1




[RISU PATCH v4 23/29] Standardize reginfo_dump_mismatch printing

2022-07-08 Thread Richard Henderson
Hoist the "master vs apprentice" label to apprentice(), since
we will want different labels for dumping.  Remove all of the
"mismatch" text from reginfo_dump_mismatch -- just print "vs".

Signed-off-by: Richard Henderson 
---
 risu.h |  4 ++--
 risu.c |  1 +
 risu_reginfo_aarch64.c | 12 +---
 risu_reginfo_arm.c | 18 -
 risu_reginfo_i386.c|  6 +-
 risu_reginfo_m68k.c| 23 +++---
 risu_reginfo_ppc64.c   | 44 ++
 7 files changed, 44 insertions(+), 64 deletions(-)

diff --git a/risu.h b/risu.h
index bdb70c1..99f0d8e 100644
--- a/risu.h
+++ b/risu.h
@@ -123,8 +123,8 @@ int reginfo_is_eq(struct reginfo *r1, struct reginfo *r2);
 /* print reginfo state to a stream, returns 1 on success, 0 on failure */
 int reginfo_dump(struct reginfo *ri, FILE * f);
 
-/* reginfo_dump_mismatch: print mismatch details to a stream, ret nonzero=ok */
-int reginfo_dump_mismatch(struct reginfo *m, struct reginfo *a, FILE *f);
+/* reginfo_dump_mismatch: print mismatch details to a stream */
+void reginfo_dump_mismatch(struct reginfo *m, struct reginfo *a, FILE *f);
 
 /* return size of reginfo */
 int reginfo_size(struct reginfo *ri);
diff --git a/risu.c b/risu.c
index 1c096a8..f613fa9 100644
--- a/risu.c
+++ b/risu.c
@@ -449,6 +449,7 @@ static int apprentice(void)
 reginfo_dump([MASTER], stderr);
 fprintf(stderr, "apprentice reginfo:\n");
 reginfo_dump([APPRENTICE], stderr);
+fprintf(stderr, "mismatch detail (master : apprentice):\n");
 reginfo_dump_mismatch([MASTER], [APPRENTICE], stderr);
 return EXIT_FAILURE;
 
diff --git a/risu_reginfo_aarch64.c b/risu_reginfo_aarch64.c
index be47980..e0f80c0 100644
--- a/risu_reginfo_aarch64.c
+++ b/risu_reginfo_aarch64.c
@@ -275,15 +275,15 @@ int reginfo_dump(struct reginfo *ri, FILE * f)
 return !ferror(f);
 }
 
-/* reginfo_dump_mismatch: print mismatch details to a stream, ret nonzero=ok */
-int reginfo_dump_mismatch(struct reginfo *m, struct reginfo *a, FILE * f)
+void reginfo_dump_mismatch(struct reginfo *m, struct reginfo *a, FILE * f)
 {
 int i;
-fprintf(f, "mismatch detail (master : apprentice):\n");
+
 if (m->faulting_insn != a->faulting_insn) {
-fprintf(f, "  faulting insn mismatch %08x vs %08x\n",
+fprintf(f, "  faulting insn: %08x vs %08x\n",
 m->faulting_insn, a->faulting_insn);
 }
+
 for (i = 0; i < 31; i++) {
 if (m->regs[i] != a->regs[i]) {
 fprintf(f, "  X%-2d: %016" PRIx64 " vs %016" PRIx64 "\n",
@@ -342,7 +342,7 @@ int reginfo_dump_mismatch(struct reginfo *m, struct reginfo 
*a, FILE * f)
 sve_dump_preg_diff(f, vq, pm, pa);
 }
 }
-return !ferror(f);
+return;
 }
 
 for (i = 0; i < 32; i++) {
@@ -356,6 +356,4 @@ int reginfo_dump_mismatch(struct reginfo *m, struct reginfo 
*a, FILE * f)
 i, mv[1], mv[0], av[1], av[0]);
 }
 }
-
-return !ferror(f);
 }
diff --git a/risu_reginfo_arm.c b/risu_reginfo_arm.c
index 202120b..ba1035e 100644
--- a/risu_reginfo_arm.c
+++ b/risu_reginfo_arm.c
@@ -183,32 +183,33 @@ int reginfo_dump(struct reginfo *ri, FILE *f)
 return !ferror(f);
 }
 
-int reginfo_dump_mismatch(struct reginfo *m, struct reginfo *a, FILE *f)
+void reginfo_dump_mismatch(struct reginfo *m, struct reginfo *a, FILE *f)
 {
 int i;
-fprintf(f, "mismatch detail (master : apprentice):\n");
 
 if (m->faulting_insn_size != a->faulting_insn_size) {
-fprintf(f, "  faulting insn size mismatch %d vs %d\n",
+fprintf(f, "  faulting insn size: %d vs %d\n",
 m->faulting_insn_size, a->faulting_insn_size);
 } else if (m->faulting_insn != a->faulting_insn) {
 if (m->faulting_insn_size == 2) {
-fprintf(f, "  faulting insn mismatch %04x vs %04x\n",
+fprintf(f, "  faulting insn: %04x vs %04x\n",
 m->faulting_insn, a->faulting_insn);
 } else {
-fprintf(f, "  faulting insn mismatch %08x vs %08x\n",
+fprintf(f, "  faulting insn: %08x vs %08x\n",
 m->faulting_insn, a->faulting_insn);
 }
 }
+
 for (i = 0; i < 16; i++) {
 if (m->gpreg[i] != a->gpreg[i]) {
-fprintf(f, "  r%d: %08x vs %08x\n", i, m->gpreg[i],
-a->gpreg[i]);
+fprintf(f, "  r%d: %08x vs %08x\n", i, m->gpreg[i], a->gpreg[i]);
 }
 }
+
 if (m->cpsr != a->cpsr) {
 fprintf(f, "  cpsr: %08x vs %08x\n", m->cpsr, a->cpsr);
 }
+
 for (i = 0; i < 32; i++) {
 if (m->fpregs[i] != a->fpregs[i]) {
 fprintf(f, "  d%d: %016llx vs %016llx\n", i,
@@ -216,9 +217,8 @@ int reginfo_dump_mismatch(struct reginfo *m, struct reginfo 
*a, FILE *f)
 (unsigned long long) a->fpregs[i]);
 }
 }
+
 if (m->fpscr != a->fpscr) {

[RISU PATCH v4 21/29] aarch64: Use arch_init to configure sve

2022-07-08 Thread Richard Henderson
Adjust some of the aarch64 code to look at the reginfo struct
instead of looking at test_sve, so that we do not need to pass
the --test-sve option in order to dump sve trace files.

Diagnose EINVAL as meaning that either the CPU or the kernel does not support SVE.

Signed-off-by: Richard Henderson 
---
 risu.h |  1 +
 risu.c |  3 +++
 risu_reginfo_aarch64.c | 31 ---
 risu_reginfo_arm.c |  4 
 risu_reginfo_i386.c|  4 
 risu_reginfo_m68k.c|  4 
 risu_reginfo_ppc64.c   |  4 
 7 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/risu.h b/risu.h
index 3cad3d5..bdb70c1 100644
--- a/risu.h
+++ b/risu.h
@@ -23,6 +23,7 @@
 extern const struct option * const arch_long_opts;
 extern const char * const arch_extra_help;
 void process_arch_opt(int opt, const char *arg);
+void arch_init(void);
 #define FIRST_ARCH_OPT   0x100
 
 /* GCC computed include to pull in the correct risu_reginfo_*.h for
diff --git a/risu.c b/risu.c
index a70b778..1c096a8 100644
--- a/risu.c
+++ b/risu.c
@@ -617,6 +617,9 @@ int main(int argc, char **argv)
 
 load_image(imgfile);
 
+/* E.g. select requested SVE vector length. */
+arch_init();
+
 if (ismaster) {
 return master();
 } else {
diff --git a/risu_reginfo_aarch64.c b/risu_reginfo_aarch64.c
index 81a77ba..be47980 100644
--- a/risu_reginfo_aarch64.c
+++ b/risu_reginfo_aarch64.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include "risu.h"
@@ -37,8 +38,6 @@ const char * const arch_extra_help
 
 void process_arch_opt(int opt, const char *arg)
 {
-long want, got;
-
 assert(opt == FIRST_ARCH_OPT);
 test_sve = strtol(arg, 0, 10);
 
@@ -46,16 +45,26 @@ void process_arch_opt(int opt, const char *arg)
 fprintf(stderr, "Invalid value for VQ (1-%d)\n", SVE_VQ_MAX);
 exit(EXIT_FAILURE);
 }
-want = sve_vl_from_vq(test_sve);
-got = prctl(PR_SVE_SET_VL, want);
-if (want != got) {
-if (got < 0) {
-perror("prctl PR_SVE_SET_VL");
-} else {
-fprintf(stderr, "Unsupported value for VQ (%d != %d)\n",
-test_sve, (int)sve_vq_from_vl(got));
+}
+
+void arch_init(void)
+{
+long want, got;
+
+if (test_sve) {
+want = sve_vl_from_vq(test_sve);
+got = prctl(PR_SVE_SET_VL, want);
+if (want != got) {
+if (got >= 0) {
+fprintf(stderr, "Unsupported VQ for SVE (%d != %d)\n",
+test_sve, (int)sve_vq_from_vl(got));
+} else if (errno == EINVAL) {
+fprintf(stderr, "System does not support SVE\n");
+} else {
+perror("prctl PR_SVE_SET_VL");
+}
+exit(EXIT_FAILURE);
 }
-exit(EXIT_FAILURE);
 }
 }
 
diff --git a/risu_reginfo_arm.c b/risu_reginfo_arm.c
index 47c52e8..202120b 100644
--- a/risu_reginfo_arm.c
+++ b/risu_reginfo_arm.c
@@ -36,6 +36,10 @@ void process_arch_opt(int opt, const char *arg)
 abort();
 }
 
+void arch_init(void)
+{
+}
+
 int reginfo_size(struct reginfo *ri)
 {
 return sizeof(*ri);
diff --git a/risu_reginfo_i386.c b/risu_reginfo_i386.c
index 50505ab..e9730be 100644
--- a/risu_reginfo_i386.c
+++ b/risu_reginfo_i386.c
@@ -74,6 +74,10 @@ void process_arch_opt(int opt, const char *arg)
 }
 }
 
+void arch_init(void)
+{
+}
+
 int reginfo_size(struct reginfo *ri)
 {
 return sizeof(*ri);
diff --git a/risu_reginfo_m68k.c b/risu_reginfo_m68k.c
index 4eb30cd..4c25e77 100644
--- a/risu_reginfo_m68k.c
+++ b/risu_reginfo_m68k.c
@@ -23,6 +23,10 @@ void process_arch_opt(int opt, const char *arg)
 abort();
 }
 
+void arch_init(void)
+{
+}
+
 int reginfo_size(struct reginfo *ri)
 {
 return sizeof(*ri);
diff --git a/risu_reginfo_ppc64.c b/risu_reginfo_ppc64.c
index 39e8f1c..c80e387 100644
--- a/risu_reginfo_ppc64.c
+++ b/risu_reginfo_ppc64.c
@@ -32,6 +32,10 @@ void process_arch_opt(int opt, const char *arg)
 abort();
 }
 
+void arch_init(void)
+{
+}
+
 int reginfo_size(struct reginfo *ri)
 {
 return sizeof(*ri);
-- 
2.34.1




[RISU PATCH v4 24/29] Add --fulldump and --diffdump options

2022-07-08 Thread Richard Henderson
These allow the inspection of the trace files.

Signed-off-by: Richard Henderson 
---
 risu.c | 117 +
 1 file changed, 102 insertions(+), 15 deletions(-)

diff --git a/risu.c b/risu.c
index f613fa9..8d907d9 100644
--- a/risu.c
+++ b/risu.c
@@ -484,23 +484,101 @@ static int apprentice(void)
 }
 }
 
-static int ismaster;
+static int dump_trace(bool isfull)
+{
+RisuResult res;
+int tick = 0;
+
+while (1) {
+struct reginfo *this_ri;
+
+this_ri = [tick & 1];
+res = recv_register_info(this_ri);
+
+switch (res) {
+case RES_OK:
+switch (header.risu_op) {
+case OP_COMPARE:
+case OP_TESTEND:
+case OP_SIGILL:
+printf("%s: (pc %#lx)\n", op_name(header.risu_op),
+   (unsigned long)header.pc);
+
+if (isfull || tick == 0) {
+reginfo_dump(this_ri, stdout);
+} else {
+struct reginfo *prev_ri = [(tick - 1) & 1];
+
+if (reginfo_is_eq(prev_ri, this_ri)) {
+/*
+ * ??? There should never be no change -- at minimum
+ * PC should have advanced.  But for completeness...
+ */
+printf("change detail: none\n");
+} else {
+printf("change detail (prev : next):\n");
+reginfo_dump_mismatch(prev_ri, this_ri, stdout);
+}
+}
+putchar('\n');
+if (header.risu_op == OP_TESTEND) {
+return EXIT_SUCCESS;
+}
+tick++;
+break;
+
+case OP_COMPAREMEM:
+/* TODO: Dump 8k of data? */
+/* fall through */
+default:
+printf("%s\n", op_name(header.risu_op));
+break;
+}
+break;
+
+case RES_BAD_IO:
+fprintf(stderr, "I/O error\n");
+return EXIT_FAILURE;
+case RES_BAD_MAGIC:
+fprintf(stderr, "Unexpected magic number: %#08x\n", header.magic);
+return EXIT_FAILURE;
+case RES_BAD_SIZE:
+fprintf(stderr, "Unexpected payload size: %u\n", header.size);
+return EXIT_FAILURE;
+case RES_BAD_OP:
+fprintf(stderr, "Unexpected opcode: %d\n", header.risu_op);
+return EXIT_FAILURE;
+default:
+fprintf(stderr, "Unexpected recv result %d\n", res);
+return EXIT_FAILURE;
+}
+}
+}
+
+enum {
+DO_APPRENTICE,
+DO_MASTER,
+DO_FULLDUMP,
+DO_DIFFDUMP,
+};
+
+static int operation = DO_APPRENTICE;
 
 static void usage(void)
 {
 fprintf(stderr,
-"Usage: risu [--master] [--host ] [--port ] "
-"\n\n");
-fprintf(stderr,
-"Run through the pattern file verifying each instruction\n");
-fprintf(stderr, "between master and apprentice risu processes.\n\n");
-fprintf(stderr, "Options:\n");
-fprintf(stderr, "  --master  Be the master (server)\n");
-fprintf(stderr, "  -t, --trace=FILE  Record/playback " TRACE_TYPE " trace 
file\n");
-fprintf(stderr,
-"  -h, --host=HOST   Specify master host machine (apprentice only)"
-"\n");
-fprintf(stderr,
+"Usage: risu [--master|--fulldump|--diffdump]\n"
+"[--host ] [--port ] \n"
+"\n"
+"Run through the pattern file verifying each instruction\n"
+"between master and apprentice risu processes.\n"
+"\n"
+"Options:\n"
+"  --master  Be the master (server)\n"
+"  --fulldumpDump each record\n"
+"  --diffdumpDump difference between each record\n"
+"  -t, --trace=FILE  Record/playback " TRACE_TYPE " trace file\n"
+"  -h, --host=HOST   Specify master host machine\n"
 "  -p, --port=PORT   Specify the port to connect to/listen on "
 "(default 9191)\n");
 if (arch_extra_help) {
@@ -512,7 +590,9 @@ static struct option * setup_options(char **short_opts)
 {
 static struct option default_longopts[] = {
 {"help", no_argument, 0, '?'},
-{"master", no_argument, , 1},
+{"master", no_argument, , DO_MASTER},
+{"fulldump", no_argument, , DO_FULLDUMP},
+{"diffdump", no_argument, , DO_DIFFDUMP},
 {"host", required_argument, 0, 'h'},
 {"port", required_argument, 0, 'p'},
 {"trace", required_argument, 0, 't'},
@@ -520,7 +600,7 @@ static struct option * setup_options(char **short_opts)
 };
 struct option *lopts = _longopts[0];
 
-*short_opts = "h:p:t:";
+*short_opts = "d:h:p:t:";
 
 if (arch_long_opts) {
   

[RISU PATCH v4 18/29] Compute reginfo_size based on the reginfo

2022-07-08 Thread Richard Henderson
This will allow dumping of SVE frames without having
to know the SVE vector length beforehand.

Signed-off-by: Richard Henderson 
---
 risu.h | 2 +-
 risu.c | 9 +++--
 risu_reginfo_aarch64.c | 4 ++--
 risu_reginfo_arm.c | 4 ++--
 risu_reginfo_i386.c| 4 ++--
 risu_reginfo_m68k.c| 4 ++--
 risu_reginfo_ppc64.c   | 4 ++--
 7 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/risu.h b/risu.h
index bfcf0af..3cad3d5 100644
--- a/risu.h
+++ b/risu.h
@@ -126,6 +126,6 @@ int reginfo_dump(struct reginfo *ri, FILE * f);
 int reginfo_dump_mismatch(struct reginfo *m, struct reginfo *a, FILE *f);
 
 /* return size of reginfo */
-const int reginfo_size(void);
+int reginfo_size(struct reginfo *ri);
 
 #endif /* RISU_H */
diff --git a/risu.c b/risu.c
index a248db1..a70b778 100644
--- a/risu.c
+++ b/risu.c
@@ -125,7 +125,7 @@ static RisuResult send_register_info(void *uc)
 case OP_TESTEND:
 case OP_COMPARE:
 case OP_SIGILL:
-header.size = reginfo_size();
+header.size = reginfo_size([MASTER]);
 extra = [MASTER];
 break;
 case OP_COMPAREMEM:
@@ -209,7 +209,12 @@ static RisuResult recv_register_info(struct reginfo *ri)
 return RES_BAD_SIZE;
 }
 respond(RES_OK);
-return read_buffer(ri, header.size);
+res = read_buffer(ri, header.size);
+if (res == RES_OK && header.size != reginfo_size(ri)) {
+/* The payload size is not self-consistent with the data. */
+return RES_BAD_SIZE;
+}
+return res;
 
 case OP_COMPAREMEM:
 if (header.size != MEMBLOCKLEN) {
diff --git a/risu_reginfo_aarch64.c b/risu_reginfo_aarch64.c
index 028c690..7044648 100644
--- a/risu_reginfo_aarch64.c
+++ b/risu_reginfo_aarch64.c
@@ -69,7 +69,7 @@ void process_arch_opt(int opt, const char *arg)
 #endif
 }
 
-const int reginfo_size(void)
+int reginfo_size(struct reginfo *ri)
 {
 int size = offsetof(struct reginfo, simd.end);
 #ifdef SVE_MAGIC
@@ -194,7 +194,7 @@ void reginfo_init(struct reginfo *ri, ucontext_t *uc)
 /* reginfo_is_eq: compare the reginfo structs, returns nonzero if equal */
 int reginfo_is_eq(struct reginfo *r1, struct reginfo *r2)
 {
-return memcmp(r1, r2, reginfo_size()) == 0;
+return memcmp(r1, r2, reginfo_size(r1)) == 0;
 }
 
 #ifdef SVE_MAGIC
diff --git a/risu_reginfo_arm.c b/risu_reginfo_arm.c
index 3662f12..47c52e8 100644
--- a/risu_reginfo_arm.c
+++ b/risu_reginfo_arm.c
@@ -36,9 +36,9 @@ void process_arch_opt(int opt, const char *arg)
 abort();
 }
 
-const int reginfo_size(void)
+int reginfo_size(struct reginfo *ri)
 {
-return sizeof(struct reginfo);
+return sizeof(*ri);
 }
 
 static void reginfo_init_vfp(struct reginfo *ri, ucontext_t *uc)
diff --git a/risu_reginfo_i386.c b/risu_reginfo_i386.c
index 60fc239..50505ab 100644
--- a/risu_reginfo_i386.c
+++ b/risu_reginfo_i386.c
@@ -74,9 +74,9 @@ void process_arch_opt(int opt, const char *arg)
 }
 }
 
-const int reginfo_size(void)
+int reginfo_size(struct reginfo *ri)
 {
-return sizeof(struct reginfo);
+return sizeof(*ri);
 }
 
 static void *xsave_feature_buf(struct _xstate *xs, int feature)
diff --git a/risu_reginfo_m68k.c b/risu_reginfo_m68k.c
index 32b28c8..4eb30cd 100644
--- a/risu_reginfo_m68k.c
+++ b/risu_reginfo_m68k.c
@@ -23,9 +23,9 @@ void process_arch_opt(int opt, const char *arg)
 abort();
 }
 
-const int reginfo_size(void)
+int reginfo_size(struct reginfo *ri)
 {
-return sizeof(struct reginfo);
+return sizeof(*ri);
 }
 
 /* reginfo_init: initialize with a ucontext */
diff --git a/risu_reginfo_ppc64.c b/risu_reginfo_ppc64.c
index 071c951..39e8f1c 100644
--- a/risu_reginfo_ppc64.c
+++ b/risu_reginfo_ppc64.c
@@ -32,9 +32,9 @@ void process_arch_opt(int opt, const char *arg)
 abort();
 }
 
-const int reginfo_size(void)
+int reginfo_size(struct reginfo *ri)
 {
-return sizeof(struct reginfo);
+return sizeof(*ri);
 }
 
 /* reginfo_init: initialize with a ucontext */
-- 
2.34.1




[RISU PATCH v4 22/29] ppc64: Use uint64_t to represent double

2022-07-08 Thread Richard Henderson
We want to do exact bitwise comparisons of the data,
not be held hostage to IEEE comparisons and NaNs.

Signed-off-by: Richard Henderson 
---
 risu_reginfo_ppc64.h |  3 ++-
 risu_reginfo_ppc64.c | 29 +
 2 files changed, 11 insertions(+), 21 deletions(-)

diff --git a/risu_reginfo_ppc64.h b/risu_reginfo_ppc64.h
index 7f2c962..4b1d8bd 100644
--- a/risu_reginfo_ppc64.h
+++ b/risu_reginfo_ppc64.h
@@ -20,7 +20,8 @@ struct reginfo {
 uint64_t nip;
 uint64_t prev_addr;
 gregset_t gregs;
-fpregset_t fpregs;
+uint64_t fpregs[32];
+uint64_t fpscr;
 vrregset_t vrregs;
 };
 
diff --git a/risu_reginfo_ppc64.c b/risu_reginfo_ppc64.c
index c80e387..9899b36 100644
--- a/risu_reginfo_ppc64.c
+++ b/risu_reginfo_ppc64.c
@@ -45,6 +45,7 @@ int reginfo_size(struct reginfo *ri)
 void reginfo_init(struct reginfo *ri, ucontext_t *uc)
 {
 int i;
+
 memset(ri, 0, sizeof(*ri));
 
 ri->faulting_insn = *((uint32_t *) uc->uc_mcontext.regs->nip);
@@ -54,16 +55,11 @@ void reginfo_init(struct reginfo *ri, ucontext_t *uc)
 ri->gregs[i] = uc->uc_mcontext.gp_regs[i];
 }
 
-for (i = 0; i < NFPREG; i++) {
-ri->fpregs[i] = uc->uc_mcontext.fp_regs[i];
-}
+memcpy(ri->fpregs, uc->uc_mcontext.fp_regs, 32 * sizeof(double));
+ri->fpscr = uc->uc_mcontext.fp_regs[32];
 
-for (i = 0; i < 32; i++) {
-ri->vrregs.vrregs[i][0] = uc->uc_mcontext.v_regs->vrregs[i][0];
-ri->vrregs.vrregs[i][1] = uc->uc_mcontext.v_regs->vrregs[i][1];
-ri->vrregs.vrregs[i][2] = uc->uc_mcontext.v_regs->vrregs[i][2];
-ri->vrregs.vrregs[i][3] = uc->uc_mcontext.v_regs->vrregs[i][3];
-}
+memcpy(ri->vrregs.vrregs, uc->uc_mcontext.v_regs->vrregs,
+   sizeof(ri->vrregs.vrregs[0]) * 32);
 ri->vrregs.vscr = uc->uc_mcontext.v_regs->vscr;
 ri->vrregs.vrsave = uc->uc_mcontext.v_regs->vrsave;
 }
@@ -91,10 +87,6 @@ int reginfo_is_eq(struct reginfo *m, struct reginfo *a)
 }
 
 for (i = 0; i < 32; i++) {
-if (isnan(m->fpregs[i]) && isnan(a->fpregs[i])) {
-continue;
-}
-
 if (m->fpregs[i] != a->fpregs[i]) {
 return 0;
 }
@@ -141,10 +133,10 @@ int reginfo_dump(struct reginfo *ri, FILE * f)
 fprintf(f, "\tdscr   : %16lx\n\n", ri->gregs[44]);
 
 for (i = 0; i < 16; i++) {
-fprintf(f, "\tf%2d: %.4f\tf%2d: %.4f\n", i, ri->fpregs[i],
+fprintf(f, "\tf%2d: %016lx\tf%2d: %016lx\n", i, ri->fpregs[i],
 i + 16, ri->fpregs[i + 16]);
 }
-fprintf(f, "\tfpscr: %f\n\n", ri->fpregs[32]);
+fprintf(f, "\tfpscr: %016lx\n\n", ri->fpscr);
 
 for (i = 0; i < 32; i++) {
 fprintf(f, "vr%02d: %8x, %8x, %8x, %8x\n", i,
@@ -181,13 +173,10 @@ int reginfo_dump_mismatch(struct reginfo *m, struct 
reginfo *a, FILE *f)
 }
 
 for (i = 0; i < 32; i++) {
-if (isnan(m->fpregs[i]) && isnan(a->fpregs[i])) {
-continue;
-}
-
 if (m->fpregs[i] != a->fpregs[i]) {
 fprintf(f, "Mismatch: Register f%d\n", i);
-fprintf(f, "m: [%f] != a: [%f]\n", m->fpregs[i], a->fpregs[i]);
+fprintf(f, "m: [%016lx] != a: [%016lx]\n",
+m->fpregs[i], a->fpregs[i]);
 }
 }
 
-- 
2.34.1




Re: [Qemu-devel] [PATCH v1] virtio-scsi: Send "REPORTED LUNS CHANGED" sense data upon a disk hotplug.

2022-07-08 Thread Venu Busireddy


Ping?

On 2022-05-31 15:22:37 -0500, Venu Busireddy wrote:
> When a disk is hotplugged, QEMU reports a VIRTIO_SCSI_EVT_RESET_RESCAN
> event, but does not send the "REPORTED LUNS CHANGED" sense data. This
> does not conform to Section 5.6.6.3 of the VirtIO specification, which
> states "Events will also be reported via sense codes..." SCSI layer on
> Solaris depends on this sense data, and hence does not recognize the
> hotplugged disks (until a reboot).
> 
> As specified in SAM-4, Section 5.14, return a CHECK_CONDITION status with
> a sense data of 0x06/0x3F/0x0E, whenever a command other than INQUIRY,
> REPORT_LUNS, or REQUEST_SENSE is received.
> 
> Signed-off-by: Venu Busireddy 
> ---
>  hw/scsi/virtio-scsi.c   | 15 ++-
>  include/hw/virtio/virtio-scsi.h |  1 +
>  2 files changed, 15 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
> index 4141517a..7ae1cfa6e584 100644
> --- a/hw/scsi/virtio-scsi.c
> +++ b/hw/scsi/virtio-scsi.c
> @@ -608,7 +608,19 @@ static void virtio_scsi_command_complete(SCSIRequest *r, 
> size_t resid)
>  
>  req->resp.cmd.response = VIRTIO_SCSI_S_OK;
>  req->resp.cmd.status = r->status;
> -if (req->resp.cmd.status == GOOD) {
> +if (req->dev->reported_luns_changed &&
> +(req->req.cmd.cdb[0] != INQUIRY) &&
> +(req->req.cmd.cdb[0] != REPORT_LUNS) &&
> +(req->req.cmd.cdb[0] != REQUEST_SENSE)) {
> +req->dev->reported_luns_changed = false;
> +req->resp.cmd.resid = 0;
> +req->resp.cmd.status_qualifier = 0;
> +req->resp.cmd.status = CHECK_CONDITION;
> +sense_len = scsi_build_sense(sense, 
> SENSE_CODE(REPORTED_LUNS_CHANGED));
> +qemu_iovec_from_buf(>resp_iov, sizeof(req->resp.cmd),
> +sense, sense_len);
> +req->resp.cmd.sense_len = virtio_tswap32(vdev, sense_len);
> +} else if (req->resp.cmd.status == GOOD) {
>  req->resp.cmd.resid = virtio_tswap32(vdev, resid);
>  } else {
>  req->resp.cmd.resid = 0;
> @@ -956,6 +968,7 @@ static void virtio_scsi_hotplug(HotplugHandler 
> *hotplug_dev, DeviceState *dev,
> VIRTIO_SCSI_T_TRANSPORT_RESET,
> VIRTIO_SCSI_EVT_RESET_RESCAN);
>  virtio_scsi_release(s);
> +s->reported_luns_changed = true;
>  }
>  }
>  
> diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
> index a36aad9c8695..efbcf9ba069a 100644
> --- a/include/hw/virtio/virtio-scsi.h
> +++ b/include/hw/virtio/virtio-scsi.h
> @@ -81,6 +81,7 @@ struct VirtIOSCSI {
>  SCSIBus bus;
>  int resetting;
>  bool events_dropped;
> +bool reported_luns_changed;
>  
>  /* Fields for dataplane below */
>  AioContext *ctx; /* one iothread per virtio-scsi-pci for now */



[RISU PATCH v4 19/29] aarch64: Assume system support for SVE

2022-07-08 Thread Richard Henderson
SVE support is no longer new; assume it is present.

Signed-off-by: Richard Henderson 
---
 risu_reginfo_aarch64.h |  4 
 risu_reginfo_aarch64.c | 24 
 2 files changed, 28 deletions(-)

diff --git a/risu_reginfo_aarch64.h b/risu_reginfo_aarch64.h
index c33b86f..efbca56 100644
--- a/risu_reginfo_aarch64.h
+++ b/risu_reginfo_aarch64.h
@@ -20,7 +20,6 @@ struct simd_reginfo {
 char end[0];
 };
 
-#ifdef SVE_MAGIC
 struct sve_reginfo {
 /* SVE */
 uint16_tvl; /* current VL */
@@ -29,7 +28,6 @@ struct sve_reginfo {
 uint16_tffr[SVE_VQ_MAX];
 char end[0];
 };
-#endif
 
 /* The kernel headers set this based on future arch extensions.
The current arch maximum is 16.  Save space below.  */
@@ -50,9 +48,7 @@ struct reginfo {
 
 union {
 struct simd_reginfo simd;
-#ifdef SVE_MAGIC
 struct sve_reginfo sve;
-#endif
 };
 };
 
diff --git a/risu_reginfo_aarch64.c b/risu_reginfo_aarch64.c
index 7044648..16a57ba 100644
--- a/risu_reginfo_aarch64.c
+++ b/risu_reginfo_aarch64.c
@@ -24,11 +24,6 @@
 #include "risu.h"
 #include "risu_reginfo_aarch64.h"
 
-#ifndef SVE_MAGIC
-const struct option * const arch_long_opts;
-const char * const arch_extra_help;
-#else
-
 /* Should we test SVE register state */
 static int test_sve;
 static const struct option extra_opts[] = {
@@ -39,11 +34,9 @@ static const struct option extra_opts[] = {
 const struct option * const arch_long_opts = _opts[0];
 const char * const arch_extra_help
 = "  --test-sve=Compare SVE registers with VQ\n";
-#endif
 
 void process_arch_opt(int opt, const char *arg)
 {
-#ifdef SVE_MAGIC
 long want, got;
 
 assert(opt == FIRST_ARCH_OPT);
@@ -64,19 +57,14 @@ void process_arch_opt(int opt, const char *arg)
 }
 exit(EXIT_FAILURE);
 }
-#else
-abort();
-#endif
 }
 
 int reginfo_size(struct reginfo *ri)
 {
 int size = offsetof(struct reginfo, simd.end);
-#ifdef SVE_MAGIC
 if (test_sve) {
 size = offsetof(struct reginfo, sve.end);
 }
-#endif
 return size;
 }
 
@@ -86,9 +74,7 @@ void reginfo_init(struct reginfo *ri, ucontext_t *uc)
 int i;
 struct _aarch64_ctx *ctx, *extra = NULL;
 struct fpsimd_context *fp = NULL;
-#ifdef SVE_MAGIC
 struct sve_context *sve = NULL;
-#endif
 
 /* necessary to be able to compare with memcmp later */
 memset(ri, 0, sizeof(*ri));
@@ -110,14 +96,12 @@ void reginfo_init(struct reginfo *ri, ucontext_t *uc)
 case FPSIMD_MAGIC:
 fp = (void *)ctx;
 break;
-#ifdef SVE_MAGIC
 case SVE_MAGIC:
 sve = (void *)ctx;
 break;
 case EXTRA_MAGIC:
 extra = (void *)((struct extra_context *)(ctx))->datap;
 break;
-#endif
 case 0:
 /* End of list.  */
 ctx = extra;
@@ -137,7 +121,6 @@ void reginfo_init(struct reginfo *ri, ucontext_t *uc)
 ri->fpsr = fp->fpsr;
 ri->fpcr = fp->fpcr;
 
-#ifdef SVE_MAGIC
 if (test_sve) {
 int vq = test_sve;
 
@@ -184,7 +167,6 @@ void reginfo_init(struct reginfo *ri, ucontext_t *uc)
 
 return;
 }
-#endif /* SVE_MAGIC */
 
 for (i = 0; i < 32; i++) {
 ri->simd.vregs[i] = fp->vregs[i];
@@ -197,7 +179,6 @@ int reginfo_is_eq(struct reginfo *r1, struct reginfo *r2)
 return memcmp(r1, r2, reginfo_size(r1)) == 0;
 }
 
-#ifdef SVE_MAGIC
 static int sve_zreg_is_eq(int vq, const void *z1, const void *z2)
 {
 return memcmp(z1, z2, vq * 16) == 0;
@@ -241,7 +222,6 @@ static void sve_dump_zreg_diff(FILE *f, int vq, const 
__uint128_t *z1,
 }
 }
 }
-#endif
 
 /* reginfo_dump: print state to a stream, returns nonzero on success */
 int reginfo_dump(struct reginfo *ri, FILE * f)
@@ -259,7 +239,6 @@ int reginfo_dump(struct reginfo *ri, FILE * f)
 fprintf(f, "  fpsr   : %08x\n", ri->fpsr);
 fprintf(f, "  fpcr   : %08x\n", ri->fpcr);
 
-#ifdef SVE_MAGIC
 if (test_sve) {
 int q, vq = test_sve;
 
@@ -287,7 +266,6 @@ int reginfo_dump(struct reginfo *ri, FILE * f)
 
 return !ferror(f);
 }
-#endif
 
 for (i = 0; i < 32; i++) {
 fprintf(f, "  V%-2d: %016" PRIx64 "%016" PRIx64 "\n", i,
@@ -336,7 +314,6 @@ int reginfo_dump_mismatch(struct reginfo *m, struct reginfo 
*a, FILE * f)
 fprintf(f, "  fpcr   : %08x vs %08x\n", m->fpcr, a->fpcr);
 }
 
-#ifdef SVE_MAGIC
 if (test_sve) {
 int vq = sve_vq_from_vl(m->sve.vl);
 
@@ -365,7 +342,6 @@ int reginfo_dump_mismatch(struct reginfo *m, struct reginfo 
*a, FILE * f)
 
 return !ferror(f);
 }
-#endif
 
 for (i = 0; i < 32; i++) {
 if (m->simd.vregs[i] != a->simd.vregs[i]) {
-- 
2.34.1




[RISU PATCH v4 17/29] Add magic and size to the trace header

2022-07-08 Thread Richard Henderson
Sanity check that we're not getting out of sync with
the trace stream.  Losing sync will be especially bad with
the change in size of the sve save data.

Signed-off-by: Richard Henderson 
---
 risu.h |  10 +++-
 risu.c | 162 -
 2 files changed, 136 insertions(+), 36 deletions(-)

diff --git a/risu.h b/risu.h
index dd9fda5..bfcf0af 100644
--- a/risu.h
+++ b/risu.h
@@ -55,7 +55,11 @@ typedef enum {
 RES_END,
 RES_MISMATCH_REG,
 RES_MISMATCH_MEM,
+RES_MISMATCH_OP,
 RES_BAD_IO,
+RES_BAD_MAGIC,
+RES_BAD_SIZE,
+RES_BAD_OP,
 } RisuResult;
 
 /* The memory block should be this long */
@@ -69,10 +73,14 @@ typedef enum {
 struct reginfo;
 
 typedef struct {
-   uintptr_t pc;
+   uint32_t magic;
+   uint32_t size;
uint32_t risu_op;
+   uintptr_t pc;
 } trace_header_t;
 
+#define RISU_MAGIC  (('R' << 24) | ('I' << 16) | ('S' << 8) | 'U')
+
 /* Socket related routines */
 int master_connect(int port);
 int apprentice_connect(const char *hostname, int port);
diff --git a/risu.c b/risu.c
index 80bc3b1..a248db1 100644
--- a/risu.c
+++ b/risu.c
@@ -111,32 +111,54 @@ static RisuResult send_register_info(void *uc)
 uint64_t paramreg;
 RisuResult res;
 RisuOp op;
+void *extra;
 
 reginfo_init([MASTER], uc);
 op = get_risuop([MASTER]);
 
 /* Write a header with PC/op to keep in sync */
+header.magic = RISU_MAGIC;
 header.pc = get_pc([MASTER]);
 header.risu_op = op;
+
+switch (op) {
+case OP_TESTEND:
+case OP_COMPARE:
+case OP_SIGILL:
+header.size = reginfo_size();
+extra = [MASTER];
+break;
+case OP_COMPAREMEM:
+header.size = MEMBLOCKLEN;
+extra = memblock;
+break;
+case OP_SETMEMBLOCK:
+case OP_GETMEMBLOCK:
+header.size = 0;
+extra = NULL;
+break;
+default:
+abort();
+}
+
 res = write_buffer(, sizeof(header));
 if (res != RES_OK) {
 return res;
 }
+if (extra) {
+res = write_buffer(extra, header.size);
+if (res != RES_OK) {
+return res;
+}
+}
 
 switch (op) {
 case OP_COMPARE:
-case OP_TESTEND:
 case OP_SIGILL:
-/*
- * Do a simple register compare on (a) explicit request
- * (b) end of test (c) a non-risuop UNDEF
- */
-res = write_buffer([MASTER], reginfo_size());
-/* For OP_TEST_END, force exit. */
-if (res == RES_OK && op == OP_TESTEND) {
-res = RES_END;
-}
+case OP_COMPAREMEM:
 break;
+case OP_TESTEND:
+return RES_END;
 case OP_SETMEMBLOCK:
 paramreg = get_reginfo_paramreg([MASTER]);
 memblock = (void *)(uintptr_t)paramreg;
@@ -145,12 +167,10 @@ static RisuResult send_register_info(void *uc)
 paramreg = get_reginfo_paramreg([MASTER]);
 set_ucontext_paramreg(uc, paramreg + (uintptr_t)memblock);
 break;
-case OP_COMPAREMEM:
-return write_buffer(memblock, MEMBLOCKLEN);
 default:
 abort();
 }
-return res;
+return RES_OK;
 }
 
 static void master_sigill(int sig, siginfo_t *si, void *uc)
@@ -175,22 +195,35 @@ static RisuResult recv_register_info(struct reginfo *ri)
 return res;
 }
 
-/* send OK for the header */
-respond(RES_OK);
+if (header.magic != RISU_MAGIC) {
+/* If the magic number is wrong, we can't trust the rest. */
+return RES_BAD_MAGIC;
+}
 
 switch (header.risu_op) {
 case OP_COMPARE:
 case OP_TESTEND:
 case OP_SIGILL:
-return read_buffer(ri, reginfo_size());
+/* If we can't store the data, report invalid size. */
+if (header.size > sizeof(*ri)) {
+return RES_BAD_SIZE;
+}
+respond(RES_OK);
+return read_buffer(ri, header.size);
+
 case OP_COMPAREMEM:
+if (header.size != MEMBLOCKLEN) {
+return RES_BAD_SIZE;
+}
+respond(RES_OK);
 return read_buffer(other_memblock, MEMBLOCKLEN);
+
 case OP_SETMEMBLOCK:
 case OP_GETMEMBLOCK:
-return RES_OK;
+return header.size == 0 ? RES_OK : RES_BAD_SIZE;
+
 default:
-/* TODO: Create a better error message. */
-return RES_BAD_IO;
+return RES_BAD_OP;
 }
 }
 
@@ -204,48 +237,71 @@ static RisuResult recv_and_compare_register_info(void *uc)
 
 res = recv_register_info([MASTER]);
 if (res != RES_OK) {
-/* I/O error.  Tell master to exit. */
-respond(RES_END);
-return res;
+goto done;
 }
 
 op = get_risuop([APPRENTICE]);
-if (header.risu_op != op) {
-/* We are out of sync.  Tell master to exit. */
-respond(RES_END);
-return RES_BAD_IO;
-}
 
 switch (op) {
 case OP_COMPARE:
 case OP_TESTEND:
 case OP_SIGILL:
-if (!reginfo_is_eq([MASTER], [APPRENTICE])) {
+/*
+ * If 

[RISU PATCH v4 20/29] aarch64: Reorg sve reginfo to save space

2022-07-08 Thread Richard Henderson
Mirror the signal frame by storing all of the registers
as a lump.  Use the signal macros to pull out the values.

Signed-off-by: Richard Henderson 
---
 risu_reginfo_aarch64.h |  45 ++-
 risu_reginfo_aarch64.c | 171 -
 2 files changed, 108 insertions(+), 108 deletions(-)

diff --git a/risu_reginfo_aarch64.h b/risu_reginfo_aarch64.h
index efbca56..536c12b 100644
--- a/risu_reginfo_aarch64.h
+++ b/risu_reginfo_aarch64.h
@@ -13,27 +13,17 @@
 #ifndef RISU_REGINFO_AARCH64_H
 #define RISU_REGINFO_AARCH64_H
 
-#include  /* for SVE_MAGIC */
-
-struct simd_reginfo {
-__uint128_t vregs[32];
-char end[0];
-};
-
-struct sve_reginfo {
-/* SVE */
-uint16_tvl; /* current VL */
-__uint128_t zregs[SVE_NUM_ZREGS][SVE_VQ_MAX];
-uint16_tpregs[SVE_NUM_PREGS][SVE_VQ_MAX];
-uint16_tffr[SVE_VQ_MAX];
-char end[0];
-};
+#include 
 
 /* The kernel headers set this based on future arch extensions.
The current arch maximum is 16.  Save space below.  */
 #undef SVE_VQ_MAX
 #define SVE_VQ_MAX 16
 
+#define ROUND_UP(SIZE, POW2)(((SIZE) + (POW2) - 1) & -(POW2))
+#define RISU_SVE_REGS_SIZE(VQ)  ROUND_UP(SVE_SIG_REGS_SIZE(VQ), 16)
+#define RISU_SIMD_REGS_SIZE (32 * 16)
+
 struct reginfo {
 uint64_t fault_address;
 uint64_t regs[31];
@@ -45,11 +35,28 @@ struct reginfo {
 /* FP/SIMD */
 uint32_t fpsr;
 uint32_t fpcr;
+uint16_t sve_vl;
+uint16_t reserved;
 
-union {
-struct simd_reginfo simd;
-struct sve_reginfo sve;
-};
+char extra[RISU_SVE_REGS_SIZE(SVE_VQ_MAX)]
+__attribute__((aligned(16)));
 };
 
+static inline uint64_t *reginfo_vreg(struct reginfo *ri, int i)
+{
+return (uint64_t *)&ri->extra[i * 16];
+}
+
+static inline uint64_t *reginfo_zreg(struct reginfo *ri, int vq, int i)
+{
+return (uint64_t *)&ri->extra[SVE_SIG_ZREG_OFFSET(vq, i) -
+  SVE_SIG_REGS_OFFSET];
+}
+
+static inline uint16_t *reginfo_preg(struct reginfo *ri, int vq, int i)
+{
+return (uint16_t *)&ri->extra[SVE_SIG_PREG_OFFSET(vq, i) -
+  SVE_SIG_REGS_OFFSET];
+}
+
 #endif /* RISU_REGINFO_AARCH64_H */
diff --git a/risu_reginfo_aarch64.c b/risu_reginfo_aarch64.c
index 16a57ba..81a77ba 100644
--- a/risu_reginfo_aarch64.c
+++ b/risu_reginfo_aarch64.c
@@ -61,9 +61,13 @@ void process_arch_opt(int opt, const char *arg)
 
 int reginfo_size(struct reginfo *ri)
 {
-int size = offsetof(struct reginfo, simd.end);
-if (test_sve) {
-size = offsetof(struct reginfo, sve.end);
+int size = offsetof(struct reginfo, extra);
+
+if (ri->sve_vl) {
+int vq = sve_vq_from_vl(ri->sve_vl);
+size += RISU_SVE_REGS_SIZE(vq);
+} else {
+size += RISU_SIMD_REGS_SIZE;
 }
 return size;
 }
@@ -128,6 +132,7 @@ void reginfo_init(struct reginfo *ri, ucontext_t *uc)
 fprintf(stderr, "risu_reginfo_aarch64: failed to get SVE state\n");
 return;
 }
+
 if (sve->vl != sve_vl_from_vq(vq)) {
 fprintf(stderr, "risu_reginfo_aarch64: "
 "unexpected SVE state: %d != %d\n",
@@ -135,42 +140,22 @@ void reginfo_init(struct reginfo *ri, ucontext_t *uc)
 return;
 }
 
-ri->sve.vl = sve->vl;
-
-if (sve->head.size < SVE_SIG_CONTEXT_SIZE(vq)) {
-if (sve->head.size == sizeof(*sve)) {
-/* SVE state is empty -- not an error.  */
-} else {
-fprintf(stderr, "risu_reginfo_aarch64: "
-"failed to get complete SVE state\n");
-}
+if (sve->head.size <= SVE_SIG_CONTEXT_SIZE(0)) {
+/* Only AdvSIMD state is present. */
+} else if (sve->head.size < SVE_SIG_CONTEXT_SIZE(vq)) {
+fprintf(stderr, "risu_reginfo_aarch64: "
+"failed to get complete SVE state\n");
+return;
+} else {
+ri->sve_vl = sve->vl;
+memcpy(reginfo_zreg(ri, vq, 0),
+   (char *)sve + SVE_SIG_REGS_OFFSET,
+   SVE_SIG_REGS_SIZE(vq));
 return;
 }
-
-/* Copy ZREG's one at a time */
-for (i = 0; i < SVE_NUM_ZREGS; i++) {
-memcpy(>sve.zregs[i],
-   (void *)sve + SVE_SIG_ZREG_OFFSET(vq, i),
-   SVE_SIG_ZREG_SIZE(vq));
-}
-
-/* Copy PREG's one at a time */
-for (i = 0; i < SVE_NUM_PREGS; i++) {
-memcpy(>sve.pregs[i],
-   (void *)sve + SVE_SIG_PREG_OFFSET(vq, i),
-   SVE_SIG_PREG_SIZE(vq));
-}
-
-/* Finally the FFR */
-memcpy(>sve.ffr, (void *)sve + SVE_SIG_FFR_OFFSET(vq),
-   SVE_SIG_FFR_SIZE(vq));
-
-return;
 }
 
-for (i = 0; i < 32; i++) {
-ri->simd.vregs[i] = fp->vregs[i];
-}
+memcpy(reginfo_vreg(ri, 0), fp->vregs, RISU_SIMD_REGS_SIZE);
 }
 
 /* 

[RISU PATCH v4 11/29] Always write for --master

2022-07-08 Thread Richard Henderson
For trace, master of course must write to the file we create.

For sockets, we can report mismatches from either end.  At present,
we are reporting mismatches from master.  Reverse that so that we
report mismatches from the apprentice, just as we do for trace.

Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 risu.h|  2 +-
 reginfo.c | 38 --
 risu.c| 96 ++-
 3 files changed, 61 insertions(+), 75 deletions(-)

diff --git a/risu.h b/risu.h
index c83b803..f383b64 100644
--- a/risu.h
+++ b/risu.h
@@ -106,7 +106,7 @@ RisuResult recv_and_compare_register_info(void *uc);
  * Should return 0 if it was a good match (ie end of test)
  * and 1 for a mismatch.
  */
-int report_match_status(bool trace);
+int report_match_status(void);
 
 /* Interface provided by CPU-specific code: */
 
diff --git a/reginfo.c b/reginfo.c
index fee025e..c37c5df 100644
--- a/reginfo.c
+++ b/reginfo.c
@@ -14,9 +14,8 @@
 #include 
 #include "risu.h"
 
-struct reginfo master_ri, apprentice_ri;
-
-uint8_t apprentice_memblock[MEMBLOCKLEN];
+static struct reginfo master_ri, apprentice_ri;
+static uint8_t master_memblock[MEMBLOCKLEN];
 
 static int mem_used;
 static int packet_mismatch;
@@ -82,8 +81,8 @@ RisuResult recv_and_compare_register_info(void *uc)
 trace_header_t header;
 RisuOp op;
 
-reginfo_init(_ri, uc);
-op = get_risuop(_ri);
+reginfo_init(_ri, uc);
+op = get_risuop(_ri);
 
 res = read_buffer(&header, sizeof(header));
 if (res != RES_OK) {
@@ -107,7 +106,7 @@ RisuResult recv_and_compare_register_info(void *uc)
 /* Do a simple register compare on (a) explicit request
  * (b) end of test (c) a non-risuop UNDEF
  */
-res = read_buffer(_ri, reginfo_size());
+res = read_buffer(_ri, reginfo_size());
 if (res != RES_OK) {
 packet_mismatch = 1;
 } else if (!reginfo_is_eq(_ri, _ri)) {
@@ -119,18 +118,18 @@ RisuResult recv_and_compare_register_info(void *uc)
 respond(res);
 break;
 case OP_SETMEMBLOCK:
-memblock = (void *)(uintptr_t)get_reginfo_paramreg(_ri);
+memblock = (void *)(uintptr_t)get_reginfo_paramreg(_ri);
 break;
 case OP_GETMEMBLOCK:
-set_ucontext_paramreg(uc, get_reginfo_paramreg(_ri) +
+set_ucontext_paramreg(uc, get_reginfo_paramreg(_ri) +
   (uintptr_t)memblock);
 break;
 case OP_COMPAREMEM:
 mem_used = 1;
-res = read_buffer(apprentice_memblock, MEMBLOCKLEN);
+res = read_buffer(master_memblock, MEMBLOCKLEN);
 if (res != RES_OK) {
 packet_mismatch = 1;
-} else if (memcmp(memblock, apprentice_memblock, MEMBLOCKLEN) != 0) {
+} else if (memcmp(memblock, master_memblock, MEMBLOCKLEN) != 0) {
 /* memory mismatch */
 res = RES_MISMATCH;
 }
@@ -149,18 +148,13 @@ RisuResult recv_and_compare_register_info(void *uc)
  * Should return 0 if it was a good match (ie end of test)
  * and 1 for a mismatch.
  */
-int report_match_status(bool trace)
+int report_match_status(void)
 {
 int resp = 0;
 fprintf(stderr, "match status...\n");
 if (packet_mismatch) {
 fprintf(stderr, "packet mismatch (probably disagreement "
 "about UNDEF on load/store)\n");
-/* We don't have valid reginfo from the apprentice side
- * so stop now rather than printing anything about it.
- */
-fprintf(stderr, "%s reginfo:\n", trace ? "this" : "master");
-reginfo_dump(_ri, stderr);
 return 1;
 }
 if (!reginfo_is_eq(_ri, _ri)) {
@@ -168,7 +162,7 @@ int report_match_status(bool trace)
 resp = 1;
 }
 if (mem_used
-&& memcmp(memblock, _memblock, MEMBLOCKLEN) != 0) {
+&& memcmp(memblock, _memblock, MEMBLOCKLEN) != 0) {
 fprintf(stderr, "mismatch on memory!\n");
 resp = 1;
 }
@@ -177,15 +171,11 @@ int report_match_status(bool trace)
 return 0;
 }
 
-fprintf(stderr, "%s reginfo:\n", trace ? "this" : "master");
+fprintf(stderr, "master reginfo:\n");
 reginfo_dump(_ri, stderr);
-fprintf(stderr, "%s reginfo:\n", trace ? "trace" : "apprentice");
+fprintf(stderr, "apprentice reginfo:\n");
 reginfo_dump(_ri, stderr);
 
-if (trace) {
-reginfo_dump_mismatch(_ri, _ri, stderr);
-} else {
-reginfo_dump_mismatch(_ri, _ri, stderr);
-}
+reginfo_dump_mismatch(_ri, _ri, stderr);
 return resp;
 }
diff --git a/risu.c b/risu.c
index f238117..199f697 100644
--- a/risu.c
+++ b/risu.c
@@ -102,11 +102,7 @@ static void master_sigill(int sig, siginfo_t *si, void *uc)
 RisuResult r;
 signal_count++;
 
-if (trace) {
-r = send_register_info(uc);
-} else {
-r = recv_and_compare_register_info(uc);
-}
+r = send_register_info(uc);
 if (r == RES_OK) {
 advance_pc(uc);

[RISU PATCH v4 16/29] Split out recv_register_info

2022-07-08 Thread Richard Henderson
We will want to share this code when dumping.

Signed-off-by: Richard Henderson 
---
 risu.c | 50 ++
 1 file changed, 34 insertions(+), 16 deletions(-)

diff --git a/risu.c b/risu.c
index b91ad38..80bc3b1 100644
--- a/risu.c
+++ b/risu.c
@@ -166,6 +166,34 @@ static void master_sigill(int sig, siginfo_t *si, void *uc)
 }
 }
 
+static RisuResult recv_register_info(struct reginfo *ri)
+{
+RisuResult res;
+
+res = read_buffer(&header, sizeof(header));
+if (res != RES_OK) {
+return res;
+}
+
+/* send OK for the header */
+respond(RES_OK);
+
+switch (header.risu_op) {
+case OP_COMPARE:
+case OP_TESTEND:
+case OP_SIGILL:
+return read_buffer(ri, reginfo_size());
+case OP_COMPAREMEM:
+return read_buffer(other_memblock, MEMBLOCKLEN);
+case OP_SETMEMBLOCK:
+case OP_GETMEMBLOCK:
+return RES_OK;
+default:
+/* TODO: Create a better error message. */
+return RES_BAD_IO;
+}
+}
+
 static RisuResult recv_and_compare_register_info(void *uc)
 {
 uint64_t paramreg;
@@ -173,33 +201,26 @@ static RisuResult recv_and_compare_register_info(void *uc)
 RisuOp op;
 
 reginfo_init([APPRENTICE], uc);
-op = get_risuop([APPRENTICE]);
 
-res = read_buffer(, sizeof(header));
+res = recv_register_info([MASTER]);
 if (res != RES_OK) {
+/* I/O error.  Tell master to exit. */
+respond(RES_END);
 return res;
 }
 
+op = get_risuop([APPRENTICE]);
 if (header.risu_op != op) {
 /* We are out of sync.  Tell master to exit. */
 respond(RES_END);
 return RES_BAD_IO;
 }
 
-/* send OK for the header */
-respond(RES_OK);
-
 switch (op) {
 case OP_COMPARE:
 case OP_TESTEND:
 case OP_SIGILL:
-/* Do a simple register compare on (a) explicit request
- * (b) end of test (c) a non-risuop UNDEF
- */
-res = read_buffer([MASTER], reginfo_size());
-if (res != RES_OK) {
-/* fail */
-} else if (!reginfo_is_eq([MASTER], [APPRENTICE])) {
+if (!reginfo_is_eq([MASTER], [APPRENTICE])) {
 /* register mismatch */
 res = RES_MISMATCH_REG;
 } else if (op == OP_TESTEND) {
@@ -216,10 +237,7 @@ static RisuResult recv_and_compare_register_info(void *uc)
 set_ucontext_paramreg(uc, paramreg + (uintptr_t)memblock);
 break;
 case OP_COMPAREMEM:
-res = read_buffer(other_memblock, MEMBLOCKLEN);
-if (res != RES_OK) {
-/* fail */
-} else if (memcmp(memblock, other_memblock, MEMBLOCKLEN) != 0) {
+if (memcmp(memblock, other_memblock, MEMBLOCKLEN) != 0) {
 /* memory mismatch */
 res = RES_MISMATCH_MEM;
 }
-- 
2.34.1




[RISU PATCH v4 12/29] Simplify syncing with master

2022-07-08 Thread Richard Henderson
Do not pass status like RES_BAD_IO from apprentice to master.
This means that when master reports i/o error that we know it
came from master; the apprentice will report its own i/o error.

Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 reginfo.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/reginfo.c b/reginfo.c
index c37c5df..31bc699 100644
--- a/reginfo.c
+++ b/reginfo.c
@@ -90,10 +90,9 @@ RisuResult recv_and_compare_register_info(void *uc)
 }
 
 if (header.risu_op != op) {
-/* We are out of sync */
-res = RES_BAD_IO;
-respond(res);
-return res;
+/* We are out of sync.  Tell master to exit. */
+respond(RES_END);
+return RES_BAD_IO;
 }
 
 /* send OK for the header */
@@ -115,7 +114,7 @@ RisuResult recv_and_compare_register_info(void *uc)
 } else if (op == OP_TESTEND) {
 res = RES_END;
 }
-respond(res);
+respond(res == RES_OK ? RES_OK : RES_END);
 break;
 case OP_SETMEMBLOCK:
 memblock = (void *)(uintptr_t)get_reginfo_paramreg(_ri);
@@ -133,7 +132,7 @@ RisuResult recv_and_compare_register_info(void *uc)
 /* memory mismatch */
 res = RES_MISMATCH;
 }
-respond(res);
+respond(res == RES_OK ? RES_OK : RES_END);
 break;
 default:
 abort();
-- 
2.34.1




[RISU PATCH v4 13/29] Split RES_MISMATCH for registers and memory

2022-07-08 Thread Richard Henderson
By remembering the specific comparison that failed, we do not
have to try again when it comes time to report.  This makes
the mem_used flag redundant.  Also, packet_mismatch is now
redundant with RES_BAD_IO.

This means that the only thing that report_match_status does
is to report on register status, so rename to report_mismatch_reg.
Also, we know there is a failure, so don't return a status from
the report.

Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 risu.h| 13 ++---
 reginfo.c | 45 -
 risu.c| 10 +++---
 3 files changed, 21 insertions(+), 47 deletions(-)

diff --git a/risu.h b/risu.h
index f383b64..77d6128 100644
--- a/risu.h
+++ b/risu.h
@@ -54,7 +54,8 @@ typedef enum {
 typedef enum {
 RES_OK = 0,
 RES_END,
-RES_MISMATCH,
+RES_MISMATCH_REG,
+RES_MISMATCH_MEM,
 RES_BAD_IO,
 } RisuResult;
 
@@ -100,13 +101,11 @@ RisuResult send_register_info(void *uc);
  */
 RisuResult recv_and_compare_register_info(void *uc);
 
-/* Print a useful report on the status of the last comparison
- * done in recv_and_compare_register_info(). This is called on
- * exit, so need not restrict itself to signal-safe functions.
- * Should return 0 if it was a good match (ie end of test)
- * and 1 for a mismatch.
+/*
+ * Print a useful report on the status of the last reg comparison
+ * done in recv_and_compare_register_info().
  */
-int report_match_status(void);
+void report_mismatch_reg(void);
 
 /* Interface provided by CPU-specific code: */
 
diff --git a/reginfo.c b/reginfo.c
index 31bc699..a007f16 100644
--- a/reginfo.c
+++ b/reginfo.c
@@ -17,9 +17,6 @@
 static struct reginfo master_ri, apprentice_ri;
 static uint8_t master_memblock[MEMBLOCKLEN];
 
-static int mem_used;
-static int packet_mismatch;
-
 RisuResult send_register_info(void *uc)
 {
 struct reginfo ri;
@@ -107,10 +104,10 @@ RisuResult recv_and_compare_register_info(void *uc)
  */
 res = read_buffer(_ri, reginfo_size());
 if (res != RES_OK) {
-packet_mismatch = 1;
+/* fail */
 } else if (!reginfo_is_eq(_ri, _ri)) {
 /* register mismatch */
-res = RES_MISMATCH;
+res = RES_MISMATCH_REG;
 } else if (op == OP_TESTEND) {
 res = RES_END;
 }
@@ -124,13 +121,12 @@ RisuResult recv_and_compare_register_info(void *uc)
   (uintptr_t)memblock);
 break;
 case OP_COMPAREMEM:
-mem_used = 1;
 res = read_buffer(master_memblock, MEMBLOCKLEN);
 if (res != RES_OK) {
-packet_mismatch = 1;
+/* fail */
 } else if (memcmp(memblock, master_memblock, MEMBLOCKLEN) != 0) {
 /* memory mismatch */
-res = RES_MISMATCH;
+res = RES_MISMATCH_MEM;
 }
 respond(res == RES_OK ? RES_OK : RES_END);
 break;
@@ -141,40 +137,15 @@ RisuResult recv_and_compare_register_info(void *uc)
 return res;
 }
 
-/* Print a useful report on the status of the last comparison
- * done in recv_and_compare_register_info(). This is called on
- * exit, so need not restrict itself to signal-safe functions.
- * Should return 0 if it was a good match (ie end of test)
- * and 1 for a mismatch.
+/*
+ * Print a useful report on the status of the last reg comparison
+ * done in recv_and_compare_register_info().
  */
-int report_match_status(void)
+void report_mismatch_reg(void)
 {
-int resp = 0;
-fprintf(stderr, "match status...\n");
-if (packet_mismatch) {
-fprintf(stderr, "packet mismatch (probably disagreement "
-"about UNDEF on load/store)\n");
-return 1;
-}
-if (!reginfo_is_eq(_ri, _ri)) {
-fprintf(stderr, "mismatch on regs!\n");
-resp = 1;
-}
-if (mem_used
-&& memcmp(memblock, _memblock, MEMBLOCKLEN) != 0) {
-fprintf(stderr, "mismatch on memory!\n");
-resp = 1;
-}
-if (!resp) {
-fprintf(stderr, "match!\n");
-return 0;
-}
-
 fprintf(stderr, "master reginfo:\n");
 reginfo_dump(_ri, stderr);
 fprintf(stderr, "apprentice reginfo:\n");
 reginfo_dump(_ri, stderr);
-
 reginfo_dump_mismatch(_ri, _ri, stderr);
-return resp;
 }
diff --git a/risu.c b/risu.c
index 199f697..d6c2deb 100644
--- a/risu.c
+++ b/risu.c
@@ -224,9 +224,13 @@ static int apprentice(void)
 case RES_END:
 return EXIT_SUCCESS;
 
-case RES_MISMATCH:
-fprintf(stderr, "mismatch after %zd checkpoints\n", signal_count);
-report_match_status();
+case RES_MISMATCH_REG:
+fprintf(stderr, "mismatch reg after %zd checkpoints\n", signal_count);
+report_mismatch_reg();
+return EXIT_FAILURE;
+
+case RES_MISMATCH_MEM:
+fprintf(stderr, "mismatch mem after %zd checkpoints\n", signal_count);
 return EXIT_FAILURE;
 
 case RES_BAD_IO:
-- 
2.34.1




[RISU PATCH v4 09/29] Unify i/o functions and use RisuResult

2022-07-08 Thread Richard Henderson
Push the trace check down from the callers of the reginfo functions
into the i/o functions themselves.  This means we no longer have to
pass function pointers.

Return a RisuResult from the i/o functions.  This fixes a minor bug
in send_register_info (even before the conversion to RisuResult),
which returned the write_fn result directly.

Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 risu.h| 32 +-
 comms.c   |  8 +++---
 reginfo.c | 60 
 risu.c| 82 ++-
 4 files changed, 81 insertions(+), 101 deletions(-)

diff --git a/risu.h b/risu.h
index e6d07eb..c83b803 100644
--- a/risu.h
+++ b/risu.h
@@ -34,13 +34,6 @@ void process_arch_opt(int opt, const char *arg);
 
 #include REGINFO_HEADER(ARCH)
 
-/* Socket related routines */
-int master_connect(int port);
-int apprentice_connect(const char *hostname, int port);
-int send_data_pkt(int sock, void *pkt, int pktlen);
-int recv_data_pkt(int sock, void *pkt, int pktlen);
-void send_response_byte(int sock, int resp);
-
 extern uintptr_t image_start_address;
 extern void *memblock;
 
@@ -80,31 +73,32 @@ typedef struct {
uint32_t risu_op;
 } trace_header_t;
 
+/* Socket related routines */
+int master_connect(int port);
+int apprentice_connect(const char *hostname, int port);
+RisuResult send_data_pkt(int sock, void *pkt, int pktlen);
+RisuResult recv_data_pkt(int sock, void *pkt, int pktlen);
+void send_response_byte(int sock, int resp);
+
 /* Functions operating on reginfo */
 
-/* Function prototypes for read/write helper functions.
- *
- * We pass the helper function to send_register_info and
- * recv_and_compare_register_info which can either be backed by the
- * traditional network socket or a trace file.
- */
-typedef int (*write_fn) (void *ptr, size_t bytes);
-typedef int (*read_fn) (void *ptr, size_t bytes);
-typedef void (*respond_fn) (RisuResult response);
+/* Function prototypes for read/write helper functions. */
+RisuResult write_buffer(void *ptr, size_t bytes);
+RisuResult read_buffer(void *ptr, size_t bytes);
+void respond(RisuResult response);
 
 /*
  * Send the register information from the struct ucontext down the socket.
  * NB: called from a signal handler.
  */
-RisuResult send_register_info(write_fn write_fn, void *uc);
+RisuResult send_register_info(void *uc);
 
 /*
  * Read register info from the socket and compare it with that from the
  * ucontext.
  * NB: called from a signal handler.
  */
-RisuResult recv_and_compare_register_info(read_fn read_fn,
-  respond_fn respond, void *uc);
+RisuResult recv_and_compare_register_info(void *uc);
 
 /* Print a useful report on the status of the last comparison
  * done in recv_and_compare_register_info(). This is called on
diff --git a/comms.c b/comms.c
index 861e845..21968da 100644
--- a/comms.c
+++ b/comms.c
@@ -168,7 +168,7 @@ ssize_t safe_writev(int fd, struct iovec *iov_in, int iovcnt)
  * Note that both ends must agree on the length of the
  * block of data.
  */
-int send_data_pkt(int sock, void *pkt, int pktlen)
+RisuResult send_data_pkt(int sock, void *pkt, int pktlen)
 {
 unsigned char resp;
 /* First we send the packet length as a network-order 32 bit value.
@@ -196,7 +196,7 @@ int send_data_pkt(int sock, void *pkt, int pktlen)
 return resp;
 }
 
-int recv_data_pkt(int sock, void *pkt, int pktlen)
+RisuResult recv_data_pkt(int sock, void *pkt, int pktlen)
 {
 uint32_t net_pktlen;
 recv_bytes(sock, &net_pktlen, sizeof(net_pktlen));
@@ -206,10 +206,10 @@ int recv_data_pkt(int sock, void *pkt, int pktlen)
  * a response back.
  */
 recv_and_discard_bytes(sock, net_pktlen);
-return 1;
+return RES_BAD_IO;
 }
 recv_bytes(sock, pkt, pktlen);
-return 0;
+return RES_OK;
 }
 
 void send_response_byte(int sock, int resp)
diff --git a/reginfo.c b/reginfo.c
index b909a1f..fee025e 100644
--- a/reginfo.c
+++ b/reginfo.c
@@ -21,10 +21,11 @@ uint8_t apprentice_memblock[MEMBLOCKLEN];
 static int mem_used;
 static int packet_mismatch;
 
-RisuResult send_register_info(write_fn write_fn, void *uc)
+RisuResult send_register_info(void *uc)
 {
 struct reginfo ri;
 trace_header_t header;
+RisuResult res;
 RisuOp op;
 
 reginfo_init(, uc);
@@ -33,8 +34,9 @@ RisuResult send_register_info(write_fn write_fn, void *uc)
 /* Write a header with PC/op to keep in sync */
 header.pc = get_pc();
 header.risu_op = op;
-if (write_fn(, sizeof(header)) != 0) {
-return RES_BAD_IO;
+res = write_buffer(, sizeof(header));
+if (res != RES_OK) {
+return res;
 }
 
 switch (op) {
@@ -45,11 +47,12 @@ RisuResult send_register_info(write_fn write_fn, void *uc)
  * Do a simple register compare on (a) explicit request
  * (b) end of test (c) a non-risuop UNDEF
  */
-if (write_fn(, reginfo_size()) != 

[RISU PATCH v4 07/29] Add enum RisuOp

2022-07-08 Thread Richard Henderson
Formalize the set of defines, plus -1, into an enum.

Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 risu.h | 23 +++
 reginfo.c  | 32 +++-
 risu_aarch64.c |  6 +++---
 risu_arm.c |  6 +++---
 risu_i386.c|  4 ++--
 risu_m68k.c|  4 ++--
 risu_ppc64.c   |  4 ++--
 7 files changed, 46 insertions(+), 33 deletions(-)

diff --git a/risu.h b/risu.h
index e2b4508..a7aa929 100644
--- a/risu.h
+++ b/risu.h
@@ -45,11 +45,17 @@ extern uintptr_t image_start_address;
 extern void *memblock;
 
 /* Ops code under test can request from risu: */
-#define OP_COMPARE 0
-#define OP_TESTEND 1
-#define OP_SETMEMBLOCK 2
-#define OP_GETMEMBLOCK 3
-#define OP_COMPAREMEM 4
+typedef enum {
+/* Any other sigill besides the designated undefined insn.  */
+OP_SIGILL = -1,
+
+/* These are generated by the designated undefined insn. */
+OP_COMPARE = 0,
+OP_TESTEND = 1,
+OP_SETMEMBLOCK = 2,
+OP_GETMEMBLOCK = 3,
+OP_COMPAREMEM = 4,
+} RisuOp;
 
 /* The memory block should be this long */
 #define MEMBLOCKLEN 8192
@@ -114,10 +120,11 @@ void set_ucontext_paramreg(void *vuc, uint64_t value);
 /* Return the value of the parameter register from a reginfo. */
 uint64_t get_reginfo_paramreg(struct reginfo *ri);
 
-/* Return the risu operation number we have been asked to do,
- * or -1 if this was a SIGILL for a non-risuop insn.
+/*
+ * Return the risu operation number we have been asked to do,
+ * or OP_SIGILL if this was a SIGILL for a non-risuop insn.
  */
-int get_risuop(struct reginfo *ri);
+RisuOp get_risuop(struct reginfo *ri);
 
 /* Return the PC from a reginfo */
 uintptr_t get_pc(struct reginfo *ri);
diff --git a/reginfo.c b/reginfo.c
index 1b2a821..2d67c93 100644
--- a/reginfo.c
+++ b/reginfo.c
@@ -11,7 +11,7 @@
 
 #include 
 #include 
-
+#include 
 #include "risu.h"
 
 struct reginfo master_ri, apprentice_ri;
@@ -25,7 +25,7 @@ int send_register_info(write_fn write_fn, void *uc)
 {
 struct reginfo ri;
 trace_header_t header;
-int op;
+RisuOp op;
 
 reginfo_init(, uc);
 op = get_risuop();
@@ -38,11 +38,18 @@ int send_register_info(write_fn write_fn, void *uc)
 }
 
 switch (op) {
+case OP_COMPARE:
 case OP_TESTEND:
-write_fn(, reginfo_size());
-/* if we are tracing write_fn will return 0 unlike a remote
-   end, hence we force return of 1 here */
-return 1;
+case OP_SIGILL:
+/*
+ * Do a simple register compare on (a) explicit request
+ * (b) end of test (c) a non-risuop UNDEF
+ */
+if (write_fn(, reginfo_size()) != 0) {
+return -1;
+}
+/* For OP_TEST_END, force return 1 to exit. */
+return op == OP_TESTEND;
 case OP_SETMEMBLOCK:
 memblock = (void *)(uintptr_t)get_reginfo_paramreg();
 break;
@@ -53,12 +60,8 @@ int send_register_info(write_fn write_fn, void *uc)
 case OP_COMPAREMEM:
 return write_fn(memblock, MEMBLOCKLEN);
 break;
-case OP_COMPARE:
 default:
-/* Do a simple register compare on (a) explicit request
- * (b) end of test (c) a non-risuop UNDEF
- */
-return write_fn(, reginfo_size());
+abort();
 }
 return 0;
 }
@@ -74,8 +77,9 @@ int send_register_info(write_fn write_fn, void *uc)
 int recv_and_compare_register_info(read_fn read_fn,
respond_fn resp_fn, void *uc)
 {
-int resp = 0, op;
+int resp = 0;
 trace_header_t header;
+RisuOp op;
 
 reginfo_init(_ri, uc);
 op = get_risuop(_ri);
@@ -97,7 +101,7 @@ int recv_and_compare_register_info(read_fn read_fn,
 switch (op) {
 case OP_COMPARE:
 case OP_TESTEND:
-default:
+case OP_SIGILL:
 /* Do a simple register compare on (a) explicit request
  * (b) end of test (c) a non-risuop UNDEF
  */
@@ -130,6 +134,8 @@ int recv_and_compare_register_info(read_fn read_fn,
 }
 resp_fn(resp);
 break;
+default:
+abort();
 }
 
 return resp;
diff --git a/risu_aarch64.c b/risu_aarch64.c
index 492d141..f8a8412 100644
--- a/risu_aarch64.c
+++ b/risu_aarch64.c
@@ -29,16 +29,16 @@ uint64_t get_reginfo_paramreg(struct reginfo *ri)
 return ri->regs[0];
 }
 
-int get_risuop(struct reginfo *ri)
+RisuOp get_risuop(struct reginfo *ri)
 {
 /* Return the risuop we have been asked to do
- * (or -1 if this was a SIGILL for a non-risuop insn)
+ * (or OP_SIGILL if this was a SIGILL for a non-risuop insn)
  */
 uint32_t insn = ri->faulting_insn;
 uint32_t op = insn & 0xf;
 uint32_t key = insn & ~0xf;
 uint32_t risukey = 0x5af0;
-return (key != risukey) ? -1 : op;
+return (key != risukey) ? OP_SIGILL : op;
 }
 
 uintptr_t get_pc(struct reginfo *ri)
diff --git a/risu_arm.c b/risu_arm.c
index 5fcb2a5..a20bf73 100644
--- a/risu_arm.c
+++ b/risu_arm.c
@@ -56,17 +56,17 @@ 

[RISU PATCH v4 10/29] Pass non-OK result back through siglongjmp

2022-07-08 Thread Richard Henderson
Rather than doing some work in the signal handler and
some work outside, move all of the non-resume work outside.
This works because we arranged for RES_OK to be 0, which
is the normal return from sigsetjmp.

Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 risu.c | 50 --
 1 file changed, 24 insertions(+), 26 deletions(-)

diff --git a/risu.c b/risu.c
index 1917311..f238117 100644
--- a/risu.c
+++ b/risu.c
@@ -107,15 +107,10 @@ static void master_sigill(int sig, siginfo_t *si, void *uc)
 } else {
 r = recv_and_compare_register_info(uc);
 }
-
-switch (r) {
-case RES_OK:
-/* match OK */
+if (r == RES_OK) {
 advance_pc(uc);
-return;
-default:
-/* mismatch, or end of test */
-siglongjmp(jmpbuf, 1);
+} else {
+siglongjmp(jmpbuf, r);
 }
 }
 
@@ -129,21 +124,10 @@ static void apprentice_sigill(int sig, siginfo_t *si, void *uc)
 } else {
 r = send_register_info(uc);
 }
-
-switch (r) {
-case RES_OK:
-/* match OK */
+if (r == RES_OK) {
 advance_pc(uc);
-return;
-case RES_END:
-/* end of test */
-exit(EXIT_SUCCESS);
-default:
-/* mismatch */
-if (trace) {
-siglongjmp(jmpbuf, 1);
-}
-exit(EXIT_FAILURE);
+} else {
+siglongjmp(jmpbuf, r);
 }
 }
 
@@ -200,7 +184,9 @@ static void load_image(const char *imgfile)
 
 static int master(void)
 {
-if (sigsetjmp(jmpbuf, 1)) {
+RisuResult res = sigsetjmp(jmpbuf, 1);
+
+if (res != RES_OK) {
 #ifdef HAVE_ZLIB
 if (trace && comm_fd != STDOUT_FILENO) {
 gzclose(gz_trace_file);
@@ -226,15 +212,27 @@ static int master(void)
 
 static int apprentice(void)
 {
-if (sigsetjmp(jmpbuf, 1)) {
+RisuResult res = sigsetjmp(jmpbuf, 1);
+
+if (res != RES_OK) {
 #ifdef HAVE_ZLIB
 if (trace && comm_fd != STDIN_FILENO) {
 gzclose(gz_trace_file);
 }
 #endif
 close(comm_fd);
-fprintf(stderr, "finished early after %zd checkpoints\n", signal_count);
-return report_match_status(true);
+
+switch (res) {
+case RES_END:
+return EXIT_SUCCESS;
+default:
+if (!trace) {
+return EXIT_FAILURE;
+}
+fprintf(stderr, "finished early after %zd checkpoints\n",
+signal_count);
+return report_match_status(true);
+}
 }
 set_sigill_handler(_sigill);
 fprintf(stderr, "starting apprentice image at 0x%"PRIxPTR"\n",
-- 
2.34.1




[RISU PATCH v4 02/29] Unify master_fd and apprentice_fd to comm_fd

2022-07-08 Thread Richard Henderson
Any one invocation cannot be both master and apprentice.
Let's use only one variable for the file descriptor.

Reviewed-by: Alex Bennée 
Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 risu.c | 40 
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/risu.c b/risu.c
index 79b1092..059348f 100644
--- a/risu.c
+++ b/risu.c
@@ -30,7 +30,7 @@
 
 void *memblock;
 
-int apprentice_fd, master_fd;
+static int comm_fd;
 bool trace;
 size_t signal_count;
 
@@ -50,7 +50,7 @@ sigjmp_buf jmpbuf;
 
 int read_sock(void *ptr, size_t bytes)
 {
-return recv_data_pkt(master_fd, ptr, bytes);
+return recv_data_pkt(comm_fd, ptr, bytes);
 }
 
 int write_trace(void *ptr, size_t bytes)
@@ -58,9 +58,9 @@ int write_trace(void *ptr, size_t bytes)
 size_t res;
 
 #ifdef HAVE_ZLIB
-if (master_fd == STDOUT_FILENO) {
+if (comm_fd == STDOUT_FILENO) {
 #endif
-res = write(master_fd, ptr, bytes);
+res = write(comm_fd, ptr, bytes);
 #ifdef HAVE_ZLIB
 } else {
 res = gzwrite(gz_trace_file, ptr, bytes);
@@ -71,14 +71,14 @@ int write_trace(void *ptr, size_t bytes)
 
 void respond_sock(int r)
 {
-send_response_byte(master_fd, r);
+send_response_byte(comm_fd, r);
 }
 
 /* Apprentice function */
 
 int write_sock(void *ptr, size_t bytes)
 {
-return send_data_pkt(apprentice_fd, ptr, bytes);
+return send_data_pkt(comm_fd, ptr, bytes);
 }
 
 int read_trace(void *ptr, size_t bytes)
@@ -86,9 +86,9 @@ int read_trace(void *ptr, size_t bytes)
 size_t res;
 
 #ifdef HAVE_ZLIB
-if (apprentice_fd == STDIN_FILENO) {
+if (comm_fd == STDIN_FILENO) {
 #endif
-res = read(apprentice_fd, ptr, bytes);
+res = read(comm_fd, ptr, bytes);
 #ifdef HAVE_ZLIB
 } else {
 res = gzread(gz_trace_file, ptr, bytes);
@@ -218,11 +218,11 @@ int master(void)
 {
 if (sigsetjmp(jmpbuf, 1)) {
 #ifdef HAVE_ZLIB
-if (trace && master_fd != STDOUT_FILENO) {
+if (trace && comm_fd != STDOUT_FILENO) {
 gzclose(gz_trace_file);
 }
 #endif
-close(master_fd);
+close(comm_fd);
 if (trace) {
 fprintf(stderr, "trace complete after %zd checkpoints\n",
 signal_count);
@@ -244,11 +244,11 @@ int apprentice(void)
 {
 if (sigsetjmp(jmpbuf, 1)) {
 #ifdef HAVE_ZLIB
-if (trace && apprentice_fd != STDIN_FILENO) {
+if (trace && comm_fd != STDIN_FILENO) {
 gzclose(gz_trace_file);
 }
 #endif
-close(apprentice_fd);
+close(comm_fd);
 fprintf(stderr, "finished early after %zd checkpoints\n", 
signal_count);
 return report_match_status(true);
 }
@@ -375,31 +375,31 @@ int main(int argc, char **argv)
 if (ismaster) {
 if (trace) {
 if (strcmp(trace_fn, "-") == 0) {
-master_fd = STDOUT_FILENO;
+comm_fd = STDOUT_FILENO;
 } else {
-master_fd = open(trace_fn, O_WRONLY | O_CREAT, S_IRWXU);
+comm_fd = open(trace_fn, O_WRONLY | O_CREAT, S_IRWXU);
 #ifdef HAVE_ZLIB
-gz_trace_file = gzdopen(master_fd, "wb9");
+gz_trace_file = gzdopen(comm_fd, "wb9");
 #endif
 }
 } else {
 fprintf(stderr, "master port %d\n", port);
-master_fd = master_connect(port);
+comm_fd = master_connect(port);
 }
 return master();
 } else {
 if (trace) {
 if (strcmp(trace_fn, "-") == 0) {
-apprentice_fd = STDIN_FILENO;
+comm_fd = STDIN_FILENO;
 } else {
-apprentice_fd = open(trace_fn, O_RDONLY);
+comm_fd = open(trace_fn, O_RDONLY);
 #ifdef HAVE_ZLIB
-gz_trace_file = gzdopen(apprentice_fd, "rb");
+gz_trace_file = gzdopen(comm_fd, "rb");
 #endif
 }
 } else {
 fprintf(stderr, "apprentice host %s port %d\n", hostname, port);
-apprentice_fd = apprentice_connect(hostname, port);
+comm_fd = apprentice_connect(hostname, port);
 }
 return apprentice();
 }
-- 
2.34.1




[RISU PATCH v4 04/29] Adjust tracefile open for write

2022-07-08 Thread Richard Henderson
Truncate the new output file.  Rely on umask to remove
group+other file permissions, if desired.

Reviewed-by: Alex Bennée 
Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 risu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/risu.c b/risu.c
index 2f6a677..819b786 100644
--- a/risu.c
+++ b/risu.c
@@ -368,7 +368,7 @@ int main(int argc, char **argv)
 comm_fd = ismaster ? STDOUT_FILENO : STDIN_FILENO;
 } else {
 if (ismaster) {
-comm_fd = open(trace_fn, O_WRONLY | O_CREAT, S_IRWXU);
+comm_fd = open(trace_fn, O_WRONLY | O_CREAT | O_TRUNC, 0666);
 } else {
 comm_fd = open(trace_fn, O_RDONLY);
 }
-- 
2.34.1




[RISU PATCH v4 08/29] Add enum RisuResult

2022-07-08 Thread Richard Henderson
Formalize the random set of numbers into an enum.  Doing this
makes it easy to see that one of the responses in
recv_and_compare_register_info was inconsistent.

Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 risu.h| 25 +
 reginfo.c | 32 
 risu.c| 18 +-
 3 files changed, 42 insertions(+), 33 deletions(-)

diff --git a/risu.h b/risu.h
index a7aa929..e6d07eb 100644
--- a/risu.h
+++ b/risu.h
@@ -57,6 +57,14 @@ typedef enum {
 OP_COMPAREMEM = 4,
 } RisuOp;
 
+/* Result of operation */
+typedef enum {
+RES_OK = 0,
+RES_END,
+RES_MISMATCH,
+RES_BAD_IO,
+} RisuResult;
+
 /* The memory block should be this long */
 #define MEMBLOCKLEN 8192
 
@@ -82,20 +90,21 @@ typedef struct {
  */
 typedef int (*write_fn) (void *ptr, size_t bytes);
 typedef int (*read_fn) (void *ptr, size_t bytes);
-typedef void (*respond_fn) (int response);
+typedef void (*respond_fn) (RisuResult response);
 
-/* Send the register information from the struct ucontext down the socket.
- * Return the response code from the master.
+/*
+ * Send the register information from the struct ucontext down the socket.
  * NB: called from a signal handler.
  */
-int send_register_info(write_fn write_fn, void *uc);
+RisuResult send_register_info(write_fn write_fn, void *uc);
 
-/* Read register info from the socket and compare it with that from the
- * ucontext. Return 0 for match, 1 for end-of-test, 2 for mismatch.
+/*
+ * Read register info from the socket and compare it with that from the
+ * ucontext.
  * NB: called from a signal handler.
  */
-int recv_and_compare_register_info(read_fn read_fn,
-   respond_fn respond, void *uc);
+RisuResult recv_and_compare_register_info(read_fn read_fn,
+  respond_fn respond, void *uc);
 
 /* Print a useful report on the status of the last comparison
  * done in recv_and_compare_register_info(). This is called on
diff --git a/reginfo.c b/reginfo.c
index 2d67c93..b909a1f 100644
--- a/reginfo.c
+++ b/reginfo.c
@@ -21,7 +21,7 @@ uint8_t apprentice_memblock[MEMBLOCKLEN];
 static int mem_used;
 static int packet_mismatch;
 
-int send_register_info(write_fn write_fn, void *uc)
+RisuResult send_register_info(write_fn write_fn, void *uc)
 {
 struct reginfo ri;
 trace_header_t header;
@@ -34,7 +34,7 @@ int send_register_info(write_fn write_fn, void *uc)
 header.pc = get_pc();
 header.risu_op = op;
 if (write_fn(, sizeof(header)) != 0) {
-return -1;
+return RES_BAD_IO;
 }
 
 switch (op) {
@@ -46,10 +46,10 @@ int send_register_info(write_fn write_fn, void *uc)
  * (b) end of test (c) a non-risuop UNDEF
  */
 if (write_fn(, reginfo_size()) != 0) {
-return -1;
+return RES_BAD_IO;
 }
 /* For OP_TEST_END, force return 1 to exit. */
-return op == OP_TESTEND;
+return op == OP_TESTEND ? RES_END : RES_OK;
 case OP_SETMEMBLOCK:
 memblock = (void *)(uintptr_t)get_reginfo_paramreg();
 break;
@@ -63,7 +63,7 @@ int send_register_info(write_fn write_fn, void *uc)
 default:
 abort();
 }
-return 0;
+return RES_OK;
 }
 
 /* Read register info from the socket and compare it with that from the
@@ -74,10 +74,10 @@ int send_register_info(write_fn write_fn, void *uc)
  * that says whether it is register or memory data, so if the two
  * sides get out of sync then we will fail obscurely.
  */
-int recv_and_compare_register_info(read_fn read_fn,
-   respond_fn resp_fn, void *uc)
+RisuResult recv_and_compare_register_info(read_fn read_fn,
+  respond_fn resp_fn, void *uc)
 {
-int resp = 0;
+RisuResult resp = RES_OK;
 trace_header_t header;
 RisuOp op;
 
@@ -85,18 +85,18 @@ int recv_and_compare_register_info(read_fn read_fn,
 op = get_risuop(_ri);
 
 if (read_fn(, sizeof(header)) != 0) {
-return -1;
+return RES_BAD_IO;
 }
 
 if (header.risu_op != op) {
 /* We are out of sync */
-resp = 2;
+resp = RES_BAD_IO;
 resp_fn(resp);
 return resp;
 }
 
 /* send OK for the header */
-resp_fn(0);
+resp_fn(RES_OK);
 
 switch (op) {
 case OP_COMPARE:
@@ -107,12 +107,12 @@ int recv_and_compare_register_info(read_fn read_fn,
  */
 if (read_fn(_ri, reginfo_size())) {
 packet_mismatch = 1;
-resp = 2;
+resp = RES_BAD_IO;
 } else if (!reginfo_is_eq(_ri, _ri)) {
 /* register mismatch */
-resp = 2;
+resp = RES_MISMATCH;
 } else if (op == OP_TESTEND) {
-resp = 1;
+resp = RES_END;
 }
 resp_fn(resp);
 break;
@@ -127,10 +127,10 @@ int recv_and_compare_register_info(read_fn read_fn,
 mem_used 

[RISU PATCH v4 06/29] Make some risu.c symbols static

2022-07-08 Thread Richard Henderson
These are unused in other translation units.

Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
 risu.c | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/risu.c b/risu.c
index 26dc116..ab17c71 100644
--- a/risu.c
+++ b/risu.c
@@ -31,18 +31,18 @@
 void *memblock;
 
 static int comm_fd;
-bool trace;
-size_t signal_count;
+static bool trace;
+static size_t signal_count;
 
 #ifdef HAVE_ZLIB
 #include 
-gzFile gz_trace_file;
+static gzFile gz_trace_file;
 #define TRACE_TYPE "compressed"
 #else
 #define TRACE_TYPE "uncompressed"
 #endif
 
-sigjmp_buf jmpbuf;
+static sigjmp_buf jmpbuf;
 
 #define ARRAY_SIZE(x)  (sizeof(x) / sizeof((x)[0]))
 
@@ -113,7 +113,7 @@ void respond_trace(int r)
 }
 }
 
-void master_sigill(int sig, siginfo_t *si, void *uc)
+static void master_sigill(int sig, siginfo_t *si, void *uc)
 {
 int r;
 signal_count++;
@@ -135,7 +135,7 @@ void master_sigill(int sig, siginfo_t *si, void *uc)
 }
 }
 
-void apprentice_sigill(int sig, siginfo_t *si, void *uc)
+static void apprentice_sigill(int sig, siginfo_t *si, void *uc)
 {
 int r;
 signal_count++;
@@ -180,9 +180,9 @@ static void set_sigill_handler(void (*fn) (int, siginfo_t 
*, void *))
 typedef void entrypoint_fn(void);
 
 uintptr_t image_start_address;
-entrypoint_fn *image_start;
+static entrypoint_fn *image_start;
 
-void load_image(const char *imgfile)
+static void load_image(const char *imgfile)
 {
 /* Load image file into memory as executable */
 struct stat st;
@@ -214,7 +214,7 @@ void load_image(const char *imgfile)
 image_start_address = (uintptr_t) addr;
 }
 
-int master(void)
+static int master(void)
 {
 if (sigsetjmp(jmpbuf, 1)) {
 #ifdef HAVE_ZLIB
@@ -240,7 +240,7 @@ int master(void)
 return EXIT_FAILURE;
 }
 
-int apprentice(void)
+static int apprentice(void)
 {
 if (sigsetjmp(jmpbuf, 1)) {
 #ifdef HAVE_ZLIB
@@ -261,9 +261,9 @@ int apprentice(void)
 return EXIT_FAILURE;
 }
 
-int ismaster;
+static int ismaster;
 
-void usage(void)
+static void usage(void)
 {
 fprintf(stderr,
 "Usage: risu [--master] [--host ] [--port ] "
-- 
2.34.1




[PATCH v4 11/12] tests/qemu-iotests: skip 108 when FUSE is not loaded

2022-07-08 Thread John Snow
Do not merge: Staged in Hanna's branch.

Signed-off-by: John Snow 
---
 tests/qemu-iotests/108 | 5 +
 1 file changed, 5 insertions(+)

diff --git a/tests/qemu-iotests/108 b/tests/qemu-iotests/108
index 9e923d6a59f..54e935acf28 100755
--- a/tests/qemu-iotests/108
+++ b/tests/qemu-iotests/108
@@ -60,6 +60,11 @@ if sudo -n losetup &>/dev/null; then
 else
 loopdev=false
 
+# Check for usable FUSE in the host environment:
+if test ! -c "/dev/fuse"; then
+_notrun 'No passwordless sudo nor usable /dev/fuse'
+fi
+
 # QSD --export fuse will either yield "Parameter 'id' is missing"
 # or "Invalid parameter 'fuse'", depending on whether there is
 # FUSE support or not.
-- 
2.34.3




[RISU PATCH v4 03/29] Hoist trace file and socket opening

2022-07-08 Thread Richard Henderson
We will want to share this code with --dump.

Reviewed-by: Alex Bennée 
Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
v3: Hoist socket connecting as well as trace file opening.
---
 risu.c | 49 +++--
 1 file changed, 23 insertions(+), 26 deletions(-)

diff --git a/risu.c b/risu.c
index 059348f..2f6a677 100644
--- a/risu.c
+++ b/risu.c
@@ -363,6 +363,29 @@ int main(int argc, char **argv)
 }
 }
 
+if (trace) {
+if (strcmp(trace_fn, "-") == 0) {
+comm_fd = ismaster ? STDOUT_FILENO : STDIN_FILENO;
+} else {
+if (ismaster) {
+comm_fd = open(trace_fn, O_WRONLY | O_CREAT, S_IRWXU);
+} else {
+comm_fd = open(trace_fn, O_RDONLY);
+}
+#ifdef HAVE_ZLIB
+gz_trace_file = gzdopen(comm_fd, ismaster ? "wb9" : "rb");
+#endif
+}
+} else {
+if (ismaster) {
+fprintf(stderr, "master port %d\n", port);
+comm_fd = master_connect(port);
+} else {
+fprintf(stderr, "apprentice host %s port %d\n", hostname, port);
+comm_fd = apprentice_connect(hostname, port);
+}
+}
+
 imgfile = argv[optind];
 if (!imgfile) {
 fprintf(stderr, "Error: must specify image file name\n\n");
@@ -373,34 +396,8 @@ int main(int argc, char **argv)
 load_image(imgfile);
 
 if (ismaster) {
-if (trace) {
-if (strcmp(trace_fn, "-") == 0) {
-comm_fd = STDOUT_FILENO;
-} else {
-comm_fd = open(trace_fn, O_WRONLY | O_CREAT, S_IRWXU);
-#ifdef HAVE_ZLIB
-gz_trace_file = gzdopen(comm_fd, "wb9");
-#endif
-}
-} else {
-fprintf(stderr, "master port %d\n", port);
-comm_fd = master_connect(port);
-}
 return master();
 } else {
-if (trace) {
-if (strcmp(trace_fn, "-") == 0) {
-comm_fd = STDIN_FILENO;
-} else {
-comm_fd = open(trace_fn, O_RDONLY);
-#ifdef HAVE_ZLIB
-gz_trace_file = gzdopen(comm_fd, "rb");
-#endif
-}
-} else {
-fprintf(stderr, "apprentice host %s port %d\n", hostname, port);
-comm_fd = apprentice_connect(hostname, port);
-}
 return apprentice();
 }
 }
-- 
2.34.1




[PATCH v4 03/12] tests/vm: switch CentOS 8 to CentOS 8 Stream

2022-07-08 Thread John Snow
The old CentOS image didn't work anymore because it was already EOL at
the beginning of 2022.

Signed-off-by: John Snow 
Reviewed-by: Thomas Huth 
Reviewed-by: Daniel P. Berrangé 
---
 tests/vm/centos | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/vm/centos b/tests/vm/centos
index be4f6ff2f14..3a527c47b3d 100755
--- a/tests/vm/centos
+++ b/tests/vm/centos
@@ -1,8 +1,8 @@
 #!/usr/bin/env python3
 #
-# CentOS image
+# CentOS 8 Stream image
 #
-# Copyright 2018 Red Hat Inc.
+# Copyright 2018, 2022 Red Hat Inc.
 #
 # Authors:
 #  Fam Zheng 
@@ -32,7 +32,7 @@ class CentosVM(basevm.BaseVM):
 """
 
 def build_image(self, img):
-cimg = 
self._download_with_cache("https://cloud.centos.org/centos/8/x86_64/images/CentOS-8-GenericCloud-8.3.2011-20201204.2.x86_64.qcow2")
+cimg = 
self._download_with_cache("https://cloud.centos.org/centos/8-stream/x86_64/images/CentOS-Stream-GenericCloud-8-20220125.1.x86_64.qcow2")
 img_tmp = img + ".tmp"
 subprocess.check_call(['cp', '-f', cimg, img_tmp])
 self.exec_qemu_img("resize", img_tmp, "50G")
-- 
2.34.3




[PATCH v4 06/12] tests/vm: remove ubuntu.i386 VM test

2022-07-08 Thread John Snow
Ubuntu 18.04 is out of our support window, and Ubuntu 20.04 does not
support i386 anymore. The Debian project does, but it does not provide
any cloud images for it, so a new expect-style script would have to be
written.

Since we have i386 cross-compiler tests hosted on GitLab CI, we don't
need to support this VM test anymore.

Signed-off-by: John Snow 
Reviewed-by: Thomas Huth 
Reviewed-by: Daniel P. Berrangé 
---
 tests/vm/Makefile.include |  3 +--
 tests/vm/ubuntu.i386  | 40 ---
 2 files changed, 1 insertion(+), 42 deletions(-)
 delete mode 100755 tests/vm/ubuntu.i386

diff --git a/tests/vm/Makefile.include b/tests/vm/Makefile.include
index 5f5b1fbfe68..a94f0ebf7f1 100644
--- a/tests/vm/Makefile.include
+++ b/tests/vm/Makefile.include
@@ -17,7 +17,7 @@ EFI_AARCH64 = $(wildcard 
$(BUILD_DIR)/pc-bios/edk2-aarch64-code.fd)
 
 X86_IMAGES := freebsd netbsd openbsd centos fedora haiku.x86_64
 ifneq ($(GENISOIMAGE),)
-X86_IMAGES += ubuntu.i386 centos
+X86_IMAGES += centos
 ifneq ($(EFI_AARCH64),)
 ARM64_IMAGES += ubuntu.aarch64 centos.aarch64
 endif
@@ -48,7 +48,6 @@ vm-help vm-test:
@echo "  vm-build-fedora - Build QEMU in Fedora VM"
 ifneq ($(GENISOIMAGE),)
@echo "  vm-build-centos - Build QEMU in CentOS VM, 
with Docker"
-   @echo "  vm-build-ubuntu.i386- Build QEMU in ubuntu i386 VM"
 ifneq ($(EFI_AARCH64),)
@echo "  vm-build-ubuntu.aarch64 - Build QEMU in ubuntu aarch64 
VM"
@echo "  vm-build-centos.aarch64 - Build QEMU in CentOS aarch64 
VM"
diff --git a/tests/vm/ubuntu.i386 b/tests/vm/ubuntu.i386
deleted file mode 100755
index 47681b6f87d..000
--- a/tests/vm/ubuntu.i386
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/usr/bin/env python3
-#
-# Ubuntu i386 image
-#
-# Copyright 2017 Red Hat Inc.
-#
-# Authors:
-#  Fam Zheng 
-#
-# This code is licensed under the GPL version 2 or later.  See
-# the COPYING file in the top-level directory.
-#
-
-import sys
-import basevm
-import ubuntuvm
-
-DEFAULT_CONFIG = {
-'install_cmds' : "apt-get update,"\
- "apt-get build-dep -y qemu,"\
- "apt-get install -y libfdt-dev language-pack-en 
ninja-build",
-}
-
-class UbuntuX86VM(ubuntuvm.UbuntuVM):
-name = "ubuntu.i386"
-arch = "i386"
-image_link="https://cloud-images.ubuntu.com/releases/bionic/"\
-   "release-20191114/ubuntu-18.04-server-cloudimg-i386.img"
-
image_sha256="28969840626d1ea80bb249c08eef1a4533e8904aa51a327b40f37ac4b4ff04ef"
-BUILD_SCRIPT = """
-set -e;
-cd $(mktemp -d);
-sudo chmod a+r /dev/vdb;
-tar -xf /dev/vdb;
-./configure {configure_opts};
-make --output-sync {target} -j{jobs} {verbose};
-"""
-
-if __name__ == "__main__":
-sys.exit(basevm.main(UbuntuX86VM, DEFAULT_CONFIG))
-- 
2.34.3




[PATCH v4 01/12] qga: treat get-guest-fsinfo as "best effort"

2022-07-08 Thread John Snow
In some container environments, there may be references to block devices
witnessable from a container through /proc/self/mountinfo that reference
devices we simply don't have access to in the container, and cannot
provide information about.

Instead of failing the entire fsinfo command, return stub information
for these failed lookups.

This allows test-qga to pass under docker tests, which are in turn used
by the CentOS VM tests.

Signed-off-by: John Snow 
---
 qga/commands-posix.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index 0469dc409d4..355de050a1c 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -1207,7 +1207,15 @@ static void build_guest_fsinfo_for_device(char const 
*devpath,
 
 syspath = realpath(devpath, NULL);
 if (!syspath) {
-error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
+if (errno != ENOENT) {
+error_setg_errno(errp, errno, "realpath(\"%s\")", devpath);
+return;
+}
+
+/* ENOENT: This devpath may not exist because of container config */
+if (!fs->name) {
+fs->name = g_path_get_basename(devpath);
+}
 return;
 }
 
-- 
2.34.3




[PATCH v6 45/45] linux-user/aarch64: Add SME related hwcap entries

2022-07-08 Thread Richard Henderson
Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 linux-user/elfload.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 1de77c7959..ce902dbd56 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -605,6 +605,18 @@ enum {
 ARM_HWCAP2_A64_RNG  = 1 << 16,
 ARM_HWCAP2_A64_BTI  = 1 << 17,
 ARM_HWCAP2_A64_MTE  = 1 << 18,
+ARM_HWCAP2_A64_ECV  = 1 << 19,
+ARM_HWCAP2_A64_AFP  = 1 << 20,
+ARM_HWCAP2_A64_RPRES= 1 << 21,
+ARM_HWCAP2_A64_MTE3 = 1 << 22,
+ARM_HWCAP2_A64_SME  = 1 << 23,
+ARM_HWCAP2_A64_SME_I16I64   = 1 << 24,
+ARM_HWCAP2_A64_SME_F64F64   = 1 << 25,
+ARM_HWCAP2_A64_SME_I8I32= 1 << 26,
+ARM_HWCAP2_A64_SME_F16F32   = 1 << 27,
+ARM_HWCAP2_A64_SME_B16F32   = 1 << 28,
+ARM_HWCAP2_A64_SME_F32F32   = 1 << 29,
+ARM_HWCAP2_A64_SME_FA64 = 1 << 30,
 };
 
 #define ELF_HWCAP   get_elf_hwcap()
@@ -674,6 +686,14 @@ static uint32_t get_elf_hwcap2(void)
 GET_FEATURE_ID(aa64_rndr, ARM_HWCAP2_A64_RNG);
 GET_FEATURE_ID(aa64_bti, ARM_HWCAP2_A64_BTI);
 GET_FEATURE_ID(aa64_mte, ARM_HWCAP2_A64_MTE);
+GET_FEATURE_ID(aa64_sme, (ARM_HWCAP2_A64_SME |
+  ARM_HWCAP2_A64_SME_F32F32 |
+  ARM_HWCAP2_A64_SME_B16F32 |
+  ARM_HWCAP2_A64_SME_F16F32 |
+  ARM_HWCAP2_A64_SME_I8I32));
+GET_FEATURE_ID(aa64_sme_f64f64, ARM_HWCAP2_A64_SME_F64F64);
+GET_FEATURE_ID(aa64_sme_i16i64, ARM_HWCAP2_A64_SME_I16I64);
+GET_FEATURE_ID(aa64_sme_fa64, ARM_HWCAP2_A64_SME_FA64);
 
 return hwcaps;
 }
-- 
2.34.1




[PATCH v4 02/12] tests/vm: use 'cp' instead of 'ln' for temporary vm images

2022-07-08 Thread John Snow
If the initial setup fails, you've permanently altered the state of the
downloaded image in an unknowable way. Use 'cp' like our other test
setup scripts do.

Signed-off-by: John Snow 
Reviewed-by: Thomas Huth 
Reviewed-by: Daniel P. Berrangé 
---
 tests/vm/centos | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/vm/centos b/tests/vm/centos
index 5c7bc1c1a9a..be4f6ff2f14 100755
--- a/tests/vm/centos
+++ b/tests/vm/centos
@@ -34,7 +34,7 @@ class CentosVM(basevm.BaseVM):
 def build_image(self, img):
 cimg = 
self._download_with_cache("https://cloud.centos.org/centos/8/x86_64/images/CentOS-8-GenericCloud-8.3.2011-20201204.2.x86_64.qcow2")
 img_tmp = img + ".tmp"
-subprocess.check_call(["ln", "-f", cimg, img_tmp])
+subprocess.check_call(['cp', '-f', cimg, img_tmp])
 self.exec_qemu_img("resize", img_tmp, "50G")
 self.boot(img_tmp, extra_args = ["-cdrom", self.gen_cloud_init_iso()])
 self.wait_ssh()
-- 
2.34.3




[RISU PATCH v4 26/29] ppc64: Clean up reginfo handling

2022-07-08 Thread Richard Henderson
Several of the gp_reg[] elements are not relevant -- e.g. orig r3,
which is related to system calls.  Omit those from the original
reginfo_init(), so that any differences are automatically hidden.

Do not only compare bit 4 of CCR -- this register is 32 bits wide
with 8 cr subfields.  We should compare all of them.

Tidy reginfo_dump() output.  Especially, do not dump the non-
relevant fields.

Signed-off-by: Richard Henderson 
---
 risu_reginfo_ppc64.c | 114 +--
 1 file changed, 44 insertions(+), 70 deletions(-)

diff --git a/risu_reginfo_ppc64.c b/risu_reginfo_ppc64.c
index 134a152..eeb0460 100644
--- a/risu_reginfo_ppc64.c
+++ b/risu_reginfo_ppc64.c
@@ -21,19 +21,30 @@
 #include "risu.h"
 #include "risu_reginfo_ppc64.h"
 
-#define XER 37
-#define CCR 38
+/* Names for indexes within gregset_t, ignoring those irrelevant here */
+enum {
+NIP = 32,
+MSR = 33,
+CTR = 35,
+LNK = 36,
+XER = 37,
+CCR = 38,
+};
 
 const struct option * const arch_long_opts;
 const char * const arch_extra_help;
 
 static const char * const greg_names[NGREG] = {
-"r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
-"r8",  "r9", "r10", "r11", "r12", "r13", "r14", "r15",
-   "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
-   "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
-   [XER] = "xer",
-   [CCR] = "ccr",
+ "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
+ "r8",  "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
+"r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
+[NIP] = "nip",
+[MSR] = "msr",
+[CTR] = "ctr",
+[LNK] = "lnk",
+[XER] = "xer",
+[CCR] = "ccr",
 };
 
 void process_arch_opt(int opt, const char *arg)
@@ -61,8 +72,13 @@ void reginfo_init(struct reginfo *ri, ucontext_t *uc)
 ri->nip = uc->uc_mcontext.regs->nip - image_start_address;
 
 for (i = 0; i < NGREG; i++) {
-ri->gregs[i] = uc->uc_mcontext.gp_regs[i];
+/* Do not copy gp_reg entries not relevant to the context. */
+if (greg_names[i]) {
+ri->gregs[i] = uc->uc_mcontext.gp_regs[i];
+}
 }
+ri->gregs[1] = 0xdeadbeef;   /* sp */
+ri->gregs[13] = 0xdeadbeef;  /* gp */
 
 memcpy(ri->fpregs, uc->uc_mcontext.fp_regs, 32 * sizeof(double));
 ri->fpscr = uc->uc_mcontext.fp_regs[32];
@@ -76,79 +92,37 @@ void reginfo_init(struct reginfo *ri, ucontext_t *uc)
 /* reginfo_is_eq: compare the reginfo structs, returns nonzero if equal */
 int reginfo_is_eq(struct reginfo *m, struct reginfo *a)
 {
-int i;
-for (i = 0; i < 32; i++) {
-if (i == 1 || i == 13) {
-continue;
-}
-
-if (m->gregs[i] != a->gregs[i]) {
-return 0;
-}
-}
-
-if (m->gregs[XER] != a->gregs[XER]) {
-return 0;
-}
-
-if ((m->gregs[CCR] & 0x10) != (a->gregs[CCR] & 0x10)) {
-return 0;
-}
-
-for (i = 0; i < 32; i++) {
-if (m->fpregs[i] != a->fpregs[i]) {
-return 0;
-}
-}
-
-for (i = 0; i < 32; i++) {
-if (m->vrregs.vrregs[i][0] != a->vrregs.vrregs[i][0] ||
-m->vrregs.vrregs[i][1] != a->vrregs.vrregs[i][1] ||
-m->vrregs.vrregs[i][2] != a->vrregs.vrregs[i][2] ||
-m->vrregs.vrregs[i][3] != a->vrregs.vrregs[i][3]) {
-return 0;
-}
-}
-return 1;
+return memcmp(m, a, sizeof(*m)) == 0;
 }
 
 /* reginfo_dump: print state to a stream */
 void reginfo_dump(struct reginfo *ri, FILE * f)
 {
-int i;
+const char *sep;
+int i, j;
 
-fprintf(f, "  faulting insn 0x%x\n", ri->faulting_insn);
-fprintf(f, "  prev insn 0x%x\n", ri->prev_insn);
-fprintf(f, "  prev addr0x%" PRIx64 "\n\n", ri->nip);
+fprintf(f, "%6s: %08x\n", "insn", ri->faulting_insn);
+fprintf(f, "%6s: %016lx\n", "pc", ri->nip);
 
-for (i = 0; i < 16; i++) {
-fprintf(f, "\tr%2d: %16lx\tr%2d: %16lx\n", i, ri->gregs[i],
-i + 16, ri->gregs[i + 16]);
+sep = "";
+for (i = j = 0; i < NGREG; i++) {
+if (greg_names[i] != NULL) {
+fprintf(f, "%s%6s: %016lx", sep, greg_names[i], ri->gregs[i]);
+sep = (++j & 1 ? "  " : "\n");
+}
 }
 
-fprintf(f, "\n");
-fprintf(f, "\tnip: %16lx\n", ri->gregs[32]);
-fprintf(f, "\tmsr: %16lx\n", ri->gregs[33]);
-fprintf(f, "\torig r3: %16lx\n", ri->gregs[34]);
-fprintf(f, "\tctr: %16lx\n", ri->gregs[35]);
-fprintf(f, "\tlnk: %16lx\n", ri->gregs[36]);
-fprintf(f, "\txer: %16lx\n", ri->gregs[37]);
-fprintf(f, "\tccr: %16lx\n", ri->gregs[38]);
-fprintf(f, "\tmq : %16lx\n", ri->gregs[39]);
-fprintf(f, "\ttrap   : %16lx\n", ri->gregs[40]);
-fprintf(f, "\tdar: %16lx\n", ri->gregs[41]);
-fprintf(f, "\tdsisr  : %16lx\n", ri->gregs[42]);
-fprintf(f, "\tresult : %16lx\n", ri->gregs[43]);
-  

[PATCH v4 05/12] tests/vm: upgrade Ubuntu 18.04 VM to 20.04

2022-07-08 Thread John Snow
18.04 has fallen out of our support window, so move ubuntu.aarch64
forward to ubuntu 20.04, which is now our oldest supported Ubuntu
release.

Notes:

The checksum of the rolling cloud image changes periodically; use a fixed
point-in-time image with a known checksum so that the image isn't
re-downloaded on every single invocation. (The checksum for the 18.04
image was already incorrect at the time of writing.)

Just like the centos.aarch64 test, this test currently seems very
flaky when run as a TCG test.

Signed-off-by: John Snow 
Reviewed-by: Daniel P. Berrangé 
---
 tests/vm/ubuntu.aarch64 | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tests/vm/ubuntu.aarch64 b/tests/vm/ubuntu.aarch64
index b291945a7e9..666947393bd 100755
--- a/tests/vm/ubuntu.aarch64
+++ b/tests/vm/ubuntu.aarch64
@@ -32,9 +32,13 @@ DEFAULT_CONFIG = {
 class UbuntuAarch64VM(ubuntuvm.UbuntuVM):
 name = "ubuntu.aarch64"
 arch = "aarch64"
-image_name = "ubuntu-18.04-server-cloudimg-arm64.img"
-image_link = "https://cloud-images.ubuntu.com/releases/18.04/release/" + 
image_name
-
image_sha256="0fdcba761965735a8a903d8b88df8e47f156f48715c00508e4315c506d7d3cb1"
+# NOTE: The Ubuntu 20.04 cloud images are periodically updated. The
+# fixed image chosen below is the latest release at time of
+# writing. Using a rolling latest instead would mean that the SHA
+# would be incorrect at an indeterminate point in the future.
+image_name = "focal-server-cloudimg-arm64.img"
+image_link = "https://cloud-images.ubuntu.com/focal/20220615/" + image_name
+
image_sha256="95a027336e197debe88c92ff2e554598e23c409139e1e750b71b3b820b514832"
 BUILD_SCRIPT = """
 set -e;
 cd $(mktemp -d);
-- 
2.34.3




[PATCH v4 12/12] iotests: fix copy-before-write for macOS and FreeBSD

2022-07-08 Thread John Snow
From: Vladimir Sementsov-Ogievskiy 

Do not merge: this is a copy of Vladimir's fix that will be taken in
through the iotests tree.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: John Snow 
---
 tests/qemu-iotests/tests/copy-before-write | 5 +
 1 file changed, 5 insertions(+)

diff --git a/tests/qemu-iotests/tests/copy-before-write 
b/tests/qemu-iotests/tests/copy-before-write
index 16efebbf8f0..56937b9dfff 100755
--- a/tests/qemu-iotests/tests/copy-before-write
+++ b/tests/qemu-iotests/tests/copy-before-write
@@ -192,6 +192,11 @@ read 1048576/1048576 bytes at offset 0
 
 def test_timeout_break_guest(self):
 log = self.do_cbw_timeout('break-guest-write')
+# macOS and FreeBSD tend to represent ETIMEDOUT as
+# "Operation timed out", when Linux prefer
+# "Connection timed out"
+log = log.replace('Operation timed out',
+  'Connection timed out')
 self.assertEqual(log, """\
 wrote 524288/524288 bytes at offset 0
 512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-- 
2.34.3




[PATCH v6 41/45] linux-user: Rename sve prctls

2022-07-08 Thread Richard Henderson
Add "sve" to the sve prctl functions, to distinguish
them from the coming "sme" prctls with similar names.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 linux-user/aarch64/target_prctl.h |  8 
 linux-user/syscall.c  | 12 ++--
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/linux-user/aarch64/target_prctl.h 
b/linux-user/aarch64/target_prctl.h
index 1d440ffbea..40481e6663 100644
--- a/linux-user/aarch64/target_prctl.h
+++ b/linux-user/aarch64/target_prctl.h
@@ -6,7 +6,7 @@
 #ifndef AARCH64_TARGET_PRCTL_H
 #define AARCH64_TARGET_PRCTL_H
 
-static abi_long do_prctl_get_vl(CPUArchState *env)
+static abi_long do_prctl_sve_get_vl(CPUArchState *env)
 {
 ARMCPU *cpu = env_archcpu(env);
 if (cpu_isar_feature(aa64_sve, cpu)) {
@@ -14,9 +14,9 @@ static abi_long do_prctl_get_vl(CPUArchState *env)
 }
 return -TARGET_EINVAL;
 }
-#define do_prctl_get_vl do_prctl_get_vl
+#define do_prctl_sve_get_vl do_prctl_sve_get_vl
 
-static abi_long do_prctl_set_vl(CPUArchState *env, abi_long arg2)
+static abi_long do_prctl_sve_set_vl(CPUArchState *env, abi_long arg2)
 {
 /*
  * We cannot support either PR_SVE_SET_VL_ONEXEC or PR_SVE_VL_INHERIT.
@@ -47,7 +47,7 @@ static abi_long do_prctl_set_vl(CPUArchState *env, abi_long 
arg2)
 }
 return -TARGET_EINVAL;
 }
-#define do_prctl_set_vl do_prctl_set_vl
+#define do_prctl_sve_set_vl do_prctl_sve_set_vl
 
 static abi_long do_prctl_reset_keys(CPUArchState *env, abi_long arg2)
 {
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 669add74c1..cbde82c907 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -6362,11 +6362,11 @@ static abi_long do_prctl_inval1(CPUArchState *env, 
abi_long arg2)
 #ifndef do_prctl_set_fp_mode
 #define do_prctl_set_fp_mode do_prctl_inval1
 #endif
-#ifndef do_prctl_get_vl
-#define do_prctl_get_vl do_prctl_inval0
+#ifndef do_prctl_sve_get_vl
+#define do_prctl_sve_get_vl do_prctl_inval0
 #endif
-#ifndef do_prctl_set_vl
-#define do_prctl_set_vl do_prctl_inval1
+#ifndef do_prctl_sve_set_vl
+#define do_prctl_sve_set_vl do_prctl_inval1
 #endif
 #ifndef do_prctl_reset_keys
 #define do_prctl_reset_keys do_prctl_inval1
@@ -6431,9 +6431,9 @@ static abi_long do_prctl(CPUArchState *env, abi_long 
option, abi_long arg2,
 case PR_SET_FP_MODE:
 return do_prctl_set_fp_mode(env, arg2);
 case PR_SVE_GET_VL:
-return do_prctl_get_vl(env);
+return do_prctl_sve_get_vl(env);
 case PR_SVE_SET_VL:
-return do_prctl_set_vl(env, arg2);
+return do_prctl_sve_set_vl(env, arg2);
 case PR_PAC_RESET_KEYS:
 if (arg3 || arg4 || arg5) {
 return -TARGET_EINVAL;
-- 
2.34.1




[PATCH v4 09/12] tests/vm: Remove docker cross-compile test from CentOS VM

2022-07-08 Thread John Snow
The fedora container has since been split apart, so there's no suitable
nearby target that would support "test-mingw" as it requires both x32
and x64 support -- so neither fedora-cross-win32 nor fedora-cross-win64
would be truly suitable.

Just remove this test as superfluous with our current CI infrastructure.

Signed-off-by: John Snow 
Reviewed-by: Daniel P. Berrangé 
---
 tests/vm/centos | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/vm/centos b/tests/vm/centos
index 3a527c47b3d..097a9ca14d3 100755
--- a/tests/vm/centos
+++ b/tests/vm/centos
@@ -28,7 +28,6 @@ class CentosVM(basevm.BaseVM):
 tar -xf $SRC_ARCHIVE;
 make docker-test-block@centos8 {verbose} J={jobs} NETWORK=1;
 make docker-test-quick@centos8 {verbose} J={jobs} NETWORK=1;
-make docker-test-mingw@fedora  {verbose} J={jobs} NETWORK=1;
 """
 
 def build_image(self, img):
-- 
2.34.3




[PATCH v6 44/45] target/arm: Enable SME for user-only

2022-07-08 Thread Richard Henderson
Enable SME, TPIDR2_EL0, and FA64 if supported by the cpu.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/cpu.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 9b54443843..5de7e097e9 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -210,6 +210,17 @@ static void arm_cpu_reset(DeviceState *dev)
  CPACR_EL1, ZEN, 3);
 env->vfp.zcr_el[1] = cpu->sve_default_vq - 1;
 }
+/* and for SME instructions, with default vector length, and TPIDR2 */
+if (cpu_isar_feature(aa64_sme, cpu)) {
+env->cp15.sctlr_el[1] |= SCTLR_EnTP2;
+env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
+ CPACR_EL1, SMEN, 3);
+env->vfp.smcr_el[1] = cpu->sme_default_vq - 1;
+if (cpu_isar_feature(aa64_sme_fa64, cpu)) {
+env->vfp.smcr_el[1] = FIELD_DP64(env->vfp.smcr_el[1],
+ SMCR, FA64, 1);
+}
+}
 /*
  * Enable 48-bit address space (TODO: take reserved_va into account).
  * Enable TBI0 but not TBI1.
-- 
2.34.1




[PATCH v6 33/45] linux-user/aarch64: Clear tpidr2_el0 if CLONE_SETTLS

2022-07-08 Thread Richard Henderson
Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 linux-user/aarch64/target_cpu.h | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/linux-user/aarch64/target_cpu.h b/linux-user/aarch64/target_cpu.h
index 97a477bd3e..f90359faf2 100644
--- a/linux-user/aarch64/target_cpu.h
+++ b/linux-user/aarch64/target_cpu.h
@@ -34,10 +34,13 @@ static inline void cpu_clone_regs_parent(CPUARMState *env, 
unsigned flags)
 
 static inline void cpu_set_tls(CPUARMState *env, target_ulong newtls)
 {
-/* Note that AArch64 Linux keeps the TLS pointer in TPIDR; this is
+/*
+ * Note that AArch64 Linux keeps the TLS pointer in TPIDR; this is
  * different from AArch32 Linux, which uses TPIDRRO.
  */
 env->cp15.tpidr_el[0] = newtls;
+/* TPIDR2_EL0 is cleared with CLONE_SETTLS. */
+env->cp15.tpidr2_el0 = 0;
 }
 
 static inline abi_ulong get_sp_from_cpustate(CPUARMState *state)
-- 
2.34.1




[RISU PATCH v4 15/29] Rearrange reginfo and memblock buffers

2022-07-08 Thread Richard Henderson
For send_register_info from master_sigill, do not keep a
reginfo buffer on the stack.  At the moment, this struct
is quite large for aarch64.

Put the two reginfo buffers into an array, for the benefit
of future dumping.  For recv_and_compare_register_info,
index this array with constants, so it's a simple rename.

Signed-off-by: Richard Henderson 
---
 risu.c | 58 --
 1 file changed, 32 insertions(+), 26 deletions(-)

diff --git a/risu.c b/risu.c
index a0e20d5..b91ad38 100644
--- a/risu.c
+++ b/risu.c
@@ -28,10 +28,16 @@
 #include "config.h"
 #include "risu.h"
 
-static void *memblock;
-static struct reginfo master_ri, apprentice_ri;
-static uint8_t master_memblock[MEMBLOCKLEN];
+enum {
+MASTER = 0, APPRENTICE = 1
+};
 
+static struct reginfo ri[2];
+static uint8_t other_memblock[MEMBLOCKLEN];
+static trace_header_t header;
+
+/* Memblock pointer into the execution image. */
+static void *memblock;
 
 static int comm_fd;
 static bool trace;
@@ -102,16 +108,15 @@ static void respond(RisuResult r)
 
 static RisuResult send_register_info(void *uc)
 {
-struct reginfo ri;
-trace_header_t header;
+uint64_t paramreg;
 RisuResult res;
 RisuOp op;
 
-reginfo_init(, uc);
-op = get_risuop();
+reginfo_init([MASTER], uc);
+op = get_risuop([MASTER]);
 
 /* Write a header with PC/op to keep in sync */
-header.pc = get_pc();
+header.pc = get_pc([MASTER]);
 header.risu_op = op;
 res = write_buffer(, sizeof(header));
 if (res != RES_OK) {
@@ -126,18 +131,19 @@ static RisuResult send_register_info(void *uc)
  * Do a simple register compare on (a) explicit request
  * (b) end of test (c) a non-risuop UNDEF
  */
-res = write_buffer(, reginfo_size());
+res = write_buffer([MASTER], reginfo_size());
 /* For OP_TEST_END, force exit. */
 if (res == RES_OK && op == OP_TESTEND) {
 res = RES_END;
 }
 break;
 case OP_SETMEMBLOCK:
-memblock = (void *)(uintptr_t)get_reginfo_paramreg();
+paramreg = get_reginfo_paramreg([MASTER]);
+memblock = (void *)(uintptr_t)paramreg;
 break;
 case OP_GETMEMBLOCK:
-set_ucontext_paramreg(uc,
-  get_reginfo_paramreg() + (uintptr_t)memblock);
+paramreg = get_reginfo_paramreg([MASTER]);
+set_ucontext_paramreg(uc, paramreg + (uintptr_t)memblock);
 break;
 case OP_COMPAREMEM:
 return write_buffer(memblock, MEMBLOCKLEN);
@@ -162,12 +168,12 @@ static void master_sigill(int sig, siginfo_t *si, void 
*uc)
 
 static RisuResult recv_and_compare_register_info(void *uc)
 {
+uint64_t paramreg;
 RisuResult res;
-trace_header_t header;
 RisuOp op;
 
-reginfo_init(_ri, uc);
-op = get_risuop(_ri);
+reginfo_init([APPRENTICE], uc);
+op = get_risuop([APPRENTICE]);
 
 res = read_buffer(, sizeof(header));
 if (res != RES_OK) {
@@ -190,10 +196,10 @@ static RisuResult recv_and_compare_register_info(void *uc)
 /* Do a simple register compare on (a) explicit request
  * (b) end of test (c) a non-risuop UNDEF
  */
-res = read_buffer(_ri, reginfo_size());
+res = read_buffer([MASTER], reginfo_size());
 if (res != RES_OK) {
 /* fail */
-} else if (!reginfo_is_eq(_ri, _ri)) {
+} else if (!reginfo_is_eq([MASTER], [APPRENTICE])) {
 /* register mismatch */
 res = RES_MISMATCH_REG;
 } else if (op == OP_TESTEND) {
@@ -202,17 +208,18 @@ static RisuResult recv_and_compare_register_info(void *uc)
 respond(res == RES_OK ? RES_OK : RES_END);
 break;
 case OP_SETMEMBLOCK:
-memblock = (void *)(uintptr_t)get_reginfo_paramreg(_ri);
+paramreg = get_reginfo_paramreg([APPRENTICE]);
+memblock = (void *)(uintptr_t)paramreg;
 break;
 case OP_GETMEMBLOCK:
-set_ucontext_paramreg(uc, get_reginfo_paramreg(_ri) +
-  (uintptr_t)memblock);
+paramreg = get_reginfo_paramreg([APPRENTICE]);
+set_ucontext_paramreg(uc, paramreg + (uintptr_t)memblock);
 break;
 case OP_COMPAREMEM:
-res = read_buffer(master_memblock, MEMBLOCKLEN);
+res = read_buffer(other_memblock, MEMBLOCKLEN);
 if (res != RES_OK) {
 /* fail */
-} else if (memcmp(memblock, master_memblock, MEMBLOCKLEN) != 0) {
+} else if (memcmp(memblock, other_memblock, MEMBLOCKLEN) != 0) {
 /* memory mismatch */
 res = RES_MISMATCH_MEM;
 }
@@ -221,7 +228,6 @@ static RisuResult recv_and_compare_register_info(void *uc)
 default:
 abort();
 }
-
 return res;
 }
 
@@ -342,10 +348,10 @@ static int apprentice(void)
 case RES_MISMATCH_REG:
 fprintf(stderr, "mismatch reg after %zd checkpoints\n", signal_count);
 fprintf(stderr, 

[PATCH v4 07/12] tests/vm: remove duplicate 'centos' VM test

2022-07-08 Thread John Snow
This is listed twice by accident; we require genisoimage to run the
test, so remove the unconditional entry.

Signed-off-by: John Snow 
Reviewed-by: Thomas Huth 
Reviewed-by: Daniel P. Berrangé 
---
 tests/vm/Makefile.include | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/vm/Makefile.include b/tests/vm/Makefile.include
index a94f0ebf7f1..8d2a164552d 100644
--- a/tests/vm/Makefile.include
+++ b/tests/vm/Makefile.include
@@ -15,7 +15,7 @@ endif
 
 EFI_AARCH64 = $(wildcard $(BUILD_DIR)/pc-bios/edk2-aarch64-code.fd)
 
-X86_IMAGES := freebsd netbsd openbsd centos fedora haiku.x86_64
+X86_IMAGES := freebsd netbsd openbsd fedora haiku.x86_64
 ifneq ($(GENISOIMAGE),)
 X86_IMAGES += centos
 ifneq ($(EFI_AARCH64),)
-- 
2.34.3




[RISU PATCH v4 14/29] Merge reginfo.c into risu.c

2022-07-08 Thread Richard Henderson
The distinction between the two is artificial.  Following
patches will rearrange the functions involved to make it
easier for dumping of the trace file.

Signed-off-by: Richard Henderson 
---
 Makefile  |   2 +-
 risu.h|  28 +-
 reginfo.c | 151 --
 risu.c| 129 --
 4 files changed, 126 insertions(+), 184 deletions(-)
 delete mode 100644 reginfo.c

diff --git a/Makefile b/Makefile
index 6ab014a..ad7f879 100644
--- a/Makefile
+++ b/Makefile
@@ -20,7 +20,7 @@ CFLAGS ?= -g
 ALL_CFLAGS = -Wall -D_GNU_SOURCE -DARCH=$(ARCH) -U$(ARCH) $(BUILD_INC) 
$(CFLAGS) $(EXTRA_CFLAGS)
 
 PROG=risu
-SRCS=risu.c comms.c reginfo.c risu_$(ARCH).c risu_reginfo_$(ARCH).c
+SRCS=risu.c comms.c risu_$(ARCH).c risu_reginfo_$(ARCH).c
 HDRS=risu.h risu_reginfo_$(ARCH).h
 BINS=test_$(ARCH).bin
 
diff --git a/risu.h b/risu.h
index 77d6128..dd9fda5 100644
--- a/risu.h
+++ b/risu.h
@@ -35,7 +35,6 @@ void process_arch_opt(int opt, const char *arg);
 #include REGINFO_HEADER(ARCH)
 
 extern uintptr_t image_start_address;
-extern void *memblock;
 
 /* Ops code under test can request from risu: */
 typedef enum {
@@ -83,34 +82,9 @@ void send_response_byte(int sock, int resp);
 
 /* Functions operating on reginfo */
 
-/* Function prototypes for read/write helper functions. */
-RisuResult write_buffer(void *ptr, size_t bytes);
-RisuResult read_buffer(void *ptr, size_t bytes);
-void respond(RisuResult response);
-
-/*
- * Send the register information from the struct ucontext down the socket.
- * NB: called from a signal handler.
- */
-RisuResult send_register_info(void *uc);
-
-/*
- * Read register info from the socket and compare it with that from the
- * ucontext.
- * NB: called from a signal handler.
- */
-RisuResult recv_and_compare_register_info(void *uc);
-
-/*
- * Print a useful report on the status of the last reg comparison
- * done in recv_and_compare_register_info().
- */
-void report_mismatch_reg(void);
-
 /* Interface provided by CPU-specific code: */
 
-/* Move the PC past this faulting insn by adjusting ucontext
- */
+/* Move the PC past this faulting insn by adjusting ucontext. */
 void advance_pc(void *uc);
 
 /* Set the parameter register in a ucontext_t to the specified value.
diff --git a/reginfo.c b/reginfo.c
deleted file mode 100644
index a007f16..000
--- a/reginfo.c
+++ /dev/null
@@ -1,151 +0,0 @@
-/**
- * Copyright (c) 2017 Linaro Limited
- * All rights reserved. This program and the accompanying materials
- * are made available under the terms of the Eclipse Public License v1.0
- * which accompanies this distribution, and is available at
- * http://www.eclipse.org/legal/epl-v10.html
- *
- * Contributors:
- * Peter Maydell (Linaro) - initial implementation
- */
-
-#include 
-#include 
-#include 
-#include "risu.h"
-
-static struct reginfo master_ri, apprentice_ri;
-static uint8_t master_memblock[MEMBLOCKLEN];
-
-RisuResult send_register_info(void *uc)
-{
-struct reginfo ri;
-trace_header_t header;
-RisuResult res;
-RisuOp op;
-
-reginfo_init(, uc);
-op = get_risuop();
-
-/* Write a header with PC/op to keep in sync */
-header.pc = get_pc();
-header.risu_op = op;
-res = write_buffer(, sizeof(header));
-if (res != RES_OK) {
-return res;
-}
-
-switch (op) {
-case OP_COMPARE:
-case OP_TESTEND:
-case OP_SIGILL:
-/*
- * Do a simple register compare on (a) explicit request
- * (b) end of test (c) a non-risuop UNDEF
- */
-res = write_buffer(, reginfo_size());
-/* For OP_TEST_END, force exit. */
-if (res == RES_OK && op == OP_TESTEND) {
-res = RES_END;
-}
-break;
-case OP_SETMEMBLOCK:
-memblock = (void *)(uintptr_t)get_reginfo_paramreg();
-break;
-case OP_GETMEMBLOCK:
-set_ucontext_paramreg(uc,
-  get_reginfo_paramreg() + (uintptr_t)memblock);
-break;
-case OP_COMPAREMEM:
-return write_buffer(memblock, MEMBLOCKLEN);
-default:
-abort();
-}
-return res;
-}
-
-/* Read register info from the socket and compare it with that from the
- * ucontext. Return 0 for match, 1 for end-of-test, 2 for mismatch.
- * NB: called from a signal handler.
- *
- * We don't have any kind of identifying info in the incoming data
- * that says whether it is register or memory data, so if the two
- * sides get out of sync then we will fail obscurely.
- */
-RisuResult recv_and_compare_register_info(void *uc)
-{
-RisuResult res;
-trace_header_t header;
-RisuOp op;
-
-reginfo_init(_ri, uc);
-op = get_risuop(_ri);
-
-res = read_buffer(, sizeof(header));
-if (res != RES_OK) {
-return res;
-}
-
-if 

[PATCH v6 43/45] target/arm: Only set ZEN in reset if SVE present

2022-07-08 Thread Richard Henderson
There's no reason to set CPACR_EL1.ZEN if SVE is disabled.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/cpu.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 9c58be8b14..9b54443843 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -204,11 +204,10 @@ static void arm_cpu_reset(DeviceState *dev)
 /* and to the FP/Neon instructions */
 env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
  CPACR_EL1, FPEN, 3);
-/* and to the SVE instructions */
-env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
- CPACR_EL1, ZEN, 3);
-/* with reasonable vector length */
+/* and to the SVE instructions, with default vector length */
 if (cpu_isar_feature(aa64_sve, cpu)) {
+env->cp15.cpacr_el1 = FIELD_DP64(env->cp15.cpacr_el1,
+ CPACR_EL1, ZEN, 3);
 env->vfp.zcr_el[1] = cpu->sve_default_vq - 1;
 }
 /*
-- 
2.34.1




[PATCH v6 34/45] linux-user/aarch64: Reset PSTATE.SM on syscalls

2022-07-08 Thread Richard Henderson
Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 linux-user/aarch64/cpu_loop.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/linux-user/aarch64/cpu_loop.c b/linux-user/aarch64/cpu_loop.c
index f7ef36cd9f..9875d609a9 100644
--- a/linux-user/aarch64/cpu_loop.c
+++ b/linux-user/aarch64/cpu_loop.c
@@ -89,6 +89,15 @@ void cpu_loop(CPUARMState *env)
 
 switch (trapnr) {
 case EXCP_SWI:
+/*
+ * On syscall, PSTATE.ZA is preserved, along with the ZA matrix.
+ * PSTATE.SM is cleared, per SMSTOP, which does ResetSVEState.
+ */
+if (FIELD_EX64(env->svcr, SVCR, SM)) {
+env->svcr = FIELD_DP64(env->svcr, SVCR, SM, 0);
+arm_rebuild_hflags(env);
+arm_reset_sve_state(env);
+}
 ret = do_syscall(env,
  env->xregs[8],
  env->xregs[0],
-- 
2.34.1




[PATCH v4 08/12] tests/vm: add 1GB extra memory per core

2022-07-08 Thread John Snow
If you try to run a 16 or 32 threaded test, you're going to run out of
memory very quickly with qom-test and a few others. Bump the memory
limit to try to scale with larger-core machines.

Granted, this means that a 16 core processor is going to ask for 16GB,
but you *probably* meet that requirement if you have such a machine.

512MB per core didn't seem to be enough to avoid ENOMEM and SIGABRTs in
the test cases in practice on a six core machine; so I bumped it up to
1GB which seemed to help.

Add this magic in early to the configuration process so that the
config file, if provided, can still override it.

Signed-off-by: John Snow 
Reviewed-by: Daniel P. Berrangé 
---
 tests/vm/basevm.py | 5 +
 1 file changed, 5 insertions(+)

diff --git a/tests/vm/basevm.py b/tests/vm/basevm.py
index d7d0413df35..4fd9af10b7f 100644
--- a/tests/vm/basevm.py
+++ b/tests/vm/basevm.py
@@ -99,6 +99,11 @@ def __init__(self, args, config=None):
 self._source_path = args.source_path
 # Allow input config to override defaults.
 self._config = DEFAULT_CONFIG.copy()
+
+# 1GB per core, minimum of 4. This is only a default.
+mem = max(4, args.jobs)
+self._config['memory'] = f"{mem}G"
+
 if config != None:
 self._config.update(config)
 self.validate_ssh_keys()
-- 
2.34.3




[PATCH v6 35/45] linux-user/aarch64: Add SM bit to SVE signal context

2022-07-08 Thread Richard Henderson
Make sure to zero the currently reserved fields.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 linux-user/aarch64/signal.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
index 7da0e36c6d..3cef2f44cf 100644
--- a/linux-user/aarch64/signal.c
+++ b/linux-user/aarch64/signal.c
@@ -78,7 +78,8 @@ struct target_extra_context {
 struct target_sve_context {
 struct target_aarch64_ctx head;
 uint16_t vl;
-uint16_t reserved[3];
+uint16_t flags;
+uint16_t reserved[2];
 /* The actual SVE data immediately follows.  It is laid out
  * according to TARGET_SVE_SIG_{Z,P}REG_OFFSET, based off of
  * the original struct pointer.
@@ -101,6 +102,8 @@ struct target_sve_context {
 #define TARGET_SVE_SIG_CONTEXT_SIZE(VQ) \
 (TARGET_SVE_SIG_PREG_OFFSET(VQ, 17))
 
+#define TARGET_SVE_SIG_FLAG_SM  1
+
 struct target_rt_sigframe {
 struct target_siginfo info;
 struct target_ucontext uc;
@@ -177,9 +180,13 @@ static void target_setup_sve_record(struct 
target_sve_context *sve,
 {
 int i, j;
 
+memset(sve, 0, sizeof(*sve));
 __put_user(TARGET_SVE_MAGIC, >head.magic);
 __put_user(size, >head.size);
 __put_user(vq * TARGET_SVE_VQ_BYTES, >vl);
+if (FIELD_EX64(env->svcr, SVCR, SM)) {
+__put_user(TARGET_SVE_SIG_FLAG_SM, >flags);
+}
 
 /* Note that SVE regs are stored as a byte stream, with each byte element
  * at a subsequent address.  This corresponds to a little-endian store
-- 
2.34.1




[PATCH v4 10/12] tests/qemu-iotests: hotfix for 307, 223 output

2022-07-08 Thread John Snow
Do not merge; staged in Hanna's branch.

Signed-off-by: John Snow 
---
 tests/qemu-iotests/223.out | 4 ++--
 tests/qemu-iotests/307.out | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/qemu-iotests/223.out b/tests/qemu-iotests/223.out
index 06479415312..26fb347c5da 100644
--- a/tests/qemu-iotests/223.out
+++ b/tests/qemu-iotests/223.out
@@ -93,7 +93,7 @@ exports available: 3
  export: 'n2'
   description: some text
   size:  4194304
-  flags: 0xced ( flush fua trim zeroes df cache fast-zero )
+  flags: 0xded ( flush fua trim zeroes df multi cache fast-zero )
   min block: 1
   opt block: 4096
   max block: 33554432
@@ -212,7 +212,7 @@ exports available: 3
  export: 'n2'
   description: some text
   size:  4194304
-  flags: 0xced ( flush fua trim zeroes df cache fast-zero )
+  flags: 0xded ( flush fua trim zeroes df multi cache fast-zero )
   min block: 1
   opt block: 4096
   max block: 33554432
diff --git a/tests/qemu-iotests/307.out b/tests/qemu-iotests/307.out
index ec8d2be0e0a..390f05d1b78 100644
--- a/tests/qemu-iotests/307.out
+++ b/tests/qemu-iotests/307.out
@@ -83,7 +83,7 @@ exports available: 2
  export: 'export1'
   description: This is the writable second export
   size:  67108864
-  flags: 0xced ( flush fua trim zeroes df cache fast-zero )
+  flags: 0xded ( flush fua trim zeroes df multi cache fast-zero )
   min block: XXX
   opt block: XXX
   max block: XXX
@@ -109,7 +109,7 @@ exports available: 1
  export: 'export1'
   description: This is the writable second export
   size:  67108864
-  flags: 0xced ( flush fua trim zeroes df cache fast-zero )
+  flags: 0xded ( flush fua trim zeroes df multi cache fast-zero )
   min block: XXX
   opt block: XXX
   max block: XXX
-- 
2.34.3




[RISU PATCH v4 05/29] Use EXIT_FAILURE, EXIT_SUCCESS

2022-07-08 Thread Richard Henderson
Some of the time we exit via the return value from main.
This can make it easier to tell what it is we're returning.

Reviewed-by: Alex Bennée 
Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 comms.c| 26 +-
 risu.c | 22 +++---
 risu_reginfo_aarch64.c |  4 ++--
 risu_reginfo_i386.c|  2 +-
 4 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/comms.c b/comms.c
index 6946fd9..861e845 100644
--- a/comms.c
+++ b/comms.c
@@ -31,7 +31,7 @@ int apprentice_connect(const char *hostname, int port)
 sock = socket(PF_INET, SOCK_STREAM, 0);
 if (sock < 0) {
 perror("socket");
-exit(1);
+exit(EXIT_FAILURE);
 }
 struct hostent *hostinfo;
 sa.sin_family = AF_INET;
@@ -39,12 +39,12 @@ int apprentice_connect(const char *hostname, int port)
 hostinfo = gethostbyname(hostname);
 if (!hostinfo) {
 fprintf(stderr, "Unknown host %s\n", hostname);
-exit(1);
+exit(EXIT_FAILURE);
 }
 sa.sin_addr = *(struct in_addr *) hostinfo->h_addr;
 if (connect(sock, (struct sockaddr *) , sizeof(sa)) < 0) {
 perror("connect");
-exit(1);
+exit(EXIT_FAILURE);
 }
 return sock;
 }
@@ -56,13 +56,13 @@ int master_connect(int port)
 sock = socket(PF_INET, SOCK_STREAM, 0);
 if (sock < 0) {
 perror("socket");
-exit(1);
+exit(EXIT_FAILURE);
 }
 int sora = 1;
 if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, , sizeof(sora)) !=
 0) {
 perror("setsockopt(SO_REUSEADDR)");
-exit(1);
+exit(EXIT_FAILURE);
 }
 
 sa.sin_family = AF_INET;
@@ -70,11 +70,11 @@ int master_connect(int port)
 sa.sin_addr.s_addr = htonl(INADDR_ANY);
 if (bind(sock, (struct sockaddr *) , sizeof(sa)) < 0) {
 perror("bind");
-exit(1);
+exit(EXIT_FAILURE);
 }
 if (listen(sock, 1) < 0) {
 perror("listen");
-exit(1);
+exit(EXIT_FAILURE);
 }
 /* Just block until we get a connection */
 fprintf(stderr, "master: waiting for connection on port %d...\n",
@@ -84,7 +84,7 @@ int master_connect(int port)
 int nsock = accept(sock, (struct sockaddr *) , );
 if (nsock < 0) {
 perror("accept");
-exit(1);
+exit(EXIT_FAILURE);
 }
 /* We're done with the server socket now */
 close(sock);
@@ -104,7 +104,7 @@ static void recv_bytes(int sock, void *pkt, int pktlen)
 continue;
 }
 perror("read failed");
-exit(1);
+exit(EXIT_FAILURE);
 }
 pktlen -= i;
 p += i;
@@ -127,7 +127,7 @@ static void recv_and_discard_bytes(int sock, int pktlen)
 continue;
 }
 perror("read failed");
-exit(1);
+exit(EXIT_FAILURE);
 }
 pktlen -= i;
 }
@@ -186,12 +186,12 @@ int send_data_pkt(int sock, void *pkt, int pktlen)
 
 if (safe_writev(sock, iov, 2) == -1) {
 perror("writev failed");
-exit(1);
+exit(EXIT_FAILURE);
 }
 
 if (read(sock, , 1) != 1) {
 perror("read failed");
-exit(1);
+exit(EXIT_FAILURE);
 }
 return resp;
 }
@@ -217,6 +217,6 @@ void send_response_byte(int sock, int resp)
 unsigned char r = resp;
 if (write(sock, , 1) != 1) {
 perror("write failed");
-exit(1);
+exit(EXIT_FAILURE);
 }
 }
diff --git a/risu.c b/risu.c
index 819b786..26dc116 100644
--- a/risu.c
+++ b/risu.c
@@ -153,13 +153,13 @@ void apprentice_sigill(int sig, siginfo_t *si, void *uc)
 return;
 case 1:
 /* end of test */
-exit(0);
+exit(EXIT_SUCCESS);
 default:
 /* mismatch */
 if (trace) {
 siglongjmp(jmpbuf, 1);
 }
-exit(1);
+exit(EXIT_FAILURE);
 }
 }
 
@@ -173,7 +173,7 @@ static void set_sigill_handler(void (*fn) (int, siginfo_t 
*, void *))
 sigemptyset(_mask);
 if (sigaction(SIGILL, , 0) != 0) {
 perror("sigaction");
-exit(1);
+exit(EXIT_FAILURE);
 }
 }
 
@@ -190,11 +190,11 @@ void load_image(const char *imgfile)
 int fd = open(imgfile, O_RDONLY);
 if (fd < 0) {
 fprintf(stderr, "failed to open image file %s\n", imgfile);
-exit(1);
+exit(EXIT_FAILURE);
 }
 if (fstat(fd, ) != 0) {
 perror("fstat");
-exit(1);
+exit(EXIT_FAILURE);
 }
 size_t len = st.st_size;
 void *addr;
@@ -207,7 +207,7 @@ void load_image(const char *imgfile)
  0);
 if (!addr) {
 perror("mmap");
-exit(1);
+exit(EXIT_FAILURE);
 }
 close(fd);
 image_start = addr;
@@ -226,7 +226,7 @@ int master(void)
 if (trace) {
 fprintf(stderr, "trace complete after %zd checkpoints\n",
 signal_count);
-return 

[PATCH v6 37/45] linux-user/aarch64: Do not allow duplicate or short sve records

2022-07-08 Thread Richard Henderson
In parse_user_sigframe, the kernel rejects duplicate sve records,
or records that are smaller than the header.  We were silently
allowing these cases to pass, dropping the record.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 linux-user/aarch64/signal.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
index 8b352abb97..8fbe98d72f 100644
--- a/linux-user/aarch64/signal.c
+++ b/linux-user/aarch64/signal.c
@@ -318,10 +318,13 @@ static int target_restore_sigframe(CPUARMState *env,
 break;
 
 case TARGET_SVE_MAGIC:
+if (sve || size < sizeof(struct target_sve_context)) {
+goto err;
+}
 if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
 vq = sve_vq(env);
 sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
-if (!sve && size == sve_size) {
+if (size == sve_size) {
 sve = (struct target_sve_context *)ctx;
 break;
 }
-- 
2.34.1




[PATCH v6 31/45] target/arm: Reset streaming sve state on exception boundaries

2022-07-08 Thread Richard Henderson
We can handle both exception entry and exception return by
hooking into aarch64_sve_change_el.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/helper.c | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index 73a5b2b86d..cfcad97ce0 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -11242,6 +11242,19 @@ void aarch64_sve_change_el(CPUARMState *env, int 
old_el,
 return;
 }
 
+old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64;
+new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64;
+
+/*
+ * Both AArch64.TakeException and AArch64.ExceptionReturn
+ * invoke ResetSVEState when taking an exception from, or
+ * returning to, AArch32 state when PSTATE.SM is enabled.
+ */
+if (old_a64 != new_a64 && FIELD_EX64(env->svcr, SVCR, SM)) {
+arm_reset_sve_state(env);
+return;
+}
+
 /*
  * DDI0584A.d sec 3.2: "If SVE instructions are disabled or trapped
  * at ELx, or not available because the EL is in AArch32 state, then
@@ -11254,10 +11267,8 @@ void aarch64_sve_change_el(CPUARMState *env, int 
old_el,
  * we already have the correct register contents when encountering the
  * vq0->vq0 transition between EL0->EL1.
  */
-old_a64 = old_el ? arm_el_is_aa64(env, old_el) : el0_a64;
 old_len = (old_a64 && !sve_exception_el(env, old_el)
? sve_vqm1_for_el(env, old_el) : 0);
-new_a64 = new_el ? arm_el_is_aa64(env, new_el) : el0_a64;
 new_len = (new_a64 && !sve_exception_el(env, new_el)
? sve_vqm1_for_el(env, new_el) : 0);
 
-- 
2.34.1




[PATCH v6 40/45] linux-user/aarch64: Implement SME signal handling

2022-07-08 Thread Richard Henderson
Set the SM bit in the SVE record on signal delivery, and create the ZA record.
Restore SM and ZA state according to the records present on return.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 linux-user/aarch64/signal.c | 167 +---
 1 file changed, 154 insertions(+), 13 deletions(-)

diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
index 22d0b8b4ec..6a2c6e06d2 100644
--- a/linux-user/aarch64/signal.c
+++ b/linux-user/aarch64/signal.c
@@ -104,6 +104,22 @@ struct target_sve_context {
 
 #define TARGET_SVE_SIG_FLAG_SM  1
 
+#define TARGET_ZA_MAGIC0x54366345
+
+struct target_za_context {
+struct target_aarch64_ctx head;
+uint16_t vl;
+uint16_t reserved[3];
+/* The actual ZA data immediately follows. */
+};
+
+#define TARGET_ZA_SIG_REGS_OFFSET \
+QEMU_ALIGN_UP(sizeof(struct target_za_context), TARGET_SVE_VQ_BYTES)
+#define TARGET_ZA_SIG_ZAV_OFFSET(VQ, N) \
+(TARGET_ZA_SIG_REGS_OFFSET + (VQ) * TARGET_SVE_VQ_BYTES * (N))
+#define TARGET_ZA_SIG_CONTEXT_SIZE(VQ) \
+TARGET_ZA_SIG_ZAV_OFFSET(VQ, VQ * TARGET_SVE_VQ_BYTES)
+
 struct target_rt_sigframe {
 struct target_siginfo info;
 struct target_ucontext uc;
@@ -176,9 +192,9 @@ static void target_setup_end_record(struct 
target_aarch64_ctx *end)
 }
 
 static void target_setup_sve_record(struct target_sve_context *sve,
-CPUARMState *env, int vq, int size)
+CPUARMState *env, int size)
 {
-int i, j;
+int i, j, vq = sve_vq(env);
 
 memset(sve, 0, sizeof(*sve));
 __put_user(TARGET_SVE_MAGIC, >head.magic);
@@ -207,6 +223,35 @@ static void target_setup_sve_record(struct 
target_sve_context *sve,
 }
 }
 
+static void target_setup_za_record(struct target_za_context *za,
+   CPUARMState *env, int size)
+{
+int vq = sme_vq(env);
+int vl = vq * TARGET_SVE_VQ_BYTES;
+int i, j;
+
+memset(za, 0, sizeof(*za));
+__put_user(TARGET_ZA_MAGIC, >head.magic);
+__put_user(size, >head.size);
+__put_user(vl, >vl);
+
+if (size == TARGET_ZA_SIG_CONTEXT_SIZE(0)) {
+return;
+}
+assert(size == TARGET_ZA_SIG_CONTEXT_SIZE(vq));
+
+/*
+ * Note that ZA vectors are stored as a byte stream,
+ * with each byte element at a subsequent address.
+ */
+for (i = 0; i < vl; ++i) {
+uint64_t *z = (void *)za + TARGET_ZA_SIG_ZAV_OFFSET(vq, i);
+for (j = 0; j < vq * 2; ++j) {
+__put_user_e(env->zarray[i].d[j], z + j, le);
+}
+}
+}
+
 static void target_restore_general_frame(CPUARMState *env,
  struct target_rt_sigframe *sf)
 {
@@ -252,16 +297,28 @@ static void target_restore_fpsimd_record(CPUARMState *env,
 
 static bool target_restore_sve_record(CPUARMState *env,
   struct target_sve_context *sve,
-  int size)
+  int size, int *svcr)
 {
-int i, j, vl, vq;
+int i, j, vl, vq, flags;
+bool sm;
 
-if (!cpu_isar_feature(aa64_sve, env_archcpu(env))) {
+__get_user(vl, >vl);
+__get_user(flags, >flags);
+
+sm = flags & TARGET_SVE_SIG_FLAG_SM;
+
+/* The cpu must support Streaming or Non-streaming SVE. */
+if (sm
+? !cpu_isar_feature(aa64_sme, env_archcpu(env))
+: !cpu_isar_feature(aa64_sve, env_archcpu(env))) {
 return false;
 }
 
-__get_user(vl, >vl);
-vq = sve_vq(env);
+/*
+ * Note that we cannot use sve_vq() because that depends on the
+ * current setting of PSTATE.SM, not the state to be restored.
+ */
+vq = sve_vqm1_for_el_sm(env, 0, sm) + 1;
 
 /* Reject mismatched VL. */
 if (vl != vq * TARGET_SVE_VQ_BYTES) {
@@ -278,6 +335,8 @@ static bool target_restore_sve_record(CPUARMState *env,
 return false;
 }
 
+*svcr = FIELD_DP64(*svcr, SVCR, SM, sm);
+
 /*
  * Note that SVE regs are stored as a byte stream, with each byte element
  * at a subsequent address.  This corresponds to a little-endian load
@@ -304,15 +363,57 @@ static bool target_restore_sve_record(CPUARMState *env,
 return true;
 }
 
+static bool target_restore_za_record(CPUARMState *env,
+ struct target_za_context *za,
+ int size, int *svcr)
+{
+int i, j, vl, vq;
+
+if (!cpu_isar_feature(aa64_sme, env_archcpu(env))) {
+return false;
+}
+
+__get_user(vl, >vl);
+vq = sme_vq(env);
+
+/* Reject mismatched VL. */
+if (vl != vq * TARGET_SVE_VQ_BYTES) {
+return false;
+}
+
+/* Accept empty record -- used to clear PSTATE.ZA. */
+if (size <= TARGET_ZA_SIG_CONTEXT_SIZE(0)) {
+return true;
+}
+
+/* Reject non-empty but incomplete record. */
+if (size < TARGET_ZA_SIG_CONTEXT_SIZE(vq)) {
+return false;
+ 

[RISU PATCH v4 01/29] Use bool for tracing variables

2022-07-08 Thread Richard Henderson
Reviewed-by: Alex Bennée 
Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 risu.h| 3 ++-
 reginfo.c | 2 +-
 risu.c| 8 
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/risu.h b/risu.h
index 8d2d646..e2b4508 100644
--- a/risu.h
+++ b/risu.h
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* Extra option processing for architectures */
 extern const struct option * const arch_long_opts;
@@ -96,7 +97,7 @@ int recv_and_compare_register_info(read_fn read_fn,
  * Should return 0 if it was a good match (ie end of test)
  * and 1 for a mismatch.
  */
-int report_match_status(int trace);
+int report_match_status(bool trace);
 
 /* Interface provided by CPU-specific code: */
 
diff --git a/reginfo.c b/reginfo.c
index dd42ae2..1b2a821 100644
--- a/reginfo.c
+++ b/reginfo.c
@@ -141,7 +141,7 @@ int recv_and_compare_register_info(read_fn read_fn,
  * Should return 0 if it was a good match (ie end of test)
  * and 1 for a mismatch.
  */
-int report_match_status(int trace)
+int report_match_status(bool trace)
 {
 int resp = 0;
 fprintf(stderr, "match status...\n");
diff --git a/risu.c b/risu.c
index 01525d2..79b1092 100644
--- a/risu.c
+++ b/risu.c
@@ -31,7 +31,7 @@
 void *memblock;
 
 int apprentice_fd, master_fd;
-int trace;
+bool trace;
 size_t signal_count;
 
 #ifdef HAVE_ZLIB
@@ -228,7 +228,7 @@ int master(void)
 signal_count);
 return 0;
 } else {
-return report_match_status(0);
+return report_match_status(false);
 }
 }
 set_sigill_handler(_sigill);
@@ -250,7 +250,7 @@ int apprentice(void)
 #endif
 close(apprentice_fd);
 fprintf(stderr, "finished early after %zd checkpoints\n", 
signal_count);
-return report_match_status(1);
+return report_match_status(true);
 }
 set_sigill_handler(_sigill);
 fprintf(stderr, "starting apprentice image at 0x%"PRIxPTR"\n",
@@ -344,7 +344,7 @@ int main(int argc, char **argv)
 break;
 case 't':
 trace_fn = optarg;
-trace = 1;
+trace = true;
 break;
 case 'h':
 hostname = optarg;
-- 
2.34.1




[PATCH v6 29/45] target/arm: Implement REVD

2022-07-08 Thread Richard Henderson
This is an SVE instruction that operates using the SVE vector
length but is present only if SME is implemented.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/helper-sve.h|  2 ++
 target/arm/sve.decode  |  1 +
 target/arm/sve_helper.c| 16 
 target/arm/translate-sve.c |  2 ++
 4 files changed, 21 insertions(+)

diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index ab0333400f..cc4e1d8948 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -719,6 +719,8 @@ DEF_HELPER_FLAGS_4(sve_revh_d, TCG_CALL_NO_RWG, void, ptr, 
ptr, ptr, i32)
 
 DEF_HELPER_FLAGS_4(sve_revw_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_4(sme_revd_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
 DEF_HELPER_FLAGS_4(sve_rbit_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(sve_rbit_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(sve_rbit_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 966803cbb7..a9e48f07b4 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -652,6 +652,7 @@ REVB0101 .. 1001 00 100 ... . . 
@rd_pg_rn
 REVH0101 .. 1001 01 100 ... . . @rd_pg_rn
 REVW0101 .. 1001 10 100 ... . . @rd_pg_rn
 RBIT0101 .. 1001 11 100 ... . . @rd_pg_rn
+REVD0101 00 1011 10 100 ... . . @rd_pg_rn_e0
 
 # SVE vector splice (predicated, destructive)
 SPLICE  0101 .. 101 100 100 ... . . @rdn_pg_rm
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index df16170469..d6f7ef94fe 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -931,6 +931,22 @@ DO_ZPZ_D(sve_revh_d, uint64_t, hswap64)
 
 DO_ZPZ_D(sve_revw_d, uint64_t, wswap64)
 
+void HELPER(sme_revd_q)(void *vd, void *vn, void *vg, uint32_t desc)
+{
+intptr_t i, opr_sz = simd_oprsz(desc) / 8;
+uint64_t *d = vd, *n = vn;
+uint8_t *pg = vg;
+
+for (i = 0; i < opr_sz; i += 2) {
+if (pg[H1(i)] & 1) {
+uint64_t n0 = n[i + 0];
+uint64_t n1 = n[i + 1];
+d[i + 0] = n1;
+d[i + 1] = n0;
+}
+}
+}
+
 DO_ZPZ(sve_rbit_b, uint8_t, H1, revbit8)
 DO_ZPZ(sve_rbit_h, uint16_t, H1_2, revbit16)
 DO_ZPZ(sve_rbit_s, uint32_t, H1_4, revbit32)
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 24ffb69a2a..9ed3b267fd 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -2901,6 +2901,8 @@ TRANS_FEAT(REVH, aa64_sve, gen_gvec_ool_arg_zpz, 
revh_fns[a->esz], a, 0)
 TRANS_FEAT(REVW, aa64_sve, gen_gvec_ool_arg_zpz,
a->esz == 3 ? gen_helper_sve_revw_d : NULL, a, 0)
 
+TRANS_FEAT(REVD, aa64_sme, gen_gvec_ool_arg_zpz, gen_helper_sme_revd_q, a, 0)
+
 TRANS_FEAT(SPLICE, aa64_sve, gen_gvec_ool_arg_zpzz,
gen_helper_sve_splice, a, a->esz)
 
-- 
2.34.1




[PATCH v6 32/45] target/arm: Enable SME for -cpu max

2022-07-08 Thread Richard Henderson
Note that SME remains effectively disabled for user-only,
because we do not yet set CPACR_EL1.SMEN.  This needs to
wait until the kernel ABI is implemented.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 docs/system/arm/emulation.rst |  4 
 target/arm/cpu64.c| 11 +++
 2 files changed, 15 insertions(+)

diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst
index 83b4410065..8e494c8bea 100644
--- a/docs/system/arm/emulation.rst
+++ b/docs/system/arm/emulation.rst
@@ -65,6 +65,10 @@ the following architecture extensions:
 - FEAT_SHA512 (Advanced SIMD SHA512 instructions)
 - FEAT_SM3 (Advanced SIMD SM3 instructions)
 - FEAT_SM4 (Advanced SIMD SM4 instructions)
+- FEAT_SME (Scalable Matrix Extension)
+- FEAT_SME_FA64 (Full A64 instruction set in Streaming SVE mode)
+- FEAT_SME_F64F64 (Double-precision floating-point outer product instructions)
+- FEAT_SME_I16I64 (16-bit to 64-bit integer widening outer product 
instructions)
 - FEAT_SPECRES (Speculation restriction instructions)
 - FEAT_SSBS (Speculative Store Bypass Safe)
 - FEAT_TLBIOS (TLB invalidate instructions in Outer Shareable domain)
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index b4fd4b7ec8..78e27f778a 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -1024,6 +1024,7 @@ static void aarch64_max_initfn(Object *obj)
  */
 t = FIELD_DP64(t, ID_AA64PFR1, MTE, 3);   /* FEAT_MTE3 */
 t = FIELD_DP64(t, ID_AA64PFR1, RAS_FRAC, 0);  /* FEAT_RASv1p1 + 
FEAT_DoubleFault */
+t = FIELD_DP64(t, ID_AA64PFR1, SME, 1);   /* FEAT_SME */
 t = FIELD_DP64(t, ID_AA64PFR1, CSV2_FRAC, 0); /* FEAT_CSV2_2 */
 cpu->isar.id_aa64pfr1 = t;
 
@@ -1074,6 +1075,16 @@ static void aarch64_max_initfn(Object *obj)
 t = FIELD_DP64(t, ID_AA64DFR0, PMUVER, 5);/* FEAT_PMUv3p4 */
 cpu->isar.id_aa64dfr0 = t;
 
+t = cpu->isar.id_aa64smfr0;
+t = FIELD_DP64(t, ID_AA64SMFR0, F32F32, 1);   /* FEAT_SME */
+t = FIELD_DP64(t, ID_AA64SMFR0, B16F32, 1);   /* FEAT_SME */
+t = FIELD_DP64(t, ID_AA64SMFR0, F16F32, 1);   /* FEAT_SME */
+t = FIELD_DP64(t, ID_AA64SMFR0, I8I32, 0xf);  /* FEAT_SME */
+t = FIELD_DP64(t, ID_AA64SMFR0, F64F64, 1);   /* FEAT_SME_F64F64 */
+t = FIELD_DP64(t, ID_AA64SMFR0, I16I64, 0xf); /* FEAT_SME_I16I64 */
+t = FIELD_DP64(t, ID_AA64SMFR0, FA64, 1); /* FEAT_SME_FA64 */
+cpu->isar.id_aa64smfr0 = t;
+
 /* Replicate the same data to the 32-bit id registers.  */
 aa32_max_features(cpu);
 
-- 
2.34.1




[RISU PATCH v4 00/29] risu cleanups and improvements

2022-07-08 Thread Richard Henderson
If you can imagine, v3 was back in 2020:
https://lore.kernel.org/qemu-devel/20200522023440.26261-1-richard.hender...@linaro.org/

I've refreshed that, not that risu has changed much in that time,
and then also added support for SME to aarch64, i.e. SVCR and ZA
storage are now present in the reginfo, and compared.

I include a small test case, which allows one to see that ZA
storage is being handled properly.  When run with

  ./risu --test-sve=1 --test-za=1 --master -t test_sme_aarch64.{out,bin}
  ./risu --fulldump -t test_sme_aarch64.out

one can see the 16x16 bytes filled with row major then
column major indexes.


r~


Richard Henderson (29):
  Use bool for tracing variables
  Unify master_fd and apprentice_fd to comm_fd
  Hoist trace file and socket opening
  Adjust tracefile open for write
  Use EXIT_FAILURE, EXIT_SUCCESS
  Make some risu.c symbols static
  Add enum RisuOp
  Add enum RisuResult
  Unify i/o functions and use RisuResult
  Pass non-OK result back through siglongjmp
  Always write for --master
  Simplify syncing with master
  Split RES_MISMATCH for registers and memory
  Merge reginfo.c into risu.c
  Rearrange reginfo and memblock buffers
  Split out recv_register_info
  Add magic and size to the trace header
  Compute reginfo_size based on the reginfo
  aarch64: Assume system support for SVE
  aarch64: Reorg sve reginfo to save space
  aarch64: Use arch_init to configure sve
  ppc64: Use uint64_t to represent double
  Standardize reginfo_dump_mismatch printing
  Add --fulldump and --diffdup options
  Remove return value from reginfo_dump
  ppc64: Clean up reginfo handling
  aarch64: Tidy reginfo dumping ahead of ZA state
  aarch64: Add support for ZA storage
  aarch64: Trivial SME test

 Makefile   |   2 +-
 risu.h | 103 +++
 risu_reginfo_aarch64.h |  97 --
 risu_reginfo_ppc64.h   |   3 +-
 comms.c|  34 +--
 reginfo.c  | 183 ---
 risu.c | 676 ++---
 risu_aarch64.c |   6 +-
 risu_arm.c |   6 +-
 risu_i386.c|   4 +-
 risu_m68k.c|   4 +-
 risu_ppc64.c   |   4 +-
 risu_reginfo_aarch64.c | 408 +++--
 risu_reginfo_arm.c |  32 +-
 risu_reginfo_i386.c|  22 +-
 risu_reginfo_m68k.c|  37 +--
 risu_reginfo_ppc64.c   | 183 +--
 test_sme_aarch64.s |  55 
 18 files changed, 1070 insertions(+), 789 deletions(-)
 delete mode 100644 reginfo.c
 create mode 100644 test_sme_aarch64.s

-- 
2.34.1




[PATCH v6 30/45] target/arm: Implement SCLAMP, UCLAMP

2022-07-08 Thread Richard Henderson
These are SVE instructions that operate using the SVE vector
length, but they are present only if SME is implemented.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/helper.h|  18 +++
 target/arm/sve.decode  |   5 ++
 target/arm/translate-sve.c | 102 +
 target/arm/vec_helper.c|  24 +
 4 files changed, 149 insertions(+)

diff --git a/target/arm/helper.h b/target/arm/helper.h
index 3a8ce42ab0..92f36d9dbb 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -1019,6 +1019,24 @@ DEF_HELPER_FLAGS_6(gvec_bfmlal, TCG_CALL_NO_RWG,
 DEF_HELPER_FLAGS_6(gvec_bfmlal_idx, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_5(gvec_sclamp_b, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sclamp_h, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sclamp_s, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_sclamp_d, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(gvec_uclamp_b, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uclamp_h, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uclamp_s, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(gvec_uclamp_d, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
+
 #ifdef TARGET_AARCH64
 #include "helper-a64.h"
 #include "helper-sve.h"
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index a9e48f07b4..14b3a69c36 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -1695,3 +1695,8 @@ PSEL00100101 .. 1 100 .. 01  0  0 
  \
 @psel esz=2 imm=%psel_imm_s
 PSEL00100101 .1 1 000 .. 01  0  0   \
 @psel esz=3 imm=%psel_imm_d
+
+### SVE clamp
+
+SCLAMP  01000100 .. 0 . 11 . .  @rda_rn_rm
+UCLAMP  01000100 .. 0 . 110001 . .  @rda_rn_rm
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 9ed3b267fd..41f8b12259 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -7478,3 +7478,105 @@ static bool trans_PSEL(DisasContext *s, arg_psel *a)
 tcg_temp_free_ptr(ptr);
 return true;
 }
+
+static void gen_sclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
+{
+tcg_gen_smax_i32(d, a, n);
+tcg_gen_smin_i32(d, d, m);
+}
+
+static void gen_sclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
+{
+tcg_gen_smax_i64(d, a, n);
+tcg_gen_smin_i64(d, d, m);
+}
+
+static void gen_sclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
+   TCGv_vec m, TCGv_vec a)
+{
+tcg_gen_smax_vec(vece, d, a, n);
+tcg_gen_smin_vec(vece, d, d, m);
+}
+
+static void gen_sclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+   uint32_t a, uint32_t oprsz, uint32_t maxsz)
+{
+static const TCGOpcode vecop[] = {
+INDEX_op_smin_vec, INDEX_op_smax_vec, 0
+};
+static const GVecGen4 ops[4] = {
+{ .fniv = gen_sclamp_vec,
+  .fno  = gen_helper_gvec_sclamp_b,
+  .opt_opc = vecop,
+  .vece = MO_8 },
+{ .fniv = gen_sclamp_vec,
+  .fno  = gen_helper_gvec_sclamp_h,
+  .opt_opc = vecop,
+  .vece = MO_16 },
+{ .fni4 = gen_sclamp_i32,
+  .fniv = gen_sclamp_vec,
+  .fno  = gen_helper_gvec_sclamp_s,
+  .opt_opc = vecop,
+  .vece = MO_32 },
+{ .fni8 = gen_sclamp_i64,
+  .fniv = gen_sclamp_vec,
+  .fno  = gen_helper_gvec_sclamp_d,
+  .opt_opc = vecop,
+  .vece = MO_64,
+  .prefer_i64 = TCG_TARGET_REG_BITS == 64 }
+};
+tcg_gen_gvec_4(d, n, m, a, oprsz, maxsz, &ops[vece]);
+}
+
+TRANS_FEAT(SCLAMP, aa64_sme, gen_gvec_fn_arg_, gen_sclamp, a)
+
+static void gen_uclamp_i32(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_i32 a)
+{
+tcg_gen_umax_i32(d, a, n);
+tcg_gen_umin_i32(d, d, m);
+}
+
+static void gen_uclamp_i64(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_i64 a)
+{
+tcg_gen_umax_i64(d, a, n);
+tcg_gen_umin_i64(d, d, m);
+}
+
+static void gen_uclamp_vec(unsigned vece, TCGv_vec d, TCGv_vec n,
+   TCGv_vec m, TCGv_vec a)
+{
+tcg_gen_umax_vec(vece, d, a, n);
+tcg_gen_umin_vec(vece, d, d, m);
+}
+
+static void gen_uclamp(unsigned vece, uint32_t d, uint32_t n, uint32_t m,
+   uint32_t a, uint32_t oprsz, uint32_t maxsz)
+{
+static const TCGOpcode vecop[] = {
+INDEX_op_umin_vec, INDEX_op_umax_vec, 0
+};
+static const GVecGen4 ops[4] = {
+{ .fniv = gen_uclamp_vec,
+  .fno  = gen_helper_gvec_uclamp_b,
+  .opt_opc = vecop,
+  .vece 

[PATCH v6 26/45] target/arm: Implement FMOPA, FMOPS (widening)

2022-07-08 Thread Richard Henderson
Signed-off-by: Richard Henderson 
---
 target/arm/helper-sme.h|  2 ++
 target/arm/sme.decode  |  1 +
 target/arm/sme_helper.c| 74 ++
 target/arm/translate-sme.c |  1 +
 4 files changed, 78 insertions(+)

diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
index 1d68fb8c74..4d5d05db3a 100644
--- a/target/arm/helper-sme.h
+++ b/target/arm/helper-sme.h
@@ -121,6 +121,8 @@ DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, 
ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 
+DEF_HELPER_FLAGS_7(sme_fmopa_h, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
index afd9c0dffd..e8d27fd8a0 100644
--- a/target/arm/sme.decode
+++ b/target/arm/sme.decode
@@ -75,3 +75,4 @@ FMOPA_s 1000 100 . ... ... . . 00 ..  
  @op_32
 FMOPA_d 1000 110 . ... ... . . 0 ...@op_64
 
 BFMOPA  1001 100 . ... ... . . 00 ..@op_32
+FMOPA_h 1001 101 . ... ... . . 00 ..@op_32
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
index 690a53eee2..302f89c30b 100644
--- a/target/arm/sme_helper.c
+++ b/target/arm/sme_helper.c
@@ -1008,6 +1008,80 @@ static inline uint32_t f16mop_adj_pair(uint32_t pair, 
uint32_t pg, uint32_t neg)
 return pair;
 }
 
+static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2,
+  float_status *s_std, float_status *s_odd)
+{
+float64 e1r = float16_to_float64(e1 & 0xffff, true, s_std);
+float64 e1c = float16_to_float64(e1 >> 16, true, s_std);
+float64 e2r = float16_to_float64(e2 & 0xffff, true, s_std);
+float64 e2c = float16_to_float64(e2 >> 16, true, s_std);
+float64 t64;
+float32 t32;
+
+/*
+ * The ARM pseudocode function FPDot performs both multiplies
+ * and the add with a single rounding operation.  Emulate this
+ * by performing the first multiply in round-to-odd, then doing
+ * the second multiply as fused multiply-add, and rounding to
+ * float32 all in one step.
+ */
+t64 = float64_mul(e1r, e2r, s_odd);
+t64 = float64r32_muladd(e1c, e2c, t64, 0, s_std);
+
+/* This conversion is exact, because we've already rounded. */
+t32 = float64_to_float32(t64, s_std);
+
+/* The final accumulation step is not fused. */
+return float32_add(sum, t32, s_std);
+}
+
+void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
+ void *vpm, void *vst, uint32_t desc)
+{
+intptr_t row, col, oprsz = simd_maxsz(desc);
+uint32_t neg = simd_data(desc) * 0x80008000u;
+uint16_t *pn = vpn, *pm = vpm;
+float_status fpst_odd, fpst_std;
+
+/*
+ * Make a copy of float_status because this operation does not
+ * update the cumulative fp exception status.  It also produces
+ * default nans.  Make a second copy with round-to-odd -- see above.
+ */
+fpst_std = *(float_status *)vst;
+set_default_nan_mode(true, &fpst_std);
+fpst_odd = fpst_std;
+set_float_rounding_mode(float_round_to_odd, &fpst_odd);
+
+for (row = 0; row < oprsz; ) {
+uint16_t prow = pn[H2(row >> 4)];
+do {
+void *vza_row = vza + tile_vslice_offset(row);
+uint32_t n = *(uint32_t *)(vzn + H1_4(row));
+
+n = f16mop_adj_pair(n, prow, neg);
+
+for (col = 0; col < oprsz; ) {
+uint16_t pcol = pm[H2(col >> 4)];
+do {
+if (prow & pcol & 0b0101) {
+uint32_t *a = vza_row + H1_4(col);
+uint32_t m = *(uint32_t *)(vzm + H1_4(col));
+
+m = f16mop_adj_pair(m, pcol, 0);
+*a = f16_dotadd(*a, n, m, &fpst_std, &fpst_odd);
+
+col += 4;
+pcol >>= 4;
+}
+} while (col & 15);
+}
+row += 4;
+prow >>= 4;
+} while (row & 15);
+}
+}
+
 void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
 void *vpm, uint32_t desc)
 {
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
index ecb7583c55..c2953b22ce 100644
--- a/target/arm/translate-sme.c
+++ b/target/arm/translate-sme.c
@@ -355,6 +355,7 @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, 
MemOp esz,
 return true;
 }
 
+TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, 
gen_helper_sme_fmopa_h)
 TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, 
gen_helper_sme_fmopa_s)
 TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, 

[PATCH v6 39/45] linux-user/aarch64: Move sve record checks into restore

2022-07-08 Thread Richard Henderson
Move the checks out of the parsing loop and into the
restore function.  This more closely mirrors the code
structure in the kernel, and is slightly clearer.

Reject rather than silently skip incorrect VL and SVE record sizes,
bringing our checks into line with those the kernel does.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 linux-user/aarch64/signal.c | 51 +
 1 file changed, 35 insertions(+), 16 deletions(-)

diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
index 9ff79da4be..22d0b8b4ec 100644
--- a/linux-user/aarch64/signal.c
+++ b/linux-user/aarch64/signal.c
@@ -250,12 +250,36 @@ static void target_restore_fpsimd_record(CPUARMState *env,
 }
 }
 
-static void target_restore_sve_record(CPUARMState *env,
-  struct target_sve_context *sve, int vq)
+static bool target_restore_sve_record(CPUARMState *env,
+  struct target_sve_context *sve,
+  int size)
 {
-int i, j;
+int i, j, vl, vq;
 
-/* Note that SVE regs are stored as a byte stream, with each byte element
+if (!cpu_isar_feature(aa64_sve, env_archcpu(env))) {
+return false;
+}
+
+__get_user(vl, &sve->vl);
+vq = sve_vq(env);
+
+/* Reject mismatched VL. */
+if (vl != vq * TARGET_SVE_VQ_BYTES) {
+return false;
+}
+
+/* Accept empty record -- used to clear PSTATE.SM. */
+if (size <= sizeof(*sve)) {
+return true;
+}
+
+/* Reject non-empty but incomplete record. */
+if (size < TARGET_SVE_SIG_CONTEXT_SIZE(vq)) {
+return false;
+}
+
+/*
+ * Note that SVE regs are stored as a byte stream, with each byte element
  * at a subsequent address.  This corresponds to a little-endian load
  * of our 64-bit hunks.
  */
@@ -277,6 +301,7 @@ static void target_restore_sve_record(CPUARMState *env,
 }
 }
 }
+return true;
 }
 
 static int target_restore_sigframe(CPUARMState *env,
@@ -287,7 +312,7 @@ static int target_restore_sigframe(CPUARMState *env,
 struct target_sve_context *sve = NULL;
 uint64_t extra_datap = 0;
 bool used_extra = false;
-int vq = 0, sve_size = 0;
+int sve_size = 0;
 
 target_restore_general_frame(env, sf);
 
@@ -321,15 +346,9 @@ static int target_restore_sigframe(CPUARMState *env,
 if (sve || size < sizeof(struct target_sve_context)) {
 goto err;
 }
-if (cpu_isar_feature(aa64_sve, env_archcpu(env))) {
-vq = sve_vq(env);
-sve_size = QEMU_ALIGN_UP(TARGET_SVE_SIG_CONTEXT_SIZE(vq), 16);
-if (size == sve_size) {
-sve = (struct target_sve_context *)ctx;
-break;
-}
-}
-goto err;
+sve = (struct target_sve_context *)ctx;
+sve_size = size;
+break;
 
 case TARGET_EXTRA_MAGIC:
 if (extra || size != sizeof(struct target_extra_context)) {
@@ -362,8 +381,8 @@ static int target_restore_sigframe(CPUARMState *env,
 }
 
 /* SVE data, if present, overwrites FPSIMD data.  */
-if (sve) {
-target_restore_sve_record(env, sve, vq);
+if (sve && !target_restore_sve_record(env, sve, sve_size)) {
+goto err;
 }
 unlock_user(extra, extra_datap, 0);
 return 0;
-- 
2.34.1




[PATCH v6 23/45] target/arm: Implement SME ADDHA, ADDVA

2022-07-08 Thread Richard Henderson
Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/helper-sme.h|  5 +++
 target/arm/sme.decode  | 11 +
 target/arm/sme_helper.c| 90 ++
 target/arm/translate-sme.c | 31 +
 4 files changed, 137 insertions(+)

diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
index 95f6e88bdd..753e9e624c 100644
--- a/target/arm/helper-sme.h
+++ b/target/arm/helper-sme.h
@@ -115,3 +115,8 @@ DEF_HELPER_FLAGS_5(sme_st1q_be_h_mte, TCG_CALL_NO_WG, void, 
env, ptr, ptr, tl, i
 DEF_HELPER_FLAGS_5(sme_st1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
 DEF_HELPER_FLAGS_5(sme_st1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
 DEF_HELPER_FLAGS_5(sme_st1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
+
+DEF_HELPER_FLAGS_5(sme_addha_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
index f1ebd857a5..8cb6c4053c 100644
--- a/target/arm/sme.decode
+++ b/target/arm/sme.decode
@@ -53,3 +53,14 @@ LDST1   111 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 
za_imm:4  \
 
 LDR 111 100 0 00 .. 000 . 0 @ldstr
 STR 111 100 1 00 .. 000 . 0 @ldstr
+
+### SME Add Vector to Array
+
+   zad zn pm pn
+@adda_32 .. . . pm:3 pn:3 zn:5 ... zad:2
+@adda_64 .. . . pm:3 pn:3 zn:5 ..  zad:3
+
+ADDHA_s 1100 10 01000 0 ... ... . 000 ..@adda_32
+ADDVA_s 1100 10 01000 1 ... ... . 000 ..@adda_32
+ADDHA_d 1100 11 01000 0 ... ... . 00 ...@adda_64
+ADDVA_d 1100 11 01000 1 ... ... . 00 ...@adda_64
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
index 10fd1ed910..f1e924db74 100644
--- a/target/arm/sme_helper.c
+++ b/target/arm/sme_helper.c
@@ -828,3 +828,93 @@ DO_ST(q, _be, MO_128)
 DO_ST(q, _le, MO_128)
 
 #undef DO_ST
+
+void HELPER(sme_addha_s)(void *vzda, void *vzn, void *vpn,
+ void *vpm, uint32_t desc)
+{
+intptr_t row, col, oprsz = simd_oprsz(desc) / 4;
+uint64_t *pn = vpn, *pm = vpm;
+uint32_t *zda = vzda, *zn = vzn;
+
+for (row = 0; row < oprsz; ) {
+uint64_t pa = pn[row >> 4];
+do {
+if (pa & 1) {
+for (col = 0; col < oprsz; ) {
+uint64_t pb = pm[col >> 4];
+do {
+if (pb & 1) {
+zda[tile_vslice_index(row) + H4(col)] += 
zn[H4(col)];
+}
+pb >>= 4;
+} while (++col & 15);
+}
+}
+pa >>= 4;
+} while (++row & 15);
+}
+}
+
+void HELPER(sme_addha_d)(void *vzda, void *vzn, void *vpn,
+ void *vpm, uint32_t desc)
+{
+intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
+uint8_t *pn = vpn, *pm = vpm;
+uint64_t *zda = vzda, *zn = vzn;
+
+for (row = 0; row < oprsz; ++row) {
+if (pn[H1(row)] & 1) {
+for (col = 0; col < oprsz; ++col) {
+if (pm[H1(col)] & 1) {
+zda[tile_vslice_index(row) + col] += zn[col];
+}
+}
+}
+}
+}
+
+void HELPER(sme_addva_s)(void *vzda, void *vzn, void *vpn,
+ void *vpm, uint32_t desc)
+{
+intptr_t row, col, oprsz = simd_oprsz(desc) / 4;
+uint64_t *pn = vpn, *pm = vpm;
+uint32_t *zda = vzda, *zn = vzn;
+
+for (row = 0; row < oprsz; ) {
+uint64_t pa = pn[row >> 4];
+do {
+if (pa & 1) {
+uint32_t zn_row = zn[H4(row)];
+for (col = 0; col < oprsz; ) {
+uint64_t pb = pm[col >> 4];
+do {
+if (pb & 1) {
+zda[tile_vslice_index(row) + H4(col)] += zn_row;
+}
+pb >>= 4;
+} while (++col & 15);
+}
+}
+pa >>= 4;
+} while (++row & 15);
+}
+}
+
+void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn,
+ void *vpm, uint32_t desc)
+{
+intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
+uint8_t *pn = vpn, *pm = vpm;
+uint64_t *zda = vzda, *zn = vzn;
+
+for (row = 0; row < oprsz; ++row) {
+if (pn[H1(row)] & 1) {
+uint64_t zn_row = zn[row];
+for (col = 0; col < oprsz; ++col) {
+if (pm[H1(col)] & 1) {
+zda[tile_vslice_index(row) + col] += zn_row;
+}
+   

[PATCH v6 21/45] target/arm: Export unpredicated ld/st from translate-sve.c

2022-07-08 Thread Richard Henderson
Add a TCGv_ptr base argument, which will be cpu_env for SVE.
We will reuse this for SME save and restore array insns.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/translate-a64.h |  3 +++
 target/arm/translate-sve.c | 48 --
 2 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
index 2a7fe6e9e7..ad3762d1ac 100644
--- a/target/arm/translate-a64.h
+++ b/target/arm/translate-a64.h
@@ -195,4 +195,7 @@ void gen_gvec_xar(unsigned vece, uint32_t rd_ofs, uint32_t 
rn_ofs,
   uint32_t rm_ofs, int64_t shift,
   uint32_t opr_sz, uint32_t max_sz);
 
+void gen_sve_ldr(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int 
imm);
+void gen_sve_str(DisasContext *s, TCGv_ptr, int vofs, int len, int rn, int 
imm);
+
 #endif /* TARGET_ARM_TRANSLATE_A64_H */
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 95016e49e9..fd1a173637 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -4306,7 +4306,8 @@ TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
  * The load should begin at the address Rn + IMM.
  */
 
-static void do_ldr(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
+void gen_sve_ldr(DisasContext *s, TCGv_ptr base, int vofs,
+ int len, int rn, int imm)
 {
 int len_align = QEMU_ALIGN_DOWN(len, 8);
 int len_remain = len % 8;
@@ -4332,7 +4333,7 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int 
len, int rn, int imm)
 t0 = tcg_temp_new_i64();
 for (i = 0; i < len_align; i += 8) {
 tcg_gen_qemu_ld_i64(t0, clean_addr, midx, MO_LEUQ);
-tcg_gen_st_i64(t0, cpu_env, vofs + i);
+tcg_gen_st_i64(t0, base, vofs + i);
 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
 }
 tcg_temp_free_i64(t0);
@@ -4345,6 +4346,12 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int 
len, int rn, int imm)
 clean_addr = new_tmp_a64_local(s);
 tcg_gen_mov_i64(clean_addr, t0);
 
+if (base != cpu_env) {
+TCGv_ptr b = tcg_temp_local_new_ptr();
+tcg_gen_mov_ptr(b, base);
+base = b;
+}
+
 gen_set_label(loop);
 
 t0 = tcg_temp_new_i64();
@@ -4352,7 +4359,7 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int 
len, int rn, int imm)
 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
 
 tp = tcg_temp_new_ptr();
-tcg_gen_add_ptr(tp, cpu_env, i);
+tcg_gen_add_ptr(tp, base, i);
 tcg_gen_addi_ptr(i, i, 8);
 tcg_gen_st_i64(t0, tp, vofs);
 tcg_temp_free_ptr(tp);
@@ -4360,6 +4367,11 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int 
len, int rn, int imm)
 
 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
 tcg_temp_free_ptr(i);
+
+if (base != cpu_env) {
+tcg_temp_free_ptr(base);
+assert(len_remain == 0);
+}
 }
 
 /*
@@ -4388,13 +4400,14 @@ static void do_ldr(DisasContext *s, uint32_t vofs, int 
len, int rn, int imm)
 default:
 g_assert_not_reached();
 }
-tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
+tcg_gen_st_i64(t0, base, vofs + len_align);
 tcg_temp_free_i64(t0);
 }
 }
 
 /* Similarly for stores.  */
-static void do_str(DisasContext *s, uint32_t vofs, int len, int rn, int imm)
+void gen_sve_str(DisasContext *s, TCGv_ptr base, int vofs,
+ int len, int rn, int imm)
 {
 int len_align = QEMU_ALIGN_DOWN(len, 8);
 int len_remain = len % 8;
@@ -4420,7 +4433,7 @@ static void do_str(DisasContext *s, uint32_t vofs, int 
len, int rn, int imm)
 
 t0 = tcg_temp_new_i64();
 for (i = 0; i < len_align; i += 8) {
-tcg_gen_ld_i64(t0, cpu_env, vofs + i);
+tcg_gen_ld_i64(t0, base, vofs + i);
 tcg_gen_qemu_st_i64(t0, clean_addr, midx, MO_LEUQ);
 tcg_gen_addi_i64(clean_addr, clean_addr, 8);
 }
@@ -4434,11 +4447,17 @@ static void do_str(DisasContext *s, uint32_t vofs, int 
len, int rn, int imm)
 clean_addr = new_tmp_a64_local(s);
 tcg_gen_mov_i64(clean_addr, t0);
 
+if (base != cpu_env) {
+TCGv_ptr b = tcg_temp_local_new_ptr();
+tcg_gen_mov_ptr(b, base);
+base = b;
+}
+
 gen_set_label(loop);
 
 t0 = tcg_temp_new_i64();
 tp = tcg_temp_new_ptr();
-tcg_gen_add_ptr(tp, cpu_env, i);
+tcg_gen_add_ptr(tp, base, i);
 tcg_gen_ld_i64(t0, tp, vofs);
 tcg_gen_addi_ptr(i, i, 8);
 tcg_temp_free_ptr(tp);
@@ -4449,12 +4468,17 @@ static void do_str(DisasContext *s, uint32_t vofs, int 
len, int rn, int imm)
 
 tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
 tcg_temp_free_ptr(i);
+
+if (base != cpu_env) {
+

[PATCH v6 22/45] target/arm: Implement SME LDR, STR

2022-07-08 Thread Richard Henderson
We can reuse the SVE functions for LDR and STR, passing in the
base of the ZA vector and a zero offset.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/sme.decode  |  7 +++
 target/arm/translate-sme.c | 24 
 2 files changed, 31 insertions(+)

diff --git a/target/arm/sme.decode b/target/arm/sme.decode
index 900e3f2a07..f1ebd857a5 100644
--- a/target/arm/sme.decode
+++ b/target/arm/sme.decode
@@ -46,3 +46,10 @@ LDST1   111 0 esz:2 st:1 rm:5 v:1 .. pg:3 rn:5 0 
za_imm:4  \
  rs=%mova_rs
 LDST1   111 111 st:1 rm:5 v:1 .. pg:3 rn:5 0 za_imm:4  \
  esz=4 rs=%mova_rs
+
+  rv rn imm
+@ldstr  ... ... . .. .. ... rn:5 . imm:4 \
+ rv=%mova_rs
+
+LDR 111 100 0 00 .. 000 . 0 @ldstr
+STR 111 100 1 00 .. 000 . 0 @ldstr
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
index 42d14b883a..35c2644812 100644
--- a/target/arm/translate-sme.c
+++ b/target/arm/translate-sme.c
@@ -243,3 +243,27 @@ static bool trans_LDST1(DisasContext *s, arg_LDST1 *a)
 tcg_temp_free_i64(addr);
 return true;
 }
+
+typedef void GenLdStR(DisasContext *, TCGv_ptr, int, int, int, int);
+
+static bool do_ldst_r(DisasContext *s, arg_ldstr *a, GenLdStR *fn)
+{
+int svl = streaming_vec_reg_size(s);
+int imm = a->imm;
+TCGv_ptr base;
+
+if (!sme_za_enabled_check(s)) {
+return true;
+}
+
+/* ZA[n] equates to ZA0H.B[n]. */
+base = get_tile_rowcol(s, MO_8, a->rv, imm, false);
+
+fn(s, base, 0, svl, a->rn, imm * svl);
+
+tcg_temp_free_ptr(base);
+return true;
+}
+
+TRANS_FEAT(LDR, aa64_sme, do_ldst_r, a, gen_sve_ldr)
+TRANS_FEAT(STR, aa64_sme, do_ldst_r, a, gen_sve_str)
-- 
2.34.1




[PATCH v6 20/45] target/arm: Implement SME LD1, ST1

2022-07-08 Thread Richard Henderson
We cannot reuse the SVE functions for LD[1-4] and ST[1-4],
because those functions accept only a Zreg register number.
For SME, we want to pass a pointer into ZA storage.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/helper-sme.h|  82 +
 target/arm/sme.decode  |   9 +
 target/arm/sme_helper.c| 595 +
 target/arm/translate-sme.c |  70 +
 4 files changed, 756 insertions(+)

diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
index 154bc73d2e..95f6e88bdd 100644
--- a/target/arm/helper-sme.h
+++ b/target/arm/helper-sme.h
@@ -33,3 +33,85 @@ DEF_HELPER_FLAGS_4(sme_mova_cz_d, TCG_CALL_NO_RWG, void, 
ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sme_ld1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
+DEF_HELPER_FLAGS_5(sme_ld1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
+
+DEF_HELPER_FLAGS_5(sme_ld1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
+DEF_HELPER_FLAGS_5(sme_ld1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
+DEF_HELPER_FLAGS_5(sme_ld1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
+DEF_HELPER_FLAGS_5(sme_ld1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
+
+DEF_HELPER_FLAGS_5(sme_ld1s_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1s_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1s_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1s_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1s_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
+DEF_HELPER_FLAGS_5(sme_ld1s_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
+DEF_HELPER_FLAGS_5(sme_ld1s_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
+DEF_HELPER_FLAGS_5(sme_ld1s_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
+
+DEF_HELPER_FLAGS_5(sme_ld1d_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1d_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1d_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1d_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1d_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
+DEF_HELPER_FLAGS_5(sme_ld1d_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
+DEF_HELPER_FLAGS_5(sme_ld1d_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
+DEF_HELPER_FLAGS_5(sme_ld1d_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
+
+DEF_HELPER_FLAGS_5(sme_ld1q_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1q_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1q_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1q_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_ld1q_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
+DEF_HELPER_FLAGS_5(sme_ld1q_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
+DEF_HELPER_FLAGS_5(sme_ld1q_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
+DEF_HELPER_FLAGS_5(sme_ld1q_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
+
+DEF_HELPER_FLAGS_5(sme_st1b_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1b_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1b_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
+DEF_HELPER_FLAGS_5(sme_st1b_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
+
+DEF_HELPER_FLAGS_5(sme_st1h_be_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1h_le_h, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1h_be_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1h_le_v, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, i32)
+DEF_HELPER_FLAGS_5(sme_st1h_be_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
+DEF_HELPER_FLAGS_5(sme_st1h_le_h_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
+DEF_HELPER_FLAGS_5(sme_st1h_be_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
+DEF_HELPER_FLAGS_5(sme_st1h_le_v_mte, TCG_CALL_NO_WG, void, env, ptr, ptr, tl, 
i32)
+
+DEF_HELPER_FLAGS_5(sme_st1s_be_h, 

[PATCH v6 38/45] linux-user/aarch64: Verify extra record lock succeeded

2022-07-08 Thread Richard Henderson
Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 linux-user/aarch64/signal.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
index 8fbe98d72f..9ff79da4be 100644
--- a/linux-user/aarch64/signal.c
+++ b/linux-user/aarch64/signal.c
@@ -340,6 +340,9 @@ static int target_restore_sigframe(CPUARMState *env,
 __get_user(extra_size,
&((struct target_extra_context *)ctx)->size);
 extra = lock_user(VERIFY_READ, extra_datap, extra_size, 0);
+if (!extra) {
+return 1;
+}
 break;
 
 default:
-- 
2.34.1




[PATCH v4 00/12] Improve reliability of VM tests

2022-07-08 Thread John Snow
Note: patches 10-12 are included for testing simplicity, they shouldn't
be merged. They will be included in a forthcoming block PR.

V4:

- Addressed concern by Marc-Andre in patch 01.
- Squashed Ubuntu patches (rth)

This patch series attempts to improve the reliability of several of the
VM test targets. In particular, both CentOS 8 tests are non-functional
because CentOS 8 was EOL at the beginning of this calendar year, with
repositories and mirrors going offline.

I also remove the ubuntu.i386 test because we no longer support Ubuntu
18.04 nor do we have explicit need of an i386 build test.

After this series, I am able to successfully run every VM target on an
x86_64 host, except:

- ubuntu.aarch64: Hangs often during testing, see below.
- centos.aarch64: Hangs often during testing, see below.
- haiku.x86_64: Build failures not addressed by this series, see
  https://lists.gnu.org/archive/html/qemu-devel/2022-06/msg02103.html

The unit tests that I see fail most often under aarch64 are:

- virtio-net-failover: Seems to like to hang on openbsd
- migration-test: Tends to hang under aarch64 tcg

Future work (next version? next series?);

- Try to get centos.aarch64 working reliably under TCG
- Upgrade ubuntu.aarch64 to 20.04 after fixing centos.aarch64
- Fix the Haiku build test, if possible.
- Ensure I can reliably run and pass "make vm-build-all".
  (Remove VMs from this recipe if necessary.)

John Snow (11):
  qga: treat get-guest-fsinfo as "best effort"
  tests/vm: use 'cp' instead of 'ln' for temporary vm images
  tests/vm: switch CentOS 8 to CentOS 8 Stream
  tests/vm: switch centos.aarch64 to CentOS 8 Stream
  tests/vm: upgrade Ubuntu 18.04 VM to 20.04
  tests/vm: remove ubuntu.i386 VM test
  tests/vm: remove duplicate 'centos' VM test
  tests/vm: add 1GB extra memory per core
  tests/vm: Remove docker cross-compile test from CentOS VM
  tests/qemu-iotests: hotfix for 307, 223 output
  tests/qemu-iotests: skip 108 when FUSE is not loaded

Vladimir Sementsov-Ogievskiy (1):
  iotests: fix copy-before-write for macOS and FreeBSD

 qga/commands-posix.c   |  10 +-
 tests/qemu-iotests/108 |   5 +
 tests/qemu-iotests/223.out |   4 +-
 tests/qemu-iotests/307.out |   4 +-
 tests/qemu-iotests/tests/copy-before-write |   5 +
 tests/vm/Makefile.include  |   5 +-
 tests/vm/basevm.py |   5 +
 tests/vm/centos|   9 +-
 tests/vm/centos.aarch64| 174 +++--
 tests/vm/ubuntu.aarch64|  10 +-
 tests/vm/ubuntu.i386   |  40 -
 11 files changed, 65 insertions(+), 206 deletions(-)
 delete mode 100755 tests/vm/ubuntu.i386

-- 
2.34.3





[PATCH v6 18/45] target/arm: Implement SME ZERO

2022-07-08 Thread Richard Henderson
Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
v4: Fix ZA[] comment in helper_sme_zero.
---
 target/arm/helper-sme.h|  2 ++
 target/arm/sme.decode  |  4 
 target/arm/sme_helper.c| 25 +
 target/arm/translate-sme.c | 13 +
 4 files changed, 44 insertions(+)

diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
index 3bd48c235f..c4ee1f09e4 100644
--- a/target/arm/helper-sme.h
+++ b/target/arm/helper-sme.h
@@ -19,3 +19,5 @@
 
 DEF_HELPER_FLAGS_2(set_pstate_sm, TCG_CALL_NO_RWG, void, env, i32)
 DEF_HELPER_FLAGS_2(set_pstate_za, TCG_CALL_NO_RWG, void, env, i32)
+
+DEF_HELPER_FLAGS_3(sme_zero, TCG_CALL_NO_RWG, void, env, i32, i32)
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
index c25c031a71..6e4483fdce 100644
--- a/target/arm/sme.decode
+++ b/target/arm/sme.decode
@@ -18,3 +18,7 @@
 #
 # This file is processed by scripts/decodetree.py
 #
+
+### SME Misc
+
+ZERO            11000000 00 001 00000000000 imm:8
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
index b215725594..eef2df73e1 100644
--- a/target/arm/sme_helper.c
+++ b/target/arm/sme_helper.c
@@ -59,3 +59,28 @@ void helper_set_pstate_za(CPUARMState *env, uint32_t i)
 memset(env->zarray, 0, sizeof(env->zarray));
 }
 }
+
+void helper_sme_zero(CPUARMState *env, uint32_t imm, uint32_t svl)
+{
+uint32_t i;
+
+/*
+ * Special case clearing the entire ZA space.
+ * This falls into the CONSTRAINED UNPREDICTABLE zeroing of any
+ * parts of the ZA storage outside of SVL.
+ */
+if (imm == 0xff) {
+memset(env->zarray, 0, sizeof(env->zarray));
+return;
+}
+
+/*
+ * Recall that ZAnH.D[m] is spread across ZA[n+8*m],
+ * so each row is discontiguous within ZA[].
+ */
+for (i = 0; i < svl; i++) {
+if (imm & (1 << (i % 8))) {
+memset(&env->zarray[i], 0, svl);
+}
+}
+}
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
index 786c93fb2d..971504559b 100644
--- a/target/arm/translate-sme.c
+++ b/target/arm/translate-sme.c
@@ -33,3 +33,16 @@
  */
 
 #include "decode-sme.c.inc"
+
+
+static bool trans_ZERO(DisasContext *s, arg_ZERO *a)
+{
+if (!dc_isar_feature(aa64_sme, s)) {
+return false;
+}
+if (sme_za_enabled_check(s)) {
+gen_helper_sme_zero(cpu_env, tcg_constant_i32(a->imm),
+tcg_constant_i32(streaming_vec_reg_size(s)));
+}
+return true;
+}
-- 
2.34.1




[PATCH v6 28/45] target/arm: Implement PSEL

2022-07-08 Thread Richard Henderson
This is an SVE instruction that operates using the SVE vector
length but that it is present only if SME is implemented.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/sve.decode  | 20 +
 target/arm/translate-sve.c | 57 ++
 2 files changed, 77 insertions(+)

diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 95af08c139..966803cbb7 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -1674,3 +1674,23 @@ BFMLALT_zzxw01100100 11 1 . 0100.1 . .   
  @rrxr_3a esz=2
 
 ### SVE2 floating-point bfloat16 dot-product (indexed)
 BFDOT_zzxz  01100100 01 1 . 01 . . @rrxr_2 esz=2
+
+### SVE broadcast predicate element
+
+&psel           esz pd pn pm rv imm
+%psel_rv        16:2 !function=plus_12
+%psel_imm_b     22:2 19:2
+%psel_imm_h     22:2 20:1
+%psel_imm_s     22:2
+%psel_imm_d     23:1
+@psel           ........ .. . ... .. .. pn:4 . pm:4 . pd:4  \
+                &psel rv=%psel_rv
+
+PSEL            00100101 .. 1 ..1 .. 01 .... 0 .... 0 ....  \
+                @psel esz=0 imm=%psel_imm_b
+PSEL            00100101 .. 1 .10 .. 01 .... 0 .... 0 ....  \
+                @psel esz=1 imm=%psel_imm_h
+PSEL            00100101 .. 1 100 .. 01 .... 0 .... 0 ....  \
+                @psel esz=2 imm=%psel_imm_s
+PSEL            00100101 .1 1 000 .. 01 .... 0 .... 0 ....  \
+                @psel esz=3 imm=%psel_imm_d
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index fd1a173637..24ffb69a2a 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -7419,3 +7419,60 @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz 
*a, bool sel)
 
 TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
 TRANS_FEAT(BFMLALT_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, true)
+
+static bool trans_PSEL(DisasContext *s, arg_psel *a)
+{
+int vl = vec_full_reg_size(s);
+int pl = pred_gvec_reg_size(s);
+int elements = vl >> a->esz;
+TCGv_i64 tmp, didx, dbit;
+TCGv_ptr ptr;
+
+if (!dc_isar_feature(aa64_sme, s)) {
+return false;
+}
+if (!sve_access_check(s)) {
+return true;
+}
+
+tmp = tcg_temp_new_i64();
+dbit = tcg_temp_new_i64();
+didx = tcg_temp_new_i64();
+ptr = tcg_temp_new_ptr();
+
+/* Compute the predicate element. */
+tcg_gen_addi_i64(tmp, cpu_reg(s, a->rv), a->imm);
+if (is_power_of_2(elements)) {
+tcg_gen_andi_i64(tmp, tmp, elements - 1);
+} else {
+tcg_gen_remu_i64(tmp, tmp, tcg_constant_i64(elements));
+}
+
+/* Extract the predicate byte and bit indices. */
+tcg_gen_shli_i64(tmp, tmp, a->esz);
+tcg_gen_andi_i64(dbit, tmp, 7);
+tcg_gen_shri_i64(didx, tmp, 3);
+if (HOST_BIG_ENDIAN) {
+tcg_gen_xori_i64(didx, didx, 7);
+}
+
+/* Load the predicate word. */
+tcg_gen_trunc_i64_ptr(ptr, didx);
+tcg_gen_add_ptr(ptr, ptr, cpu_env);
+tcg_gen_ld8u_i64(tmp, ptr, pred_full_reg_offset(s, a->pm));
+
+/* Extract the predicate bit and replicate to MO_64. */
+tcg_gen_shr_i64(tmp, tmp, dbit);
+tcg_gen_andi_i64(tmp, tmp, 1);
+tcg_gen_neg_i64(tmp, tmp);
+
+/* Apply to either copy the source, or write zeros. */
+tcg_gen_gvec_ands(MO_64, pred_full_reg_offset(s, a->pd),
+  pred_full_reg_offset(s, a->pn), tmp, pl, pl);
+
+tcg_temp_free_i64(tmp);
+tcg_temp_free_i64(dbit);
+tcg_temp_free_i64(didx);
+tcg_temp_free_ptr(ptr);
+return true;
+}
-- 
2.34.1




[PATCH v6 19/45] target/arm: Implement SME MOVA

2022-07-08 Thread Richard Henderson
We can reuse the SVE functions for implementing moves to/from
horizontal tile slices, but we need new ones for moves to/from
vertical tile slices.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/helper-sme.h|  12 +++
 target/arm/helper-sve.h|   2 +
 target/arm/translate-a64.h |   8 ++
 target/arm/translate.h |   5 ++
 target/arm/sme.decode  |  15 
 target/arm/sme_helper.c| 151 -
 target/arm/sve_helper.c|  12 +++
 target/arm/translate-sme.c | 127 +++
 8 files changed, 331 insertions(+), 1 deletion(-)

diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
index c4ee1f09e4..154bc73d2e 100644
--- a/target/arm/helper-sme.h
+++ b/target/arm/helper-sme.h
@@ -21,3 +21,15 @@ DEF_HELPER_FLAGS_2(set_pstate_sm, TCG_CALL_NO_RWG, void, 
env, i32)
 DEF_HELPER_FLAGS_2(set_pstate_za, TCG_CALL_NO_RWG, void, env, i32)
 
 DEF_HELPER_FLAGS_3(sme_zero, TCG_CALL_NO_RWG, void, env, i32, i32)
+
+/* Move to/from vertical array slices, i.e. columns, so 'c'.  */
+DEF_HELPER_FLAGS_4(sme_mova_cz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sme_mova_zc_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sme_mova_cz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sme_mova_zc_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sme_mova_cz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sme_mova_zc_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sme_mova_cz_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sme_mova_zc_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sme_mova_cz_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(sme_mova_zc_q, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index dc629f851a..ab0333400f 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -325,6 +325,8 @@ DEF_HELPER_FLAGS_5(sve_sel_zpzz_s, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(sve_sel_zpzz_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve_sel_zpzz_q, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, i32)
 
 DEF_HELPER_FLAGS_5(sve2_addp_zpzz_b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
index 099d3d11d6..2a7fe6e9e7 100644
--- a/target/arm/translate-a64.h
+++ b/target/arm/translate-a64.h
@@ -178,6 +178,14 @@ static inline int pred_gvec_reg_size(DisasContext *s)
 return size_for_gvec(pred_full_reg_size(s));
 }
 
+/* Return a newly allocated pointer to the predicate register.  */
+static inline TCGv_ptr pred_full_reg_ptr(DisasContext *s, int regno)
+{
+TCGv_ptr ret = tcg_temp_new_ptr();
+tcg_gen_addi_ptr(ret, cpu_env, pred_full_reg_offset(s, regno));
+return ret;
+}
+
 bool disas_sve(DisasContext *, uint32_t);
 bool disas_sme(DisasContext *, uint32_t);
 
diff --git a/target/arm/translate.h b/target/arm/translate.h
index e2e619dab2..af5d4a7086 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -156,6 +156,11 @@ static inline int plus_2(DisasContext *s, int x)
 return x + 2;
 }
 
+static inline int plus_12(DisasContext *s, int x)
+{
+return x + 12;
+}
+
 static inline int times_2(DisasContext *s, int x)
 {
 return x * 2;
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
index 6e4483fdce..241b4895b7 100644
--- a/target/arm/sme.decode
+++ b/target/arm/sme.decode
@@ -22,3 +22,18 @@
 ### SME Misc
 
 ZERO            11000000 00 001 00000000000 imm:8
+
+### SME Move into/from Array
+
+%mova_rs        13:2 !function=plus_12
+&mova           esz rs pg zr za_imm v:bool to_vec:bool
+
+MOVA            11000000 esz:2 00000 0 v:1 .. pg:3 zr:5 0 za_imm:4  \
+                &mova to_vec=0 rs=%mova_rs
+MOVA            11000000 11    00000 1 v:1 .. pg:3 zr:5 0 za_imm:4  \
+                &mova to_vec=0 rs=%mova_rs esz=4
+
+MOVA            11000000 esz:2 00001 0 v:1 .. pg:3 0 za_imm:4 zr:5  \
+                &mova to_vec=1 rs=%mova_rs
+MOVA            11000000 11    00001 1 v:1 .. pg:3 0 za_imm:4 zr:5  \
+                &mova to_vec=1 rs=%mova_rs esz=4
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
index eef2df73e1..e88244423d 100644
--- a/target/arm/sme_helper.c
+++ b/target/arm/sme_helper.c
@@ -19,8 +19,10 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
-#include "internals.h"
+#include "tcg/tcg-gvec-desc.h"
 #include "exec/helper-proto.h"
+#include "qemu/int128.h"
+#include "vec_internal.h"
 
 /* ResetSVEState */
 void arm_reset_sve_state(CPUARMState *env)
@@ -84,3 +86,150 @@ void helper_sme_zero(CPUARMState *env, uint32_t imm, 
uint32_t svl)
 }
 }
 }
+
+
+/*
+ * When considering the ZA storage as an array of elements of
+ * type T, the index within that array of the 

[PATCH v6 42/45] linux-user/aarch64: Implement PR_SME_GET_VL, PR_SME_SET_VL

2022-07-08 Thread Richard Henderson
These prctl set the Streaming SVE vector length, which may
be completely different from the Normal SVE vector length.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 linux-user/aarch64/target_prctl.h | 54 +++
 linux-user/syscall.c  | 16 +
 2 files changed, 70 insertions(+)

diff --git a/linux-user/aarch64/target_prctl.h 
b/linux-user/aarch64/target_prctl.h
index 40481e6663..907c314146 100644
--- a/linux-user/aarch64/target_prctl.h
+++ b/linux-user/aarch64/target_prctl.h
@@ -10,6 +10,7 @@ static abi_long do_prctl_sve_get_vl(CPUArchState *env)
 {
 ARMCPU *cpu = env_archcpu(env);
 if (cpu_isar_feature(aa64_sve, cpu)) {
+/* PSTATE.SM is always unset on syscall entry. */
 return sve_vq(env) * 16;
 }
 return -TARGET_EINVAL;
@@ -27,6 +28,7 @@ static abi_long do_prctl_sve_set_vl(CPUArchState *env, 
abi_long arg2)
 && arg2 >= 0 && arg2 <= 512 * 16 && !(arg2 & 15)) {
 uint32_t vq, old_vq;
 
+/* PSTATE.SM is always unset on syscall entry. */
 old_vq = sve_vq(env);
 
 /*
@@ -49,6 +51,58 @@ static abi_long do_prctl_sve_set_vl(CPUArchState *env, 
abi_long arg2)
 }
 #define do_prctl_sve_set_vl do_prctl_sve_set_vl
 
+static abi_long do_prctl_sme_get_vl(CPUArchState *env)
+{
+ARMCPU *cpu = env_archcpu(env);
+if (cpu_isar_feature(aa64_sme, cpu)) {
+return sme_vq(env) * 16;
+}
+return -TARGET_EINVAL;
+}
+#define do_prctl_sme_get_vl do_prctl_sme_get_vl
+
+static abi_long do_prctl_sme_set_vl(CPUArchState *env, abi_long arg2)
+{
+/*
+ * We cannot support either PR_SME_SET_VL_ONEXEC or PR_SME_VL_INHERIT.
+ * Note the kernel definition of sve_vl_valid allows for VQ=512,
+ * i.e. VL=8192, even though the architectural maximum is VQ=16.
+ */
+if (cpu_isar_feature(aa64_sme, env_archcpu(env))
+&& arg2 >= 0 && arg2 <= 512 * 16 && !(arg2 & 15)) {
+int vq, old_vq;
+
+old_vq = sme_vq(env);
+
+/*
+ * Bound the value of vq, so that we know that it fits into
+ * the 4-bit field in SMCR_EL1.  Because PSTATE.SM is cleared
+ * on syscall entry, we are not modifying the current SVE
+ * vector length.
+ */
+vq = MAX(arg2 / 16, 1);
+vq = MIN(vq, 16);
+env->vfp.smcr_el[1] =
+FIELD_DP64(env->vfp.smcr_el[1], SMCR, LEN, vq - 1);
+
+/* Delay rebuilding hflags until we know if ZA must change. */
+vq = sve_vqm1_for_el_sm(env, 0, true) + 1;
+
+if (vq != old_vq) {
+/*
+ * PSTATE.ZA state is cleared on any change to SVL.
+ * We need not call arm_rebuild_hflags because PSTATE.SM was
+ * cleared on syscall entry, so this hasn't changed VL.
+ */
+env->svcr = FIELD_DP64(env->svcr, SVCR, ZA, 0);
+arm_rebuild_hflags(env);
+}
+return vq * 16;
+}
+return -TARGET_EINVAL;
+}
+#define do_prctl_sme_set_vl do_prctl_sme_set_vl
+
 static abi_long do_prctl_reset_keys(CPUArchState *env, abi_long arg2)
 {
 ARMCPU *cpu = env_archcpu(env);
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index cbde82c907..991b85e6b4 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -6343,6 +6343,12 @@ abi_long do_arch_prctl(CPUX86State *env, int code, 
abi_ulong addr)
 #ifndef PR_SET_SYSCALL_USER_DISPATCH
 # define PR_SET_SYSCALL_USER_DISPATCH 59
 #endif
+#ifndef PR_SME_SET_VL
+# define PR_SME_SET_VL  63
+# define PR_SME_GET_VL  64
+# define PR_SME_VL_LEN_MASK  0xffff
+# define PR_SME_VL_INHERIT   (1 << 17)
+#endif
 
 #include "target_prctl.h"
 
@@ -6383,6 +6389,12 @@ static abi_long do_prctl_inval1(CPUArchState *env, 
abi_long arg2)
 #ifndef do_prctl_set_unalign
 #define do_prctl_set_unalign do_prctl_inval1
 #endif
+#ifndef do_prctl_sme_get_vl
+#define do_prctl_sme_get_vl do_prctl_inval0
+#endif
+#ifndef do_prctl_sme_set_vl
+#define do_prctl_sme_set_vl do_prctl_inval1
+#endif
 
 static abi_long do_prctl(CPUArchState *env, abi_long option, abi_long arg2,
  abi_long arg3, abi_long arg4, abi_long arg5)
@@ -6434,6 +6446,10 @@ static abi_long do_prctl(CPUArchState *env, abi_long 
option, abi_long arg2,
 return do_prctl_sve_get_vl(env);
 case PR_SVE_SET_VL:
 return do_prctl_sve_set_vl(env, arg2);
+case PR_SME_GET_VL:
+return do_prctl_sme_get_vl(env);
+case PR_SME_SET_VL:
+return do_prctl_sme_set_vl(env, arg2);
 case PR_PAC_RESET_KEYS:
 if (arg3 || arg4 || arg5) {
 return -TARGET_EINVAL;
-- 
2.34.1




[PATCH v6 17/45] target/arm: Implement SME RDSVL, ADDSVL, ADDSPL

2022-07-08 Thread Richard Henderson
These SME instructions are nominally within the SVE decode space,
so we add them to sve.decode and translate-sve.c.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
v4: Add streaming_{vec,pred}_reg_size.
---
 target/arm/translate-a64.h | 12 
 target/arm/sve.decode  |  5 -
 target/arm/translate-sve.c | 38 ++
 3 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
index 02fb95e019..099d3d11d6 100644
--- a/target/arm/translate-a64.h
+++ b/target/arm/translate-a64.h
@@ -128,6 +128,12 @@ static inline int vec_full_reg_size(DisasContext *s)
 return s->vl;
 }
 
+/* Return the byte size of the vector register, SVL / 8. */
+static inline int streaming_vec_reg_size(DisasContext *s)
+{
+return s->svl;
+}
+
 /*
  * Return the offset info CPUARMState of the predicate vector register Pn.
  * Note for this purpose, FFR is P16.
@@ -143,6 +149,12 @@ static inline int pred_full_reg_size(DisasContext *s)
 return s->vl >> 3;
 }
 
+/* Return the byte size of the predicate register, SVL / 64.  */
+static inline int streaming_pred_reg_size(DisasContext *s)
+{
+return s->svl >> 3;
+}
+
 /*
  * Round up the size of a register to a size allowed by
  * the tcg vector infrastructure.  Any operation which uses this
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 908643d7d9..95af08c139 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -449,14 +449,17 @@ INDEX_ri0100 esz:2 1 imm:s5 010001 rn:5 rd:5
 # SVE index generation (register start, register increment)
 INDEX_rr0100 .. 1 . 010011 . .  @rd_rn_rm
 
-### SVE Stack Allocation Group
+### SVE / Streaming SVE Stack Allocation Group
 
 # SVE stack frame adjustment
 ADDVL   0100 001 . 01010 .. .   @rd_rn_i6
+ADDSVL  0100 001 . 01011 .. .   @rd_rn_i6
 ADDPL   0100 011 . 01010 .. .   @rd_rn_i6
+ADDSPL  0100 011 . 01011 .. .   @rd_rn_i6
 
 # SVE stack frame size
 RDVL0100 101 1 01010 imm:s6 rd:5
+RDSVL   0100 101 1 01011 imm:s6 rd:5
 
 ### SVE Bitwise Shift - Unpredicated Group
 
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 96e934c1ea..95016e49e9 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -1286,6 +1286,19 @@ static bool trans_ADDVL(DisasContext *s, arg_ADDVL *a)
 return true;
 }
 
+static bool trans_ADDSVL(DisasContext *s, arg_ADDSVL *a)
+{
+if (!dc_isar_feature(aa64_sme, s)) {
+return false;
+}
+if (sme_enabled_check(s)) {
+TCGv_i64 rd = cpu_reg_sp(s, a->rd);
+TCGv_i64 rn = cpu_reg_sp(s, a->rn);
+tcg_gen_addi_i64(rd, rn, a->imm * streaming_vec_reg_size(s));
+}
+return true;
+}
+
 static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
 {
 if (!dc_isar_feature(aa64_sve, s)) {
@@ -1299,6 +1312,19 @@ static bool trans_ADDPL(DisasContext *s, arg_ADDPL *a)
 return true;
 }
 
+static bool trans_ADDSPL(DisasContext *s, arg_ADDSPL *a)
+{
+if (!dc_isar_feature(aa64_sme, s)) {
+return false;
+}
+if (sme_enabled_check(s)) {
+TCGv_i64 rd = cpu_reg_sp(s, a->rd);
+TCGv_i64 rn = cpu_reg_sp(s, a->rn);
+tcg_gen_addi_i64(rd, rn, a->imm * streaming_pred_reg_size(s));
+}
+return true;
+}
+
 static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
 {
 if (!dc_isar_feature(aa64_sve, s)) {
@@ -1311,6 +1337,18 @@ static bool trans_RDVL(DisasContext *s, arg_RDVL *a)
 return true;
 }
 
+static bool trans_RDSVL(DisasContext *s, arg_RDSVL *a)
+{
+if (!dc_isar_feature(aa64_sme, s)) {
+return false;
+}
+if (sme_enabled_check(s)) {
+TCGv_i64 reg = cpu_reg(s, a->rd);
+tcg_gen_movi_i64(reg, a->imm * streaming_vec_reg_size(s));
+}
+return true;
+}
+
 /*
  *** SVE Compute Vector Address Group
  */
-- 
2.34.1




[PATCH v6 27/45] target/arm: Implement SME integer outer product

2022-07-08 Thread Richard Henderson
This is SMOPA, SUMOPA, USMOPA_s, UMOPA, for both Int8 and Int16.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/helper-sme.h| 16 
 target/arm/sme.decode  | 10 +
 target/arm/sme_helper.c| 82 ++
 target/arm/translate-sme.c | 10 +
 4 files changed, 118 insertions(+)

diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
index 4d5d05db3a..d2d544a696 100644
--- a/target/arm/helper-sme.h
+++ b/target/arm/helper-sme.h
@@ -129,3 +129,19 @@ DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_6(sme_bfmopa, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sme_smopa_s, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sme_umopa_s, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sme_sumopa_s, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sme_usmopa_s, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sme_smopa_d, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sme_umopa_d, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sme_sumopa_d, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sme_usmopa_d, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, i32)
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
index e8d27fd8a0..628804e37a 100644
--- a/target/arm/sme.decode
+++ b/target/arm/sme.decode
@@ -76,3 +76,13 @@ FMOPA_d 1000 110 . ... ... . . 0 ... 
   @op_64
 
 BFMOPA  1001 100 . ... ... . . 00 ..@op_32
 FMOPA_h 1001 101 . ... ... . . 00 ..@op_32
+
+SMOPA_s         1010000 0 10 0 ..... ... ... ..... . 00 ..  @op_32
+SUMOPA_s        1010000 0 10 1 ..... ... ... ..... . 00 ..  @op_32
+USMOPA_s        1010000 1 10 0 ..... ... ... ..... . 00 ..  @op_32
+UMOPA_s         1010000 1 10 1 ..... ... ... ..... . 00 ..  @op_32
+
+SMOPA_d         1010000 0 11 0 ..... ... ... ..... . 0 ...  @op_64
+SUMOPA_d        1010000 0 11 1 ..... ... ... ..... . 0 ...  @op_64
+USMOPA_d        1010000 1 11 0 ..... ... ... ..... . 0 ...  @op_64
+UMOPA_d         1010000 1 11 1 ..... ... ... ..... . 0 ...  @op_64
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
index 302f89c30b..f891306bb9 100644
--- a/target/arm/sme_helper.c
+++ b/target/arm/sme_helper.c
@@ -1117,3 +1117,85 @@ void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, 
void *vpn,
 } while (row & 15);
 }
 }
+
+typedef uint64_t IMOPFn(uint64_t, uint64_t, uint64_t, uint8_t, bool);
+
+static inline void do_imopa(uint64_t *za, uint64_t *zn, uint64_t *zm,
+uint8_t *pn, uint8_t *pm,
+uint32_t desc, IMOPFn *fn)
+{
+intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
+bool neg = simd_data(desc);
+
+for (row = 0; row < oprsz; ++row) {
+uint8_t pa = pn[H1(row)];
+uint64_t *za_row = &za[tile_vslice_index(row)];
+uint64_t n = zn[row];
+
+for (col = 0; col < oprsz; ++col) {
+uint8_t pb = pm[H1(col)];
+uint64_t *a = &za_row[col];
+
+*a = fn(n, zm[col], *a, pa & pb, neg);
+}
+}
+}
+
+#define DEF_IMOP_32(NAME, NTYPE, MTYPE) \
+static uint64_t NAME(uint64_t n, uint64_t m, uint64_t a, uint8_t p, bool neg) \
+{   \
+uint32_t sum0 = 0, sum1 = 0;\
+/* Apply P to N as a mask, making the inactive elements 0. */   \
+n &= expand_pred_b(p);  \
+sum0 += (NTYPE)(n >> 0) * (MTYPE)(m >> 0);  \
+sum0 += (NTYPE)(n >> 8) * (MTYPE)(m >> 8);  \
+sum0 += (NTYPE)(n >> 16) * (MTYPE)(m >> 16);\
+sum0 += (NTYPE)(n >> 24) * (MTYPE)(m >> 24);\
+sum1 += (NTYPE)(n >> 32) * (MTYPE)(m >> 32);\
+sum1 += (NTYPE)(n >> 40) * (MTYPE)(m >> 40);\
+sum1 += (NTYPE)(n >> 48) * (MTYPE)(m >> 48);\
+sum1 += (NTYPE)(n >> 56) * (MTYPE)(m >> 56);\
+if (neg) {  \
+sum0 = (uint32_t)a - sum0, sum1 = (uint32_t)(a >> 32) - sum1;   \
+} else {\
+sum0 = (uint32_t)a + sum0, sum1 = (uint32_t)(a >> 32) + sum1;   \
+}

[PATCH v6 15/45] target/arm: Add SME enablement checks

2022-07-08 Thread Richard Henderson
These functions will be used to verify that the cpu
is in the correct state for a given instruction.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/translate-a64.h | 21 +
 target/arm/translate-a64.c | 34 ++
 2 files changed, 55 insertions(+)

diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
index 789b6e8e78..02fb95e019 100644
--- a/target/arm/translate-a64.h
+++ b/target/arm/translate-a64.h
@@ -29,6 +29,27 @@ void write_fp_dreg(DisasContext *s, int reg, TCGv_i64 v);
 bool logic_imm_decode_wmask(uint64_t *result, unsigned int immn,
 unsigned int imms, unsigned int immr);
 bool sve_access_check(DisasContext *s);
+bool sme_enabled_check(DisasContext *s);
+bool sme_enabled_check_with_svcr(DisasContext *s, unsigned);
+
+/* This function corresponds to CheckStreamingSVEEnabled. */
+static inline bool sme_sm_enabled_check(DisasContext *s)
+{
+return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK);
+}
+
+/* This function corresponds to CheckSMEAndZAEnabled. */
+static inline bool sme_za_enabled_check(DisasContext *s)
+{
+return sme_enabled_check_with_svcr(s, R_SVCR_ZA_MASK);
+}
+
+/* Note that this function corresponds to CheckStreamingSVEAndZAEnabled. */
+static inline bool sme_smza_enabled_check(DisasContext *s)
+{
+return sme_enabled_check_with_svcr(s, R_SVCR_SM_MASK | R_SVCR_ZA_MASK);
+}
+
 TCGv_i64 clean_data_tbi(DisasContext *s, TCGv_i64 addr);
 TCGv_i64 gen_mte_check1(DisasContext *s, TCGv_i64 addr, bool is_write,
 bool tag_checked, int log2_size);
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 7fab7f64f8..b16d81bf19 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -1216,6 +1216,40 @@ static bool sme_access_check(DisasContext *s)
 return true;
 }
 
+/* This function corresponds to CheckSMEEnabled. */
+bool sme_enabled_check(DisasContext *s)
+{
+/*
+ * Note that unlike sve_excp_el, we have not constrained sme_excp_el
+ * to be zero when fp_excp_el has priority.  This is because we need
+ * sme_excp_el by itself for cpregs access checks.
+ */
+if (!s->fp_excp_el || s->sme_excp_el < s->fp_excp_el) {
+s->fp_access_checked = true;
+return sme_access_check(s);
+}
+return fp_access_check_only(s);
+}
+
+/* Common subroutine for CheckSMEAnd*Enabled. */
+bool sme_enabled_check_with_svcr(DisasContext *s, unsigned req)
+{
+if (!sme_enabled_check(s)) {
+return false;
+}
+if (FIELD_EX64(req, SVCR, SM) && !s->pstate_sm) {
+gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
+   syn_smetrap(SME_ET_NotStreaming, false));
+return false;
+}
+if (FIELD_EX64(req, SVCR, ZA) && !s->pstate_za) {
+gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
+   syn_smetrap(SME_ET_InactiveZA, false));
+return false;
+}
+return true;
+}
+
 /*
  * This utility function is for doing register extension with an
  * optional shift. You will likely want to pass a temporary for the
-- 
2.34.1




[PATCH v6 24/45] target/arm: Implement FMOPA, FMOPS (non-widening)

2022-07-08 Thread Richard Henderson
Signed-off-by: Richard Henderson 
---
 target/arm/helper-sme.h|  5 +++
 target/arm/sme.decode  |  9 +
 target/arm/sme_helper.c| 69 ++
 target/arm/translate-sme.c | 32 ++
 4 files changed, 115 insertions(+)

diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
index 753e9e624c..f50d0fe1d6 100644
--- a/target/arm/helper-sme.h
+++ b/target/arm/helper-sme.h
@@ -120,3 +120,8 @@ DEF_HELPER_FLAGS_5(sme_addha_s, TCG_CALL_NO_RWG, void, ptr, 
ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(sme_addva_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
index 8cb6c4053c..ba4774d174 100644
--- a/target/arm/sme.decode
+++ b/target/arm/sme.decode
@@ -64,3 +64,12 @@ ADDHA_s 1100 10 01000 0 ... ... . 000 .. 
   @adda_32
 ADDVA_s 1100 10 01000 1 ... ... . 000 ..@adda_32
 ADDHA_d 1100 11 01000 0 ... ... . 00 ...@adda_64
 ADDVA_d 1100 11 01000 1 ... ... . 00 ...@adda_64
+
+### SME Outer Product
+
+&op             zad zn zm pm pn sub:bool
+@op_32          ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 .. zad:2 &op
+@op_64          ........ ... zm:5 pm:3 pn:3 zn:5 sub:1 .  zad:3 &op
+
+FMOPA_s         10000000 100 ..... ... ... ..... . 00 ..    @op_32
+FMOPA_d         10000000 110 ..... ... ... ..... . 0 ...    @op_64
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
index f1e924db74..7dc76b6a1c 100644
--- a/target/arm/sme_helper.c
+++ b/target/arm/sme_helper.c
@@ -25,6 +25,7 @@
 #include "exec/cpu_ldst.h"
 #include "exec/exec-all.h"
 #include "qemu/int128.h"
+#include "fpu/softfloat.h"
 #include "vec_internal.h"
 #include "sve_ldst_internal.h"
 
@@ -918,3 +919,71 @@ void HELPER(sme_addva_d)(void *vzda, void *vzn, void *vpn,
 }
 }
 }
+
+void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn,
+ void *vpm, void *vst, uint32_t desc)
+{
+intptr_t row, col, oprsz = simd_maxsz(desc);
+uint32_t neg = simd_data(desc) << 31;
+uint16_t *pn = vpn, *pm = vpm;
+float_status fpst;
+
+/*
+ * Make a copy of float_status because this operation does not
+ * update the cumulative fp exception status.  It also produces
+ * default nans.
+ */
+fpst = *(float_status *)vst;
+set_default_nan_mode(true, &fpst);
+
+for (row = 0; row < oprsz; ) {
+uint16_t pa = pn[H2(row >> 4)];
+do {
+if (pa & 1) {
+void *vza_row = vza + tile_vslice_offset(row);
+uint32_t n = *(uint32_t *)(vzn + H1_4(row)) ^ neg;
+
+for (col = 0; col < oprsz; ) {
+uint16_t pb = pm[H2(col >> 4)];
+do {
+if (pb & 1) {
+uint32_t *a = vza_row + H1_4(col);
+uint32_t *m = vzm + H1_4(col);
+*a = float32_muladd(n, *m, *a, 0, vst);
+}
+col += 4;
+pb >>= 4;
+} while (col & 15);
+}
+}
+row += 4;
+pa >>= 4;
+} while (row & 15);
+}
+}
+
+void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, void *vpn,
+ void *vpm, void *vst, uint32_t desc)
+{
+intptr_t row, col, oprsz = simd_oprsz(desc) / 8;
+uint64_t neg = (uint64_t)simd_data(desc) << 63;
+uint64_t *za = vza, *zn = vzn, *zm = vzm;
+uint8_t *pn = vpn, *pm = vpm;
+float_status fpst = *(float_status *)vst;
+
+set_default_nan_mode(true, &fpst);
+
+for (row = 0; row < oprsz; ++row) {
+if (pn[H1(row)] & 1) {
+uint64_t *za_row = &za[tile_vslice_index(row)];
+uint64_t n = zn[row] ^ neg;
+
+for (col = 0; col < oprsz; ++col) {
+if (pm[H1(col)] & 1) {
+uint64_t *a = &za_row[col];
+*a = float64_muladd(n, zm[col], *a, 0, &fpst);
+}
+}
+}
+}
+}
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
index d3b9cdd5c4..fa8f343a7d 100644
--- a/target/arm/translate-sme.c
+++ b/target/arm/translate-sme.c
@@ -298,3 +298,35 @@ TRANS_FEAT(ADDHA_s, aa64_sme, do_adda, a, MO_32, 
gen_helper_sme_addha_s)
 TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, gen_helper_sme_addva_s)
 TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
 TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, 

[PATCH v6 16/45] target/arm: Handle SME in sve_access_check

2022-07-08 Thread Richard Henderson
The pseudocode for CheckSVEEnabled gains a check for Streaming
SVE mode, and for SME present but SVE absent.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/translate-a64.c | 22 --
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index b16d81bf19..b7b64f7358 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -1183,21 +1183,31 @@ static bool fp_access_check(DisasContext *s)
 return true;
 }
 
-/* Check that SVE access is enabled.  If it is, return true.
+/*
+ * Check that SVE access is enabled.  If it is, return true.
  * If not, emit code to generate an appropriate exception and return false.
+ * This function corresponds to CheckSVEEnabled().
  */
 bool sve_access_check(DisasContext *s)
 {
-if (s->sve_excp_el) {
-assert(!s->sve_access_checked);
-s->sve_access_checked = true;
-
+if (s->pstate_sm || !dc_isar_feature(aa64_sve, s)) {
+assert(dc_isar_feature(aa64_sme, s));
+if (!sme_sm_enabled_check(s)) {
+goto fail_exit;
+}
+} else if (s->sve_excp_el) {
 gen_exception_insn_el(s, s->pc_curr, EXCP_UDEF,
   syn_sve_access_trap(), s->sve_excp_el);
-return false;
+goto fail_exit;
 }
 s->sve_access_checked = true;
 return fp_access_check(s);
+
+ fail_exit:
+/* Assert that we only raise one exception per instruction. */
+assert(!s->sve_access_checked);
+s->sve_access_checked = true;
+return false;
 }
 
 /*
-- 
2.34.1




[PATCH v6 36/45] linux-user/aarch64: Tidy target_restore_sigframe error return

2022-07-08 Thread Richard Henderson
Fold the return value setting into the goto, so each
point of failure need not do both.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 linux-user/aarch64/signal.c | 26 +++---
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/linux-user/aarch64/signal.c b/linux-user/aarch64/signal.c
index 3cef2f44cf..8b352abb97 100644
--- a/linux-user/aarch64/signal.c
+++ b/linux-user/aarch64/signal.c
@@ -287,7 +287,6 @@ static int target_restore_sigframe(CPUARMState *env,
 struct target_sve_context *sve = NULL;
 uint64_t extra_datap = 0;
 bool used_extra = false;
-bool err = false;
 int vq = 0, sve_size = 0;
 
 target_restore_general_frame(env, sf);
@@ -301,8 +300,7 @@ static int target_restore_sigframe(CPUARMState *env,
 switch (magic) {
 case 0:
 if (size != 0) {
-err = true;
-goto exit;
+goto err;
 }
 if (used_extra) {
 ctx = NULL;
@@ -314,8 +312,7 @@ static int target_restore_sigframe(CPUARMState *env,
 
 case TARGET_FPSIMD_MAGIC:
 if (fpsimd || size != sizeof(struct target_fpsimd_context)) {
-err = true;
-goto exit;
+goto err;
 }
 fpsimd = (struct target_fpsimd_context *)ctx;
 break;
@@ -329,13 +326,11 @@ static int target_restore_sigframe(CPUARMState *env,
 break;
 }
 }
-err = true;
-goto exit;
+goto err;
 
 case TARGET_EXTRA_MAGIC:
 if (extra || size != sizeof(struct target_extra_context)) {
-err = true;
-goto exit;
+goto err;
 }
 __get_user(extra_datap,
&((struct target_extra_context *)ctx)->datap);
@@ -348,8 +343,7 @@ static int target_restore_sigframe(CPUARMState *env,
 /* Unknown record -- we certainly didn't generate it.
  * Did we in fact get out of sync?
  */
-err = true;
-goto exit;
+goto err;
 }
 ctx = (void *)ctx + size;
 }
@@ -358,17 +352,19 @@ static int target_restore_sigframe(CPUARMState *env,
 if (fpsimd) {
 target_restore_fpsimd_record(env, fpsimd);
 } else {
-err = true;
+goto err;
 }
 
 /* SVE data, if present, overwrites FPSIMD data.  */
 if (sve) {
 target_restore_sve_record(env, sve, vq);
 }
-
- exit:
 unlock_user(extra, extra_datap, 0);
-return err;
+return 0;
+
+ err:
+unlock_user(extra, extra_datap, 0);
+return 1;
 }
 
 static abi_ulong get_sigframe(struct target_sigaction *ka,
-- 
2.34.1




[PATCH v6 13/45] target/arm: Mark LDFF1 and LDNF1 as non-streaming

2022-07-08 Thread Richard Henderson
Mark these as non-streaming instructions, which should trap
if full a64 support is not enabled in streaming mode.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/sme-fa64.decode | 2 --
 target/arm/translate-sve.c | 2 ++
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
index 7d4c33fb5b..2b5432bf85 100644
--- a/target/arm/sme-fa64.decode
+++ b/target/arm/sme-fa64.decode
@@ -59,7 +59,5 @@ FAIL0001 1110 0111 1110  00--     # FJCVTZS
 #   --11 1100 --1-     --10   # Load/store FP register 
(register offset)
 #   --11 1101         # Load/store FP register 
(scaled imm)
 
-FAIL1010 010-   011-      # SVE contiguous FF load 
(scalar+scalar)
-FAIL1010 010- ---1  101-      # SVE contiguous NF load 
(scalar+imm)
 FAIL1010 010- -01-  000-      # SVE load & replicate 32 
bytes (scalar+scalar)
 FAIL1010 010- -010  001-      # SVE load & replicate 32 
bytes (scalar+imm)
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index bbf3bf2119..5182ee4c06 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -4805,6 +4805,7 @@ static bool trans_LDFF1_zprr(DisasContext *s, 
arg_rprr_load *a)
 if (!dc_isar_feature(aa64_sve, s)) {
 return false;
 }
+s->is_nonstreaming = true;
 if (sve_access_check(s)) {
 TCGv_i64 addr = new_tmp_a64(s);
 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
@@ -4906,6 +4907,7 @@ static bool trans_LDNF1_zpri(DisasContext *s, 
arg_rpri_load *a)
 if (!dc_isar_feature(aa64_sve, s)) {
 return false;
 }
+s->is_nonstreaming = true;
 if (sve_access_check(s)) {
 int vsz = vec_full_reg_size(s);
 int elements = vsz >> dtype_esz[a->dtype];
-- 
2.34.1




[PATCH v6 14/45] target/arm: Mark LD1RO as non-streaming

2022-07-08 Thread Richard Henderson
Mark these as non-streaming instructions, which should trap
if full a64 support is not enabled in streaming mode.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/sme-fa64.decode | 3 ---
 target/arm/translate-sve.c | 2 ++
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
index 2b5432bf85..47708ccc8d 100644
--- a/target/arm/sme-fa64.decode
+++ b/target/arm/sme-fa64.decode
@@ -58,6 +58,3 @@ FAIL0001 1110 0111 1110  00--     # FJCVTZS
 #   --11 1100 --0-        # Load/store FP register 
(unscaled imm)
 #   --11 1100 --1-     --10   # Load/store FP register 
(register offset)
 #   --11 1101         # Load/store FP register 
(scaled imm)
-
-FAIL1010 010- -01-  000-      # SVE load & replicate 32 
bytes (scalar+scalar)
-FAIL1010 010- -010  001-      # SVE load & replicate 32 
bytes (scalar+imm)
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 5182ee4c06..96e934c1ea 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -5062,6 +5062,7 @@ static bool trans_LD1RO_zprr(DisasContext *s, 
arg_rprr_load *a)
 if (a->rm == 31) {
 return false;
 }
+s->is_nonstreaming = true;
 if (sve_access_check(s)) {
 TCGv_i64 addr = new_tmp_a64(s);
 tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
@@ -5076,6 +5077,7 @@ static bool trans_LD1RO_zpri(DisasContext *s, 
arg_rpri_load *a)
 if (!dc_isar_feature(aa64_sve_f64mm, s)) {
 return false;
 }
+s->is_nonstreaming = true;
 if (sve_access_check(s)) {
 TCGv_i64 addr = new_tmp_a64(s);
 tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * 32);
-- 
2.34.1




[PATCH v6 25/45] target/arm: Implement BFMOPA, BFMOPS

2022-07-08 Thread Richard Henderson
Signed-off-by: Richard Henderson 
---
 target/arm/helper-sme.h|  2 ++
 target/arm/sme.decode  |  2 ++
 target/arm/sme_helper.c| 56 ++
 target/arm/translate-sme.c | 30 
 4 files changed, 90 insertions(+)

diff --git a/target/arm/helper-sme.h b/target/arm/helper-sme.h
index f50d0fe1d6..1d68fb8c74 100644
--- a/target/arm/helper-sme.h
+++ b/target/arm/helper-sme.h
@@ -125,3 +125,5 @@ DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
 DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_6(sme_bfmopa, TCG_CALL_NO_RWG,
+   void, ptr, ptr, ptr, ptr, ptr, i32)
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
index ba4774d174..afd9c0dffd 100644
--- a/target/arm/sme.decode
+++ b/target/arm/sme.decode
@@ -73,3 +73,5 @@ ADDVA_d 1100 11 01000 1 ... ... . 00 ...  
  @adda_64
 
 FMOPA_s 1000 100 . ... ... . . 00 ..@op_32
 FMOPA_d 1000 110 . ... ... . . 0 ...@op_64
+
+BFMOPA  1001 100 . ... ... . . 00 ..@op_32
diff --git a/target/arm/sme_helper.c b/target/arm/sme_helper.c
index 7dc76b6a1c..690a53eee2 100644
--- a/target/arm/sme_helper.c
+++ b/target/arm/sme_helper.c
@@ -987,3 +987,59 @@ void HELPER(sme_fmopa_d)(void *vza, void *vzn, void *vzm, 
void *vpn,
 }
 }
 }
+
+/*
+ * Alter PAIR as needed for controlling predicates being false,
+ * and for NEG on an enabled row element.
+ */
+static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t 
neg)
+{
+/*
+ * The pseudocode uses a conditional negate after the conditional zero.
+ * It is simpler here to unconditionally negate before conditional zero.
+ */
+pair ^= neg;
+if (!(pg & 1)) {
+pair &= 0xffff0000u;
+}
+if (!(pg & 4)) {
+pair &= 0x0000ffffu;
+}
+return pair;
+}
+
+/*
+ * BFloat16 predicated outer product, accumulated into a 32-bit ZA tile.
+ *
+ * Each 32-bit element of ZN and ZM holds a pair of bfloat16 values.  For
+ * every row/column position where at least one half of the pair is active
+ * in both predicates, the pair dot product is added to za[row][col] via
+ * bfdotadd().  Inactive halves are zeroed by f16mop_adj_pair(), and the
+ * negate flag from simd_data(desc) flips the sign of both ZN halves.
+ */
+void HELPER(sme_bfmopa)(void *vza, void *vzn, void *vzm, void *vpn,
+                        void *vpm, uint32_t desc)
+{
+    intptr_t row, col, oprsz = simd_maxsz(desc);
+    uint32_t neg = simd_data(desc) * 0x80008000u;
+    uint16_t *pn = vpn, *pm = vpm;
+
+    for (row = 0; row < oprsz; ) {
+        uint16_t prow = pn[H2(row >> 4)];
+        do {
+            void *vza_row = vza + tile_vslice_offset(row);
+            uint32_t n = *(uint32_t *)(vzn + H1_4(row));
+
+            n = f16mop_adj_pair(n, prow, neg);
+
+            for (col = 0; col < oprsz; ) {
+                uint16_t pcol = pm[H2(col >> 4)];
+                do {
+                    /* Bits 0 and 2 govern the two halves of the pair. */
+                    if (prow & pcol & 0b0101) {
+                        uint32_t *a = vza_row + H1_4(col);
+                        uint32_t m = *(uint32_t *)(vzm + H1_4(col));
+
+                        m = f16mop_adj_pair(m, pcol, 0);
+                        *a = bfdotadd(*a, n, m);
+                    }
+                    /*
+                     * Advance unconditionally: stepping only when the
+                     * element is active would leave col stuck on the
+                     * first inactive element and never terminate.
+                     */
+                    col += 4;
+                    pcol >>= 4;
+                } while (col & 15);
+            }
+            row += 4;
+            prow >>= 4;
+        } while (row & 15);
+    }
+}
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
index fa8f343a7d..ecb7583c55 100644
--- a/target/arm/translate-sme.c
+++ b/target/arm/translate-sme.c
@@ -299,6 +299,33 @@ TRANS_FEAT(ADDVA_s, aa64_sme, do_adda, a, MO_32, 
gen_helper_sme_addva_s)
 TRANS_FEAT(ADDHA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addha_d)
 TRANS_FEAT(ADDVA_d, aa64_sme_i16i64, do_adda, a, MO_64, gen_helper_sme_addva_d)
 
+/*
+ * Emit a call to an outer-product helper that takes no float_status.
+ *
+ * Builds pointers to the ZA tile, the two source vectors and the two
+ * governing predicates, then calls FN with a descriptor carrying the
+ * streaming vector length and the subtract flag (a->sub) as simd data.
+ *
+ * Always returns true: the instruction counts as handled even when
+ * sme_smza_enabled_check() fails (presumably after that check has
+ * raised the exception -- confirm against its definition).
+ */
+static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz,
+                       gen_helper_gvec_5 *fn)
+{
+    int svl = streaming_vec_reg_size(s);
+    uint32_t desc = simd_desc(svl, svl, a->sub);
+    TCGv_ptr za, zn, zm, pn, pm;
+
+    if (!sme_smza_enabled_check(s)) {
+        return true;
+    }
+
+    /* Sum XZR+zad to find ZAd. */
+    za = get_tile_rowcol(s, esz, 31, a->zad, false);
+    zn = vec_full_reg_ptr(s, a->zn);
+    zm = vec_full_reg_ptr(s, a->zm);
+    pn = pred_full_reg_ptr(s, a->pn);
+    pm = pred_full_reg_ptr(s, a->pm);
+
+    fn(za, zn, zm, pn, pm, tcg_constant_i32(desc));
+
+    /* Release every pointer temporary acquired above, zm included. */
+    tcg_temp_free_ptr(za);
+    tcg_temp_free_ptr(zn);
+    tcg_temp_free_ptr(zm);
+    tcg_temp_free_ptr(pn);
+    tcg_temp_free_ptr(pm);
+    return true;
+}
+
 static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
 gen_helper_gvec_5_ptr *fn)
 {
@@ -330,3 +357,6 @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, 
MemOp esz,
 
 TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, 
gen_helper_sme_fmopa_s)
 TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, 
gen_helper_sme_fmopa_d)
+
+/* TODO: FEAT_EBF16 */
+TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
-- 
2.34.1




[PATCH v6 08/45] target/arm: Mark FTSMUL, FTMAD, FADDA as non-streaming

2022-07-08 Thread Richard Henderson
Mark these as non-streaming instructions, which should trap
if full a64 support is not enabled in streaming mode.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/sme-fa64.decode |  3 ---
 target/arm/translate-sve.c | 15 +++
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
index 4ff2df82e5..b5eaa2d0fa 100644
--- a/target/arm/sme-fa64.decode
+++ b/target/arm/sme-fa64.decode
@@ -59,9 +59,6 @@ FAIL0001 1110 0111 1110  00--     # FJCVTZS
 #   --11 1100 --1-     --10   # Load/store FP register 
(register offset)
 #   --11 1101         # Load/store FP register 
(scaled imm)
 
-FAIL0110 0101 --0-   11--     # FTSMUL
-FAIL0110 0101 --01 0--- 100-      # FTMAD
-FAIL0110 0101 --01 1--- 001-      # FADDA
 FAIL0100 0101 --0-  1001 10--     # SMMLA, UMMLA, USMMLA
 FAIL0100 0101 --1-  1---      # SVE2 string/histo/crypto 
instructions
 FAIL1000 010- -00-  10--      # SVE2 32-bit gather NT load 
(vector+scalar)
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 4ff2102fc8..d5aad53923 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -3861,9 +3861,9 @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
 NULL,   gen_helper_sve_ftmad_h,
 gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
 };
-TRANS_FEAT(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
-   ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
-   a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
+TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
+ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
+a->esz == MO_16 ? FPST_FPCR_F16 : FPST_FPCR)
 
 /*
  *** SVE Floating Point Accumulating Reduction Group
@@ -3886,6 +3886,7 @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
 if (a->esz == 0 || !dc_isar_feature(aa64_sve, s)) {
 return false;
 }
+s->is_nonstreaming = true;
 if (!sve_access_check(s)) {
 return true;
 }
@@ -3923,12 +3924,18 @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz 
*a)
 DO_FP3(FADD_zzz, fadd)
 DO_FP3(FSUB_zzz, fsub)
 DO_FP3(FMUL_zzz, fmul)
-DO_FP3(FTSMUL, ftsmul)
 DO_FP3(FRECPS, recps)
 DO_FP3(FRSQRTS, rsqrts)
 
 #undef DO_FP3
 
+static gen_helper_gvec_3_ptr * const ftsmul_fns[4] = {
+NULL, gen_helper_gvec_ftsmul_h,
+gen_helper_gvec_ftsmul_s, gen_helper_gvec_ftsmul_d
+};
+TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
+ftsmul_fns[a->esz], a, 0)
+
 /*
  *** SVE Floating Point Arithmetic - Predicated Group
  */
-- 
2.34.1




[PATCH v6 12/45] target/arm: Mark gather prefetch as non-streaming

2022-07-08 Thread Richard Henderson
Mark these as non-streaming instructions, which should trap if full
a64 support is not enabled in streaming mode.  In this case, introduce
PRF_ns (prefetch non-streaming) to handle the checks.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/sme-fa64.decode |  3 ---
 target/arm/sve.decode  | 10 +-
 target/arm/translate-sve.c | 11 +++
 3 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
index 1acc3ae080..7d4c33fb5b 100644
--- a/target/arm/sme-fa64.decode
+++ b/target/arm/sme-fa64.decode
@@ -59,10 +59,7 @@ FAIL0001 1110 0111 1110  00--     # FJCVTZS
 #   --11 1100 --1-     --10   # Load/store FP register 
(register offset)
 #   --11 1101         # Load/store FP register 
(scaled imm)
 
-FAIL1000 010- -00-  111-      # SVE 32-bit gather prefetch 
(vector+imm)
-FAIL1000 0100 0-1-  0---      # SVE 32-bit gather prefetch 
(scalar+vector)
 FAIL1010 010-   011-      # SVE contiguous FF load 
(scalar+scalar)
 FAIL1010 010- ---1  101-      # SVE contiguous NF load 
(scalar+imm)
 FAIL1010 010- -01-  000-      # SVE load & replicate 32 
bytes (scalar+scalar)
 FAIL1010 010- -010  001-      # SVE load & replicate 32 
bytes (scalar+imm)
-FAIL1100 010-         # SVE 64-bit gather 
load/prefetch
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index a54feb2f61..908643d7d9 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -1183,10 +1183,10 @@ LD1RO_zpri  1010010 .. 01 0 001 ... . . 
\
 @rpri_load_msz nreg=0
 
 # SVE 32-bit gather prefetch (scalar plus 32-bit scaled offsets)
-PRF 110 00 -1 - 0-- --- - 0 
+PRF_ns  110 00 -1 - 0-- --- - 0 
 
 # SVE 32-bit gather prefetch (vector plus immediate)
-PRF 110 -- 00 - 111 --- - 0 
+PRF_ns  110 -- 00 - 111 --- - 0 
 
 # SVE contiguous prefetch (scalar plus immediate)
 PRF 110 11 1- - 0-- --- - 0 
@@ -1223,13 +1223,13 @@ LD1_zpiz1100010 .. 01 . 1.. ... . . 
\
 @rpri_g_load esz=3
 
 # SVE 64-bit gather prefetch (scalar plus 64-bit scaled offsets)
-PRF 1100010 00 11 - 1-- --- - 0 
+PRF_ns  1100010 00 11 - 1-- --- - 0 
 
 # SVE 64-bit gather prefetch (scalar plus unpacked 32-bit scaled offsets)
-PRF 1100010 00 -1 - 0-- --- - 0 
+PRF_ns  1100010 00 -1 - 0-- --- - 0 
 
 # SVE 64-bit gather prefetch (vector plus immediate)
-PRF 1100010 -- 00 - 111 --- - 0 
+PRF_ns  1100010 -- 00 - 111 --- - 0 
 
 ### SVE Memory Store Group
 
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index b23c6aa0bf..bbf3bf2119 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -5971,6 +5971,17 @@ static bool trans_PRF_rr(DisasContext *s, arg_PRF_rr *a)
 return true;
 }
 
+static bool trans_PRF_ns(DisasContext *s, arg_PRF_ns *a)
+{
+if (!dc_isar_feature(aa64_sve, s)) {
+return false;
+}
+/* Prefetch is a nop within QEMU.  */
+s->is_nonstreaming = true;
+(void)sve_access_check(s);
+return true;
+}
+
 /*
  * Move Prefix
  *
-- 
2.34.1




[PATCH v6 09/45] target/arm: Mark SMMLA, UMMLA, USMMLA as non-streaming

2022-07-08 Thread Richard Henderson
Mark these as non-streaming instructions, which should trap
if full a64 support is not enabled in streaming mode.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/sme-fa64.decode |  1 -
 target/arm/translate-sve.c | 12 ++--
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
index b5eaa2d0fa..3260ea2d64 100644
--- a/target/arm/sme-fa64.decode
+++ b/target/arm/sme-fa64.decode
@@ -59,7 +59,6 @@ FAIL0001 1110 0111 1110  00--     # FJCVTZS
 #   --11 1100 --1-     --10   # Load/store FP register 
(register offset)
 #   --11 1101         # Load/store FP register 
(scaled imm)
 
-FAIL0100 0101 --0-  1001 10--     # SMMLA, UMMLA, USMMLA
 FAIL0100 0101 --1-  1---      # SVE2 string/histo/crypto 
instructions
 FAIL1000 010- -00-  10--      # SVE2 32-bit gather NT load 
(vector+scalar)
 FAIL1000 010- -00-  111-      # SVE 32-bit gather prefetch 
(vector+imm)
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index d5aad53923..9bbf44f008 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -7302,12 +7302,12 @@ TRANS_FEAT(FMLALT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, 
false, true)
 TRANS_FEAT(FMLSLB_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, false)
 TRANS_FEAT(FMLSLT_zzxw, aa64_sve2, do_FMLAL_zzxw, a, true, true)
 
-TRANS_FEAT(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
-   gen_helper_gvec_smmla_b, a, 0)
-TRANS_FEAT(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
-   gen_helper_gvec_usmmla_b, a, 0)
-TRANS_FEAT(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
-   gen_helper_gvec_ummla_b, a, 0)
+TRANS_FEAT_NONSTREAMING(SMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
+gen_helper_gvec_smmla_b, a, 0)
+TRANS_FEAT_NONSTREAMING(USMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
+gen_helper_gvec_usmmla_b, a, 0)
+TRANS_FEAT_NONSTREAMING(UMMLA, aa64_sve_i8mm, gen_gvec_ool_arg_zzzz,
+gen_helper_gvec_ummla_b, a, 0)
 
 TRANS_FEAT(BFDOT_, aa64_sve_bf16, gen_gvec_ool_arg_,
gen_helper_gvec_bfdot, a, 0)
-- 
2.34.1




[PATCH v6 06/45] target/arm: Mark BDEP, BEXT, BGRP, COMPACT, FEXPA, FTSSEL as non-streaming

2022-07-08 Thread Richard Henderson
Mark these as non-streaming instructions, which should trap
if full a64 support is not enabled in streaming mode.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/sme-fa64.decode |  3 ---
 target/arm/translate-sve.c | 22 --
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
index fa2b5cbf1a..4f515939d9 100644
--- a/target/arm/sme-fa64.decode
+++ b/target/arm/sme-fa64.decode
@@ -59,9 +59,6 @@ FAIL0001 1110 0111 1110  00--     # FJCVTZS
 #   --11 1100 --1-     --10   # Load/store FP register 
(register offset)
 #   --11 1101         # Load/store FP register 
(scaled imm)
 
-FAIL 0100 --1-  1011 -0--     # FTSSEL, FEXPA
-FAIL 0101 --10 0001 100-      # COMPACT
-FAIL0100 0101 --0-  1011      # BDEP, BEXT, BGRP
 FAIL0100 0101 000-  0110 1---     # PMULLB, PMULLT (128b 
result)
 FAIL0110 0100 --1-  1110 01--     # FMMLA, BFMMLA
 FAIL0110 0101 --0-   11--     # FTSMUL
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index d6faec15fe..ae48040aa4 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -1333,14 +1333,15 @@ static gen_helper_gvec_2 * const fexpa_fns[4] = {
 NULL,   gen_helper_sve_fexpa_h,
 gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
 };
-TRANS_FEAT(FEXPA, aa64_sve, gen_gvec_ool_zz,
-   fexpa_fns[a->esz], a->rd, a->rn, 0)
+TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
+fexpa_fns[a->esz], a->rd, a->rn, 0)
 
 static gen_helper_gvec_3 * const ftssel_fns[4] = {
 NULL,gen_helper_sve_ftssel_h,
 gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
 };
-TRANS_FEAT(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz, ftssel_fns[a->esz], a, 0)
+TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
+ftssel_fns[a->esz], a, 0)
 
 /*
  *** SVE Predicate Logical Operations Group
@@ -2536,7 +2537,8 @@ TRANS_FEAT(TRN2_q, aa64_sve_f64mm, gen_gvec_ool_arg_zzz,
 static gen_helper_gvec_3 * const compact_fns[4] = {
 NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
 };
-TRANS_FEAT(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz, compact_fns[a->esz], a, 0)
+TRANS_FEAT_NONSTREAMING(COMPACT, aa64_sve, gen_gvec_ool_arg_zpz,
+compact_fns[a->esz], a, 0)
 
 /* Call the helper that computes the ARM LastActiveElement pseudocode
  * function, scaled by the element size.  This includes the not found
@@ -6374,22 +6376,22 @@ static gen_helper_gvec_3 * const bext_fns[4] = {
 gen_helper_sve2_bext_b, gen_helper_sve2_bext_h,
 gen_helper_sve2_bext_s, gen_helper_sve2_bext_d,
 };
-TRANS_FEAT(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
-   bext_fns[a->esz], a, 0)
+TRANS_FEAT_NONSTREAMING(BEXT, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
+bext_fns[a->esz], a, 0)
 
 static gen_helper_gvec_3 * const bdep_fns[4] = {
 gen_helper_sve2_bdep_b, gen_helper_sve2_bdep_h,
 gen_helper_sve2_bdep_s, gen_helper_sve2_bdep_d,
 };
-TRANS_FEAT(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
-   bdep_fns[a->esz], a, 0)
+TRANS_FEAT_NONSTREAMING(BDEP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
+bdep_fns[a->esz], a, 0)
 
 static gen_helper_gvec_3 * const bgrp_fns[4] = {
 gen_helper_sve2_bgrp_b, gen_helper_sve2_bgrp_h,
 gen_helper_sve2_bgrp_s, gen_helper_sve2_bgrp_d,
 };
-TRANS_FEAT(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
-   bgrp_fns[a->esz], a, 0)
+TRANS_FEAT_NONSTREAMING(BGRP, aa64_sve2_bitperm, gen_gvec_ool_arg_zzz,
+bgrp_fns[a->esz], a, 0)
 
 static gen_helper_gvec_3 * const cadd_fns[4] = {
 gen_helper_sve2_cadd_b, gen_helper_sve2_cadd_h,
-- 
2.34.1




[PATCH v6 11/45] target/arm: Mark gather/scatter load/store as non-streaming

2022-07-08 Thread Richard Henderson
Mark these as non-streaming instructions, which should trap
if full a64 support is not enabled in streaming mode.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/sme-fa64.decode | 9 -
 target/arm/translate-sve.c | 6 ++
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
index fe462d2ccc..1acc3ae080 100644
--- a/target/arm/sme-fa64.decode
+++ b/target/arm/sme-fa64.decode
@@ -59,19 +59,10 @@ FAIL0001 1110 0111 1110  00--     # FJCVTZS
 #   --11 1100 --1-     --10   # Load/store FP register 
(register offset)
 #   --11 1101         # Load/store FP register 
(scaled imm)
 
-FAIL1000 010- -00-  10--      # SVE2 32-bit gather NT load 
(vector+scalar)
 FAIL1000 010- -00-  111-      # SVE 32-bit gather prefetch 
(vector+imm)
 FAIL1000 0100 0-1-  0---      # SVE 32-bit gather prefetch 
(scalar+vector)
-FAIL1000 010- -01-  1---      # SVE 32-bit gather load 
(vector+imm)
-FAIL1000 0100 0-0-  0---      # SVE 32-bit gather load 
byte (scalar+vector)
-FAIL1000 0100 1---  0---      # SVE 32-bit gather load 
half (scalar+vector)
-FAIL1000 0101 0---  0---      # SVE 32-bit gather load 
word (scalar+vector)
 FAIL1010 010-   011-      # SVE contiguous FF load 
(scalar+scalar)
 FAIL1010 010- ---1  101-      # SVE contiguous NF load 
(scalar+imm)
 FAIL1010 010- -01-  000-      # SVE load & replicate 32 
bytes (scalar+scalar)
 FAIL1010 010- -010  001-      # SVE load & replicate 32 
bytes (scalar+imm)
 FAIL1100 010-         # SVE 64-bit gather 
load/prefetch
-FAIL1110 010- -00-  001-      # SVE2 64-bit scatter NT 
store (vector+scalar)
-FAIL1110 010- -10-  001-      # SVE2 32-bit scatter NT 
store (vector+scalar)
-FAIL1110 010-   1-0-      # SVE scatter store 
(scalar+32-bit vector)
-FAIL1110 010-   101-      # SVE scatter store (misc)
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index f8e0716474..b23c6aa0bf 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -5669,6 +5669,7 @@ static bool trans_LD1_zprz(DisasContext *s, arg_LD1_zprz 
*a)
 if (!dc_isar_feature(aa64_sve, s)) {
 return false;
 }
+s->is_nonstreaming = true;
 if (!sve_access_check(s)) {
 return true;
 }
@@ -5700,6 +5701,7 @@ static bool trans_LD1_zpiz(DisasContext *s, arg_LD1_zpiz 
*a)
 if (!dc_isar_feature(aa64_sve, s)) {
 return false;
 }
+s->is_nonstreaming = true;
 if (!sve_access_check(s)) {
 return true;
 }
@@ -5734,6 +5736,7 @@ static bool trans_LDNT1_zprz(DisasContext *s, 
arg_LD1_zprz *a)
 if (!dc_isar_feature(aa64_sve2, s)) {
 return false;
 }
+s->is_nonstreaming = true;
 if (!sve_access_check(s)) {
 return true;
 }
@@ -5857,6 +5860,7 @@ static bool trans_ST1_zprz(DisasContext *s, arg_ST1_zprz 
*a)
 if (!dc_isar_feature(aa64_sve, s)) {
 return false;
 }
+s->is_nonstreaming = true;
 if (!sve_access_check(s)) {
 return true;
 }
@@ -5887,6 +5891,7 @@ static bool trans_ST1_zpiz(DisasContext *s, arg_ST1_zpiz 
*a)
 if (!dc_isar_feature(aa64_sve, s)) {
 return false;
 }
+s->is_nonstreaming = true;
 if (!sve_access_check(s)) {
 return true;
 }
@@ -5921,6 +5926,7 @@ static bool trans_STNT1_zprz(DisasContext *s, 
arg_ST1_zprz *a)
 if (!dc_isar_feature(aa64_sve2, s)) {
 return false;
 }
+s->is_nonstreaming = true;
 if (!sve_access_check(s)) {
 return true;
 }
-- 
2.34.1




[PATCH v6 04/45] target/arm: Mark ADR as non-streaming

2022-07-08 Thread Richard Henderson
Mark ADR as a non-streaming instruction, which should trap
if full a64 support is not enabled in streaming mode.

Removing entries from sme-fa64.decode is an easy way to see
what remains to be done.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/translate.h | 7 +++
 target/arm/sme-fa64.decode | 1 -
 target/arm/translate-sve.c | 8 
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/target/arm/translate.h b/target/arm/translate.h
index cbc907c751..e2e619dab2 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -566,4 +566,11 @@ uint64_t asimd_imm_const(uint32_t imm, int cmode, int op);
 static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \
 { return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__); }
 
+#define TRANS_FEAT_NONSTREAMING(NAME, FEAT, FUNC, ...)\
+static bool trans_##NAME(DisasContext *s, arg_##NAME *a)  \
+{ \
+s->is_nonstreaming = true;\
+return dc_isar_feature(FEAT, s) && FUNC(s, __VA_ARGS__);  \
+}
+
 #endif /* TARGET_ARM_TRANSLATE_H */
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
index 3d90837fc7..73c71abc46 100644
--- a/target/arm/sme-fa64.decode
+++ b/target/arm/sme-fa64.decode
@@ -59,7 +59,6 @@ FAIL0001 1110 0111 1110  00--     # FJCVTZS
 #   --11 1100 --1-     --10   # Load/store FP register 
(register offset)
 #   --11 1101         # Load/store FP register 
(scaled imm)
 
-FAIL 0100 --1-  1010      # ADR
 FAIL 0100 --1-  1011 -0--     # FTSSEL, FEXPA
 FAIL 0101 --10 0001 100-      # COMPACT
 FAIL0010 0101 --01 100-  000- ---0    # RDFFR, RDFFRS
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 62b5f3040c..5d1db0d3ff 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -1320,10 +1320,10 @@ static bool do_adr(DisasContext *s, arg_rrri *a, 
gen_helper_gvec_3 *fn)
 return gen_gvec_ool_zzz(s, fn, a->rd, a->rn, a->rm, a->imm);
 }
 
-TRANS_FEAT(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
-TRANS_FEAT(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
-TRANS_FEAT(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
-TRANS_FEAT(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
+TRANS_FEAT_NONSTREAMING(ADR_p32, aa64_sve, do_adr, a, gen_helper_sve_adr_p32)
+TRANS_FEAT_NONSTREAMING(ADR_p64, aa64_sve, do_adr, a, gen_helper_sve_adr_p64)
+TRANS_FEAT_NONSTREAMING(ADR_s32, aa64_sve, do_adr, a, gen_helper_sve_adr_s32)
+TRANS_FEAT_NONSTREAMING(ADR_u32, aa64_sve, do_adr, a, gen_helper_sve_adr_u32)
 
 /*
  *** SVE Integer Misc - Unpredicated Group
-- 
2.34.1




[PATCH v6 10/45] target/arm: Mark string/histo/crypto as non-streaming

2022-07-08 Thread Richard Henderson
Mark these as non-streaming instructions, which should trap
if full a64 support is not enabled in streaming mode.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/sme-fa64.decode |  1 -
 target/arm/translate-sve.c | 35 ++-
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
index 3260ea2d64..fe462d2ccc 100644
--- a/target/arm/sme-fa64.decode
+++ b/target/arm/sme-fa64.decode
@@ -59,7 +59,6 @@ FAIL0001 1110 0111 1110  00--     # FJCVTZS
 #   --11 1100 --1-     --10   # Load/store FP register 
(register offset)
 #   --11 1101         # Load/store FP register 
(scaled imm)
 
-FAIL0100 0101 --1-  1---      # SVE2 string/histo/crypto 
instructions
 FAIL1000 010- -00-  10--      # SVE2 32-bit gather NT load 
(vector+scalar)
 FAIL1000 010- -00-  111-      # SVE 32-bit gather prefetch 
(vector+imm)
 FAIL1000 0100 0-1-  0---      # SVE 32-bit gather prefetch 
(scalar+vector)
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 9bbf44f008..f8e0716474 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -7110,21 +7110,21 @@ DO_SVE2_ZZZ_NARROW(RSUBHNT, rsubhnt)
 static gen_helper_gvec_flags_4 * const match_fns[4] = {
 gen_helper_sve2_match_ppzz_b, gen_helper_sve2_match_ppzz_h, NULL, NULL
 };
-TRANS_FEAT(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
+TRANS_FEAT_NONSTREAMING(MATCH, aa64_sve2, do_ppzz_flags, a, match_fns[a->esz])
 
 static gen_helper_gvec_flags_4 * const nmatch_fns[4] = {
 gen_helper_sve2_nmatch_ppzz_b, gen_helper_sve2_nmatch_ppzz_h, NULL, NULL
 };
-TRANS_FEAT(NMATCH, aa64_sve2, do_ppzz_flags, a, nmatch_fns[a->esz])
+TRANS_FEAT_NONSTREAMING(NMATCH, aa64_sve2, do_ppzz_flags, a, 
nmatch_fns[a->esz])
 
 static gen_helper_gvec_4 * const histcnt_fns[4] = {
 NULL, NULL, gen_helper_sve2_histcnt_s, gen_helper_sve2_histcnt_d
 };
-TRANS_FEAT(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
-   histcnt_fns[a->esz], a, 0)
+TRANS_FEAT_NONSTREAMING(HISTCNT, aa64_sve2, gen_gvec_ool_arg_zpzz,
+histcnt_fns[a->esz], a, 0)
 
-TRANS_FEAT(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
-   a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
+TRANS_FEAT_NONSTREAMING(HISTSEG, aa64_sve2, gen_gvec_ool_arg_zzz,
+a->esz == 0 ? gen_helper_sve2_histseg : NULL, a, 0)
 
 DO_ZPZZ_FP(FADDP, aa64_sve2, sve2_faddp_zpzz)
 DO_ZPZZ_FP(FMAXNMP, aa64_sve2, sve2_fmaxnmp_zpzz)
@@ -7238,20 +7238,21 @@ TRANS_FEAT(SQRDCMLAH_, aa64_sve2, gen_gvec_ool_,
 TRANS_FEAT(USDOT_, aa64_sve_i8mm, gen_gvec_ool_arg_,
a->esz == 2 ? gen_helper_gvec_usdot_b : NULL, a, 0)
 
-TRANS_FEAT(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
-   gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
+TRANS_FEAT_NONSTREAMING(AESMC, aa64_sve2_aes, gen_gvec_ool_zz,
+gen_helper_crypto_aesmc, a->rd, a->rd, a->decrypt)
 
-TRANS_FEAT(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
-   gen_helper_crypto_aese, a, false)
-TRANS_FEAT(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
-   gen_helper_crypto_aese, a, true)
+TRANS_FEAT_NONSTREAMING(AESE, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
+gen_helper_crypto_aese, a, false)
+TRANS_FEAT_NONSTREAMING(AESD, aa64_sve2_aes, gen_gvec_ool_arg_zzz,
+gen_helper_crypto_aese, a, true)
 
-TRANS_FEAT(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
-   gen_helper_crypto_sm4e, a, 0)
-TRANS_FEAT(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
-   gen_helper_crypto_sm4ekey, a, 0)
+TRANS_FEAT_NONSTREAMING(SM4E, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
+gen_helper_crypto_sm4e, a, 0)
+TRANS_FEAT_NONSTREAMING(SM4EKEY, aa64_sve2_sm4, gen_gvec_ool_arg_zzz,
+gen_helper_crypto_sm4ekey, a, 0)
 
-TRANS_FEAT(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz, gen_gvec_rax1, a)
+TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
+gen_gvec_rax1, a)
 
 TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR)
-- 
2.34.1




[PATCH v6 00/45] target/arm: Scalable Matrix Extension

2022-07-08 Thread Richard Henderson
Changes for v6:
  * Some sub-word big-endian addressing fixups (pmm).
  * Logic errors for BFMOPA/FMOPA (pmm).
  * Fix for PR_SME_SET_VL hflags rebuild.

r~

Richard Henderson (45):
  target/arm: Handle SME in aarch64_cpu_dump_state
  target/arm: Add infrastructure for disas_sme
  target/arm: Trap non-streaming usage when Streaming SVE is active
  target/arm: Mark ADR as non-streaming
  target/arm: Mark RDFFR, WRFFR, SETFFR as non-streaming
  target/arm: Mark BDEP, BEXT, BGRP, COMPACT, FEXPA, FTSSEL as
non-streaming
  target/arm: Mark PMULL, FMMLA as non-streaming
  target/arm: Mark FTSMUL, FTMAD, FADDA as non-streaming
  target/arm: Mark SMMLA, UMMLA, USMMLA as non-streaming
  target/arm: Mark string/histo/crypto as non-streaming
  target/arm: Mark gather/scatter load/store as non-streaming
  target/arm: Mark gather prefetch as non-streaming
  target/arm: Mark LDFF1 and LDNF1 as non-streaming
  target/arm: Mark LD1RO as non-streaming
  target/arm: Add SME enablement checks
  target/arm: Handle SME in sve_access_check
  target/arm: Implement SME RDSVL, ADDSVL, ADDSPL
  target/arm: Implement SME ZERO
  target/arm: Implement SME MOVA
  target/arm: Implement SME LD1, ST1
  target/arm: Export unpredicated ld/st from translate-sve.c
  target/arm: Implement SME LDR, STR
  target/arm: Implement SME ADDHA, ADDVA
  target/arm: Implement FMOPA, FMOPS (non-widening)
  target/arm: Implement BFMOPA, BFMOPS
  target/arm: Implement FMOPA, FMOPS (widening)
  target/arm: Implement SME integer outer product
  target/arm: Implement PSEL
  target/arm: Implement REVD
  target/arm: Implement SCLAMP, UCLAMP
  target/arm: Reset streaming sve state on exception boundaries
  target/arm: Enable SME for -cpu max
  linux-user/aarch64: Clear tpidr2_el0 if CLONE_SETTLS
  linux-user/aarch64: Reset PSTATE.SM on syscalls
  linux-user/aarch64: Add SM bit to SVE signal context
  linux-user/aarch64: Tidy target_restore_sigframe error return
  linux-user/aarch64: Do not allow duplicate or short sve records
  linux-user/aarch64: Verify extra record lock succeeded
  linux-user/aarch64: Move sve record checks into restore
  linux-user/aarch64: Implement SME signal handling
  linux-user: Rename sve prctls
  linux-user/aarch64: Implement PR_SME_GET_VL, PR_SME_SET_VL
  target/arm: Only set ZEN in reset if SVE present
  target/arm: Enable SME for user-only
  linux-user/aarch64: Add SME related hwcap entries

 docs/system/arm/emulation.rst |4 +
 linux-user/aarch64/target_cpu.h   |5 +-
 linux-user/aarch64/target_prctl.h |   62 +-
 target/arm/cpu.h  |7 +
 target/arm/helper-sme.h   |  126 
 target/arm/helper-sve.h   |4 +
 target/arm/helper.h   |   18 +
 target/arm/translate-a64.h|   45 ++
 target/arm/translate.h|   16 +
 target/arm/sme-fa64.decode|   60 ++
 target/arm/sme.decode |   88 +++
 target/arm/sve.decode |   41 +-
 linux-user/aarch64/cpu_loop.c |9 +
 linux-user/aarch64/signal.c   |  243 +-
 linux-user/elfload.c  |   20 +
 linux-user/syscall.c  |   28 +-
 target/arm/cpu.c  |   35 +-
 target/arm/cpu64.c|   11 +
 target/arm/helper.c   |   56 +-
 target/arm/sme_helper.c   | 1140 +
 target/arm/sve_helper.c   |   28 +
 target/arm/translate-a64.c|  103 ++-
 target/arm/translate-sme.c|  373 ++
 target/arm/translate-sve.c|  393 --
 target/arm/translate-vfp.c|   12 +
 target/arm/translate.c|2 +
 target/arm/vec_helper.c   |   24 +
 target/arm/meson.build|3 +
 28 files changed, 2821 insertions(+), 135 deletions(-)
 create mode 100644 target/arm/sme-fa64.decode
 create mode 100644 target/arm/sme.decode
 create mode 100644 target/arm/translate-sme.c

-- 
2.34.1




[PATCH v6 05/45] target/arm: Mark RDFFR, WRFFR, SETFFR as non-streaming

2022-07-08 Thread Richard Henderson
Mark these as non-streaming instructions, which should trap
if full a64 support is not enabled in streaming mode.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/sme-fa64.decode | 2 --
 target/arm/translate-sve.c | 9 ++---
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
index 73c71abc46..fa2b5cbf1a 100644
--- a/target/arm/sme-fa64.decode
+++ b/target/arm/sme-fa64.decode
@@ -61,8 +61,6 @@ FAIL0001 1110 0111 1110  00--     # FJCVTZS
 
 FAIL 0100 --1-  1011 -0--     # FTSSEL, FEXPA
 FAIL 0101 --10 0001 100-      # COMPACT
-FAIL0010 0101 --01 100-  000- ---0    # RDFFR, RDFFRS
-FAIL0010 0101 --10 1--- 1001      # WRFFR, SETFFR
 FAIL0100 0101 --0-  1011      # BDEP, BEXT, BGRP
 FAIL0100 0101 000-  0110 1---     # PMULLB, PMULLT (128b 
result)
 FAIL0110 0100 --1-  1110 01--     # FMMLA, BFMMLA
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 5d1db0d3ff..d6faec15fe 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -1785,7 +1785,8 @@ static bool do_predset(DisasContext *s, int esz, int rd, 
int pat, bool setflag)
 TRANS_FEAT(PTRUE, aa64_sve, do_predset, a->esz, a->rd, a->pat, a->s)
 
 /* Note pat == 31 is #all, to set all elements.  */
-TRANS_FEAT(SETFFR, aa64_sve, do_predset, 0, FFR_PRED_NUM, 31, false)
+TRANS_FEAT_NONSTREAMING(SETFFR, aa64_sve,
+do_predset, 0, FFR_PRED_NUM, 31, false)
 
 /* Note pat == 32 is #unimp, to set no elements.  */
 TRANS_FEAT(PFALSE, aa64_sve, do_predset, 0, a->rd, 32, false)
@@ -1799,11 +1800,13 @@ static bool trans_RDFFR_p(DisasContext *s, arg_RDFFR_p 
*a)
 .rd = a->rd, .pg = a->pg, .s = a->s,
 .rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
 };
+
+s->is_nonstreaming = true;
 return trans_AND_(s, _a);
 }
 
-TRANS_FEAT(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
-TRANS_FEAT(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
+TRANS_FEAT_NONSTREAMING(RDFFR, aa64_sve, do_mov_p, a->rd, FFR_PRED_NUM)
+TRANS_FEAT_NONSTREAMING(WRFFR, aa64_sve, do_mov_p, FFR_PRED_NUM, a->rn)
 
 static bool do_pfirst_pnext(DisasContext *s, arg_rr_esz *a,
 void (*gen_fn)(TCGv_i32, TCGv_ptr,
-- 
2.34.1




[PATCH v6 07/45] target/arm: Mark PMULL, FMMLA as non-streaming

2022-07-08 Thread Richard Henderson
Mark these as non-streaming instructions, which should trap
if full a64 support is not enabled in streaming mode.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/sme-fa64.decode |  2 --
 target/arm/translate-sve.c | 24 +++-
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
index 4f515939d9..4ff2df82e5 100644
--- a/target/arm/sme-fa64.decode
+++ b/target/arm/sme-fa64.decode
@@ -59,8 +59,6 @@ FAIL0001 1110 0111 1110  00--     # FJCVTZS
 #   --11 1100 --1-     --10   # Load/store FP register 
(register offset)
 #   --11 1101         # Load/store FP register 
(scaled imm)
 
-FAIL0100 0101 000-  0110 1---     # PMULLB, PMULLT (128b 
result)
-FAIL0110 0100 --1-  1110 01--     # FMMLA, BFMMLA
 FAIL0110 0101 --0-   11--     # FTSMUL
 FAIL0110 0101 --01 0--- 100-      # FTMAD
 FAIL0110 0101 --01 1--- 001-      # FADDA
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index ae48040aa4..4ff2102fc8 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -6186,9 +6186,13 @@ static bool do_trans_pmull(DisasContext *s, arg_rrr_esz 
*a, bool sel)
 gen_helper_gvec_pmull_q, gen_helper_sve2_pmull_h,
 NULL,gen_helper_sve2_pmull_d,
 };
-if (a->esz == 0
-? !dc_isar_feature(aa64_sve2_pmull128, s)
-: !dc_isar_feature(aa64_sve, s)) {
+
+if (a->esz == 0) {
+if (!dc_isar_feature(aa64_sve2_pmull128, s)) {
+return false;
+}
+s->is_nonstreaming = true;
+} else if (!dc_isar_feature(aa64_sve, s)) {
 return false;
 }
 return gen_gvec_ool_arg_zzz(s, fns[a->esz], a, sel);
@@ -7125,10 +7129,12 @@ DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)
  * SVE Integer Multiply-Add (unpredicated)
  */
 
-TRANS_FEAT(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_, gen_helper_fmmla_s,
-   a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
-TRANS_FEAT(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_, gen_helper_fmmla_d,
-   a->rd, a->rn, a->rm, a->ra, 0, FPST_FPCR)
+TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_,
+gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
+0, FPST_FPCR)
+TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_,
+gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
+0, FPST_FPCR)
 
 static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
 NULL,   gen_helper_sve2_sqdmlal_zzzw_h,
@@ -7301,8 +7307,8 @@ TRANS_FEAT(BFDOT_, aa64_sve_bf16, 
gen_gvec_ool_arg_,
 TRANS_FEAT(BFDOT_zzxz, aa64_sve_bf16, gen_gvec_ool_arg_zzxz,
gen_helper_gvec_bfdot_idx, a)
 
-TRANS_FEAT(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_,
-   gen_helper_gvec_bfmmla, a, 0)
+TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_ool_arg_,
+gen_helper_gvec_bfmmla, a, 0)
 
 static bool do_BFMLAL_zzzw(DisasContext *s, arg__esz *a, bool sel)
 {
-- 
2.34.1




[PATCH v6 03/45] target/arm: Trap non-streaming usage when Streaming SVE is active

2022-07-08 Thread Richard Henderson
This new behaviour is in the ARM pseudocode function
AArch64.CheckFPAdvSIMDEnabled, which applies to AArch32
via AArch32.CheckAdvSIMDOrFPEnabled when the EL to which
the trap would be delivered is in AArch64 mode.

Given that ARMv9 drops support for AArch32 outside EL0, the trap EL
detection ought to be trivially true, but the pseudocode still contains
a number of conditions, and QEMU has not yet committed to dropping A32
support for EL[12] when v9 features are present.

Since the computation of SME_TRAP_NONSTREAMING is necessarily different
for the two modes, we might as well preserve bits within TBFLAG_ANY and
allocate separate bits within TBFLAG_A32 and TBFLAG_A64 instead.

Note that DDI0616A.a has typos for bits [22:21] of LD1RO in the table
of instructions illegal in streaming mode.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/cpu.h   |  7 +++
 target/arm/translate.h |  4 ++
 target/arm/sme-fa64.decode | 90 ++
 target/arm/helper.c| 41 +
 target/arm/translate-a64.c | 40 -
 target/arm/translate-vfp.c | 12 +
 target/arm/translate.c |  2 +
 target/arm/meson.build |  1 +
 8 files changed, 195 insertions(+), 2 deletions(-)
 create mode 100644 target/arm/sme-fa64.decode

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 1f4f3e0485..1e36a839ee 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -3158,6 +3158,11 @@ FIELD(TBFLAG_A32, HSTR_ACTIVE, 9, 1)
  * the same thing as the current security state of the processor!
  */
 FIELD(TBFLAG_A32, NS, 10, 1)
+/*
+ * Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not.
+ * This requires an SME trap from AArch32 mode when using NEON.
+ */
+FIELD(TBFLAG_A32, SME_TRAP_NONSTREAMING, 11, 1)
 
 /*
  * Bit usage when in AArch32 state, for M-profile only.
@@ -3195,6 +3200,8 @@ FIELD(TBFLAG_A64, SMEEXC_EL, 20, 2)
 FIELD(TBFLAG_A64, PSTATE_SM, 22, 1)
 FIELD(TBFLAG_A64, PSTATE_ZA, 23, 1)
 FIELD(TBFLAG_A64, SVL, 24, 4)
+/* Indicates that SME Streaming mode is active, and SMCR_ELx.FA64 is not. */
+FIELD(TBFLAG_A64, SME_TRAP_NONSTREAMING, 28, 1)
 
 /*
  * Helpers for using the above.
diff --git a/target/arm/translate.h b/target/arm/translate.h
index 22fd882368..cbc907c751 100644
--- a/target/arm/translate.h
+++ b/target/arm/translate.h
@@ -102,6 +102,10 @@ typedef struct DisasContext {
 bool pstate_sm;
 /* True if PSTATE.ZA is set. */
 bool pstate_za;
+/* True if non-streaming insns should raise an SME Streaming exception. */
+bool sme_trap_nonstreaming;
+/* True if the current instruction is non-streaming. */
+bool is_nonstreaming;
 /* True if MVE insns are definitely not predicated by VPR or LTPSIZE */
 bool mve_no_pred;
 /*
diff --git a/target/arm/sme-fa64.decode b/target/arm/sme-fa64.decode
new file mode 100644
index 00..3d90837fc7
--- /dev/null
+++ b/target/arm/sme-fa64.decode
@@ -0,0 +1,90 @@
+# AArch64 SME allowed instruction decoding
+#
+#  Copyright (c) 2022 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
+
+# These patterns are taken from Appendix E1.1 of DDI0616 A.a,
+# Arm Architecture Reference Manual Supplement,
+# The Scalable Matrix Extension (SME), for Armv9-A
+
+{
+  [
+OK  0-00 1110  0001 0010 11--     # SMOV W|Xd,Vn.B[0]
+OK  0-00 1110  0010 0010 11--     # SMOV W|Xd,Vn.H[0]
+OK  0100 1110  0100 0010 11--     # SMOV Xd,Vn.S[0]
+OK   1110  0001 0011 11--     # UMOV Wd,Vn.B[0]
+OK   1110  0010 0011 11--     # UMOV Wd,Vn.H[0]
+OK   1110  0100 0011 11--     # UMOV Wd,Vn.S[0]
+OK  0100 1110  1000 0011 11--     # UMOV Xd,Vn.D[0]
+  ]
+  FAIL  0--0 111-         # Advanced SIMD vector 
operations
+}
+
+{
+  [
+OK  0101 1110 --1-  11-1 11--     # FMULX/FRECPS/FRSQRTS 
(scalar)
+OK  0101 1110 -10-  00-1 11--     # FMULX/FRECPS/FRSQRTS 
(scalar, FP16)
+OK  01-1 1110 1-10 0001 11-1 10--     # FRECPE/FRSQRTE/FRECPX 
(scalar)
+OK  01-1 1110  1001 11-1 10--     # FRECPE/FRSQRTE/FRECPX 
(scalar, FP16)
+  ]
+  FAIL  01-1 111-         # Advanced SIMD 
single-element 

[PATCH v6 01/45] target/arm: Handle SME in aarch64_cpu_dump_state

2022-07-08 Thread Richard Henderson
Dump SVCR, plus use the correct access check for Streaming Mode.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/cpu.c | 17 -
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index ae6dca2f01..9c58be8b14 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -878,6 +878,7 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, 
int flags)
 int i;
 int el = arm_current_el(env);
 const char *ns_status;
+bool sve;
 
 qemu_fprintf(f, " PC=%016" PRIx64 " ", env->pc);
 for (i = 0; i < 32; i++) {
@@ -904,6 +905,12 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, 
int flags)
  el,
  psr & PSTATE_SP ? 'h' : 't');
 
+if (cpu_isar_feature(aa64_sme, cpu)) {
+qemu_fprintf(f, "  SVCR=%08" PRIx64 " %c%c",
+ env->svcr,
+ (FIELD_EX64(env->svcr, SVCR, ZA) ? 'Z' : '-'),
+ (FIELD_EX64(env->svcr, SVCR, SM) ? 'S' : '-'));
+}
 if (cpu_isar_feature(aa64_bti, cpu)) {
 qemu_fprintf(f, "  BTYPE=%d", (psr & PSTATE_BTYPE) >> 10);
 }
@@ -918,7 +925,15 @@ static void aarch64_cpu_dump_state(CPUState *cs, FILE *f, 
int flags)
 qemu_fprintf(f, " FPCR=%08x FPSR=%08x\n",
  vfp_get_fpcr(env), vfp_get_fpsr(env));
 
-if (cpu_isar_feature(aa64_sve, cpu) && sve_exception_el(env, el) == 0) {
+if (cpu_isar_feature(aa64_sme, cpu) && FIELD_EX64(env->svcr, SVCR, SM)) {
+sve = sme_exception_el(env, el) == 0;
+} else if (cpu_isar_feature(aa64_sve, cpu)) {
+sve = sve_exception_el(env, el) == 0;
+} else {
+sve = false;
+}
+
+if (sve) {
 int j, zcr_len = sve_vqm1_for_el(env, el);
 
 for (i = 0; i <= FFR_PRED_NUM; i++) {
-- 
2.34.1




[PATCH v6 02/45] target/arm: Add infrastructure for disas_sme

2022-07-08 Thread Richard Henderson
This includes the build rules for the decoder, and the
new file for translation, but excludes any instructions.

Reviewed-by: Peter Maydell 
Signed-off-by: Richard Henderson 
---
 target/arm/translate-a64.h |  1 +
 target/arm/sme.decode  | 20 
 target/arm/translate-a64.c |  7 ++-
 target/arm/translate-sme.c | 35 +++
 target/arm/meson.build |  2 ++
 5 files changed, 64 insertions(+), 1 deletion(-)
 create mode 100644 target/arm/sme.decode
 create mode 100644 target/arm/translate-sme.c

diff --git a/target/arm/translate-a64.h b/target/arm/translate-a64.h
index f0970c6b8c..789b6e8e78 100644
--- a/target/arm/translate-a64.h
+++ b/target/arm/translate-a64.h
@@ -146,6 +146,7 @@ static inline int pred_gvec_reg_size(DisasContext *s)
 }
 
 bool disas_sve(DisasContext *, uint32_t);
+bool disas_sme(DisasContext *, uint32_t);
 
 void gen_gvec_rax1(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
uint32_t rm_ofs, uint32_t opr_sz, uint32_t max_sz);
diff --git a/target/arm/sme.decode b/target/arm/sme.decode
new file mode 100644
index 00..c25c031a71
--- /dev/null
+++ b/target/arm/sme.decode
@@ -0,0 +1,20 @@
+# AArch64 SME instruction descriptions
+#
+#  Copyright (c) 2022 Linaro, Ltd
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, see <http://www.gnu.org/licenses/>.
+
+#
+# This file is processed by scripts/decodetree.py
+#
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index c86b97b1d4..a5f8a6c771 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -14806,7 +14806,12 @@ static void aarch64_tr_translate_insn(DisasContextBase 
*dcbase, CPUState *cpu)
 }
 
 switch (extract32(insn, 25, 4)) {
-case 0x0: case 0x1: case 0x3: /* UNALLOCATED */
+case 0x0:
+if (!extract32(insn, 31, 1) || !disas_sme(s, insn)) {
+unallocated_encoding(s);
+}
+break;
+case 0x1: case 0x3: /* UNALLOCATED */
 unallocated_encoding(s);
 break;
 case 0x2:
diff --git a/target/arm/translate-sme.c b/target/arm/translate-sme.c
new file mode 100644
index 00..786c93fb2d
--- /dev/null
+++ b/target/arm/translate-sme.c
@@ -0,0 +1,35 @@
+/*
+ * AArch64 SME translation
+ *
+ * Copyright (c) 2022 Linaro, Ltd
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "tcg/tcg-op.h"
+#include "tcg/tcg-op-gvec.h"
+#include "tcg/tcg-gvec-desc.h"
+#include "translate.h"
+#include "exec/helper-gen.h"
+#include "translate-a64.h"
+#include "fpu/softfloat.h"
+
+
+/*
+ * Include the generated decoder.
+ */
+
+#include "decode-sme.c.inc"
diff --git a/target/arm/meson.build b/target/arm/meson.build
index 43dc600547..6dd7e93643 100644
--- a/target/arm/meson.build
+++ b/target/arm/meson.build
@@ -1,5 +1,6 @@
 gen = [
   decodetree.process('sve.decode', extra_args: '--decode=disas_sve'),
+  decodetree.process('sme.decode', extra_args: '--decode=disas_sme'),
   decodetree.process('neon-shared.decode', extra_args: 
'--decode=disas_neon_shared'),
   decodetree.process('neon-dp.decode', extra_args: '--decode=disas_neon_dp'),
   decodetree.process('neon-ls.decode', extra_args: '--decode=disas_neon_ls'),
@@ -50,6 +51,7 @@ arm_ss.add(when: 'TARGET_AARCH64', if_true: files(
   'sme_helper.c',
   'translate-a64.c',
   'translate-sve.c',
+  'translate-sme.c',
 ))
 
 arm_softmmu_ss = ss.source_set()
-- 
2.34.1




  1   2   >