Re: [PATCH v2 05/11] block/nvme: Have nvme_create_queue_pair() report errors consistently

2021-08-26 Thread Klaus Jensen
On Aug 26 21:50, Philippe Mathieu-Daudé wrote:
> nvme_create_queue_pair() does not return a boolean value (indicating
> eventual error) but a pointer, and is inconsistent in how it fills the
> error handler. To fulfill callers' expectations, always set an error
> message on failure.
> 
> Reported-by: Auger Eric 
> Signed-off-by: Philippe Mathieu-Daudé 

Reviewed-by: Klaus Jensen 


signature.asc
Description: PGP signature


Re: [PATCH v2 02/11] util/vfio-helpers: Let qemu_vfio_verify_mappings() use error_report()

2021-08-26 Thread Klaus Jensen
On Aug 26 21:50, Philippe Mathieu-Daudé wrote:
> Instead of displaying the error on stderr, use error_report()
> which also reports to the monitor.
> 
> Reviewed-by: Fam Zheng 
> Reviewed-by: Stefan Hajnoczi 
> Signed-off-by: Philippe Mathieu-Daudé 

Reviewed-by: Klaus Jensen 


signature.asc
Description: PGP signature


Re: [PATCH v2 06/11] util/vfio-helpers: Pass Error handle to qemu_vfio_dma_map()

2021-08-26 Thread Klaus Jensen
On Aug 26 21:50, Philippe Mathieu-Daudé wrote:
> Currently qemu_vfio_dma_map() displays errors on stderr.
> When using a management interface, this information is simply
> lost. Pass qemu_vfio_dma_map() an Error** handle so it can
> propagate the error to callers.
> 
> Reviewed-by: Fam Zheng 
> Reviewed-by: Stefan Hajnoczi 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  include/qemu/vfio-helpers.h |  2 +-
>  block/nvme.c| 22 +++---
>  util/vfio-helpers.c | 10 ++
>  3 files changed, 18 insertions(+), 16 deletions(-)
> 
> diff --git a/include/qemu/vfio-helpers.h b/include/qemu/vfio-helpers.h
> index 4491c8e1a6e..bde9495b254 100644
> --- a/include/qemu/vfio-helpers.h
> +++ b/include/qemu/vfio-helpers.h
> @@ -18,7 +18,7 @@ typedef struct QEMUVFIOState QEMUVFIOState;
>  QEMUVFIOState *qemu_vfio_open_pci(const char *device, Error **errp);
>  void qemu_vfio_close(QEMUVFIOState *s);
>  int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
> -  bool temporary, uint64_t *iova_list);
> +  bool temporary, uint64_t *iova_list, Error **errp);
>  int qemu_vfio_dma_reset_temporary(QEMUVFIOState *s);
>  void qemu_vfio_dma_unmap(QEMUVFIOState *s, void *host);
>  void *qemu_vfio_pci_map_bar(QEMUVFIOState *s, int index,
> diff --git a/block/nvme.c b/block/nvme.c
> index 0786c501e46..80546b0babd 100644
> --- a/block/nvme.c
> +++ b/block/nvme.c
> @@ -176,12 +176,11 @@ static bool nvme_init_queue(BDRVNVMeState *s, NVMeQueue 
> *q,
>  return false;
>  }
>  memset(q->queue, 0, bytes);
> -r = qemu_vfio_dma_map(s->vfio, q->queue, bytes, false, >iova);
> +r = qemu_vfio_dma_map(s->vfio, q->queue, bytes, false, >iova, errp);
>  if (r) {
> -error_setg(errp, "Cannot map queue");
> -return false;
> +error_prepend(errp, "Cannot map queue: ");
>  }
> -return true;
> +return r == 0;
>  }
>  
>  static void nvme_free_queue_pair(NVMeQueuePair *q)
> @@ -239,9 +238,9 @@ static NVMeQueuePair 
> *nvme_create_queue_pair(BDRVNVMeState *s,
>  qemu_co_queue_init(>free_req_queue);
>  q->completion_bh = aio_bh_new(aio_context, nvme_process_completion_bh, 
> q);
>  r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages, bytes,
> -  false, _list_iova);
> +  false, _list_iova, errp);
>  if (r) {
> -error_setg_errno(errp, -r, "Cannot map buffer for DMA");
> +error_prepend(errp, "Cannot map buffer for DMA: ");
>  goto fail;
>  }
>  q->free_req_head = -1;
> @@ -534,9 +533,9 @@ static bool nvme_identify(BlockDriverState *bs, int 
> namespace, Error **errp)
>  error_setg(errp, "Cannot allocate buffer for identify response");
>  goto out;
>  }
> -r = qemu_vfio_dma_map(s->vfio, id, id_size, true, );
> +r = qemu_vfio_dma_map(s->vfio, id, id_size, true, , errp);
>  if (r) {
> -error_setg(errp, "Cannot map buffer for DMA");
> +error_prepend(errp, "Cannot map buffer for DMA: ");
>  goto out;
>  }
>  
> @@ -1032,7 +1031,7 @@ static coroutine_fn int 
> nvme_cmd_map_qiov(BlockDriverState *bs, NvmeCmd *cmd,
>  try_map:
>  r = qemu_vfio_dma_map(s->vfio,
>qiov->iov[i].iov_base,
> -  len, true, );
> +  len, true, , NULL);
>  if (r == -ENOSPC) {
>  /*
>   * In addition to the -ENOMEM error, the VFIO_IOMMU_MAP_DMA
> @@ -1524,14 +1523,15 @@ static void nvme_aio_unplug(BlockDriverState *bs)
>  static void nvme_register_buf(BlockDriverState *bs, void *host, size_t size)
>  {
>  int ret;
> +Error *local_err = NULL;
>  BDRVNVMeState *s = bs->opaque;
>  
> -ret = qemu_vfio_dma_map(s->vfio, host, size, false, NULL);
> +ret = qemu_vfio_dma_map(s->vfio, host, size, false, NULL, _err);
>  if (ret) {
>  /* FIXME: we may run out of IOVA addresses after repeated
>   * bdrv_register_buf/bdrv_unregister_buf, because nvme_vfio_dma_unmap
>   * doesn't reclaim addresses for fixed mappings. */
> -error_report("nvme_register_buf failed: %s", strerror(-ret));
> +error_reportf_err(local_err, "nvme_register_buf failed: ");
>  }
>  }
>  
> diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
> index e7909222cfd..77cdec845d9 100644
> --- a/util/vfio-helpers.c
> +++ b/util/vfio-helpers.c
> @@ -463,13 +463,15 @@ static void qemu_vfio_ram_block_added(RAMBlockNotifier 
> *n, void *host,
>size_t size, size_t max_size)
>  {
>  QEMUVFIOState *s = container_of(n, QEMUVFIOState, ram_notifier);
> +Error *local_err = NULL;
>  int ret;
>  
>  trace_qemu_vfio_ram_block_added(s, host, max_size);
> -ret = qemu_vfio_dma_map(s, host, max_size, false, NULL);
> +ret = qemu_vfio_dma_map(s, host, max_size, false, NULL, _err);
>  if (ret) {
> -

[PULL 14/15] Remove superfluous ERRP_GUARD()

2021-08-26 Thread Markus Armbruster
Macro ERRP_GUARD() is only needed when we want to dereference @errp or
pass it to error_prepend() or error_append_hint().  Delete superfluous
ones.

Signed-off-by: Markus Armbruster 
Message-Id: <20210720125408.387910-15-arm...@redhat.com>
Acked-by: Michael S. Tsirkin 
---
 hw/remote/mpqemu-link.c  |  3 ---
 qga/commands-posix-ssh.c | 17 -
 qga/commands-win32.c |  1 -
 ui/console.c |  1 -
 4 files changed, 22 deletions(-)

diff --git a/hw/remote/mpqemu-link.c b/hw/remote/mpqemu-link.c
index e67a5de72c..7e841820e5 100644
--- a/hw/remote/mpqemu-link.c
+++ b/hw/remote/mpqemu-link.c
@@ -34,7 +34,6 @@
  */
 bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error **errp)
 {
-ERRP_GUARD();
 bool iolock = qemu_mutex_iothread_locked();
 bool iothread = qemu_in_iothread();
 struct iovec send[2] = {};
@@ -97,7 +96,6 @@ bool mpqemu_msg_send(MPQemuMsg *msg, QIOChannel *ioc, Error 
**errp)
 static ssize_t mpqemu_read(QIOChannel *ioc, void *buf, size_t len, int **fds,
size_t *nfds, Error **errp)
 {
-ERRP_GUARD();
 struct iovec iov = { .iov_base = buf, .iov_len = len };
 bool iolock = qemu_mutex_iothread_locked();
 bool iothread = qemu_in_iothread();
@@ -192,7 +190,6 @@ fail:
 uint64_t mpqemu_msg_send_and_await_reply(MPQemuMsg *msg, PCIProxyDev *pdev,
  Error **errp)
 {
-ERRP_GUARD();
 MPQemuMsg msg_reply = {0};
 uint64_t ret = UINT64_MAX;
 
diff --git a/qga/commands-posix-ssh.c b/qga/commands-posix-ssh.c
index 2dda136d64..f3a580b8cc 100644
--- a/qga/commands-posix-ssh.c
+++ b/qga/commands-posix-ssh.c
@@ -45,8 +45,6 @@ get_passwd_entry(const char *username, Error **errp)
 g_autoptr(GError) err = NULL;
 struct passwd *p;
 
-ERRP_GUARD();
-
 p = g_unix_get_passwd_entry_qemu(username, );
 if (p == NULL) {
 error_setg(errp, "failed to lookup user '%s': %s",
@@ -61,8 +59,6 @@ static bool
 mkdir_for_user(const char *path, const struct passwd *p,
mode_t mode, Error **errp)
 {
-ERRP_GUARD();
-
 if (g_mkdir(path, mode) == -1) {
 error_setg(errp, "failed to create directory '%s': %s",
path, g_strerror(errno));
@@ -87,8 +83,6 @@ mkdir_for_user(const char *path, const struct passwd *p,
 static bool
 check_openssh_pub_key(const char *key, Error **errp)
 {
-ERRP_GUARD();
-
 /* simple sanity-check, we may want more? */
 if (!key || key[0] == '#' || strchr(key, '\n')) {
 error_setg(errp, "invalid OpenSSH public key: '%s'", key);
@@ -104,8 +98,6 @@ check_openssh_pub_keys(strList *keys, size_t *nkeys, Error 
**errp)
 size_t n = 0;
 strList *k;
 
-ERRP_GUARD();
-
 for (k = keys; k != NULL; k = k->next) {
 if (!check_openssh_pub_key(k->value, errp)) {
 return false;
@@ -126,8 +118,6 @@ write_authkeys(const char *path, const GStrv keys,
 g_autofree char *contents = NULL;
 g_autoptr(GError) err = NULL;
 
-ERRP_GUARD();
-
 contents = g_strjoinv("\n", keys);
 if (!g_file_set_contents(path, contents, -1, )) {
 error_setg(errp, "failed to write to '%s': %s", path, err->message);
@@ -155,8 +145,6 @@ read_authkeys(const char *path, Error **errp)
 g_autoptr(GError) err = NULL;
 g_autofree char *contents = NULL;
 
-ERRP_GUARD();
-
 if (!g_file_get_contents(path, , NULL, )) {
 error_setg(errp, "failed to read '%s': %s", path, err->message);
 return NULL;
@@ -178,7 +166,6 @@ qmp_guest_ssh_add_authorized_keys(const char *username, 
strList *keys,
 strList *k;
 size_t nkeys, nauthkeys;
 
-ERRP_GUARD();
 reset = has_reset && reset;
 
 if (!check_openssh_pub_keys(keys, , errp)) {
@@ -228,8 +215,6 @@ qmp_guest_ssh_remove_authorized_keys(const char *username, 
strList *keys,
 GStrv a;
 size_t nkeys = 0;
 
-ERRP_GUARD();
-
 if (!check_openssh_pub_keys(keys, NULL, errp)) {
 return;
 }
@@ -277,8 +262,6 @@ qmp_guest_ssh_get_authorized_keys(const char *username, 
Error **errp)
 g_autoptr(GuestAuthorizedKeys) ret = NULL;
 int i;
 
-ERRP_GUARD();
-
 p = get_passwd_entry(username, errp);
 if (p == NULL) {
 return NULL;
diff --git a/qga/commands-win32.c b/qga/commands-win32.c
index 7bac0c5d42..4e84afd83b 100644
--- a/qga/commands-win32.c
+++ b/qga/commands-win32.c
@@ -976,7 +976,6 @@ out:
 
 GuestDiskInfoList *qmp_guest_get_disks(Error **errp)
 {
-ERRP_GUARD();
 GuestDiskInfoList *ret = NULL;
 HDEVINFO dev_info;
 SP_DEVICE_INTERFACE_DATA dev_iface_data;
diff --git a/ui/console.c b/ui/console.c
index 5d2e6178ff..eabbbc951c 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -1481,7 +1481,6 @@ static bool 
displaychangelistener_has_dmabuf(DisplayChangeListener *dcl)
 static bool dpy_compatible_with(QemuConsole *con,
 DisplayChangeListener *dcl, Error **errp)
 {
-ERRP_GUARD();
 int flags;
 
 flags = 

[PULL 02/15] spapr: Plug memory leak when we can't add a migration blocker

2021-08-26 Thread Markus Armbruster
Fixes: 2500fb423adb17995485de0b4d507cf2f09e3a7f
Cc: Aravinda Prasad 
Cc: Ganesh Goudar 
Cc: David Gibson 
Signed-off-by: Markus Armbruster 
Message-Id: <20210720125408.387910-3-arm...@redhat.com>
Acked-by: Michael S. Tsirkin 
Reviewed-by: Philippe Mathieu-Daudé 
---
 hw/ppc/spapr_events.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 23e2e2fff1..690533cbdc 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -872,7 +872,6 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
 SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
 CPUState *cs = CPU(cpu);
 int ret;
-Error *local_err = NULL;
 
 if (spapr->fwnmi_machine_check_addr == -1) {
 /* Non-FWNMI case, deliver it like an architected CPU interrupt. */
@@ -912,7 +911,7 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
 }
 }
 
-ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, _err);
+ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, NULL);
 if (ret == -EBUSY) {
 /*
  * We don't want to abort so we let the migration to continue.
-- 
2.31.1




[PULL 12/15] vhost: Clean up how VhostOpts method vhost_get_config() fails

2021-08-26 Thread Markus Armbruster
vhost_user_get_config() can fail without setting an error.  Unclean.
Its caller vhost_dev_get_config() compensates by substituting a
generic error then.  Goes back to commit 50de51387f "vhost:
Distinguish errors in vhost_dev_get_config()".

Clean up by moving the generic error from vhost_dev_get_config() to
all the failure paths that neglect to set an error.

Cc: Kevin Wolf 
Cc: Michael S. Tsirkin 
Signed-off-by: Markus Armbruster 
Message-Id: <20210720125408.387910-13-arm...@redhat.com>
Acked-by: Michael S. Tsirkin 
Reviewed-by: Philippe Mathieu-Daudé 
[Sign of error_setg_errno()'s second argument fixed in both calls]
---
 hw/virtio/vhost-user.c |  2 ++
 hw/virtio/vhost.c  | 10 ++
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index aec6cc1990..229c114a19 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -2139,10 +2139,12 @@ static int vhost_user_get_config(struct vhost_dev *dev, 
uint8_t *config,
 msg.payload.config.offset = 0;
 msg.payload.config.size = config_len;
 if (vhost_user_write(dev, , NULL, 0) < 0) {
+error_setg_errno(errp, EPROTO, "vhost_get_config failed");
 return -EPROTO;
 }
 
 if (vhost_user_read(dev, ) < 0) {
+error_setg_errno(errp, EPROTO, "vhost_get_config failed");
 return -EPROTO;
 }
 
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index dbbc6b6915..88f8a397dc 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1564,17 +1564,11 @@ void vhost_ack_features(struct vhost_dev *hdev, const 
int *feature_bits,
 int vhost_dev_get_config(struct vhost_dev *hdev, uint8_t *config,
  uint32_t config_len, Error **errp)
 {
-ERRP_GUARD();
-int ret;
-
 assert(hdev->vhost_ops);
 
 if (hdev->vhost_ops->vhost_get_config) {
-ret = hdev->vhost_ops->vhost_get_config(hdev, config, config_len, 
errp);
-if (ret < 0 && !*errp) {
-error_setg_errno(errp, -ret, "vhost_get_config failed");
-}
-return ret;
+return hdev->vhost_ops->vhost_get_config(hdev, config, config_len,
+ errp);
 }
 
 error_setg(errp, "vhost_get_config not implemented");
-- 
2.31.1




[PULL 09/15] migration: Unify failure check for migrate_add_blocker()

2021-08-26 Thread Markus Armbruster
Most callers check the return value.  Some check whether it set an
error.  Functionally equivalent, but the former tends to be easier on
the eyes, so do that everywhere.

Prior art: commit c6ecec43b2 "qemu-option: Check return value instead
of @err where convenient".

Signed-off-by: Markus Armbruster 
Message-Id: <20210720125408.387910-10-arm...@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé 
Acked-by: Michael S. Tsirkin 
---
 backends/tpm/tpm_emulator.c | 3 +--
 hw/virtio/vhost.c   | 2 +-
 target/i386/kvm/kvm.c   | 6 +++---
 target/i386/nvmm/nvmm-all.c | 3 +--
 target/i386/whpx/whpx-all.c | 3 +--
 5 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/backends/tpm/tpm_emulator.c b/backends/tpm/tpm_emulator.c
index e5f1063ab6..f8095d23d5 100644
--- a/backends/tpm/tpm_emulator.c
+++ b/backends/tpm/tpm_emulator.c
@@ -492,8 +492,7 @@ static int tpm_emulator_block_migration(TPMEmulator 
*tpm_emu)
 error_setg(_emu->migration_blocker,
"Migration disabled: TPM emulator does not support "
"migration");
-migrate_add_blocker(tpm_emu->migration_blocker, );
-if (err) {
+if (migrate_add_blocker(tpm_emu->migration_blocker, ) < 0) {
 error_report_err(err);
 error_free(tpm_emu->migration_blocker);
 tpm_emu->migration_blocker = NULL;
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index e8f85a5d2d..dbbc6b6915 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1372,7 +1372,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
 
 if (hdev->migration_blocker != NULL) {
 r = migrate_add_blocker(hdev->migration_blocker, errp);
-if (*errp) {
+if (r < 0) {
 error_free(hdev->migration_blocker);
 goto fail_busyloop;
 }
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 57aed525b5..500d2e0e68 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -1435,7 +1435,7 @@ static int hyperv_init_vcpu(X86CPU *cpu)
"'hv-passthrough' CPU flag prevents migration, use explicit"
" set of hv-* flags instead");
 ret = migrate_add_blocker(hv_passthrough_mig_blocker, _err);
-if (local_err) {
+if (ret < 0) {
 error_report_err(local_err);
 return ret;
 }
@@ -1449,7 +1449,7 @@ static int hyperv_init_vcpu(X86CPU *cpu)
" make sure SMT is disabled and/or that vCPUs are properly"
" pinned)");
 ret = migrate_add_blocker(hv_no_nonarch_cs_mig_blocker, _err);
-if (local_err) {
+if (ret < 0) {
 error_report_err(local_err);
 return ret;
 }
@@ -1888,7 +1888,7 @@ int kvm_arch_init_vcpu(CPUState *cs)
"State blocked by non-migratable CPU device"
" (invtsc flag)");
 r = migrate_add_blocker(invtsc_mig_blocker, _err);
-if (local_err) {
+if (r < 0) {
 error_report_err(local_err);
 return r;
 }
diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c
index 7bb0d9e30e..28dee4c5ee 100644
--- a/target/i386/nvmm/nvmm-all.c
+++ b/target/i386/nvmm/nvmm-all.c
@@ -926,8 +926,7 @@ nvmm_init_vcpu(CPUState *cpu)
 error_setg(_migration_blocker,
 "NVMM: Migration not supported");
 
-(void)migrate_add_blocker(nvmm_migration_blocker, _error);
-if (local_error) {
+if (migrate_add_blocker(nvmm_migration_blocker, _error) < 0) {
 error_report_err(local_error);
 error_free(nvmm_migration_blocker);
 return -EINVAL;
diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c
index cc8c0b984b..3e925b9da7 100644
--- a/target/i386/whpx/whpx-all.c
+++ b/target/i386/whpx/whpx-all.c
@@ -1346,8 +1346,7 @@ int whpx_init_vcpu(CPUState *cpu)
"State blocked due to non-migratable CPUID feature support,"
"dirty memory tracking support, and XSAVE/XRSTOR support");
 
-(void)migrate_add_blocker(whpx_migration_blocker, _error);
-if (local_error) {
+if (migrate_add_blocker(whpx_migration_blocker, _error) < 0) {
 error_report_err(local_error);
 error_free(whpx_migration_blocker);
 ret = -EINVAL;
-- 
2.31.1




[PULL 04/15] multi-process: Fix pci_proxy_dev_realize() error handling

2021-08-26 Thread Markus Armbruster
The Error ** argument must be NULL, &error_abort, &error_fatal, or a
pointer to a variable containing NULL.  Passing an argument of the
latter kind twice without clearing it in between is wrong: if the
first call sets an error, it no longer points to NULL for the second
call.

pci_proxy_dev_realize() is wrong that way: it passes @errp to
qio_channel_new_fd() without checking for failure.  If it runs into
another failure, it trips error_setv()'s assertion.

Fix it to check for failure properly.

Fixes: 9f8112073aad8e485ac012ee18809457ab7f23a6
Cc: Elena Ufimtseva 
Cc: Jagannathan Raman 
Cc: John G Johnson 
Cc: Stefan Hajnoczi 
Signed-off-by: Markus Armbruster 
Message-Id: <20210720125408.387910-5-arm...@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé 
Acked-by: Jagannathan Raman 
Acked-by: Michael S. Tsirkin 
---
 hw/remote/proxy.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/hw/remote/proxy.c b/hw/remote/proxy.c
index 6dda705fc2..499f540c94 100644
--- a/hw/remote/proxy.c
+++ b/hw/remote/proxy.c
@@ -102,10 +102,18 @@ static void pci_proxy_dev_realize(PCIDevice *device, 
Error **errp)
 }
 
 dev->ioc = qio_channel_new_fd(fd, errp);
+if (!dev->ioc) {
+close(fd);
+return;
+}
 
 error_setg(>migration_blocker, "%s does not support migration",
TYPE_PCI_PROXY_DEV);
-migrate_add_blocker(dev->migration_blocker, errp);
+if (migrate_add_blocker(dev->migration_blocker, errp) < 0) {
+error_free(dev->migration_blocker);
+object_unref(dev->ioc);
+return;
+}
 
 qemu_mutex_init(>io_mutex);
 qio_channel_set_blocking(dev->ioc, true, NULL);
-- 
2.31.1




[PULL 10/15] migration: Handle migration_incoming_setup() errors consistently

2021-08-26 Thread Markus Armbruster
Commit b673eab4e2 "multifd: Make multifd_load_setup() get an Error
parameter" changed migration_incoming_setup() to take an Error **
argument, and adjusted the callers accordingly.  It neglected to
adjust multifd_load_setup(): it still exit()s on error.  Clean
that up.

The error now gets propagated up two call chains: via
migration_fd_process_incoming() to rdma_accept_incoming_migration(),
and via migration_ioc_process_incoming() to
migration_channel_process_incoming().  Both chain ends report the
error with error_report_err(), but otherwise ignore it.  Behavioral
change: we no longer exit() on this error.

This is consistent with how we handle other errors here, e.g. from
multifd_recv_new_channel() via migration_ioc_process_incoming() to
migration_channel_process_incoming().  Whether it's consistently right
or consistently wrong I can't tell.

Also clean up the return value from the unusual 0 on success, 1 on
error to the more common true on success, false on error.

Cc: Juan Quintela 
Cc: Dr. David Alan Gilbert 
Signed-off-by: Markus Armbruster 
Message-Id: <20210720125408.387910-11-arm...@redhat.com>
Reviewed-by: Eric Blake 
Reviewed-by: Pankaj Gupta 
Acked-by: Michael S. Tsirkin 
---
 migration/migration.c | 27 +--
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index b169943f35..bb909781b7 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -611,30 +611,25 @@ fail:
 }
 
 /**
- * @migration_incoming_setup: Setup incoming migration
- *
- * Returns 0 for no error or 1 for error
- *
+ * migration_incoming_setup: Setup incoming migration
  * @f: file for main migration channel
  * @errp: where to put errors
+ *
+ * Returns: %true on success, %false on error.
  */
-static int migration_incoming_setup(QEMUFile *f, Error **errp)
+static bool migration_incoming_setup(QEMUFile *f, Error **errp)
 {
 MigrationIncomingState *mis = migration_incoming_get_current();
-Error *local_err = NULL;
 
-if (multifd_load_setup(_err) != 0) {
-/* We haven't been able to create multifd threads
-   nothing better to do */
-error_report_err(local_err);
-exit(EXIT_FAILURE);
+if (multifd_load_setup(errp) != 0) {
+return false;
 }
 
 if (!mis->from_src_file) {
 mis->from_src_file = f;
 }
 qemu_file_set_blocking(f, false);
-return 0;
+return true;
 }
 
 void migration_incoming_process(void)
@@ -677,14 +672,11 @@ static bool postcopy_try_recover(QEMUFile *f)
 
 void migration_fd_process_incoming(QEMUFile *f, Error **errp)
 {
-Error *local_err = NULL;
-
 if (postcopy_try_recover(f)) {
 return;
 }
 
-if (migration_incoming_setup(f, _err)) {
-error_propagate(errp, local_err);
+if (!migration_incoming_setup(f, errp)) {
 return;
 }
 migration_incoming_process();
@@ -705,8 +697,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error 
**errp)
 return;
 }
 
-if (migration_incoming_setup(f, _err)) {
-error_propagate(errp, local_err);
+if (!migration_incoming_setup(f, errp)) {
 return;
 }
 
-- 
2.31.1




[PULL 01/15] error: Use error_fatal to simplify obvious fatal errors (again)

2021-08-26 Thread Markus Armbruster
We did this with scripts/coccinelle/use-error_fatal.cocci before, in
commit 50beeb68094 and 007b06578ab.  This commit cleans up rarer
variations that don't seem worth matching with Coccinelle.

Cc: Thomas Huth 
Cc: Cornelia Huck 
Cc: Peter Xu 
Cc: Juan Quintela 
Cc: Stefan Hajnoczi 
Cc: Paolo Bonzini 
Cc: Marc-André Lureau 
Cc: Gerd Hoffmann 
Cc: Daniel P. Berrangé 
Signed-off-by: Markus Armbruster 
Message-Id: <20210720125408.387910-2-arm...@redhat.com>
Reviewed-by: Eric Blake 
Reviewed-by: Peter Xu 
Acked-by: Michael S. Tsirkin 
Reviewed-by: Philippe Mathieu-Daudé 
---
 hw/s390x/ipl.c|  6 +-
 migration/migration.c |  7 +--
 qemu-img.c|  6 +-
 qemu-io.c |  6 +-
 qemu-nbd.c|  5 +
 scsi/qemu-pr-helper.c | 11 +++
 softmmu/vl.c  |  7 +--
 target/i386/sev.c |  8 +---
 ui/console.c  |  6 ++
 ui/spice-core.c   |  7 +--
 10 files changed, 13 insertions(+), 56 deletions(-)

diff --git a/hw/s390x/ipl.c b/hw/s390x/ipl.c
index 8c863cf386..1821c6faee 100644
--- a/hw/s390x/ipl.c
+++ b/hw/s390x/ipl.c
@@ -711,7 +711,6 @@ int s390_ipl_pv_unpack(void)
 void s390_ipl_prepare_cpu(S390CPU *cpu)
 {
 S390IPLState *ipl = get_ipl_device();
-Error *err = NULL;
 
 cpu->env.psw.addr = ipl->start_addr;
 cpu->env.psw.mask = IPL_PSW_MASK;
@@ -723,10 +722,7 @@ void s390_ipl_prepare_cpu(S390CPU *cpu)
 }
 }
 if (ipl->netboot) {
-if (load_netboot_image() < 0) {
-error_report_err(err);
-exit(1);
-}
+load_netboot_image(_fatal);
 ipl->qipl.netboot_start_addr = cpu_to_be64(ipl->start_addr);
 }
 s390_ipl_set_boot_menu(ipl);
diff --git a/migration/migration.c b/migration/migration.c
index 041b8451a6..b169943f35 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -188,8 +188,6 @@ static gint page_request_addr_cmp(gconstpointer ap, 
gconstpointer bp)
 
 void migration_object_init(void)
 {
-Error *err = NULL;
-
 /* This can only be called once. */
 assert(!current_migration);
 current_migration = MIGRATION_OBJ(object_new(TYPE_MIGRATION));
@@ -210,10 +208,7 @@ void migration_object_init(void)
 qemu_mutex_init(_incoming->page_request_mutex);
 current_incoming->page_requested = g_tree_new(page_request_addr_cmp);
 
-if (!migration_object_check(current_migration, )) {
-error_report_err(err);
-exit(1);
-}
+migration_object_check(current_migration, _fatal);
 
 blk_mig_init();
 ram_mig_init();
diff --git a/qemu-img.c b/qemu-img.c
index 908fd0cce5..d77f3e76a9 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -5350,7 +5350,6 @@ int main(int argc, char **argv)
 {
 const img_cmd_t *cmd;
 const char *cmdname;
-Error *local_error = NULL;
 int c;
 static const struct option long_options[] = {
 {"help", no_argument, 0, 'h'},
@@ -5368,10 +5367,7 @@ int main(int argc, char **argv)
 module_call_init(MODULE_INIT_TRACE);
 qemu_init_exec_dir(argv[0]);
 
-if (qemu_init_main_loop(_error)) {
-error_report_err(local_error);
-exit(EXIT_FAILURE);
-}
+qemu_init_main_loop(_fatal);
 
 qcrypto_init(_fatal);
 
diff --git a/qemu-io.c b/qemu-io.c
index 57f07501df..3924639b92 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -529,7 +529,6 @@ int main(int argc, char **argv)
 int flags = BDRV_O_UNMAP;
 int ret;
 bool writethrough = true;
-Error *local_error = NULL;
 QDict *opts = NULL;
 const char *format = NULL;
 bool force_share = false;
@@ -629,10 +628,7 @@ int main(int argc, char **argv)
 exit(1);
 }
 
-if (qemu_init_main_loop(_error)) {
-error_report_err(local_error);
-exit(1);
-}
+qemu_init_main_loop(_fatal);
 
 if (!trace_init_backends()) {
 exit(1);
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 26ffbf15af..65ebec598f 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -963,10 +963,7 @@ int main(int argc, char **argv)
 }
 }
 
-if (qemu_init_main_loop(_err)) {
-error_report_err(local_err);
-exit(EXIT_FAILURE);
-}
+qemu_init_main_loop(_fatal);
 bdrv_init();
 atexit(qemu_nbd_shutdown);
 
diff --git a/scsi/qemu-pr-helper.c b/scsi/qemu-pr-helper.c
index 7b9389b47b..f281daeced 100644
--- a/scsi/qemu-pr-helper.c
+++ b/scsi/qemu-pr-helper.c
@@ -1044,10 +1044,7 @@ int main(int argc, char **argv)
 }
 }
 
-if (qemu_init_main_loop(_err)) {
-error_report_err(local_err);
-exit(EXIT_FAILURE);
-}
+qemu_init_main_loop(_fatal);
 
 server_watch = qio_channel_add_watch(QIO_CHANNEL(server_ioc),
  G_IO_IN,
@@ -1061,10 +1058,8 @@ int main(int argc, char **argv)
 }
 }
 
-if ((daemonize || pidfile_specified) &&
-!qemu_write_pidfile(pidfile, _err)) {
-error_report_err(local_err);
-exit(EXIT_FAILURE);
+if (daemonize || 

[PULL 13/15] vhost: Clean up how VhostOpts method vhost_backend_init() fails

2021-08-26 Thread Markus Armbruster
vhost_user_backend_init() can fail without setting an error.  Unclean.
Its caller vhost_dev_init() compensates by substituting a generic
error then.  Goes back to commit 28770ff935 "vhost: Distinguish errors
in vhost_backend_init()".

Clean up by moving the generic error from vhost_dev_init() to all the
failure paths that neglect to set an error.

Cc: Kevin Wolf 
Cc: Michael S. Tsirkin 
Signed-off-by: Markus Armbruster 
Message-Id: <20210720125408.387910-14-arm...@redhat.com>
Acked-by: Michael S. Tsirkin 
Reviewed-by: Philippe Mathieu-Daudé 
---
 hw/virtio/vhost-user.c | 6 ++
 hw/virtio/vhost.c  | 4 
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 229c114a19..2407836fac 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -1876,6 +1876,7 @@ static int vhost_user_backend_init(struct vhost_dev *dev, 
void *opaque,
 
 err = vhost_user_get_features(dev, );
 if (err < 0) {
+error_setg_errno(errp, -err, "vhost_backend_init failed");
 return err;
 }
 
@@ -1885,6 +1886,7 @@ static int vhost_user_backend_init(struct vhost_dev *dev, 
void *opaque,
 err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
  _features);
 if (err < 0) {
+error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
 return -EPROTO;
 }
 
@@ -1903,6 +1905,7 @@ static int vhost_user_backend_init(struct vhost_dev *dev, 
void *opaque,
 
 err = vhost_user_set_protocol_features(dev, dev->protocol_features);
 if (err < 0) {
+error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
 return -EPROTO;
 }
 
@@ -1911,6 +1914,7 @@ static int vhost_user_backend_init(struct vhost_dev *dev, 
void *opaque,
 err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
  >max_queues);
 if (err < 0) {
+error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
 return -EPROTO;
 }
 } else {
@@ -1940,6 +1944,7 @@ static int vhost_user_backend_init(struct vhost_dev *dev, 
void *opaque,
 } else {
 err = vhost_user_get_max_memslots(dev, _slots);
 if (err < 0) {
+error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
 return -EPROTO;
 }
 
@@ -1966,6 +1971,7 @@ static int vhost_user_backend_init(struct vhost_dev *dev, 
void *opaque,
 if (dev->vq_index == 0) {
 err = vhost_setup_slave_channel(dev);
 if (err < 0) {
+error_setg_errno(errp, EPROTO, "vhost_backend_init failed");
 return -EPROTO;
 }
 }
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index 88f8a397dc..3c0b537f89 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1289,7 +1289,6 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
VhostBackendType backend_type, uint32_t busyloop_timeout,
Error **errp)
 {
-ERRP_GUARD();
 uint64_t features;
 int i, r, n_initialized_vqs = 0;
 
@@ -1301,9 +1300,6 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
 
 r = hdev->vhost_ops->vhost_backend_init(hdev, opaque, errp);
 if (r < 0) {
-if (!*errp) {
-error_setg_errno(errp, -r, "vhost_backend_init failed");
-}
 goto fail;
 }
 
-- 
2.31.1




[PULL 08/15] whpx nvmm: Drop useless migrate_del_blocker()

2021-08-26 Thread Markus Armbruster
There is nothing to delete after migrate_add_blocker() failed.  Trying
anyway is safe, but useless.  Don't.

Cc: Sunil Muthuswamy 
Cc: Kamil Rytarowski 
Cc: Reinoud Zandijk 
Signed-off-by: Markus Armbruster 
Message-Id: <20210720125408.387910-9-arm...@redhat.com>
Reviewed-by: Reinoud Zandijk 
Acked-by: Michael S. Tsirkin 
---
 target/i386/nvmm/nvmm-all.c | 1 -
 target/i386/whpx/whpx-all.c | 1 -
 2 files changed, 2 deletions(-)

diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c
index dfa690d65d..7bb0d9e30e 100644
--- a/target/i386/nvmm/nvmm-all.c
+++ b/target/i386/nvmm/nvmm-all.c
@@ -929,7 +929,6 @@ nvmm_init_vcpu(CPUState *cpu)
 (void)migrate_add_blocker(nvmm_migration_blocker, _error);
 if (local_error) {
 error_report_err(local_error);
-migrate_del_blocker(nvmm_migration_blocker);
 error_free(nvmm_migration_blocker);
 return -EINVAL;
 }
diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c
index f832f286ac..cc8c0b984b 100644
--- a/target/i386/whpx/whpx-all.c
+++ b/target/i386/whpx/whpx-all.c
@@ -1349,7 +1349,6 @@ int whpx_init_vcpu(CPUState *cpu)
 (void)migrate_add_blocker(whpx_migration_blocker, _error);
 if (local_error) {
 error_report_err(local_error);
-migrate_del_blocker(whpx_migration_blocker);
 error_free(whpx_migration_blocker);
 ret = -EINVAL;
 goto error;
-- 
2.31.1




[PULL 06/15] i386: Never free migration blocker objects instead of sometimes

2021-08-26 Thread Markus Armbruster
invtsc_mig_blocker has static storage duration.  When a CPU with
certain features is initialized, and invtsc_mig_blocker is still null,
we add a migration blocker and store it in invtsc_mig_blocker.

The object is freed when migrate_add_blocker() fails, leaving
invtsc_mig_blocker dangling.  It is not freed on later failures.

Same for hv_passthrough_mig_blocker and hv_no_nonarch_cs_mig_blocker.

All failures are actually fatal, so whether we free or not doesn't
really matter, except as bad examples to be copied / imitated.

Clean this up in a minimal way: never free these blocker objects.

Cc: Paolo Bonzini 
Cc: Marcelo Tosatti 
Cc: Eduardo Habkost 
Signed-off-by: Markus Armbruster 
Message-Id: <20210720125408.387910-7-arm...@redhat.com>
Acked-by: Michael S. Tsirkin 
---
 target/i386/kvm/kvm.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index e69abe48e3..57aed525b5 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -1437,7 +1437,6 @@ static int hyperv_init_vcpu(X86CPU *cpu)
 ret = migrate_add_blocker(hv_passthrough_mig_blocker, _err);
 if (local_err) {
 error_report_err(local_err);
-error_free(hv_passthrough_mig_blocker);
 return ret;
 }
 }
@@ -1452,7 +1451,6 @@ static int hyperv_init_vcpu(X86CPU *cpu)
 ret = migrate_add_blocker(hv_no_nonarch_cs_mig_blocker, _err);
 if (local_err) {
 error_report_err(local_err);
-error_free(hv_no_nonarch_cs_mig_blocker);
 return ret;
 }
 }
@@ -1892,7 +1890,6 @@ int kvm_arch_init_vcpu(CPUState *cs)
 r = migrate_add_blocker(invtsc_mig_blocker, _err);
 if (local_err) {
 error_report_err(local_err);
-error_free(invtsc_mig_blocker);
 return r;
 }
 }
-- 
2.31.1




[PULL 07/15] vfio: Avoid error_propagate() after migrate_add_blocker()

2021-08-26 Thread Markus Armbruster
When migrate_add_blocker(blocker, &err) is followed by
error_propagate(errp, err), we can often just as well do
migrate_add_blocker(..., errp).  This is the case in
vfio_migration_probe().

Prior art: commit 386f6c07d2 "error: Avoid error_propagate() after
migrate_add_blocker()".

Cc: Kirti Wankhede 
Cc: Alex Williamson 
Signed-off-by: Markus Armbruster 
Message-Id: <20210720125408.387910-8-arm...@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Kirti Wankhede 
Acked-by: Michael S. Tsirkin 
---
 hw/vfio/migration.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 82f654afb6..ff6b45de6b 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -858,7 +858,6 @@ int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
 {
 VFIOContainer *container = vbasedev->group->container;
 struct vfio_region_info *info = NULL;
-Error *local_err = NULL;
 int ret = -ENOTSUP;
 
 if (!vbasedev->enable_migration || !container->dirty_pages_supported) {
@@ -885,9 +884,8 @@ add_blocker:
"VFIO device doesn't support migration");
 g_free(info);
 
-ret = migrate_add_blocker(vbasedev->migration_blocker, _err);
-if (local_err) {
-error_propagate(errp, local_err);
+ret = migrate_add_blocker(vbasedev->migration_blocker, errp);
+if (ret < 0) {
 error_free(vbasedev->migration_blocker);
 vbasedev->migration_blocker = NULL;
 }
-- 
2.31.1




[PULL 15/15] vl: Clean up -smp error handling

2021-08-26 Thread Markus Armbruster
The Error ** argument must be NULL, &error_abort, &error_fatal, or a
pointer to a variable containing NULL.  Passing an argument of the
latter kind twice without clearing it in between is wrong: if the
first call sets an error, it no longer points to NULL for the second
call.

machine_parse_property_opt() is wrong that way: it passes @errp to
keyval_parse() without checking for failure, then passes it to
keyval_merge().  Harmless, since the only caller passes &error_fatal.

Clean up: drop the parameter, and use &error_fatal directly.

Cc: Paolo Bonzini 
Signed-off-by: Markus Armbruster 
Message-Id: <20210720125408.387910-16-arm...@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé 
Acked-by: Michael S. Tsirkin 
[Rebased, conflict with commit a3c2f128306 resolved]
---
 softmmu/vl.c | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/softmmu/vl.c b/softmmu/vl.c
index 6227f8f10e..bdeb17809d 100644
--- a/softmmu/vl.c
+++ b/softmmu/vl.c
@@ -1550,20 +1550,17 @@ machine_merge_property(const char *propname, QDict 
*prop, Error **errp)
 
 static void
 machine_parse_property_opt(QemuOptsList *opts_list, const char *propname,
-   const char *arg, Error **errp)
+   const char *arg)
 {
 QDict *prop = NULL;
 bool help = false;
 
-prop = keyval_parse(arg, opts_list->implied_opt_name, , errp);
+prop = keyval_parse(arg, opts_list->implied_opt_name, , _fatal);
 if (help) {
 qemu_opts_print_help(opts_list, true);
 exit(0);
 }
-if (!prop) {
-return;
-}
-machine_merge_property(propname, prop, errp);
+machine_merge_property(propname, prop, _fatal);
 qobject_unref(prop);
 }
 
@@ -3343,7 +3340,8 @@ void qemu_init(int argc, char **argv, char **envp)
 }
 break;
 case QEMU_OPTION_smp:
-machine_parse_property_opt(qemu_find_opts("smp-opts"), "smp", 
optarg, _fatal);
+machine_parse_property_opt(qemu_find_opts("smp-opts"),
+   "smp", optarg);
 break;
 case QEMU_OPTION_vnc:
 vnc_parse(optarg);
-- 
2.31.1




[PULL 05/15] vhost-scsi: Plug memory leak on migrate_add_blocker() failure

2021-08-26 Thread Markus Armbruster
Cc: Michael S. Tsirkin 
Signed-off-by: Markus Armbruster 
Message-Id: <20210720125408.387910-6-arm...@redhat.com>
Acked-by: Michael S. Tsirkin 
---
 hw/scsi/vhost-scsi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c
index 8c611bfd2d..039caf2614 100644
--- a/hw/scsi/vhost-scsi.c
+++ b/hw/scsi/vhost-scsi.c
@@ -208,7 +208,6 @@ static void vhost_scsi_realize(DeviceState *dev, Error 
**errp)
 "target SCSI device state or use shared storage over network), 
"
 "set 'migratable' property to true to enable migration.");
 if (migrate_add_blocker(vsc->migration_blocker, errp) < 0) {
-error_free(vsc->migration_blocker);
 goto free_virtio;
 }
 }
@@ -233,11 +232,12 @@ static void vhost_scsi_realize(DeviceState *dev, Error 
**errp)
 return;
 
  free_vqs:
+g_free(vsc->dev.vqs);
 if (!vsc->migratable) {
 migrate_del_blocker(vsc->migration_blocker);
 }
-g_free(vsc->dev.vqs);
  free_virtio:
+error_free(vsc->migration_blocker);
 virtio_scsi_common_unrealize(dev);
  close_fd:
 close(vhostfd);
-- 
2.31.1




[PULL 11/15] microvm: Drop dead error handling in microvm_machine_state_init()

2021-08-26 Thread Markus Armbruster
Stillborn in commit 0ebf007dda "hw/i386: Introduce the microvm machine
type".

Cc: Sergio Lopez 
Signed-off-by: Markus Armbruster 
Message-Id: <20210720125408.387910-12-arm...@redhat.com>
Reviewed-by: Sergio Lopez 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Pankaj Gupta 
Acked-by: Michael S. Tsirkin 
---
 hw/i386/microvm.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index aba0c83219..f257ec5a0b 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -458,15 +458,10 @@ static void microvm_machine_state_init(MachineState 
*machine)
 {
 MicrovmMachineState *mms = MICROVM_MACHINE(machine);
 X86MachineState *x86ms = X86_MACHINE(machine);
-Error *local_err = NULL;
 
 microvm_memory_init(mms);
 
 x86_cpus_init(x86ms, CPU_VERSION_LATEST);
-if (local_err) {
-error_report_err(local_err);
-exit(1);
-}
 
 microvm_devices_init(mms);
 }
-- 
2.31.1




[PULL 03/15] spapr: Explain purpose of ->fwnmi_migration_blocker more clearly

2021-08-26 Thread Markus Armbruster
spapr_mce_req_event() makes an effort to prevent migration from
degrading the reporting of FWNMIs.  It adds a migration blocker when
it receives one, and deletes it when it's done handling it.  This is a
best effort.

Commit 2500fb423a "migration: Include migration support for machine
check handling" tried to explain this in a comment.  Rewrite the
comment for clarity, and reposition it to make it clear it applies to
all failure modes, not just "migration already in progress".

Cc: David Gibson 
Cc: Aravinda Prasad 
Cc: Ganesh Goudar 
Cc: Dr. David Alan Gilbert 
Signed-off-by: Markus Armbruster 
Message-Id: <20210720125408.387910-4-arm...@redhat.com>
Acked-by: Michael S. Tsirkin 
---
 hw/ppc/spapr_events.c | 17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index 690533cbdc..630e86282c 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -911,16 +911,17 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
 }
 }
 
+/*
+ * Try to block migration while FWNMI is being handled, so the
+ * machine check handler runs where the information passed to it
+ * actually makes sense.  This shouldn't actually block migration,
+ * only delay it slightly, assuming migration is retried.  If the
+ * attempt to block fails, carry on.  Unfortunately, it always
+ * fails when running with -only-migrate.  A proper interface to
+ * delay migration completion for a bit could avoid that.
+ */
 ret = migrate_add_blocker(spapr->fwnmi_migration_blocker, NULL);
 if (ret == -EBUSY) {
-/*
- * We don't want to abort so we let the migration to continue.
- * In a rare case, the machine check handler will run on the target.
- * Though this is not preferable, it is better than aborting
- * the migration or killing the VM. It is okay to call
- * migrate_del_blocker on a blocker that was not added (which the
- * nmi-interlock handler would do when it's called after this).
- */
 warn_report("Received a fwnmi while migration was in progress");
 }
 
-- 
2.31.1




[PULL 00/15] Error reporting patches for 2021-08-26

2021-08-26 Thread Markus Armbruster
The following changes since commit c83fcfaf8a54d0d034bd0edf7bbb3b0d16669be9:

  Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2021-08-26' into 
staging (2021-08-26 13:42:34 +0100)

are available in the Git repository at:

  git://repo.or.cz/qemu/armbru.git tags/pull-error-2021-08-26

for you to fetch changes up to f9dfae9cb6b27649085f662a863f6167650402e0:

  vl: Clean up -smp error handling (2021-08-26 17:15:28 +0200)


Error reporting patches for 2021-08-26


Markus Armbruster (15):
  error: Use error_fatal to simplify obvious fatal errors (again)
  spapr: Plug memory leak when we can't add a migration blocker
  spapr: Explain purpose of ->fwnmi_migration_blocker more clearly
  multi-process: Fix pci_proxy_dev_realize() error handling
  vhost-scsi: Plug memory leak on migrate_add_blocker() failure
  i386: Never free migration blocker objects instead of sometimes
  vfio: Avoid error_propagate() after migrate_add_blocker()
  whpx nvmm: Drop useless migrate_del_blocker()
  migration: Unify failure check for migrate_add_blocker()
  migration: Handle migration_incoming_setup() errors consistently
  microvm: Drop dead error handling in microvm_machine_state_init()
  vhost: Clean up how VhostOpts method vhost_get_config() fails
  vhost: Clean up how VhostOpts method vhost_backend_init() fails
  Remove superfluous ERRP_GUARD()
  vl: Clean up -smp error handling

 backends/tpm/tpm_emulator.c |  3 +--
 hw/i386/microvm.c   |  5 -
 hw/ppc/spapr_events.c   | 20 ++--
 hw/remote/mpqemu-link.c |  3 ---
 hw/remote/proxy.c   | 10 +-
 hw/s390x/ipl.c  |  6 +-
 hw/scsi/vhost-scsi.c|  4 ++--
 hw/vfio/migration.c |  6 ++
 hw/virtio/vhost-user.c  |  8 
 hw/virtio/vhost.c   | 16 +++-
 migration/migration.c   | 34 ++
 qemu-img.c  |  6 +-
 qemu-io.c   |  6 +-
 qemu-nbd.c  |  5 +
 qga/commands-posix-ssh.c| 17 -
 qga/commands-win32.c|  1 -
 scsi/qemu-pr-helper.c   | 11 +++
 softmmu/vl.c| 19 ++-
 target/i386/kvm/kvm.c   |  9 +++--
 target/i386/nvmm/nvmm-all.c |  4 +---
 target/i386/sev.c   |  8 +---
 target/i386/whpx/whpx-all.c |  4 +---
 ui/console.c|  7 ++-
 ui/spice-core.c |  7 +--
 24 files changed, 67 insertions(+), 152 deletions(-)

-- 
2.31.1




Re: [PATCH v2 42/43] bsd-user: Add '-0 argv0' option to bsd-user/main.c

2021-08-26 Thread Philippe Mathieu-Daudé
On 8/26/21 11:12 PM, i...@bsdimp.com wrote:
> From: Colin Percival 
> 
> Previously it was impossible to emulate a program with a file name
> different from its argv[0].  With this change, you can run
> qemu -0 fakename realname args
> which runs the program "realname" with an argv of "fakename args".
> 
> Signed-off-by: Colin Percival 
> Signed-off-by: Warner Losh 
> Reviewed-by: Richard Henderson 
> ---
>  bsd-user/main.c | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/bsd-user/main.c b/bsd-user/main.c
> index b35bcf4d1e..ae25f4c773 100644
> --- a/bsd-user/main.c
> +++ b/bsd-user/main.c
> @@ -268,6 +268,7 @@ int main(int argc, char **argv)
>  char **target_environ, **wrk;
>  envlist_t *envlist = NULL;
>  bsd_type = HOST_DEFAULT_BSD_TYPE;
> +char * argv0 = NULL;
>  
>  adjust_ssize();
>  
> @@ -390,6 +391,8 @@ int main(int argc, char **argv)
>  do_strace = 1;
>  } else if (!strcmp(r, "trace")) {
>  trace_opt_parse(optarg);
> +} else if (!strcmp(r, "0")) {
> +argv0 = argv[optind++];
>  } else {
>  usage();
>  }
> @@ -413,6 +416,8 @@ int main(int argc, char **argv)
>  usage();
>  }
>  filename = argv[optind];
> +if (argv0)

Style:

{

> +argv[optind] = argv0;

}

>  
>  if (!trace_init_backends()) {
>  exit(1);
> 




Re: [PATCH v2 41/43] bsd-user: Implement cpu_copy() helper routine

2021-08-26 Thread Philippe Mathieu-Daudé
On 8/26/21 11:11 PM, i...@bsdimp.com wrote:
> From: Warner Losh 
> 
> cpu_copy shouldbe called when processes are creating new threads. It

Typo "should be"

> copies the current state of the CPU to a new cpu state needed for the
> new thread.
> 
> Signed-off-by: Stacey Son 
> Signed-off-by: Warner Losh 
> Signed-off-by: Justin Hibbits 
> Reviewed-by: Richard Henderson 
> ---
>  bsd-user/main.c | 30 ++
>  1 file changed, 30 insertions(+)
> 
> diff --git a/bsd-user/main.c b/bsd-user/main.c
> index e2ed9e32ba..b35bcf4d1e 100644
> --- a/bsd-user/main.c
> +++ b/bsd-user/main.c
> @@ -180,6 +180,36 @@ void init_task_state(TaskState *ts)
>  ts->sigqueue_table[i].next = NULL;
>  }
>  
> +CPUArchState *cpu_copy(CPUArchState *env)
> +{
> +CPUState *cpu = env_cpu(env);
> +CPUState *new_cpu = cpu_create(cpu_type);
> +CPUArchState *new_env = new_cpu->env_ptr;
> +CPUBreakpoint *bp;
> +CPUWatchpoint *wp;
> +
> +/* Reset non arch specific state */
> +cpu_reset(new_cpu);
> +
> +memcpy(new_env, env, sizeof(CPUArchState));
> +
> +/*
> + * Clone all break/watchpoints.
> + * Note: Once we support ptrace with hw-debug register access, make sure
> + * BP_CPU break/watchpoints are handled correctly on clone.
> + */
> +QTAILQ_INIT(>breakpoints);
> +QTAILQ_INIT(>watchpoints);
> +QTAILQ_FOREACH(bp, >breakpoints, entry) {
> +cpu_breakpoint_insert(new_cpu, bp->pc, bp->flags, NULL);
> +}
> +QTAILQ_FOREACH(wp, >watchpoints, entry) {
> +cpu_watchpoint_insert(new_cpu, wp->vaddr, wp->len, wp->flags, NULL);
> +}
> +
> +return new_env;
> +}

But where is it called?



Re: [PATCH v2 30/43] bsd-user: Remove dead #ifdefs from elfload.c

2021-08-26 Thread Philippe Mathieu-Daudé
On 8/26/21 11:11 PM, i...@bsdimp.com wrote:
> From: Warner Losh 
> 
> LOW_ELF_STACK doesn't exist on FreeBSD and likely never will. Remove it.
> Likewise, remove an #if 0 block that's not useful
> 
> Signed-off-by: Warner Losh 
> Reviewed-by: Richard Henderson 
> ---
>  bsd-user/elfload.c | 20 
>  1 file changed, 20 deletions(-)

Move as patch #14?

Reviewed-by: Philippe Mathieu-Daudé 



Re: [PATCH v2 19/43] bsd-user: start to move target CPU functions to target_arch*

2021-08-26 Thread Philippe Mathieu-Daudé
On 8/26/21 11:11 PM, i...@bsdimp.com wrote:
> From: Warner Losh 
> 
> Move the CPU functons into target_arch_cpu.c that are unique to each

Typo "functions"

> CPU. These are defined in target_arch.h.
> 
> Signed-off-by: Stacey Son 
> Signed-off-by: Warner Losh 
> Reviewed-by: Richard Henderson 
> ---
>  bsd-user/i386/target_arch.h   | 31 +
>  bsd-user/i386/target_arch_cpu.c   | 75 +++
>  bsd-user/main.c   | 12 -
>  bsd-user/x86_64/target_arch.h | 31 +
>  bsd-user/x86_64/target_arch_cpu.c | 75 +++
>  configure |  7 +--
>  meson.build   |  8 +++-
>  7 files changed, 219 insertions(+), 20 deletions(-)
>  create mode 100644 bsd-user/i386/target_arch.h
>  create mode 100644 bsd-user/i386/target_arch_cpu.c
>  create mode 100644 bsd-user/x86_64/target_arch.h
>  create mode 100644 bsd-user/x86_64/target_arch_cpu.c

> diff --git a/bsd-user/main.c b/bsd-user/main.c
> index f7e1df5da5..7b3550898d 100644
> --- a/bsd-user/main.c
> +++ b/bsd-user/main.c
> @@ -72,13 +72,6 @@ void gemu_log(const char *fmt, ...)
>  va_end(ap);
>  }
>  
> -#if defined(TARGET_I386)
> -int cpu_get_pic_interrupt(CPUX86State *env)
> -{
> -return -1;
> -}
> -#endif

Let's avoid that using a stub.

> diff --git a/meson.build b/meson.build
> index f2e148eaf9..5fe6b4aae6 100644
> --- a/meson.build
> +++ b/meson.build
> @@ -2560,9 +2560,13 @@ foreach target : target_dirs
>  if 'CONFIG_LINUX_USER' in config_target
>base_dir = 'linux-user'
>target_inc += include_directories('linux-user/host/' / 
> config_host['ARCH'])
> -else
> +endif
> +if 'CONFIG_BSD_USER' in config_target
>base_dir = 'bsd-user'
> -  target_inc += include_directories('bsd-user/freebsd')
> +  target_inc += include_directories('bsd-user/' / targetos)
> +# target_inc += include_directories('bsd-user/host/' / 
> config_host['ARCH'])

Left-over?

> +  dir = base_dir / abi
> +  arch_srcs += files(dir / 'target_arch_cpu.c')
>  endif
>  target_inc += include_directories(
>base_dir,
> 




Re: [PATCH v2 15/43] bsd-user: assume pthreads and support of __thread

2021-08-26 Thread Philippe Mathieu-Daudé
On 8/26/21 11:11 PM, i...@bsdimp.com wrote:
> From: Warner Losh 
> 
> All compilers for some time have supported this. Follow linux-user 

Please insert: "(see 24cb36a61c6: "configure: Make NPTL non-optional")"

> and
> eliminate the #define THREAD and unconditionally insert __thread where
> needed.
> 
> Signed-off-by: Warner Losh 
> Reviewed-by: Richard Henderson 
> ---
>  bsd-user/main.c |  2 +-
>  bsd-user/qemu.h | 10 +-
>  2 files changed, 2 insertions(+), 10 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé 



Re: [PATCH v2 12/43] bsd-user: remove a.out support

2021-08-26 Thread Philippe Mathieu-Daudé
On 8/26/21 11:11 PM, i...@bsdimp.com wrote:
> From: Warner Losh 
> 
> Remove still-born a.out support. The BSDs switched from a.out to ELF 20+ years
> ago. It's out of scope for bsd-user, and what little support there was would
> simply wind up at a not-implemented message. Simplify the whole mess by 
> removing
> it entirely. Should future support be required, it would be better to start 
> from
> scratch.
> 
> Signed-off-by: Warner Losh 
> Reviewed-by: Richard Henderson 
> ---
>  bsd-user/bsdload.c |   9 +---
>  bsd-user/elfload.c | 105 -
>  bsd-user/qemu.h|   2 +-
>  3 files changed, 21 insertions(+), 95 deletions(-)

Reviewed-by: Philippe Mathieu-Daudé 



Re: [PATCH v2 08/43] bsd-user: pass the bsd_param into loader_exec

2021-08-26 Thread Philippe Mathieu-Daudé
On 8/26/21 11:11 PM, i...@bsdimp.com wrote:
> From: Warner Losh 
> 
> Pass the bsd_param into loader_exec, and adjust.

Missing the "why" justification.

Anyway,
Reviewed-by: Philippe Mathieu-Daudé 

> Signed-off-by: Warner Losh 
> Reviewed-by: Richard Henderson 
> ---
>  bsd-user/bsdload.c | 37 +++--
>  bsd-user/main.c|  7 ++-
>  bsd-user/qemu.h|  3 ++-
>  3 files changed, 27 insertions(+), 20 deletions(-)



Re: [PATCH v2 07/43] bsd-user: move arch specific defines out of elfload.c

2021-08-26 Thread Philippe Mathieu-Daudé
On 8/26/21 11:11 PM, i...@bsdimp.com wrote:
> From: Warner Losh 
> 
> Move the arcitecture specific defines to target_arch_elf.h and delete

Typo "architecture"

> them from elfload.c. unifdef as appropriate for i386 vs x86_64

"un-ifdef" or untangle?

> versions. Add the copyright/license comments, and guard ifdefs.
> 
> Signed-off-by: Warner Losh 
> Reviewed-by: Richard Henderson 
> ---
>  bsd-user/elfload.c| 81 +--
>  bsd-user/i386/target_arch_elf.h   | 76 +
>  bsd-user/x86_64/target_arch_elf.h | 64 
>  3 files changed, 142 insertions(+), 79 deletions(-)
>  create mode 100644 bsd-user/i386/target_arch_elf.h
>  create mode 100644 bsd-user/x86_64/target_arch_elf.h



Re: [PATCH v6 4/6] qmp: add QMP command x-debug-virtio-queue-status

2021-08-26 Thread Jason Wang



在 2021/8/26 下午2:25, Jonah Palmer 写道:


Hi Jason, could I get your thoughts on this implementation question below?

I'm not too sure on how I should proceed determining if vhost is 
active or not.


Thank you!


Jonah

On 7/26/21 5:33 AM, Jonah Palmer wrote:



On 7/22/21 5:22 AM, Jason Wang wrote:


在 2021/7/21 下午4:59, Jonah Palmer 写道:



On 7/13/21 10:37 PM, Jason Wang wrote:


在 2021/7/12 下午6:35, Jonah Palmer 写道:

From: Laurent Vivier 

This new command shows internal status of a VirtQueue.
(vrings and indexes).

Signed-off-by: Laurent Vivier 
Signed-off-by: Jonah Palmer 
---
  hw/virtio/virtio-stub.c |   6 +++
  hw/virtio/virtio.c  |  37 ++
  qapi/virtio.json    | 102 


  3 files changed, 145 insertions(+)

  [Jonah: Added 'device-type' field to VirtQueueStatus and
  qmp command x-debug-virtio-queue-status.]

diff --git a/hw/virtio/virtio-stub.c b/hw/virtio/virtio-stub.c
index ddb592f..3c1bf17 100644
--- a/hw/virtio/virtio-stub.c
+++ b/hw/virtio/virtio-stub.c
@@ -17,3 +17,9 @@ VirtioStatus *qmp_x_debug_virtio_status(const 
char* path, Error **errp)

  {
  return qmp_virtio_unsupported(errp);
  }
+
+VirtQueueStatus *qmp_x_debug_virtio_queue_status(const char *path,
+ uint16_t queue, Error **errp)
+{
+    return qmp_virtio_unsupported(errp);
+}
diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c
index 81a0ee8..ccd4371 100644
--- a/hw/virtio/virtio.c
+++ b/hw/virtio/virtio.c
@@ -3935,6 +3935,43 @@ static VirtIODevice 
*virtio_device_find(const char *path)

  return NULL;
  }
  +VirtQueueStatus *qmp_x_debug_virtio_queue_status(const char 
*path,

+ uint16_t queue, Error **errp)
+{
+    VirtIODevice *vdev;
+    VirtQueueStatus *status;
+
+    vdev = virtio_device_find(path);
+    if (vdev == NULL) {
+    error_setg(errp, "Path %s is not a VirtIO device", path);
+    return NULL;
+    }
+
+    if (queue >= VIRTIO_QUEUE_MAX || !virtio_queue_get_num(vdev, 
queue)) {

+    error_setg(errp, "Invalid virtqueue number %d", queue);
+    return NULL;
+    }
+
+    status = g_new0(VirtQueueStatus, 1);
+    status->device_type = qapi_enum_parse(_lookup, 
vdev->name,

+ VIRTIO_TYPE_UNKNOWN, NULL);
+    status->queue_index = vdev->vq[queue].queue_index;
+    status->inuse = vdev->vq[queue].inuse;
+    status->vring_num = vdev->vq[queue].vring.num;
+    status->vring_num_default = vdev->vq[queue].vring.num_default;
+    status->vring_align = vdev->vq[queue].vring.align;
+    status->vring_desc = vdev->vq[queue].vring.desc;
+    status->vring_avail = vdev->vq[queue].vring.avail;
+    status->vring_used = vdev->vq[queue].vring.used;
+    status->last_avail_idx = vdev->vq[queue].last_avail_idx;



As mentioned in previous versions. We need add vhost support 
otherwise the value here is wrong.
Got it. I'll add a case to determine if vhost is active for a given 
device.
So, in the case that vhost is active, should I just not print out 
the value or would I substitute it with

another value (whatever that might be)?



You can query the vhost for those index.

(vhost_get_vring_base())



  Same question for shadow_avail_idx below as well.



It's implementation specific. I think we can simply not show it 
if vhost is enabled.


Thanks


Ah I see, thank you!

So, it appears to me that it's not very easy to get the struct 
vhost_dev pointer from struct VirtIODevice to indicate whether or not 
vhost is active, e.g. there's no virtio class-independent way to get 
struct vhost_dev.


I was thinking of adding an op/callback function to struct 
VirtioDeviceClass, e.g. bool has_vhost(VirtIODevice *vdev), and 
implement it for each virtio class (net, scsi, blk, etc.).


For example, for virtio-net, maybe it'd be something like:

bool has_vhost(VirtIODevice *vdev) {
   VirtIONet *n = VIRTIO_NET(vdev);
   NetClientState *nc = qemu_get_queue(n->nic);
   return nc->peer ? get_vhost_net(nc->peer) : false;
}



Something like this, yes.


Also, for getting the last_avail_idx, I was also thinking of adding 
another op/callback to struct VirtioDeviceClass, e.g. unsigned int 
get_last_avail_idx(VirtIODevice *vdev, unsigned int vq_idx) that 
finds if vhost is active or not and either gets last_avail_idx from 
virtio directly or through vhost (e.g. 
vhost_dev->vhost_ops->vhost_get_vring_base()).




So I think instead of has_vhost, we probably need get_vhost() to have a 
pointer to vhost_dev. Then we can do anything we want other than a 
dedicated interface just for avail index.


Thanks


I wanted to run this by you and get your opinion on this before I 
started implementing it in code. Let me know what you think about this.



Jonah






Jonah




+    status->shadow_avail_idx = vdev->vq[queue].shadow_avail_idx;



The shadow index is something that is implementation specific e.g 
in the case of vhost it's kind of meaningless.


Thanks



+    status->used_idx = vdev->vq[queue].used_idx;
+    status->signalled_used = 

Re: [PATCH v2 0/2] target/ppc: Fix vextu[bhw][lr]x on big endian hosts

2021-08-26 Thread David Gibson
On Thu, Aug 26, 2021 at 11:14:44AM -0300, matheus.fe...@eldorado.org.br wrote:
> From: Matheus Ferst 
> 
> The definition of struct Int128 is currently independent of the host
> endianness, causing different results when using the member s128 of
> union ppc_vsr_t in big-endian builds with CONFIG_INT128 or
> !CONFIG_INT128.
> 
> The only PPC instructions that seem to be affected by this issue are the
> "Vector Extract Unsigned Byte/Halfword/Word to GPR using GPR-specified
> Left/Right-Index." Even on builds with Int128 support, however, their
> helpers give the wrong result on big-endian hosts.
> 
> The first patch in this series changes the definition of struct Int128
> to allow its use in the ppc_vsr_t union. The second patch fixes the
> helper definition.

Applied to ppc-for-6.2, thanks.

> 
> Matheus Ferst (2):
>   include/qemu/int128.h: define struct Int128 according to the host
> endianness
>   target/ppc: fix vextu[bhw][lr]x helpers
> 
>  include/qemu/int128.h   | 27 ---
>  target/ppc/int_helper.c | 38 ++
>  2 files changed, 30 insertions(+), 35 deletions(-)
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [PATCH v3 0/2] target/ppc: Fix vector registers access in gdbstub for little-endian

2021-08-26 Thread David Gibson
On Thu, Aug 26, 2021 at 11:56:54AM -0300, matheus.fe...@eldorado.org.br wrote:
> From: Matheus Ferst 
> 
> PPC gdbstub code has two possible swaps of the 64-bit elements of AVR
> registers: in gdb_get_avr_reg/gdb_set_avr_reg (based on msr_le) and in
> gdb_get_reg128/ldq_p (based on TARGET_WORDS_BIGENDIAN).
> 
> In softmmu, only the first is done, because TARGET_WORDS_BIGENDIAN is
> always true. In user mode, both are being done, resulting in swapped
> high and low doublewords of AVR registers in little-endian binaries.
> 
> We fix this by moving the first swap to ppc_maybe_bswap_register, which
> already handles the endianness swap of each element's value in softmmu
> and does nothing in user mode.

Applied to ppc-for-6.2, thanks.

> 
> Based-on: <20210826141446.2488609-1-matheus.fe...@eldorado.org.br>
> 
> Matheus Ferst (2):
>   include/qemu/int128.h: introduce bswap128s
>   target/ppc: fix vector registers access in gdbstub for little-endian
> 
>  include/qemu/int128.h | 17 -
>  target/ppc/gdbstub.c  | 32 +++-
>  2 files changed, 23 insertions(+), 26 deletions(-)
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [PATCH] RFC: build-sys: drop dtc submodule

2021-08-26 Thread David Gibson
On Thu, Aug 26, 2021 at 11:34:59AM +0400, Marc-André Lureau wrote:
> Hi
> 
> On Thu, Aug 26, 2021 at 7:11 AM David Gibson 
> wrote:
> 
> > On Thu, Aug 26, 2021 at 12:11:17AM +0400, Marc-André Lureau wrote:
> > > Hi
> > >
> > > On Thu, Aug 26, 2021 at 12:00 AM Peter Maydell  > >
> > > wrote:
> > >
> > > > On Wed, 25 Aug 2021 at 20:55, Marc-André Lureau
> > > >  wrote:
> > > > > fdt_check_full was added in 1.4.7:
> > > > > https://git.kernel.org/pub/scm/utils/dtc/dtc.git/tag/?h=v1.4.7
> > > > >
> > > > > Only ubuntu appears to be lagging a bit behind. I wonder if they
> > would
> > > > consider an update.
> > > >
> > > > I doubt it. You would need to wait until that actually falls off
> > > > our supported list. You also have a couple of years to wait until
> > > > Debian oldstable is no longer on our supported list.
> > > >
> > > > Maybe, I don't know why debian oldstable would have received a new
> > version
> > > plus fixes, and not ubuntu.
> > >
> > > It seems we could have our own fallback copy of fdt_check_full() though..
> > > I'll give that a try.
> >
> > We could, but fdt_check_full() is actually a pretty complex function.
> >
> >
> Yeah, that would be used for those who don't have >= 1.4.7.
> 
> Alternatively we could lower the fdt_check_full to fdt_check_header in this
> case? It seems it is used to verify the DT from SLOF. It may be trusted I
> suppose, or a malformed DT may only impact the guest?

No, fdt_check_header() isn't enough.  We can't trust the dt blob from
SLOF, because it's coming from guest context.  We *expect* it to come
from the SLOF image we control, but nothing prevents anything else in
the guest from calling the hypercall, or corrupting the in-memory SLOF
image.

And, a bad DT won't just impact the guest - there's a couple of things
we need from it (that's the only reason we need to have SLOF give us
back the DT at all).  Note that the blob might not just have bad
content, but could have bad formatting which will make the functions
qemu uses to access it misbehave.  So, our options are either be
super-careful on every possible DT access after this point, or
pre-check it when it's loaded with fdt_check_full().

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


RE: [PATCH] hw/arm/smmuv3: Support non-PCI/PCIe devices connection

2021-08-26 Thread Li, Chunming


> -Original Message-
> From: Peter Maydell [mailto:peter.mayd...@linaro.org]
> Sent: Thursday, August 26, 2021 11:57 PM
> To: Li, Chunming
> Cc: eric.au...@redhat.com; Liu, Renwei; qemu-...@nongnu.org; Wen,
> Jianxian; qemu-devel@nongnu.org
> Subject: Re: [PATCH] hw/arm/smmuv3: Support non-PCI/PCIe devices
> connection
> 
> On Tue, 24 Aug 2021 at 09:22, Li, Chunming
>  wrote:
> > Sorry for interrupt you.
> > Could you help to check why my series patch cannot be listed by
> https://patchew.org/?
> > I split the patch into 4 commits with 1 coverletter and send them out
> with v4 tag.
> > I can see all 5 emails in https://www.mail-archive.com/qemu-
> de...@nongnu.org/
> > But
> https://patchew.org/QEMU/49C79B700B5D8F45B8EF0861B4EF3B3B01142FABD6@SHA
> SXM03.verisilicon.com/#
> > show " Only 0 patches received! ".
> > It is great helpful If you can help to check and tell me what mistake
> I made.
> > Thanks very much!
> 
> I guess you figured out the problem already, because v5 has them all:
> https://patchew.org/QEMU/1629878922-173270-1-git-send-email-
> chunming_li1...@163.com/
> But the problem was that for v4 the patch emails were not sent as
> threaded emails following up to the cover letter (that is, they were
> missing appropriate References: headers).
> 
> -- PMM

Yes, it is. Thanks for your help. I found the problem and fixed it in v5.
Please help to check the v5 patch, looking forward to your review feedback.

Chunming


[RFC PATCH v2 00/12] Add AMD Secure Nested Paging (SEV-SNP) support

2021-08-26 Thread Michael Roth
These patches implement SEV-SNP along with CPUID enforcement support for QEMU,
and are also available at:

  https://github.com/mdroth/qemu/commits/snp-rfc-v2-upstream

They are based on the initial RFC submitted by Brijesh:

  https://lore.kernel.org/qemu-devel/20210722000259.ykepl7t6ptua7...@amd.com/T/

Changes since RFC v1:

 - rebased onto latest master
 - drop SNP config file in favor of a new 'sev-snp-guest' object where all
   SNP-related params are passed as strings/integers via command-line
 - report specific error if BIOS reports invalid address/len for
   reserved/pre-validated regions (Connor)
 - use Range helpers for handling validated region overlaps (Dave)
 - simplify error handling in sev_snp_launch_start, and report the correct
   return code when handling LAUNCH_START failures (Dov)
 - add SEV-SNP bit to CPUID 0x8000001f when SNP enabled
 - updated query-sev to handle differences between SEV and SEV-SNP
 - updated to work against v5 of SEV-SNP host kernel / hypervisor patches

Overview


SEV-SNP builds upon existing SEV and SEV-ES functionality while adding
new hardware-based memory protections. SEV-SNP adds strong memory integrity
protection to help prevent malicious hypervisor-based attacks like data
replay, memory re-mapping and more in order to create an isolated memory
encryption environment.

This series depends on the following patches to support SEV-SNP in Linux
kernel and OVMF:

  guest kernel (v5, part 1):
  https://lore.kernel.org/kvm/20210820151933.22401-1-brijesh.si...@amd.com/T/
  
  host kernel (v5, part 2):
  https://lore.kernel.org/lkml/20210820155918.7518-1-brijesh.si...@amd.com/
  
  OVMF (v5):
  
https://edk2.groups.io/g/devel/message/77335?p=,,,20,0,0,0::Created,,posterid%3A5969970,20,2,20,83891508

The QEMU patches use the command IDs added by the SEV-SNP hypervisor
patches to bootstrap the SEV-SNP VMs.

Additional resources

SEV-SNP whitepaper
https://www.amd.com/system/files/TechDocs/SEV-SNP-strengthening-vm-isolation-with-integrity-protection-and-more.pdf

APM 2: https://www.amd.com/system/files/TechDocs/24593.pdf (section 15.36)

GHCB spec:
https://developer.amd.com/wp-content/resources/56421.pdf

SEV-SNP firmware specification:
https://www.amd.com/system/files/TechDocs/56860.pdf


Brijesh Singh (6):
  linux-header: add the SNP specific command
  i386/sev: introduce 'sev-snp-guest' object
  i386/sev: initialize SNP context
  i386/sev: add the SNP launch start context
  i386/sev: add support to encrypt BIOS when SEV-SNP is enabled
  i386/sev: populate secrets and cpuid page and finalize the SNP launch

Michael Roth (6):
  i386/sev: introduce "sev-common" type to encapsulate common SEV state
  target/i386: set SEV-SNP CPUID bit when SNP enabled
  target/i386: allow versioned CPUs to specify new cache_info
  target/i386: add new EPYC CPU versions with updated cache_info
  i386/sev: sev-snp: add support for CPUID validation
  i386/sev: update query-sev QAPI format to handle SEV-SNP

 docs/amd-memory-encryption.txt |  77 +++-
 hw/i386/pc_sysfw.c |   7 +-
 include/sysemu/sev.h   |   2 +-
 linux-headers/linux/kvm.h  |  50 +++
 qapi/misc-target.json  |  71 ++-
 qapi/qom.json  |  94 +++-
 target/i386/cpu.c  | 221 -
 target/i386/monitor.c  |  29 +-
 target/i386/sev-stub.c |   8 +-
 target/i386/sev.c  | 989 +++--
 target/i386/sev_i386.h |   4 +
 target/i386/trace-events   |   4 +
 12 files changed, 1374 insertions(+), 182 deletions(-)





[RFC PATCH v2 09/12] target/i386: allow versioned CPUs to specify new cache_info

2021-08-26 Thread Michael Roth
New EPYC CPU versions require small changes to their cache_info's.
Because current QEMU x86 CPU definition does not support cache
versions, we would have to declare a new CPU type for each such case.
To avoid this duplication, the patch allows new cache_info pointers to
be specified for a new CPU version.

Co-developed-by: Wei Huang 
Signed-off-by: Wei Huang 
Signed-off-by: Michael Roth 
---
 target/i386/cpu.c | 36 +---
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index f0b441f692..85d387163a 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1458,6 +1458,7 @@ typedef struct X86CPUVersionDefinition {
 const char *alias;
 const char *note;
 PropValue *props;
+const CPUCaches *const cache_info;
 } X86CPUVersionDefinition;
 
 /* Base definition for a CPU model */
@@ -4975,6 +4976,32 @@ static void x86_cpu_apply_version_props(X86CPU *cpu, 
X86CPUModel *model)
 assert(vdef->version == version);
 }
 
+/* Apply properties for the CPU model version specified in model */
+static const CPUCaches *x86_cpu_get_version_cache_info(X86CPU *cpu,
+   X86CPUModel *model)
+{
+const X86CPUVersionDefinition *vdef;
+X86CPUVersion version = x86_cpu_model_resolve_version(model);
+const CPUCaches *cache_info = model->cpudef->cache_info;
+
+if (version == CPU_VERSION_LEGACY) {
+return cache_info;
+}
+
+for (vdef = x86_cpu_def_get_versions(model->cpudef); vdef->version; 
vdef++) {
+if (vdef->cache_info) {
+cache_info = vdef->cache_info;
+}
+
+if (vdef->version == version) {
+break;
+}
+}
+
+assert(vdef->version == version);
+return cache_info;
+}
+
 /*
  * Load data from X86CPUDefinition into a X86CPU object.
  * Only for builtin_x86_defs models initialized with x86_register_cpudef_types.
@@ -5007,7 +5034,7 @@ static void x86_cpu_load_model(X86CPU *cpu, X86CPUModel 
*model)
 }
 
 /* legacy-cache defaults to 'off' if CPU model provides cache info */
-cpu->legacy_cache = !def->cache_info;
+cpu->legacy_cache = !x86_cpu_get_version_cache_info(cpu, model);
 
 env->features[FEAT_1_ECX] |= CPUID_EXT_HYPERVISOR;
 
@@ -6234,14 +6261,17 @@ static void x86_cpu_realizefn(DeviceState *dev, Error 
**errp)
 
 /* Cache information initialization */
 if (!cpu->legacy_cache) {
-if (!xcc->model || !xcc->model->cpudef->cache_info) {
+const CPUCaches *cache_info =
+x86_cpu_get_version_cache_info(cpu, xcc->model);
+
+if (!xcc->model || !cache_info) {
 g_autofree char *name = x86_cpu_class_get_model_name(xcc);
 error_setg(errp,
"CPU model '%s' doesn't support legacy-cache=off", 
name);
 return;
 }
 env->cache_info_cpuid2 = env->cache_info_cpuid4 = env->cache_info_amd =
-*xcc->model->cpudef->cache_info;
+*cache_info;
 } else {
 /* Build legacy cache information */
 env->cache_info_cpuid2.l1d_cache = &legacy_l1d_cache;
-- 
2.25.1




[RFC PATCH v2 07/12] i386/sev: populate secrets and cpuid page and finalize the SNP launch

2021-08-26 Thread Michael Roth
From: Brijesh Singh 

During the SNP guest launch sequence, a special secrets and cpuid page
needs to be populated by the SEV-SNP firmware. The secrets page contains
the VM Platform Communication Key (VMPCKs) used by the guest to send and
receive secure messages to the PSP. And CPUID page will contain the CPUID
value filtered through the PSP.

The guest BIOS (OVMF) reserves these pages in MEMFD and location of it
is available through the SNP boot block GUID. While finalizing the guest
boot flow, lookup for the boot block and call the SNP_LAUNCH_UPDATE
command to populate secrets and cpuid pages.

In order to support early boot code, the OVMF may ask hypervisor to
request the pre-validation of certain memory range. If such range is
present then call the SNP_LAUNCH_UPDATE command to validate those address
range without affecting the measurement. See the SEV-SNP specification
for further details.

Finally, call the SNP_LAUNCH_FINISH to finalize the guest boot.

Signed-off-by: Brijesh Singh 
Signed-off-by: Michael Roth 
---
 target/i386/sev.c| 189 ++-
 target/i386/trace-events |   2 +
 2 files changed, 189 insertions(+), 2 deletions(-)

diff --git a/target/i386/sev.c b/target/i386/sev.c
index 867c0cb457..0009c93d28 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -33,6 +33,7 @@
 #include "monitor/monitor.h"
 #include "exec/confidential-guest-support.h"
 #include "hw/i386/pc.h"
+#include "qemu/range.h"
 
 #define TYPE_SEV_COMMON "sev-common"
 OBJECT_DECLARE_SIMPLE_TYPE(SevCommonState, SEV_COMMON)
@@ -107,6 +108,19 @@ typedef struct __attribute__((__packed__)) SevInfoBlock {
 uint32_t reset_addr;
 } SevInfoBlock;
 
+#define SEV_SNP_BOOT_BLOCK_GUID "bd39c0c2-2f8e-4243-83e8-1b74cebcb7d9"
+typedef struct __attribute__((__packed__)) SevSnpBootInfoBlock {
+/* Prevalidate range address */
+uint32_t pre_validated_start;
+uint32_t pre_validated_end;
+/* Secrets page address */
+uint32_t secrets_addr;
+uint32_t secrets_len;
+/* CPUID page address */
+uint32_t cpuid_addr;
+uint32_t cpuid_len;
+} SevSnpBootInfoBlock;
+
 static Error *sev_mig_blocker;
 
 static const char *const sev_fw_errlist[] = {
@@ -1086,6 +1100,162 @@ static Notifier sev_machine_done_notify = {
 .notify = sev_launch_get_measure,
 };
 
+static int
+sev_snp_launch_update_gpa(uint32_t hwaddr, uint32_t size, uint8_t type)
+{
+void *hva;
+MemoryRegion *mr = NULL;
+SevSnpGuestState *sev_snp_guest =
+SEV_SNP_GUEST(MACHINE(qdev_get_machine())->cgs);
+
+hva = gpa2hva(&mr, hwaddr, size, NULL);
+if (!hva) {
+error_report("SEV-SNP failed to get HVA for GPA 0x%x", hwaddr);
+return 1;
+}
+
+return sev_snp_launch_update(sev_snp_guest, hwaddr, hva, size, type);
+}
+
+static bool
+detect_first_overlap(uint64_t start, uint64_t end, Range *range_list,
+ size_t range_count, Range *overlap_range)
+{
+int i;
+bool overlap = false;
+Range new;
+
+assert(overlap_range);
+range_make_empty(overlap_range);
+range_init_nofail(&new, start, end - start + 1);
+
+for (i = 0; i < range_count; i++) {
+if (range_overlaps_range(&new, &range_list[i]) &&
+(range_is_empty(overlap_range) ||
+ range_lob(&range_list[i]) < range_lob(overlap_range))) {
+*overlap_range = range_list[i];
+overlap = true;
+}
+}
+
+return overlap;
+}
+
+static void snp_ovmf_boot_block_setup(void)
+{
+SevSnpBootInfoBlock *info;
+uint32_t start, end, sz;
+int ret;
+Range validated_ranges[2];
+
+/*
+ * Extract the SNP boot block for the SEV-SNP guests by locating the
+ * SNP_BOOT GUID. The boot block contains the information such as location
+ * of secrets and CPUID page, additionally it may contain the range of
+ * memory that need to be pre-validated for the boot.
+ */
+if (!pc_system_ovmf_table_find(SEV_SNP_BOOT_BLOCK_GUID,
+(uint8_t **)&info, NULL)) {
+error_report("SEV-SNP: failed to find the SNP boot block");
+exit(1);
+}
+
+trace_kvm_sev_snp_ovmf_boot_block_info(info->secrets_addr,
+   info->secrets_len, info->cpuid_addr,
+   info->cpuid_len,
+   info->pre_validated_start,
+   info->pre_validated_end);
+
+/* Populate the secrets page */
+ret = sev_snp_launch_update_gpa(info->secrets_addr, info->secrets_len,
+KVM_SEV_SNP_PAGE_TYPE_SECRETS);
+if (ret) {
+error_report("SEV-SNP: failed to insert secret page GPA 0x%x",
+ info->secrets_addr);
+exit(1);
+}
+
+/* Populate the cpuid page */
+ret = sev_snp_launch_update_gpa(info->cpuid_addr, info->cpuid_len,
+KVM_SEV_SNP_PAGE_TYPE_CPUID);
+if (ret) {
+

[RFC PATCH v2 10/12] target/i386: add new EPYC CPU versions with updated cache_info

2021-08-26 Thread Michael Roth
This patch introduces new EPYC cpu versions: EPYC-v4, EPYC-Rome-v3,
and EPYC-Milan-v2. The only difference vs. older models is an updated
cache_info with the 'complex_indexing' bit unset, since this bit is
not currently defined for AMD and may cause problems should it be used
for something else in the future. Setting this bit will also cause
CPUID validation failures when running SEV-SNP guests.

Signed-off-by: Michael Roth 
---
 target/i386/cpu.c | 184 ++
 1 file changed, 184 insertions(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 85d387163a..45e456b557 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -1567,6 +1567,56 @@ static const CPUCaches epyc_cache_info = {
 },
 };
 
+static CPUCaches epyc_v4_cache_info = {
+.l1d_cache = &(CPUCacheInfo) {
+.type = DATA_CACHE,
+.level = 1,
+.size = 32 * KiB,
+.line_size = 64,
+.associativity = 8,
+.partitions = 1,
+.sets = 64,
+.lines_per_tag = 1,
+.self_init = 1,
+.no_invd_sharing = true,
+},
+.l1i_cache = &(CPUCacheInfo) {
+.type = INSTRUCTION_CACHE,
+.level = 1,
+.size = 64 * KiB,
+.line_size = 64,
+.associativity = 4,
+.partitions = 1,
+.sets = 256,
+.lines_per_tag = 1,
+.self_init = 1,
+.no_invd_sharing = true,
+},
+.l2_cache = &(CPUCacheInfo) {
+.type = UNIFIED_CACHE,
+.level = 2,
+.size = 512 * KiB,
+.line_size = 64,
+.associativity = 8,
+.partitions = 1,
+.sets = 1024,
+.lines_per_tag = 1,
+},
+.l3_cache = &(CPUCacheInfo) {
+.type = UNIFIED_CACHE,
+.level = 3,
+.size = 8 * MiB,
+.line_size = 64,
+.associativity = 16,
+.partitions = 1,
+.sets = 8192,
+.lines_per_tag = 1,
+.self_init = true,
+.inclusive = true,
+.complex_indexing = false,
+},
+};
+
 static const CPUCaches epyc_rome_cache_info = {
 .l1d_cache = &(CPUCacheInfo) {
 .type = DATA_CACHE,
@@ -1617,6 +1667,56 @@ static const CPUCaches epyc_rome_cache_info = {
 },
 };
 
+static const CPUCaches epyc_rome_v3_cache_info = {
+.l1d_cache = &(CPUCacheInfo) {
+.type = DATA_CACHE,
+.level = 1,
+.size = 32 * KiB,
+.line_size = 64,
+.associativity = 8,
+.partitions = 1,
+.sets = 64,
+.lines_per_tag = 1,
+.self_init = 1,
+.no_invd_sharing = true,
+},
+.l1i_cache = &(CPUCacheInfo) {
+.type = INSTRUCTION_CACHE,
+.level = 1,
+.size = 32 * KiB,
+.line_size = 64,
+.associativity = 8,
+.partitions = 1,
+.sets = 64,
+.lines_per_tag = 1,
+.self_init = 1,
+.no_invd_sharing = true,
+},
+.l2_cache = &(CPUCacheInfo) {
+.type = UNIFIED_CACHE,
+.level = 2,
+.size = 512 * KiB,
+.line_size = 64,
+.associativity = 8,
+.partitions = 1,
+.sets = 1024,
+.lines_per_tag = 1,
+},
+.l3_cache = &(CPUCacheInfo) {
+.type = UNIFIED_CACHE,
+.level = 3,
+.size = 16 * MiB,
+.line_size = 64,
+.associativity = 16,
+.partitions = 1,
+.sets = 16384,
+.lines_per_tag = 1,
+.self_init = true,
+.inclusive = true,
+.complex_indexing = false,
+},
+};
+
 static const CPUCaches epyc_milan_cache_info = {
 .l1d_cache = &(CPUCacheInfo) {
 .type = DATA_CACHE,
@@ -1667,6 +1767,56 @@ static const CPUCaches epyc_milan_cache_info = {
 },
 };
 
+static const CPUCaches epyc_milan_v2_cache_info = {
+.l1d_cache = &(CPUCacheInfo) {
+.type = DATA_CACHE,
+.level = 1,
+.size = 32 * KiB,
+.line_size = 64,
+.associativity = 8,
+.partitions = 1,
+.sets = 64,
+.lines_per_tag = 1,
+.self_init = 1,
+.no_invd_sharing = true,
+},
+.l1i_cache = &(CPUCacheInfo) {
+.type = INSTRUCTION_CACHE,
+.level = 1,
+.size = 32 * KiB,
+.line_size = 64,
+.associativity = 8,
+.partitions = 1,
+.sets = 64,
+.lines_per_tag = 1,
+.self_init = 1,
+.no_invd_sharing = true,
+},
+.l2_cache = &(CPUCacheInfo) {
+.type = UNIFIED_CACHE,
+.level = 2,
+.size = 512 * KiB,
+.line_size = 64,
+.associativity = 8,
+.partitions = 1,
+.sets = 1024,
+.lines_per_tag = 1,
+},
+.l3_cache = &(CPUCacheInfo) {
+.type = UNIFIED_CACHE,
+.level = 3,
+.size = 32 * MiB,
+.line_size = 64,
+.associativity = 16,
+.partitions = 1,
+.sets = 32768,
+.lines_per_tag = 1,
+.self_init = true,
+.inclusive = true,
+

[RFC PATCH v2 05/12] i386/sev: add the SNP launch start context

2021-08-26 Thread Michael Roth
From: Brijesh Singh 

The SNP_LAUNCH_START is called first to create a cryptographic launch
context within the firmware.

Signed-off-by: Brijesh Singh 
Signed-off-by: Michael Roth 
---
 target/i386/sev.c| 29 -
 target/i386/trace-events |  1 +
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/target/i386/sev.c b/target/i386/sev.c
index b8bd6ed9ea..51689d4fa4 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -875,6 +875,28 @@ sev_read_file_base64(const char *filename, guchar **data, 
gsize *len)
 return 0;
 }
 
+static int
+sev_snp_launch_start(SevSnpGuestState *sev_snp_guest)
+{
+int fw_error, rc;
+SevCommonState *sev_common = SEV_COMMON(sev_snp_guest);
+struct kvm_sev_snp_launch_start *start = &sev_snp_guest->kvm_start_conf;
+
+trace_kvm_sev_snp_launch_start(start->policy);
+
+rc = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_START,
+   start, &fw_error);
+if (rc < 0) {
+error_report("%s: SNP_LAUNCH_START ret=%d fw_error=%d '%s'",
+__func__, rc, fw_error, fw_error_to_str(fw_error));
+return 1;
+}
+
+sev_set_guest_state(sev_common, SEV_STATE_LAUNCH_UPDATE);
+
+return 0;
+}
+
 static int
 sev_launch_start(SevGuestState *sev_guest)
 {
@@ -1173,7 +1195,12 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error 
**errp)
 goto err;
 }
 
-ret = sev_launch_start(SEV_GUEST(sev_common));
+if (sev_snp_enabled()) {
+ret = sev_snp_launch_start(SEV_SNP_GUEST(sev_common));
+} else {
+ret = sev_launch_start(SEV_GUEST(sev_common));
+}
+
 if (ret) {
 error_setg(errp, "%s: failed to create encryption context", __func__);
 goto err;
diff --git a/target/i386/trace-events b/target/i386/trace-events
index 2cd8726eeb..18cc14b956 100644
--- a/target/i386/trace-events
+++ b/target/i386/trace-events
@@ -11,3 +11,4 @@ kvm_sev_launch_measurement(const char *value) "data %s"
 kvm_sev_launch_finish(void) ""
 kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) 
"hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d"
 kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s 
data %s"
+kvm_sev_snp_launch_start(uint64_t policy) "policy 0x%" PRIx64
-- 
2.25.1




[RFC PATCH v2 03/12] i386/sev: introduce 'sev-snp-guest' object

2021-08-26 Thread Michael Roth
From: Brijesh Singh 

SEV-SNP support relies on a different set of properties/state than the
existing 'sev-guest' object. This patch introduces the 'sev-snp-guest'
object, which can be used to configure an SEV-SNP guest. For example,
a default-configured SEV-SNP guest with no additional information
passed in for use with attestation:

  -object sev-snp-guest,id=sev0

or a fully-specified SEV-SNP guest where all spec-defined binary
blobs are passed in as base64-encoded strings:

  -object sev-snp-guest,id=sev0, \
policy=0x3, \
init-flags=0, \
id-block=YWFhYWFhYWFhYWFhYWFhCg==, \
id-auth=CxHK/OKLkXGn/KpAC7Wl1FSiisWDbGTEKz..., \
auth-key-enabled=on, \
host-data=LNkCWBRC5CcdGXirbNUV1OrsR28s..., \
guest-visible-workarounds=AA==, \

See the QAPI schema updates included in this patch for more usage
details.

In some cases these blobs may be up to 4096 characters, but this is
generally well below the default limit for linux hosts where
command-line sizes are defined by the sysconf-configurable ARG_MAX
value, which defaults to 2097152 characters for Ubuntu hosts, for
example.

Co-developed-by: Michael Roth 
Signed-off-by: Brijesh Singh 
Signed-off-by: Michael Roth 
---
 docs/amd-memory-encryption.txt |  77 ++-
 qapi/qom.json  |  60 
 target/i386/sev.c  | 245 -
 3 files changed, 379 insertions(+), 3 deletions(-)

diff --git a/docs/amd-memory-encryption.txt b/docs/amd-memory-encryption.txt
index ffca382b5f..0d82e67fa1 100644
--- a/docs/amd-memory-encryption.txt
+++ b/docs/amd-memory-encryption.txt
@@ -22,8 +22,8 @@ support for notifying a guest's operating system when certain 
types of VMEXITs
 are about to occur. This allows the guest to selectively share information with
 the hypervisor to satisfy the requested function.
 
-Launching
--
+Launching (SEV and SEV-ES)
+--
 Boot images (such as bios) must be encrypted before a guest can be booted. The
 MEMORY_ENCRYPT_OP ioctl provides commands to encrypt the images: LAUNCH_START,
 LAUNCH_UPDATE_DATA, LAUNCH_MEASURE and LAUNCH_FINISH. These four commands
@@ -113,6 +113,79 @@ a SEV-ES guest:
  - Requires in-kernel irqchip - the burden is placed on the hypervisor to
manage booting APs.
 
+Launching (SEV-SNP)
+---
+Boot images (such as bios) must be encrypted before a guest can be booted. The
+MEMORY_ENCRYPT_OP ioctl provides commands to encrypt the images:
+KVM_SNP_INIT, SNP_LAUNCH_START, SNP_LAUNCH_UPDATE, and SNP_LAUNCH_FINISH. These
+four commands together generate a fresh memory encryption key for the VM,
+encrypt the boot images for a successful launch.
+
+KVM_SNP_INIT is called first to initialize the SEV-SNP firmware and SNP
+features in the KVM. The feature flags value can be provided through the
+'init-flags' property of the 'sev-snp-guest' object.
+
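++------------+-------+----------+---------------------------------+
+| key        | type  | default  | meaning                         |
++------------+-------+----------+---------------------------------+
+| init_flags | hex   | 0        | SNP feature flags               |
++------------+-------+----------+---------------------------------+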
+
+Note: currently the init_flags must be zero.
+
+SNP_LAUNCH_START is called first to create a cryptographic launch context
+within the firmware. To create this context, guest owner must provide a guest
+policy and other parameters as described in the SEV-SNP firmware
+specification. The launch parameters should be specified as described in the
+QAPI schema for the 'sev-snp-guest' object.
+
+The SNP_LAUNCH_START uses the following parameters (see the SEV-SNP
+specification for more details):
+
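++--------+-------+----------+----------------------------------------------+
+| key    | type  | default  | meaning                                      |
++--------+-------+----------+----------------------------------------------+
+| policy | hex   | 0x3      | a 64-bit guest policy                        |
+| imi_en | bool  | 0        | 1 when IMI is enabled                        |
+| ma_end | bool  | 0        | 1 when migration agent is used               |
+| gosvw  | string| 0        | 16-byte base64 encoded string for the guest  |
+|        |       |          | OS visible workaround.                       |
++--------+-------+----------+----------------------------------------------+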
+
+SNP_LAUNCH_UPDATE encrypts the memory region using the cryptographic context
+created via the SNP_LAUNCH_START command. If required, this command can be 
called
+multiple times to encrypt different memory regions. The command also calculates
+the measurement of the memory contents as it encrypts.
+
+SNP_LAUNCH_FINISH finalizes the guest launch flow. Optionally, while finalizing
+the launch the firmware can perform checks on the launch digest computing
+through the SNP_LAUNCH_UPDATE. To perform the check the user must supply
+the id block, 

[RFC PATCH v2 08/12] target/i386: set SEV-SNP CPUID bit when SNP enabled

2021-08-26 Thread Michael Roth
SNP guests will rely on this bit to determine certain feature support.

Signed-off-by: Michael Roth 
---
 target/i386/cpu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 97e250e876..f0b441f692 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -5619,6 +5619,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count,
 case 0x8000001F:
 *eax = sev_enabled() ? 0x2 : 0;
 *eax |= sev_es_enabled() ? 0x8 : 0;
+*eax |= sev_snp_enabled() ? 0x10 : 0;
 *ebx = sev_get_cbit_position();
 *ebx |= sev_get_reduced_phys_bits() << 6;
 *ecx = 0;
-- 
2.25.1




[RFC PATCH v2 04/12] i386/sev: initialize SNP context

2021-08-26 Thread Michael Roth
From: Brijesh Singh 

When SEV-SNP is enabled, the KVM_SNP_INIT command is used to initialize
the platform. The command checks whether SNP is enabled in the KVM, if
enabled then it allocates a new ASID from the SNP pool and calls the
firmware to initialize all the resources.

Signed-off-by: Brijesh Singh 
Signed-off-by: Michael Roth 
---
 target/i386/sev-stub.c |  6 ++
 target/i386/sev.c  | 27 ---
 target/i386/sev_i386.h |  1 +
 3 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/target/i386/sev-stub.c b/target/i386/sev-stub.c
index 0227cb5177..e4fb8e882e 100644
--- a/target/i386/sev-stub.c
+++ b/target/i386/sev-stub.c
@@ -81,3 +81,9 @@ sev_get_attestation_report(const char *mnonce, Error **errp)
 error_setg(errp, "SEV is not available in this QEMU");
 return NULL;
 }
+
+bool
+sev_snp_enabled(void)
+{
+return false;
+}
diff --git a/target/i386/sev.c b/target/i386/sev.c
index ba08b7d3ab..b8bd6ed9ea 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -614,12 +614,21 @@ sev_enabled(void)
 return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON);
 }
 
+bool
+sev_snp_enabled(void)
+{
+ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs;
+
+return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_SNP_GUEST);
+}
+
 bool
 sev_es_enabled(void)
 {
 ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs;
 
-return sev_enabled() && (SEV_GUEST(cgs)->policy & SEV_POLICY_ES);
+return sev_snp_enabled() ||
+(sev_enabled() && SEV_GUEST(cgs)->policy & SEV_POLICY_ES);
 }
 
 uint64_t
@@ -1074,6 +1083,7 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error 
**errp)
 uint32_t ebx;
 uint32_t host_cbitpos;
 struct sev_user_data_status status = {};
+void *init_args = NULL;
 
 if (!sev_common) {
 return 0;
@@ -1126,7 +1136,18 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error 
**errp)
 sev_common->api_major = status.api_major;
 sev_common->api_minor = status.api_minor;
 
-if (sev_es_enabled()) {
+if (sev_snp_enabled()) {
+SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(sev_common);
+if (!kvm_kernel_irqchip_allowed()) {
+error_report("%s: SEV-SNP guests require in-kernel irqchip 
support",
+ __func__);
+goto err;
+}
+
+cmd = KVM_SEV_SNP_INIT;
+init_args = (void *)&sev_snp_guest->kvm_init_conf;
+
+} else if (sev_es_enabled()) {
 if (!kvm_kernel_irqchip_allowed()) {
 error_report("%s: SEV-ES guests require in-kernel irqchip support",
  __func__);
@@ -1145,7 +1166,7 @@ int sev_kvm_init(ConfidentialGuestSupport *cgs, Error 
**errp)
 }
 
 trace_kvm_sev_init();
-ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error);
+ret = sev_ioctl(sev_common->sev_fd, cmd, init_args, &fw_error);
 if (ret) {
 error_setg(errp, "%s: failed to initialize ret=%d fw_error=%d '%s'",
__func__, ret, fw_error, fw_error_to_str(fw_error));
diff --git a/target/i386/sev_i386.h b/target/i386/sev_i386.h
index ae6d840478..e0e1a599be 100644
--- a/target/i386/sev_i386.h
+++ b/target/i386/sev_i386.h
@@ -29,6 +29,7 @@
 #define SEV_POLICY_SEV  0x20
 
 extern bool sev_es_enabled(void);
+extern bool sev_snp_enabled(void);
 extern uint64_t sev_get_me_mask(void);
 extern SevInfo *sev_get_info(void);
 extern uint32_t sev_get_cbit_position(void);
-- 
2.25.1




[RFC PATCH v2 01/12] i386/sev: introduce "sev-common" type to encapsulate common SEV state

2021-08-26 Thread Michael Roth
Currently all SEV/SEV-ES functionality is managed through a single
'sev-guest' QOM type. With upcoming support for SEV-SNP, taking this
same approach won't work well since some of the properties/state
managed by 'sev-guest' is not applicable to SEV-SNP, which will instead
rely on a new QOM type with its own set of properties/state.

To prepare for this, this patch moves common state into an abstract
'sev-common' parent type to encapsulate properties/state that is
common to both SEV/SEV-ES and SEV-SNP, leaving only SEV/SEV-ES-specific
properties/state in the current 'sev-guest' type. This should not
affect current behavior or command-line options.

As part of this patch, some related changes are also made:

  - a static 'sev_guest' variable is currently used to keep track of
the 'sev-guest' instance. SEV-SNP would similarly introduce an
'sev_snp_guest' static variable. But these instances are now
available via qdev_get_machine()->cgs, so switch to using that
instead and drop the static variable.

  - 'sev_guest' is currently used as the name for the static variable
holding a pointer to the 'sev-guest' instance. Re-purpose the name
as a local variable referring the 'sev-guest' instance, and use
that consistently throughout the code so it can be easily
distinguished from sev-common/sev-snp-guest instances.

  - 'sev' is generally used as the name for local variables holding a
pointer to the 'sev-guest' instance. In cases where that now points
to common state, use the name 'sev_common'; in cases where that now
points to state specific to 'sev-guest' instance, use the name
'sev_guest'

Signed-off-by: Michael Roth 
---
 qapi/qom.json |  34 +++--
 target/i386/sev.c | 329 +++---
 2 files changed, 214 insertions(+), 149 deletions(-)

diff --git a/qapi/qom.json b/qapi/qom.json
index a25616bc7a..211e083727 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -735,12 +735,29 @@
   'data': { '*filename': 'str' } }
 
 ##
-# @SevGuestProperties:
+# @SevCommonProperties:
 #
-# Properties for sev-guest objects.
+# Properties common to objects that are derivatives of sev-common.
 #
 # @sev-device: SEV device to use (default: "/dev/sev")
 #
+# @cbitpos: C-bit location in page table entry (default: 0)
+#
+# @reduced-phys-bits: number of bits in physical addresses that become
+# unavailable when SEV is enabled
+#
+# Since: 2.12
+##
+{ 'struct': 'SevCommonProperties',
+  'data': { '*sev-device': 'str',
+'*cbitpos': 'uint32',
+'reduced-phys-bits': 'uint32' } }
+
+##
+# @SevGuestProperties:
+#
+# Properties for sev-guest objects.
+#
 # @dh-cert-file: guest owners DH certificate (encoded with base64)
 #
 # @session-file: guest owners session parameters (encoded with base64)
@@ -749,21 +766,14 @@
 #
 # @handle: SEV firmware handle (default: 0)
 #
-# @cbitpos: C-bit location in page table entry (default: 0)
-#
-# @reduced-phys-bits: number of bits in physical addresses that become
-# unavailable when SEV is enabled
-#
 # Since: 2.12
 ##
 { 'struct': 'SevGuestProperties',
-  'data': { '*sev-device': 'str',
-'*dh-cert-file': 'str',
+  'base': 'SevCommonProperties',
+  'data': { '*dh-cert-file': 'str',
 '*session-file': 'str',
 '*policy': 'uint32',
-'*handle': 'uint32',
-'*cbitpos': 'uint32',
-'reduced-phys-bits': 'uint32' } }
+'*handle': 'uint32' } }
 
 ##
 # @ObjectType:
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 83df8c09f6..6acebfbd53 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -34,6 +34,8 @@
 #include "exec/confidential-guest-support.h"
 #include "hw/i386/pc.h"
 
+#define TYPE_SEV_COMMON "sev-common"
+OBJECT_DECLARE_SIMPLE_TYPE(SevCommonState, SEV_COMMON)
 #define TYPE_SEV_GUEST "sev-guest"
 OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST)
 
@@ -48,32 +50,38 @@ OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST)
  * -object sev-guest,id=sev0 \
  * -machine ...,memory-encryption=sev0
  */
-struct SevGuestState {
+struct SevCommonState {
 ConfidentialGuestSupport parent_obj;
 
 /* configuration parameters */
 char *sev_device;
-uint32_t policy;
-char *dh_cert_file;
-char *session_file;
 uint32_t cbitpos;
 uint32_t reduced_phys_bits;
 
 /* runtime state */
-uint32_t handle;
 uint8_t api_major;
 uint8_t api_minor;
 uint8_t build_id;
 uint64_t me_mask;
 int sev_fd;
 SevState state;
-gchar *measurement;
 
 uint32_t reset_cs;
 uint32_t reset_ip;
 bool reset_data_valid;
 };
 
+struct SevGuestState {
+SevCommonState sev_common;
+gchar *measurement;
+
+/* configuration parameters */
+uint32_t handle;
+uint32_t policy;
+char *dh_cert_file;
+char *session_file;
+};
+
 #define DEFAULT_GUEST_POLICY0x1 /* disable debug */
 #define DEFAULT_SEV_DEVICE  

[RFC PATCH v2 06/12] i386/sev: add support to encrypt BIOS when SEV-SNP is enabled

2021-08-26 Thread Michael Roth
From: Brijesh Singh 

The KVM_SEV_SNP_LAUNCH_UPDATE command is used for encrypting the BIOS
image used for booting the SEV-SNP guest.

Signed-off-by: Brijesh Singh 
Signed-off-by: Michael Roth 
---
 hw/i386/pc_sysfw.c   |  7 ---
 include/sysemu/sev.h |  2 +-
 target/i386/sev-stub.c   |  2 +-
 target/i386/sev.c| 40 ++--
 target/i386/trace-events |  1 +
 5 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c
index 68d6b1f783..54ccf13c0e 100644
--- a/hw/i386/pc_sysfw.c
+++ b/hw/i386/pc_sysfw.c
@@ -149,6 +149,7 @@ static void pc_system_flash_map(PCMachineState *pcms,
 void *flash_ptr;
 int flash_size;
 int ret;
+hwaddr gpa;
 
 assert(PC_MACHINE_GET_CLASS(pcms)->pci_enabled);
 
@@ -182,11 +183,11 @@ static void pc_system_flash_map(PCMachineState *pcms,
 }
 
 total_size += size;
+gpa = 0x1ULL - total_size; /* where the flash is mapped */
 qdev_prop_set_uint32(DEVICE(system_flash), "num-blocks",
  size / FLASH_SECTOR_SIZE);
 sysbus_realize_and_unref(SYS_BUS_DEVICE(system_flash), _fatal);
-sysbus_mmio_map(SYS_BUS_DEVICE(system_flash), 0,
-0x1ULL - total_size);
+sysbus_mmio_map(SYS_BUS_DEVICE(system_flash), 0, gpa);
 
 if (i == 0) {
 flash_mem = pflash_cfi01_get_memory(system_flash);
@@ -208,7 +209,7 @@ static void pc_system_flash_map(PCMachineState *pcms,
 exit(1);
 }
 
-sev_encrypt_flash(flash_ptr, flash_size, _fatal);
+sev_encrypt_flash(gpa, flash_ptr, flash_size, _fatal);
 }
 }
 }
diff --git a/include/sysemu/sev.h b/include/sysemu/sev.h
index 94d821d737..78e3bf97e8 100644
--- a/include/sysemu/sev.h
+++ b/include/sysemu/sev.h
@@ -18,7 +18,7 @@
 
 bool sev_enabled(void);
 int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp);
-int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp);
+int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp);
 int sev_inject_launch_secret(const char *hdr, const char *secret,
  uint64_t gpa, Error **errp);
 
diff --git a/target/i386/sev-stub.c b/target/i386/sev-stub.c
index e4fb8e882e..8b35704937 100644
--- a/target/i386/sev-stub.c
+++ b/target/i386/sev-stub.c
@@ -56,7 +56,7 @@ int sev_inject_launch_secret(const char *hdr, const char 
*secret,
 return 1;
 }
 
-int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp)
+int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp)
 {
 return 0;
 }
diff --git a/target/i386/sev.c b/target/i386/sev.c
index 51689d4fa4..867c0cb457 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -946,6 +946,35 @@ out:
 return ret;
 }
 
+static int
+sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, hwaddr gpa, uint8_t 
*addr,
+  uint64_t len, int type)
+{
+int ret, fw_error;
+struct kvm_sev_snp_launch_update update = {0};
+
+if (!addr || !len) {
+error_report("%s: SNP_LAUNCH_UPDATE called with invalid address / 
length: %lx / %lx",
+__func__, gpa, len);
+return 1;
+}
+
+update.uaddr = (__u64)(unsigned long)addr;
+update.start_gfn = gpa >> TARGET_PAGE_BITS;
+update.len = len;
+update.page_type = type;
+trace_kvm_sev_snp_launch_update(addr, len, type);
+ret = sev_ioctl(SEV_COMMON(sev_snp_guest)->sev_fd,
+KVM_SEV_SNP_LAUNCH_UPDATE,
+, _error);
+if (ret) {
+error_report("%s: SNP_LAUNCH_UPDATE ret=%d fw_error=%d '%s'",
+__func__, ret, fw_error, fw_error_to_str(fw_error));
+}
+
+return ret;
+}
+
 static int
 sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len)
 {
@@ -1219,7 +1248,7 @@ err:
 }
 
 int
-sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp)
+sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp)
 {
 SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs);
 
@@ -1229,7 +1258,14 @@ sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error 
**errp)
 
 /* if SEV is in update state then encrypt the data else do nothing */
 if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) {
-int ret = sev_launch_update_data(SEV_GUEST(sev_common), ptr, len);
+int ret;
+
+if (sev_snp_enabled()) {
+ret = sev_snp_launch_update(SEV_SNP_GUEST(sev_common), gpa, ptr,
+len, KVM_SEV_SNP_PAGE_TYPE_NORMAL);
+} else {
+ret = sev_launch_update_data(SEV_GUEST(sev_common), ptr, len);
+}
 if (ret < 0) {
 error_setg(errp, "failed to encrypt pflash rom");
 return ret;
diff --git a/target/i386/trace-events b/target/i386/trace-events
index 

[RFC PATCH v2 02/12] linux-header: add the SNP specific command

2021-08-26 Thread Michael Roth
From: Brijesh Singh 

Sync the kvm.h with the kernel to include the SNP specific commands.

Signed-off-by: Brijesh Singh 
Signed-off-by: Michael Roth 
---
 linux-headers/linux/kvm.h | 50 +++
 1 file changed, 50 insertions(+)

diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index bcaf66cc4d..486c12b4f7 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -1712,6 +1712,12 @@ enum sev_cmd_id {
/* Guest Migration Extension */
KVM_SEV_SEND_CANCEL,
 
+   /* SNP specific commands */
+   KVM_SEV_SNP_INIT,
+   KVM_SEV_SNP_LAUNCH_START,
+   KVM_SEV_SNP_LAUNCH_UPDATE,
+   KVM_SEV_SNP_LAUNCH_FINISH,
+
KVM_SEV_NR_MAX,
 };
 
@@ -1808,6 +1814,50 @@ struct kvm_sev_receive_update_data {
__u32 trans_len;
 };
 
+struct kvm_snp_init {
+   __u64 flags;
+};
+
+struct kvm_sev_snp_launch_start {
+   __u64 policy;
+   __u64 ma_uaddr;
+   __u8 ma_en;
+   __u8 imi_en;
+   __u8 gosvw[16];
+   __u8 pad[6];
+};
+
+#define KVM_SEV_SNP_PAGE_TYPE_NORMAL   0x1
+#define KVM_SEV_SNP_PAGE_TYPE_VMSA 0x2
+#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3
+#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED   0x4
+#define KVM_SEV_SNP_PAGE_TYPE_SECRETS  0x5
+#define KVM_SEV_SNP_PAGE_TYPE_CPUID0x6
+
+struct kvm_sev_snp_launch_update {
+__u64 start_gfn;
+   __u64 uaddr;
+   __u32 len;
+   __u8 imi_page;
+   __u8 page_type;
+   __u8 vmpl3_perms;
+   __u8 vmpl2_perms;
+   __u8 vmpl1_perms;
+};
+
+#define KVM_SEV_SNP_ID_BLOCK_SIZE  96
+#define KVM_SEV_SNP_ID_AUTH_SIZE   4096
+#define KVM_SEV_SNP_FINISH_DATA_SIZE   32
+
+struct kvm_sev_snp_launch_finish {
+   __u64 id_block_uaddr;
+   __u64 id_auth_uaddr;
+   __u8 id_block_en;
+   __u8 auth_key_en;
+   __u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE];
+   __u8 pad[6];
+};
+
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
 #define KVM_DEV_ASSIGN_MASK_INTX   (1 << 2)
-- 
2.25.1




[RFC PATCH v2 12/12] i386/sev: update query-sev QAPI format to handle SEV-SNP

2021-08-26 Thread Michael Roth
Most of the current 'query-sev' command is relevant to both legacy
SEV/SEV-ES guests and SEV-SNP guests, with 2 exceptions:

  - 'policy' is a 64-bit field for SEV-SNP, not 32-bit, and
the meaning of the bit positions has changed
  - 'handle' is not relevant to SEV-SNP

To address this, this patch adds a new 'sev-type' field that can be
used as a discriminator to select between SEV and SEV-SNP-specific
fields/formats without breaking compatibility for existing management
tools (so long as management tools that add support for launching
SEV-SNP guests update their handling of query-sev appropriately).

The corresponding HMP command has also been fixed up similarly.

Signed-off-by: Michael Roth 
---
 qapi/misc-target.json  | 71 +-
 target/i386/monitor.c  | 29 +
 target/i386/sev.c  | 22 +++--
 target/i386/sev_i386.h |  3 ++
 4 files changed, 95 insertions(+), 30 deletions(-)

diff --git a/qapi/misc-target.json b/qapi/misc-target.json
index 3b05ad3dbf..80f994ff9b 100644
--- a/qapi/misc-target.json
+++ b/qapi/misc-target.json
@@ -81,6 +81,49 @@
'send-update', 'receive-update' ],
   'if': 'TARGET_I386' }
 
+##
+# @SevGuestType:
+#
+# An enumeration indicating the type of SEV guest being run.
+#
+# @sev: The guest is a legacy SEV or SEV-ES guest.
+# @sev-snp: The guest is an SEV-SNP guest.
+#
+# Since: 6.2
+##
+{ 'enum': 'SevGuestType',
+  'data': [ 'sev', 'sev-snp' ],
+  'if': 'TARGET_I386' }
+
+##
+# @SevGuestInfo:
+#
+# Information specific to legacy SEV/SEV-ES guests.
+#
+# @policy: SEV policy value
+#
+# @handle: SEV firmware handle
+#
+# Since: 2.12
+##
+{ 'struct': 'SevGuestInfo',
+  'data': { 'policy': 'uint32',
+'handle': 'uint32' },
+  'if': 'TARGET_I386' }
+
+##
+# @SevSnpGuestInfo:
+#
+# Information specific to SEV-SNP guests.
+#
+# @policy: SEV-SNP policy value
+#
+# Since: 6.2
+##
+{ 'struct': 'SevSnpGuestInfo',
+  'data': { 'policy': 'uint64' },
+  'if': 'TARGET_I386' }
+
 ##
 # @SevInfo:
 #
@@ -94,25 +137,25 @@
 #
 # @build-id: SEV FW build id
 #
-# @policy: SEV policy value
-#
 # @state: SEV guest state
 #
-# @handle: SEV firmware handle
+# @sev-type: Type of SEV guest being run
 #
 # Since: 2.12
 ##
-{ 'struct': 'SevInfo',
-'data': { 'enabled': 'bool',
-  'api-major': 'uint8',
-  'api-minor' : 'uint8',
-  'build-id' : 'uint8',
-  'policy' : 'uint32',
-  'state' : 'SevState',
-  'handle' : 'uint32'
-},
-  'if': 'TARGET_I386'
-}
+{ 'union': 'SevInfo',
+  'base': { 'enabled': 'bool',
+'api-major': 'uint8',
+'api-minor' : 'uint8',
+'build-id' : 'uint8',
+'state' : 'SevState',
+'sev-type' : 'SevGuestType' },
+  'discriminator': 'sev-type',
+  'data': {
+  'sev': 'SevGuestInfo',
+  'sev-snp': 'SevSnpGuestInfo' },
+  'if': 'TARGET_I386' }
+
 
 ##
 # @query-sev:
diff --git a/target/i386/monitor.c b/target/i386/monitor.c
index 119211f0b0..85a8bc2bef 100644
--- a/target/i386/monitor.c
+++ b/target/i386/monitor.c
@@ -692,20 +692,37 @@ void hmp_info_sev(Monitor *mon, const QDict *qdict)
 {
 SevInfo *info = sev_get_info();
 
-if (info && info->enabled) {
-monitor_printf(mon, "handle: %d\n", info->handle);
+if (!info || !info->enabled) {
+monitor_printf(mon, "SEV is not enabled\n");
+goto out;
+}
+
+if (sev_snp_enabled()) {
 monitor_printf(mon, "state: %s\n", SevState_str(info->state));
 monitor_printf(mon, "build: %d\n", info->build_id);
 monitor_printf(mon, "api version: %d.%d\n",
info->api_major, info->api_minor);
 monitor_printf(mon, "debug: %s\n",
-   info->policy & SEV_POLICY_NODBG ? "off" : "on");
-monitor_printf(mon, "key-sharing: %s\n",
-   info->policy & SEV_POLICY_NOKS ? "off" : "on");
+   info->u.sev_snp.policy & SEV_SNP_POLICY_DBG ? "on"
+   : "off");
+monitor_printf(mon, "SMT allowed: %s\n",
+   info->u.sev_snp.policy & SEV_SNP_POLICY_SMT ? "on"
+   : "off");
+monitor_printf(mon, "SEV type: %s\n", 
SevGuestType_str(info->sev_type));
 } else {
-monitor_printf(mon, "SEV is not enabled\n");
+monitor_printf(mon, "handle: %d\n", info->u.sev.handle);
+monitor_printf(mon, "state: %s\n", SevState_str(info->state));
+monitor_printf(mon, "build: %d\n", info->build_id);
+monitor_printf(mon, "api version: %d.%d\n",
+   info->api_major, info->api_minor);
+monitor_printf(mon, "debug: %s\n",
+   info->u.sev.policy & SEV_POLICY_NODBG ? "off" : "on");
+monitor_printf(mon, "key-sharing: %s\n",
+   info->u.sev.policy & 

[RFC PATCH v2 11/12] i386/sev: sev-snp: add support for CPUID validation

2021-08-26 Thread Michael Roth
SEV-SNP firmware allows a special guest page to be populated with a
table of guest CPUID values so that they can be validated through
firmware before being loaded into encrypted guest memory where they can
be used in place of hypervisor-provided values[1].

As part of SEV-SNP guest initialization, use this process to validate
the CPUID entries reported by KVM_GET_CPUID2 prior to initial guest
start.

[1]: SEV SNP Firmware ABI Specification, Rev. 0.8, 8.13.2.6

Signed-off-by: Michael Roth 
---
 target/i386/sev.c | 146 +-
 1 file changed, 143 insertions(+), 3 deletions(-)

diff --git a/target/i386/sev.c b/target/i386/sev.c
index 0009c93d28..72a6146295 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -153,6 +153,36 @@ static const char *const sev_fw_errlist[] = {
 
 #define SEV_FW_MAX_ERROR  ARRAY_SIZE(sev_fw_errlist)
 
+/* <linux/kvm.h> doesn't expose this, so re-use the max from kvm.c */
+#define KVM_MAX_CPUID_ENTRIES 100
+
+typedef struct KvmCpuidInfo {
+struct kvm_cpuid2 cpuid;
+struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES];
+} KvmCpuidInfo;
+
+#define SNP_CPUID_FUNCTION_MAXCOUNT 64
+#define SNP_CPUID_FUNCTION_UNKNOWN 0x
+
+typedef struct {
+uint32_t eax_in;
+uint32_t ecx_in;
+uint64_t xcr0_in;
+uint64_t xss_in;
+uint32_t eax;
+uint32_t ebx;
+uint32_t ecx;
+uint32_t edx;
+uint64_t reserved;
+} __attribute__((packed)) SnpCpuidFunc;
+
+typedef struct {
+uint32_t count;
+uint32_t reserved1;
+uint64_t reserved2;
+SnpCpuidFunc entries[SNP_CPUID_FUNCTION_MAXCOUNT];
+} __attribute__((packed)) SnpCpuidInfo;
+
 static int
 sev_ioctl(int fd, int cmd, void *data, int *error)
 {
@@ -1141,6 +1171,117 @@ detect_first_overlap(uint64_t start, uint64_t end, 
Range *range_list,
 return overlap;
 }
 
+static int
+sev_snp_cpuid_info_fill(SnpCpuidInfo *snp_cpuid_info,
+const KvmCpuidInfo *kvm_cpuid_info)
+{
+size_t i;
+
+memset(snp_cpuid_info, 0, sizeof(*snp_cpuid_info));
+
+for (i = 0; kvm_cpuid_info->entries[i].function != 0x; i++) {
+const struct kvm_cpuid_entry2 *kvm_cpuid_entry;
+SnpCpuidFunc *snp_cpuid_entry;
+
+kvm_cpuid_entry = _cpuid_info->entries[i];
+snp_cpuid_entry = _cpuid_info->entries[i];
+
+snp_cpuid_entry->eax_in = kvm_cpuid_entry->function;
+if (kvm_cpuid_entry->flags == KVM_CPUID_FLAG_SIGNIFCANT_INDEX) {
+snp_cpuid_entry->ecx_in = kvm_cpuid_entry->index;
+}
+snp_cpuid_entry->eax = kvm_cpuid_entry->eax;
+snp_cpuid_entry->ebx = kvm_cpuid_entry->ebx;
+snp_cpuid_entry->ecx = kvm_cpuid_entry->ecx;
+snp_cpuid_entry->edx = kvm_cpuid_entry->edx;
+
+if (snp_cpuid_entry->eax_in == 0xD &&
+(snp_cpuid_entry->ecx_in == 0x0 || snp_cpuid_entry->ecx_in == 
0x1)) {
+snp_cpuid_entry->ebx = 0x240;
+}
+}
+
+if (i > SNP_CPUID_FUNCTION_MAXCOUNT) {
+error_report("SEV-SNP: CPUID count '%lu' exceeds max '%u'",
+ i, SNP_CPUID_FUNCTION_MAXCOUNT);
+return -1;
+}
+
+snp_cpuid_info->count = i;
+
+return 0;
+}
+
+static void
+sev_snp_cpuid_report_mismatches(SnpCpuidInfo *old,
+SnpCpuidInfo *new)
+{
+size_t i;
+
+for (i = 0; i < old->count; i++) {
+SnpCpuidFunc *old_func, *new_func;
+
+old_func = >entries[i];
+new_func = >entries[i];
+
+if (memcmp(old_func, new_func, sizeof(SnpCpuidFunc))) {
+error_report("SEV-SNP: CPUID validation failed for function %x, 
index: %x.\n"
+ "provided: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 
0x%08x\n"
+ "expected: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 
0x%08x",
+ old_func->eax_in, old_func->ecx_in,
+ old_func->eax, old_func->ebx, old_func->ecx, 
old_func->edx,
+ new_func->eax, new_func->ebx, new_func->ecx, 
new_func->edx);
+}
+}
+}
+
+static int
+sev_snp_launch_update_cpuid(uint32_t cpuid_addr, uint32_t cpuid_len)
+{
+KvmCpuidInfo kvm_cpuid_info;
+SnpCpuidInfo snp_cpuid_info;
+CPUState *cs = first_cpu;
+MemoryRegion *mr = NULL;
+void *snp_cpuid_hva;
+int ret;
+
+snp_cpuid_hva = gpa2hva(, cpuid_addr, cpuid_len, NULL);
+if (!snp_cpuid_hva) {
+error_report("SEV-SNP: unable to access CPUID memory range at GPA %d",
+ cpuid_addr);
+return 1;
+}
+
+/* get the cpuid list from KVM */
+memset(_cpuid_info.entries, 0xFF,
+   KVM_MAX_CPUID_ENTRIES * sizeof(struct kvm_cpuid_entry2));
+kvm_cpuid_info.cpuid.nent = KVM_MAX_CPUID_ENTRIES;
+
+ret = kvm_vcpu_ioctl(cs, KVM_GET_CPUID2, _cpuid_info);
+if (ret) {
+error_report("SEV-SNP: unable to query CPUID values for CPU: '%s'",
+ strerror(-ret));
+}
+
+ret = 

[PATCH 0/2] virtiofsd: Add capability to block xattrs

2021-08-26 Thread Vivek Goyal
As of now we have a knob "-o xattr/no_xattr" which either enables
all xattrs or disables all xattrs.

We need something more fine grained where we can selectively disable
only certain xattrs (and not all).

For example, in some cases we want to disable the "security.selinux"
xattr. This is equivalent to virtiofs not supporting security.selinux,
and the guest kernel will fall back to a single label for the whole fs
(virtiofs_t).

So add an option "-o block_xattr=<list-of-xattrs>" which will allow
specifying a list of xattrs to block.

Vivek Goyal (2):
  virtiofsd: Add an array to keep track of blocked xattrs
  virtiofsd: Add option "block_xattr=" to block certain xattrs

 docs/tools/virtiofsd.rst |  17 
 tools/virtiofsd/helper.c |   3 +
 tools/virtiofsd/passthrough_ll.c | 166 ---
 3 files changed, 171 insertions(+), 15 deletions(-)

-- 
2.31.1




[PATCH v2 39/43] bsd-user: move gemu_log to later in the file

2021-08-26 Thread imp
From: Warner Losh 

Signed-off-by: Warner Losh 
Acked-by: Richard Henderson 
---
 bsd-user/main.c | 17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/bsd-user/main.c b/bsd-user/main.c
index 57669ad7b7..d60a0a7ee2 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -69,14 +69,6 @@ unsigned long target_dflssiz = TARGET_DFLSSIZ;   /* initial 
data size limit */
 unsigned long target_maxssiz = TARGET_MAXSSIZ;   /* max stack size */
 unsigned long target_sgrowsiz = TARGET_SGROWSIZ; /* amount to grow stack */
 
-void gemu_log(const char *fmt, ...)
-{
-va_list ap;
-
-va_start(ap, fmt);
-vfprintf(stderr, fmt, ap);
-va_end(ap);
-}
 
 void fork_start(void)
 {
@@ -165,6 +157,15 @@ void init_task_state(TaskState *ts)
 ts->sigqueue_table[i].next = NULL;
 }
 
+void gemu_log(const char *fmt, ...)
+{
+va_list ap;
+
+va_start(ap, fmt);
+vfprintf(stderr, fmt, ap);
+va_end(ap);
+}
+
 static void
 adjust_ssize(void)
 {
-- 
2.32.0




[PATCH v2 30/43] bsd-user: Remove dead #ifdefs from elfload.c

2021-08-26 Thread imp
From: Warner Losh 

LOW_ELF_STACK doesn't exist on FreeBSD and likely never will. Remove it.
Likewise, remove an #if 0 block that's not useful.

Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/elfload.c | 20 
 1 file changed, 20 deletions(-)

diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c
index a09f8fb315..c0787a4e52 100644
--- a/bsd-user/elfload.c
+++ b/bsd-user/elfload.c
@@ -558,9 +558,6 @@ int load_elf_binary(struct bsd_binprm *bprm, struct 
target_pt_regs *regs,
 abi_ulong elf_entry, interp_load_addr = 0;
 abi_ulong start_code, end_code, start_data, end_data;
 abi_ulong reloc_func_desc = 0;
-#ifdef LOW_ELF_STACK
-abi_ulong elf_stack = ~((abi_ulong)0UL);
-#endif
 
 load_addr = 0;
 load_bias = 0;
@@ -761,11 +758,6 @@ int load_elf_binary(struct bsd_binprm *bprm, struct 
target_pt_regs *regs,
 exit(-1);
 }
 
-#ifdef LOW_ELF_STACK
-if (TARGET_ELF_PAGESTART(elf_ppnt->p_vaddr) < elf_stack)
-elf_stack = TARGET_ELF_PAGESTART(elf_ppnt->p_vaddr);
-#endif
-
 if (!load_addr_set) {
 load_addr_set = 1;
 load_addr = elf_ppnt->p_vaddr - elf_ppnt->p_offset;
@@ -823,9 +815,6 @@ int load_elf_binary(struct bsd_binprm *bprm, struct 
target_pt_regs *regs,
 
 close(bprm->fd);
 
-#ifdef LOW_ELF_STACK
-info->start_stack = bprm->p = elf_stack - 4;
-#endif
 bprm->p = target_create_elf_tables(bprm->p, bprm->argc, bprm->envc, 
bprm->stringp,
_ex, load_addr, load_bias, 
interp_load_addr, info);
 info->load_addr = reloc_func_desc;
@@ -842,15 +831,6 @@ int load_elf_binary(struct bsd_binprm *bprm, struct 
target_pt_regs *regs,
 
 padzero(elf_bss, elf_brk);
 
-#if 0
-printf("(start_brk) %x\n" , info->start_brk);
-printf("(end_code) %x\n" , info->end_code);
-printf("(start_code) %x\n" , info->start_code);
-printf("(end_data) %x\n" , info->end_data);
-printf("(start_stack) %x\n" , info->start_stack);
-printf("(brk) %x\n" , info->brk);
-#endif
-
 info->entry = elf_entry;
 
 return 0;
-- 
2.32.0




[PATCH v2 34/43] bsd-user: Add target_arch_reg to describe a target's register set

2021-08-26 Thread imp
From: Warner Losh 

target_reg_t describes the normal registers. target_fpreg_t describes the
floating point registers. target_copy_regs copies the registers out of
the CPU context for things like core dumps.

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/i386/target_arch_reg.h   | 82 +++
 bsd-user/x86_64/target_arch_reg.h | 92 +++
 2 files changed, 174 insertions(+)
 create mode 100644 bsd-user/i386/target_arch_reg.h
 create mode 100644 bsd-user/x86_64/target_arch_reg.h

diff --git a/bsd-user/i386/target_arch_reg.h b/bsd-user/i386/target_arch_reg.h
new file mode 100644
index 00..1fce1daf01
--- /dev/null
+++ b/bsd-user/i386/target_arch_reg.h
@@ -0,0 +1,82 @@
+/*
+ *  FreeBSD i386 register structures
+ *
+ *  Copyright (c) 2015 Stacey Son
+ *  All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _TARGET_ARCH_REG_H_
+#define _TARGET_ARCH_REG_H_
+
+/* See sys/i386/include/reg.h */
+typedef struct target_reg {
+uint32_tr_fs;
+uint32_tr_es;
+uint32_tr_ds;
+uint32_tr_edi;
+uint32_tr_esi;
+uint32_tr_ebp;
+uint32_tr_isp;
+uint32_tr_ebx;
+uint32_tr_edx;
+uint32_tr_ecx;
+uint32_tr_eax;
+uint32_tr_trapno;
+uint32_tr_err;
+uint32_tr_eip;
+uint32_tr_cs;
+uint32_tr_eflags;
+uint32_tr_esp;
+uint32_tr_ss;
+uint32_tr_gs;
+} target_reg_t;
+
+typedef struct target_fpreg {
+uint32_tfpr_env[7];
+uint8_t fpr_acc[8][10];
+uint32_tfpr_ex_sw;
+uint8_t fpr_pad[64];
+} target_fpreg_t;
+
+static inline void target_copy_regs(target_reg_t *regs, const CPUX86State *env)
+{
+
+regs->r_fs = env->segs[R_FS].selector & 0x;
+regs->r_es = env->segs[R_ES].selector & 0x;
+regs->r_ds = env->segs[R_DS].selector & 0x;
+
+regs->r_edi = env->regs[R_EDI];
+regs->r_esi = env->regs[R_ESI];
+regs->r_ebp = env->regs[R_EBP];
+/* regs->r_isp = env->regs[R_ISP]; XXX */
+regs->r_ebx = env->regs[R_EBX];
+regs->r_edx = env->regs[R_EDX];
+regs->r_ecx = env->regs[R_ECX];
+regs->r_eax = env->regs[R_EAX];
+/* regs->r_trapno = env->regs[R_TRAPNO]; XXX */
+regs->r_err = env->error_code;  /* XXX ? */
+regs->r_eip = env->eip;
+
+regs->r_cs = env->segs[R_CS].selector & 0x;
+
+regs->r_eflags = env->eflags;
+regs->r_esp = env->regs[R_ESP];
+
+regs->r_ss = env->segs[R_SS].selector & 0x;
+regs->r_gs = env->segs[R_GS].selector & 0x;
+}
+
+#endif /* !_TARGET_ARCH_REG_H_ */
diff --git a/bsd-user/x86_64/target_arch_reg.h 
b/bsd-user/x86_64/target_arch_reg.h
new file mode 100644
index 00..00e9624517
--- /dev/null
+++ b/bsd-user/x86_64/target_arch_reg.h
@@ -0,0 +1,92 @@
+/*
+ *  FreeBSD amd64 register structures
+ *
+ *  Copyright (c) 2015 Stacey Son
+ *  All rights reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _TARGET_ARCH_REG_H_
+#define _TARGET_ARCH_REG_H_
+
+/* See sys/amd64/include/reg.h */
+typedef struct target_reg {
+uint64_tr_r15;
+uint64_tr_r14;
+uint64_tr_r13;
+uint64_tr_r12;
+uint64_tr_r11;
+uint64_tr_r10;
+uint64_tr_r9;
+uint64_tr_r8;
+uint64_tr_rdi;
+uint64_tr_rsi;
+uint64_tr_rbp;
+uint64_tr_rbx;
+uint64_tr_rdx;
+uint64_tr_rcx;
+uint64_tr_rax;
+uint32_tr_trapno;
+uint16_tr_fs;
+uint16_tr_gs;
+

[PATCH v2 33/43] bsd-user: update debugging in mmap.c

2021-08-26 Thread imp
From: Warner Losh 

Update the debugging code for new features and different targets.

Signed-off-by: Mikaël Urankar 
Signed-off-by: Sean Bruno 
Signed-off-by: Kyle Evans 
Signed-off-by: Warner Losh 
Acked-by: Richard Henderson 
---
 bsd-user/mmap.c | 45 -
 1 file changed, 28 insertions(+), 17 deletions(-)

diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c
index 03119b1f20..1ef4d271a8 100644
--- a/bsd-user/mmap.c
+++ b/bsd-user/mmap.c
@@ -68,8 +68,8 @@ int target_mprotect(abi_ulong start, abi_ulong len, int prot)
 int prot1, ret;
 
 #ifdef DEBUG_MMAP
-printf("mprotect: start=0x" TARGET_FMT_lx
-   " len=0x" TARGET_FMT_lx " prot=%c%c%c\n", start, len,
+printf("mprotect: start=0x" TARGET_ABI_FMT_lx
+   "len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c\n", start, len,
prot & PROT_READ ? 'r' : '-',
prot & PROT_WRITE ? 'w' : '-',
prot & PROT_EXEC ? 'x' : '-');
@@ -250,28 +250,37 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int 
prot,
 mmap_lock();
 #ifdef DEBUG_MMAP
 {
-printf("mmap: start=0x" TARGET_FMT_lx
-   " len=0x" TARGET_FMT_lx " prot=%c%c%c flags=",
+printf("mmap: start=0x" TARGET_ABI_FMT_lx
+   " len=0x" TARGET_ABI_FMT_lx " prot=%c%c%c flags=",
start, len,
prot & PROT_READ ? 'r' : '-',
prot & PROT_WRITE ? 'w' : '-',
prot & PROT_EXEC ? 'x' : '-');
+if (flags & MAP_ALIGNMENT_MASK)
+printf ("MAP_ALIGNED(%u) ", (flags & MAP_ALIGNMENT_MASK) >> 
MAP_ALIGNMENT_SHIFT);
+#if defined(__FreeBSD_version) && __FreeBSD_version >= 1200035
+if (flags & MAP_GUARD)
+printf("MAP_GUARD ");
+#endif
 if (flags & MAP_FIXED)
 printf("MAP_FIXED ");
-if (flags & MAP_ANON)
+if (flags & MAP_ANONYMOUS)
 printf("MAP_ANON ");
-switch (flags & TARGET_BSD_MAP_FLAGMASK) {
-case MAP_PRIVATE:
+#ifdef MAP_EXCL
+if (flags & MAP_EXCL)
+printf("MAP_EXCL ");
+#endif
+if (flags & MAP_PRIVATE)
 printf("MAP_PRIVATE ");
-break;
-case MAP_SHARED:
+if (flags & MAP_SHARED)
 printf("MAP_SHARED ");
-break;
-default:
-printf("[MAP_FLAGMASK=0x%x] ", flags & TARGET_BSD_MAP_FLAGMASK);
-break;
-}
-printf("fd=%d offset=" TARGET_FMT_lx "\n", fd, offset);
+if (flags & MAP_NOCORE)
+printf("MAP_NOCORE ");
+#ifdef MAP_STACK
+if (flags & MAP_STACK)
+printf("MAP_STACK ");
+#endif
+printf("fd=%d offset=0x%llx\n", fd, offset);
 }
 #endif
 
@@ -399,7 +408,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int 
prot,
 page_set_flags(start, start + len, prot | PAGE_VALID);
  the_end:
 #ifdef DEBUG_MMAP
-printf("ret=0x" TARGET_FMT_lx "\n", start);
+printf("ret=0x" TARGET_ABI_FMT_lx "\n", start);
 page_dump(stdout);
 printf("\n");
 #endif
@@ -416,7 +425,9 @@ int target_munmap(abi_ulong start, abi_ulong len)
 int prot, ret;
 
 #ifdef DEBUG_MMAP
-printf("munmap: start=0x%lx len=0x%lx\n", start, len);
+printf("munmap: start=0x" TARGET_ABI_FMT_lx " len=0x"
+   TARGET_ABI_FMT_lx "\n",
+   start, len);
 #endif
 if (start & ~TARGET_PAGE_MASK)
 return -EINVAL;
-- 
2.32.0




[PATCH v2 32/43] bsd-user: Make cpu_model and cpu_type visible to all of main.c

2021-08-26 Thread imp
From: Warner Losh 

cpu_model and cpu_type will be used in future commits, so move them from
main() scope to file scope.

Signed-off-by: Warner Losh 
Acked-by: Richard Henderson 
---
 bsd-user/main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bsd-user/main.c b/bsd-user/main.c
index f6643896f6..57669ad7b7 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -52,6 +52,8 @@
 int singlestep;
 unsigned long mmap_min_addr;
 uintptr_t guest_base;
+static const char *cpu_model;
+static const char *cpu_type;
 bool have_guest_base;
 unsigned long reserved_va;
 
@@ -198,8 +200,6 @@ static void save_proc_pathname(char *argv0)
 int main(int argc, char **argv)
 {
 const char *filename;
-const char *cpu_model;
-const char *cpu_type;
 const char *log_file = NULL;
 const char *log_mask = NULL;
 struct target_pt_regs regs1, *regs = 
-- 
2.32.0




[PATCH v2 42/43] bsd-user: Add '-0 argv0' option to bsd-user/main.c

2021-08-26 Thread imp
From: Colin Percival 

Previously it was impossible to emulate a program with a file name
different from its argv[0].  With this change, you can run
qemu -0 fakename realname args
which runs the program "realname" with an argv of "fakename args".

Signed-off-by: Colin Percival 
Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/main.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/bsd-user/main.c b/bsd-user/main.c
index b35bcf4d1e..ae25f4c773 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -268,6 +268,7 @@ int main(int argc, char **argv)
 char **target_environ, **wrk;
 envlist_t *envlist = NULL;
 bsd_type = HOST_DEFAULT_BSD_TYPE;
+char * argv0 = NULL;
 
 adjust_ssize();
 
@@ -390,6 +391,8 @@ int main(int argc, char **argv)
 do_strace = 1;
 } else if (!strcmp(r, "trace")) {
 trace_opt_parse(optarg);
+} else if (!strcmp(r, "0")) {
+argv0 = argv[optind++];
 } else {
 usage();
 }
@@ -413,6 +416,8 @@ int main(int argc, char **argv)
 usage();
 }
 filename = argv[optind];
+if (argv0)
+argv[optind] = argv0;
 
 if (!trace_init_backends()) {
 exit(1);
-- 
2.32.0




[PATCH v2 29/43] bsd-user: elf cleanup

2021-08-26 Thread imp
From: Warner Losh 

Move OS-dependent defines into target_os_elf.h. Move the
architecture-dependent stuff into target_arch_elf.h. Adjust elfload.c to use
target_create_elf_tables instead of create_elf_tables.

Signed-off-by: Warner Losh 
Signed-off-by: Stacey Son 
Signed-off-by: Kyle Evans 
Signed-off-by: Justin Hibbits 
Signed-off-by: Alexander Kabaev 
Acked-by: Richard Henderson 
---
 bsd-user/elfload.c   | 190 ---
 bsd-user/freebsd/target_os_elf.h | 149 
 bsd-user/netbsd/target_os_elf.h  | 143 +++
 bsd-user/openbsd/target_os_elf.h | 143 +++
 bsd-user/qemu.h  |   1 +
 5 files changed, 459 insertions(+), 167 deletions(-)
 create mode 100644 bsd-user/freebsd/target_os_elf.h
 create mode 100644 bsd-user/netbsd/target_os_elf.h
 create mode 100644 bsd-user/openbsd/target_os_elf.h

diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c
index 5ceb60b1c2..a09f8fb315 100644
--- a/bsd-user/elfload.c
+++ b/bsd-user/elfload.c
@@ -23,48 +23,17 @@
 #include "disas/disas.h"
 #include "qemu/path.h"
 
-#include "target_arch_elf.h"
-#include "target_os_thread.h"
-
-/* this flag is uneffective under linux too, should be deleted */
-#ifndef MAP_DENYWRITE
-#define MAP_DENYWRITE 0
-#endif
-
-/* should probably go in elf.h */
-#ifndef ELIBBAD
-#define ELIBBAD 80
-#endif
-
-#ifndef ELF_PLATFORM
-#define ELF_PLATFORM (NULL)
-#endif
-
-#ifndef ELF_HWCAP
-#define ELF_HWCAP 0
-#endif
+static abi_ulong target_auxents;   /* Where the AUX entries are in target */
+static size_t target_auxents_sz;   /* Size of AUX entries including AT_NULL */
 
-#ifdef TARGET_ABI32
-#undef ELF_CLASS
-#define ELF_CLASS ELFCLASS32
-#undef bswaptls
-#define bswaptls(ptr) bswap32s(ptr)
-#endif
+#include "target_os_elf.h"
+#include "target_os_stack.h"
+#include "target_os_thread.h"
 
 #include "elf.h"
 
-/* max code+data+bss space allocated to elf interpreter */
-#define INTERP_MAP_SIZE (32 * 1024 * 1024)
-
-/* max code+data+bss+brk space allocated to ET_DYN executables */
-#define ET_DYN_MAP_SIZE (128 * 1024 * 1024)
-
-/* Necessary parameters */
-#define TARGET_ELF_EXEC_PAGESIZE TARGET_PAGE_SIZE
-#define TARGET_ELF_PAGESTART(_v) ((_v) & ~(unsigned 
long)(TARGET_ELF_EXEC_PAGESIZE - 1))
-#define TARGET_ELF_PAGEOFFSET(_v) ((_v) & (TARGET_ELF_EXEC_PAGESIZE - 1))
-
-#define DLINFO_ITEMS 12
+abi_ulong target_stksiz;
+abi_ulong target_stkbas;
 
 static inline void memcpy_fromfs(void *to, const void *from, unsigned long n)
 {
@@ -195,43 +164,36 @@ static abi_ulong copy_elf_strings(int argc, char **argv, 
void **page,
 return p;
 }
 
-static abi_ulong setup_arg_pages(abi_ulong p, struct bsd_binprm *bprm,
- struct image_info *info)
+static void setup_arg_pages(struct bsd_binprm *bprm, struct image_info *info,
+abi_ulong *stackp, abi_ulong *stringp)
 {
-abi_ulong stack_base, size, error;
-int i;
+abi_ulong stack_base, size;
+abi_long addr;
 
 /* Create enough stack to hold everything.  If we don't use
  * it for args, we'll use it for something else...
  */
 size = target_dflssiz;
-if (size < MAX_ARG_PAGES * TARGET_PAGE_SIZE)
-size = MAX_ARG_PAGES * TARGET_PAGE_SIZE;
-error = target_mmap(0,
+stack_base = TARGET_USRSTACK - size;
+addr = target_mmap(stack_base,
 size + qemu_host_page_size,
 PROT_READ | PROT_WRITE,
 MAP_PRIVATE | MAP_ANON,
 -1, 0);
-if (error == -1) {
+if (addr == -1) {
 perror("stk mmap");
 exit(-1);
 }
 /* we reserve one extra page at the top of the stack as guard */
-target_mprotect(error + size, qemu_host_page_size, PROT_NONE);
+target_mprotect(addr + size, qemu_host_page_size, PROT_NONE);
 
-stack_base = error + size - MAX_ARG_PAGES * TARGET_PAGE_SIZE;
-p += stack_base;
+target_stksiz = size;
+target_stkbas = addr;
 
-for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
-if (bprm->page[i]) {
-info->rss++;
-/* FIXME - check return value of memcpy_to_target() for failure */
-memcpy_to_target(stack_base, bprm->page[i], TARGET_PAGE_SIZE);
-g_free(bprm->page[i]);
-}
-stack_base += TARGET_PAGE_SIZE;
+if (setup_initial_stack(bprm, stackp, stringp) != 0) {
+perror("stk setup");
+exit(-1);
 }
-return p;
 }
 
 static void set_brk(abi_ulong start, abi_ulong end)
@@ -287,86 +249,6 @@ static void padzero(abi_ulong elf_bss, abi_ulong last_bss)
 }
 }
 
-
-static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc,
-   struct elfhdr * exec,
-   abi_ulong load_addr,
-   abi_ulong load_bias,
-   abi_ulong interp_load_addr,
-

[PATCH v2 38/43] bsd-user: Refactor load_elf_sections and is_target_elf_binary

2021-08-26 Thread imp
From: Warner Losh 

Factor load_elf_sections and is_target_elf_binary out of
load_elf_interp.

Signed-off-by: Mikaël Urankar 
Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
---
 bsd-user/elfload.c | 350 +
 1 file changed, 164 insertions(+), 186 deletions(-)

diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c
index bdf18f3dce..aed28f2f73 100644
--- a/bsd-user/elfload.c
+++ b/bsd-user/elfload.c
@@ -36,6 +36,8 @@ abi_ulong target_stksiz;
 abi_ulong target_stkbas;
 
 static int elf_core_dump(int signr, CPUArchState *env);
+static int load_elf_sections(const struct elfhdr *hdr, struct elf_phdr *phdr,
+int fd, abi_ulong rbase, abi_ulong *baddrp);
 
 static inline void memcpy_fromfs(void *to, const void *from, unsigned long n)
 {
@@ -271,16 +273,10 @@ static abi_ulong load_elf_interp(struct elfhdr 
*interp_elf_ex,
  abi_ulong *interp_load_addr)
 {
 struct elf_phdr *elf_phdata  =  NULL;
-struct elf_phdr *eppnt;
-abi_ulong load_addr = 0;
-int load_addr_set = 0;
+abi_ulong rbase;
 int retval;
-abi_ulong last_bss, elf_bss;
-abi_ulong error;
-int i;
+abi_ulong baddr, error;
 
-elf_bss = 0;
-last_bss = 0;
 error = 0;
 
 bswap_ehdr(interp_elf_ex);
@@ -325,6 +321,7 @@ static abi_ulong load_elf_interp(struct elfhdr 
*interp_elf_ex,
 }
 bswap_phdr(elf_phdata, interp_elf_ex->e_phnum);
 
+rbase = 0;
 if (interp_elf_ex->e_type == ET_DYN) {
 /*
  * In order to avoid hardcoding the interpreter load
@@ -332,86 +329,25 @@ static abi_ulong load_elf_interp(struct elfhdr 
*interp_elf_ex,
  */
 error = target_mmap(0, INTERP_MAP_SIZE, PROT_NONE,
 MAP_PRIVATE | MAP_ANON, -1, 0);
-if (error == -1) {
+if (rbase == -1) {
 perror("mmap");
 exit(-1);
 }
-load_addr = error;
-load_addr_set = 1;
-}
-
-eppnt = elf_phdata;
-for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++)
-if (eppnt->p_type == PT_LOAD) {
-int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
-int elf_prot = 0;
-abi_ulong vaddr = 0;
-abi_ulong k;
-
-if (eppnt->p_flags & PF_R) elf_prot =  PROT_READ;
-if (eppnt->p_flags & PF_W) elf_prot |= PROT_WRITE;
-if (eppnt->p_flags & PF_X) elf_prot |= PROT_EXEC;
-if (interp_elf_ex->e_type == ET_EXEC || load_addr_set) {
-elf_type |= MAP_FIXED;
-vaddr = eppnt->p_vaddr;
-}
-error = target_mmap(load_addr + TARGET_ELF_PAGESTART(vaddr),
-eppnt->p_filesz + 
TARGET_ELF_PAGEOFFSET(eppnt->p_vaddr),
-elf_prot,
-elf_type,
-interpreter_fd,
-eppnt->p_offset - 
TARGET_ELF_PAGEOFFSET(eppnt->p_vaddr));
-
-if (error == -1) {
-/* Real error */
-close(interpreter_fd);
-free(elf_phdata);
-return ~((abi_ulong)0UL);
-}
-
-if (!load_addr_set && interp_elf_ex->e_type == ET_DYN) {
-load_addr = error;
-load_addr_set = 1;
-}
-
-/*
- * Find the end of the file  mapping for this phdr, and keep
- * track of the largest address we see for this.
- */
-k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
-if (k > elf_bss) elf_bss = k;
+}
 
-/*
- * Do the same thing for the memory mapping - between
- * elf_bss and last_bss is the bss section.
- */
-k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
-if (k > last_bss) last_bss = k;
-}
+error = load_elf_sections(interp_elf_ex, elf_phdata, interpreter_fd, rbase,
+&baddr);
+if (error != 0) {
+perror("load_elf_sections");
+exit(-1);
+}
 
 /* Now use mmap to map the library into memory. */
-
 close(interpreter_fd);
-
-/*
- * Now fill out the bss section.  First pad the last page up
- * to the page boundary, and then perform a mmap to make sure
- * that there are zeromapped pages up to and including the last
- * bss page.
- */
-padzero(elf_bss, last_bss);
-elf_bss = TARGET_ELF_PAGESTART(elf_bss + qemu_host_page_size - 1); /* What 
we have mapped so far */
-
-/* Map the last of the bss segment */
-if (last_bss > elf_bss) {
-target_mmap(elf_bss, last_bss - elf_bss,
-PROT_READ | PROT_WRITE | PROT_EXEC,
-MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0);
-}
 free(elf_phdata);
 
-*interp_load_addr = load_addr;
-return ((abi_ulong) interp_elf_ex->e_entry) + load_addr;
+*interp_load_addr = baddr;
+return ((abi_ulong) 

[PATCH v2 23/43] bsd-user: define max args in terms of pages

2021-08-26 Thread imp
From: Warner Losh 

For 32-bit platforms, pass in up to 256k of args. For 64-bit, bump that
to 512k.

Signed-off-by: Kyle Evans 
Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/qemu.h | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
index 55d71130bb..fea1a167e4 100644
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -20,6 +20,7 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
+#include "qemu/units.h"
 #include "exec/cpu_ldst.h"
 #include "exec/exec-all.h"
 
@@ -101,11 +102,17 @@ extern const char *qemu_uname_release;
 extern unsigned long mmap_min_addr;
 
 /*
- * MAX_ARG_PAGES defines the number of pages allocated for arguments
- * and envelope for the new program. 32 should suffice, this gives
- * a maximum env+arg of 128kB w/4KB pages!
+ * TARGET_ARG_MAX defines the number of bytes allocated for arguments
+ * and envelope for the new program. 256k should suffice for a reasonable
+ * maxiumum env+arg in 32-bit environments, bump it up to 512k for !ILP32
+ * platforms.
  */
-#define MAX_ARG_PAGES 32
+#if TARGET_ABI_BITS > 32
+#define TARGET_ARG_MAX (512 * KiB)
+#else
+#define TARGET_ARG_MAX (256 * KiB)
+#endif
+#define MAX_ARG_PAGES (TARGET_ARG_MAX / TARGET_PAGE_SIZE)
 
 /*
  * This structure is used to hold the arguments that are
-- 
2.32.0




[PATCH v2 31/43] bsd-user: Rewrite target system call definintion glue

2021-08-26 Thread imp
From: Warner Losh 

Rewrite target definitions to interface with the FreeBSD system calls.
This covers basic types (time_t, iovec, umtx_time, timespec, timeval,
rusage, rwusage) and basic defines (mmap, rusage). Also included are
FreeBSD version-specific variations.

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
---
 bsd-user/bsd-mman.h | 121 
 bsd-user/mmap.c |   2 -
 bsd-user/syscall_defs.h | 247 ++--
 3 files changed, 162 insertions(+), 208 deletions(-)
 delete mode 100644 bsd-user/bsd-mman.h

diff --git a/bsd-user/bsd-mman.h b/bsd-user/bsd-mman.h
deleted file mode 100644
index 910e8c1921..00
--- a/bsd-user/bsd-mman.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*-
- * Copyright (c) 1982, 1986, 1993
- *  The Regents of the University of California.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *notice, this list of conditions and the following disclaimer in the
- *documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- *may be used to endorse or promote products derived from this software
- *without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *  @(#)mman.h  8.2 (Berkeley) 1/9/95
- * $FreeBSD: src/sys/sys/mman.h,v 1.42 2008/03/28 04:29:27 ps Exp $
- */
-
-#define TARGET_FREEBSD_MAP_RESERVED0080 0x0080  /* previously misimplemented 
MAP_INHERIT */
-#define TARGET_FREEBSD_MAP_RESERVED0100 0x0100  /* previously unimplemented 
MAP_NOEXTEND */
-#define TARGET_FREEBSD_MAP_STACK0x0400  /* region grows down, like a 
stack */
-#define TARGET_FREEBSD_MAP_NOSYNC   0x0800  /* page to but do not sync 
underlying file */
-
-#define TARGET_FREEBSD_MAP_FLAGMASK 0x1ff7
-
-/*  $NetBSD: mman.h,v 1.42 2008/11/18 22:13:49 ad Exp $ */
-
-/*-
- * Copyright (c) 1982, 1986, 1993
- *  The Regents of the University of California.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *notice, this list of conditions and the following disclaimer in the
- *documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *may be used to endorse or promote products derived from this software
- *without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *  @(#)mman.h  8.2 (Berkeley) 1/9/95
- */
-#define TARGET_NETBSD_MAP_INHERIT   0x0080  /* region is retained after 
exec */
-#define TARGET_NETBSD_MAP_TRYFIXED  0x0400 /* attempt hint address, even 
within break */
-#define TARGET_NETBSD_MAP_WIRED 0x0800  /* mlock() mapping when it is 
established */
-
-#define TARGET_NETBSD_MAP_STACK   

[PATCH v2 21/43] bsd-user: pull in target_arch_thread.h update target_arch_elf.h

2021-08-26 Thread imp
From: Warner Losh 

Update target_arch_elf.h to remove thread_init. Move its contents to
target_arch_thread.h and rename to target_thread_init(). Update
elfload.c to call it. Create target_os_thread.h to hold the OS-specific
parts of the thread and thread manipulation routines. Currently, it just
includes target_arch_thread.h. target_arch_thread.h contains the
currently unused target_thread_set_upcall, which will be used in the future
when creating actual thread (i386 has this stubbed, but other
architectures in the bsd-user tree have real ones). FreeBSD doesn't do
AT_HWCAP, so remove that code. Linux does, and this code came from there.

These changes are all interrelated and could be broken down, but seem to
represent a reviewable changeset since most of the change is boiler
plate.

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
---
 bsd-user/elfload.c   |  4 ++-
 bsd-user/freebsd/target_os_thread.h  | 25 +++
 bsd-user/i386/target_arch_elf.h  | 47 ++--
 bsd-user/i386/target_arch_thread.h   | 47 
 bsd-user/netbsd/target_os_thread.h   | 25 +++
 bsd-user/openbsd/target_os_thread.h  | 25 +++
 bsd-user/x86_64/target_arch_elf.h| 35 ++---
 bsd-user/x86_64/target_arch_thread.h | 40 +++
 8 files changed, 171 insertions(+), 77 deletions(-)
 create mode 100644 bsd-user/freebsd/target_os_thread.h
 create mode 100644 bsd-user/i386/target_arch_thread.h
 create mode 100644 bsd-user/netbsd/target_os_thread.h
 create mode 100644 bsd-user/openbsd/target_os_thread.h
 create mode 100644 bsd-user/x86_64/target_arch_thread.h

diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c
index 8a6a72bf05..70a0f81f3d 100644
--- a/bsd-user/elfload.c
+++ b/bsd-user/elfload.c
@@ -24,6 +24,7 @@
 #include "qemu/path.h"
 
 #include "target_arch_elf.h"
+#include "target_os_thread.h"
 
 /* this flag is uneffective under linux too, should be deleted */
 #ifndef MAP_DENYWRITE
@@ -1001,5 +1002,6 @@ int load_elf_binary(struct bsd_binprm *bprm, struct 
target_pt_regs *regs,
 
 void do_init_thread(struct target_pt_regs *regs, struct image_info *infop)
 {
-init_thread(regs, infop);
+
+target_thread_init(regs, infop);
 }
diff --git a/bsd-user/freebsd/target_os_thread.h 
b/bsd-user/freebsd/target_os_thread.h
new file mode 100644
index 00..77433acdff
--- /dev/null
+++ b/bsd-user/freebsd/target_os_thread.h
@@ -0,0 +1,25 @@
+/*
+ *  FreeBSD thread dependent code and definitions
+ *
+ *  Copyright (c) 2013 Stacey D. Son
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _TARGET_OS_THREAD_H_
+#define _TARGET_OS_THREAD_H_
+
+#include "target_arch_thread.h"
+
+#endif /* !_TARGET_OS_THREAD_H_ */
diff --git a/bsd-user/i386/target_arch_elf.h b/bsd-user/i386/target_arch_elf.h
index 84fbc8ba4c..eb760e07fa 100644
--- a/bsd-user/i386/target_arch_elf.h
+++ b/bsd-user/i386/target_arch_elf.h
@@ -19,57 +19,16 @@
 #ifndef _TARGET_ARCH_ELF_H_
 #define _TARGET_ARCH_ELF_H_
 
-#define ELF_PLATFORM get_elf_platform()
-
-static const char *get_elf_platform(void)
-{
-static char elf_platform[] = "i386";
-int family = object_property_get_int(OBJECT(thread_cpu), "family", NULL);
-if (family > 6)
-family = 6;
-if (family >= 3)
-elf_platform[1] = '0' + family;
-return elf_platform;
-}
-
-#define ELF_HWCAP get_elf_hwcap()
-
-static uint32_t get_elf_hwcap(void)
-{
-X86CPU *cpu = X86_CPU(thread_cpu);
-
-return cpu->env.features[FEAT_1_EDX];
-}
-
 #define ELF_START_MMAP 0x8000
-
-/*
- * This is used to ensure we don't load something for the wrong architecture.
- */
+#define ELF_ET_DYN_LOAD_ADDR0x01001000
 #define elf_check_arch(x) (((x) == EM_386) || ((x) == EM_486))
 
-/*
- * These are used to set parameters in the core dumps.
- */
+#define ELF_HWCAP   0 /* FreeBSD doesn't do AT_HWCAP{,2} on x86 */
+
 #define ELF_CLASS   ELFCLASS32
 #define ELF_DATAELFDATA2LSB
 #define ELF_ARCHEM_386
 
-static inline void init_thread(struct target_pt_regs *regs, struct image_info 
*infop)
-{
-regs->esp = infop->start_stack;
-regs->eip = infop->entry;
-
-/* SVR4/i386 ABI (pages 3-31, 3-32) says that when the program
-   starts %edx contains a pointer to a function which might be
-   registered using `atexit'.  

[PATCH 2/2] virtiofsd: Add option "block_xattr=" to block certain xattrs

2021-08-26 Thread Vivek Goyal
We need capability to block security.selinux xattr and return EOPNOTSUPP.
That way guest SELinux thinks filesystem does not support selinux
xattr and falls back to some default label (virtiofs_t) for the
virtiofs filesystem instance.

So add a generic option "-o block_xattr=", which can allow user to
specify a list of xattrs to block. Xattrs should be ":" separated.
For example, "-o block_xattr=security.selinux:user.foo".

Valid xattrs to block should belong to one of the "security",
"system", "trusted" or "user" xattr namespace.

Ex. -o block_xattr="security.selinux:user.foo"

One can also specify a prefix which should be matched against the xattr
name, and if the prefix matches, that xattr will be blocked. The requirement
that the xattr belong to one of the 4 namespaces still remains in place.

For example -o block_xattr="user.virtiofs*" should block any
xattr name starting with prefix "user.virtiofs".

Signed-off-by: Vivek Goyal 
---
 docs/tools/virtiofsd.rst |  17 ++
 tools/virtiofsd/helper.c |   3 +
 tools/virtiofsd/passthrough_ll.c | 101 ---
 3 files changed, 114 insertions(+), 7 deletions(-)

diff --git a/docs/tools/virtiofsd.rst b/docs/tools/virtiofsd.rst
index b208f2a6f0..406c1ab721 100644
--- a/docs/tools/virtiofsd.rst
+++ b/docs/tools/virtiofsd.rst
@@ -101,6 +101,23 @@ Options
 Enable/disable extended attributes (xattr) on files and directories.  The
 default is ``no_xattr``.
 
+  * block_xattr= -
+Block xattrs specified in the colon separated list. When an xattr
+is blocked getxattr/setxattr/removexattr return error code
+EOPNOTSUPP, and listxattr removes the xattr from list if there is one.
+
+xattr name should belong to one of the four namespsaces, namely
+security, system, trusted and user.
+
+e.g. -o block_xattr=security.selinux:user.foo
+
+One could also specify just a xattr name prefix followed by "*" to
+signify any xattr name matching prefix will be blocked.
+
+e.g -o block_xattr=user.foo*
+
+This will block any xattr name starting with "user.foo"
+
   * posix_acl|no_posix_acl -
 Enable/disable posix acl support.  Posix ACLs are disabled by default.
 
diff --git a/tools/virtiofsd/helper.c b/tools/virtiofsd/helper.c
index a8295d975a..da674ff70a 100644
--- a/tools/virtiofsd/helper.c
+++ b/tools/virtiofsd/helper.c
@@ -175,6 +175,9 @@ void fuse_cmdline_help(void)
"-o xattrmap=  Enable xattr mapping (enables 
xattr)\n"
"is a string consists of a 
series of rules\n"
"   e.g. -o 
xattrmap=:map::user.virtiofs.:\n"
+   "-o block_xattr=Block xattrs specified in list\n"
+   "is colon separated list of 
xattrs to block\n"
+   "   e.g. -o 
block_xattr=security.selinux:user.*\n"
"-o modcaps=CAPLIST Modify the list of capabilities\n"
"   e.g. -o modcaps=+sys_admin:-chown\n"
"--rlimit-nofile=  set maximum number of file 
descriptors\n"
diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 9e93bcdbb3..2008e6be55 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -142,6 +142,12 @@ typedef struct xattr_map_entry {
 unsigned int flags;
 } XattrMapEntry;
 
+struct xattr_block_entry {
+/* true if name is prefix otherwise false */
+bool prefix;
+char *name;
+};
+
 struct lo_data {
 pthread_mutex_t mutex;
 int sandbox;
@@ -176,8 +182,9 @@ struct lo_data {
 /* If set, virtiofsd is responsible for setting umask during creation */
 bool change_umask;
 int user_posix_acl, posix_acl;
-char **blocked_xattrs;
+struct xattr_block_entry *blocked_xattrs;
 size_t num_blocked_xattrs;
+char *block_xattr_str;
 };
 
 static const struct fuse_opt lo_opts[] = {
@@ -212,6 +219,7 @@ static const struct fuse_opt lo_opts[] = {
 { "no_killpriv_v2", offsetof(struct lo_data, user_killpriv_v2), 0 },
 { "posix_acl", offsetof(struct lo_data, user_posix_acl), 1 },
 { "no_posix_acl", offsetof(struct lo_data, user_posix_acl), 0 },
+{ "block_xattr=%s", offsetof(struct lo_data, block_xattr_str), 0 },
 FUSE_OPT_END
 };
 static bool use_syslog = false;
@@ -2817,23 +2825,88 @@ static int xattr_map_server(const struct lo_data *lo, 
const char *server_name,
 static int add_blocked_xattr(struct lo_data *lo, const char *name)
 {
 size_t nr_elems = lo->num_blocked_xattrs + 1;
+struct xattr_block_entry *xbe;
+char *ptr;
 
 lo->blocked_xattrs = reallocarray(lo->blocked_xattrs, nr_elems,
-  sizeof(char *));
+  sizeof(struct xattr_block_entry));
 if (!lo->blocked_xattrs) {
 fuse_log(FUSE_LOG_ERR, "failed to grow blocked xattrs array: %m\n");
 return 

[PATCH v2 28/43] bsd-user: Add system independent stack, data and text limiting

2021-08-26 Thread imp
From: Warner Losh 

Eliminate the x86 specific stack stuff in favor of more generic control
over the process size:
target_maxtsiz  max text size
target_dfldsiz  initial data size limit
target_maxdsiz  max data size
target_dflssiz  initial stack size limit
target_maxssiz  max stack size
target_sgrowsiz amount to grow stack
These can be set on a per-arch basis, and the stack size can be set
on the command line. Adjust the stack size parameters at startup.

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/elfload.c |  2 +-
 bsd-user/main.c| 51 +-
 bsd-user/qemu.h|  7 ++-
 3 files changed, 44 insertions(+), 16 deletions(-)

diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c
index 70a0f81f3d..5ceb60b1c2 100644
--- a/bsd-user/elfload.c
+++ b/bsd-user/elfload.c
@@ -204,7 +204,7 @@ static abi_ulong setup_arg_pages(abi_ulong p, struct 
bsd_binprm *bprm,
 /* Create enough stack to hold everything.  If we don't use
  * it for args, we'll use it for something else...
  */
-size = x86_stack_size;
+size = target_dflssiz;
 if (size < MAX_ARG_PAGES * TARGET_PAGE_SIZE)
 size = MAX_ARG_PAGES * TARGET_PAGE_SIZE;
 error = target_mmap(0,
diff --git a/bsd-user/main.c b/bsd-user/main.c
index f16d6c80e2..f6643896f6 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -18,6 +18,11 @@
  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
+#include 
+#include 
+#include 
+#include 
+
 #include "qemu/osdep.h"
 #include "qemu-common.h"
 #include "qemu/units.h"
@@ -44,8 +49,6 @@
 #include "host-os.h"
 #include "target_arch_cpu.h"
 
-#include 
-
 int singlestep;
 unsigned long mmap_min_addr;
 uintptr_t guest_base;
@@ -57,12 +60,12 @@ const char *qemu_uname_release;
 enum BSDType bsd_type;
 char qemu_proc_pathname[PATH_MAX];  /* full path to exeutable */
 
-/*
- * XXX: on x86 MAP_GROWSDOWN only works if ESP <= address + 32, so
- * we allocate a bigger stack. Need a better solution, for example
- * by remapping the process stack directly at the right place
- */
-unsigned long x86_stack_size = 512 * 1024;
+unsigned long target_maxtsiz = TARGET_MAXTSIZ;   /* max text size */
+unsigned long target_dfldsiz = TARGET_DFLDSIZ;   /* initial data size limit */
+unsigned long target_maxdsiz = TARGET_MAXDSIZ;   /* max data size */
+unsigned long target_dflssiz = TARGET_DFLSSIZ;   /* initial data size limit */
+unsigned long target_maxssiz = TARGET_MAXSSIZ;   /* max stack size */
+unsigned long target_sgrowsiz = TARGET_SGROWSIZ; /* amount to grow stack */
 
 void gemu_log(const char *fmt, ...)
 {
@@ -112,7 +115,6 @@ static void usage(void)
"-d item1[,...]enable logging of specified items\n"
"  (use '-d help' for a list of log items)\n"
"-D logfilewrite logs to 'logfile' (default stderr)\n"
-   "-p pagesize   set the host page size to 'pagesize'\n"
"-singlestep   always run in singlestep mode\n"
"-strace   log system calls\n"
"-trace
[[enable=]][,events=][,file=]\n"
@@ -132,7 +134,7 @@ static void usage(void)
,
TARGET_NAME,
interp_prefix,
-   x86_stack_size);
+   target_dflssiz);
 exit(1);
 }
 
@@ -161,6 +163,22 @@ void init_task_state(TaskState *ts)
 ts->sigqueue_table[i].next = NULL;
 }
 
+static void
+adjust_ssize(void)
+{
+struct rlimit rl;
+
+if (getrlimit(RLIMIT_STACK, &rl) != 0)
+return;
+
+target_maxssiz = MIN(target_maxssiz, rl.rlim_max);
+target_dflssiz = MIN(MAX(target_dflssiz, rl.rlim_cur), target_maxssiz);
+
+rl.rlim_max = target_maxssiz;
+rl.rlim_cur = target_dflssiz;
+setrlimit(RLIMIT_STACK, &rl);
+}
+
 static void save_proc_pathname(char *argv0)
 {
 int mib[4];
@@ -197,6 +215,8 @@ int main(int argc, char **argv)
 envlist_t *envlist = NULL;
 bsd_type = HOST_DEFAULT_BSD_TYPE;
 
+adjust_ssize();
+
 if (argc <= 1) {
 usage();
 }
@@ -257,14 +277,17 @@ int main(int argc, char **argv)
 }
 } else if (!strcmp(r, "s")) {
 r = argv[optind++];
-rv = qemu_strtoul(r, &r, 0, &x86_stack_size);
-if (rv < 0 || x86_stack_size <= 0) {
+rv = qemu_strtoul(r, &r, 0, &target_dflssiz);
+if (rv < 0 || target_dflssiz <= 0) {
 usage();
 }
 if (*r == 'M') {
-x86_stack_size *= MiB;
+target_dflssiz *= 1024 * 1024;
 } else if (*r == 'k' || *r == 'K') {
-x86_stack_size *= KiB;
+target_dflssiz *= 1024;
+}
+if (target_dflssiz > target_maxssiz) {
+usage();
 }
 } else if (!strcmp(r, "L")) {
 interp_prefix = argv[optind++];
diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
index 

[PATCH v2 24/43] bsd-user: Create target specific vmparam.h

2021-08-26 Thread imp
From: Warner Losh 

Target specific values for vm parameters and details.

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/freebsd/target_os_vmparam.h  | 38 ++
 bsd-user/i386/target_arch_vmparam.h   | 46 +++
 bsd-user/qemu.h   |  1 +
 bsd-user/x86_64/target_arch_vmparam.h | 46 +++
 4 files changed, 131 insertions(+)
 create mode 100644 bsd-user/freebsd/target_os_vmparam.h
 create mode 100644 bsd-user/i386/target_arch_vmparam.h
 create mode 100644 bsd-user/x86_64/target_arch_vmparam.h

diff --git a/bsd-user/freebsd/target_os_vmparam.h 
b/bsd-user/freebsd/target_os_vmparam.h
new file mode 100644
index 00..990300c619
--- /dev/null
+++ b/bsd-user/freebsd/target_os_vmparam.h
@@ -0,0 +1,38 @@
+/*
+ *  FreeBSD VM parameters definitions
+ *
+ *  Copyright (c) 2013 Stacey D. Son
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _TARGET_OS_VMPARAM_H_
+#define _TARGET_OS_VMPARAM_H_
+
+#include "target_arch_vmparam.h"
+
+/* Compare to sys/exec.h */
+struct target_ps_strings {
+abi_ulong ps_argvstr;
+uint32_t ps_nargvstr;
+abi_ulong ps_envstr;
+uint32_t ps_nenvstr;
+};
+
+extern abi_ulong target_stkbas;
+extern abi_ulong target_stksiz;
+
+#define TARGET_PS_STRINGS  ((target_stkbas + target_stksiz) - \
+sizeof(struct target_ps_strings))
+
+#endif /* !TARGET_OS_VMPARAM_H_ */
diff --git a/bsd-user/i386/target_arch_vmparam.h 
b/bsd-user/i386/target_arch_vmparam.h
new file mode 100644
index 00..bb7718265b
--- /dev/null
+++ b/bsd-user/i386/target_arch_vmparam.h
@@ -0,0 +1,46 @@
+/*
+ *  i386 VM parameters definitions
+ *
+ *  Copyright (c) 2013 Stacey D. Son
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _TARGET_ARCH_VMPARAM_H_
+#define _TARGET_ARCH_VMPARAM_H_
+
+#include "cpu.h"
+
+/* compare to i386/include/vmparam.h */
+#define TARGET_MAXTSIZ  (128 * MiB) /* max text size */
+#define TARGET_DFLDSIZ  (128 * MiB) /* initial data size limit */
+#define TARGET_MAXDSIZ  (512 * MiB) /* max data size */
+#define TARGET_DFLSSIZ  (8 * MiB)   /* initial stack size limit */
+#define TARGET_MAXSSIZ  (64 * MiB)  /* max stack size */
+#define TARGET_SGROWSIZ (128 * KiB) /* amount to grow stack */
+
+#define TARGET_RESERVED_VA 0xf700
+
+#define TARGET_USRSTACK (0xbfc0)
+
+static inline abi_ulong get_sp_from_cpustate(CPUX86State *state)
+{
+return state->regs[R_ESP];
+}
+
+static inline void set_second_rval(CPUX86State *state, abi_ulong retval2)
+{
+state->regs[R_EDX] = retval2;
+}
+
+#endif /* !_TARGET_ARCH_VMPARAM_H_ */
diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
index fea1a167e4..1b37757e06 100644
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -41,6 +41,7 @@ extern enum BSDType bsd_type;
 #include "target_arch.h"
 #include "syscall_defs.h"
 #include "target_syscall.h"
+#include "target_os_vmparam.h"
 #include "exec/gdbstub.h"
 
 /*
diff --git a/bsd-user/x86_64/target_arch_vmparam.h 
b/bsd-user/x86_64/target_arch_vmparam.h
new file mode 100644
index 00..81a915f2e5
--- /dev/null
+++ b/bsd-user/x86_64/target_arch_vmparam.h
@@ -0,0 +1,46 @@
+/*
+ *  Intel x86_64 VM parameters definitions
+ *
+ *  Copyright (c) 2013 Stacey D. Son
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; 

[PATCH v2 18/43] bsd-user: save the path to the qemu emulator

2021-08-26 Thread imp
From: Warner Losh 

Save the path to the qemu emulator. This will be used later when we have
a more complete implementation of exec.

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
Acked-by: Richard Henderson 
---
 bsd-user/main.c | 21 +
 bsd-user/qemu.h |  1 +
 2 files changed, 22 insertions(+)

diff --git a/bsd-user/main.c b/bsd-user/main.c
index 3a23e63cf8..f7e1df5da5 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -43,6 +43,8 @@
 
 #include "host-os.h"
 
+#include 
+
 int singlestep;
 unsigned long mmap_min_addr;
 uintptr_t guest_base;
@@ -52,6 +54,7 @@ unsigned long reserved_va;
 static const char *interp_prefix = CONFIG_QEMU_INTERP_PREFIX;
 const char *qemu_uname_release;
 enum BSDType bsd_type;
+char qemu_proc_pathname[PATH_MAX];  /* full path to exeutable */
 
 /*
  * XXX: on x86 MAP_GROWSDOWN only works if ESP <= address + 32, so
@@ -336,6 +339,22 @@ void init_task_state(TaskState *ts)
 ts->sigqueue_table[i].next = NULL;
 }
 
+static void save_proc_pathname(char *argv0)
+{
+int mib[4];
+size_t len;
+
+mib[0] = CTL_KERN;
+mib[1] = KERN_PROC;
+mib[2] = KERN_PROC_PATHNAME;
+mib[3] = -1;
+
+len = sizeof(qemu_proc_pathname);
+if (sysctl(mib, 4, qemu_proc_pathname, &len, NULL, 0)) {
+perror("sysctl");
+}
+}
+
 int main(int argc, char **argv)
 {
 const char *filename;
@@ -360,6 +379,8 @@ int main(int argc, char **argv)
 usage();
 }
 
+save_proc_pathname(argv[0]);
+
 error_init(argv[0]);
 module_call_init(MODULE_INIT_TRACE);
 qemu_init_cpu_list();
diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
index cf248ad3df..6c4ec61d76 100644
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -207,6 +207,7 @@ void mmap_fork_start(void);
 void mmap_fork_end(int child);
 
 /* main.c */
+extern char qemu_proc_pathname[];
 extern unsigned long x86_stack_size;
 
 /* user access */
-- 
2.32.0




[PATCH 1/2] virtiofsd: Add an array to keep track of blocked xattrs

2021-08-26 Thread Vivek Goyal
Right now we have the capability to block "system.posix_acl_access" and
"system.posix_acl_default" xattrs. But we have sort of hardcoded these
two values and it's not generic.

Now we want to support blocking of arbitrary xattr as passed in
by user. So let us keep an array of blocked xattrs and consult
that array when deciding whether an attribute is blocked or not.

This should not result in any functional change.

Signed-off-by: Vivek Goyal 
---
 tools/virtiofsd/passthrough_ll.c | 77 ++--
 1 file changed, 63 insertions(+), 14 deletions(-)

diff --git a/tools/virtiofsd/passthrough_ll.c b/tools/virtiofsd/passthrough_ll.c
index 38b2af8599..9e93bcdbb3 100644
--- a/tools/virtiofsd/passthrough_ll.c
+++ b/tools/virtiofsd/passthrough_ll.c
@@ -176,6 +176,8 @@ struct lo_data {
 /* If set, virtiofsd is responsible for setting umask during creation */
 bool change_umask;
 int user_posix_acl, posix_acl;
+char **blocked_xattrs;
+size_t num_blocked_xattrs;
 };
 
 static const struct fuse_opt lo_opts[] = {
@@ -2811,19 +2813,57 @@ static int xattr_map_server(const struct lo_data *lo, 
const char *server_name,
 assert(fchdir_res == 0);   \
 } while (0)
 
+/* Returns 0 on success, 1 on failure */
+static int add_blocked_xattr(struct lo_data *lo, const char *name)
+{
+size_t nr_elems = lo->num_blocked_xattrs + 1;
+
+lo->blocked_xattrs = reallocarray(lo->blocked_xattrs, nr_elems,
+  sizeof(char *));
+if (!lo->blocked_xattrs) {
+fuse_log(FUSE_LOG_ERR, "failed to grow blocked xattrs array: %m\n");
+return 1;
+}
+
+lo->blocked_xattrs[nr_elems - 1] = strdup(name);
+if (!lo->blocked_xattrs[nr_elems - 1]) {
+fuse_log(FUSE_LOG_ERR, "strdup(%s) failed: %m\n", name);
+return 1;
+}
+lo->num_blocked_xattrs++;
+return 0;
+}
+
+static void free_blocked_xattrs(struct lo_data *lo)
+{
+size_t i;
+
+if (!lo->num_blocked_xattrs) {
+return;
+}
+
+for (i = 0; i < lo->num_blocked_xattrs; i++) {
+free(lo->blocked_xattrs[i]);
+}
+
+free(lo->blocked_xattrs);
+lo->num_blocked_xattrs = 0;
+lo->blocked_xattrs = NULL;
+}
+
 static bool block_xattr(struct lo_data *lo, const char *name)
 {
-/*
- * If user explicitly enabled posix_acl or did not provide any option,
- * do not block acl. Otherwise block system.posix_acl_access and
- * system.posix_acl_default xattrs.
- */
-if (lo->user_posix_acl) {
+size_t i;
+
+if (!lo->num_blocked_xattrs) {
 return false;
 }
-if (!strcmp(name, "system.posix_acl_access") ||
-!strcmp(name, "system.posix_acl_default"))
+
+for (i = 0; i < lo->num_blocked_xattrs; i++) {
+if (!strcmp(name, lo->blocked_xattrs[i])) {
 return true;
+}
+}
 
 return false;
 }
@@ -2840,12 +2880,7 @@ static int remove_blocked_xattrs(struct lo_data *lo, 
char *xattr_list,
 {
 size_t out_index, in_index;
 
-/*
- * As of now we only filter out acl xattrs. If acls are enabled or
- * they have not been explicitly disabled, there is nothing to
- * filter.
- */
-if (lo->user_posix_acl) {
+if (!lo->num_blocked_xattrs) {
 return in_size;
 }
 
@@ -3880,6 +3915,7 @@ static void fuse_lo_data_cleanup(struct lo_data *lo)
 free(lo->xattrmap);
 free_xattrmap(lo);
 free(lo->xattr_security_capability);
+free_blocked_xattrs(lo);
 free(lo->source);
 }
 
@@ -3920,6 +3956,8 @@ int main(int argc, char *argv[])
 lo.root.fd = -1;
 lo.root.fuse_ino = FUSE_ROOT_ID;
 lo.cache = CACHE_AUTO;
+lo.num_blocked_xattrs = 0;
+lo.blocked_xattrs = NULL;
 
 /*
  * Set up the ino map like this:
@@ -4036,6 +4074,17 @@ int main(int argc, char *argv[])
 exit(1);
 }
 
+if (!lo.user_posix_acl) {
+/* User disabled posix acl explicitly. Block acl xattrs */
+if (add_blocked_xattr(&lo, "system.posix_acl_access")) {
+exit(1);
+}
+
+if (add_blocked_xattr(&lo, "system.posix_acl_default")) {
+exit(1);
+}
+}
+
 lo.use_statx = true;
 
 se = fuse_session_new(, _oper, sizeof(lo_oper), );
-- 
2.31.1




[PATCH v2 17/43] bsd-user: Include host-os.h from main

2021-08-26 Thread imp
From: Warner Losh 

Include host-os.h from main.c to pick up the default OS to emulate.  Set
that default in main().

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/freebsd/host-os.h | 2 ++
 bsd-user/main.c| 4 +++-
 bsd-user/netbsd/host-os.h  | 2 ++
 bsd-user/openbsd/host-os.h | 2 ++
 4 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/bsd-user/freebsd/host-os.h b/bsd-user/freebsd/host-os.h
index a799164324..ceb1543d06 100644
--- a/bsd-user/freebsd/host-os.h
+++ b/bsd-user/freebsd/host-os.h
@@ -20,4 +20,6 @@
 #ifndef __HOST_OS_H_
 #define __HOST_OS_H_
 
+#define HOST_DEFAULT_BSD_TYPE target_freebsd
+
 #endif /*!__HOST_OS_H_ */
diff --git a/bsd-user/main.c b/bsd-user/main.c
index 3f6f4080e8..3a23e63cf8 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -41,6 +41,8 @@
 #include "exec/log.h"
 #include "trace/control.h"
 
+#include "host-os.h"
+
 int singlestep;
 unsigned long mmap_min_addr;
 uintptr_t guest_base;
@@ -352,7 +354,7 @@ int main(int argc, char **argv)
 const char *gdbstub = NULL;
 char **target_environ, **wrk;
 envlist_t *envlist = NULL;
-bsd_type = target_openbsd;
+bsd_type = HOST_DEFAULT_BSD_TYPE;
 
 if (argc <= 1) {
 usage();
diff --git a/bsd-user/netbsd/host-os.h b/bsd-user/netbsd/host-os.h
index b44cb7fdda..ccbea076e6 100644
--- a/bsd-user/netbsd/host-os.h
+++ b/bsd-user/netbsd/host-os.h
@@ -20,4 +20,6 @@
 #ifndef __HOST_OS_H_
 #define __HOST_OS_H_
 
+#define HOST_DEFAULT_BSD_TYPE target_netbsd
+
 #endif /*!__HOST_OS_H_ */
diff --git a/bsd-user/openbsd/host-os.h b/bsd-user/openbsd/host-os.h
index 9083555f26..79468073e4 100644
--- a/bsd-user/openbsd/host-os.h
+++ b/bsd-user/openbsd/host-os.h
@@ -20,4 +20,6 @@
 #ifndef __HOST_OS_H_
 #define __HOST_OS_H_
 
+#define HOST_DEFAULT_BSD_TYPE target_openbsd
+
 #endif /*!__HOST_OS_H_ */
-- 
2.32.0




[PATCH v2 43/43] bsd-user: Update mapping to handle reserved and starting conditions

2021-08-26 Thread imp
From: Warner Losh 

Update the reserved base based on what platform we're on, as well as the
start of the mmap range. Update routines that find va ranges to interact
with the reserved ranges as well as properly align the mapping (this is
especially important for targets whose page size does not match the
host's). Loop where appropriate when the initial address space offered
by mmap does not meet the constraints.

This has 18e80c55bb6 from linux-user folded in to the upstream
bsd-user code as well.

Signed-off-by: Mikaël Urankar 
Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
Acked-by: Richard Henderson 
---
 bsd-user/main.c |  41 +-
 bsd-user/mmap.c | 372 
 bsd-user/qemu.h |   5 +-
 3 files changed, 353 insertions(+), 65 deletions(-)

diff --git a/bsd-user/main.c b/bsd-user/main.c
index ae25f4c773..aa3cb01c7d 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -50,12 +50,47 @@
 #include "target_arch_cpu.h"
 
 int singlestep;
-unsigned long mmap_min_addr;
 uintptr_t guest_base;
 static const char *cpu_model;
 static const char *cpu_type;
 bool have_guest_base;
+/*
+ * When running 32-on-64 we should make sure we can fit all of the possible
+ * guest address space into a contiguous chunk of virtual host memory.
+ *
+ * This way we will never overlap with our own libraries or binaries or stack
+ * or anything else that QEMU maps.
+ *
+ * Many cpus reserve the high bit (or more than one for some 64-bit cpus)
+ * of the address for the kernel.  Some cpus rely on this and user space
+ * uses the high bit(s) for pointer tagging and the like.  For them, we
+ * must preserve the expected address space.
+ */
+#ifndef MAX_RESERVED_VA
+# if HOST_LONG_BITS > TARGET_VIRT_ADDR_SPACE_BITS
+#  if TARGET_VIRT_ADDR_SPACE_BITS == 32 && \
+  (TARGET_LONG_BITS == 32 || defined(TARGET_ABI32))
+/* There are a number of places where we assign reserved_va to a variable
+   of type abi_ulong and expect it to fit.  Avoid the last page.  */
+#   define MAX_RESERVED_VA  (0xul & TARGET_PAGE_MASK)
+#  else
+#   define MAX_RESERVED_VA  (1ul << TARGET_VIRT_ADDR_SPACE_BITS)
+#  endif
+# else
+#  define MAX_RESERVED_VA  0
+# endif
+#endif
+
+/*
+ * That said, reserving *too* much vm space via mmap can run into problems
+ * with rlimits, oom due to page table creation, etc.  We will still try it,
+ * if directed by the command-line option, but not by default.
+ */
+#if HOST_LONG_BITS == 64 && TARGET_VIRT_ADDR_SPACE_BITS <= 32
+unsigned long reserved_va = MAX_RESERVED_VA;
+#else
 unsigned long reserved_va;
+#endif
 
 static const char *interp_prefix = CONFIG_QEMU_INTERP_PREFIX;
 const char *qemu_uname_release;
@@ -461,6 +496,10 @@ int main(int argc, char **argv)
 target_environ = envlist_to_environ(envlist, NULL);
 envlist_free(envlist);
 
+if (reserved_va) {
+mmap_next_start = reserved_va;
+}
+
 /*
  * Now that page sizes are configured we can do
  * proper page alignment for guest_base.
diff --git a/bsd-user/mmap.c b/bsd-user/mmap.c
index 1ef4d271a8..b2fdb94dfd 100644
--- a/bsd-user/mmap.c
+++ b/bsd-user/mmap.c
@@ -188,64 +188,191 @@ static int mmap_frag(abi_ulong real_start,
 return 0;
 }
 
-static abi_ulong mmap_next_start = 0x4000;
+#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
+# define TASK_UNMAPPED_BASE  (1ul << 38)
+#else
+# define TASK_UNMAPPED_BASE  0x4000
+#endif
+abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
 
 unsigned long last_brk;
 
-/* find a free memory area of size 'size'. The search starts at
-   'start'. If 'start' == 0, then a default start address is used.
-   Return -1 if error.
-*/
-/* page_init() marks pages used by the host as reserved to be sure not
-   to use them. */
-static abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size)
+/* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
+   of guest address space.  */
+static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size, 
abi_ulong alignment)
 {
-abi_ulong addr, addr1, addr_start;
+abi_ulong addr;
+abi_ulong end_addr;
 int prot;
-unsigned long new_brk;
-
-new_brk = (unsigned long)sbrk(0);
-if (last_brk && last_brk < new_brk && last_brk == (target_ulong)last_brk) {
-/* This is a hack to catch the host allocating memory with brk().
-   If it uses mmap then we loose.
-   FIXME: We really want to avoid the host allocating memory in
-   the first place, and maybe leave some slack to avoid switching
-   to mmap.  */
-page_set_flags(last_brk & TARGET_PAGE_MASK,
-   TARGET_PAGE_ALIGN(new_brk),
-   PAGE_RESERVED);
+int looped = 0;
+
+if (size > reserved_va) {
+return (abi_ulong)-1;
+}
+
+size = HOST_PAGE_ALIGN(size) + alignment;
+end_addr = start + size;
+if (end_addr > reserved_va) {
+end_addr = reserved_va;
+}
+addr = end_addr - 

[PATCH v2 20/43] bsd-user: Move per-cpu code into target_arch_cpu.h

2021-08-26 Thread imp
From: Warner Losh 

Move cpu_loop() into target_cpu_loop(), and put that in
target_arch_cpu.h for each architecture.

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
---
 bsd-user/i386/target_arch_cpu.c   |   1 +
 bsd-user/i386/target_arch_cpu.h   | 208 ++
 bsd-user/main.c   | 343 ++
 bsd-user/qemu.h   |   1 +
 bsd-user/x86_64/target_arch_cpu.c |   1 +
 bsd-user/x86_64/target_arch_cpu.h | 246 +
 6 files changed, 472 insertions(+), 328 deletions(-)
 create mode 100644 bsd-user/i386/target_arch_cpu.h
 create mode 100644 bsd-user/x86_64/target_arch_cpu.h

diff --git a/bsd-user/i386/target_arch_cpu.c b/bsd-user/i386/target_arch_cpu.c
index 7f2f755a11..71998e5ba5 100644
--- a/bsd-user/i386/target_arch_cpu.c
+++ b/bsd-user/i386/target_arch_cpu.c
@@ -1,6 +1,7 @@
 /*
  *  i386 cpu related code
  *
+ * Copyright (c) 2013 Stacey Son 
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
diff --git a/bsd-user/i386/target_arch_cpu.h b/bsd-user/i386/target_arch_cpu.h
new file mode 100644
index 00..dc97e8a617
--- /dev/null
+++ b/bsd-user/i386/target_arch_cpu.h
@@ -0,0 +1,208 @@
+/*
+ *  i386 cpu init and loop
+ *
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _TARGET_ARCH_CPU_H_
+#define _TARGET_ARCH_CPU_H_
+
+#include "target_arch.h"
+
+#define TARGET_DEFAULT_CPU_MODEL "qemu32"
+
+#define TARGET_CPU_RESET(cpu)
+
+static inline void target_cpu_init(CPUX86State *env,
+struct target_pt_regs *regs)
+{
+uint64_t *gdt_table;
+
+env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
+env->hflags |= HF_PE_MASK | HF_CPL_MASK;
+if (env->features[FEAT_1_EDX] & CPUID_SSE) {
+env->cr[4] |= CR4_OSFXSR_MASK;
+env->hflags |= HF_OSFXSR_MASK;
+}
+
+/* flags setup : we activate the IRQs by default as in user mode */
+env->eflags |= IF_MASK;
+
+/* register setup */
+env->regs[R_EAX] = regs->eax;
+env->regs[R_EBX] = regs->ebx;
+env->regs[R_ECX] = regs->ecx;
+env->regs[R_EDX] = regs->edx;
+env->regs[R_ESI] = regs->esi;
+env->regs[R_EDI] = regs->edi;
+env->regs[R_EBP] = regs->ebp;
+env->regs[R_ESP] = regs->esp;
+env->eip = regs->eip;
+
+/* interrupt setup */
+env->idt.limit = 255;
+
+env->idt.base = target_mmap(0, sizeof(uint64_t) * (env->idt.limit + 1),
+PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+bsd_i386_set_idt_base(env->idt.base);
+bsd_i386_set_idt(0, 0);
+bsd_i386_set_idt(1, 0);
+bsd_i386_set_idt(2, 0);
+bsd_i386_set_idt(3, 3);
+bsd_i386_set_idt(4, 3);
+bsd_i386_set_idt(5, 0);
+bsd_i386_set_idt(6, 0);
+bsd_i386_set_idt(7, 0);
+bsd_i386_set_idt(8, 0);
+bsd_i386_set_idt(9, 0);
+bsd_i386_set_idt(10, 0);
+bsd_i386_set_idt(11, 0);
+bsd_i386_set_idt(12, 0);
+bsd_i386_set_idt(13, 0);
+bsd_i386_set_idt(14, 0);
+bsd_i386_set_idt(15, 0);
+bsd_i386_set_idt(16, 0);
+bsd_i386_set_idt(17, 0);
+bsd_i386_set_idt(18, 0);
+bsd_i386_set_idt(19, 0);
+bsd_i386_set_idt(0x80, 3);
+
+/* segment setup */
+env->gdt.base = target_mmap(0, sizeof(uint64_t) * TARGET_GDT_ENTRIES,
+PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+env->gdt.limit = sizeof(uint64_t) * TARGET_GDT_ENTRIES - 1;
+gdt_table = g2h_untagged(env->gdt.base);
+
+bsd_i386_write_dt(_table[__USER_CS >> 3], 0, 0xf,
+DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
+(3 << DESC_DPL_SHIFT) | (0xa << DESC_TYPE_SHIFT));
+
+bsd_i386_write_dt(_table[__USER_DS >> 3], 0, 0xf,
+DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK |
+(3 << DESC_DPL_SHIFT) | (0x2 << DESC_TYPE_SHIFT));
+
+cpu_x86_load_seg(env, R_CS, __USER_CS);
+cpu_x86_load_seg(env, R_SS, __USER_DS);
+cpu_x86_load_seg(env, R_DS, __USER_DS);
+cpu_x86_load_seg(env, R_ES, __USER_DS);
+cpu_x86_load_seg(env, R_FS, __USER_DS);
+cpu_x86_load_seg(env, R_GS, __USER_DS);
+/* This hack makes Wine work... */
+env->segs[R_FS].selector = 0;
+}
+
+static inline void target_cpu_loop(CPUX86State *env)
+{
+CPUState *cs = env_cpu(env);
+int trapnr;
+abi_ulong pc;
+/* 

[PATCH v2 40/43] bsd-user: Implement interlock for atomic operations

2021-08-26 Thread imp
From: Warner Losh 

Implement the interlock in fork_start() and fork_end() to properly cope
with atomic operations and to safely keep state for parent and child
processes.

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/main.c | 23 +++
 1 file changed, 23 insertions(+)

diff --git a/bsd-user/main.c b/bsd-user/main.c
index d60a0a7ee2..e2ed9e32ba 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -69,15 +69,38 @@ unsigned long target_dflssiz = TARGET_DFLSSIZ;   /* initial 
data size limit */
 unsigned long target_maxssiz = TARGET_MAXSSIZ;   /* max stack size */
 unsigned long target_sgrowsiz = TARGET_SGROWSIZ; /* amount to grow stack */
 
+/* Helper routines for implementing atomic operations. */
 
 void fork_start(void)
 {
+start_exclusive();
+cpu_list_lock();
+mmap_fork_start();
 }
 
 void fork_end(int child)
 {
 if (child) {
+CPUState *cpu, *next_cpu;
+/*
+ * Child processes created by fork() only have a single thread.
+ * Discard information about the parent threads.
+ */
+CPU_FOREACH_SAFE(cpu, next_cpu) {
+if (cpu != thread_cpu) {
+QTAILQ_REMOVE_RCU(, cpu, node);
+}
+}
+mmap_fork_end(child);
+/* qemu_init_cpu_list() takes care of reinitializing the
+ * exclusive state, so we don't need to end_exclusive() here.
+ */
+qemu_init_cpu_list();
 gdbserver_fork(thread_cpu);
+} else {
+mmap_fork_end(child);
+cpu_list_unlock();
+end_exclusive();
 }
 }
 
-- 
2.32.0




Re: [PATCH v3 3/3] hw/usb/xhci: Always expect 'dma' link property to be set

2021-08-26 Thread Mark Cave-Ayland

On 26/08/2021 21:07, Philippe Mathieu-Daudé wrote:


Simplify by always passing a MemoryRegion property to the device.
Doing so we can move the AddressSpace field to the device struct,
removing need for heap allocation.

Update the MicroVM machine to pass the default system memory instead
of a NULL value.

We don't need to change the Versal machine, as the link property is
initialized as "versal.dwc3_alias" MemoryRegion alias.

Suggested-by: Peter Maydell 
Signed-off-by: Philippe Mathieu-Daudé 
---
Versal untested
---
  hw/usb/hcd-xhci.h|  2 +-
  hw/i386/microvm.c|  2 ++
  hw/usb/hcd-xhci-pci.c|  3 ++-
  hw/usb/hcd-xhci-sysbus.c | 13 ++---
  hw/usb/hcd-xhci.c| 20 ++--
  5 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h
index 98f598382ad..ea76ec4f277 100644
--- a/hw/usb/hcd-xhci.h
+++ b/hw/usb/hcd-xhci.h
@@ -180,7 +180,7 @@ typedef struct XHCIState {
  USBBus bus;
  MemoryRegion mem;
  MemoryRegion *dma_mr;
-AddressSpace *as;
+AddressSpace as;
  MemoryRegion mem_cap;
  MemoryRegion mem_oper;
  MemoryRegion mem_runtime;
diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index aba0c832190..2d55114a676 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -219,6 +219,8 @@ static void microvm_devices_init(MicrovmMachineState *mms)
  qdev_prop_set_uint32(dev, "slots", XHCI_MAXSLOTS);
  qdev_prop_set_uint32(dev, "p2", 8);
  qdev_prop_set_uint32(dev, "p3", 8);
+object_property_set_link(OBJECT(dev), "dma",
+ OBJECT(get_system_memory()), _abort);


In a way I could see why you may wish to explicitly set the DMA memory region, but a 
quick look around suggests that devices where the memory region is unspecified 
(typically using a link property called "dma_mr") then the default is assumed to be 
get_system_memory(). That seems a reasonably intuitive default to me, but presumably 
there is another type of mistake you're trying to guard against here?



  sysbus_realize(SYS_BUS_DEVICE(dev), _fatal);
  sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, MICROVM_XHCI_BASE);
  sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0,
diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c
index 24c528d210f..10f5cc374fe 100644
--- a/hw/usb/hcd-xhci-pci.c
+++ b/hw/usb/hcd-xhci-pci.c
@@ -116,6 +116,8 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, 
Error **errp)
  dev->config[0x60] = 0x30; /* release number */
  
  object_property_set_link(OBJECT(>xhci), "host", OBJECT(s), _abort);

+object_property_set_link(OBJECT(dev), "dma",
+ OBJECT(pci_dma_memory_region(dev)), _abort);
  s->xhci.intr_update = xhci_pci_intr_update;
  s->xhci.intr_raise = xhci_pci_intr_raise;
  if (!qdev_realize(DEVICE(>xhci), NULL, errp)) {
@@ -161,7 +163,6 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, 
Error **errp)
>xhci.mem, 0, OFF_MSIX_PBA,
0x90, NULL);
  }
-s->xhci.as = pci_get_address_space(dev);
  }
  
  static void usb_xhci_pci_exit(PCIDevice *dev)

diff --git a/hw/usb/hcd-xhci-sysbus.c b/hw/usb/hcd-xhci-sysbus.c
index a14e4381960..f212ce785bd 100644
--- a/hw/usb/hcd-xhci-sysbus.c
+++ b/hw/usb/hcd-xhci-sysbus.c
@@ -36,6 +36,11 @@ static void xhci_sysbus_realize(DeviceState *dev, Error 
**errp)
  {
  XHCISysbusState *s = XHCI_SYSBUS(dev);
  
+if (!s->xhci.dma_mr) {

+error_setg(errp, TYPE_XHCI_SYSBUS " 'dma' link not set");
+return;
+}
+
  object_property_set_link(OBJECT(>xhci), "host", OBJECT(s), NULL);
  if (!qdev_realize(DEVICE(>xhci), NULL, errp)) {
  return;
@@ -43,13 +48,7 @@ static void xhci_sysbus_realize(DeviceState *dev, Error 
**errp)
  s->irq = g_new0(qemu_irq, s->xhci.numintrs);
  qdev_init_gpio_out_named(dev, s->irq, SYSBUS_DEVICE_GPIO_IRQ,
   s->xhci.numintrs);
-if (s->xhci.dma_mr) {
-s->xhci.as =  g_malloc0(sizeof(AddressSpace));
-address_space_init(s->xhci.as, s->xhci.dma_mr, NULL);
-} else {
-s->xhci.as = _space_memory;
-}
-
+address_space_init(>xhci.as, s->xhci.dma_mr, "usb-xhci-dma");
  sysbus_init_mmio(SYS_BUS_DEVICE(dev), >xhci.mem);
  }


My understanding of the patch is that you're trying to avoid the heap allocation 
above (which is a good idea!) so from that perspective all you need is somewhere to 
store the AddressSpace used for the xhci-sysbus device, for which XHCISysbusState 
would be the natural choice.


It seems to me that the easiest approach is just to set the s->xhci.as pointer in 
xhci_sysbus_realize() in exactly the same way as usb_xhci_pci_realize() does:


typedef struct XHCISysbusState {
...
...
AddressSpace as;
} XHCISysbusState

static void xhci_sysbus_realize(DeviceState *dev, Error **errp)
{
XHCISysbusState *s = XHCI_SYSBUS(dev);
...

[PATCH v2 22/43] bsd-user: Include more things in qemu.h

2021-08-26 Thread imp
From: Warner Losh 

Include more header files to match bsd-user fork.

Signed-off-by: Warner Losh 
---
 bsd-user/qemu.h | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
index 5e4cbb40d4..55d71130bb 100644
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -18,12 +18,12 @@
 #define QEMU_H
 
 
+#include "qemu/osdep.h"
 #include "cpu.h"
 #include "exec/cpu_ldst.h"
+#include "exec/exec-all.h"
 
 #undef DEBUG_REMAP
-#ifdef DEBUG_REMAP
-#endif /* DEBUG_REMAP */
 
 #include "exec/user/abitypes.h"
 
@@ -36,6 +36,8 @@ enum BSDType {
 };
 extern enum BSDType bsd_type;
 
+#include "exec/user/thunk.h"
+#include "target_arch.h"
 #include "syscall_defs.h"
 #include "target_syscall.h"
 #include "exec/gdbstub.h"
-- 
2.32.0




[PATCH v2 14/43] bsd-user: elfload: simplify bswap a bit.

2021-08-26 Thread imp
From: Warner Losh 

Reduce the number of ifdefs by always calling the swapping routine, but
making them empty when swapping isn't needed.

Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/elfload.c | 97 ++
 1 file changed, 47 insertions(+), 50 deletions(-)

diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c
index 9c34e2ffcb..8a6a72bf05 100644
--- a/bsd-user/elfload.c
+++ b/bsd-user/elfload.c
@@ -67,13 +67,13 @@
 
 static inline void memcpy_fromfs(void *to, const void *from, unsigned long n)
 {
-memcpy(to, from, n);
+memcpy(to, from, n);
 }
 
 #ifdef BSWAP_NEEDED
 static void bswap_ehdr(struct elfhdr *ehdr)
 {
-bswap16s(>e_type);/* Object file type */
+bswap16s(>e_type);/* Object file type */
 bswap16s(>e_machine); /* Architecture */
 bswap32s(>e_version); /* Object file version */
 bswaptls(>e_entry);   /* Entry point virtual address */
@@ -81,37 +81,45 @@ static void bswap_ehdr(struct elfhdr *ehdr)
 bswaptls(>e_shoff);   /* Section header table file offset */
 bswap32s(>e_flags);   /* Processor-specific flags */
 bswap16s(>e_ehsize);  /* ELF header size in bytes */
-bswap16s(>e_phentsize);   /* Program header table entry 
size */
+bswap16s(>e_phentsize);   /* Program header table entry size */
 bswap16s(>e_phnum);   /* Program header table entry count */
-bswap16s(>e_shentsize);   /* Section header table entry 
size */
+bswap16s(>e_shentsize);   /* Section header table entry size */
 bswap16s(>e_shnum);   /* Section header table entry count */
-bswap16s(>e_shstrndx);/* Section header string table 
index */
+bswap16s(>e_shstrndx);/* Section header string table index */
 }
 
-static void bswap_phdr(struct elf_phdr *phdr)
+static void bswap_phdr(struct elf_phdr *phdr, int phnum)
 {
-bswap32s(>p_type);/* Segment type */
-bswaptls(>p_offset);  /* Segment file offset */
-bswaptls(>p_vaddr);   /* Segment virtual address */
-bswaptls(>p_paddr);   /* Segment physical address */
-bswaptls(>p_filesz);  /* Segment size in file */
-bswaptls(>p_memsz);   /* Segment size in memory */
-bswap32s(>p_flags);   /* Segment flags */
-bswaptls(>p_align);   /* Segment alignment */
+int i;
+
+for (i = 0; i < phnum; i++, phdr++) {
+bswap32s(>p_type);/* Segment type */
+bswap32s(>p_flags);   /* Segment flags */
+bswaptls(>p_offset);  /* Segment file offset */
+bswaptls(>p_vaddr);   /* Segment virtual address */
+bswaptls(>p_paddr);   /* Segment physical address */
+bswaptls(>p_filesz);  /* Segment size in file */
+bswaptls(>p_memsz);   /* Segment size in memory */
+bswaptls(>p_align);   /* Segment alignment */
+}
 }
 
-static void bswap_shdr(struct elf_shdr *shdr)
+static void bswap_shdr(struct elf_shdr *shdr, int shnum)
 {
-bswap32s(>sh_name);
-bswap32s(>sh_type);
-bswaptls(>sh_flags);
-bswaptls(>sh_addr);
-bswaptls(>sh_offset);
-bswaptls(>sh_size);
-bswap32s(>sh_link);
-bswap32s(>sh_info);
-bswaptls(>sh_addralign);
-bswaptls(>sh_entsize);
+int i;
+
+for (i = 0; i < shnum; i++, shdr++) {
+bswap32s(>sh_name);
+bswap32s(>sh_type);
+bswaptls(>sh_flags);
+bswaptls(>sh_addr);
+bswaptls(>sh_offset);
+bswaptls(>sh_size);
+bswap32s(>sh_link);
+bswap32s(>sh_info);
+bswaptls(>sh_addralign);
+bswaptls(>sh_entsize);
+}
 }
 
 static void bswap_sym(struct elf_sym *sym)
@@ -121,7 +129,15 @@ static void bswap_sym(struct elf_sym *sym)
 bswaptls(>st_size);
 bswap16s(>st_shndx);
 }
-#endif
+
+#else /* ! BSWAP_NEEDED */
+
+static void bswap_ehdr(struct elfhdr *ehdr) { }
+static void bswap_phdr(struct elf_phdr *phdr, int phnum) { }
+static void bswap_shdr(struct elf_shdr *shdr, int shnum) { }
+static void bswap_sym(struct elf_sym *sym) { }
+
+#endif /* ! BSWAP_NEEDED */
 
 /*
  * 'copy_elf_strings()' copies argument/envelope strings from user
@@ -367,9 +383,7 @@ static abi_ulong load_elf_interp(struct elfhdr 
*interp_elf_ex,
 last_bss = 0;
 error = 0;
 
-#ifdef BSWAP_NEEDED
 bswap_ehdr(interp_elf_ex);
-#endif
 /* First of all, some simple consistency checks */
 if ((interp_elf_ex->e_type != ET_EXEC &&
  interp_elf_ex->e_type != ET_DYN) ||
@@ -410,12 +424,7 @@ static abi_ulong load_elf_interp(struct elfhdr 
*interp_elf_ex,
 free(elf_phdata);
 return retval;
 }
-#ifdef BSWAP_NEEDED
-eppnt = elf_phdata;
-for (i = 0; ie_phnum; i++, eppnt++) {
-bswap_phdr(eppnt);
-}
-#endif
+bswap_phdr(elf_phdata, interp_elf_ex->e_phnum);
 
 if 

[PATCH v2 13/43] bsd-user: TARGET_NGROUPS unused in this file, remove

2021-08-26 Thread imp
From: Warner Losh 

Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/bsdload.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/bsd-user/bsdload.c b/bsd-user/bsdload.c
index 6aefc7a28b..5b3c061a45 100644
--- a/bsd-user/bsdload.c
+++ b/bsd-user/bsdload.c
@@ -19,8 +19,6 @@
 
 #include "qemu.h"
 
-#define TARGET_NGROUPS 32
-
 /* ??? This should really be somewhere else.  */
 abi_long memcpy_to_target(abi_ulong dest, const void *src,
   unsigned long len)
-- 
2.32.0




[PATCH v2 36/43] bsd-user: add stubbed out core dump support

2021-08-26 Thread imp
From: Warner Losh 

Add a stubbed-out version of the bsd-user fork's core dump support. This
allows elfload.c to be almost the same between what's upstream and
what's in qemu-project upstream w/o the burden of reviewing the core
dump support.

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
---
 bsd-user/elfcore.c | 10 ++
 bsd-user/elfload.c | 22 --
 bsd-user/qemu.h|  6 ++
 3 files changed, 36 insertions(+), 2 deletions(-)
 create mode 100644 bsd-user/elfcore.c

diff --git a/bsd-user/elfcore.c b/bsd-user/elfcore.c
new file mode 100644
index 00..c49d9280e2
--- /dev/null
+++ b/bsd-user/elfcore.c
@@ -0,0 +1,10 @@
+/* Stubbed out version of core dump support, explicitly in public domain */
+
+static int elf_core_dump(int signr, CPUArchState *env)
+{
+struct elf_note en = { 0 };
+
+bswap_note();
+
+return 0;
+}
diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c
index c0787a4e52..7632277d87 100644
--- a/bsd-user/elfload.c
+++ b/bsd-user/elfload.c
@@ -26,15 +26,17 @@
 static abi_ulong target_auxents;   /* Where the AUX entries are in target */
 static size_t target_auxents_sz;   /* Size of AUX entries including AT_NULL */
 
+#include "target_arch_reg.h"
 #include "target_os_elf.h"
 #include "target_os_stack.h"
 #include "target_os_thread.h"
-
-#include "elf.h"
+#include "target_os_user.h"
 
 abi_ulong target_stksiz;
 abi_ulong target_stkbas;
 
+static int elf_core_dump(int signr, CPUArchState *env);
+
 static inline void memcpy_fromfs(void *to, const void *from, unsigned long n)
 {
 memcpy(to, from, n);
@@ -100,15 +102,25 @@ static void bswap_sym(struct elf_sym *sym)
 bswap16s(>st_shndx);
 }
 
+static void bswap_note(struct elf_note *en)
+{
+bswap32s(>n_namesz);
+bswap32s(>n_descsz);
+bswap32s(>n_type);
+}
+
 #else /* ! BSWAP_NEEDED */
 
 static void bswap_ehdr(struct elfhdr *ehdr) { }
 static void bswap_phdr(struct elf_phdr *phdr, int phnum) { }
 static void bswap_shdr(struct elf_shdr *shdr, int shnum) { }
 static void bswap_sym(struct elf_sym *sym) { }
+static void bswap_note(struct elf_note *en) { }
 
 #endif /* ! BSWAP_NEEDED */
 
+#include "elfcore.c"
+
 /*
  * 'copy_elf_strings()' copies argument/envelope strings from user
  * memory to free pages in kernel mem. These are in a format ready
@@ -833,6 +845,12 @@ int load_elf_binary(struct bsd_binprm *bprm, struct 
target_pt_regs *regs,
 
 info->entry = elf_entry;
 
+#ifdef USE_ELF_CORE_DUMP
+bprm->core_dump = _core_dump;
+#else
+bprm->core_dump = NULL;
+#endif
+
 return 0;
 }
 
diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
index 3685156123..8d20554688 100644
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -51,6 +51,7 @@ extern enum BSDType bsd_type;
  * kernel
  */
 struct image_info {
+abi_ulong load_bias;
 abi_ulong load_addr;
 abi_ulong start_code;
 abi_ulong end_code;
@@ -65,6 +66,9 @@ struct image_info {
 abi_ulong entry;
 abi_ulong code_offset;
 abi_ulong data_offset;
+abi_ulong arg_start;
+abi_ulong arg_end;
+uint32_t  elf_flags;
 };
 
 #define MAX_SIGQUEUE_SIZE 1024
@@ -132,6 +136,7 @@ struct bsd_binprm {
 char **envp;
 char *filename; /* (Given) Name of binary */
 char *fullpath; /* Full path of binary */
+int (*core_dump)(int, CPUArchState *);
 };
 
 void do_init_thread(struct target_pt_regs *regs, struct image_info *infop);
@@ -145,6 +150,7 @@ int load_elf_binary(struct bsd_binprm *bprm, struct 
target_pt_regs *regs,
 struct image_info *info);
 int load_flt_binary(struct bsd_binprm *bprm, struct target_pt_regs *regs,
 struct image_info *info);
+int is_target_elf_binary(int fd);
 
 abi_long memcpy_to_target(abi_ulong dest, const void *src,
   unsigned long len);
-- 
2.32.0




[PATCH v2 41/43] bsd-user: Implement cpu_copy() helper routine

2021-08-26 Thread imp
From: Warner Losh 

cpu_copy should be called when processes are creating new threads. It
copies the current state of the CPU to a new cpu state needed for the
new thread.

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
Signed-off-by: Justin Hibbits 
Reviewed-by: Richard Henderson 
---
 bsd-user/main.c | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/bsd-user/main.c b/bsd-user/main.c
index e2ed9e32ba..b35bcf4d1e 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -180,6 +180,36 @@ void init_task_state(TaskState *ts)
 ts->sigqueue_table[i].next = NULL;
 }
 
+CPUArchState *cpu_copy(CPUArchState *env)
+{
+CPUState *cpu = env_cpu(env);
+CPUState *new_cpu = cpu_create(cpu_type);
+CPUArchState *new_env = new_cpu->env_ptr;
+CPUBreakpoint *bp;
+CPUWatchpoint *wp;
+
+/* Reset non arch specific state */
+cpu_reset(new_cpu);
+
+memcpy(new_env, env, sizeof(CPUArchState));
+
+/*
+ * Clone all break/watchpoints.
+ * Note: Once we support ptrace with hw-debug register access, make sure
+ * BP_CPU break/watchpoints are handled correctly on clone.
+ */
+QTAILQ_INIT(&new_cpu->breakpoints);
+QTAILQ_INIT(&new_cpu->watchpoints);
+QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) {
+cpu_breakpoint_insert(new_cpu, bp->pc, bp->flags, NULL);
+}
+QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
+cpu_watchpoint_insert(new_cpu, wp->vaddr, wp->len, wp->flags, NULL);
+}
+
+return new_env;
+}
+
 void gemu_log(const char *fmt, ...)
 {
 va_list ap;
-- 
2.32.0




[PATCH v2 15/43] bsd-user: assume pthreads and support of __thread

2021-08-26 Thread imp
From: Warner Losh 

All compilers for some time have supported this. Follow linux-user and
eliminate the #define THREAD and unconditionally insert __thread where
needed.

Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/main.c |  2 +-
 bsd-user/qemu.h | 10 +-
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/bsd-user/main.c b/bsd-user/main.c
index 7e1284c368..3f6f4080e8 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -309,7 +309,7 @@ static void usage(void)
 exit(1);
 }
 
-THREAD CPUState *thread_cpu;
+__thread CPUState *thread_cpu;
 
 bool qemu_cpu_is_self(CPUState *cpu)
 {
diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
index d1ab58a8eb..cf248ad3df 100644
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -40,12 +40,6 @@ extern enum BSDType bsd_type;
 #include "target_syscall.h"
 #include "exec/gdbstub.h"
 
-#if defined(CONFIG_USE_NPTL)
-#define THREAD __thread
-#else
-#define THREAD
-#endif
-
 /*
  * This struct is used to hold certain information about the image.  Basically,
  * it replicates in user space what would be certain task_struct fields in the
@@ -155,7 +149,7 @@ abi_long do_openbsd_syscall(void *cpu_env, int num, 
abi_long arg1,
 abi_long arg2, abi_long arg3, abi_long arg4,
 abi_long arg5, abi_long arg6);
 void gemu_log(const char *fmt, ...) GCC_FMT_ATTR(1, 2);
-extern THREAD CPUState *thread_cpu;
+extern __thread CPUState *thread_cpu;
 void cpu_loop(CPUArchState *env);
 char *target_strerror(int err);
 int get_osversion(void);
@@ -422,8 +416,6 @@ static inline void *lock_user_string(abi_ulong guest_addr)
 #define unlock_user_struct(host_ptr, guest_addr, copy)  \
 unlock_user(host_ptr, guest_addr, (copy) ? sizeof(*host_ptr) : 0)
 
-#if defined(CONFIG_USE_NPTL)
 #include <pthread.h>
-#endif
 
 #endif /* QEMU_H */
-- 
2.32.0




[PATCH v2 10/43] bsd-user: implement path searching

2021-08-26 Thread imp
From: Warner Losh 

Use the PATH to find the executable given a bare argument. We need to do
this so we can implement mixing native and emulated binaries (e.g.,
execing a x86 native binary from an emulated arm binary to optimize
parts of the build). By finding the binary, we will know how to exec it.

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
---
 bsd-user/bsdload.c | 36 +++-
 bsd-user/qemu.h|  3 ++-
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/bsd-user/bsdload.c b/bsd-user/bsdload.c
index 379015c744..32f7fd5dec 100644
--- a/bsd-user/bsdload.c
+++ b/bsd-user/bsdload.c
@@ -139,21 +139,55 @@ abi_ulong loader_build_argptr(int envc, int argc, 
abi_ulong sp,
 return sp;
 }
 
+static bool is_there(const char *candidate)
+{
+struct stat fin;
+
+/* XXX work around access(2) false positives for superuser */
+if (access(candidate, X_OK) == 0 && stat(candidate, &fin) == 0 &&
+S_ISREG(fin.st_mode) && (getuid() != 0 ||
+(fin.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) != 0)) {
+return true;
+}
+
+return false;
+}
+
 int loader_exec(const char *filename, char **argv, char **envp,
 struct target_pt_regs *regs, struct image_info *infop,
 struct bsd_binprm *bprm)
 {
+char *path, fullpath[PATH_MAX];
 int retval, i;
 
 bprm->p = TARGET_PAGE_SIZE * MAX_ARG_PAGES;
 for (i = 0; i < MAX_ARG_PAGES; i++) {   /* clear page-table */
 bprm->page[i] = NULL;
 }
-retval = open(filename, O_RDONLY);
+
+if (strchr(filename, '/') != NULL) {
+path = realpath(filename, fullpath);
+if (path == NULL) {
+/* Failed to resolve. */
+return -1;
+}
+if (!is_there(path)) {
+return -1;
+}
+} else {
+path = g_find_program_in_path(filename);
+if (path == NULL) {
+return -1;
+}
+}
+
+retval = open(path, O_RDONLY);
 if (retval < 0) {
+g_free(path);
 return retval;
 }
 
+bprm->fullpath = path;
 bprm->fd = retval;
 bprm->filename = (char *)filename;
 bprm->argc = count(argv);
diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
index 5237e35f9c..6b601ce4b5 100644
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -124,7 +124,8 @@ struct bsd_binprm {
 int argc, envc;
 char **argv;
 char **envp;
-char *filename; /* Name of binary */
+char *filename; /* (Given) Name of binary */
+char *fullpath; /* Full path of binary */
 };
 
 void do_init_thread(struct target_pt_regs *regs, struct image_info *infop);
-- 
2.32.0




[PATCH v2 26/43] bsd-user: *BSD specific siginfo definitions

2021-08-26 Thread imp
From: Warner Losh 

Add FreeBSD, NetBSD and OpenBSD values for the various signal info types
and defines to decode different signals to discover more information
about the specific signal types.

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
Acked-by: Richard Henderson 
---
 bsd-user/freebsd/target_os_siginfo.h | 145 +++
 bsd-user/freebsd/target_os_signal.h  |  78 ++
 bsd-user/i386/target_arch_signal.h   |  94 +
 bsd-user/netbsd/target_os_siginfo.h  |  82 +++
 bsd-user/netbsd/target_os_signal.h   |  70 +
 bsd-user/openbsd/target_os_siginfo.h |  82 +++
 bsd-user/openbsd/target_os_signal.h  |  70 +
 bsd-user/qemu.h  |   1 +
 bsd-user/syscall_defs.h  |  10 --
 bsd-user/x86_64/target_arch_signal.h |  94 +
 10 files changed, 716 insertions(+), 10 deletions(-)
 create mode 100644 bsd-user/freebsd/target_os_siginfo.h
 create mode 100644 bsd-user/freebsd/target_os_signal.h
 create mode 100644 bsd-user/i386/target_arch_signal.h
 create mode 100644 bsd-user/netbsd/target_os_siginfo.h
 create mode 100644 bsd-user/netbsd/target_os_signal.h
 create mode 100644 bsd-user/openbsd/target_os_siginfo.h
 create mode 100644 bsd-user/openbsd/target_os_signal.h
 create mode 100644 bsd-user/x86_64/target_arch_signal.h

diff --git a/bsd-user/freebsd/target_os_siginfo.h 
b/bsd-user/freebsd/target_os_siginfo.h
new file mode 100644
index 00..d2b9db659d
--- /dev/null
+++ b/bsd-user/freebsd/target_os_siginfo.h
@@ -0,0 +1,145 @@
+/*
+ *  FreeBSD siginfo related definitions
+ *
+ *  Copyright (c) 2013 Stacey D. Son
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _TARGET_OS_SIGINFO_H_
+#define _TARGET_OS_SIGINFO_H_
+
+#define TARGET_NSIG 128
+#define TARGET_NSIG_BPW (sizeof(uint32_t) * 8)
+#define TARGET_NSIG_WORDS   (TARGET_NSIG / TARGET_NSIG_BPW)
+
+/* this struct defines a stack used during syscall handling */
+typedef struct target_sigaltstack {
+abi_longss_sp;
+abi_ulong   ss_size;
+abi_longss_flags;
+} target_stack_t;
+
+typedef struct {
+uint32_t __bits[TARGET_NSIG_WORDS];
+} target_sigset_t;
+
+struct target_sigaction {
+abi_ulong   _sa_handler;
+int32_t sa_flags;
+target_sigset_t sa_mask;
+};
+
+typedef union target_sigval {
+int32_t sival_int;
+abi_ulong sival_ptr;
+int32_t sigval_int;
+abi_ulong sigval_ptr;
+} target_sigval_t;
+
+typedef struct target_siginfo {
+int32_t si_signo;   /* signal number */
+int32_t si_errno;   /* errno association */
+int32_t si_code;/* signal code */
+int32_t si_pid; /* sending process */
+int32_t si_uid; /* sender's ruid */
+int32_t si_status;  /* exit value */
+abi_ulong si_addr;  /* faulting instruction */
+union target_sigval si_value;   /* signal value */
+union {
+struct {
+int32_t _trapno;/* machine specific trap code */
+} _fault;
+
+/* POSIX.1b timers */
+struct {
+int32_t _timerid;
+int32_t _overrun;
+} _timer;
+
+struct {
+int32_t _mqd;
+} _mesgp;
+
+/* SIGPOLL */
+struct {
+int _band;  /* POLL_IN, POLL_OUT, POLL_MSG */
+} _poll;
+
+struct {
+abi_long __spare1__;
+int32_t  __spare2_[7];
+} __spare__;
+} _reason;
+} target_siginfo_t;
+
+struct target_sigevent {
+abi_int sigev_notify;
+abi_int sigev_signo;
+target_sigval_t sigev_value;
+union {
+abi_int _threadid;
+
+/* The kernel (and thus QEMU) never looks at these;
+ * they're only used as part of the ABI between a
+ * userspace program and libc.
+ */
+struct {
+abi_ulong _function;
+abi_ulong _attribute;
+} _sigev_thread;
+abi_ushort _kevent_flags;
+abi_long _pad[8];
+} _sigev_un;
+};
+
+#define target_si_signo si_signo
+#define target_si_code  si_code
+#define target_si_errno si_errno
+#define target_si_addr  si_addr
+
+/* SIGILL si_codes */
+#define TARGET_ILL_ILLOPC   (1) /* Illegal opcode. */
+#define TARGET_ILL_ILLOPN   (2) /* Illegal operand. */
+#define TARGET_ILL_ILLADR   (3) /* Illegal addressing mode. */

[PATCH v2 12/43] bsd-user: remove a.out support

2021-08-26 Thread imp
From: Warner Losh 

Remove still-born a.out support. The BSDs switched from a.out to ELF 20+ years
ago. It's out of scope for bsd-user, and what little support there was would
simply wind up at a not-implemented message. Simplify the whole mess by removing
it entirely. Should future support be required, it would be better to start from
scratch.

Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/bsdload.c |   9 +---
 bsd-user/elfload.c | 105 -
 bsd-user/qemu.h|   2 +-
 3 files changed, 21 insertions(+), 95 deletions(-)

diff --git a/bsd-user/bsdload.c b/bsd-user/bsdload.c
index 32f7fd5dec..6aefc7a28b 100644
--- a/bsd-user/bsdload.c
+++ b/bsd-user/bsdload.c
@@ -98,7 +98,7 @@ static int prepare_binprm(struct bsd_binprm *bprm)
 
 /* Construct the envp and argv tables on the target stack.  */
 abi_ulong loader_build_argptr(int envc, int argc, abi_ulong sp,
-  abi_ulong stringp, int push_ptr)
+  abi_ulong stringp)
 {
 int n = sizeof(abi_ulong);
 abi_ulong envp;
@@ -108,13 +108,6 @@ abi_ulong loader_build_argptr(int envc, int argc, 
abi_ulong sp,
 envp = sp;
 sp -= (argc + 1) * n;
 argv = sp;
-if (push_ptr) {
-/* FIXME - handle put_user() failures */
-sp -= n;
-put_user_ual(envp, sp);
-sp -= n;
-put_user_ual(argv, sp);
-}
 sp -= n;
 /* FIXME - handle put_user() failures */
 put_user_ual(argc, sp);
diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c
index e950732978..9c34e2ffcb 100644
--- a/bsd-user/elfload.c
+++ b/bsd-user/elfload.c
@@ -52,25 +52,6 @@
 
 #include "elf.h"
 
-struct exec
-{
-  unsigned int a_info;   /* Use macros N_MAGIC, etc for access */
-  unsigned int a_text;   /* length of text, in bytes */
-  unsigned int a_data;   /* length of data, in bytes */
-  unsigned int a_bss;/* length of uninitialized data area, in bytes */
-  unsigned int a_syms;   /* length of symbol table data in file, in bytes */
-  unsigned int a_entry;  /* start address */
-  unsigned int a_trsize; /* length of relocation info for text, in bytes */
-  unsigned int a_drsize; /* length of relocation info for data, in bytes */
-};
-
-
-#define N_MAGIC(exec) ((exec).a_info & 0xffff)
-#define OMAGIC 0407
-#define NMAGIC 0410
-#define ZMAGIC 0413
-#define QMAGIC 0314
-
 /* max code+data+bss space allocated to elf interpreter */
 #define INTERP_MAP_SIZE (32 * 1024 * 1024)
 
@@ -82,10 +63,6 @@ struct exec
 #define TARGET_ELF_PAGESTART(_v) ((_v) & ~(unsigned 
long)(TARGET_ELF_EXEC_PAGESIZE - 1))
 #define TARGET_ELF_PAGEOFFSET(_v) ((_v) & (TARGET_ELF_EXEC_PAGESIZE - 1))
 
-#define INTERPRETER_NONE 0
-#define INTERPRETER_AOUT 1
-#define INTERPRETER_ELF 2
-
 #define DLINFO_ITEMS 12
 
 static inline void memcpy_fromfs(void *to, const void *from, unsigned long n)
@@ -93,8 +70,6 @@ static inline void memcpy_fromfs(void *to, const void *from, 
unsigned long n)
 memcpy(to, from, n);
 }
 
-static int load_aout_interp(void *exptr, int interp_fd);
-
 #ifdef BSWAP_NEEDED
 static void bswap_ehdr(struct elfhdr *ehdr)
 {
@@ -300,7 +275,7 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, 
int envc,
struct elfhdr * exec,
abi_ulong load_addr,
abi_ulong load_bias,
-   abi_ulong interp_load_addr, int ibcs,
+   abi_ulong interp_load_addr,
struct image_info *info)
 {
 abi_ulong sp;
@@ -330,7 +305,7 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, 
int envc,
 size += DLINFO_ARCH_ITEMS * 2;
 #endif
 size += envc + argc + 2;
-size += (!ibcs ? 3 : 1);/* argc itself */
+size += 1;/* argc itself */
 size *= n;
 if (size & 15)
 sp -= 16 - (size & 15);
@@ -370,7 +345,7 @@ static abi_ulong create_elf_tables(abi_ulong p, int argc, 
int envc,
 #endif
 #undef NEW_AUX_ENT
 
-sp = loader_build_argptr(envc, argc, sp, p, !ibcs);
+sp = loader_build_argptr(envc, argc, sp, p);
 return sp;
 }
 
@@ -432,7 +407,7 @@ static abi_ulong load_elf_interp(struct elfhdr 
*interp_elf_ex,
 if (retval < 0) {
 perror("load_elf_interp");
 exit(-1);
-free (elf_phdata);
+free(elf_phdata);
 return retval;
 }
 #ifdef BSWAP_NEEDED
@@ -685,11 +660,9 @@ int load_elf_binary(struct bsd_binprm *bprm, struct 
target_pt_regs *regs,
 {
 struct elfhdr elf_ex;
 struct elfhdr interp_elf_ex;
-struct exec interp_ex;
 int interpreter_fd = -1; /* avoid warning */
 abi_ulong load_addr, load_bias;
 int load_addr_set = 0;
-unsigned int interpreter_type = INTERPRETER_NONE;
 int i;
 struct elf_phdr * elf_ppnt;
 struct elf_phdr *elf_phdata;
@@ -702,7 +675,6 @@ int 

[PATCH v2 37/43] bsd-user: elfload.c style catch up patch

2021-08-26 Thread imp
From: Warner Losh 

Various style fixes to elfload.c that were too painful to make earlier
in this series.

Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/elfload.c | 210 ++---
 1 file changed, 105 insertions(+), 105 deletions(-)

diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c
index 7632277d87..bdf18f3dce 100644
--- a/bsd-user/elfload.c
+++ b/bsd-user/elfload.c
@@ -143,10 +143,12 @@ static abi_ulong copy_elf_strings(int argc, char **argv, 
void **page,
 exit(-1);
 }
 tmp1 = tmp;
-while (*tmp++);
+while (*tmp++) {
+continue;
+}
 len = tmp - tmp1;
 if (p < len) {  /* this shouldn't happen - 128kB */
-return 0;
+return 0;
 }
 while (len) {
 --p; --tmp; --len;
@@ -156,14 +158,14 @@ static abi_ulong copy_elf_strings(int argc, char **argv, 
void **page,
 if (!pag) {
 pag = g_try_malloc0(TARGET_PAGE_SIZE);
 page[p / TARGET_PAGE_SIZE] = pag;
-if (!pag)
+if (!pag) {
 return 0;
+}
 }
 }
 if (len == 0 || offset == 0) {
 *(pag + offset) = *tmp;
-}
-else {
+} else {
   int bytes_to_copy = (len > offset) ? offset : len;
   tmp -= bytes_to_copy;
   p -= bytes_to_copy;
@@ -182,16 +184,14 @@ static void setup_arg_pages(struct bsd_binprm *bprm, 
struct image_info *info,
 abi_ulong stack_base, size;
 abi_long addr;
 
-/* Create enough stack to hold everything.  If we don't use
- * it for args, we'll use it for something else...
+/*
+ * Create enough stack to hold everything.  If we don't use it for args,
+ * we'll use it for something else...
  */
 size = target_dflssiz;
 stack_base = TARGET_USRSTACK - size;
-addr = target_mmap(stack_base,
-size + qemu_host_page_size,
-PROT_READ | PROT_WRITE,
-MAP_PRIVATE | MAP_ANON,
--1, 0);
+addr = target_mmap(stack_base , size + qemu_host_page_size,
+PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
 if (addr == -1) {
 perror("stk mmap");
 exit(-1);
@@ -210,55 +210,60 @@ static void setup_arg_pages(struct bsd_binprm *bprm, 
struct image_info *info,
 
 static void set_brk(abi_ulong start, abi_ulong end)
 {
-/* page-align the start and end addresses... */
-start = HOST_PAGE_ALIGN(start);
-end = HOST_PAGE_ALIGN(end);
-if (end <= start)
-return;
-if (target_mmap(start, end - start,
-   PROT_READ | PROT_WRITE | PROT_EXEC,
-   MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0) == -1) {
-perror("cannot mmap brk");
-exit(-1);
-}
+/* page-align the start and end addresses... */
+start = HOST_PAGE_ALIGN(start);
+end = HOST_PAGE_ALIGN(end);
+if (end <= start) {
+return;
+}
+if (target_mmap(start, end - start, PROT_READ | PROT_WRITE | PROT_EXEC,
+MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0) == -1) {
+perror("cannot mmap brk");
+exit(-1);
+}
 }
 
 
-/* We need to explicitly zero any fractional pages after the data
-   section (i.e. bss).  This would contain the junk from the file that
-   should not be in memory. */
+/*
+ * We need to explicitly zero any fractional pages after the data
+ * section (i.e. bss).  This would contain the junk from the file that
+ * should not be in memory.
+ */
 static void padzero(abi_ulong elf_bss, abi_ulong last_bss)
 {
-abi_ulong nbyte;
+abi_ulong nbyte;
 
-if (elf_bss >= last_bss)
-return;
+if (elf_bss >= last_bss) {
+return;
+}
 
-/* XXX: this is really a hack : if the real host page size is
-   smaller than the target page size, some pages after the end
-   of the file may not be mapped. A better fix would be to
-   patch target_mmap(), but it is more complicated as the file
-   size must be known */
-if (qemu_real_host_page_size < qemu_host_page_size) {
-abi_ulong end_addr, end_addr1;
-end_addr1 = REAL_HOST_PAGE_ALIGN(elf_bss);
-end_addr = HOST_PAGE_ALIGN(elf_bss);
-if (end_addr1 < end_addr) {
-mmap((void *)g2h_untagged(end_addr1), end_addr - end_addr1,
- PROT_READ | PROT_WRITE | PROT_EXEC,
- MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0);
-}
+/*
+ * XXX: this is really a hack : if the real host page size is
+ * smaller than the target page size, some pages after the end
+ * of the file may not be mapped. A better fix 

[PATCH v2 27/43] bsd-user: Move stack initialization into a per-os file.

2021-08-26 Thread imp
From: Warner Losh 

Move all of the stack initialization into target_os_stack.h. Each BSD
sets up processes a little differently.

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/freebsd/target_os_stack.h | 180 +
 bsd-user/netbsd/target_os_stack.h  |  56 +
 bsd-user/openbsd/target_os_stack.h |  56 +
 3 files changed, 292 insertions(+)
 create mode 100644 bsd-user/freebsd/target_os_stack.h
 create mode 100644 bsd-user/netbsd/target_os_stack.h
 create mode 100644 bsd-user/openbsd/target_os_stack.h

diff --git a/bsd-user/freebsd/target_os_stack.h 
b/bsd-user/freebsd/target_os_stack.h
new file mode 100644
index 00..93e353ca61
--- /dev/null
+++ b/bsd-user/freebsd/target_os_stack.h
@@ -0,0 +1,180 @@
+/*
+ *  FreeBSD setup_initial_stack() implementation.
+ *
+ *  Copyright (c) 2013-14 Stacey D. Son
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _TARGET_OS_STACK_H_
+#define _TARGET_OS_STACK_H_
+
+#include 
+#include "target_arch_sigtramp.h"
+#include "qemu/guest-random.h"
+
+/*
+ * The initial FreeBSD stack is as follows:
+ * (see kern/kern_exec.c exec_copyout_strings() )
+ *
+ *  Hi Address -> char **ps_argvstr  (struct ps_strings for ps, w, etc.)
+ *unsigned ps_nargvstr
+ *char **ps_envstr
+ *  PS_STRINGS -> unsigned ps_nenvstr
+ *
+ *machine dependent sigcode (sv_sigcode of size
+ *   sv_szsigcode)
+ *
+ *execpath  (absolute image path for rtld)
+ *
+ *SSP Canary(sizeof(long) * 8)
+ *
+ *page sizes array  (usually sizeof(u_long) )
+ *
+ *  "destp" ->argv, env strings (up to 262144 bytes)
+ */
+static inline int setup_initial_stack(struct bsd_binprm *bprm,
+abi_ulong *ret_addr, abi_ulong *stringp)
+{
+int i;
+abi_ulong stack_hi_addr;
+size_t execpath_len, stringspace;
+abi_ulong destp, argvp, envp, p;
+struct target_ps_strings ps_strs;
+char canary[sizeof(abi_long) * 8];
+
+stack_hi_addr = p = target_stkbas + target_stksiz;
+
+/* Save some space for ps_strings. */
+p -= sizeof(struct target_ps_strings);
+
+/* Add machine dependent sigcode. */
+p -= TARGET_SZSIGCODE;
+if (setup_sigtramp(p, (unsigned)offsetof(struct target_sigframe, sf_uc),
+TARGET_FREEBSD_NR_sigreturn)) {
+errno = EFAULT;
+return -1;
+}
+if (bprm->fullpath) {
+execpath_len = strlen(bprm->fullpath) + 1;
+p -= roundup(execpath_len, sizeof(abi_ulong));
+if (memcpy_to_target(p, bprm->fullpath, execpath_len)) {
+errno = EFAULT;
+return -1;
+}
+}
+/* Add canary for SSP. */
+qemu_guest_getrandom_nofail(canary, sizeof(canary));
+p -= roundup(sizeof(canary), sizeof(abi_ulong));
+if (memcpy_to_target(p, canary, sizeof(canary))) {
+errno = EFAULT;
+return -1;
+}
+/* Add page sizes array. */
+p -= sizeof(abi_ulong);
+if (put_user_ual(TARGET_PAGE_SIZE, p)) {
+errno = EFAULT;
+return -1;
+}
+/*
+ * Deviate from FreeBSD stack layout: force stack to new page here
+ * so that signal trampoline is not sharing the page with user stack
+ * frames. This is actively harmful in qemu as it marks pages with
+ * code it translated as read-only, which is somewhat problematic
+ * for user trying to use the stack as intended.
+ */
+p = rounddown(p, TARGET_PAGE_SIZE);
+
+/* Calculate the string space needed */
+stringspace = 0;
+for (i = 0; i < bprm->argc; ++i) {
+stringspace += strlen(bprm->argv[i]) + 1;
+}
+for (i = 0; i < bprm->envc; ++i) {
+stringspace += strlen(bprm->envp[i]) + 1;
+}
+if (stringspace > TARGET_ARG_MAX) {
+   errno = ENOMEM;
+   return -1;
+}
+/* Make room for the argv and envp strings */
+destp = rounddown(p - stringspace, sizeof(abi_ulong));
+p = argvp = destp - (bprm->argc + bprm->envc + 2) * sizeof(abi_ulong);
+/* Remember the strings pointer */
+if (stringp)
+*stringp = destp;
+/*
+ * Add argv strings.  Note that the argv[] vectors are added by
+ * loader_build_argptr()
+ */
+/* XXX need to make room for auxargs */
+

[PATCH v2 11/43] bsd-user: Eliminate elf personality

2021-08-26 Thread imp
From: Warner Losh 

The linux kernel supports a number of different ELF binaries. The Linux userland
emulator inherited some of that. And we inherited it from there. However, for
BSD there's only one kind of ELF file supported per platform, so there's no need
to cope with historical quirks. Simplify the code as a result.

Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/elfload.c | 87 --
 bsd-user/qemu.h|  1 -
 2 files changed, 88 deletions(-)

diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c
index 639673f5b7..e950732978 100644
--- a/bsd-user/elfload.c
+++ b/bsd-user/elfload.c
@@ -25,66 +25,6 @@
 
 #include "target_arch_elf.h"
 
-/* from personality.h */
-
-/*
- * Flags for bug emulation.
- *
- * These occupy the top three bytes.
- */
-enum {
-ADDR_NO_RANDOMIZE = 0x004,  /* disable randomization of VA 
space */
-FDPIC_FUNCPTRS =0x008,  /* userspace function ptrs 
point to descriptors
- * (signal handling)
- */
-MMAP_PAGE_ZERO =0x010,
-ADDR_COMPAT_LAYOUT =0x020,
-READ_IMPLIES_EXEC = 0x040,
-ADDR_LIMIT_32BIT =  0x080,
-SHORT_INODE =   0x100,
-WHOLE_SECONDS = 0x200,
-STICKY_TIMEOUTS =   0x400,
-ADDR_LIMIT_3GB =0x800,
-};
-
-/*
- * Personality types.
- *
- * These go in the low byte.  Avoid using the top bit, it will
- * conflict with error returns.
- */
-enum {
-PER_LINUX = 0x,
-PER_LINUX_32BIT =   0x | ADDR_LIMIT_32BIT,
-PER_LINUX_FDPIC =   0x | FDPIC_FUNCPTRS,
-PER_SVR4 =  0x0001 | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
-PER_SVR3 =  0x0002 | STICKY_TIMEOUTS | SHORT_INODE,
-PER_SCOSVR3 =   0x0003 | STICKY_TIMEOUTS |
- WHOLE_SECONDS | SHORT_INODE,
-PER_OSR5 =  0x0003 | STICKY_TIMEOUTS | WHOLE_SECONDS,
-PER_WYSEV386 =  0x0004 | STICKY_TIMEOUTS | SHORT_INODE,
-PER_ISCR4 = 0x0005 | STICKY_TIMEOUTS,
-PER_BSD =   0x0006,
-PER_SUNOS = 0x0006 | STICKY_TIMEOUTS,
-PER_XENIX = 0x0007 | STICKY_TIMEOUTS | SHORT_INODE,
-PER_LINUX32 =   0x0008,
-PER_LINUX32_3GB =   0x0008 | ADDR_LIMIT_3GB,
-PER_IRIX32 =0x0009 | STICKY_TIMEOUTS,/* IRIX5 32-bit */
-PER_IRIXN32 =   0x000a | STICKY_TIMEOUTS,/* IRIX6 new 32-bit */
-PER_IRIX64 =0x000b | STICKY_TIMEOUTS,/* IRIX6 64-bit */
-PER_RISCOS =0x000c,
-PER_SOLARIS =   0x000d | STICKY_TIMEOUTS,
-PER_UW7 =   0x000e | STICKY_TIMEOUTS | MMAP_PAGE_ZERO,
-PER_OSF4 =  0x000f,  /* OSF/1 v4 */
-PER_HPUX =  0x0010,
-PER_MASK =  0x00ff,
-};
-
-/*
- * Return the base personality without flags.
- */
-#define personality(pers)   (pers & PER_MASK)
-
 /* this flag is uneffective under linux too, should be deleted */
 #ifndef MAP_DENYWRITE
 #define MAP_DENYWRITE 0
@@ -750,7 +690,6 @@ int load_elf_binary(struct bsd_binprm *bprm, struct 
target_pt_regs *regs,
 abi_ulong load_addr, load_bias;
 int load_addr_set = 0;
 unsigned int interpreter_type = INTERPRETER_NONE;
-unsigned char ibcs2_interpreter;
 int i;
 struct elf_phdr * elf_ppnt;
 struct elf_phdr *elf_phdata;
@@ -765,7 +704,6 @@ int load_elf_binary(struct bsd_binprm *bprm, struct 
target_pt_regs *regs,
 #endif
 char passed_fileno[6];
 
-ibcs2_interpreter = 0;
 load_addr = 0;
 load_bias = 0;
 elf_ex = *((struct elfhdr *) bprm->buf);  /* exec-header */
@@ -856,20 +794,6 @@ int load_elf_binary(struct bsd_binprm *bprm, struct 
target_pt_regs *regs,
 exit(-1);
 }
 
-/* If the program interpreter is one of these two,
-   then assume an iBCS2 image. Otherwise assume
-   a native linux image. */
-
-/* JRP - Need to add X86 lib dir stuff here... */
-
-if (strcmp(elf_interpreter, "/usr/lib/libc.so.1") == 0 ||
-strcmp(elf_interpreter, "/usr/lib/ld.so.1") == 0) {
-  ibcs2_interpreter = 1;
-}
-
-#if 0
-printf("Using ELF interpreter %s\n", path(elf_interpreter));
-#endif
 if (retval >= 0) {
 retval = open(path(elf_interpreter), O_RDONLY);
 if (retval >= 0) {
@@ -1099,7 +1023,6 @@ int load_elf_binary(struct bsd_binprm *bprm, struct 
target_pt_regs *regs,
 load_symbols(&elf_ex, bprm->fd);
 
 if (interpreter_type != INTERPRETER_AOUT) close(bprm->fd);
-info->personality = (ibcs2_interpreter ? 

[PATCH v2 09/43] bsd-user: Fix calculation of size to allocate

2021-08-26 Thread imp
From: Warner Losh 

It was incorrect to subtract off the size of an unsigned int here.  In
bsd-user fork, this change was made when moving the arch specific items
to specific files.  The size in BSD that's available for the arguments
does not need a return address subtracted from it.

Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/bsdload.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/bsd-user/bsdload.c b/bsd-user/bsdload.c
index 5282a7c4f2..379015c744 100644
--- a/bsd-user/bsdload.c
+++ b/bsd-user/bsdload.c
@@ -143,10 +143,9 @@ int loader_exec(const char *filename, char **argv, char 
**envp,
 struct target_pt_regs *regs, struct image_info *infop,
 struct bsd_binprm *bprm)
 {
-int retval;
-int i;
+int retval, i;
 
-bprm->p = TARGET_PAGE_SIZE * MAX_ARG_PAGES - sizeof(unsigned int);
+bprm->p = TARGET_PAGE_SIZE * MAX_ARG_PAGES;
 for (i = 0; i < MAX_ARG_PAGES; i++) {   /* clear page-table */
 bprm->page[i] = NULL;
 }
-- 
2.32.0




[PATCH v2 35/43] bsd-user: Add target_os_user.h to capture the user/kernel structures

2021-08-26 Thread imp
From: Warner Losh 

This file evolved over the years to capture the user/kernel interfaces,
including those that changed over time.

Signed-off-by: Stacey Son 
Signed-off-by: Michal Meloun 
Signed-off-by: Warner Losh 
Acked-by: Richard Henderson 
---
 bsd-user/freebsd/target_os_user.h | 429 ++
 1 file changed, 429 insertions(+)
 create mode 100644 bsd-user/freebsd/target_os_user.h

diff --git a/bsd-user/freebsd/target_os_user.h 
b/bsd-user/freebsd/target_os_user.h
new file mode 100644
index 00..235cd35a5c
--- /dev/null
+++ b/bsd-user/freebsd/target_os_user.h
@@ -0,0 +1,429 @@
+/*
+ *  sys/user.h definitions
+ *
+ *  Copyright (c) 2015 Stacey D. Son (sson at FreeBSD)
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _TARGET_OS_USER_H_
+#define _TARGET_OS_USER_H_
+
+/*
+ * from sys/priority.h
+ */
+struct target_priority {
+uint8_t pri_class;  /* Scheduling class. */
+uint8_t pri_level;  /* Normal priority level. */
+uint8_t pri_native; /* Priority before propogation. */
+uint8_t pri_user;   /* User priority based on p_cpu and p_nice. */
+};
+
+/*
+ * sys/caprights.h
+ */
+#define TARGET_CAP_RIGHTS_VERSION  0
+
+typedef struct target_cap_rights {
+uint64_tcr_rights[TARGET_CAP_RIGHTS_VERSION + 2];
+} target_cap_rights_t;
+
+/*
+ * From sys/_socketaddr_storage.h
+ *
+ */
+#define TARGET_SS_MAXSIZE 128U
+#define TARGET_SS_ALIGNSIZE   (sizeof(__int64_t))
+#define TARGET_SS_PAD1SIZE(TARGET_SS_ALIGNSIZE - sizeof(unsigned char) - \
+sizeof(uint8_t))
+#define TARGET_SS_PAD2SIZE(TARGET_SS_MAXSIZE - sizeof(unsigned char) - \
+sizeof(uint8_t) - TARGET_SS_PAD1SIZE - TARGET_SS_ALIGNSIZE)
+
+struct target_sockaddr_storage {
+unsigned char   ss_len; /* address length */
+uint8_t ss_family;  /* address family */
+char__ss_pad1[TARGET_SS_PAD1SIZE];
+__int64_t   __ss_align; /* force desired struct alignment */
+char__ss_pad2[TARGET_SS_PAD2SIZE];
+};
+
+/*
+ * from sys/user.h
+ */
+#if defined(__FreeBSD_version) && __FreeBSD_version >= 1200031
+#define TARGET_KI_NSPARE_INT    2
+#elif defined(__FreeBSD_version) && __FreeBSD_version >= 1100000
+#define TARGET_KI_NSPARE_INT    4
+#elif defined(__FreeBSD_version) && __FreeBSD_version >= 1000000
+#define TARGET_KI_NSPARE_INT    7
+#else
+#define TARGET_KI_NSPARE_INT    9
+#endif /* ! __FreeBSD_version >= 1000000 */
+#define TARGET_KI_NSPARE_LONG   12
+#define TARGET_KI_NSPARE_PTR    6
+
+#define TARGET_WMESGLEN 8
+#define TARGET_LOCKNAMELEN  8
+#define TARGET_TDNAMLEN 16
+#define TARGET_COMMLEN  19
+#define TARGET_KI_EMULNAMELEN   16
+#define TARGET_KI_NGROUPS   16
+#define TARGET_LOGNAMELEN   17
+#define TARGET_LOGINCLASSLEN17
+
+#define TARGET_KF_TYPE_NONE 0
+#define TARGET_KF_TYPE_VNODE1
+#define TARGET_KF_TYPE_SOCKET   2
+#define TARGET_KF_TYPE_PIPE 3
+#define TARGET_KF_TYPE_FIFO 4
+#define TARGET_KF_TYPE_KQUEUE   5
+#define TARGET_KF_TYPE_CRYPTO   6
+#define TARGET_KF_TYPE_MQUEUE   7
+#define TARGET_KF_TYPE_SHM  8
+#define TARGET_KF_TYPE_SEM  9
+#define TARGET_KF_TYPE_PTS  10
+#define TARGET_KF_TYPE_PROCDESC 11
+#define TARGET_KF_TYPE_DEV  12
+#define TARGET_KF_TYPE_UNKNOWN  255
+
+struct target_kinfo_proc {
+int32_t ki_structsize;  /* size of this structure */
+int32_t ki_layout;  /* reserved: layout identifier */
+abi_ulong   ki_args;/* address of command arguments */
+abi_ulong   ki_paddr;   /* address of proc */
+abi_ulong   ki_addr;/* kernel virtual addr of u-area */
+abi_ulong   ki_tracep;  /* pointer to trace file */
+abi_ulong   ki_textvp;  /* pointer to executable file */
+abi_ulong   ki_fd;  /* pointer to open file info */
+abi_ulong   ki_vmspace; /* pointer to kernel vmspace struct */
+abi_ulong   ki_wchan;   /* sleep address */
+int32_t ki_pid; /* Process identifier */
+int32_t ki_ppid;/* parent process id */
+int32_t ki_pgid;/* process group id */
+int32_t ki_tpgid;

[PATCH v2 08/43] bsd-user: pass the bsd_param into loader_exec

2021-08-26 Thread imp
From: Warner Losh 

Pass the bsd_binprm into loader_exec, and adjust.

Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/bsdload.c | 37 +++--
 bsd-user/main.c|  7 ++-
 bsd-user/qemu.h|  3 ++-
 3 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/bsd-user/bsdload.c b/bsd-user/bsdload.c
index ec71c5e923..5282a7c4f2 100644
--- a/bsd-user/bsdload.c
+++ b/bsd-user/bsdload.c
@@ -140,35 +140,36 @@ abi_ulong loader_build_argptr(int envc, int argc, 
abi_ulong sp,
 }
 
 int loader_exec(const char *filename, char **argv, char **envp,
-struct target_pt_regs *regs, struct image_info *infop)
+struct target_pt_regs *regs, struct image_info *infop,
+struct bsd_binprm *bprm)
 {
-struct bsd_binprm bprm;
 int retval;
 int i;
 
-bprm.p = TARGET_PAGE_SIZE * MAX_ARG_PAGES - sizeof(unsigned int);
-for (i = 0 ; i < MAX_ARG_PAGES ; i++) { /* clear page-table */
-bprm.page[i] = NULL;
+bprm->p = TARGET_PAGE_SIZE * MAX_ARG_PAGES - sizeof(unsigned int);
+for (i = 0; i < MAX_ARG_PAGES; i++) {   /* clear page-table */
+bprm->page[i] = NULL;
 }
 retval = open(filename, O_RDONLY);
 if (retval < 0) {
 return retval;
 }
-bprm.fd = retval;
-bprm.filename = (char *)filename;
-bprm.argc = count(argv);
-bprm.argv = argv;
-bprm.envc = count(envp);
-bprm.envp = envp;
 
-retval = prepare_binprm(&bprm);
+bprm->fd = retval;
+bprm->filename = (char *)filename;
+bprm->argc = count(argv);
+bprm->argv = argv;
+bprm->envc = count(envp);
+bprm->envp = envp;
+
+retval = prepare_binprm(bprm);
 
 if (retval >= 0) {
-if (bprm.buf[0] == 0x7f
-&& bprm.buf[1] == 'E'
-&& bprm.buf[2] == 'L'
-&& bprm.buf[3] == 'F') {
-retval = load_elf_binary(&bprm, regs, infop);
+if (bprm->buf[0] == 0x7f
+&& bprm->buf[1] == 'E'
+&& bprm->buf[2] == 'L'
+&& bprm->buf[3] == 'F') {
+retval = load_elf_binary(bprm, regs, infop);
 } else {
 fprintf(stderr, "Unknown binary format\n");
 return -1;
@@ -183,7 +184,7 @@ int loader_exec(const char *filename, char **argv, char 
**envp,
 
 /* Something went wrong, return the inode and free the argument pages*/
 for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
-g_free(bprm.page[i]);
+g_free(bprm->page[i]);
 }
 return retval;
 }
diff --git a/bsd-user/main.c b/bsd-user/main.c
index 39c4a0f33c..7e1284c368 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -343,6 +343,7 @@ int main(int argc, char **argv)
 const char *log_mask = NULL;
 struct target_pt_regs regs1, *regs = &regs1;
 struct image_info info1, *info = &info1;
+struct bsd_binprm bprm;
 TaskState ts1, *ts = &ts1;
 CPUArchState *env;
 CPUState *cpu;
@@ -499,6 +500,9 @@ int main(int argc, char **argv)
 /* Zero out regs */
 memset(regs, 0, sizeof(struct target_pt_regs));
 
+/* Zero bsd params */
+memset(&bprm, 0, sizeof(bprm));
+
 /* Zero out image_info */
 memset(info, 0, sizeof(struct image_info));
 
@@ -566,7 +570,8 @@ int main(int argc, char **argv)
 }
 }
 
-if (loader_exec(filename, argv + optind, target_environ, regs, info) != 0) 
{
+if (loader_exec(filename, argv+optind, target_environ, regs, info,
+&bprm) != 0) {
 printf("Error loading %s\n", filename);
 _exit(1);
 }
diff --git a/bsd-user/qemu.h b/bsd-user/qemu.h
index c02e8a5ca1..5237e35f9c 100644
--- a/bsd-user/qemu.h
+++ b/bsd-user/qemu.h
@@ -131,7 +131,8 @@ void do_init_thread(struct target_pt_regs *regs, struct 
image_info *infop);
 abi_ulong loader_build_argptr(int envc, int argc, abi_ulong sp,
   abi_ulong stringp, int push_ptr);
 int loader_exec(const char *filename, char **argv, char **envp,
- struct target_pt_regs *regs, struct image_info *infop);
+struct target_pt_regs *regs, struct image_info *infop,
+struct bsd_binprm *bprm);
 
 int load_elf_binary(struct bsd_binprm *bprm, struct target_pt_regs *regs,
 struct image_info *info);
-- 
2.32.0




[PATCH v2 19/43] bsd-user: start to move target CPU functions to target_arch*

2021-08-26 Thread imp
From: Warner Losh 

Move the CPU functions into target_arch_cpu.c that are unique to each
CPU. These are defined in target_arch.h.

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/i386/target_arch.h   | 31 +
 bsd-user/i386/target_arch_cpu.c   | 75 +++
 bsd-user/main.c   | 12 -
 bsd-user/x86_64/target_arch.h | 31 +
 bsd-user/x86_64/target_arch_cpu.c | 75 +++
 configure |  7 +--
 meson.build   |  8 +++-
 7 files changed, 219 insertions(+), 20 deletions(-)
 create mode 100644 bsd-user/i386/target_arch.h
 create mode 100644 bsd-user/i386/target_arch_cpu.c
 create mode 100644 bsd-user/x86_64/target_arch.h
 create mode 100644 bsd-user/x86_64/target_arch_cpu.c

diff --git a/bsd-user/i386/target_arch.h b/bsd-user/i386/target_arch.h
new file mode 100644
index 00..73e9a028fe
--- /dev/null
+++ b/bsd-user/i386/target_arch.h
@@ -0,0 +1,31 @@
+/*
+ * Intel x86 specific prototypes for bsd-user
+ *
+ *  Copyright (c) 2013 Stacey D. Son
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _TARGET_ARCH_H_
+#define _TARGET_ARCH_H_
+
+/* target_arch_cpu.c */
+void bsd_i386_write_dt(void *ptr, unsigned long addr, unsigned long limit,
+int flags);
+void bsd_i386_set_idt(int n, unsigned int dpl);
+void bsd_i386_set_idt_base(uint64_t base);
+
+#define target_cpu_set_tls(env, newtls)
+
+#endif /* ! _TARGET_ARCH_H_ */
diff --git a/bsd-user/i386/target_arch_cpu.c b/bsd-user/i386/target_arch_cpu.c
new file mode 100644
index 00..7f2f755a11
--- /dev/null
+++ b/bsd-user/i386/target_arch_cpu.c
@@ -0,0 +1,75 @@
+/*
+ *  i386 cpu related code
+ *
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include 
+
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "qemu.h"
+#include "qemu/timer.h"
+
+#include "target_arch.h"
+
+static uint64_t *idt_table;
+
+uint64_t cpu_get_tsc(CPUX86State *env)
+{
+return cpu_get_host_ticks();
+}
+
+int cpu_get_pic_interrupt(CPUX86State *env)
+{
+return -1;
+}
+
+void bsd_i386_write_dt(void *ptr, unsigned long addr, unsigned long limit,
+ int flags)
+{
+unsigned int e1, e2;
+uint32_t *p;
+e1 = (addr << 16) | (limit & 0xffff);
+e2 = ((addr >> 16) & 0xff) | (addr & 0xff000000) | (limit & 0x000f0000);
+e2 |= flags;
+p = ptr;
+p[0] = tswap32(e1);
+p[1] = tswap32(e2);
+}
+
+
+static void set_gate(void *ptr, unsigned int type, unsigned int dpl,
+ uint32_t addr, unsigned int sel)
+{
+uint32_t *p, e1, e2;
+e1 = (addr & 0xffff) | (sel << 16);
+e2 = (addr & 0xffff0000) | 0x8000 | (dpl << 13) | (type << 8);
+p = ptr;
+p[0] = tswap32(e1);
+p[1] = tswap32(e2);
+}
+
+/* only dpl matters as we do only user space emulation */
+void bsd_i386_set_idt(int n, unsigned int dpl)
+{
+set_gate(idt_table + n, 0, dpl, 0, 0);
+}
+
+void bsd_i386_set_idt_base(uint64_t base)
+{
+idt_table = g2h_untagged(base);
+}
+
diff --git a/bsd-user/main.c b/bsd-user/main.c
index f7e1df5da5..7b3550898d 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -72,13 +72,6 @@ void gemu_log(const char *fmt, ...)
 va_end(ap);
 }
 
-#if defined(TARGET_I386)
-int cpu_get_pic_interrupt(CPUX86State *env)
-{
-return -1;
-}
-#endif
-
 void fork_start(void)
 {
 }
@@ -94,11 +87,6 @@ void fork_end(int child)
 /***/
 /* CPUX86 core interface */
 
-uint64_t cpu_get_tsc(CPUX86State *env)
-{
-return cpu_get_host_ticks();
-}
-
 static void write_dt(void *ptr, unsigned long addr, unsigned long limit,
  

[PATCH v2 07/43] bsd-user: move arch specific defines out of elfload.c

2021-08-26 Thread imp
From: Warner Losh 

Move the architecture specific defines to target_arch_elf.h and delete
them from elfload.c. unifdef as appropriate for i386 vs x86_64
versions. Add the copyright/license comments, and guard ifdefs.

Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/elfload.c| 81 +--
 bsd-user/i386/target_arch_elf.h   | 76 +
 bsd-user/x86_64/target_arch_elf.h | 64 
 3 files changed, 142 insertions(+), 79 deletions(-)
 create mode 100644 bsd-user/i386/target_arch_elf.h
 create mode 100644 bsd-user/x86_64/target_arch_elf.h

diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c
index fffa24f041..639673f5b7 100644
--- a/bsd-user/elfload.c
+++ b/bsd-user/elfload.c
@@ -23,6 +23,8 @@
 #include "disas/disas.h"
 #include "qemu/path.h"
 
+#include "target_arch_elf.h"
+
 /* from personality.h */
 
 /*
@@ -93,85 +95,6 @@ enum {
 #define ELIBBAD 80
 #endif
 
-#ifdef TARGET_I386
-
-#define ELF_PLATFORM get_elf_platform()
-
-static const char *get_elf_platform(void)
-{
-static char elf_platform[] = "i386";
-int family = object_property_get_int(OBJECT(thread_cpu), "family", NULL);
-if (family > 6)
-family = 6;
-if (family >= 3)
-elf_platform[1] = '0' + family;
-return elf_platform;
-}
-
-#define ELF_HWCAP get_elf_hwcap()
-
-static uint32_t get_elf_hwcap(void)
-{
-X86CPU *cpu = X86_CPU(thread_cpu);
-
-return cpu->env.features[FEAT_1_EDX];
-}
-
-#ifdef TARGET_X86_64
-#define ELF_START_MMAP 0x2ab000ULL
-#define elf_check_arch(x) (((x) == ELF_ARCH))
-
-#define ELF_CLASS  ELFCLASS64
-#define ELF_DATA   ELFDATA2LSB
-#define ELF_ARCH   EM_X86_64
-
-static inline void init_thread(struct target_pt_regs *regs, struct image_info 
*infop)
-{
-regs->rax = 0;
-regs->rsp = infop->start_stack;
-regs->rip = infop->entry;
-if (bsd_type == target_freebsd) {
-regs->rdi = infop->start_stack;
-}
-}
-
-#else /* !TARGET_X86_64 */
-
-#define ELF_START_MMAP 0x8000
-
-/*
- * This is used to ensure we don't load something for the wrong architecture.
- */
-#define elf_check_arch(x) (((x) == EM_386) || ((x) == EM_486))
-
-/*
- * These are used to set parameters in the core dumps.
- */
-#define ELF_CLASS   ELFCLASS32
-#define ELF_DATAELFDATA2LSB
-#define ELF_ARCHEM_386
-
-static inline void init_thread(struct target_pt_regs *regs, struct image_info 
*infop)
-{
-regs->esp = infop->start_stack;
-regs->eip = infop->entry;
-
-/* SVR4/i386 ABI (pages 3-31, 3-32) says that when the program
-   starts %edx contains a pointer to a function which might be
-   registered using `atexit'.  This provides a mean for the
-   dynamic linker to call DT_FINI functions for shared libraries
-   that have been loaded before the code runs.
-
-   A value of 0 tells we have no such handler.  */
-regs->edx = 0;
-}
-#endif /* !TARGET_X86_64 */
-
-#define USE_ELF_CORE_DUMP
-#define ELF_EXEC_PAGESIZE   4096
-
-#endif
-
 #ifndef ELF_PLATFORM
 #define ELF_PLATFORM (NULL)
 #endif
diff --git a/bsd-user/i386/target_arch_elf.h b/bsd-user/i386/target_arch_elf.h
new file mode 100644
index 00..84fbc8ba4c
--- /dev/null
+++ b/bsd-user/i386/target_arch_elf.h
@@ -0,0 +1,76 @@
+/*
+ *  i386 ELF definitions
+ *
+ *  Copyright (c) 2013 Stacey D. Son
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _TARGET_ARCH_ELF_H_
+#define _TARGET_ARCH_ELF_H_
+
+#define ELF_PLATFORM get_elf_platform()
+
+static const char *get_elf_platform(void)
+{
+static char elf_platform[] = "i386";
+int family = object_property_get_int(OBJECT(thread_cpu), "family", NULL);
+if (family > 6)
+family = 6;
+if (family >= 3)
+elf_platform[1] = '0' + family;
+return elf_platform;
+}
+
+#define ELF_HWCAP get_elf_hwcap()
+
+static uint32_t get_elf_hwcap(void)
+{
+X86CPU *cpu = X86_CPU(thread_cpu);
+
+return cpu->env.features[FEAT_1_EDX];
+}
+
+#define ELF_START_MMAP 0x8000
+
+/*
+ * This is used to ensure we don't load something for the wrong architecture.
+ */
+#define elf_check_arch(x) (((x) == EM_386) || ((x) == EM_486))
+
+/*
+ * These are used to set parameters in the core dumps.
+ */
+#define ELF_CLASS   ELFCLASS32
+#define ELF_DATAELFDATA2LSB
+#define ELF_ARCH   

[PATCH v2 25/43] bsd-user: Add architecture specific signal tramp code

2021-08-26 Thread imp
From: Warner Losh 

Add a stubbed out version of setup_sigtramp. This is not yet used for
x86, but is used for other architectures. This will be connected in
future commits.

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/i386/target_arch_sigtramp.h   | 29 ++
 bsd-user/x86_64/target_arch_sigtramp.h | 29 ++
 2 files changed, 58 insertions(+)
 create mode 100644 bsd-user/i386/target_arch_sigtramp.h
 create mode 100644 bsd-user/x86_64/target_arch_sigtramp.h

diff --git a/bsd-user/i386/target_arch_sigtramp.h 
b/bsd-user/i386/target_arch_sigtramp.h
new file mode 100644
index 00..889bff7a54
--- /dev/null
+++ b/bsd-user/i386/target_arch_sigtramp.h
@@ -0,0 +1,29 @@
+/*
+ * Intel i386  sigcode for bsd-user
+ *
+ *  Copyright (c) 2013 Stacey D. Son
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _TARGET_ARCH_SIGTRAMP_H_
+#define _TARGET_ARCH_SIGTRAMP_H_
+
+static inline abi_long setup_sigtramp(abi_ulong offset, unsigned sigf_uc,
+unsigned sys_sigreturn)
+{
+
+return -TARGET_EOPNOTSUPP;
+}
+#endif /* _TARGET_ARCH_SIGTRAMP_H_ */
diff --git a/bsd-user/x86_64/target_arch_sigtramp.h 
b/bsd-user/x86_64/target_arch_sigtramp.h
new file mode 100644
index 00..5629263a19
--- /dev/null
+++ b/bsd-user/x86_64/target_arch_sigtramp.h
@@ -0,0 +1,29 @@
+/*
+ * Intel x86_64  sigcode for bsd-user
+ *
+ *  Copyright (c) 2013 Stacey D. Son
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _TARGET_ARCH_SIGTRAMP_H_
+#define _TARGET_ARCH_SIGTRAMP_H_
+
+static inline abi_long setup_sigtramp(abi_ulong offset, unsigned sigf_uc,
+unsigned sys_sigreturn)
+{
+
+return -TARGET_EOPNOTSUPP;
+}
+#endif /* _TARGET_ARCH_SIGTRAMP_H_ */
-- 
2.32.0




[PATCH v2 00/43] bsd-user updates to run hello world

2021-08-26 Thread imp
From: Warner Losh 

This series of patches gets me to the point that I can run "Hello World" on i386
and x86_64. This is for static binaries only, that are relatively small, but
it's better than the 100% instant mmap failure that is the current state of all
things bsd-user in upstream qemu. Future patch sets will refine this, add
the missing system calls, fix bugs preventing more sophisticated programs
from running and add a bunch of new architecture support.

There are three large themes in these patches, though the changes that
represent them are interrelated making it hard to separate out further.
1. Reorganization to support multiple OS and architectures (though I've only
   tested FreeBSD, other BSDs might not even compile yet).
2. Diff reduction with the bsd-user fork for several files. These diffs include
   changes that borrowed from linux-user as well as changes to make things work
   on FreeBSD. The record keeping when this was done, however, was poor at
   best, so many of the specific borrowings are going unacknowledged here, apart
   from this general ack. These diffs also include some minor code shuffling.
   Some of the changes are done specifically to make it easier to rebase
   the bsd-user fork's changes when these land in the tree (a number of changes
   have been pushed there to make this more possible).
3. Filling in the missing pieces to make things work. There's many changes to
   elfload to make it load things in the right places, to find the interpreter
   better, etc. There's changes to mmap.c to make the mappings work better and
   there's changes to main.c that were inspired, at least, by now-ancient
   changes to linux-user's main.c.

I ran checkpatch.pl on this, and there's 350-odd errors it identifies (the vast
majority come from BSD's fetish for tabs), so there will need to be a V2 to fix
this at the very least. In addition, the change set is big (about +~4.5k/-~2.5k
lines), so I anticipate some iteration as well just based on its sheer
size. I've tried to keep each set small to make it easy to review in isolation,
but I've also allowed some interrelated ones to get a little bigger than I'd
normally like. I've not done the customary documentation of the expected
checkpatch.pl output because it is large, and because I wanted to get review
of the other parts rolling to get this project unstuck. Future versions of the
patch will document the expected output.

In addition, I noticed a number of places where I could modernize to make the
code match things like linux-user better. I've resisted the urge to do these at
this time, since it would complicate merging the other ~30k lines of diff that
remains after this batch. Future batches should generally be smaller once this
one has landed since they are, by and large, either a bunch of new files to
support armv7, aarch64, riscv64, mips, mipsel, mips64, ppc, ppc64 and ppc64le,
or are adding system calls, which can be done individually or small groups. I've
removed sparc and sparc64 support as they've been removed from FreeBSD and
have been near totally busted for years.

Stacey Son did the bulk of this work originally, but since I had to move things
around so much and/or retool that work in non-trivial ways, I've kept myself as
author, and added his signed-off-by line. I'm unsure of the qemu standard
practice for this, but am happy to learn if this is too far outside its current
mainstream. For a while Sean Bruno did the merges from upstream, and he's
credited using his signed-off-by in appropriate places, though for this patch
set there's only a few. I've tried to ensure that others who have work in
individual patches that I've aggregated together also are reflected in their
signed-off-by. Given the chaotic state of the upstream repo for its early
history, this may be the best that can be reconstructed at this late date. Most
of these files are 'foundational' so have existed from the earliest days when
record keeping wasn't quite what I'd wish for in hindsight. There was only
really one change that I could easily cherry-pick (Colin's), so I did that.

v2: rejected patches dropped
Use suggested glibc routines
Updated to be closer to qemu style
Disable bsd-user on netbsd and openbsd since they don't compile
fold together a couple of related changes
[[ tagged the review-by and acked-by from last series, but by hand...
  I think I got them all... ]]

Warner

Colin Percival (1):
  bsd-user: Add '-0 argv0' option to bsd-user/main.c

Warner Losh (42):
  bsd-user: remove sparc and sparc64
  bsd-user: add copyright header to elfload.c
  bsd-user: Add Stacey's copyright to main.c
  bsd-user: add license to bsdload.c
  bsd-user: style nits: bsdload.c whitespace to qemu standard
  bsd-user: Remove all non-x86 code from elfload.c
  bsd-user: move arch specific defines out of elfload.c
  bsd-user: pass the bsd_param into loader_exec
  bsd-user: Fix calculation of size to allocate
  bsd-user: implement path searching
  

[PATCH v2 06/43] bsd-user: Remove all non-x86 code from elfload.c

2021-08-26 Thread imp
From: Warner Losh 

bsd-user only builds x86 at the moment. Remove all non x86 code from
elfload.c. We'll move the x86 code to {i386,x86_64}/target_arch_elf.h
and bring in that support code from the forked bsd-user when the time
comes.

Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/elfload.c | 347 +
 1 file changed, 2 insertions(+), 345 deletions(-)

diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c
index ae62f3aab3..fffa24f041 100644
--- a/bsd-user/elfload.c
+++ b/bsd-user/elfload.c
@@ -23,15 +23,6 @@
 #include "disas/disas.h"
 #include "qemu/path.h"
 
-#ifdef _ARCH_PPC64
-#undef ARCH_DLINFO
-#undef ELF_PLATFORM
-#undef ELF_HWCAP
-#undef ELF_CLASS
-#undef ELF_DATA
-#undef ELF_ARCH
-#endif
-
 /* from personality.h */
 
 /*
@@ -144,7 +135,7 @@ static inline void init_thread(struct target_pt_regs *regs, 
struct image_info *i
 }
 }
 
-#else
+#else /* !TARGET_X86_64 */
 
 #define ELF_START_MMAP 0x8000
 
@@ -174,343 +165,13 @@ static inline void init_thread(struct target_pt_regs 
*regs, struct image_info *i
A value of 0 tells we have no such handler.  */
 regs->edx = 0;
 }
-#endif
-
-#define USE_ELF_CORE_DUMP
-#define ELF_EXEC_PAGESIZE   4096
-
-#endif
-
-#ifdef TARGET_ARM
-
-#define ELF_START_MMAP 0x8000
-
-#define elf_check_arch(x) ((x) == EM_ARM)
-
-#define ELF_CLASS   ELFCLASS32
-#ifdef TARGET_WORDS_BIGENDIAN
-#define ELF_DATAELFDATA2MSB
-#else
-#define ELF_DATAELFDATA2LSB
-#endif
-#define ELF_ARCHEM_ARM
-
-static inline void init_thread(struct target_pt_regs *regs, struct image_info 
*infop)
-{
-abi_long stack = infop->start_stack;
-memset(regs, 0, sizeof(*regs));
-regs->ARM_cpsr = 0x10;
-if (infop->entry & 1)
-regs->ARM_cpsr |= CPSR_T;
-regs->ARM_pc = infop->entry & 0xfffe;
-regs->ARM_sp = infop->start_stack;
-/* FIXME - what to for failure of get_user()? */
-get_user_ual(regs->ARM_r2, stack + 8); /* envp */
-get_user_ual(regs->ARM_r1, stack + 4); /* envp */
-/* XXX: it seems that r0 is zeroed after ! */
-regs->ARM_r0 = 0;
-/* For uClinux PIC binaries.  */
-/* XXX: Linux does this only on ARM with no MMU (do we care ?) */
-regs->ARM_r10 = infop->start_data;
-}
-
-#define USE_ELF_CORE_DUMP
-#define ELF_EXEC_PAGESIZE   4096
-
-enum
-{
-  ARM_HWCAP_ARM_SWP   = 1 << 0,
-  ARM_HWCAP_ARM_HALF  = 1 << 1,
-  ARM_HWCAP_ARM_THUMB = 1 << 2,
-  ARM_HWCAP_ARM_26BIT = 1 << 3,
-  ARM_HWCAP_ARM_FAST_MULT = 1 << 4,
-  ARM_HWCAP_ARM_FPA   = 1 << 5,
-  ARM_HWCAP_ARM_VFP   = 1 << 6,
-  ARM_HWCAP_ARM_EDSP  = 1 << 7,
-};
-
-#define ELF_HWCAP (ARM_HWCAP_ARM_SWP | ARM_HWCAP_ARM_HALF  \
-| ARM_HWCAP_ARM_THUMB | ARM_HWCAP_ARM_FAST_MULT \
-| ARM_HWCAP_ARM_FPA | ARM_HWCAP_ARM_VFP)
-
-#endif
-
-#ifdef TARGET_SPARC
-#ifdef TARGET_SPARC64
-
-#define ELF_START_MMAP 0x8000
-
-#ifndef TARGET_ABI32
-#define elf_check_arch(x) ((x) == EM_SPARCV9 || (x) == EM_SPARC32PLUS)
-#else
-#define elf_check_arch(x) ((x) == EM_SPARC32PLUS || (x) == EM_SPARC)
-#endif
-
-#define ELF_CLASS   ELFCLASS64
-#define ELF_DATAELFDATA2MSB
-#define ELF_ARCHEM_SPARCV9
-
-#define STACK_BIAS  2047
-
-static inline void init_thread(struct target_pt_regs *regs, struct image_info 
*infop)
-{
-#ifndef TARGET_ABI32
-regs->tstate = 0;
-#endif
-regs->pc = infop->entry;
-regs->npc = regs->pc + 4;
-regs->y = 0;
-#ifdef TARGET_ABI32
-regs->u_regs[14] = infop->start_stack - 16 * 4;
-#else
-if (personality(infop->personality) == PER_LINUX32)
-regs->u_regs[14] = infop->start_stack - 16 * 4;
-else {
-regs->u_regs[14] = infop->start_stack - 16 * 8 - STACK_BIAS;
-if (bsd_type == target_freebsd) {
-regs->u_regs[8] = infop->start_stack;
-regs->u_regs[11] = infop->start_stack;
-}
-}
-#endif
-}
-
-#else
-#define ELF_START_MMAP 0x8000
-
-#define elf_check_arch(x) ((x) == EM_SPARC)
-
-#define ELF_CLASS   ELFCLASS32
-#define ELF_DATAELFDATA2MSB
-#define ELF_ARCHEM_SPARC
-
-static inline void init_thread(struct target_pt_regs *regs, struct image_info 
*infop)
-{
-regs->psr = 0;
-regs->pc = infop->entry;
-regs->npc = regs->pc + 4;
-regs->y = 0;
-regs->u_regs[14] = infop->start_stack - 16 * 4;
-}
-
-#endif
-#endif
-
-#ifdef TARGET_PPC
-
-#define ELF_START_MMAP 0x8000
-
-#if defined(TARGET_PPC64) && !defined(TARGET_ABI32)
-
-#define elf_check_arch(x) ((x) == EM_PPC64)
-
-#define ELF_CLASS   ELFCLASS64
-
-#else
-
-#define elf_check_arch(x) ((x) == EM_PPC)
-
-#define ELF_CLASS   ELFCLASS32
-
-#endif
-
-#ifdef TARGET_WORDS_BIGENDIAN
-#define ELF_DATAELFDATA2MSB
-#else
-#define ELF_DATAELFDATA2LSB
-#endif
-#define ELF_ARCHEM_PPC
-
-/*
- * We need to put in some extra aux table entries to tell glibc what
- * the cache block size is, so it 

[PATCH v2 04/43] bsd-user: add license to bsdload.c

2021-08-26 Thread imp
From: Warner Losh 

Pull in the license statement at the top of the bsdload.c file
from the bsd-user fork version of this file. No functional changes.

Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/bsdload.c | 17 -
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/bsd-user/bsdload.c b/bsd-user/bsdload.c
index 8d83f21eda..0ade58b9e2 100644
--- a/bsd-user/bsdload.c
+++ b/bsd-user/bsdload.c
@@ -1,4 +1,19 @@
-/* Code for loading BSD executables.  Mostly linux kernel code.  */
+/*
+ *  Load BSD executables.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
 
 #include "qemu/osdep.h"
 
-- 
2.32.0




[PATCH v2 16/43] bsd-user: add host-os.h

2021-08-26 Thread imp
From: Warner Losh 

Host OS specific bits for this implementation go in this file.

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/freebsd/host-os.h | 23 +++
 bsd-user/netbsd/host-os.h  | 23 +++
 bsd-user/openbsd/host-os.h | 23 +++
 3 files changed, 69 insertions(+)
 create mode 100644 bsd-user/freebsd/host-os.h
 create mode 100644 bsd-user/netbsd/host-os.h
 create mode 100644 bsd-user/openbsd/host-os.h

diff --git a/bsd-user/freebsd/host-os.h b/bsd-user/freebsd/host-os.h
new file mode 100644
index 00..a799164324
--- /dev/null
+++ b/bsd-user/freebsd/host-os.h
@@ -0,0 +1,23 @@
+/*
+ *  FreeBSD host dependent code and definitions
+ *
+ *  Copyright (c) 2013 Stacey D. Son
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __HOST_OS_H_
+#define __HOST_OS_H_
+
+#endif /*!__HOST_OS_H_ */
diff --git a/bsd-user/netbsd/host-os.h b/bsd-user/netbsd/host-os.h
new file mode 100644
index 00..b44cb7fdda
--- /dev/null
+++ b/bsd-user/netbsd/host-os.h
@@ -0,0 +1,23 @@
+/*
+ *  NetBSD host dependent code and definitions
+ *
+ *  Copyright (c) 2013 Stacey D. Son
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __HOST_OS_H_
+#define __HOST_OS_H_
+
+#endif /*!__HOST_OS_H_ */
diff --git a/bsd-user/openbsd/host-os.h b/bsd-user/openbsd/host-os.h
new file mode 100644
index 00..9083555f26
--- /dev/null
+++ b/bsd-user/openbsd/host-os.h
@@ -0,0 +1,23 @@
+/*
+ *  OpenBSD host dependent code and definitions
+ *
+ *  Copyright (c) 2013 Stacey D. Son
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __HOST_OS_H_
+#define __HOST_OS_H_
+
+#endif /*!__HOST_OS_H_ */
-- 
2.32.0




[PATCH v2 05/43] bsd-user: style nits: bsdload.c whitespace to qemu standard

2021-08-26 Thread imp
From: Warner Losh 

Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/bsdload.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bsd-user/bsdload.c b/bsd-user/bsdload.c
index 0ade58b9e2..ec71c5e923 100644
--- a/bsd-user/bsdload.c
+++ b/bsd-user/bsdload.c
@@ -140,7 +140,7 @@ abi_ulong loader_build_argptr(int envc, int argc, abi_ulong 
sp,
 }
 
 int loader_exec(const char *filename, char **argv, char **envp,
- struct target_pt_regs *regs, struct image_info *infop)
+struct target_pt_regs *regs, struct image_info *infop)
 {
 struct bsd_binprm bprm;
 int retval;
@@ -148,7 +148,7 @@ int loader_exec(const char *filename, char **argv, char 
**envp,
 
 bprm.p = TARGET_PAGE_SIZE * MAX_ARG_PAGES - sizeof(unsigned int);
 for (i = 0 ; i < MAX_ARG_PAGES ; i++) { /* clear page-table */
-bprm.page[i] = NULL;
+bprm.page[i] = NULL;
 }
 retval = open(filename, O_RDONLY);
 if (retval < 0) {
-- 
2.32.0




[PATCH v2 01/43] bsd-user: remove sparc and sparc64

2021-08-26 Thread imp
From: Warner Losh 

These are broken here and in the bsd-user fork. They won't be fixed as
FreeBSD has dropped support for sparc. If people wish to support this in
other BSDs, you're better off starting over than starting from these
files.

Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/main.c| 289 -
 bsd-user/sparc/target_arch_sysarch.h   |  52 -
 bsd-user/sparc/target_syscall.h|  36 ---
 bsd-user/sparc64/target_arch_sysarch.h |  52 -
 bsd-user/sparc64/target_syscall.h  |  37 
 bsd-user/syscall.c |  11 -
 6 files changed, 477 deletions(-)
 delete mode 100644 bsd-user/sparc/target_arch_sysarch.h
 delete mode 100644 bsd-user/sparc/target_syscall.h
 delete mode 100644 bsd-user/sparc64/target_arch_sysarch.h
 delete mode 100644 bsd-user/sparc64/target_syscall.h

diff --git a/bsd-user/main.c b/bsd-user/main.c
index fe66204b6b..38185da111 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -261,274 +261,6 @@ void cpu_loop(CPUX86State *env)
 }
 #endif
 
-#ifdef TARGET_SPARC
-#define SPARC64_STACK_BIAS 2047
-
-/* #define DEBUG_WIN */
-/*
- * WARNING: dealing with register windows _is_ complicated. More info
- * can be found at http://www.sics.se/~psm/sparcstack.html
- */
-static inline int get_reg_index(CPUSPARCState *env, int cwp, int index)
-{
-index = (index + cwp * 16) % (16 * env->nwindows);
-/*
- * wrap handling : if cwp is on the last window, then we use the
- * registers 'after' the end
- */
-if (index < 8 && env->cwp == env->nwindows - 1) {
-index += 16 * env->nwindows;
-}
-return index;
-}
-
-/* save the register window 'cwp1' */
-static inline void save_window_offset(CPUSPARCState *env, int cwp1)
-{
-unsigned int i;
-abi_ulong sp_ptr;
-
-sp_ptr = env->regbase[get_reg_index(env, cwp1, 6)];
-#ifdef TARGET_SPARC64
-if (sp_ptr & 3) {
-sp_ptr += SPARC64_STACK_BIAS;
-}
-#endif
-#if defined(DEBUG_WIN)
-printf("win_overflow: sp_ptr=0x" TARGET_ABI_FMT_lx " save_cwp=%d\n",
-   sp_ptr, cwp1);
-#endif
-for (i = 0; i < 16; i++) {
-/* FIXME - what to do if put_user() fails? */
-put_user_ual(env->regbase[get_reg_index(env, cwp1, 8 + i)], sp_ptr);
-sp_ptr += sizeof(abi_ulong);
-}
-}
-
-static void save_window(CPUSPARCState *env)
-{
-#ifndef TARGET_SPARC64
-unsigned int new_wim;
-new_wim = ((env->wim >> 1) | (env->wim << (env->nwindows - 1))) &
-((1LL << env->nwindows) - 1);
-save_window_offset(env, cpu_cwp_dec(env, env->cwp - 2));
-env->wim = new_wim;
-#else
-/*
- * cansave is zero if the spill trap handler is triggered by `save` and
- * nonzero if triggered by a `flushw`
- */
-save_window_offset(env, cpu_cwp_dec(env, env->cwp - env->cansave - 2));
-env->cansave++;
-env->canrestore--;
-#endif
-}
-
-static void restore_window(CPUSPARCState *env)
-{
-#ifndef TARGET_SPARC64
-unsigned int new_wim;
-#endif
-unsigned int i, cwp1;
-abi_ulong sp_ptr;
-
-#ifndef TARGET_SPARC64
-new_wim = ((env->wim << 1) | (env->wim >> (env->nwindows - 1))) &
-((1LL << env->nwindows) - 1);
-#endif
-
-/* restore the invalid window */
-cwp1 = cpu_cwp_inc(env, env->cwp + 1);
-sp_ptr = env->regbase[get_reg_index(env, cwp1, 6)];
-#ifdef TARGET_SPARC64
-if (sp_ptr & 3) {
-sp_ptr += SPARC64_STACK_BIAS;
-}
-#endif
-#if defined(DEBUG_WIN)
-printf("win_underflow: sp_ptr=0x" TARGET_ABI_FMT_lx " load_cwp=%d\n",
-   sp_ptr, cwp1);
-#endif
-for (i = 0; i < 16; i++) {
-/* FIXME - what to do if get_user() fails? */
-get_user_ual(env->regbase[get_reg_index(env, cwp1, 8 + i)], sp_ptr);
-sp_ptr += sizeof(abi_ulong);
-}
-#ifdef TARGET_SPARC64
-env->canrestore++;
-if (env->cleanwin < env->nwindows - 1) {
-env->cleanwin++;
-}
-env->cansave--;
-#else
-env->wim = new_wim;
-#endif
-}
-
-static void flush_windows(CPUSPARCState *env)
-{
-int offset, cwp1;
-
-offset = 1;
-for (;;) {
-/* if restore would invoke restore_window(), then we can stop */
-cwp1 = cpu_cwp_inc(env, env->cwp + offset);
-#ifndef TARGET_SPARC64
-if (env->wim & (1 << cwp1)) {
-break;
-}
-#else
-if (env->canrestore == 0) {
-break;
-}
-env->cansave++;
-env->canrestore--;
-#endif
-save_window_offset(env, cwp1);
-offset++;
-}
-cwp1 = cpu_cwp_inc(env, env->cwp + 1);
-#ifndef TARGET_SPARC64
-/* set wim so that restore will reload the registers */
-env->wim = 1 << cwp1;
-#endif
-#if defined(DEBUG_WIN)
-printf("flush_windows: nb=%d\n", offset - 1);
-#endif
-}
-
-void cpu_loop(CPUSPARCState *env)
-{
-CPUState *cs = env_cpu(env);
-int trapnr, ret, syscall_nr;
-/* target_siginfo_t info; */
-
-while (1) {
-cpu_exec_start(cs);
-trapnr = 

[PATCH v2 03/43] bsd-user: Add Stacey's copyright to main.c

2021-08-26 Thread imp
From: Warner Losh 

Add Stacey's updated copyright to main.c

Signed-off-by: Warner Losh 
Signed-off-by: Stacey Son 
Reviewed-by: Richard Henderson 
---
 bsd-user/main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bsd-user/main.c b/bsd-user/main.c
index 38185da111..39c4a0f33c 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -1,7 +1,8 @@
 /*
- *  qemu user main
+ *  qemu bsd user main
  *
  *  Copyright (c) 2003-2008 Fabrice Bellard
+ *  Copyright (c) 2013-14 Stacey Son
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
-- 
2.32.0




[PATCH v2 02/43] bsd-user: add copyright header to elfload.c

2021-08-26 Thread imp
From: Warner Losh 

Add Stacey's copyright to elfload.c

Signed-off-by: Stacey Son 
Signed-off-by: Warner Losh 
Reviewed-by: Richard Henderson 
---
 bsd-user/elfload.c | 19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/bsd-user/elfload.c b/bsd-user/elfload.c
index 6edceb3ea6..ae62f3aab3 100644
--- a/bsd-user/elfload.c
+++ b/bsd-user/elfload.c
@@ -1,4 +1,21 @@
-/* This is the Linux kernel elf-loading code, ported into user space */
+/*
+ *  ELF loading code
+ *
+ *  Copyright (c) 2013 Stacey D. Son
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
 
 #include "qemu/osdep.h"
 
-- 
2.32.0




Re: [PATCH v3 2/3] hw/usb/hcd-xhci-pci: Abort if setting link property failed

2021-08-26 Thread Mark Cave-Ayland

On 26/08/2021 21:07, Philippe Mathieu-Daudé wrote:


Do not ignore a possible failure to set the 'host' property
of the TYPE_XHCI model.

Signed-off-by: Philippe Mathieu-Daudé 
---
  hw/usb/hcd-xhci-pci.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c
index e934b1a5b1f..24c528d210f 100644
--- a/hw/usb/hcd-xhci-pci.c
+++ b/hw/usb/hcd-xhci-pci.c
@@ -115,7 +115,7 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, 
Error **errp)
  dev->config[PCI_CACHE_LINE_SIZE] = 0x10;
  dev->config[0x60] = 0x30; /* release number */
  
-object_property_set_link(OBJECT(&s->xhci), "host", OBJECT(s), NULL);

+object_property_set_link(OBJECT(&s->xhci), "host", OBJECT(s), 
&error_abort);
  s->xhci.intr_update = xhci_pci_intr_update;
  s->xhci.intr_raise = xhci_pci_intr_raise;
  if (!qdev_realize(DEVICE(>xhci), NULL, errp)) {


Reviewed-by: Mark Cave-Ayland 


ATB,

Mark.



Re: [PATCH v3 1/3] hw/pci: Introduce pci_dma_memory_region() helper

2021-08-26 Thread Mark Cave-Ayland

On 26/08/2021 21:07, Philippe Mathieu-Daudé wrote:


pci_get_address_space() returns an AddressSpace. In some
cases we want a MemoryRegion. Add the pci_dma_memory_region()
equivalent helper.

Signed-off-by: Philippe Mathieu-Daudé 
---
  include/hw/pci/pci.h | 5 +
  1 file changed, 5 insertions(+)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index d0f4266e372..5860f42e400 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -786,6 +786,11 @@ static inline AddressSpace 
*pci_get_address_space(PCIDevice *dev)
  return >bus_master_as;
  }
  
+static inline MemoryRegion *pci_dma_memory_region(PCIDevice *dev)

+{
+return >bus_master_container_region;
+}
+
  /**
   * pci_dma_rw: Read from or write to an address space from PCI device.
   *


This patch doesn't quite feel right: there are already a number of devices that have 
been using pci_get_address_space() for some time without requiring access to the 
underlying MemoryRegion. So then the first question I ask myself when I see this 
patch is why do we suddenly need it now?



ATB,

Mark.



[PATCH v3 2/3] hw/usb/hcd-xhci-pci: Abort if setting link property failed

2021-08-26 Thread Philippe Mathieu-Daudé
Do not ignore a possible failure to set the 'host' property
of the TYPE_XHCI model.

Signed-off-by: Philippe Mathieu-Daudé 
---
 hw/usb/hcd-xhci-pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c
index e934b1a5b1f..24c528d210f 100644
--- a/hw/usb/hcd-xhci-pci.c
+++ b/hw/usb/hcd-xhci-pci.c
@@ -115,7 +115,7 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, 
Error **errp)
 dev->config[PCI_CACHE_LINE_SIZE] = 0x10;
 dev->config[0x60] = 0x30; /* release number */
 
-object_property_set_link(OBJECT(&s->xhci), "host", OBJECT(s), NULL);
+object_property_set_link(OBJECT(&s->xhci), "host", OBJECT(s), 
&error_abort);
 s->xhci.intr_update = xhci_pci_intr_update;
 s->xhci.intr_raise = xhci_pci_intr_raise;
 if (!qdev_realize(DEVICE(&s->xhci), NULL, errp)) {
-- 
2.31.1




[PATCH v3 3/3] hw/usb/xhci: Always expect 'dma' link property to be set

2021-08-26 Thread Philippe Mathieu-Daudé
Simplify by always passing a MemoryRegion property to the device.
Doing so, we can move the AddressSpace field to the device struct,
removing the need for heap allocation.

Update the MicroVM machine to pass the default system memory instead
of a NULL value.

We don't need to change the Versal machine, as the link property is
initialized as the "versal.dwc3_alias" MemoryRegion alias.

Suggested-by: Peter Maydell 
Signed-off-by: Philippe Mathieu-Daudé 
---
Versal untested
---
 hw/usb/hcd-xhci.h|  2 +-
 hw/i386/microvm.c|  2 ++
 hw/usb/hcd-xhci-pci.c|  3 ++-
 hw/usb/hcd-xhci-sysbus.c | 13 ++---
 hw/usb/hcd-xhci.c| 20 ++--
 5 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/hw/usb/hcd-xhci.h b/hw/usb/hcd-xhci.h
index 98f598382ad..ea76ec4f277 100644
--- a/hw/usb/hcd-xhci.h
+++ b/hw/usb/hcd-xhci.h
@@ -180,7 +180,7 @@ typedef struct XHCIState {
 USBBus bus;
 MemoryRegion mem;
 MemoryRegion *dma_mr;
-AddressSpace *as;
+AddressSpace as;
 MemoryRegion mem_cap;
 MemoryRegion mem_oper;
 MemoryRegion mem_runtime;
diff --git a/hw/i386/microvm.c b/hw/i386/microvm.c
index aba0c832190..2d55114a676 100644
--- a/hw/i386/microvm.c
+++ b/hw/i386/microvm.c
@@ -219,6 +219,8 @@ static void microvm_devices_init(MicrovmMachineState *mms)
 qdev_prop_set_uint32(dev, "slots", XHCI_MAXSLOTS);
 qdev_prop_set_uint32(dev, "p2", 8);
 qdev_prop_set_uint32(dev, "p3", 8);
+object_property_set_link(OBJECT(dev), "dma",
+ OBJECT(get_system_memory()), &error_abort);
 sysbus_realize(SYS_BUS_DEVICE(dev), &error_fatal);
 sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, MICROVM_XHCI_BASE);
 sysbus_connect_irq(SYS_BUS_DEVICE(dev), 0,
diff --git a/hw/usb/hcd-xhci-pci.c b/hw/usb/hcd-xhci-pci.c
index 24c528d210f..10f5cc374fe 100644
--- a/hw/usb/hcd-xhci-pci.c
+++ b/hw/usb/hcd-xhci-pci.c
@@ -116,6 +116,8 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, 
Error **errp)
 dev->config[0x60] = 0x30; /* release number */
 
 object_property_set_link(OBJECT(&s->xhci), "host", OBJECT(s), 
&error_abort);
+object_property_set_link(OBJECT(dev), "dma",
+ OBJECT(pci_dma_memory_region(dev)), &error_abort);
 s->xhci.intr_update = xhci_pci_intr_update;
 s->xhci.intr_raise = xhci_pci_intr_raise;
 if (!qdev_realize(DEVICE(&s->xhci), NULL, errp)) {
@@ -161,7 +163,6 @@ static void usb_xhci_pci_realize(struct PCIDevice *dev, 
Error **errp)
   >xhci.mem, 0, OFF_MSIX_PBA,
   0x90, NULL);
 }
-s->xhci.as = pci_get_address_space(dev);
 }
 
 static void usb_xhci_pci_exit(PCIDevice *dev)
diff --git a/hw/usb/hcd-xhci-sysbus.c b/hw/usb/hcd-xhci-sysbus.c
index a14e4381960..f212ce785bd 100644
--- a/hw/usb/hcd-xhci-sysbus.c
+++ b/hw/usb/hcd-xhci-sysbus.c
@@ -36,6 +36,11 @@ static void xhci_sysbus_realize(DeviceState *dev, Error 
**errp)
 {
 XHCISysbusState *s = XHCI_SYSBUS(dev);
 
+if (!s->xhci.dma_mr) {
+error_setg(errp, TYPE_XHCI_SYSBUS " 'dma' link not set");
+return;
+}
+
 object_property_set_link(OBJECT(&s->xhci), "host", OBJECT(s), NULL);
 if (!qdev_realize(DEVICE(&s->xhci), NULL, errp)) {
 return;
@@ -43,13 +48,7 @@ static void xhci_sysbus_realize(DeviceState *dev, Error 
**errp)
 s->irq = g_new0(qemu_irq, s->xhci.numintrs);
 qdev_init_gpio_out_named(dev, s->irq, SYSBUS_DEVICE_GPIO_IRQ,
  s->xhci.numintrs);
-if (s->xhci.dma_mr) {
-s->xhci.as =  g_malloc0(sizeof(AddressSpace));
-address_space_init(s->xhci.as, s->xhci.dma_mr, NULL);
-} else {
-s->xhci.as = &address_space_memory;
-}
-
+address_space_init(&s->xhci.as, s->xhci.dma_mr, "usb-xhci-dma");
 sysbus_init_mmio(SYS_BUS_DEVICE(dev), &s->xhci.mem);
 }
 
diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index e01700039b1..011f1233ef3 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -487,7 +487,7 @@ static inline void xhci_dma_read_u32s(XHCIState *xhci, 
dma_addr_t addr,
 
 assert((len % sizeof(uint32_t)) == 0);
 
-dma_memory_read(xhci->as, addr, buf, len);
+dma_memory_read(&xhci->as, addr, buf, len);
 
 for (i = 0; i < (len / sizeof(uint32_t)); i++) {
 buf[i] = le32_to_cpu(buf[i]);
@@ -507,7 +507,7 @@ static inline void xhci_dma_write_u32s(XHCIState *xhci, 
dma_addr_t addr,
 for (i = 0; i < n; i++) {
 tmp[i] = cpu_to_le32(buf[i]);
 }
-dma_memory_write(xhci->as, addr, tmp, len);
+dma_memory_write(&xhci->as, addr, tmp, len);
 }
 
 static XHCIPort *xhci_lookup_port(XHCIState *xhci, struct USBPort *uport)
@@ -618,7 +618,7 @@ static void xhci_write_event(XHCIState *xhci, XHCIEvent 
*event, int v)
ev_trb.status, ev_trb.control);
 
 addr = intr->er_start + TRB_SIZE*intr->er_ep_idx;
-dma_memory_write(xhci->as, addr, &ev_trb, TRB_SIZE);
+dma_memory_write(&xhci->as, addr, &ev_trb, TRB_SIZE);
 
 intr->er_ep_idx++;
 if 

[PATCH v3 0/3] hw/usb: Always expect 'dma' link property to be set to simplify

2021-08-26 Thread Philippe Mathieu-Daudé
Simplify the XHCI-based devices by always passing a MemoryRegion
property to the device.  Doing so, we can move the AddressSpace
field to the device struct, removing the need for heap allocation.

Since v2:
- Use &error_abort (Eduardo)

Philippe Mathieu-Daudé (3):
  hw/pci: Introduce pci_dma_memory_region() helper
  hw/usb/hcd-xhci-pci: Abort if setting link property failed
  hw/usb/xhci: Always expect 'dma' link property to be set

 hw/usb/hcd-xhci.h|  2 +-
 include/hw/pci/pci.h |  5 +
 hw/i386/microvm.c|  2 ++
 hw/usb/hcd-xhci-pci.c|  5 +++--
 hw/usb/hcd-xhci-sysbus.c | 13 ++---
 hw/usb/hcd-xhci.c| 20 ++--
 6 files changed, 27 insertions(+), 20 deletions(-)

-- 
2.31.1





[PATCH v3 1/3] hw/pci: Introduce pci_dma_memory_region() helper

2021-08-26 Thread Philippe Mathieu-Daudé
pci_get_address_space() returns an AddressSpace. In some
cases we want a MemoryRegion. Add the pci_dma_memory_region()
equivalent helper.

Signed-off-by: Philippe Mathieu-Daudé 
---
 include/hw/pci/pci.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index d0f4266e372..5860f42e400 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -786,6 +786,11 @@ static inline AddressSpace 
*pci_get_address_space(PCIDevice *dev)
 return &dev->bus_master_as;
 }
 
+static inline MemoryRegion *pci_dma_memory_region(PCIDevice *dev)
+{
+return &dev->bus_master_container_region;
+}
+
 /**
  * pci_dma_rw: Read from or write to an address space from PCI device.
  *
-- 
2.31.1




[PATCH v2 09/11] util/vfio-helpers: Simplify qemu_vfio_dma_map() returning directly

2021-08-26 Thread Philippe Mathieu-Daudé
To simplify qemu_vfio_dma_map():
- reduce 'ret' (returned value) scope by returning errno directly,
- remove the goto 'out' label.

Signed-off-by: Philippe Mathieu-Daudé 
---
 util/vfio-helpers.c | 23 ++-
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
index 7de5081dbd3..48b4384e8c8 100644
--- a/util/vfio-helpers.c
+++ b/util/vfio-helpers.c
@@ -750,7 +750,6 @@ static bool qemu_vfio_water_mark_reached(QEMUVFIOState *s, 
size_t size,
 int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t size,
   bool temporary, uint64_t *iova, Error **errp)
 {
-int ret = 0;
 int index;
 IOVAMapping *mapping;
 uint64_t iova0;
@@ -763,32 +762,31 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, 
size_t size,
 if (mapping) {
 iova0 = mapping->iova + ((uint8_t *)host - (uint8_t *)mapping->host);
 } else {
+int ret;
+
 if (qemu_vfio_water_mark_reached(s, size, errp)) {
-ret = -ENOMEM;
-goto out;
+return -ENOMEM;
 }
 if (!temporary) {
 if (qemu_vfio_find_fixed_iova(s, size, &iova0, errp) < 0) {
-ret = -ENOMEM;
-goto out;
+return -ENOMEM;
 }
 
 mapping = qemu_vfio_add_mapping(s, host, size, index + 1, iova0);
 assert(qemu_vfio_verify_mappings(s));
 ret = qemu_vfio_do_mapping(s, host, size, iova0);
-if (ret) {
+if (ret < 0) {
 qemu_vfio_undo_mapping(s, mapping, NULL);
-goto out;
+return ret;
 }
 qemu_vfio_dump_mappings(s);
 } else {
 if (qemu_vfio_find_temp_iova(s, size, &iova0, errp)) {
-ret = -ENOMEM;
-goto out;
+return -ENOMEM;
 }
 ret = qemu_vfio_do_mapping(s, host, size, iova0);
-if (ret) {
-goto out;
+if (ret < 0) {
+return ret;
 }
 }
 }
@@ -796,8 +794,7 @@ int qemu_vfio_dma_map(QEMUVFIOState *s, void *host, size_t 
size,
 if (iova) {
 *iova = iova0;
 }
-out:
-return ret;
+return 0;
 }
 
 /* Reset the high watermark and free all "temporary" mappings. */
-- 
2.31.1




  1   2   3   4   >