Re: [PATCH] vhost-user: send SET_STATUS 0 after GET_VRING_BASE

2023-04-20 Thread Michael S. Tsirkin
On Thu, Apr 20, 2023 at 09:07:06AM -0400, Stefan Hajnoczi wrote:
> Setting the VIRTIO Device Status Field to 0 resets the device. The
> device's state is lost, including the vring configuration.
> 
> vhost-user.c currently sends SET_STATUS 0 before GET_VRING_BASE. This
> risks confusion about the lifetime of the vhost-user state (e.g. vring
> last_avail_idx) across VIRTIO device reset.
> 
> Eugenio Pérez  adjusted the order for vhost-vdpa.c
> in commit c3716f260bff ("vdpa: move vhost reset after get vring base")
> and in that commit description suggested doing the same for vhost-user
> in the future.
> 
> Go ahead and adjust vhost-user.c now. I ran various online code searches
> to identify vhost-user backends implementing SET_STATUS. It seems only
> DPDK implements SET_STATUS and Yajun Wu  has
> confirmed that it is safe to make this change.


Fixes tag?
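
(For reference, the trailer takes the form

    Fixes: 0123456789ab ("subject of the offending commit")

where the hash and subject above are placeholders; the actual commit that
introduced the SET_STATUS ordering would need to be identified.)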

> Cc: Michael S. Tsirkin 
> Cc: Cindy Lu 
> Signed-off-by: Stefan Hajnoczi 
> ---
>  hw/virtio/vhost-user.c | 13 -
>  1 file changed, 12 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> index e5285df4ba..2d40b1b3e7 100644
> --- a/hw/virtio/vhost-user.c
> +++ b/hw/virtio/vhost-user.c
> @@ -2677,10 +2677,20 @@ static int vhost_user_dev_start(struct vhost_dev *dev, bool started)
>VIRTIO_CONFIG_S_DRIVER |
>VIRTIO_CONFIG_S_DRIVER_OK);
>  } else {
> -return vhost_user_set_status(dev, 0);
> +return 0;
>  }
>  }
>  
> +static void vhost_user_reset_status(struct vhost_dev *dev)
> +{
> +/* Set device status only for last queue pair */
> +if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
> +return;
> +}
> +
> +vhost_user_set_status(dev, 0);
> +}
> +
>  const VhostOps user_ops = {
>  .backend_type = VHOST_BACKEND_TYPE_USER,
>  .vhost_backend_init = vhost_user_backend_init,
> @@ -2716,4 +2726,5 @@ const VhostOps user_ops = {
>  .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
>  .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
>  .vhost_dev_start = vhost_user_dev_start,
> +.vhost_reset_status = vhost_user_reset_status,
>  };
> -- 
> 2.39.2




Re: [PATCH] vhost-user: send SET_STATUS 0 after GET_VRING_BASE

2023-04-20 Thread Yajun Wu



On 4/20/2023 9:07 PM, Stefan Hajnoczi wrote:


Setting the VIRTIO Device Status Field to 0 resets the device. The
device's state is lost, including the vring configuration.

vhost-user.c currently sends SET_STATUS 0 before GET_VRING_BASE. This
risks confusion about the lifetime of the vhost-user state (e.g. vring
last_avail_idx) across VIRTIO device reset.

Eugenio Pérez  adjusted the order for vhost-vdpa.c
in commit c3716f260bff ("vdpa: move vhost reset after get vring base")
and in that commit description suggested doing the same for vhost-user
in the future.

Go ahead and adjust vhost-user.c now. I ran various online code searches
to identify vhost-user backends implementing SET_STATUS. It seems only
DPDK implements SET_STATUS and Yajun Wu  has
confirmed that it is safe to make this change.

Cc: Michael S. Tsirkin 
Cc: Cindy Lu 
Signed-off-by: Stefan Hajnoczi 
---
  hw/virtio/vhost-user.c | 13 -
  1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index e5285df4ba..2d40b1b3e7 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -2677,10 +2677,20 @@ static int vhost_user_dev_start(struct vhost_dev *dev, bool started)
VIRTIO_CONFIG_S_DRIVER |
VIRTIO_CONFIG_S_DRIVER_OK);
  } else {
-return vhost_user_set_status(dev, 0);
+return 0;
  }
  }

+static void vhost_user_reset_status(struct vhost_dev *dev)
+{
+/* Set device status only for last queue pair */
+if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
+return;
+}
+
+vhost_user_set_status(dev, 0);
+}
+
  const VhostOps user_ops = {
  .backend_type = VHOST_BACKEND_TYPE_USER,
  .vhost_backend_init = vhost_user_backend_init,
@@ -2716,4 +2726,5 @@ const VhostOps user_ops = {
  .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
  .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
  .vhost_dev_start = vhost_user_dev_start,
+.vhost_reset_status = vhost_user_reset_status,
  };
--
2.39.2


Thank you for this fix.

Can you add a protocol feature bit check, just like we do in
vhost_user_dev_start?


    if (!virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_STATUS)) {
        return 0;
    }
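
Something along these lines, combining the patch's new function with the
suggested check (a sketch only; since vhost_user_reset_status() returns
void, the early return carries no value):

    static void vhost_user_reset_status(struct vhost_dev *dev)
    {
        /* Set device status only for last queue pair */
        if (dev->vq_index + dev->nvqs != dev->vq_index_end) {
            return;
        }

        if (virtio_has_feature(dev->protocol_features,
                               VHOST_USER_PROTOCOL_F_STATUS)) {
            vhost_user_set_status(dev, 0);
        }
    }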





[PATCH 11/11] Signed-off-by: Karim Taha

2023-04-20 Thread Karim Taha
From: Warner Losh 

Add the bind(2), connect(2), accept(2) and getpeername(2) syscalls to the
freebsd_syscall() function defined in bsd-user/freebsd/os-syscall.c.
---
 bsd-user/freebsd/os-syscall.c | 21 +
 1 file changed, 21 insertions(+)

diff --git a/bsd-user/freebsd/os-syscall.c b/bsd-user/freebsd/os-syscall.c
index c8f998ecec..7f29196a05 100644
--- a/bsd-user/freebsd/os-syscall.c
+++ b/bsd-user/freebsd/os-syscall.c
@@ -44,6 +44,8 @@
 #include "signal-common.h"
 #include "user/syscall-trace.h"
 
+/* BSD independent syscall shims */
+#include "bsd-socket.h"
 #include "bsd-file.h"
 #include "bsd-proc.h"
 
@@ -508,6 +510,25 @@ static abi_long freebsd_syscall(void *cpu_env, int num, abi_long arg1,
 ret = do_freebsd_sysarch(cpu_env, arg1, arg2);
 break;
 
+/*
+ * socket related system calls
+ */
+case TARGET_FREEBSD_NR_accept: /* accept(2) */
+ret = do_bsd_accept(arg1, arg2, arg3);
+break;
+
+case TARGET_FREEBSD_NR_bind: /* bind(2) */
+ret = do_bsd_bind(arg1, arg2, arg3);
+break;
+
+case TARGET_FREEBSD_NR_connect: /* connect(2) */
+ret = do_bsd_connect(arg1, arg2, arg3);
+break;
+
+case TARGET_FREEBSD_NR_getpeername: /* getpeername(2) */
+ret = do_bsd_getpeername(arg1, arg2, arg3);
+break;
+
 default:
 qemu_log_mask(LOG_UNIMP, "Unsupported syscall: %d\n", num);
 ret = -TARGET_ENOSYS;
-- 
2.40.0




[PATCH 07/11] Signed-off-by: Karim Taha

2023-04-20 Thread Karim Taha
From: Stacey Son 

Add bsd-user/bsd-socket.h, which contains the implementations of the
socket-related system call shims.
---
 bsd-user/bsd-socket.h | 61 +++
 1 file changed, 61 insertions(+)
 create mode 100644 bsd-user/bsd-socket.h

diff --git a/bsd-user/bsd-socket.h b/bsd-user/bsd-socket.h
new file mode 100644
index 0000000000..7da4cf11a0
--- /dev/null
+++ b/bsd-user/bsd-socket.h
@@ -0,0 +1,61 @@
+/*
+ *  socket related system call shims
+ *
+ *  Copyright (c) 2013 Stacey D. Son
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef BSD_SOCKET_H
+#define BSD_SOCKET_H
+
+#include 
+#include 
+#include 
+#include 
+
+#include "qemu-bsd.h"
+
+ssize_t safe_recvfrom(int s, void *buf, size_t len, int flags,
+struct sockaddr *restrict from, socklen_t *restrict fromlen);
+ssize_t safe_sendto(int s, const void *buf, size_t len, int flags,
+const struct sockaddr *to, socklen_t tolen);
+int safe_select(int nfds, fd_set *readfs, fd_set *writefds, fd_set *exceptfds,
+struct timeval *timeout);
+int safe_pselect(int nfds, fd_set *restrict readfds,
+fd_set *restrict writefds, fd_set *restrict exceptfds,
+const struct timespec *restrict timeout,
+const sigset_t *restrict newsigmask);
+
+/* bind(2) */
+static inline abi_long do_bsd_bind(int sockfd, abi_ulong target_addr,
+   socklen_t addrlen)
+{
+abi_long ret;
+void *addr;
+
+if ((int)addrlen < 0) {
+return -TARGET_EINVAL;
+}
+
+addr = alloca(addrlen + 1);
+ret = target_to_host_sockaddr(addr, target_addr, addrlen);
+if (is_error(ret)) {
+return ret;
+}
+
+return get_errno(bind(sockfd, addr, addrlen));
+}
+
+#endif /* BSD_SOCKET_H */
-- 
2.40.0




[PATCH 06/11] Signed-off-by: Karim Taha

2023-04-20 Thread Karim Taha
From: Karim Taha 

Add bsd-user/bsd-socket.c to meson.build.
---
 bsd-user/meson.build | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bsd-user/meson.build b/bsd-user/meson.build
index 5243122fc5..f648bd3554 100644
--- a/bsd-user/meson.build
+++ b/bsd-user/meson.build
@@ -7,6 +7,7 @@ bsd_user_ss = ss.source_set()
 common_user_inc += include_directories('include')
 
 bsd_user_ss.add(files(
+  'bsd-socket.c',
   'bsdload.c',
   'elfload.c',
   'main.c',
-- 
2.40.0




[PATCH 09/11] Signed-off-by: Karim Taha

2023-04-20 Thread Karim Taha
From: Stacey Son 

Add the accept(2) syscall to bsd-user/bsd-socket.h.
---
 bsd-user/bsd-socket.h | 33 +
 1 file changed, 33 insertions(+)

diff --git a/bsd-user/bsd-socket.h b/bsd-user/bsd-socket.h
index f191f22d63..f748266730 100644
--- a/bsd-user/bsd-socket.h
+++ b/bsd-user/bsd-socket.h
@@ -79,4 +79,37 @@ static inline abi_long do_bsd_connect(int sockfd, abi_ulong target_addr,
 return get_errno(connect(sockfd, addr, addrlen));
 }
 
+/* accept(2) */
+static inline abi_long do_bsd_accept(int fd, abi_ulong target_addr,
+ abi_ulong target_addrlen_addr)
+{
+socklen_t addrlen;
+void *addr;
+abi_long ret;
+
+if (target_addr == 0) {
+return get_errno(accept(fd, NULL, NULL));
+}
+/* return EINVAL if addrlen pointer is invalid */
+if (get_user_u32(addrlen, target_addrlen_addr)) {
+return -TARGET_EINVAL;
+}
+if ((int)addrlen < 0) {
+return -TARGET_EINVAL;
+}
+if (!access_ok(VERIFY_WRITE, target_addr, addrlen)) {
+return -TARGET_EINVAL;
+}
+addr = alloca(addrlen);
+
+ret = get_errno(accept(fd, addr, &addrlen));
+if (!is_error(ret)) {
+host_to_target_sockaddr(target_addr, addr, addrlen);
+if (put_user_u32(addrlen, target_addrlen_addr)) {
+ret = -TARGET_EFAULT;
+}
+}
+return ret;
+}
+
 #endif /* BSD_SOCKET_H */
-- 
2.40.0




[PATCH 10/11] Signed-off-by: Karim Taha

2023-04-20 Thread Karim Taha
From: Stacey Son 

Add the getpeername(2) syscall to bsd-user/bsd-socket.h.
---
 bsd-user/bsd-socket.h | 28 
 1 file changed, 28 insertions(+)

diff --git a/bsd-user/bsd-socket.h b/bsd-user/bsd-socket.h
index f748266730..16fae3752a 100644
--- a/bsd-user/bsd-socket.h
+++ b/bsd-user/bsd-socket.h
@@ -112,4 +112,32 @@ static inline abi_long do_bsd_accept(int fd, abi_ulong target_addr,
 return ret;
 }
 
+/* getpeername(2) */
+static inline abi_long do_bsd_getpeername(int fd, abi_ulong target_addr,
+  abi_ulong target_addrlen_addr)
+{
+socklen_t addrlen;
+void *addr;
+abi_long ret;
+
+if (get_user_u32(addrlen, target_addrlen_addr)) {
+return -TARGET_EFAULT;
+}
+if ((int)addrlen < 0) {
+return -TARGET_EINVAL;
+}
+if (!access_ok(VERIFY_WRITE, target_addr, addrlen)) {
+return -TARGET_EFAULT;
+}
+addr = alloca(addrlen);
+ret = get_errno(getpeername(fd, addr, &addrlen));
+if (!is_error(ret)) {
+host_to_target_sockaddr(target_addr, addr, addrlen);
+if (put_user_u32(addrlen, target_addrlen_addr)) {
+ret = -TARGET_EFAULT;
+}
+}
+return ret;
+}
+
 #endif /* BSD_SOCKET_H */
-- 
2.40.0




[PATCH 02/11] Signed-off-by: Karim Taha

2023-04-20 Thread Karim Taha
From: Stacey Son 

Add the relevant definitions of struct target_sockaddr and struct
target_ip_mreq and the related flags, to be used in
bsd-user/bsd-socket.c by the socket conversion functions:
target_to_host_sockaddr, host_to_target_sockaddr and target_to_host_ip_mreq.
---
 bsd-user/syscall_defs.h | 110 
 1 file changed, 110 insertions(+)

diff --git a/bsd-user/syscall_defs.h b/bsd-user/syscall_defs.h
index b6d113d24a..f041245792 100644
--- a/bsd-user/syscall_defs.h
+++ b/bsd-user/syscall_defs.h
@@ -179,6 +179,116 @@ struct target_freebsd__wrusage {
 struct target_freebsd_rusage wru_children;
 };
 
+/*
+ * sys/socket.h
+ */
+
+/*
+ * Types
+ */
+#define TARGET_SOCK_STREAM  1   /* stream socket */
+#define TARGET_SOCK_DGRAM   2   /* datagram socket */
+#define TARGET_SOCK_RAW 3   /* raw-protocol interface */
+#define TARGET_SOCK_RDM 4   /* reliably-delivered message */
+#define TARGET_SOCK_SEQPACKET   5   /* sequenced packet stream */
+
+
+/*
+ * Option flags per-socket.
+ */
+
+#define TARGET_SO_DEBUG 0x0001  /* turn on debugging info recording */
+#define TARGET_SO_ACCEPTCONN0x0002  /* socket has had listen() */
+#define TARGET_SO_REUSEADDR 0x0004  /* allow local address reuse */
+#define TARGET_SO_KEEPALIVE 0x0008  /* keep connections alive */
+#define TARGET_SO_DONTROUTE 0x0010  /* just use interface addresses */
+#define TARGET_SO_BROADCAST 0x0020  /* permit sending of broadcast msgs */
+#define TARGET_SO_USELOOPBACK   0x0040  /* bypass hardware when possible */
+#define TARGET_SO_LINGER0x0080  /* linger on close if data present */
+#define TARGET_SO_OOBINLINE 0x0100  /* leave received OOB data in line */
+#define TARGET_SO_REUSEPORT 0x0200  /* allow local address & port reuse */
+#define TARGET_SO_TIMESTAMP 0x0400  /* timestamp received dgram traffic */
+#define TARGET_SO_NOSIGPIPE 0x0800  /* no SIGPIPE from EPIPE */
+#define TARGET_SO_ACCEPTFILTER  0x1000  /* there is an accept filter */
+#define TARGET_SO_BINTIME   0x2000  /* timestamp received dgram traffic */
+#define TARGET_SO_NO_OFFLOAD0x4000  /* socket cannot be offloaded */
+#define TARGET_SO_NO_DDP0x8000  /* disable direct data placement */
+
+/*
+ * Additional options, not kept in so_options.
+ */
+#define TARGET_SO_SNDBUF0x1001  /* send buffer size */
+#define TARGET_SO_RCVBUF0x1002  /* receive buffer size */
+#define TARGET_SO_SNDLOWAT  0x1003  /* send low-water mark */
+#define TARGET_SO_RCVLOWAT  0x1004  /* receive low-water mark */
+#define TARGET_SO_SNDTIMEO  0x1005  /* send timeout */
+#define TARGET_SO_RCVTIMEO  0x1006  /* receive timeout */
+#define TARGET_SO_ERROR 0x1007  /* get error status and clear */
+#define TARGET_SO_TYPE  0x1008  /* get socket type */
+#define TARGET_SO_LABEL 0x1009  /* socket's MAC label */
+#define TARGET_SO_PEERLABEL 0x1010  /* socket's peer's MAC label */
+#define TARGET_SO_LISTENQLIMIT  0x1011  /* socket's backlog limit */
+#define TARGET_SO_LISTENQLEN0x1012  /* socket's complete queue length */
+#define TARGET_SO_LISTENINCQLEN 0x1013  /* socket's incomplete queue length */
+#define TARGET_SO_SETFIB0x1014  /* use this FIB to route */
+#define TARGET_SO_USER_COOKIE   0x1015  /* user cookie (dummynet etc.) */
+#define TARGET_SO_PROTOCOL  0x1016  /* get socket protocol (Linux name) */
+
+/* alias for SO_PROTOCOL (SunOS name) */
+#define TARGET_SO_PROTOTYPE TARGET_SO_PROTOCOL
+
+/*
+ * Level number for (get/set)sockopt() to apply to socket itself.
+ */
+#define TARGET_SOL_SOCKET   0xffff  /* options for socket level */
+
+#ifndef CMSG_ALIGN
+#define CMSG_ALIGN(len) (((len) + sizeof(long) - 1) & ~(sizeof(long) - 1))
+#endif
+
+/*
+ * sys/socket.h
+ */
+struct target_msghdr {
+    abi_long    msg_name;       /* Socket name */
+    int32_t     msg_namelen;    /* Length of name */
+    abi_long    msg_iov;        /* Data blocks */
+    int32_t     msg_iovlen;     /* Number of blocks */
+    abi_long    msg_control;    /* Per protocol magic (eg BSD fd passing) */
+    int32_t     msg_controllen; /* Length of cmsg list */
+    int32_t     msg_flags;      /* flags on received message */
+};
+
+struct target_sockaddr {
+uint8_t sa_len;
+uint8_t sa_family;
+uint8_t sa_data[14];
+} QEMU_PACKED;
+
+struct target_in_addr {
+uint32_t s_addr; /* big endian */
+};
+
+struct target_cmsghdr {
+    uint32_t    cmsg_len;
+int32_t cmsg_level;
+int32_t cmsg_type;
+};
+
+/*
+ * netinet/in.h
+ */
+struct target_ip_mreq {
+struct target_in_addr   imr_multiaddr;
+struct target_in_addr   imr_interface;
+};
+
+struct target_ip_mreqn {
+struct target_in_addr   imr_multiaddr;
+struct target_in_addr   imr_address;
+int32_t imr_ifindex;
+};
+
 #define safe_syscall0(type, name) \
 type safe_##name(void) \
 { \
-- 
2.40.0




[PATCH 05/11] Signed-off-by: Karim Taha

2023-04-20 Thread Karim Taha
From: Stacey Son 

Add bsd-user/bsd-socket.c, which contains the definitions of the
socket conversion functions.
---
 bsd-user/bsd-socket.c | 108 ++
 1 file changed, 108 insertions(+)
 create mode 100644 bsd-user/bsd-socket.c

diff --git a/bsd-user/bsd-socket.c b/bsd-user/bsd-socket.c
new file mode 100644
index 0000000000..8a5e4d
--- /dev/null
+++ b/bsd-user/bsd-socket.c
@@ -0,0 +1,108 @@
+/*
+ *  BSD socket system call related helpers
+ *
+ *  Copyright (c) 2013 Stacey D. Son
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include "qemu/osdep.h"
+
+#include 
+#include 
+#include 
+#include 
+
+#include "qemu.h"
+#include "qemu-bsd.h"
+
+/*
+ * socket conversion
+ */
+abi_long target_to_host_sockaddr(struct sockaddr *addr, abi_ulong target_addr,
+ socklen_t len)
+{
+const socklen_t unix_maxlen = sizeof(struct sockaddr_un);
+sa_family_t sa_family;
+struct target_sockaddr *target_saddr;
+
+target_saddr = lock_user(VERIFY_READ, target_addr, len, 1);
+if (target_saddr == 0) {
+return -TARGET_EFAULT;
+}
+
+sa_family = target_saddr->sa_family;
+
+/*
+ * Oops. The caller might send an incomplete sun_path; sun_path
+ * must be terminated by \0 (see the manual page), but unfortunately
+ * it is quite common to specify sockaddr_un length as
+ * "strlen(x->sun_path)" while it should be "strlen(...) + 1". We will
+ * fix that here if needed.
+ */
+if (target_saddr->sa_family == AF_UNIX) {
+if (len < unix_maxlen && len > 0) {
+char *cp = (char *)target_saddr;
+
+if (cp[len - 1] && !cp[len]) {
+len++;
+}
+}
+if (len > unix_maxlen) {
+len = unix_maxlen;
+}
+}
+
+memcpy(addr, target_saddr, len);
+addr->sa_family = sa_family;/* type uint8_t */
+addr->sa_len = target_saddr->sa_len;/* type uint8_t */
+unlock_user(target_saddr, target_addr, 0);
+
+return 0;
+}
+
+abi_long host_to_target_sockaddr(abi_ulong target_addr, struct sockaddr *addr,
+ socklen_t len)
+{
+struct target_sockaddr *target_saddr;
+
+target_saddr = lock_user(VERIFY_WRITE, target_addr, len, 0);
+if (target_saddr == 0) {
+return -TARGET_EFAULT;
+}
+memcpy(target_saddr, addr, len);
+target_saddr->sa_family = addr->sa_family;  /* type uint8_t */
+target_saddr->sa_len = addr->sa_len;/* type uint8_t */
+unlock_user(target_saddr, target_addr, len);
+
+return 0;
+}
+
+abi_long target_to_host_ip_mreq(struct ip_mreqn *mreqn, abi_ulong target_addr,
+socklen_t len)
+{
+struct target_ip_mreqn *target_smreqn;
+
+target_smreqn = lock_user(VERIFY_READ, target_addr, len, 1);
+if (target_smreqn == 0) {
+return -TARGET_EFAULT;
+}
+mreqn->imr_multiaddr.s_addr = target_smreqn->imr_multiaddr.s_addr;
+mreqn->imr_address.s_addr = target_smreqn->imr_address.s_addr;
+if (len == sizeof(struct target_ip_mreqn)) {
+mreqn->imr_ifindex = tswapal(target_smreqn->imr_ifindex);
+}
+unlock_user(target_smreqn, target_addr, 0);
+
+return 0;
+}
-- 
2.40.0




[PATCH 04/11] Signed-off-by: Karim Taha

2023-04-20 Thread Karim Taha
From: Stacey Son 

Add bsd-user/qemu-bsd.h, required by bsd-user/bsd-socket.h; it contains
forward declarations of the socket conversion functions defined in
bsd-user/bsd-socket.c.
---
 bsd-user/qemu-bsd.h | 36 
 1 file changed, 36 insertions(+)
 create mode 100644 bsd-user/qemu-bsd.h

diff --git a/bsd-user/qemu-bsd.h b/bsd-user/qemu-bsd.h
new file mode 100644
index 0000000000..a052688596
--- /dev/null
+++ b/bsd-user/qemu-bsd.h
@@ -0,0 +1,36 @@
+/*
+ *  BSD conversion extern declarations
+ *
+ *  Copyright (c) 2013 Stacey D. Son
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef QEMU_BSD_H
+#define QEMU_BSD_H
+
+#include 
+#include 
+#include 
+#include 
+
+/* bsd-socket.c */
+abi_long target_to_host_sockaddr(struct sockaddr *addr, abi_ulong target_addr,
+socklen_t len);
+abi_long host_to_target_sockaddr(abi_ulong target_addr, struct sockaddr *addr,
+socklen_t len);
+abi_long target_to_host_ip_mreq(struct ip_mreqn *mreqn, abi_ulong target_addr,
+socklen_t len);
+
+#endif /* QEMU_BSD_H */
-- 
2.40.0




[PATCH 01/11] Signed-off-by: Karim Taha

2023-04-20 Thread Karim Taha
From: Warner Losh 

Allow guest_base to be initialized on 64-bit hosts; the initial value is
used by the g2h_untagged() function defined in include/exec/cpu_ldst.h.
---
 bsd-user/main.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/bsd-user/main.c b/bsd-user/main.c
index babc3b009b..afdc1b5f3c 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -50,8 +50,22 @@
 #include "target_arch_cpu.h"
 
 int singlestep;
-uintptr_t guest_base;
+
+/*
+ * Going hand in hand with the va space needed (see below), we need
+ * to find a host address to map the guest to. Assume that qemu
+ * itself doesn't need memory above 32GB (or that we don't collide
+ * with anything interesting). This is selected rather arbitrarily,
+ * but seems to produce good results in tests to date.
+ */
+# if HOST_LONG_BITS >= 64
uintptr_t guest_base = 0x800000000ul;    /* at 32GB */
+bool have_guest_base = true;
+#else
+uintptr_t guest_base;/* TODO: use sysctl to find big enough hole */
 bool have_guest_base;
+#endif
+
 /*
  * When running 32-on-64 we should make sure we can fit all of the possible
  * guest address space into a contiguous chunk of virtual host memory.
-- 
2.40.0




[PATCH 03/11] Signed-off-by: Karim Taha

2023-04-20 Thread Karim Taha
From: Sean Bruno 

Co-authored-by: Kyle Evans 
---
 bsd-user/syscall_defs.h | 38 ++
 1 file changed, 38 insertions(+)

diff --git a/bsd-user/syscall_defs.h b/bsd-user/syscall_defs.h
index f041245792..b594fafecc 100644
--- a/bsd-user/syscall_defs.h
+++ b/bsd-user/syscall_defs.h
@@ -275,6 +275,44 @@ struct target_cmsghdr {
 int32_t cmsg_type;
 };
 
+/*
+ * mips32 is the exception to the general rule of long-alignment; it
+ * unconditionally uses 64-bit alignment instead.
+ */
+#if defined(TARGET_MIPS) && TARGET_ABI_BITS == 32
+#define TARGET_ALIGNBYTES   (sizeof(abi_llong) - 1)
+#else
+#define TARGET_ALIGNBYTES   (sizeof(abi_long) - 1)
+#endif
+
+#define TARGET_CMSG_NXTHDR(mhdr, cmsg, cmsg_start) \
+   __target_cmsg_nxthdr(mhdr, cmsg, cmsg_start)
+#define TARGET_CMSG_ALIGN(len) (((len) + TARGET_ALIGNBYTES) \
+   & (size_t) ~TARGET_ALIGNBYTES)
+#define TARGET_CMSG_DATA(cmsg) \
+((unsigned char *)(cmsg) + TARGET_CMSG_ALIGN(sizeof(struct target_cmsghdr)))
+#define TARGET_CMSG_SPACE(len) \
+(TARGET_CMSG_ALIGN(sizeof(struct target_cmsghdr)) + TARGET_CMSG_ALIGN(len))
+#define TARGET_CMSG_LEN(len) \
+(TARGET_CMSG_ALIGN(sizeof(struct target_cmsghdr)) + (len))
+
+static inline struct target_cmsghdr *
+__target_cmsg_nxthdr(struct target_msghdr *__mhdr,
+ struct target_cmsghdr *__cmsg,
+ struct target_cmsghdr *__cmsg_start)
+{
+struct target_cmsghdr *__ptr;
+
+__ptr = (struct target_cmsghdr *)((unsigned char *) __cmsg +
+TARGET_CMSG_ALIGN(tswap32(__cmsg->cmsg_len)));
+if ((unsigned long)((char *)(__ptr + 1) - (char *)__cmsg_start) >
+tswap32(__mhdr->msg_controllen)) {
+/* No more entries.  */
+return (struct target_cmsghdr *)0;
+}
+return __ptr;
+}
+
 /*
  * netinet/in.h
  */
-- 
2.40.0
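
As a worked example of the arithmetic above (not part of the patch): on a
target where abi_long is 8 bytes, TARGET_ALIGNBYTES is 7, so the 12-byte
target_cmsghdr rounds up to 16, and a 4-byte payload yields a CMSG_LEN of
20 but a CMSG_SPACE of 24. A small host-side C program mirroring the
macros, with the sizes assumed rather than taken from a real target:

    #include <stdio.h>

    /* mirrors TARGET_CMSG_ALIGN, assuming an 8-byte long */
    #define ALIGNBYTES  (sizeof(long) - 1)
    #define ALIGN(len)  (((len) + ALIGNBYTES) & (size_t)~ALIGNBYTES)

    int main(void)
    {
        size_t hdr = 12;    /* sizeof(struct target_cmsghdr): u32 + 2 x i32 */
        size_t payload = 4; /* arbitrary example payload */

        printf("CMSG_LEN   = %zu\n", ALIGN(hdr) + payload);        /* 20 */
        printf("CMSG_SPACE = %zu\n", ALIGN(hdr) + ALIGN(payload)); /* 24 */
        return 0;
    }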




[PATCH 00/11] Contribution task implementations, for the 'FreeBSD user emulation improvements' project.

2023-04-20 Thread Karim Taha
Upstream the implementations of the bind(2), connect(2), accept(2) and
getpeername(2) system calls from the blitz branch of the bsd-user fork
hosted at https://github.com/qemu-bsd-user/qemu-bsd-user/tree/blitz.
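
As a quick smoke test of the four shims, a guest program along these lines
can be built statically for the target and run under qemu's bsd-user mode
(a sketch only: the port number is arbitrary and listen(2) is assumed to
be handled already):

    #include <stdio.h>
    #include <arpa/inet.h>
    #include <netinet/in.h>
    #include <sys/socket.h>

    int main(void)
    {
        struct sockaddr_in sa = { 0 }, peer = { 0 };
        socklen_t len = sizeof(peer);
        int srv = socket(AF_INET, SOCK_STREAM, 0);
        int cli = socket(AF_INET, SOCK_STREAM, 0);

        sa.sin_family = AF_INET;
        sa.sin_port = htons(12345);                  /* arbitrary port */
        sa.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

        /* bind(2) and listen, then connect(2) to ourselves over loopback */
        if (bind(srv, (struct sockaddr *)&sa, sizeof(sa)) < 0 ||
            listen(srv, 1) < 0 ||
            connect(cli, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
            perror("bind/listen/connect");
            return 1;
        }

        /* accept(2) the queued connection, then query getpeername(2) */
        if (accept(srv, (struct sockaddr *)&peer, &len) < 0 ||
            getpeername(cli, (struct sockaddr *)&peer, &len) < 0) {
            perror("accept/getpeername");
            return 1;
        }

        printf("connected, peer port %d\n", ntohs(peer.sin_port));
        return 0;
    }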

Karim Taha (1):
  Signed-off-by: Karim Taha 

Sean Bruno (1):
  Signed-off-by: Karim Taha 

Stacey Son (7):
  Signed-off-by: Karim Taha 
  Signed-off-by: Karim Taha 
  Signed-off-by: Karim Taha 
  Signed-off-by: Karim Taha 
  Signed-off-by: Karim Taha 
  Signed-off-by: Karim Taha 
  Signed-off-by: Karim Taha 

Warner Losh (2):
  Signed-off-by: Karim Taha 
  Signed-off-by: Karim Taha 

 bsd-user/bsd-socket.c | 108 +
 bsd-user/bsd-socket.h | 143 
 bsd-user/freebsd/os-syscall.c |  21 +
 bsd-user/main.c   |  16 +++-
 bsd-user/meson.build  |   1 +
 bsd-user/qemu-bsd.h   |  36 +
 bsd-user/syscall_defs.h   | 148 ++
 7 files changed, 472 insertions(+), 1 deletion(-)
 create mode 100644 bsd-user/bsd-socket.c
 create mode 100644 bsd-user/bsd-socket.h
 create mode 100644 bsd-user/qemu-bsd.h

-- 
2.40.0




[PATCH 08/11] Signed-off-by: Karim Taha

2023-04-20 Thread Karim Taha
From: Stacey Son 

Add the connect(2) syscall to bsd-user/bsd-socket.h.
---
 bsd-user/bsd-socket.h | 21 +
 1 file changed, 21 insertions(+)

diff --git a/bsd-user/bsd-socket.h b/bsd-user/bsd-socket.h
index 7da4cf11a0..f191f22d63 100644
--- a/bsd-user/bsd-socket.h
+++ b/bsd-user/bsd-socket.h
@@ -58,4 +58,25 @@ static inline abi_long do_bsd_bind(int sockfd, abi_ulong target_addr,
 return get_errno(bind(sockfd, addr, addrlen));
 }
 
+/* connect(2) */
+static inline abi_long do_bsd_connect(int sockfd, abi_ulong target_addr,
+  socklen_t addrlen)
+{
+abi_long ret;
+void *addr;
+
+if ((int)addrlen < 0) {
+return -TARGET_EINVAL;
+}
+addr = alloca(addrlen + 1);
+
+ret = target_to_host_sockaddr(addr, target_addr, addrlen);
+
+if (is_error(ret)) {
+return ret;
+}
+
+return get_errno(connect(sockfd, addr, addrlen));
+}
+
 #endif /* BSD_SOCKET_H */
-- 
2.40.0




Re: [PATCH] hw/riscv: virt: Enable booting M-mode or S-mode FW from pflash0

2023-04-20 Thread Bin Meng
On Fri, Apr 21, 2023 at 12:44 PM Sunil V L  wrote:
>
> On Fri, Apr 21, 2023 at 12:39:46PM +0800, Bin Meng wrote:
> > On Fri, Apr 21, 2023 at 12:34 PM Sunil V L  wrote:
> > >
> > > Currently, virt machine supports two pflash instances each with
> > > 32MB size. However, the first pflash is always assumed to
> > > contain M-mode firmware and reset vector is set to this if
> > > enabled. Hence, for S-mode payloads like EDK2, only one pflash
> > > instance is available for use. This means both code and NV variables
> > > of EDK2 will need to use the same pflash.
> > >
> > > The OS distros keep the EDK2 FW code as readonly. When non-volatile
> > > variables also need to share the same pflash, it is not possible
> > > to keep it as readonly since variables need write access.
> > >
> > > To resolve this issue, the code and NV variables need to be separated.
> > > But in that case we need an extra flash. Hence, modify the convention
> > > such that pflash0 will contain the M-mode FW only when "-bios none"
> > > option is used. Otherwise, pflash0 will contain the S-mode payload FW.
> > > This enables both pflash instances available for EDK2 use.
> > >
> > > Example usage:
> > > 1) pflash0 containing M-mode FW
> > > qemu-system-riscv64 -bios none -pflash  -machine virt
> > > or
> > > qemu-system-riscv64 -bios none \
> > > -drive file=,if=pflash,format=raw,unit=0 -machine virt
> > >
> > > 2) pflash0 containing S-mode payload like EDK2
> > > qemu-system-riscv64 -pflash  -pflash  -machine  virt
> > > or
> > > qemu-system-riscv64 -bios  \
> > > -pflash  \
> > > -pflash  \
> > > -machine  virt
> > > or
> > > qemu-system-riscv64 -bios  \
> > > -drive file=,if=pflash,format=raw,unit=0,readonly=on \
> > > -drive file=,if=pflash,format=raw,unit=1 \
> > > -machine virt
> >
> > Please update the docs in docs/system/riscv/virt.rst to include how to
> > run EDK2 bios with these settings.
> >
> Thanks Bin. Shall I do it as a separate patch after this gets approved?
> The reason is, I need to make changes in EDK2 to work with this. Once
> EDK2 changes are also in place, will send a patch to update this
> documentation. Does it make sense?
>

Yeah, in the doc we should provide the EDK2 URL and commit that was
verified to work, so it makes sense we can add such info when EDK2 is
available.
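
Something along these lines could later go into docs/system/riscv/virt.rst
(a sketch only: the flash file names and the -bios payload are placeholders,
and the verified EDK2 URL/commit would be filled in once the EDK2 changes
land):

    Running EDK2 from pflash
    ------------------------

    Keep the EDK2 code flash read-only and give the NV variable store its
    own writable flash:

    .. code-block:: bash

      $ qemu-system-riscv64 -M virt -bios fw_dynamic.bin \
          -drive file=CODE.fd,if=pflash,format=raw,unit=0,readonly=on \
          -drive file=VARS.fd,if=pflash,format=raw,unit=1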

Regards,
Bin



Re: [PATCH] hw/riscv: virt: Enable booting M-mode or S-mode FW from pflash0

2023-04-20 Thread Sunil V L
On Fri, Apr 21, 2023 at 12:39:46PM +0800, Bin Meng wrote:
> On Fri, Apr 21, 2023 at 12:34 PM Sunil V L  wrote:
> >
> > Currently, virt machine supports two pflash instances each with
> > 32MB size. However, the first pflash is always assumed to
> > contain M-mode firmware and reset vector is set to this if
> > enabled. Hence, for S-mode payloads like EDK2, only one pflash
> > instance is available for use. This means both code and NV variables
> > of EDK2 will need to use the same pflash.
> >
> > The OS distros keep the EDK2 FW code as readonly. When non-volatile
> > variables also need to share the same pflash, it is not possible
> > to keep it as readonly since variables need write access.
> >
> > To resolve this issue, the code and NV variables need to be separated.
> > But in that case we need an extra flash. Hence, modify the convention
> > such that pflash0 will contain the M-mode FW only when "-bios none"
> > option is used. Otherwise, pflash0 will contain the S-mode payload FW.
> > This enables both pflash instances available for EDK2 use.
> >
> > Example usage:
> > 1) pflash0 containing M-mode FW
> > qemu-system-riscv64 -bios none -pflash  -machine virt
> > or
> > qemu-system-riscv64 -bios none \
> > -drive file=,if=pflash,format=raw,unit=0 -machine virt
> >
> > 2) pflash0 containing S-mode payload like EDK2
> > qemu-system-riscv64 -pflash  -pflash  -machine  virt
> > or
> > qemu-system-riscv64 -bios  \
> > -pflash  \
> > -pflash  \
> > -machine  virt
> > or
> > qemu-system-riscv64 -bios  \
> > -drive file=,if=pflash,format=raw,unit=0,readonly=on \
> > -drive file=,if=pflash,format=raw,unit=1 \
> > -machine virt
> 
> Please update the docs in docs/system/riscv/virt.rst to include how to
> run EDK2 bios with these settings.
> 
Thanks Bin. Shall I do it as a separate patch after this gets approved?
The reason is, I need to make changes in EDK2 to work with this. Once
EDK2 changes are also in place, will send a patch to update this
documentation. Does it make sense?

Thanks,
Sunil



Re: [PATCH] hw/riscv: virt: Enable booting M-mode or S-mode FW from pflash0

2023-04-20 Thread Bin Meng
On Fri, Apr 21, 2023 at 12:34 PM Sunil V L  wrote:
>
> Currently, virt machine supports two pflash instances each with
> 32MB size. However, the first pflash is always assumed to
> contain M-mode firmware and reset vector is set to this if
> enabled. Hence, for S-mode payloads like EDK2, only one pflash
> instance is available for use. This means both code and NV variables
> of EDK2 will need to use the same pflash.
>
> The OS distros keep the EDK2 FW code as readonly. When non-volatile
> variables also need to share the same pflash, it is not possible
> to keep it as readonly since variables need write access.
>
> To resolve this issue, the code and NV variables need to be separated.
> But in that case we need an extra flash. Hence, modify the convention
> such that pflash0 will contain the M-mode FW only when "-bios none"
> option is used. Otherwise, pflash0 will contain the S-mode payload FW.
> This enables both pflash instances available for EDK2 use.
>
> Example usage:
> 1) pflash0 containing M-mode FW
> qemu-system-riscv64 -bios none -pflash  -machine virt
> or
> qemu-system-riscv64 -bios none \
> -drive file=,if=pflash,format=raw,unit=0 -machine virt
>
> 2) pflash0 containing S-mode payload like EDK2
> qemu-system-riscv64 -pflash  -pflash  -machine  virt
> or
> qemu-system-riscv64 -bios  \
> -pflash  \
> -pflash  \
> -machine  virt
> or
> qemu-system-riscv64 -bios  \
> -drive file=,if=pflash,format=raw,unit=0,readonly=on \
> -drive file=,if=pflash,format=raw,unit=1 \
> -machine virt

Please update the docs in docs/system/riscv/virt.rst to include how to
run EDK2 bios with these settings.

>
> Signed-off-by: Sunil V L 
> Reported-by: Heinrich Schuchardt 
> ---
>  hw/riscv/virt.c | 51 ++---
>  1 file changed, 19 insertions(+), 32 deletions(-)
>
> diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
> index 4e3efbee16..1187a60d6e 100644
> --- a/hw/riscv/virt.c
> +++ b/hw/riscv/virt.c
> @@ -1245,7 +1245,7 @@ static void virt_machine_done(Notifier *notifier, void *data)
>  target_ulong firmware_end_addr, kernel_start_addr;
>  const char *firmware_name = riscv_default_firmware_name(&s->soc[0]);
>  uint32_t fdt_load_addr;
> -uint64_t kernel_entry;
> +uint64_t kernel_entry = 0;
>
>  /*
>   * Only direct boot kernel is currently supported for KVM VM,
> @@ -1266,42 +1266,29 @@ static void virt_machine_done(Notifier *notifier, void *data)
>  firmware_end_addr = riscv_find_and_load_firmware(machine, firmware_name,
>   start_addr, NULL);
>
> -if (drive_get(IF_PFLASH, 0, 1)) {
> -/*
> - * S-mode FW like EDK2 will be kept in second pflash (unit 1).
> - * When both kernel, initrd and pflash options are provided in the
> - * command line, the kernel and initrd will be copied to the fw_cfg
> - * table and opensbi will jump to the flash address which is the
> - * entry point of S-mode FW. It is the job of the S-mode FW to load
> - * the kernel and initrd using fw_cfg table.
> - *
> - * If only pflash is given but not -kernel, then it is the job of
> - * the S-mode firmware to locate and load the kernel.
> - * In either case, the next_addr for opensbi will be the flash address.
> - */
> -riscv_setup_firmware_boot(machine);
> -kernel_entry = virt_memmap[VIRT_FLASH].base +
> -   virt_memmap[VIRT_FLASH].size / 2;
> -} else if (machine->kernel_filename) {
> +if (drive_get(IF_PFLASH, 0, 0)) {
> +if (machine->firmware && !strcmp(machine->firmware, "none")) {
> +/*
> + * Pflash was supplied but bios is none, let's overwrite the
> + * address we jump to after reset to the base of the flash.
> + */
> +start_addr = virt_memmap[VIRT_FLASH].base;
> +} else {
> +/*
> + * Pflash was supplied but bios is not none. In this case,
> + * base of the flash would contain S-mode payload.
> + */
> +riscv_setup_firmware_boot(machine);
> +kernel_entry = virt_memmap[VIRT_FLASH].base;
> +}
> +}
> +
> +if (machine->kernel_filename && !kernel_entry) {
>  kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
>   firmware_end_addr);
>
>  kernel_entry = riscv_load_kernel(machine, &s->soc[0],
>   kernel_start_addr, true, NULL);
> -} else {
> -   /*
> -* If dynamic firmware is used, it doesn't know where is the next mode
> -* if kernel argument is not set.
> -*/
> -kernel_entry = 0;
> -}
> -
> -if (drive_get(IF_PFLASH, 0, 0)) {
> -/*
> - * Pflash was supplied, let's overwrite the address we jump to after
> - * reset to the base of the 

[PATCH] hw/riscv: virt: Enable booting M-mode or S-mode FW from pflash0

2023-04-20 Thread Sunil V L
Currently, virt machine supports two pflash instances each with
32MB size. However, the first pflash is always assumed to
contain M-mode firmware and reset vector is set to this if
enabled. Hence, for S-mode payloads like EDK2, only one pflash
instance is available for use. This means both code and NV variables
of EDK2 will need to use the same pflash.

The OS distros keep the EDK2 FW code as readonly. When non-volatile
variables also need to share the same pflash, it is not possible
to keep it as readonly since variables need write access.

To resolve this issue, the code and NV variables need to be separated.
But in that case we need an extra flash. Hence, modify the convention
such that pflash0 will contain the M-mode FW only when "-bios none"
option is used. Otherwise, pflash0 will contain the S-mode payload FW.
This enables both pflash instances available for EDK2 use.

Example usage:
1) pflash0 containing M-mode FW
qemu-system-riscv64 -bios none -pflash  -machine virt
or
qemu-system-riscv64 -bios none \
-drive file=,if=pflash,format=raw,unit=0 -machine virt

2) pflash0 containing S-mode payload like EDK2
qemu-system-riscv64 -pflash  -pflash  -machine  virt
or
qemu-system-riscv64 -bios  \
-pflash  \
-pflash  \
-machine  virt
or
qemu-system-riscv64 -bios  \
-drive file=,if=pflash,format=raw,unit=0,readonly=on \
-drive file=,if=pflash,format=raw,unit=1 \
-machine virt

Signed-off-by: Sunil V L 
Reported-by: Heinrich Schuchardt 
---
 hw/riscv/virt.c | 51 ++---
 1 file changed, 19 insertions(+), 32 deletions(-)

diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 4e3efbee16..1187a60d6e 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -1245,7 +1245,7 @@ static void virt_machine_done(Notifier *notifier, void *data)
 target_ulong firmware_end_addr, kernel_start_addr;
 const char *firmware_name = riscv_default_firmware_name(&s->soc[0]);
 uint32_t fdt_load_addr;
-uint64_t kernel_entry;
+uint64_t kernel_entry = 0;
 
 /*
  * Only direct boot kernel is currently supported for KVM VM,
@@ -1266,42 +1266,29 @@ static void virt_machine_done(Notifier *notifier, void *data)
 firmware_end_addr = riscv_find_and_load_firmware(machine, firmware_name,
  start_addr, NULL);
 
-if (drive_get(IF_PFLASH, 0, 1)) {
-/*
- * S-mode FW like EDK2 will be kept in second pflash (unit 1).
- * When both kernel, initrd and pflash options are provided in the
- * command line, the kernel and initrd will be copied to the fw_cfg
- * table and opensbi will jump to the flash address which is the
- * entry point of S-mode FW. It is the job of the S-mode FW to load
- * the kernel and initrd using fw_cfg table.
- *
- * If only pflash is given but not -kernel, then it is the job of
- * the S-mode firmware to locate and load the kernel.
- * In either case, the next_addr for opensbi will be the flash address.
- */
-riscv_setup_firmware_boot(machine);
-kernel_entry = virt_memmap[VIRT_FLASH].base +
-   virt_memmap[VIRT_FLASH].size / 2;
-} else if (machine->kernel_filename) {
+if (drive_get(IF_PFLASH, 0, 0)) {
+if (machine->firmware && !strcmp(machine->firmware, "none")) {
+/*
+ * Pflash was supplied but bios is none, let's overwrite the
+ * address we jump to after reset to the base of the flash.
+ */
+start_addr = virt_memmap[VIRT_FLASH].base;
+} else {
+/*
+ * Pflash was supplied but bios is not none. In this case,
+ * base of the flash would contain S-mode payload.
+ */
+riscv_setup_firmware_boot(machine);
+kernel_entry = virt_memmap[VIRT_FLASH].base;
+}
+}
+
+if (machine->kernel_filename && !kernel_entry) {
 kernel_start_addr = riscv_calc_kernel_start_addr(&s->soc[0],
  firmware_end_addr);
 
 kernel_entry = riscv_load_kernel(machine, &s->soc[0],
  kernel_start_addr, true, NULL);
-} else {
-   /*
-* If dynamic firmware is used, it doesn't know where is the next mode
-* if kernel argument is not set.
-*/
-kernel_entry = 0;
-}
-
-if (drive_get(IF_PFLASH, 0, 0)) {
-/*
- * Pflash was supplied, let's overwrite the address we jump to after
- * reset to the base of the flash.
- */
-start_addr = virt_memmap[VIRT_FLASH].base;
 }
 
 fdt_load_addr = riscv_compute_fdt_addr(memmap[VIRT_DRAM].base,
-- 
2.34.1




Re: [PATCH v3 2/2] tests/avocado/tuxrun_baselines.py: improve code coverage for ppc64

2023-04-20 Thread Kautuk Consul
On 2023-04-21 09:23:52, Harsh Prateek Bora wrote:
> 
> 
> On 4/21/23 09:12, Kautuk Consul wrote:
> > Hi,
> > 
> > On 2023-04-20 19:20:40, Harsh Prateek Bora wrote:
> > > Since we are optimising code a lot, one suggestion below:
> > > 
> > > 
> > > On Thu, Apr 20, 2023 at 6:23 PM Kautuk Consul wrote:
> > > 
> > > > Commit c0c8687ef0fd990db8db1655a8a6c5a5e35dd4bb disabled the
> > > > boot_linux.py test-case due to which the code coverage for ppc
> > > > decreased by around 2%. As per the discussion on
> > > > https://lore.kernel.org/qemu-devel/87sfdpqcy4@linaro.org/ it
> > > > was mentioned that the baseline test for ppc64 could be modified
> > > > to make up this 2% code coverage. This patch attempts to achieve
> > > > this 2% code coverage by adding various device command line
> > > > arguments (to ./qemu-system-ppc64) in the tuxrun_baselines.py
> > > > test-case.
> > > > 
> > > > The code coverage report with boot_linux.py, without it and finally
> > > > with these tuxrun_baselines.py changes is as follows:
> > > > 
> > > > With boot_linux.py
> > > > --
> > > >lines..: 13.8% (58006 of 420997 lines)
> > > >functions..: 20.7% (7675 of 36993 functions)
> > > >branches...: 9.2% (22146 of 240611 branches)
> > > > Without boot_linux.py (without this patch changes)
> > > > --
> > > >lines..: 11.9% (50174 of 420997 lines)
> > > >functions..: 18.8% (6947 of 36993 functions)
> > > >branches...: 7.4% (17580 of 239017 branches)
> > > > Without boot_linux.py (with this patch changes)
> > > > ---
> > > >lines..: 13.8% (58287 of 420997 lines)
> > > >functions..: 20.7% (7640 of 36993 functions)
> > > >branches...: 8.4% (20223 of 240611 branches)
> > > > 
> > > > Signed-off-by: Kautuk Consul 
> > > > Reported-by: Alex Bennée 
> > > > ---
> > > >   tests/avocado/tuxrun_baselines.py | 120 +-
> > > >   1 file changed, 116 insertions(+), 4 deletions(-)
> > > > 
> > > > diff --git a/tests/avocado/tuxrun_baselines.py b/tests/avocado/tuxrun_baselines.py
> > > > index d343376faa..ae082ac028 100644
> > > > --- a/tests/avocado/tuxrun_baselines.py
> > > > +++ b/tests/avocado/tuxrun_baselines.py
> > > > @@ -11,6 +11,7 @@
> > > > 
> > > >   import os
> > > >   import time
> > > > +import tempfile
> > > > 
> > > > 
> > > >   """
> > > > -self.common_tuxrun(drive="scsi-hd")
> > > > +
> > > > +# add device args to command line.
> > > > +self.vm.add_args('-netdev', 'user,id=vnet,hostfwd=:127.0.0.1:0-:22',
> > > > + '-device', 'virtio-net,netdev=vnet')
> > > > +self.vm.add_args('-netdev', '{"type":"user","id":"hostnet0"}',
> > > > + '-device', '{"driver":"virtio-net-pci","netdev":'
> > > > + '"hostnet0","id":"net0","mac":"52:54:00:4c:e3:86",'
> > > > + '"bus":"pci.0","addr":"0x9"}')
> > > > +self.vm.add_args('-device', '{"driver":"qemu-xhci","p2":15,"p3":15,'
> > > > + '"id":"usb","bus":"pci.0","addr":"0x2"}')
> > > > +self.vm.add_args('-device', '{"driver":"virtio-scsi-pci","id":"scsi0"'
> > > > + ',"bus":"pci.0","addr":"0x3"}')
> > > > +self.vm.add_args('-device', '{"driver":"virtio-serial-pci","id":'
> > > > + '"virtio-serial0","bus":"pci.0","addr":"0x4"}')
> > > > +self.vm.add_args('-device', '{"driver":"scsi-cd","bus":"scsi0.0"'
> > > > + ',"channel":0,"scsi-id":0,"lun":0,"device_id":'
> > > > + '"drive-scsi0-0-0-0","id":"scsi0-0-0-0"}')
> > > > +self.vm.add_args('-device', '{"driver":"virtio-balloon-pci",'
> > > > + '"id":"balloon0","bus":"pci.0","addr":"0x6"}')
> > > > +self.vm.add_args('-audiodev', '{"id":"audio1","driver":"none"}')
> > > > +self.vm.add_args('-device', '{"driver":"usb-tablet","id":"input0"'
> > > > + ',"bus":"usb.0","port":"1"}')
> > > > +self.vm.add_args('-device', '{"driver":"usb-kbd","id":"input1"'
> > > > + ',"bus":"usb.0","port":"2"}')
> > > > +self.vm.add_args('-device', '{"driver":"VGA","id":"video0",'
> > > > + '"vgamem_mb":16,"bus":"pci.0","addr":"0x7"}')
> > > > +self.vm.add_args('-object', '{"qom-type":"rng-random","id":"objrng0"'
> > > > + ',"filename":"/dev/urandom"}',
> > > > + '-device', '{"driver":"virtio-rng-pci","rng":"objrng0"'
> > > > + ',"id":"rng0","bus":"pci.0","addr":"0x8"}')
> > > > +self.vm.add_args('-object', '{"qom-type":"cryptodev-backend-builtin",'
> > > > + 

[PATCH v4 1/2] avocado_qemu/__init__.py: factor out the qemu-img finding

2023-04-20 Thread Kautuk Consul
Factor out the code that finds the qemu-img binary in the
QemuSystemTest class into a new get_qemu_img() function.
This function will also be called from the new code in the
tuxrun_baselines.py avocado test case.

Signed-off-by: Kautuk Consul 
---
 tests/avocado/avocado_qemu/__init__.py | 27 +++---
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/tests/avocado/avocado_qemu/__init__.py b/tests/avocado/avocado_qemu/__init__.py
index cb71f50db9..d8f390e27a 100644
--- a/tests/avocado/avocado_qemu/__init__.py
+++ b/tests/avocado/avocado_qemu/__init__.py
@@ -330,6 +330,19 @@ def _new_vm(self, name, *args):
 vm.add_args(*args)
 return vm
 
+def get_qemu_img(self):
+self.log.debug('Looking for and selecting a qemu-img binary')
+
+# If qemu-img has been built, use it, otherwise the system wide one
+# will be used.
+qemu_img = os.path.join(BUILD_DIR, 'qemu-img')
+if not os.path.exists(qemu_img):
+qemu_img = find_command('qemu-img', False)
+if qemu_img is False:
+self.cancel('Could not find "qemu-img"')
+
+return qemu_img
+
 @property
 def vm(self):
 return self.get_vm(name='default')
@@ -594,17 +607,9 @@ def set_up_existing_ssh_keys(self):
 return (ssh_public_key, ssh_private_key)
 
 def download_boot(self):
-self.log.debug('Looking for and selecting a qemu-img binary to be '
-   'used to create the bootable snapshot image')
-# If qemu-img has been built, use it, otherwise the system wide one
-# will be used.  If none is available, the test will cancel.
-qemu_img = os.path.join(BUILD_DIR, 'qemu-img')
-if not os.path.exists(qemu_img):
-qemu_img = find_command('qemu-img', False)
-if qemu_img is False:
-self.cancel('Could not find "qemu-img", which is required to '
-'create the bootable image')
-vmimage.QEMU_IMG = qemu_img
+# Set the qemu-img binary.
+# If none is available, the test will cancel.
+vmimage.QEMU_IMG = super().get_qemu_img()
 
 self.log.info('Downloading/preparing boot image')
 # Fedora 31 only provides ppc64le images
-- 
2.25.1
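
As a usage illustration, a test that needs a scratch disk could now build
one via the helper; this is only a sketch, and the method name, image name
and size are invented for the example:

    # hypothetical helper inside a QemuSystemTest subclass
    import os
    import tempfile
    from avocado.utils import process

    def make_scratch_image(self, size='1G'):
        qemu_img = self.get_qemu_img()  # cancels the test if qemu-img is missing
        img = os.path.join(tempfile.mkdtemp(), 'scratch.qcow2')
        process.run(f'{qemu_img} create -f qcow2 {img} {size}')
        return img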




[PATCH v4 2/2] tests/avocado/tuxrun_baselines.py: improve code coverage for ppc64

2023-04-20 Thread Kautuk Consul
Commit c0c8687ef0fd990db8db1655a8a6c5a5e35dd4bb disabled the
boot_linux.py test-case due to which the code coverage for ppc
decreased by around 2%. As per the discussion on
https://lore.kernel.org/qemu-devel/87sfdpqcy4@linaro.org/ it
was mentioned that the baseline test for ppc64 could be modified
to make up this 2% code coverage. This patch attempts to achieve
this 2% code coverage by adding various device command line
arguments (to ./qemu-system-ppc64) in the tuxrun_baselines.py
test-case.

The code coverage report with boot_linux.py, without it and finally
with these tuxrun_baselines.py changes is as follows:

With boot_linux.py
--
  lines..: 13.8% (58006 of 420997 lines)
  functions..: 20.7% (7675 of 36993 functions)
  branches...: 9.2% (22146 of 240611 branches)
Without boot_linux.py (without this patch changes)
--
  lines..: 11.9% (50174 of 420997 lines)
  functions..: 18.8% (6947 of 36993 functions)
  branches...: 7.4% (17580 of 239017 branches)
Without boot_linux.py (with this patch changes)
---
  lines..: 13.8% (58287 of 420997 lines)
  functions..: 20.7% (7640 of 36993 functions)
  branches...: 8.4% (20223 of 240611 branches)

Signed-off-by: Kautuk Consul 
Reported-by: Alex Bennée 
---
 tests/avocado/tuxrun_baselines.py | 68 +--
 1 file changed, 64 insertions(+), 4 deletions(-)

diff --git a/tests/avocado/tuxrun_baselines.py b/tests/avocado/tuxrun_baselines.py
index d343376faa..546abd9396 100644
--- a/tests/avocado/tuxrun_baselines.py
+++ b/tests/avocado/tuxrun_baselines.py
@@ -11,6 +11,7 @@
 
 import os
 import time
+import tempfile
 
 from avocado import skip, skipIf
 from avocado_qemu import QemuSystemTest
@@ -72,6 +73,8 @@ def setUp(self):
 # Occasionally we need extra devices to hook things up
 self.extradev = self.get_tag('extradev')
 
+self.qemu_img = super().get_qemu_img()
+
 def wait_for_console_pattern(self, success_message, vm=None):
 wait_for_console_pattern(self, success_message,
  failure_message='Kernel panic - not syncing',
@@ -164,6 +167,63 @@ def common_tuxrun(self, dt=None,
 self.vm.launch()
 self.run_tuxtest_tests(haltmsg)
 
+def ppc64_common_tuxrun(self, prefix):
+# add device args to command line.
+self.vm.add_args('-netdev', 'user,id=vnet,hostfwd=:127.0.0.1:0-:22',
+ '-device', 'virtio-net,netdev=vnet')
+self.vm.add_args('-netdev', '{"type":"user","id":"hostnet0"}',
+ '-device', '{"driver":"virtio-net-pci","netdev":'
+ '"hostnet0","id":"net0","mac":"52:54:00:4c:e3:86",'
+ '"bus":"pci.0","addr":"0x9"}')
+self.vm.add_args('-device', '{"driver":"qemu-xhci","p2":15,"p3":15,'
+ '"id":"usb","bus":"pci.0","addr":"0x2"}')
+self.vm.add_args('-device', '{"driver":"virtio-scsi-pci","id":"scsi0"'
+ ',"bus":"pci.0","addr":"0x3"}')
+self.vm.add_args('-device', '{"driver":"virtio-serial-pci","id":'
+ '"virtio-serial0","bus":"pci.0","addr":"0x4"}')
+self.vm.add_args('-device', '{"driver":"scsi-cd","bus":"scsi0.0"'
+ ',"channel":0,"scsi-id":0,"lun":0,"device_id":'
+ '"drive-scsi0-0-0-0","id":"scsi0-0-0-0"}')
+self.vm.add_args('-device', '{"driver":"virtio-balloon-pci",'
+ '"id":"balloon0","bus":"pci.0","addr":"0x6"}')
+self.vm.add_args('-audiodev', '{"id":"audio1","driver":"none"}')
+self.vm.add_args('-device', '{"driver":"usb-tablet","id":"input0"'
+ ',"bus":"usb.0","port":"1"}')
+self.vm.add_args('-device', '{"driver":"usb-kbd","id":"input1"'
+ ',"bus":"usb.0","port":"2"}')
+self.vm.add_args('-device', '{"driver":"VGA","id":"video0",'
+ '"vgamem_mb":16,"bus":"pci.0","addr":"0x7"}')
+self.vm.add_args('-object', '{"qom-type":"rng-random","id":"objrng0"'
+ ',"filename":"/dev/urandom"}',
+ '-device', '{"driver":"virtio-rng-pci","rng":"objrng0"'
+ ',"id":"rng0","bus":"pci.0","addr":"0x8"}')
+self.vm.add_args('-object', '{"qom-type":"cryptodev-backend-builtin",'
+ '"id":"objcrypto0","queues":1}',
+ '-device', '{"driver":"virtio-crypto-pci",'
+ '"cryptodev":"objcrypto0","id":"crypto0","bus"'
+ ':"pci.0","addr":"0xa"}')
+self.vm.add_args('-device', '{"driver":"spapr-pci-host-bridge"'
+ ',"index":1,"id":"pci.1"}')
+self.vm.add_args('-device', '{"driver":"spapr-vscsi","id":"scsi1"'
+ ',"reg":12288}')
+

[PATCH v4 0/2] Improve code coverage for ppc64

2023-04-20 Thread Kautuk Consul
Commit c0c8687ef0fd990db8db1655a8a6c5a5e35dd4bb disabled the
boot_linux.py test-case due to which the code coverage for ppc
decreased by around 2%. As per the discussion on
https://lore.kernel.org/qemu-devel/87sfdpqcy4@linaro.org/ it
was mentioned that the baseline test for ppc64 could be modified
to make up this 2% code coverage. This patchset attempts to achieve
this 2% code coverage by adding various device command line
arguments (to ./qemu-system-ppc64) in the tuxrun_baselines.py
test-case.

Changes since v3:
- Create a common ppc64_common_tuxrun routine in tuxrun_baselines.py
  and call that from the ppc64 and ppc64le test case routines.

Kautuk Consul (2):
  avocado_qemu/__init__.py: factor out the qemu-img finding
  tests/avocado/tuxrun_baselines.py: improve code coverage for ppc64

 tests/avocado/avocado_qemu/__init__.py | 27 +-
 tests/avocado/tuxrun_baselines.py  | 68 --
 2 files changed, 80 insertions(+), 15 deletions(-)

-- 
2.25.1




Re: [PATCH v4 1/6] tests/requirements.txt: bump up avocado-framework version to 101.0

2023-04-20 Thread Cleber Rosa



On 4/18/23 03:10, Thomas Huth wrote:

On 17/04/2023 15.43, Alex Bennée wrote:

From: Kautuk Consul 

Avocado version 101.0 has a fix to re-compute the checksum
of an asset file if the algorithm used in the *-CHECKSUM
file isn't the same as the one being passed to it by the
avocado user (i.e. the avocado_qemu python module).
Earlier Avocado versions lacked this fix, so when the checksum
didn't match the stored one (calculated with a different
algorithm), Avocado would download a fresh image from the
internet URL, making the test cases take longer to execute.

Bump up the avocado-framework version to 101.0.

Signed-off-by: Kautuk Consul 
Tested-by: Hariharan T S 
Message-Id: <20230327115030.3418323-2-kcon...@linux.vnet.ibm.com>
Message-Id: <20230330101141.30199-10-alex.ben...@linaro.org>

---
v2
   - limit --max-parallel-tasks $(JOBS_OPTION:-j%=%)
---
  tests/Makefile.include | 18 +++---
  tests/requirements.txt |  2 +-
  2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/tests/Makefile.include b/tests/Makefile.include
index 9422ddaece..a4de0ad5a2 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -138,14 +138,18 @@ get-vm-image-fedora-31-%: check-venv
  # download all vm images, according to defined targets
  get-vm-images: check-venv $(patsubst %,get-vm-image-fedora-31-%, $(FEDORA_31_DOWNLOAD))

+JOBS_OPTION=$(lastword -j1 $(filter-out -j, $(filter -j%,$(MAKEFLAGS))))
+
  check-avocado: check-venv $(TESTS_RESULTS_DIR) get-vm-images
-    $(call quiet-command, \
-    $(TESTS_PYTHON) -m avocado \
-    --show=$(AVOCADO_SHOW) run --job-results-dir=$(TESTS_RESULTS_DIR) \
-    $(if $(AVOCADO_TAGS),, --filter-by-tags-include-empty \
-    --filter-by-tags-include-empty-key) \
-    $(AVOCADO_CMDLINE_TAGS) \
-    $(if $(GITLAB_CI),,--failfast) $(AVOCADO_TESTS), \
+    $(call quiet-command, \
+    $(TESTS_PYTHON) -m avocado \
+    --show=$(AVOCADO_SHOW) run --job-results-dir=$(TESTS_RESULTS_DIR) \
+    $(if $(AVOCADO_TAGS),, \
+    --filter-by-tags-include-empty \
+    --filter-by-tags-include-empty-key) \
+    --max-parallel-tasks $(JOBS_OPTION:-j%=%) \
+    $(AVOCADO_CMDLINE_TAGS) \
+    $(if $(GITLAB_CI),,--failfast) $(AVOCADO_TESTS), \
      "AVOCADO", "tests/avocado")


We might need to revisit this --failfast logic, too. If I've got that 
right, failfast is now the default with the new system? So we might 
want to disable it by default again if GITLAB_CI is not set?


 Thomas


Hi Thomas,

I must be missing something, because under Avocado 101.0, I'm getting 
the following behavior without the --failfast flag:


  $ avocado run --max-parallel-tasks=1 -- /bin/true /bin/false /bin/true
  JOB ID     : 646f476f01b8d5599a57530606de543f2d9a5366
  JOB LOG    : /root/avocado/job-results/job-2023-04-21T04.15-646f476/job.log

   (1/3) /bin/true: STARTED
   (1/3) /bin/true: PASS (0.01 s)
   (2/3) /bin/false: STARTED
   (2/3) /bin/false: FAIL (0.01 s)
   (3/3) /bin/true: STARTED
   (3/3) /bin/true: PASS (0.01 s)
  RESULTS    : PASS 2 | ERROR 0 | FAIL 1 | SKIP 0 | WARN 0 | INTERRUPT 0 | CANCEL 0

  JOB TIME   : 1.01 s

  Test summary:
  /bin/false: FAIL

And this with --failfast:

  $ avocado run --failfast --max-parallel-tasks=1 -- /bin/true /bin/false /bin/true

  JOB ID     : ae4894607a42194a7382efa545eccaccf7495fa3
  JOB LOG    : /root/avocado/job-results/job-2023-04-21T04.17-ae48946/job.log

   (1/3) /bin/true: STARTED
   (1/3) /bin/true: PASS (0.01 s)
   (2/3) /bin/false: STARTED
   (2/3) /bin/false: FAIL (0.01 s)
  Interrupting job (failfast).
  RESULTS    : PASS 1 | ERROR 0 | FAIL 1 | SKIP 1 | WARN 0 | INTERRUPT 0 | CANCEL 0

  JOB TIME   : 0.75 s

  Test summary:
  /bin/false: FAIL

Maybe it's something in the Makefile I'm missing, or something specific 
to these tests, but that would be a long shot.


Thanks,

- Cleber.




Re: [PATCH v4 1/6] tests/requirements.txt: bump up avocado-framework version to 101.0

2023-04-20 Thread Cleber Rosa



On 4/20/23 12:54, John Snow wrote:



On Thu, Apr 20, 2023, 5:19 AM Philippe Mathieu-Daudé wrote:


On 17/4/23 19:44, Alex Bennée wrote:
>
> John Snow  writes:
>
> >> On Mon, Apr 17, 2023 at 9:43 AM Alex Bennée wrote:
>>>
>>> From: Kautuk Consul 
>>>
>>> Avocado version 101.0 has a fix to re-compute the checksum
>>> of an asset file if the algorithm used in the *-CHECKSUM
>>> file isn't the same as the one being passed to it by the
>>> avocado user (i.e. the avocado_qemu python module).
>>> In the earlier avocado versions this fix wasn't there due
>>> to which if the checksum wouldn't match the earlier
>>> checksum (calculated by a different algorithm), the avocado
>>> code would start downloading a fresh image from the internet
>>> URL thus making the test-cases take longer to execute.
>>>
>>> Bump up the avocado-framework version to 101.0.
>>>
>>> Signed-off-by: Kautuk Consul 
>>> Tested-by: Hariharan T S 
>>> Message-Id: <20230327115030.3418323-2-kcon...@linux.vnet.ibm.com>
>>> Message-Id: <20230330101141.30199-10-alex.ben...@linaro.org>
>>>
>>> ---
>>> v2
>>>    - limit --max-parallel-tasks $(JOBS_OPTION:-j%=%)
>>> ---
>>>   tests/Makefile.include | 18 +++---
>>>   tests/requirements.txt |  2 +-
>>>   2 files changed, 12 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/tests/Makefile.include b/tests/Makefile.include
>>> index 9422ddaece..a4de0ad5a2 100644
>>> --- a/tests/Makefile.include
>>> +++ b/tests/Makefile.include
>>> @@ -138,14 +138,18 @@ get-vm-image-fedora-31-%: check-venv
>>>   # download all vm images, according to defined targets
>>>   get-vm-images: check-venv $(patsubst
%,get-vm-image-fedora-31-%, $(FEDORA_31_DOWNLOAD))
>>>
>>> +JOBS_OPTION=$(lastword -j1 $(filter-out -j, $(filter
-j%,$(MAKEFLAGS
>>> +
>>>   check-avocado: check-venv $(TESTS_RESULTS_DIR) get-vm-images
>>> -       $(call quiet-command, \
>>> -            $(TESTS_PYTHON) -m avocado \
>>> -            --show=$(AVOCADO_SHOW) run --job-results-dir=$(TESTS_RESULTS_DIR) \
>>> -            $(if $(AVOCADO_TAGS),, --filter-by-tags-include-empty \
>>> -  --filter-by-tags-include-empty-key) \
>>> -            $(AVOCADO_CMDLINE_TAGS) \
>>> -            $(if $(GITLAB_CI),,--failfast) $(AVOCADO_TESTS), \
>>> +       $(call quiet-command,                                    \
>>> +            $(TESTS_PYTHON) -m avocado                          \
>>> +            --show=$(AVOCADO_SHOW) run --job-results-dir=$(TESTS_RESULTS_DIR) \
>>> +            $(if $(AVOCADO_TAGS),,                              \
>>> +  --filter-by-tags-include-empty                                \
>>> +  --filter-by-tags-include-empty-key)                           \
>>> +               --max-parallel-tasks $(JOBS_OPTION:-j%=%)        \
>>> +            $(AVOCADO_CMDLINE_TAGS)                             \
>>> +            $(if $(GITLAB_CI),,--failfast) $(AVOCADO_TESTS),    \
>>>               "AVOCADO", "tests/avocado")
>>>
>>>   check-acceptance-deprecated-warning:
>>> diff --git a/tests/requirements.txt b/tests/requirements.txt
>>> index 0ba561b6bd..a6f73da681 100644
>>> --- a/tests/requirements.txt
>>> +++ b/tests/requirements.txt
>>> @@ -2,5 +2,5 @@
>>>   # in the tests/venv Python virtual environment. For more info,
>>>   # refer to: https://pip.pypa.io/en/stable/user_guide/#id1
>>>   # Note that qemu.git/python/ is always implicitly installed.
>>> -avocado-framework==88.1
>>> +avocado-framework==101.0
>>>   pycdlib==1.11.0
>>> --
>>> 2.39.2
>>>
>>
>> I thought there were test failures that prohibited us from bumping the
>> Avocado-Framework version. Did those get rectified recently?

No, still not working on Darwin. At this point I'm thinking of sending
a patch clarifying that testing QEMU with Avocado is not supported on
Darwin.


What's broken on Darwin?


Hi John,

IIRC, the main issue is that the legacy runner (the one active in 88.1) 
pickles some stuff that does not play nicely under Darwin.  It has never (to 
the best of my knowledge) worked under Darwin.



Is it broken with avocado-framework==88.1 too? (Is this a regression?)

It was already broken with avocado-framework==88.1, but it should mostly 
work with 101.0 (see previous reply).



We might need to consider entirely separate issues:

(1) We need to upgrade avocado-framework to a supported LTS version >= 
v90 for dependency harmony with qemu.git/python testing


(2) We need to upgrade avocado-framework to >= v101.0 to fix the 
stated checksum issue in this patch


(3) We would like avocado tests to work on Darwin. (Have they ever 
worked? When did they break? 

Re: [PATCH v3 0/2] Improve avocado code coverage for ppc64

2023-04-20 Thread Kautuk Consul
On 2023-04-20 16:01:45, Alex Bennée wrote:
> 
> Kautuk Consul  writes:
> 
> > Commit c0c8687ef0fd990db8db1655a8a6c5a5e35dd4bb disabled the
> > boot_linux.py test-case due to which the code coverage for
> > powerpc decreased by 2%. This patchset aims to make up this
> > 2% code coverage by modifying the tuxrun_baselines.py test-case.
> 
> BTW did you see we have a basic tool to compare runs now in
> scripts/coverage/compare_gcov_json.py if you want to see exactly what
> the differences between a boot_linux and tuxrun_baseline test is.
I focused on just getting the lines' and functions' coverage equal to the
boot_linux.py test-case. I just added devices to meet the coverage
percentage and didn't really focus on imitating the code coverage that
boot_linux.py does.
> 
> Currently its only file granularity but it could be fixed up to do a
> more detailed line by line breakdown.
I am currently getting the following error while trying to execute this script:
root@melvil:/home/kautuk/qemu/build_without# python3 ../scripts/coverage/compare_gcov_json.py
Traceback (most recent call last):
  File "../scripts/coverage/compare_gcov_json.py", line 39, in <module>
def load_json(json_file_path: Path, verbose = False) -> dict[str, set[int]]:
TypeError: 'type' object is not subscriptable
root@melvil:/home/kautuk/qemu/build_without#
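
(That TypeError is what Python 3.8 and older raise for the built-in generic
annotation "dict[str, set[int]]" on line 39 -- subscripting built-in types in
annotations needs Python 3.9+. A minimal sketch of a compatible fix, assuming
the annotation is only wanted for documentation:

    # first statement at the top of scripts/coverage/compare_gcov_json.py
    from __future__ import annotations  # defer annotation evaluation (3.7+)

Running the script with a Python 3.9+ interpreter avoids it as well.)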
> 
> >
> > Changes since v1 and v2:
> > - Modified the way and code that creates the temporary qcow2
> >   image that the tuxrun_baselines.py is now using.
> > - Factored out code in avocado_qemu/__init__.py to create
> >   the get_qemu_img() function that will now get called from
> >   tuxrun_baselines.py.
> >
> > Kautuk Consul (2):
> >   avocado_qemu/__init__.py: factor out the qemu-img finding
> >   tests/avocado/tuxrun_baselines.py: improve code coverage for ppc64
> >
> >  tests/avocado/avocado_qemu/__init__.py |  27 +++---
> >  tests/avocado/tuxrun_baselines.py  | 120 -
> >  2 files changed, 132 insertions(+), 15 deletions(-)
> 
> 
> -- 
> Alex Bennée
> Virtualisation Tech Lead @ Linaro



Re: [PATCH v4 1/6] tests/requirements.txt: bump up avocado-framework version to 101.0

2023-04-20 Thread Cleber Rosa



On 4/20/23 05:18, Philippe Mathieu-Daudé wrote:

On 17/4/23 19:44, Alex Bennée wrote:


John Snow  writes:

On Mon, Apr 17, 2023 at 9:43 AM Alex Bennée  
wrote:


From: Kautuk Consul 

Avocado version 101.0 has a fix to re-compute the checksum
of an asset file if the algorithm used in the *-CHECKSUM
file isn't the same as the one being passed to it by the
avocado user (i.e. the avocado_qemu python module).
In the earlier avocado versions this fix wasn't there due
to which if the checksum wouldn't match the earlier
checksum (calculated by a different algorithm), the avocado
code would start downloading a fresh image from the internet
URL thus making the test-cases take longer to execute.

Bump up the avocado-framework version to 101.0.

Signed-off-by: Kautuk Consul 
Tested-by: Hariharan T S 
Message-Id: <20230327115030.3418323-2-kcon...@linux.vnet.ibm.com>
Message-Id: <20230330101141.30199-10-alex.ben...@linaro.org>

---
v2
   - limit --max-parallel-tasks $(JOBS_OPTION:-j%=%)
---
  tests/Makefile.include | 18 +++---
  tests/requirements.txt |  2 +-
  2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/tests/Makefile.include b/tests/Makefile.include
index 9422ddaece..a4de0ad5a2 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -138,14 +138,18 @@ get-vm-image-fedora-31-%: check-venv
  # download all vm images, according to defined targets
  get-vm-images: check-venv $(patsubst %,get-vm-image-fedora-31-%, $(FEDORA_31_DOWNLOAD))


+JOBS_OPTION=$(lastword -j1 $(filter-out -j, $(filter -j%,$(MAKEFLAGS))))

+
  check-avocado: check-venv $(TESTS_RESULTS_DIR) get-vm-images
-   $(call quiet-command, \
-    $(TESTS_PYTHON) -m avocado \
-    --show=$(AVOCADO_SHOW) run --job-results-dir=$(TESTS_RESULTS_DIR) \
-    $(if $(AVOCADO_TAGS),, --filter-by-tags-include-empty \
-   --filter-by-tags-include-empty-key) \
-    $(AVOCADO_CMDLINE_TAGS) \
-    $(if $(GITLAB_CI),,--failfast) $(AVOCADO_TESTS), \
+   $(call quiet-command, \
+    $(TESTS_PYTHON) -m avocado \
+    --show=$(AVOCADO_SHOW) run --job-results-dir=$(TESTS_RESULTS_DIR) \
+    $(if $(AVOCADO_TAGS),, \
+ --filter-by-tags-include-empty \
+ --filter-by-tags-include-empty-key) \
+   --max-parallel-tasks $(JOBS_OPTION:-j%=%) \
+ $(AVOCADO_CMDLINE_TAGS) \
+    $(if $(GITLAB_CI),,--failfast) $(AVOCADO_TESTS), \
  "AVOCADO", "tests/avocado")

  check-acceptance-deprecated-warning:
diff --git a/tests/requirements.txt b/tests/requirements.txt
index 0ba561b6bd..a6f73da681 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -2,5 +2,5 @@
  # in the tests/venv Python virtual environment. For more info,
  # refer to: https://pip.pypa.io/en/stable/user_guide/#id1
  # Note that qemu.git/python/ is always implicitly installed.
-avocado-framework==88.1
+avocado-framework==101.0
  pycdlib==1.11.0
--
2.39.2



I thought there were test failures that prohibited us from bumping the
Avocado-Framework version. Did those get rectified recently?


No, still not working on Darwin. At this point I'm thinking of sending
a patch clarifying that testing QEMU with Avocado is not supported on Darwin.


Hi Phil,

Do you mean with or without this version bump?

Even though it was somewhat recently[1] that another level of Darwin 
compatibility was added to Avocado, where a good part of Avocado's 
selftests[2] were enabled in OS X, Avocado 101.0 should not be that far 
behind.


I honestly believe I have seen people testing and reporting successful
execution of the Avocado tests under Darwin with the latest Avocado.
Hopefully my memory is not playing tricks on me, but I'll attempt to verify
that.


Cheers,

- Cleber.

[1] - https://github.com/avocado-framework/avocado/pull/5622

[2] - 
https://github.com/avocado-framework/avocado/actions/runs/4417285317/jobs/7742772932?pr=5622#step:10:11




I'm not seeing any with your patch applied.


Are you running the Cirrus-CI jobs?






Re: [PATCH v4 1/6] tests/requirements.txt: bump up avocado-framework version to 101.0

2023-04-20 Thread Cleber Rosa



On 4/17/23 12:50, John Snow wrote:

On Mon, Apr 17, 2023 at 9:43 AM Alex Bennée  wrote:

From: Kautuk Consul 

Avocado version 101.0 has a fix to re-compute the checksum
of an asset file if the algorithm used in the *-CHECKSUM
file isn't the same as the one being passed to it by the
avocado user (i.e. the avocado_qemu python module).
In the earlier avocado versions this fix wasn't there due
to which if the checksum wouldn't match the earlier
checksum (calculated by a different algorithm), the avocado
code would start downloading a fresh image from the internet
URL thus making the test-cases take longer to execute.

Bump up the avocado-framework version to 101.0.

Signed-off-by: Kautuk Consul 
Tested-by: Hariharan T S 
Message-Id: <20230327115030.3418323-2-kcon...@linux.vnet.ibm.com>
Message-Id: <20230330101141.30199-10-alex.ben...@linaro.org>

---
v2
   - limit --max-parallel-tasks $(JOBS_OPTION:-j%=%)
---
  tests/Makefile.include | 18 +++---
  tests/requirements.txt |  2 +-
  2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/tests/Makefile.include b/tests/Makefile.include
index 9422ddaece..a4de0ad5a2 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -138,14 +138,18 @@ get-vm-image-fedora-31-%: check-venv
  # download all vm images, according to defined targets
  get-vm-images: check-venv $(patsubst %,get-vm-image-fedora-31-%, $(FEDORA_31_DOWNLOAD))

+JOBS_OPTION=$(lastword -j1 $(filter-out -j, $(filter -j%,$(MAKEFLAGS))))
+
  check-avocado: check-venv $(TESTS_RESULTS_DIR) get-vm-images
-   $(call quiet-command, \
-$(TESTS_PYTHON) -m avocado \
---show=$(AVOCADO_SHOW) run --job-results-dir=$(TESTS_RESULTS_DIR) \
-$(if $(AVOCADO_TAGS),, --filter-by-tags-include-empty \
-   --filter-by-tags-include-empty-key) \
-$(AVOCADO_CMDLINE_TAGS) \
-$(if $(GITLAB_CI),,--failfast) $(AVOCADO_TESTS), \
+   $(call quiet-command, \
+$(TESTS_PYTHON) -m avocado \
+--show=$(AVOCADO_SHOW) run --job-results-dir=$(TESTS_RESULTS_DIR) \
+$(if $(AVOCADO_TAGS),, \
+   --filter-by-tags-include-empty \
+   --filter-by-tags-include-empty-key) \
+   --max-parallel-tasks $(JOBS_OPTION:-j%=%) \
+$(AVOCADO_CMDLINE_TAGS) \
+$(if $(GITLAB_CI),,--failfast) $(AVOCADO_TESTS), \
  "AVOCADO", "tests/avocado")

  check-acceptance-deprecated-warning:
diff --git a/tests/requirements.txt b/tests/requirements.txt
index 0ba561b6bd..a6f73da681 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -2,5 +2,5 @@
  # in the tests/venv Python virtual environment. For more info,
  # refer to: https://pip.pypa.io/en/stable/user_guide/#id1
  # Note that qemu.git/python/ is always implicitly installed.
-avocado-framework==88.1
+avocado-framework==101.0
  pycdlib==1.11.0
--
2.39.2


I thought there were test failures that prohibited us from bumping the
Avocado-Framework version. Did those get rectified recently?


Hi John,

While testing before and after behavior of pretty much every test in 
tests/avocado/,  I've found that some tests may get affected by the 
extra isolation (and overhead) of the new runner in recent Avocado and 
may get interrupted more often than on the previous runner.  But this is 
highly dependent on the amount of resources the machine you run the 
tests on has, and how tight the current timeout is.
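
(As an aside, with the Makefile change in this patch the parallelism follows
make's own -j flag. The idiom

    JOBS_OPTION=$(lastword -j1 $(filter-out -j, $(filter -j%,$(MAKEFLAGS))))

picks the last -jN word out of $(MAKEFLAGS), drops a bare "-j" (unlimited
jobs), and falls back to -j1 when nothing is given; $(JOBS_OPTION:-j%=%) then
strips the "-j" prefix. So, assuming the patch is applied, "make -j4
check-avocado" should run avocado with --max-parallel-tasks 4, and a plain
"make check-avocado" with --max-parallel-tasks 1.)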


Running all the tests under old and new Avocado also revealed some tests 
that are equally broken under both versions.  Thomas has bisected some, 
and Cedric has come up with some patches too.


To avoid being too verbose by default, let me know if you want the 
full details.  Also, see my previous reply about the two issues I'm 
aware that would make this bump as seamless as possible.


Thanks,

- Cleber.




Re: [PATCH v3 2/2] tests/avocado/tuxrun_baselines.py: improve code coverage for ppc64

2023-04-20 Thread Harsh Prateek Bora




On 4/21/23 09:12, Kautuk Consul wrote:

Hi,

On 2023-04-20 19:20:40, Harsh Prateek Bora wrote:

Since we are optimising code a lot, one suggestion below:


On Thu, Apr 20, 2023 at 6:23 PM Kautuk Consul 
wrote:


Commit c0c8687ef0fd990db8db1655a8a6c5a5e35dd4bb disabled the
boot_linux.py test-case due to which the code coverage for ppc
decreased by around 2%. As per the discussion on
https://lore.kernel.org/qemu-devel/87sfdpqcy4@linaro.org/ it
was mentioned that the baseline test for ppc64 could be modified
to make up this 2% code coverage. This patch attempts to achieve
this 2% code coverage by adding various device command line
arguments (to ./qemu-system-ppc64) in the tuxrun_baselines.py
test-case.

The code coverage report with boot_linux.py, without it and finally
with these tuxrun_baselines.py changes is as follows:

With boot_linux.py
--
   lines..: 13.8% (58006 of 420997 lines)
   functions..: 20.7% (7675 of 36993 functions)
   branches...: 9.2% (22146 of 240611 branches)
Without boot_linux.py (without this patch changes)
--
   lines..: 11.9% (50174 of 420997 lines)
   functions..: 18.8% (6947 of 36993 functions)
   branches...: 7.4% (17580 of 239017 branches)
Without boot_linux.py (with this patch changes)
---
   lines..: 13.8% (58287 of 420997 lines)
   functions..: 20.7% (7640 of 36993 functions)
   branches...: 8.4% (20223 of 240611 branches)

Signed-off-by: Kautuk Consul 
Reported-by: Alex Bennée 
---
  tests/avocado/tuxrun_baselines.py | 120 +-
  1 file changed, 116 insertions(+), 4 deletions(-)

diff --git a/tests/avocado/tuxrun_baselines.py b/tests/avocado/tuxrun_baselines.py
index d343376faa..ae082ac028 100644
--- a/tests/avocado/tuxrun_baselines.py
+++ b/tests/avocado/tuxrun_baselines.py
@@ -11,6 +11,7 @@

  import os
  import time
+import tempfile


  """
-self.common_tuxrun(drive="scsi-hd")
+
+# add device args to command line.
+self.vm.add_args('-netdev', 'user,id=vnet,hostfwd=:127.0.0.1:0-:22',
+ '-device', 'virtio-net,netdev=vnet')
+self.vm.add_args('-netdev', '{"type":"user","id":"hostnet0"}',
+ '-device', '{"driver":"virtio-net-pci","netdev":'
+ '"hostnet0","id":"net0","mac":"52:54:00:4c:e3:86",'
+ '"bus":"pci.0","addr":"0x9"}')
+self.vm.add_args('-device', '{"driver":"qemu-xhci","p2":15,"p3":15,'
+ '"id":"usb","bus":"pci.0","addr":"0x2"}')
+self.vm.add_args('-device', '{"driver":"virtio-scsi-pci","id":"scsi0"'
+ ',"bus":"pci.0","addr":"0x3"}')
+self.vm.add_args('-device', '{"driver":"virtio-serial-pci","id":'
+ '"virtio-serial0","bus":"pci.0","addr":"0x4"}')
+self.vm.add_args('-device', '{"driver":"scsi-cd","bus":"scsi0.0"'
+ ',"channel":0,"scsi-id":0,"lun":0,"device_id":'
+ '"drive-scsi0-0-0-0","id":"scsi0-0-0-0"}')
+self.vm.add_args('-device', '{"driver":"virtio-balloon-pci",'
+ '"id":"balloon0","bus":"pci.0","addr":"0x6"}')
+self.vm.add_args('-audiodev', '{"id":"audio1","driver":"none"}')
+self.vm.add_args('-device', '{"driver":"usb-tablet","id":"input0"'
+ ',"bus":"usb.0","port":"1"}')
+self.vm.add_args('-device', '{"driver":"usb-kbd","id":"input1"'
+ ',"bus":"usb.0","port":"2"}')
+self.vm.add_args('-device', '{"driver":"VGA","id":"video0",'
+ '"vgamem_mb":16,"bus":"pci.0","addr":"0x7"}')
+self.vm.add_args('-object', '{"qom-type":"rng-random","id":"objrng0"'
+ ',"filename":"/dev/urandom"}',
+ '-device', '{"driver":"virtio-rng-pci","rng":"objrng0"'
+ ',"id":"rng0","bus":"pci.0","addr":"0x8"}')
+self.vm.add_args('-object', '{"qom-type":"cryptodev-backend-builtin",'
+ '"id":"objcrypto0","queues":1}',
+ '-device', '{"driver":"virtio-crypto-pci",'
+ '"cryptodev":"objcrypto0","id":"crypto0","bus"'
+ ':"pci.0","addr":"0xa"}')
+self.vm.add_args('-device', '{"driver":"spapr-pci-host-bridge"'
+ ',"index":1,"id":"pci.1"}')
+self.vm.add_args('-device', '{"driver":"spapr-vscsi","id":"scsi1"'
+ ',"reg":12288}')
+self.vm.add_args('-m', '2G,slots=32,maxmem=4G',
+ '-object', 'memory-backend-ram,id=ram1,size=1G',
+ '-device', 'pc-dimm,id=dimm1,memdev=ram1')
+




Most of the above code for adding args is getting duplicated in the test
for ppc64le below as well.
It could be contained in a helper routine which could be called from both,
and would be 
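
Something along these lines -- a minimal sketch, with a hypothetical method
name and only the first arguments shown:

    def common_ppc64_devices(self):
        # device arguments shared by the ppc64 and ppc64le tests
        self.vm.add_args('-netdev', 'user,id=vnet,hostfwd=:127.0.0.1:0-:22',
                         '-device', 'virtio-net,netdev=vnet')
        # ... the remaining -device/-object/-m arguments from above ...

Each test would then call self.common_ppc64_devices() before invoking
common_tuxrun().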

Re: [PATCH v4 1/6] tests/requirements.txt: bump up avocado-framework version to 101.0

2023-04-20 Thread Cleber Rosa



On 4/17/23 09:43, Alex Bennée wrote:

From: Kautuk Consul 

Avocado version 101.0 has a fix to re-compute the checksum
of an asset file if the algorithm used in the *-CHECKSUM
file isn't the same as the one being passed to it by the
avocado user (i.e. the avocado_qemu python module).
In the earlier avocado versions this fix wasn't there due
to which if the checksum wouldn't match the earlier
checksum (calculated by a different algorithm), the avocado
code would start downloading a fresh image from the internet
URL thus making the test-cases take longer to execute.

Bump up the avocado-framework version to 101.0.

Signed-off-by: Kautuk Consul 
Tested-by: Hariharan T S 
Message-Id: <20230327115030.3418323-2-kcon...@linux.vnet.ibm.com>
Message-Id: <20230330101141.30199-10-alex.ben...@linaro.org>

---
v2
   - limit --max-parallel-tasks $(JOBS_OPTION:-j%=%)
---
  tests/Makefile.include | 18 +++---
  tests/requirements.txt |  2 +-
  2 files changed, 12 insertions(+), 8 deletions(-)


Hi everyone,

Looks like I've mistakenly replied to the earlier thread, so here's 
pretty much the same message in a better location.


First of all, thanks to Kautuk for sending this and thanks to Hariharan 
for further testing it.


I'd like to give some context which not everyone may be aware of. 
Avocado 101.0 is very different when compared with 88.1. Everything 
related to the execution of tests is brand new.  To be more precise, on 
version 91.0[1], this new runner[2] became the default. On version 97.0, 
the old runner implementation (currently in use in QEMU) was finally 
removed.


On most releases since then, I've been running the QEMU tests with the 
latest Avocado, and finding issues that are (as resources allow) 
addressed in later versions.   As you probably noticed, Avocado 101.0 
runs the QEMU tests without much (or any) visible issues for most 
people.  But, I'm aware of two pending issues that may or may not be a 
big deal to users:


I) The logging behavior is a bit different since Avocado 88.1. At a 
given point it was considered that Avocado should not mess around 
inadvertently with Python's root logger, and should be more picky about 
what it includes in logs.  For most cases, a simple workaround[4] does the 
trick.  But, for some other use cases (say for 3rd party libraries' logs 
you want logged alongside Avocado's logs) there's a pending PR[5] that 
will take care of all known limitations.


II) The support for killing tests (internally in Avocado represented as 
more generic "tasks") and all its children is a bit lacking.  This is an 
issue I'm actively working on[6].  This may leave some processes (such 
as "qemu-system-*") running even after a test was interrupted.


Fixes for both of these issues are due to be in version 102.0. The ETA 
for version 102.0 is 1-2 weeks.


With that being said, I'm more than OK with this patch (alongside PATCH 
2, without which havoc ensues :) provided people understand the two 
pending issues above.  If this patch is taken before Avocado 102.0 is 
released, the delta from 101.0 would be much smaller, so it should be an 
easier change to test.


Cheers,

- Cleber.


[1] - https://avocado-framework.readthedocs.io/en/101.0/releases/91_0.html

[2] - The new runner is called "nrunner" and I am to be blamed for the 
naming lacking any originality


[3] - 
https://avocado-framework.readthedocs.io/en/101.0/releases/97_0.html#users-test-writers


[4] - 
https://gitlab.com/cleber.gnu/qemu/-/commit/a9f39c4f6671b756196a185c7275eb7ebd13e588


[5] - https://github.com/avocado-framework/avocado/pull/5645

[6] - https://github.com/avocado-framework/avocado/issues/4994





Re: [PATCH 1/2] tests/requirements.txt: bump up avocado-framework version to 101.0

2023-04-20 Thread Cleber Rosa



On 3/27/23 07:50, Kautuk Consul wrote:

Avocado version 101.0 has a fix to re-compute the checksum
of an asset file if the algorithm used in the *-CHECKSUM
file isn't the same as the one being passed to it by the
avocado user (i.e. the avocado_qemu python module).
In the earlier avocado versions this fix wasn't there due
to which if the checksum wouldn't match the earlier
checksum (calculated by a different algorithm), the avocado
code would start downloading a fresh image from the internet
URL thus making the test-cases take longer to execute.

Bump up the avocado-framework version to 101.0.


Hi Kautuk,

First of all, thanks for working on this, and thanks to Hariharan for 
testing it.


I'd like to give some context which not everyone may be aware of.  
Avocado 101.0 is very different when compared with 88.1. Everything 
related to the execution of tests is brand new.  To be more precise, on 
version 91.0[1], this new runner[2] became the default. On version 97.0, 
the old runner implementation (currently in use in QEMU) was finally 
removed.


On most releases since then, I've been running the QEMU tests with the 
latest Avocado, and finding issues that are (as resources allow) 
addressed in later versions.   As you probably noticed, Avocado 101.0 
runs the QEMU tests without much (or any) visible issues for most 
people.  But, I'm aware of two pending issues that may or may not be a 
big deal to users:


I) The logging behavior is a bit different since Avocado 88.1. At a 
given point it was considered that Avocado should not mess around 
inadvertently with Python's root logger, and should be more picky about 
what it includes in logs.  For most cases, a simple workaround[4] does the 
trick.  But, for some other use cases (say for 3rd party libraries' logs 
you want logged alongside Avocado's logs) there's a pending PR[5] that 
will take care of all known limitations.


II) The support for killing tests (internally in Avocado represented as 
more generic "tasks") and all its children is a bit lacking.  This is an 
issue I'm actively working on[6].  This may leave some processes (such 
as "qemu-system-*") running even after a test was interrupted.


Fixes for both of these issues are due to be in version 102.0. The ETA 
for version 102.0 is 1-2 weeks.


With that being said, I'm more than OK with this patch (alongside PATCH 
2, without which havoc ensues :) provided people understand the two 
pending issues above.  If this patch is taken before Avocado 102.0 is 
released, the delta from 101.0 would be much smaller, so it should be an 
easier change to test.


Cheers,

- Cleber.


[1] - https://avocado-framework.readthedocs.io/en/101.0/releases/91_0.html

[2] - The new runner is called "nrunner" and I am to be blamed for the 
naming lacking any originality


[3] - 
https://avocado-framework.readthedocs.io/en/101.0/releases/97_0.html#users-test-writers


[4] - 
https://gitlab.com/cleber.gnu/qemu/-/commit/a9f39c4f6671b756196a185c7275eb7ebd13e588


[5] - https://github.com/avocado-framework/avocado/pull/5645

[6] - https://github.com/avocado-framework/avocado/issues/4994





Re: [PATCH v3 2/2] tests/avocado/tuxrun_baselines.py: improve code coverage for ppc64

2023-04-20 Thread Kautuk Consul
Hi,

On 2023-04-20 19:20:40, Harsh Prateek Bora wrote:
> Since we are optimising code a lot, one suggestion below:
> 
> 
> On Thu, Apr 20, 2023 at 6:23 PM Kautuk Consul 
> wrote:
> 
> > Commit c0c8687ef0fd990db8db1655a8a6c5a5e35dd4bb disabled the
> > boot_linux.py test-case due to which the code coverage for ppc
> > decreased by around 2%. As per the discussion on
> > https://lore.kernel.org/qemu-devel/87sfdpqcy4@linaro.org/ it
> > was mentioned that the baseline test for ppc64 could be modified
> > to make up this 2% code coverage. This patch attempts to achieve
> > this 2% code coverage by adding various device command line
> > arguments (to ./qemu-system-ppc64) in the tuxrun_baselines.py
> > test-case.
> >
> > The code coverage report with boot_linux.py, without it and finally
> > with these tuxrun_baselines.py changes is as follows:
> >
> > With boot_linux.py
> > --
> >   lines..: 13.8% (58006 of 420997 lines)
> >   functions..: 20.7% (7675 of 36993 functions)
> >   branches...: 9.2% (22146 of 240611 branches)
> > Without boot_linux.py (without this patch changes)
> > --
> >   lines..: 11.9% (50174 of 420997 lines)
> >   functions..: 18.8% (6947 of 36993 functions)
> >   branches...: 7.4% (17580 of 239017 branches)
> > Without boot_linux.py (with this patch changes)
> > ---
> >   lines..: 13.8% (58287 of 420997 lines)
> >   functions..: 20.7% (7640 of 36993 functions)
> >   branches...: 8.4% (20223 of 240611 branches)
> >
> > Signed-off-by: Kautuk Consul 
> > Reported-by: Alex Bennée 
> > ---
> >  tests/avocado/tuxrun_baselines.py | 120 +-
> >  1 file changed, 116 insertions(+), 4 deletions(-)
> >
> > diff --git a/tests/avocado/tuxrun_baselines.py b/tests/avocado/tuxrun_baselines.py
> > index d343376faa..ae082ac028 100644
> > --- a/tests/avocado/tuxrun_baselines.py
> > +++ b/tests/avocado/tuxrun_baselines.py
> > @@ -11,6 +11,7 @@
> >
> >  import os
> >  import time
> > +import tempfile
> >
> > 
> >  """
> > -self.common_tuxrun(drive="scsi-hd")
> > +
> > +# add device args to command line.
> > +self.vm.add_args('-netdev', 'user,id=vnet,hostfwd=:127.0.0.1:0-:22',
> > + '-device', 'virtio-net,netdev=vnet')
> > +self.vm.add_args('-netdev', '{"type":"user","id":"hostnet0"}',
> > + '-device', '{"driver":"virtio-net-pci","netdev":'
> > + '"hostnet0","id":"net0","mac":"52:54:00:4c:e3:86",'
> > + '"bus":"pci.0","addr":"0x9"}')
> > +self.vm.add_args('-device', '{"driver":"qemu-xhci","p2":15,"p3":15,'
> > + '"id":"usb","bus":"pci.0","addr":"0x2"}')
> > +self.vm.add_args('-device', '{"driver":"virtio-scsi-pci","id":"scsi0"'
> > + ',"bus":"pci.0","addr":"0x3"}')
> > +self.vm.add_args('-device', '{"driver":"virtio-serial-pci","id":'
> > + '"virtio-serial0","bus":"pci.0","addr":"0x4"}')
> > +self.vm.add_args('-device', '{"driver":"scsi-cd","bus":"scsi0.0"'
> > + ',"channel":0,"scsi-id":0,"lun":0,"device_id":'
> > + '"drive-scsi0-0-0-0","id":"scsi0-0-0-0"}')
> > +self.vm.add_args('-device', '{"driver":"virtio-balloon-pci",'
> > + '"id":"balloon0","bus":"pci.0","addr":"0x6"}')
> > +self.vm.add_args('-audiodev', '{"id":"audio1","driver":"none"}')
> > +self.vm.add_args('-device', '{"driver":"usb-tablet","id":"input0"'
> > + ',"bus":"usb.0","port":"1"}')
> > +self.vm.add_args('-device', '{"driver":"usb-kbd","id":"input1"'
> > + ',"bus":"usb.0","port":"2"}')
> > +self.vm.add_args('-device', '{"driver":"VGA","id":"video0",'
> > + '"vgamem_mb":16,"bus":"pci.0","addr":"0x7"}')
> > +self.vm.add_args('-object', '{"qom-type":"rng-random","id":"objrng0"'
> > + ',"filename":"/dev/urandom"}',
> > + '-device', '{"driver":"virtio-rng-pci","rng":"objrng0"'
> > + ',"id":"rng0","bus":"pci.0","addr":"0x8"}')
> > +self.vm.add_args('-object', '{"qom-type":"cryptodev-backend-builtin",'
> > + '"id":"objcrypto0","queues":1}',
> > + '-device', '{"driver":"virtio-crypto-pci",'
> > + '"cryptodev":"objcrypto0","id":"crypto0","bus"'
> > + ':"pci.0","addr":"0xa"}')
> > +self.vm.add_args('-device', '{"driver":"spapr-pci-host-bridge"'
> > + ',"index":1,"id":"pci.1"}')
> > +self.vm.add_args('-device', '{"driver":"spapr-vscsi","id":"scsi1"'
> > + ',"reg":12288}')
> > +self.vm.add_args('-m', '2G,slots=32,maxmem=4G',
> > +  

Re: [PATCH v3 13/20] block/export: rewrite vduse-blk drain code

2023-04-20 Thread Yongji Xie
Hi Stefan,

On Thu, Apr 20, 2023 at 7:39 PM Stefan Hajnoczi  wrote:
>
> vduse_blk_detach_ctx() waits for in-flight requests using
> AIO_WAIT_WHILE(). This is not allowed according to a comment in
> bdrv_set_aio_context_commit():
>
>   /*
>* Take the old AioContex when detaching it from bs.
>* At this point, new_context lock is already acquired, and we are now
>* also taking old_context. This is safe as long as bdrv_detach_aio_context
>* does not call AIO_POLL_WHILE().
>*/
>
> Use this opportunity to rewrite the drain code in vduse-blk:
>
> - Use the BlockExport refcount so that vduse_blk_exp_delete() is only
>   called when there are no more requests in flight.
>
> - Implement .drained_poll() so in-flight request coroutines are stopped
>   by the time .bdrv_detach_aio_context() is called.
>
> - Remove AIO_WAIT_WHILE() from vduse_blk_detach_ctx() to solve the
>   .bdrv_detach_aio_context() constraint violation. It's no longer
>   needed due to the previous changes.
>
> - Always handle the VDUSE file descriptor, even in drained sections. The
>   VDUSE file descriptor doesn't submit I/O, so it's safe to handle it in
>   drained sections. This ensures that the VDUSE kernel code gets a fast
>   response.
>
> - Suspend virtqueue fd handlers in .drained_begin() and resume them in
>   .drained_end(). This eliminates the need for the
>   aio_set_fd_handler(is_external=true) flag, which is being removed from
>   QEMU.
>
> This is a long list but splitting it into individual commits would
> probably lead to git bisect failures - the changes are all related.
>
> Signed-off-by: Stefan Hajnoczi 
> ---
>  block/export/vduse-blk.c | 132 +++
>  1 file changed, 93 insertions(+), 39 deletions(-)
>
> diff --git a/block/export/vduse-blk.c b/block/export/vduse-blk.c
> index f7ae44e3ce..35dc8fcf45 100644
> --- a/block/export/vduse-blk.c
> +++ b/block/export/vduse-blk.c
> @@ -31,7 +31,8 @@ typedef struct VduseBlkExport {
>  VduseDev *dev;
>  uint16_t num_queues;
>  char *recon_file;
> -unsigned int inflight;
> +unsigned int inflight; /* atomic */
> +bool vqs_started;
>  } VduseBlkExport;
>
>  typedef struct VduseBlkReq {
> @@ -41,13 +42,24 @@ typedef struct VduseBlkReq {
>
>  static void vduse_blk_inflight_inc(VduseBlkExport *vblk_exp)
>  {
> -vblk_exp->inflight++;
> +if (qatomic_fetch_inc(&vblk_exp->inflight) == 0) {

I wonder why we need to use atomic operations here.

> +/* Prevent export from being deleted */
> +aio_context_acquire(vblk_exp->export.ctx);
> +blk_exp_ref(&vblk_exp->export);
> +aio_context_release(vblk_exp->export.ctx);
> +}
>  }
>
>  static void vduse_blk_inflight_dec(VduseBlkExport *vblk_exp)
>  {
> -if (--vblk_exp->inflight == 0) {
> +if (qatomic_fetch_dec(&vblk_exp->inflight) == 1) {
> +/* Wake AIO_WAIT_WHILE() */
>  aio_wait_kick();
> +
> +/* Now the export can be deleted */
> +aio_context_acquire(vblk_exp->export.ctx);
> +blk_exp_unref(&vblk_exp->export);
> +aio_context_release(vblk_exp->export.ctx);
>  }
>  }
>
> @@ -124,8 +136,12 @@ static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq)
>  {
>  VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
>
> +if (!vblk_exp->vqs_started) {
> +return; /* vduse_blk_drained_end() will start vqs later */
> +}
> +
>  aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
> -   true, on_vduse_vq_kick, NULL, NULL, NULL, vq);
> +   false, on_vduse_vq_kick, NULL, NULL, NULL, vq);
> +/* Make sure we don't miss any kick after reconnecting */
>  eventfd_write(vduse_queue_get_fd(vq), 1);
>  }
> @@ -133,9 +149,14 @@ static void vduse_blk_enable_queue(VduseDev *dev, VduseVirtq *vq)
>  static void vduse_blk_disable_queue(VduseDev *dev, VduseVirtq *vq)
>  {
>  VduseBlkExport *vblk_exp = vduse_dev_get_priv(dev);
> +int fd = vduse_queue_get_fd(vq);
>
> -aio_set_fd_handler(vblk_exp->export.ctx, vduse_queue_get_fd(vq),
> -   true, NULL, NULL, NULL, NULL, NULL);
> +if (fd < 0) {
> +return;
> +}
> +
> +aio_set_fd_handler(vblk_exp->export.ctx, fd, false,
> +   NULL, NULL, NULL, NULL, NULL);
>  }
>
>  static const VduseOps vduse_blk_ops = {
> @@ -152,42 +173,19 @@ static void on_vduse_dev_kick(void *opaque)
>
>  static void vduse_blk_attach_ctx(VduseBlkExport *vblk_exp, AioContext *ctx)
>  {
> -int i;
> -
>  aio_set_fd_handler(vblk_exp->export.ctx, vduse_dev_get_fd(vblk_exp->dev),
> -   true, on_vduse_dev_kick, NULL, NULL, NULL,
> +   false, on_vduse_dev_kick, NULL, NULL, NULL,
> vblk_exp->dev);
>
> -for (i = 0; i < vblk_exp->num_queues; i++) {
> -VduseVirtq *vq = vduse_dev_get_queue(vblk_exp->dev, i);
> -int fd = vduse_queue_get_fd(vq);
> -
> -if (fd < 0) {
> - 

[RESEND][QEMU PATCH] accel/kvm: Don't use KVM maximum support number to alloc user memslots

2023-04-20 Thread Robert Hoo
Corrects QEMU to properly use what KVM_CAP_NR_MEMSLOTS means,
i.e. the maximum user memslots KVM supports.
1. Rename KVMState::nr_slots --> max_slots.
2. Remember nr_slots in each KML. This also decouples the KMLs, e.g. x86's
   two KMLs don't need to have the same slots[] size.
3. Change the initial slots[] size back to 32 and grow it dynamically
   (exponentially) up to the maximum number KVM supports. 32 should suit
   almost all normal guests.
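
(Illustrative growth sequence, given the doubling in kvm_get_free_slot() and
assuming the default stays at 32:

    32 -> 64 -> 128 -> ... -> 16384 -> 32764

with the last step capped at s->max_slots.)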

Background:
Since KVM commit 4fc096a99e01d ("KVM: Raise the maximum number of user
memslots"), KVM_CAP_NR_MEMSLOTS returns 32764 (SHRT_MAX - 3), which is a
huge increase from the previous 509 (x86). That change was based on the fact
that KVM allocates memslots dynamically, but QEMU still allocates that huge
number of user memslots statically. This is unwanted on both sides. It causes:

1. Memory waste. QEMU allocates (SHRT_MAX - 3) * sizeof(KVMSlot), while a
typical VM needs far fewer slots, e.g. my VM's highest memslot ever used was
the 9th. x86, moreover, has 2 KMLs, so approx. 2 x 2MB for each VM (see the
arithmetic sketch after this list).
2. Time waste. Several KML slot functions walk the whole KML::slots[];
(SHRT_MAX - 3) entries make that far longer than necessary, e.g. in
kvm_lookup_matching_slot(), kvm_physical_memory_addr_from_host(),
kvm_physical_log_clear(), kvm_log_sync_global().
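
(Rough arithmetic behind point 1, assuming sizeof(KVMSlot) is on the order of
64 bytes -- the exact size depends on the build:

    (SHRT_MAX - 3) * 64 B = 32764 * 64 B ~= 2 MB per KVMMemoryListener

hence the "approx. 2 x 2MB" for an x86 VM with its two KMLs.)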

Functional Test:
Temporarily set KVM_DEF_NR_SLOTS = 8 so that the dynamic slots[] growth is
exercised; the VM launched and works well.

Performance improvement:
Func time (ns) of kvm_lookup_matching_slot(), for example:

                Before    After
sample count    5874      6812
mean            11403     5867
min             1775      1722
max             784949    30263

Signed-off-by: Robert Hoo 
---
Resend:
Add stats about kvm_lookup_matching_slot() for example.
CC the kvm mailing list, as get_maintainer.pl suggests.
I believe this also benefits live migration, but I don't have the devices at
hand to do a system-level test.

 accel/kvm/kvm-all.c  | 57 +---
 include/sysemu/kvm_int.h |  4 ++-
 2 files changed, 45 insertions(+), 16 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index cf3a88d90e..708170139c 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -178,22 +178,50 @@ int kvm_get_max_memslots(void)
 {
 KVMState *s = KVM_STATE(current_accel());
 
-return s->nr_slots;
+return s->max_slots;
 }
 
-/* Called with KVMMemoryListener.slots_lock held */
+/* Called with kvm_slots_lock()'ed */
 static KVMSlot *kvm_get_free_slot(KVMMemoryListener *kml)
 {
 KVMState *s = kvm_state;
+KVMSlot *new_slots;
 int i;
+int new_nr, old_nr;
 
-for (i = 0; i < s->nr_slots; i++) {
+for (i = 0; i < kml->nr_slots; i++) {
 if (kml->slots[i].memory_size == 0) {
  return &kml->slots[i];
 }
 }
 
-return NULL;
+/* Already reached maximum, no more can expand */
+if (kml->nr_slots >= s->max_slots) {
+return NULL;
+}
+
+new_nr = 2 * kml->nr_slots;
+new_nr = MIN(new_nr, s->max_slots);
+/* It might overflow */
+if (new_nr < 0 || new_nr <= kml->nr_slots) {
+return NULL;
+}
+
+new_slots = g_try_new0(KVMSlot, new_nr);
+if (!new_slots) {
+return NULL;
+}
+
+memcpy(new_slots, kml->slots, kml->nr_slots * sizeof(KVMSlot));
+old_nr = kml->nr_slots;
+kml->nr_slots = new_nr;
+g_free(kml->slots);
+kml->slots = new_slots;
+for (i = old_nr; i < kml->nr_slots; i++) {
+kml->slots[i].slot = i;
+}
+
+return &kml->slots[old_nr];
 }
 
 bool kvm_has_free_slot(MachineState *ms)
@@ -226,10 +254,9 @@ static KVMSlot *kvm_lookup_matching_slot(KVMMemoryListener *kml,
  hwaddr start_addr,
  hwaddr size)
 {
-KVMState *s = kvm_state;
 int i;
 
-for (i = 0; i < s->nr_slots; i++) {
+for (i = 0; i < kml->nr_slots; i++) {
  KVMSlot *mem = &kml->slots[i];
 
 if (start_addr == mem->start_addr && size == mem->memory_size) {
@@ -271,7 +298,7 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
 int i, ret = 0;
 
 kvm_slots_lock();
-for (i = 0; i < s->nr_slots; i++) {
+for (i = 0; i < kml->nr_slots; i++) {
  KVMSlot *mem = &kml->slots[i];
 
 if (ram >= mem->ram && ram < mem->ram + mem->memory_size) {
@@ -1002,7 +1029,7 @@ static int kvm_physical_log_clear(KVMMemoryListener *kml,
 
 kvm_slots_lock();
 
-for (i = 0; i < s->nr_slots; i++) {
+for (i = 0; i < kml->nr_slots; i++) {
  mem = &kml->slots[i];
 /* Discard slots that are empty or do not overlap the section */
 if (!mem->memory_size ||
@@ -1566,7 +1593,6 @@ static void kvm_log_sync(MemoryListener *listener,
 static void kvm_log_sync_global(MemoryListener *l)
 {
 KVMMemoryListener *kml = container_of(l, KVMMemoryListener, 

RE: [PATCH v2 3/4] build: move COLO under CONFIG_REPLICATION

2023-04-20 Thread Zhang, Chen



> -Original Message-
> From: Vladimir Sementsov-Ogievskiy 
> Sent: Thursday, April 20, 2023 6:53 AM
> To: qemu-devel@nongnu.org
> Cc: qemu-bl...@nongnu.org; michael.r...@amd.com; arm...@redhat.com;
> ebl...@redhat.com; jasow...@redhat.com; quint...@redhat.com; Zhang,
> Hailiang ; phi...@linaro.org;
> th...@redhat.com; berra...@redhat.com; marcandre.lur...@redhat.com;
> pbonz...@redhat.com; d...@treblig.org; hre...@redhat.com;
> kw...@redhat.com; Zhang, Chen ;
> lizhij...@fujitsu.com; Vladimir Sementsov-Ogievskiy  team.ru>
> Subject: [PATCH v2 3/4] build: move COLO under CONFIG_REPLICATION
> 
> We don't allow the x-colo capability to be used when replication is not
> configured. So there is no reason to build COLO when replication is
> disabled; it's unusable in that case.

Yes, you are right about the current status, because the COLO best practice
is replication + COLO live migration + COLO proxy.
But that doesn't mean it has to be done that way in all scenarios, as I
explained in v1. A better way is to keep allowing the x-colo capability, and
to split this patch into two config options: --disable-replication and
--disable-x-colo.
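
For example (the second option is hypothetical, as proposed above):

./configure --disable-replication   # existing option: build without replication
./configure --disable-x-colo        # proposed: drop COLO itself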

Thanks
Chen

> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  hmp-commands.hx|  2 ++
>  migration/colo.c   |  6 +
>  migration/meson.build  |  6 +++--
>  migration/migration-hmp-cmds.c |  2 ++
>  migration/migration.c  | 19 +++---
>  net/meson.build|  5 +++-
>  qapi/migration.json| 12 ++---
>  stubs/colo.c   | 47 ++
>  stubs/meson.build  |  1 +
>  9 files changed, 78 insertions(+), 22 deletions(-)
>  create mode 100644 stubs/colo.c
> 
> diff --git a/hmp-commands.hx b/hmp-commands.hx index
> bb85ee1d26..fbd0932232 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -1035,6 +1035,7 @@ SRST
>migration (or once already in postcopy).
>  ERST
> 
> +#ifdef CONFIG_REPLICATION
>  {
>  .name   = "x_colo_lost_heartbeat",
>  .args_type  = "",
> @@ -1043,6 +1044,7 @@ ERST
>"a failover or takeover is needed.",
>  .cmd = hmp_x_colo_lost_heartbeat,
>  },
> +#endif
> 
>  SRST
>  ``x_colo_lost_heartbeat``
> diff --git a/migration/colo.c b/migration/colo.c index
> 0716e64689..089c491d70 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -196,6 +196,12 @@ COLOMode get_colo_mode(void)
>  }
>  }
> 
> +bool migrate_colo_enabled(void)
> +{
> +MigrationState *s = migrate_get_current();
> +return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO];
> +}
> +
>  void colo_do_failover(void)
>  {
>  /* Make sure VM stopped while failover happened. */ diff --git
> a/migration/meson.build b/migration/meson.build index
> 0d1bb9f96e..3fccf79f12 100644
> --- a/migration/meson.build
> +++ b/migration/meson.build
> @@ -13,8 +13,6 @@ softmmu_ss.add(files(
>'block-dirty-bitmap.c',
>'channel.c',
>'channel-block.c',
> -  'colo-failover.c',
> -  'colo.c',
>'exec.c',
>'fd.c',
>'global_state.c',
> @@ -29,6 +27,10 @@ softmmu_ss.add(files(
>'threadinfo.c',
>  ), gnutls)
> 
> +if get_option('replication').allowed()
> +  softmmu_ss.add(files('colo-failover.c', 'colo.c')) endif
> +
>  softmmu_ss.add(when: rdma, if_true: files('rdma.c'))  if
> get_option('live_block_migration').allowed()
>softmmu_ss.add(files('block.c'))
> diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-
> cmds.c index 72519ea99f..4601c48f41 100644
> --- a/migration/migration-hmp-cmds.c
> +++ b/migration/migration-hmp-cmds.c
> @@ -640,6 +640,7 @@ void hmp_migrate_start_postcopy(Monitor *mon,
> const QDict *qdict)
>  hmp_handle_error(mon, err);
>  }
> 
> +#ifdef CONFIG_REPLICATION
>  void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict)  {
>  Error *err = NULL;
> @@ -647,6 +648,7 @@ void hmp_x_colo_lost_heartbeat(Monitor *mon,
> const QDict *qdict)
>  qmp_x_colo_lost_heartbeat();
>  hmp_handle_error(mon, err);
>  }
> +#endif
> 
>  typedef struct HMPMigrationStatus {
>  QEMUTimer *timer;
> diff --git a/migration/migration.c b/migration/migration.c index
> bda4789193..2382958364 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -165,7 +165,9 @@
> INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot,
>  MIGRATION_CAPABILITY_RDMA_PIN_ALL,
>  MIGRATION_CAPABILITY_COMPRESS,
>  MIGRATION_CAPABILITY_XBZRLE,
> +#ifdef CONFIG_REPLICATION
>  MIGRATION_CAPABILITY_X_COLO,
> +#endif
>  MIGRATION_CAPABILITY_VALIDATE_UUID,
>  MIGRATION_CAPABILITY_ZERO_COPY_SEND);
> 
> @@ -1329,15 +1331,6 @@ static bool migrate_caps_check(bool *cap_list,
>  }
>  #endif
> 
> -#ifndef CONFIG_REPLICATION
> -if (cap_list[MIGRATION_CAPABILITY_X_COLO]) {
> -error_setg(errp, "QEMU compiled without replication module"
> -   " can't enable COLO");
> -error_append_hint(errp, "Please enable 

RE: [PATCH v2 4/4] configure: add --disable-colo-filters option

2023-04-20 Thread Zhang, Chen


> -Original Message-
> From: Vladimir Sementsov-Ogievskiy 
> Sent: Thursday, April 20, 2023 7:26 PM
> To: Zhang, Chen ; qemu-devel@nongnu.org
> Cc: qemu-bl...@nongnu.org; michael.r...@amd.com; arm...@redhat.com;
> ebl...@redhat.com; jasow...@redhat.com; quint...@redhat.com; Zhang,
> Hailiang ; phi...@linaro.org;
> th...@redhat.com; berra...@redhat.com; marcandre.lur...@redhat.com;
> pbonz...@redhat.com; d...@treblig.org; hre...@redhat.com;
> kw...@redhat.com; lizhij...@fujitsu.com
> Subject: Re: [PATCH v2 4/4] configure: add --disable-colo-filters option
> 
> On 20.04.23 12:09, Zhang, Chen wrote:
> >
> >
> >> -Original Message-
> >> From: Vladimir Sementsov-Ogievskiy 
> >> Sent: Thursday, April 20, 2023 6:53 AM
> >> To: qemu-devel@nongnu.org
> >> Cc: qemu-bl...@nongnu.org; michael.r...@amd.com;
> arm...@redhat.com;
> >> ebl...@redhat.com; jasow...@redhat.com; quint...@redhat.com;
> Zhang,
> >> Hailiang ; phi...@linaro.org;
> >> th...@redhat.com; berra...@redhat.com;
> marcandre.lur...@redhat.com;
> >> pbonz...@redhat.com; d...@treblig.org; hre...@redhat.com;
> >> kw...@redhat.com; Zhang, Chen ;
> >> lizhij...@fujitsu.com; Vladimir Sementsov-Ogievskiy
> >> 
> >> Subject: [PATCH v2 4/4] configure: add --disable-colo-filters option
> >>
> >> Add option to not build COLO Proxy subsystem if it is not needed.
> >
> > I think no need to add the --disable-colo-filter option.
> > Net-filters just general infrastructure. Another example is COLO also
> > use the -chardev socket to connect each filters. No need to add the --
> disable-colo-chardev
> > Please drop this patch.
> 
> I don't follow your reasoning. Of course we don't need any option like this,
> and could simply build all the components we don't use; "no need" is true of
> any --disable-* option.
> Still, I think it's good when you can exclude from the build the subsystems
> that you don't need / don't want to maintain or ship to your end users.

Yes, I agree with your idea.

> 
> In MAINTAINERS these two filters are in "COLO Proxy" susbsystem. Is it
> correct? What's wrong with adding an option to not build this subsystem? I
> can rename it to --disable-colo-proxy.

The history is that the COLO project contributed the QEMU filter framework
and filter-mirror/redirector, etc. The fact that the "COLO Proxy" subsystem
covers those filters does not mean they belong to COLO.
So it is unreasonable to tell users that they cannot use
filter-mirror/redirector for network debugging or other purposes just
because they have not enabled the COLO proxy.

> 
> > But for COLO network part, still something need to do:
> > You can add --disable-colo-proxy not to build the net/colo-compare.c  if it 
> > is
> not needed.
> > This file just for COLO and not belong to network filters.
> 
> net/colo-compare.c is used only only for COLO, which in turn used only with
> CONFIG_REPLICATION enabled, see patch 3. So, no reason to add separate
> option for it, it should be disabled with --disable-replication.

Yes, and as Lukas said, COLO is currently the only user of filter-rewriter.
It would be more appropriate to use filter-rewriter in place of
filter-mirror here. I saw patch 3; it would be better to move that into this
patch.

Thanks
Chen

> 
> --
> Best regards,
> Vladimir



[RFC PATCH 08/13] gfxstream + rutabaga prep: added need defintions, fields, and options

2023-04-20 Thread Gurchetan Singh
This modifies the common virtio-gpu.h file to have the fields and
definitions needed by gfxstream/rutabaga.  It also modifies VirtioGPUGL
to have the runtime options needed by rutabaga.  They are:

- a colon separated list of capset names, defined in the virtio spec
- a wayland socket path to enable guest Wayland passthrough

The command to run these would be:

-device virtio-vga-gl,capset_names=gfxstream:cross-domain, \
wayland_socket_path=/run/user/1000/wayland-0,hostmem=8G  \
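
(How the colon-separated list is consumed is outside this patch; a minimal
sketch of the parsing, with hypothetical lookup logic in the loop body:

    /* split "gfxstream:cross-domain" into individual capset names */
    char **names = g_strsplit(gl->capset_names, ":", 0);
    for (char **name = names; *name != NULL; name++) {
        /* hypothetical: resolve each name to a capset id and enable it */
    }
    g_strfreev(names);
)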

Signed-off-by: Gurchetan Singh 
---
 hw/display/virtio-gpu-gl.c | 2 ++
 include/hw/virtio/virtio-gpu.h | 8 
 2 files changed, 10 insertions(+)

diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
index 547e697333..15270b0c8a 100644
--- a/hw/display/virtio-gpu-gl.c
+++ b/hw/display/virtio-gpu-gl.c
@@ -29,6 +29,8 @@ static void virtio_gpu_gl_device_realize(DeviceState *qdev, 
Error **errp)
 static Property virtio_gpu_gl_properties[] = {
 DEFINE_PROP_BIT("stats", VirtIOGPU, parent_obj.conf.flags,
 VIRTIO_GPU_FLAG_STATS_ENABLED, false),
+DEFINE_PROP_STRING("capset_names", VirtIOGPUGL, capset_names),
+DEFINE_PROP_STRING("wayland_socket_path", VirtIOGPUGL, 
wayland_socket_path),
 DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index 421733d751..a35ade3608 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -94,6 +94,7 @@ enum virtio_gpu_base_conf_flags {
 VIRTIO_GPU_FLAG_DMABUF_ENABLED,
 VIRTIO_GPU_FLAG_BLOB_ENABLED,
 VIRTIO_GPU_FLAG_CONTEXT_INIT_ENABLED,
+VIRTIO_GPU_FLAG_RUTABAGA_ENABLED,
 };
 
 #define virtio_gpu_virgl_enabled(_cfg) \
@@ -106,6 +107,8 @@ enum virtio_gpu_base_conf_flags {
 (_cfg.flags & (1 << VIRTIO_GPU_FLAG_DMABUF_ENABLED))
 #define virtio_gpu_blob_enabled(_cfg) \
 (_cfg.flags & (1 << VIRTIO_GPU_FLAG_BLOB_ENABLED))
+#define virtio_gpu_rutabaga_enabled(_cfg) \
+(_cfg.flags & (1 << VIRTIO_GPU_FLAG_RUTABAGA_ENABLED))
 #define virtio_gpu_hostmem_enabled(_cfg) \
 (_cfg.hostmem > 0)
 #define virtio_gpu_context_init_enabled(_cfg) \
@@ -217,6 +220,11 @@ struct VirtIOGPUGL {
 
 bool renderer_inited;
 bool renderer_reset;
+
+char *capset_names;
+char *wayland_socket_path;
+uint32_t num_capsets;
+void *rutabaga;
 };
 
 struct VhostUserGPU {
-- 
2.40.0.634.g4ca3ef3211-goog




[RFC PATCH 02/13] virtio-gpu: hostmem

2023-04-20 Thread Gurchetan Singh
From: Gerd Hoffmann 

Use VIRTIO_GPU_SHM_ID_HOST_VISIBLE as id for virtio-gpu.
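
A hypothetical invocation once this is applied (the virtio-vga-gl device name
comes from the rest of this series, hostmem from this patch):

    -device virtio-vga-gl,hostmem=8G

which reserves a 64-bit prefetchable PCI BAR of that size and advertises it
through the VIRTIO_GPU_SHM_ID_HOST_VISIBLE shared memory capability.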

Signed-off-by: Antonio Caggiano 
Acked-by: Michael S. Tsirkin 
---
 hw/display/virtio-gpu-pci.c| 14 ++
 hw/display/virtio-gpu.c|  1 +
 hw/display/virtio-vga.c| 33 -
 include/hw/virtio/virtio-gpu.h |  5 +
 4 files changed, 44 insertions(+), 9 deletions(-)

diff --git a/hw/display/virtio-gpu-pci.c b/hw/display/virtio-gpu-pci.c
index 93f214ff58..da6a99f038 100644
--- a/hw/display/virtio-gpu-pci.c
+++ b/hw/display/virtio-gpu-pci.c
@@ -33,6 +33,20 @@ static void virtio_gpu_pci_base_realize(VirtIOPCIProxy 
*vpci_dev, Error **errp)
 DeviceState *vdev = DEVICE(g);
 int i;
 
+if (virtio_gpu_hostmem_enabled(g->conf)) {
+vpci_dev->msix_bar_idx = 1;
+vpci_dev->modern_mem_bar_idx = 2;
+memory_region_init(&g->hostmem, OBJECT(g), "virtio-gpu-hostmem",
+   g->conf.hostmem);
+pci_register_bar(&vpci_dev->pci_dev, 4,
+ PCI_BASE_ADDRESS_SPACE_MEMORY |
+ PCI_BASE_ADDRESS_MEM_PREFETCH |
+ PCI_BASE_ADDRESS_MEM_TYPE_64,
+ &g->hostmem);
+virtio_pci_add_shm_cap(vpci_dev, 4, 0, g->conf.hostmem,
+   VIRTIO_GPU_SHM_ID_HOST_VISIBLE);
+}
+
 virtio_pci_force_virtio_1(vpci_dev);
  if (!qdev_realize(vdev, BUS(&vpci_dev->bus), errp)) {
 return;
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 5e15c79b94..7b592f998d 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -1425,6 +1425,7 @@ static Property virtio_gpu_properties[] = {
  256 * MiB),
 DEFINE_PROP_BIT("blob", VirtIOGPU, parent_obj.conf.flags,
 VIRTIO_GPU_FLAG_BLOB_ENABLED, false),
+DEFINE_PROP_SIZE("hostmem", VirtIOGPU, parent_obj.conf.hostmem, 0),
 DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/display/virtio-vga.c b/hw/display/virtio-vga.c
index e6fb0aa876..c8552ff760 100644
--- a/hw/display/virtio-vga.c
+++ b/hw/display/virtio-vga.c
@@ -115,17 +115,32 @@ static void virtio_vga_base_realize(VirtIOPCIProxy 
*vpci_dev, Error **errp)
  pci_register_bar(&vpci_dev->pci_dev, 0,
   PCI_BASE_ADDRESS_MEM_PREFETCH, &vga->vram);
 
-/*
- * Configure virtio bar and regions
- *
- * We use bar #2 for the mmio regions, to be compatible with stdvga.
- * virtio regions are moved to the end of bar #2, to make room for
- * the stdvga mmio registers at the start of bar #2.
- */
-vpci_dev->modern_mem_bar_idx = 2;
-vpci_dev->msix_bar_idx = 4;
 vpci_dev->modern_io_bar_idx = 5;
 
+if (!virtio_gpu_hostmem_enabled(g->conf)) {
+/*
+ * Configure virtio bar and regions
+ *
+ * We use bar #2 for the mmio regions, to be compatible with stdvga.
+ * virtio regions are moved to the end of bar #2, to make room for
+ * the stdvga mmio registers at the start of bar #2.
+ */
+vpci_dev->modern_mem_bar_idx = 2;
+vpci_dev->msix_bar_idx = 4;
+} else {
+vpci_dev->msix_bar_idx = 1;
+vpci_dev->modern_mem_bar_idx = 2;
+memory_region_init(&g->hostmem, OBJECT(g), "virtio-gpu-hostmem",
+   g->conf.hostmem);
+pci_register_bar(&vpci_dev->pci_dev, 4,
+ PCI_BASE_ADDRESS_SPACE_MEMORY |
+ PCI_BASE_ADDRESS_MEM_PREFETCH |
+ PCI_BASE_ADDRESS_MEM_TYPE_64,
+ &g->hostmem);
+virtio_pci_add_shm_cap(vpci_dev, 4, 0, g->conf.hostmem,
+   VIRTIO_GPU_SHM_ID_HOST_VISIBLE);
+}
+
 if (!(vpci_dev->flags & VIRTIO_PCI_FLAG_PAGE_PER_VQ)) {
 /*
  * with page-per-vq=off there is no padding space we can use
diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index 2e28507efe..eafce75b04 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -102,12 +102,15 @@ enum virtio_gpu_base_conf_flags {
 (_cfg.flags & (1 << VIRTIO_GPU_FLAG_DMABUF_ENABLED))
 #define virtio_gpu_blob_enabled(_cfg) \
 (_cfg.flags & (1 << VIRTIO_GPU_FLAG_BLOB_ENABLED))
+#define virtio_gpu_hostmem_enabled(_cfg) \
+(_cfg.hostmem > 0)
 
 struct virtio_gpu_base_conf {
 uint32_t max_outputs;
 uint32_t flags;
 uint32_t xres;
 uint32_t yres;
+uint64_t hostmem;
 };
 
 struct virtio_gpu_ctrl_command {
@@ -131,6 +134,8 @@ struct VirtIOGPUBase {
 int renderer_blocked;
 int enable;
 
+MemoryRegion hostmem;
+
 struct virtio_gpu_scanout scanout[VIRTIO_GPU_MAX_SCANOUTS];
 
 int enabled_output_bitmask;
-- 
2.40.0.634.g4ca3ef3211-goog




[RFC PATCH 03/13] virtio-gpu blob prep: improve decoding and add memory region

2023-04-20 Thread Gurchetan Singh
From: Antonio Caggiano 

This adds preparatory functions needed to:

 - decode blob cmds
 - track memory regions associated with mappable blobs
 - track iovecs
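
(Background on the bswap helpers below: virtio 1.0 structures are
little-endian on the wire, and the leXX_to_cpus() helpers byte-swap the
fields in place to host endianness -- a no-op on little-endian hosts.
Illustration only:

    le32_to_cpus(&cblob->blob_mem);  /* blob_mem is host-endian after this */
)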

Signed-off-by: Antonio Caggiano 
Signed-off-by: Dmitry Osipenko 
Signed-off-by: Gurchetan Singh 
---
 hw/display/virtio-gpu.c  | 11 +++
 include/hw/virtio/virtio-gpu-bswap.h | 18 ++
 include/hw/virtio/virtio-gpu.h   |  8 
 3 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 7b592f998d..938eed9181 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -33,16 +33,11 @@
 
 #define VIRTIO_GPU_VM_VERSION 1
 
-static struct virtio_gpu_simple_resource*
-virtio_gpu_find_resource(VirtIOGPU *g, uint32_t resource_id);
 static struct virtio_gpu_simple_resource *
 virtio_gpu_find_check_resource(VirtIOGPU *g, uint32_t resource_id,
bool require_backing,
const char *caller, uint32_t *error);
 
-static void virtio_gpu_cleanup_mapping(VirtIOGPU *g,
-   struct virtio_gpu_simple_resource *res);
-
 void virtio_gpu_update_cursor_data(VirtIOGPU *g,
struct virtio_gpu_scanout *s,
uint32_t resource_id)
@@ -115,7 +110,7 @@ static void update_cursor(VirtIOGPU *g, struct 
virtio_gpu_update_cursor *cursor)
   cursor->resource_id ? 1 : 0);
 }
 
-static struct virtio_gpu_simple_resource *
+struct virtio_gpu_simple_resource *
 virtio_gpu_find_resource(VirtIOGPU *g, uint32_t resource_id)
 {
 struct virtio_gpu_simple_resource *res;
@@ -872,8 +867,8 @@ void virtio_gpu_cleanup_mapping_iov(VirtIOGPU *g,
 g_free(iov);
 }
 
-static void virtio_gpu_cleanup_mapping(VirtIOGPU *g,
-   struct virtio_gpu_simple_resource *res)
+void virtio_gpu_cleanup_mapping(VirtIOGPU *g,
+struct virtio_gpu_simple_resource *res)
 {
 virtio_gpu_cleanup_mapping_iov(g, res->iov, res->iov_cnt);
 res->iov = NULL;
diff --git a/include/hw/virtio/virtio-gpu-bswap.h 
b/include/hw/virtio/virtio-gpu-bswap.h
index 9124108485..dd1975e2d4 100644
--- a/include/hw/virtio/virtio-gpu-bswap.h
+++ b/include/hw/virtio/virtio-gpu-bswap.h
@@ -63,10 +63,28 @@ virtio_gpu_create_blob_bswap(struct 
virtio_gpu_resource_create_blob *cblob)
 {
  virtio_gpu_ctrl_hdr_bswap(&cblob->hdr);
  le32_to_cpus(&cblob->resource_id);
+le32_to_cpus(&cblob->blob_mem);
  le32_to_cpus(&cblob->blob_flags);
+le32_to_cpus(&cblob->nr_entries);
+le64_to_cpus(&cblob->blob_id);
  le64_to_cpus(&cblob->size);
 }
 
+static inline void
+virtio_gpu_map_blob_bswap(struct virtio_gpu_resource_map_blob *mblob)
+{
+virtio_gpu_ctrl_hdr_bswap(&mblob->hdr);
+le32_to_cpus(&mblob->resource_id);
+le64_to_cpus(&mblob->offset);
+}
+
+static inline void
+virtio_gpu_unmap_blob_bswap(struct virtio_gpu_resource_unmap_blob *ublob)
+{
+virtio_gpu_ctrl_hdr_bswap(&ublob->hdr);
+le32_to_cpus(&ublob->resource_id);
+}
+
 static inline void
 virtio_gpu_scanout_blob_bswap(struct virtio_gpu_set_scanout_blob *ssb)
 {
diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index eafce75b04..326988e4d5 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -55,6 +55,9 @@ struct virtio_gpu_simple_resource {
 int dmabuf_fd;
 uint8_t *remapped;
 
+MemoryRegion region;
+void *mapped;
+
 QTAILQ_ENTRY(virtio_gpu_simple_resource) next;
 };
 
@@ -245,6 +248,9 @@ void virtio_gpu_base_fill_display_info(VirtIOGPUBase *g,
 struct virtio_gpu_resp_display_info *dpy_info);
 
 /* virtio-gpu.c */
+struct virtio_gpu_simple_resource *
+virtio_gpu_find_resource(VirtIOGPU *g, uint32_t resource_id);
+
 void virtio_gpu_ctrl_response(VirtIOGPU *g,
   struct virtio_gpu_ctrl_command *cmd,
   struct virtio_gpu_ctrl_hdr *resp,
@@ -263,6 +269,8 @@ int virtio_gpu_create_mapping_iov(VirtIOGPU *g,
   uint32_t *niov);
 void virtio_gpu_cleanup_mapping_iov(VirtIOGPU *g,
 struct iovec *iov, uint32_t count);
+void virtio_gpu_cleanup_mapping(VirtIOGPU *g,
+struct virtio_gpu_simple_resource *res);
 void virtio_gpu_process_cmdq(VirtIOGPU *g);
 void virtio_gpu_device_realize(DeviceState *qdev, Error **errp);
 void virtio_gpu_reset(VirtIODevice *vdev);
-- 
2.40.0.634.g4ca3ef3211-goog




[RFC PATCH 07/13] gfxstream + rutabaga prep: define callbacks in realize function

2023-04-20 Thread Gurchetan Singh
This reduces the amount of renderer-backend-specific code that needs to
be exposed to the GL device.  We only need one realize function
per renderer backend.

Signed-off-by: Gurchetan Singh 
---
 hw/display/virtio-gpu-gl.c | 17 +++--
 hw/display/virtio-gpu-virgl.c  | 35 --
 include/hw/virtio/virtio-gpu.h |  7 ---
 3 files changed, 36 insertions(+), 23 deletions(-)

diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
index 2d140e8792..547e697333 100644
--- a/hw/display/virtio-gpu-gl.c
+++ b/hw/display/virtio-gpu-gl.c
@@ -21,6 +21,11 @@
 #include "hw/virtio/virtio-gpu-pixman.h"
 #include "hw/qdev-properties.h"
 
+static void virtio_gpu_gl_device_realize(DeviceState *qdev, Error **errp)
+{
+virtio_gpu_virgl_device_realize(qdev, errp);
+}
+
 static Property virtio_gpu_gl_properties[] = {
 DEFINE_PROP_BIT("stats", VirtIOGPU, parent_obj.conf.flags,
 VIRTIO_GPU_FLAG_STATS_ENABLED, false),
@@ -34,13 +39,13 @@ static void virtio_gpu_gl_class_init(ObjectClass *klass, 
void *data)
 VirtIOGPUBaseClass *vbc = VIRTIO_GPU_BASE_CLASS(klass);
 VirtIOGPUClass *vgc = VIRTIO_GPU_CLASS(klass);
 
-vbc->gl_flushed = virtio_gpu_virgl_flushed;
-vgc->handle_ctrl = virtio_gpu_virgl_handle_ctrl;
-vgc->process_cmd = virtio_gpu_virgl_process_cmd;
-vgc->update_cursor_data = virtio_gpu_virgl_update_cursor;
+vbc->gl_flushed = NULL;
+vgc->handle_ctrl = NULL;
+vgc->process_cmd = NULL;
+vgc->update_cursor_data = NULL;
 
-vdc->realize = virtio_gpu_virgl_device_realize;
-vdc->reset = virtio_gpu_virgl_reset;
+vdc->realize = virtio_gpu_gl_device_realize;
+vdc->reset = NULL;
 device_class_set_props(dc, virtio_gpu_gl_properties);
 }
 
diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c
index 87eccb2b97..5be288562d 100644
--- a/hw/display/virtio-gpu-virgl.c
+++ b/hw/display/virtio-gpu-virgl.c
@@ -401,8 +401,9 @@ static void virgl_cmd_get_capset(VirtIOGPU *g,
 g_free(resp);
 }
 
-void virtio_gpu_virgl_process_cmd(VirtIOGPU *g,
-  struct virtio_gpu_ctrl_command *cmd)
+static void
+virtio_gpu_virgl_process_cmd(VirtIOGPU *g,
+ struct virtio_gpu_ctrl_command *cmd)
 {
 VIRTIO_GPU_FILL_CMD(cmd->cmd_hdr);
 
@@ -637,7 +638,7 @@ static int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g)
 return capset2_max_ver ? 2 : 1;
 }
 
-void
+static void
 virtio_gpu_virgl_update_cursor(VirtIOGPU *g,
struct virtio_gpu_scanout *s,
uint32_t resource_id)
@@ -661,14 +662,14 @@ virtio_gpu_virgl_update_cursor(VirtIOGPU *g,
 free(data);
 }
 
-void virtio_gpu_virgl_flushed(VirtIOGPUBase *b)
+static void virtio_gpu_virgl_flushed(VirtIOGPUBase *b)
 {
 VirtIOGPU *g = VIRTIO_GPU(b);
 
 virtio_gpu_process_cmdq(g);
 }
 
-void virtio_gpu_virgl_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
+static void virtio_gpu_virgl_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
 {
 VirtIOGPU *g = VIRTIO_GPU(vdev);
 VirtIOGPUGL *gl = VIRTIO_GPU_GL(vdev);
@@ -700,7 +701,7 @@ void virtio_gpu_virgl_handle_ctrl(VirtIODevice *vdev, 
VirtQueue *vq)
 virtio_gpu_virgl_fence_poll(g);
 }
 
-void virtio_gpu_virgl_reset(VirtIODevice *vdev)
+static void virtio_gpu_virgl_reset(VirtIODevice *vdev)
 {
 VirtIOGPU *g = VIRTIO_GPU(vdev);
 VirtIOGPUGL *gl = VIRTIO_GPU_GL(vdev);
@@ -719,7 +720,21 @@ void virtio_gpu_virgl_reset(VirtIODevice *vdev)
 
 void virtio_gpu_virgl_device_realize(DeviceState *qdev, Error **errp)
 {
-VirtIOGPU *g = VIRTIO_GPU(qdev);
+VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
+VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
+
+VirtIOGPUBase *bdev = VIRTIO_GPU_BASE(qdev);
+VirtIOGPUBaseClass *vbc = VIRTIO_GPU_BASE_GET_CLASS(bdev);
+
+VirtIOGPU *gpudev = VIRTIO_GPU(qdev);
+VirtIOGPUClass *vgc = VIRTIO_GPU_GET_CLASS(gpudev);
+
+vbc->gl_flushed = virtio_gpu_virgl_flushed;
+vgc->handle_ctrl = virtio_gpu_virgl_handle_ctrl;
+vgc->process_cmd = virtio_gpu_virgl_process_cmd;
+vgc->update_cursor_data = virtio_gpu_virgl_update_cursor;
+
+vdc->reset = virtio_gpu_virgl_reset;
 
 #if HOST_BIG_ENDIAN
 error_setg(errp, "virgl is not supported on bigendian platforms");
@@ -737,9 +752,9 @@ void virtio_gpu_virgl_device_realize(DeviceState *qdev, 
Error **errp)
 return;
 }
 
-g->parent_obj.conf.flags |= (1 << VIRTIO_GPU_FLAG_VIRGL_ENABLED);
-VIRTIO_GPU_BASE(g)->virtio_config.num_capsets =
-virtio_gpu_virgl_get_num_capsets(g);
+gpudev->parent_obj.conf.flags |= (1 << VIRTIO_GPU_FLAG_VIRGL_ENABLED);
+VIRTIO_GPU_BASE(gpudev)->virtio_config.num_capsets =
+virtio_gpu_virgl_get_num_capsets(gpudev);
 
 virtio_gpu_device_realize(qdev, errp);
 }
diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index 7317b60004..421733d751 100644
--- 

[RFC PATCH 11/13] gfxstream + rutabaga: enable rutabaga

2023-04-20 Thread Gurchetan Singh
This change enables rutabaga to receive virtio-gpu-3d hypercalls
when it is active.

Signed-off-by: Gurchetan Singh 
---
 hw/display/virtio-gpu-base.c | 3 ++-
 hw/display/virtio-gpu-gl.c   | 9 -
 hw/display/virtio-gpu.c  | 5 +++--
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c
index 6c5f1f327f..7913d9b82e 100644
--- a/hw/display/virtio-gpu-base.c
+++ b/hw/display/virtio-gpu-base.c
@@ -206,7 +206,8 @@ virtio_gpu_base_get_features(VirtIODevice *vdev, uint64_t 
features,
 {
 VirtIOGPUBase *g = VIRTIO_GPU_BASE(vdev);
 
-if (virtio_gpu_virgl_enabled(g->conf)) {
+if (virtio_gpu_virgl_enabled(g->conf) ||
+virtio_gpu_rutabaga_enabled(g->conf)) {
 features |= (1 << VIRTIO_GPU_F_VIRGL);
 }
 if (virtio_gpu_edid_enabled(g->conf)) {
diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
index 15270b0c8a..fc09900ed9 100644
--- a/hw/display/virtio-gpu-gl.c
+++ b/hw/display/virtio-gpu-gl.c
@@ -23,7 +23,14 @@
 
 static void virtio_gpu_gl_device_realize(DeviceState *qdev, Error **errp)
 {
-virtio_gpu_virgl_device_realize(qdev, errp);
+VirtIOGPUGL *virtio_gpu = VIRTIO_GPU_GL(qdev);
+virtio_gpu->rutabaga = NULL;
+virtio_gpu_rutabaga_device_realize(qdev, errp);
+
+/* Fallback to virgl if rutabaga fails to initialize */
+if (!virtio_gpu->rutabaga) {
+virtio_gpu_virgl_device_realize(qdev, errp);
+}
 }
 
 static Property virtio_gpu_gl_properties[] = {
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c
index 938eed9181..3e92f9db6b 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -1314,8 +1314,9 @@ void virtio_gpu_device_realize(DeviceState *qdev, Error 
**errp)
 VirtIOGPU *g = VIRTIO_GPU(qdev);
 
 if (virtio_gpu_blob_enabled(g->parent_obj.conf)) {
-if (!virtio_gpu_have_udmabuf()) {
-error_setg(errp, "cannot enable blob resources without udmabuf");
+if (!virtio_gpu_have_udmabuf() &&
+!virtio_gpu_rutabaga_enabled(g->parent_obj.conf)) {
+error_setg(errp, "need udmabuf or rutabaga for blob resources");
 return;
 }
 
-- 
2.40.0.634.g4ca3ef3211-goog




[RFC PATCH 01/13] virtio: Add shared memory capability

2023-04-20 Thread Gurchetan Singh
From: "Dr. David Alan Gilbert" 

Define a new capability type 'VIRTIO_PCI_CAP_SHARED_MEMORY_CFG' to allow
defining shared memory regions with sizes and offsets of 2^32 and more.
Multiple instances of the capability are allowed and distinguished
by a device-specific 'id'.
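
As a usage sketch (hedged: the caller, BAR number and sizes below are
illustrative assumptions, not part of this patch), a device could
advertise a large region like so:

    #include "qemu/units.h"

    /*
     * Expose a 16 GiB shared memory region at offset 0 of BAR 2 as
     * shmid 0.  The cap.length/length_hi and cap.offset/offset_hi
     * split is what permits sizes and offsets of 2^32 and more.
     */
    virtio_pci_add_shm_cap(proxy, 2 /* bar */, 0 /* offset */,
                           16 * GiB /* length */, 0 /* id */);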

Signed-off-by: Dr. David Alan Gilbert 
Signed-off-by: Antonio Caggiano 
---
 hw/virtio/virtio-pci.c | 18 ++
 include/hw/virtio/virtio-pci.h |  4 
 2 files changed, 22 insertions(+)

diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c
index 247325c193..08ba76fca2 100644
--- a/hw/virtio/virtio-pci.c
+++ b/hw/virtio/virtio-pci.c
@@ -1399,6 +1399,24 @@ static int virtio_pci_add_mem_cap(VirtIOPCIProxy *proxy,
 return offset;
 }
 
+int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy,
+   uint8_t bar, uint64_t offset, uint64_t length,
+   uint8_t id)
+{
+struct virtio_pci_cap64 cap = {
+.cap.cap_len = sizeof cap,
+.cap.cfg_type = VIRTIO_PCI_CAP_SHARED_MEMORY_CFG,
+};
+
+cap.cap.bar = bar;
+cap.cap.length = cpu_to_le32(length);
+cap.length_hi = cpu_to_le32(length >> 32);
+cap.cap.offset = cpu_to_le32(offset);
+cap.offset_hi = cpu_to_le32(offset >> 32);
+cap.cap.id = id;
+return virtio_pci_add_mem_cap(proxy, &cap.cap);
+}
+
 static uint64_t virtio_pci_common_read(void *opaque, hwaddr addr,
unsigned size)
 {
diff --git a/include/hw/virtio/virtio-pci.h b/include/hw/virtio/virtio-pci.h
index ab2051b64b..5a3f182f99 100644
--- a/include/hw/virtio/virtio-pci.h
+++ b/include/hw/virtio/virtio-pci.h
@@ -264,4 +264,8 @@ unsigned virtio_pci_optimal_num_queues(unsigned 
fixed_queues);
 void virtio_pci_set_guest_notifier_fd_handler(VirtIODevice *vdev, VirtQueue 
*vq,
   int n, bool assign,
   bool with_irqfd);
+
+int virtio_pci_add_shm_cap(VirtIOPCIProxy *proxy, uint8_t bar, uint64_t offset,
+   uint64_t length, uint8_t id);
+
 #endif
-- 
2.40.0.634.g4ca3ef3211-goog




[RFC PATCH 05/13] gfxstream + rutabaga prep: virtio_gpu_gl -> virtio_gpu_virgl

2023-04-20 Thread Gurchetan Singh
The virtio-gpu GL device has a heavy dependence on virgl.
Acknowledge this by naming functions accurately.

Signed-off-by: Gurchetan Singh 
---
 hw/display/virtio-gpu-gl.c | 27 ++-
 hw/display/virtio-gpu-virgl.c  |  2 +-
 include/hw/virtio/virtio-gpu.h |  2 +-
 3 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
index e06be60dfb..dc648aacb2 100644
--- a/hw/display/virtio-gpu-gl.c
+++ b/hw/display/virtio-gpu-gl.c
@@ -25,9 +25,10 @@
 
 #include 
 
-static void virtio_gpu_gl_update_cursor_data(VirtIOGPU *g,
- struct virtio_gpu_scanout *s,
- uint32_t resource_id)
+static void
+virtio_gpu_virgl_update_cursor(VirtIOGPU *g,
+   struct virtio_gpu_scanout *s,
+   uint32_t resource_id)
 {
 uint32_t width, height;
 uint32_t pixels, *data;
@@ -48,14 +49,14 @@ static void virtio_gpu_gl_update_cursor_data(VirtIOGPU *g,
 free(data);
 }
 
-static void virtio_gpu_gl_flushed(VirtIOGPUBase *b)
+static void virtio_gpu_virgl_flushed(VirtIOGPUBase *b)
 {
 VirtIOGPU *g = VIRTIO_GPU(b);
 
 virtio_gpu_process_cmdq(g);
 }
 
-static void virtio_gpu_gl_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
+static void virtio_gpu_virgl_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
 {
 VirtIOGPU *g = VIRTIO_GPU(vdev);
 VirtIOGPUGL *gl = VIRTIO_GPU_GL(vdev);
@@ -71,7 +72,7 @@ static void virtio_gpu_gl_handle_ctrl(VirtIODevice *vdev, 
VirtQueue *vq)
 }
 if (gl->renderer_reset) {
 gl->renderer_reset = false;
-virtio_gpu_virgl_reset(g);
+virtio_gpu_virglrenderer_reset(g);
 }
 
 cmd = virtqueue_pop(vq, sizeof(struct virtio_gpu_ctrl_command));
@@ -87,7 +88,7 @@ static void virtio_gpu_gl_handle_ctrl(VirtIODevice *vdev, 
VirtQueue *vq)
 virtio_gpu_virgl_fence_poll(g);
 }
 
-static void virtio_gpu_gl_reset(VirtIODevice *vdev)
+static void virtio_gpu_virgl_reset(VirtIODevice *vdev)
 {
 VirtIOGPU *g = VIRTIO_GPU(vdev);
 VirtIOGPUGL *gl = VIRTIO_GPU_GL(vdev);
@@ -104,7 +105,7 @@ static void virtio_gpu_gl_reset(VirtIODevice *vdev)
 }
 }
 
-static void virtio_gpu_gl_device_realize(DeviceState *qdev, Error **errp)
+static void virtio_gpu_virgl_device_realize(DeviceState *qdev, Error **errp)
 {
 VirtIOGPU *g = VIRTIO_GPU(qdev);
 
@@ -143,13 +144,13 @@ static void virtio_gpu_gl_class_init(ObjectClass *klass, 
void *data)
 VirtIOGPUBaseClass *vbc = VIRTIO_GPU_BASE_CLASS(klass);
 VirtIOGPUClass *vgc = VIRTIO_GPU_CLASS(klass);
 
-vbc->gl_flushed = virtio_gpu_gl_flushed;
-vgc->handle_ctrl = virtio_gpu_gl_handle_ctrl;
+vbc->gl_flushed = virtio_gpu_virgl_flushed;
+vgc->handle_ctrl = virtio_gpu_virgl_handle_ctrl;
 vgc->process_cmd = virtio_gpu_virgl_process_cmd;
-vgc->update_cursor_data = virtio_gpu_gl_update_cursor_data;
+vgc->update_cursor_data = virtio_gpu_virgl_update_cursor;
 
-vdc->realize = virtio_gpu_gl_device_realize;
-vdc->reset = virtio_gpu_gl_reset;
+vdc->realize = virtio_gpu_virgl_device_realize;
+vdc->reset = virtio_gpu_virgl_reset;
 device_class_set_props(dc, virtio_gpu_gl_properties);
 }
 
diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c
index 1c47603d40..f91d33ce13 100644
--- a/hw/display/virtio-gpu-virgl.c
+++ b/hw/display/virtio-gpu-virgl.c
@@ -599,7 +599,7 @@ void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g)
 }
 }
 
-void virtio_gpu_virgl_reset(VirtIOGPU *g)
+void virtio_gpu_virglrenderer_reset(VirtIOGPU *g)
 {
 virgl_renderer_reset();
 }
diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index adee17968d..e256e44172 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -297,7 +297,7 @@ void virtio_gpu_virgl_process_cmd(VirtIOGPU *g,
   struct virtio_gpu_ctrl_command *cmd);
 void virtio_gpu_virgl_fence_poll(VirtIOGPU *g);
 void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g);
-void virtio_gpu_virgl_reset(VirtIOGPU *g);
+void virtio_gpu_virglrenderer_reset(VirtIOGPU *g);
 int virtio_gpu_virgl_init(VirtIOGPU *g);
 int virtio_gpu_virgl_get_num_capsets(VirtIOGPU *g);
 
-- 
2.40.0.634.g4ca3ef3211-goog




[RFC PATCH 04/13] virtio-gpu: CONTEXT_INIT feature

2023-04-20 Thread Gurchetan Singh
From: Antonio Caggiano 

The feature can be enabled when a backend wants it.
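
As a sketch of how a backend would opt in (the variable "g" is assumed
to be the backend's VirtIOGPU; it is not part of this patch), setting
the conf flag at realize time is enough for
virtio_gpu_base_get_features() to advertise VIRTIO_GPU_F_CONTEXT_INIT:

    g->parent_obj.conf.flags |= (1 << VIRTIO_GPU_FLAG_CONTEXT_INIT_ENABLED);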

Signed-off-by: Antonio Caggiano 
Reviewed-by: Marc-André Lureau 
Signed-off-by: Gurchetan Singh 
---
 hw/display/virtio-gpu-base.c   | 3 +++
 include/hw/virtio/virtio-gpu.h | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c
index a29f191aa8..6c5f1f327f 100644
--- a/hw/display/virtio-gpu-base.c
+++ b/hw/display/virtio-gpu-base.c
@@ -215,6 +215,9 @@ virtio_gpu_base_get_features(VirtIODevice *vdev, uint64_t 
features,
 if (virtio_gpu_blob_enabled(g->conf)) {
 features |= (1 << VIRTIO_GPU_F_RESOURCE_BLOB);
 }
+if (virtio_gpu_context_init_enabled(g->conf)) {
+features |= (1 << VIRTIO_GPU_F_CONTEXT_INIT);
+}
 
 return features;
 }
diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index 326988e4d5..adee17968d 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -93,6 +93,7 @@ enum virtio_gpu_base_conf_flags {
 VIRTIO_GPU_FLAG_EDID_ENABLED,
 VIRTIO_GPU_FLAG_DMABUF_ENABLED,
 VIRTIO_GPU_FLAG_BLOB_ENABLED,
+VIRTIO_GPU_FLAG_CONTEXT_INIT_ENABLED,
 };
 
 #define virtio_gpu_virgl_enabled(_cfg) \
@@ -107,6 +108,8 @@ enum virtio_gpu_base_conf_flags {
 (_cfg.flags & (1 << VIRTIO_GPU_FLAG_BLOB_ENABLED))
 #define virtio_gpu_hostmem_enabled(_cfg) \
 (_cfg.hostmem > 0)
+#define virtio_gpu_context_init_enabled(_cfg) \
+(_cfg.flags & (1 << VIRTIO_GPU_FLAG_CONTEXT_INIT_ENABLED))
 
 struct virtio_gpu_base_conf {
 uint32_t max_outputs;
-- 
2.40.0.634.g4ca3ef3211-goog




[RFC PATCH 06/13] gfxstream + rutabaga prep: make GL device more library agnostic

2023-04-20 Thread Gurchetan Singh
Rather than create a virtio-gpu-gfxstream device and its
associated variants (vga, pci), let's just extend the GL device.

We need to:
- Move all virgl functions to their own file
- Keep only the needed class callbacks in the generic GL device

Signed-off-by: Gurchetan Singh 
---
 hw/display/virtio-gpu-gl.c | 110 --
 hw/display/virtio-gpu-virgl.c  | 119 +++--
 include/hw/virtio/virtio-gpu.h |  11 +--
 3 files changed, 120 insertions(+), 120 deletions(-)

diff --git a/hw/display/virtio-gpu-gl.c b/hw/display/virtio-gpu-gl.c
index dc648aacb2..2d140e8792 100644
--- a/hw/display/virtio-gpu-gl.c
+++ b/hw/display/virtio-gpu-gl.c
@@ -15,122 +15,12 @@
 #include "qemu/iov.h"
 #include "qemu/module.h"
 #include "qemu/error-report.h"
-#include "qapi/error.h"
-#include "sysemu/sysemu.h"
 #include "hw/virtio/virtio.h"
 #include "hw/virtio/virtio-gpu.h"
 #include "hw/virtio/virtio-gpu-bswap.h"
 #include "hw/virtio/virtio-gpu-pixman.h"
 #include "hw/qdev-properties.h"
 
-#include 
-
-static void
-virtio_gpu_virgl_update_cursor(VirtIOGPU *g,
-   struct virtio_gpu_scanout *s,
-   uint32_t resource_id)
-{
-uint32_t width, height;
-uint32_t pixels, *data;
-
-data = virgl_renderer_get_cursor_data(resource_id, &width, &height);
-if (!data) {
-return;
-}
-
-if (width != s->current_cursor->width ||
-height != s->current_cursor->height) {
-free(data);
-return;
-}
-
-pixels = s->current_cursor->width * s->current_cursor->height;
-memcpy(s->current_cursor->data, data, pixels * sizeof(uint32_t));
-free(data);
-}
-
-static void virtio_gpu_virgl_flushed(VirtIOGPUBase *b)
-{
-VirtIOGPU *g = VIRTIO_GPU(b);
-
-virtio_gpu_process_cmdq(g);
-}
-
-static void virtio_gpu_virgl_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
-{
-VirtIOGPU *g = VIRTIO_GPU(vdev);
-VirtIOGPUGL *gl = VIRTIO_GPU_GL(vdev);
-struct virtio_gpu_ctrl_command *cmd;
-
-if (!virtio_queue_ready(vq)) {
-return;
-}
-
-if (!gl->renderer_inited) {
-virtio_gpu_virgl_init(g);
-gl->renderer_inited = true;
-}
-if (gl->renderer_reset) {
-gl->renderer_reset = false;
-virtio_gpu_virglrenderer_reset(g);
-}
-
-cmd = virtqueue_pop(vq, sizeof(struct virtio_gpu_ctrl_command));
-while (cmd) {
-cmd->vq = vq;
-cmd->error = 0;
-cmd->finished = false;
-QTAILQ_INSERT_TAIL(&g->cmdq, cmd, next);
-cmd = virtqueue_pop(vq, sizeof(struct virtio_gpu_ctrl_command));
-}
-
-virtio_gpu_process_cmdq(g);
-virtio_gpu_virgl_fence_poll(g);
-}
-
-static void virtio_gpu_virgl_reset(VirtIODevice *vdev)
-{
-VirtIOGPU *g = VIRTIO_GPU(vdev);
-VirtIOGPUGL *gl = VIRTIO_GPU_GL(vdev);
-
-virtio_gpu_reset(vdev);
-
-/*
- * GL functions must be called with the associated GL context in main
- * thread, and when the renderer is unblocked.
- */
-if (gl->renderer_inited && !gl->renderer_reset) {
-virtio_gpu_virgl_reset_scanout(g);
-gl->renderer_reset = true;
-}
-}
-
-static void virtio_gpu_virgl_device_realize(DeviceState *qdev, Error **errp)
-{
-VirtIOGPU *g = VIRTIO_GPU(qdev);
-
-#if HOST_BIG_ENDIAN
-error_setg(errp, "virgl is not supported on bigendian platforms");
-return;
-#endif
-
-if (!object_resolve_path_type("", TYPE_VIRTIO_GPU_GL, NULL)) {
-error_setg(errp, "at most one %s device is permitted", 
TYPE_VIRTIO_GPU_GL);
-return;
-}
-
-if (!display_opengl) {
-error_setg(errp, "opengl is not available");
-return;
-}
-
-g->parent_obj.conf.flags |= (1 << VIRTIO_GPU_FLAG_VIRGL_ENABLED);
-VIRTIO_GPU_BASE(g)->virtio_config.num_capsets =
-virtio_gpu_virgl_get_num_capsets(g);
-
-virtio_gpu_device_realize(qdev, errp);
-}
-
 static Property virtio_gpu_gl_properties[] = {
 DEFINE_PROP_BIT("stats", VirtIOGPU, parent_obj.conf.flags,
 VIRTIO_GPU_FLAG_STATS_ENABLED, false),
diff --git a/hw/display/virtio-gpu-virgl.c b/hw/display/virtio-gpu-virgl.c
index f91d33ce13..87eccb2b97 100644
--- a/hw/display/virtio-gpu-virgl.c
+++ b/hw/display/virtio-gpu-virgl.c
@@ -14,6 +14,8 @@
 #include "qemu/osdep.h"
 #include "qemu/error-report.h"
 #include "qemu/iov.h"
+#include "qapi/error.h"
+#include "sysemu/sysemu.h"
 #include "trace.h"
 #include "hw/virtio/virtio.h"
 #include "hw/virtio/virtio-gpu.h"
@@ -584,12 +586,12 @@ static void virtio_gpu_fence_poll(void *opaque)
 }
 }
 
-void virtio_gpu_virgl_fence_poll(VirtIOGPU *g)
+static void virtio_gpu_virgl_fence_poll(VirtIOGPU *g)
 {
 virtio_gpu_fence_poll(g);
 }
 
-void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g)
+static void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g)
 {
 int i;
 
@@ -599,12 +601,12 @@ void virtio_gpu_virgl_reset_scanout(VirtIOGPU *g)
 }
 }
 
-void virtio_gpu_virglrenderer_reset(VirtIOGPU *g)
+static void virtio_gpu_virglrenderer_reset(VirtIOGPU *g)

[RFC PATCH 12/13] HACK: use memory region API to inject memory to guest

2023-04-20 Thread Gurchetan Singh
I just copied the patches that have been floating around that do
this, but it doesn't seem to robustly work.  This current
implementation is probably good enough to run vkcube or simple
apps, but whenever a test starts to aggressively map/unmap memory,
things do explode on the QEMU side.

A simple way to reproduce is to run:

./deqp-vk --deqp-case=deqp-vk --deqp-case=dEQP-VK.memory.mapping.suballocation.*

You should get stack traces that sometimes look like this:

0  __pthread_kill_implementation (no_tid=0, signo=6, threadid=140737316304448) 
at ./nptl/pthread_kill.c:44
1  __pthread_kill_internal (signo=6, threadid=140737316304448) at 
./nptl/pthread_kill.c:78
2  __GI___pthread_kill (threadid=140737316304448, signo=signo@entry=6) at 
./nptl/pthread_kill.c:89
3  0x77042476 in __GI_raise (sig=sig@entry=6) at 
../sysdeps/posix/raise.c:26
4  0x770287f3 in __GI_abort () at ./stdlib/abort.c:79
5  0x770896f6 in __libc_message (action=action@entry=do_abort, 
fmt=fmt@entry=0x771dbb8c "%s\n") at ../sysdeps/posix/libc_fatal.c:155
6  0x770a0d7c in malloc_printerr (str=str@entry=0x771de7b0 "double 
free or corruption (out)") at ./malloc/malloc.c:5664
7  0x770a2ef0 in _int_free (av=0x77219c80 , 
p=0x57793e00, have_lock=) at ./malloc/malloc.c:4588
8  0x770a54d3 in __GI___libc_free (mem=) at 
./malloc/malloc.c:3391
9  0x55d65e7e in phys_section_destroy (mr=0x57793e10) at 
../softmmu/physmem.c:1003
10 0x55d65ed0 in phys_sections_free (map=0x56d4b410) at 
../softmmu/physmem.c:1011
11 0x55d69578 in address_space_dispatch_free (d=0x56d4b400) at 
../softmmu/physmem.c:2430
12 0x55d58412 in flatview_destroy (view=0x572bb090) at 
../softmmu/memory.c:292
13 0x5600fd23 in call_rcu_thread (opaque=0x0) at ../util/rcu.c:284
14 0x560026d4 in qemu_thread_start (args=0x569cafa0) at 
../util/qemu-thread-posix.c:541
15 0x77094b43 in start_thread (arg=) at 
./nptl/pthread_create.c:442
16 0x77126a00 in clone3 () at 
../sysdeps/unix/sysv/linux/x86_64/clone3.S:81

or this:

0x55e1dc80 in object_unref (objptr=0x6d656d3c6b6e696c) at 
../qom/object.c:1198
1198        g_assert(obj->ref > 0);
(gdb) bt
0  0x55e1dc80 in object_unref (objptr=0x6d656d3c6b6e696c) at 
../qom/object.c:1198
1  0x55d5cca5 in memory_region_unref (mr=0x572b9e20) at 
../softmmu/memory.c:1799
2  0x55d65e47 in phys_section_destroy (mr=0x572b9e20) at 
../softmmu/physmem.c:998
3  0x55d65ec7 in phys_sections_free (map=0x588365c0) at 
../softmmu/physmem.c:1011
4  0x55d6956f in address_space_dispatch_free (d=0x588365b0) at 
../softmmu/physmem.c:2430
5  0x55d58409 in flatview_destroy (view=0x58836570) at 
../softmmu/memory.c:292
6  0x5600fd1a in call_rcu_thread (opaque=0x0) at ../util/rcu.c:284
7  0x560026cb in qemu_thread_start (args=0x569cafa0) at 
../util/qemu-thread-posix.c:541
8  0x77094b43 in start_thread (arg=) at 
./nptl/pthread_create.c:442
9  0x77126a00 in clone3 () at 
../sysdeps/unix/sysv/linux/x86_64/clone3.S:81

The reason seems to be that memory regions are handled on a different
thread than the virtio-gpu thread, and that inevitably leads to
raciness.  The memory region docs [a] generally seem to dissuade this:

"In order to do this, as a general rule do not create or destroy
 memory regions dynamically during a device’s lifetime, and only
 call object_unparent() in the memory region owner’s instance_finalize
 callback. The dynamically allocated data structure that contains
 the memory region then should obviously be freed in the
 instance_finalize callback as well."

However, instance_finalize is only called just before device destruction,
so storing the memory until then is unlikely to be an option.  The
tests do pass when virtio-gpu doesn't free the memory, but
progressively the guest becomes slower and then OOMs.

Though the api does make an exception:

"There is an exception to the above rule: it is okay to call
object_unparent at any time for an alias or a container region. It is
therefore also okay to create or destroy alias and container regions
dynamically during a device’s lifetime."

I believe we are trying to create a container subregion, but that's
still failing?  Are we doing it right?  Can any memory region experts
here help out?  The other relevant patch in this series
is "virtio-gpu: hostmem".

[a] https://qemu.readthedocs.io/en/latest/devel/memory.html
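
For comparison, a minimal sketch of the lifetime the docs recommend
(MyDev and its fields are illustrative, not from this series): create
the region once at realize time and only unparent it in
instance_finalize:

    static void my_dev_realize(DeviceState *dev, Error **errp)
    {
        MyDev *s = MY_DEV(dev);

        /* created once, lives as long as the device does */
        memory_region_init(&s->hostmem, OBJECT(s), "my-dev-hostmem",
                           s->hostmem_size);
    }

    static void my_dev_instance_finalize(Object *obj)
    {
        MyDev *s = MY_DEV(obj);

        /* the only sanctioned place to drop a non-container region */
        object_unparent(OBJECT(&s->hostmem));
    }

The map/unmap path in this patch instead creates and destroys
subregions at runtime, which is exactly what that guidance warns
about.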

Signed-off-by: Gurchetan Singh 
---
 hw/display/virtio-gpu-rutabaga.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/hw/display/virtio-gpu-rutabaga.c b/hw/display/virtio-gpu-rutabaga.c
index 5fd1154198..196267aac2 100644
--- a/hw/display/virtio-gpu-rutabaga.c
+++ b/hw/display/virtio-gpu-rutabaga.c
@@ -159,6 +159,12 @@ static int32_t rutabaga_handle_unmap(VirtIOGPU *g,
 GET_VIRTIO_GPU_GL(g);
 GET_RUTABAGA(virtio_gpu);
 
+

[RFC PATCH 10/13] gfxstream + rutabaga: add initial support for gfxstream

2023-04-20 Thread Gurchetan Singh
This adds initial support for gfxstream and cross-domain.  Both
features rely on virtio-gpu blob resources and context types, which
are also implemented in this patch.

gfxstream has a long and illustrious history in Android graphics
paravirtualization.  It has been powering graphics in the Android
Studio Emulator for more than a decade, which is the main developer
platform.

Originally conceived by Jesse Hall, it was first known as "EmuGL" [a].
The key design characteristic was a 1:1 threading model and
auto-generation, which fit nicely with the OpenGLES spec.  It also
allowed easy layering with ANGLE on the host, which provides the GLES
implementations on Windows or MacOS environments.

gfxstream has traditionally been maintained by a single engineer, and
between 2015 to 2021, the iron throne passed to Frank Yang.  Just to
name a few accomplishments in a reign filled with many of them: newer
versions of GLES, address space graphics, snapshot support and CTS
compliant Vulkan [b].

One major drawback was the use of out-of-tree goldfish drivers.
Android engineers didn't know much about DRM/KMS and especially TTM so
a simple guest to host pipe was conceived.

Luckily, virtio-gpu 3D started to emerge in 2016 due to the work of
the Mesa/virglrenderer communities.  In 2018, the initial virtio-gpu
port of gfxstream was done by Cuttlefish enthusiast Alistair Delva.
It was a symbol compatible replacement of virglrenderer [c] and named
"AVDVirglrenderer".  This implementation forms the basis of the
current gfxstream host implementation still in use today.

cross-domain support follows a similar arc.  Originally conceived by
Wayland aficionado David Reveman and crosvm enjoyer Zach Reizner in
2018, it initially relied on the downstream "virtio-wl" device.

In 2020 and 2021, virtio-gpu was extended to include blob resources
and multiple timelines by yours truly, features gfxstream/cross-domain
both require to function correctly.

Right now, we stand at the precipice of a truly fantastic possibility:
the Android Emulator powered by upstream QEMU and upstream Linux
kernel.  gfxstream will then be packaged properly, and app
developers can even fix gfxstream bugs on their own if they encounter
them.

It's been quite the ride, my friends.  Where will gfxstream head next,
nobody really knows.  I wouldn't be surprised if it's around for
another decade, maintained by a new generation of Android graphics
enthusiasts.  One thing is for sure, though -- it'll be filled with
friendship and magic!

Technical details:
  - Very simple initial display integration: just used Pixman
  - Largely, 1:1 mapping of virtio-gpu hypercalls to rutabaga function
calls

[a] https://android-review.googlesource.com/c/platform/development/+/34470
[b] https://android-review.googlesource.com/q/topic:%22vulkan-hostconnection-start%22
[c] https://android-review.googlesource.com/c/device/generic/goldfish-opengl/+/761927

Signed-off-by: Gurchetan Singh 
---
 hw/display/virtio-gpu-rutabaga.c | 995 +++
 1 file changed, 995 insertions(+)
 create mode 100644 hw/display/virtio-gpu-rutabaga.c

diff --git a/hw/display/virtio-gpu-rutabaga.c b/hw/display/virtio-gpu-rutabaga.c
new file mode 100644
index 00..5fd1154198
--- /dev/null
+++ b/hw/display/virtio-gpu-rutabaga.c
@@ -0,0 +1,995 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qemu/iov.h"
+#include "trace.h"
+#include "hw/virtio/virtio.h"
+#include "hw/virtio/virtio-gpu.h"
+#include "hw/virtio/virtio-gpu-pixman.h"
+#include "hw/virtio/virtio-iommu.h"
+
+#include 
+
+static int virtio_gpu_rutabaga_init(VirtIOGPU *g);
+
+#define GET_VIRTIO_GPU_GL(x)  \
+VirtIOGPUGL *virtio_gpu = VIRTIO_GPU_GL(x);   \
+
+#define GET_RUTABAGA(x)   \
+struct rutabaga *rutabaga = (struct rutabaga *)(x->rutabaga); \
+
+#define CHECK(condition, cmd) \
+do {  \
+if (!(condition)) { \
+qemu_log_mask(LOG_GUEST_ERROR, "CHECK_RESULT failed in %s() %s:"  \
+  "%d\n", __func__, __FILE__, __LINE__);  \
+cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC;  \
+return;   \
+   }  \
+} while (0)
+
+#define CHECK_RESULT(result, cmd) CHECK(result == 0, cmd)
+
+static void
+virtio_gpu_rutabaga_update_cursor(VirtIOGPU *g, struct virtio_gpu_scanout *s,
+  uint32_t resource_id)
+{
+struct virtio_gpu_simple_resource *res;
+struct rutabaga_transfer transfer = { 0 };
+struct 

[RFC PATCH 00/13] gfxstream + rutabaga_gfx: a surprising delight or startling epiphany?

2023-04-20 Thread Gurchetan Singh
From: Gurchetan Singh 

Rationale:

- gfxstream [a] is good for the Android Emulator/upstream QEMU
  alignment
- Wayland passhthrough [b] via the cross-domain context type is good
  for Linux on Linux display virtualization
- rutabaga_gfx [c] sits on top of gfxstream, cross-domain and even
  virglrenderer
- This series ports rutabaga_gfx to QEMU

Feedback requested:

- How is everyone feeling about gfxstream/rutabaga_gfx, especially UI
  maintainers?  I've been assuming it is a definite win, so if there's
  a divergence of opinion on that, we should resolve that quickly.

- Need help from memory region API experts on "HACK: use memory region
  API to inject memory to guest"

- Need help from QEMU multi-threaded experts on "HACK: schedule fence
  return on main AIO context"

--
| Longer |
--

Dear all,

The people have demanded it, and we have listened.  Just the other
day, some kids came up to me on the street -- hat in hand, teardrops
in their eyes -- and asked "please sir, can you perchance port
gfxstream and rutabaga_gfx to upstream QEMU?".  I honestly can't take
it anymore.

In a way, I can understand the fanaticism of the gfxstreamists -- the
benefits of gfxstream + rutabaga_gfx in upstream QEMU are massive for
all involved:

(i) Android Emulator aligned with QEMU

The biggest use case is no doubt the Android Emulator.  Although used
by millions of developers around the world [d][e], the Android Emulator
itself currently uses a forked QEMU 2.12.  The initial fork
happened in the early days of Android (circa 2006 [f]) and while the
situation has improved, a QEMU update inside the Android Emulator only
happens once every 3-5 years. Indeed, most Android Emulator developers
aren't even subscribed to qemu-devel@ given this situation.  Their
task is often to get the next foldable config working or fix that UI
bug, but long term technical debt is something that is rarely
prioritized.

This is one of those years when QEMU will be upreved, though.  Soon, the
emulator will be based on QEMU7.2 and new controls will be instituted
to make QEMU modifications harder.  Things that can be upstreamed
will be upstreamed.

One of the biggest downstream pieces of the Android Emulator is the
gfxstream graphics stack, and it has some nontrivial features that
aren't easy to implement elsewhere [g].

The lore of gfxstream is detailed in patch 10, but suffice to say
getting gfxstream mainlined would help move the Android Emulator out
of it's downstream mud hut into the light, love and compassion of
upstream.

(ii) Wayland passthrough

For the Linux guest on Linux host use case, we've elected to port
rutabaga_gfx into QEMU rather than gfxstream.  rutabaga_gfx sits on
top of gfxstream, virglrenderer, and the cross-domain context type.
With the cross-domain context type, one can avoid a guest compositor
pass and display VM windows like normal host windows.  It's now
possible to run the examples found in the crosvm book [h] with this
patchset.  There are a few problems [i], but fixing them is O(days).

This use case is less strong than the Android Emulator one, since
anyone who would play a game in a Linux guest via QEMU would be able
to run it natively.  But it could be good for developers who need to
test code in a virtual machine.

--
| Issues |
--

The two biggest unsolved issues are the last two "HACK:" patches.
Feedback from QEMU memory management and threading experts would be
greatly appreciated.

--
| UI integration |
--

This patchset intentionally uses the simplest KMS display integration
possible: framebuffer copies to Pixman.  The reason is Linux guests
are expected to use Wayland Passthrough, and the Android Emulator UI
integration is very complex.  gfxstream doesn't have a "context 0"
like virglrenderer that can force synchronization between QEMU's and
gfxstream's GL code.

Initially, we just want to run the Android Emulator in headless mode,
and we have a few subsequent followup ideas in mind for UI integration
(all with the goal of being minimally invasive for QEMU).  Note: even
with Android in headless mode, QEMU upstream will be used in production
and not just be a developer toy.

--
| Packaging / Versioning |
--

We have to build QEMU from sources due to compliance reasons, so we
haven't created Debian packages for either gfxstream or rutabaga_gfx
yet.  QEMU is upstream of Debian/Portage anyways.  Let us know the
standard on packaging and we should be able to follow it.

Versioning would be keyed on initial merge into QEMU.

--
| Testing|
--

A document on how to test the patchset is available on QEMU Gitlab [j].

[a] https://android.googlesource.com/device/generic/vulkan-cereal/
[b] https://www.youtube.com/watch?v=OZJiHMtIQ2M
[c] 

[RFC PATCH 09/13] gfxstream + rutabaga: add required meson changes

2023-04-20 Thread Gurchetan Singh
- Add meson detection of rutabaga_gfx
- Compile stubs when rutabaga_gfx_ffi is not installed
- Compile stubs when virglrenderer is not installed
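
A usage sketch (hedged: the option name is an assumption based on the
diffstat below, since the meson_options.txt hunk itself is not shown):

    ../configure --enable-rutabaga-gfx
    # or directly via meson:
    meson setup build -Drutabaga_gfx=enabled

When the library is absent, the new stub files keep the link working
with no-op realize functions.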

Signed-off-by: Gurchetan Singh 
---
 hw/display/meson.build | 40 --
 hw/display/virtio-gpu-rutabaga-stubs.c |  8 ++
 hw/display/virtio-gpu-virgl-stubs.c|  8 ++
 include/hw/virtio/virtio-gpu.h |  5 +++-
 meson.build|  8 ++
 meson_options.txt  |  2 ++
 scripts/meson-buildoptions.sh  |  3 ++
 7 files changed, 64 insertions(+), 10 deletions(-)
 create mode 100644 hw/display/virtio-gpu-rutabaga-stubs.c
 create mode 100644 hw/display/virtio-gpu-virgl-stubs.c

diff --git a/hw/display/meson.build b/hw/display/meson.build
index 4191694380..48785cfcb6 100644
--- a/hw/display/meson.build
+++ b/hw/display/meson.build
@@ -63,6 +63,8 @@ softmmu_ss.add(when: 'CONFIG_ARTIST', if_true: 
files('artist.c'))
 
 softmmu_ss.add(when: [pixman, 'CONFIG_ATI_VGA'], if_true: files('ati.c', 
'ati_2d.c', 'ati_dbg.c'))
 
+virgl_found = virgl.found() and opengl.found()
+rutabaga_found = rutabaga.found()
 
 if config_all_devices.has_key('CONFIG_VIRTIO_GPU')
   virtio_gpu_ss = ss.source_set()
@@ -73,12 +75,27 @@ if config_all_devices.has_key('CONFIG_VIRTIO_GPU')
   virtio_gpu_ss.add(when: 'CONFIG_VHOST_USER_GPU', if_true: 
files('vhost-user-gpu.c'))
   hw_display_modules += {'virtio-gpu': virtio_gpu_ss}
 
-  if virgl.found() and opengl.found()
-virtio_gpu_gl_ss = ss.source_set()
-virtio_gpu_gl_ss.add(when: ['CONFIG_VIRTIO_GPU', virgl, opengl],
- if_true: [files('virtio-gpu-gl.c', 
'virtio-gpu-virgl.c'), pixman, virgl])
-hw_display_modules += {'virtio-gpu-gl': virtio_gpu_gl_ss}
+  virtio_gpu_gl_ss = ss.source_set()
+  if virgl_found or rutabaga_found
+virtio_gpu_gl_ss.add(when: ['CONFIG_VIRTIO_GPU'],
+ if_true: [files('virtio-gpu-gl.c'), pixman])
   endif
+
+  if virgl_found
+virtio_gpu_gl_ss.add(when: ['CONFIG_VIRTIO_GPU'],
+ if_true: [files('virtio-gpu-virgl.c'), virgl])
+  else
+virtio_gpu_gl_ss.add([files('virtio-gpu-virgl-stubs.c')])
+  endif
+
+  if rutabaga_found
+virtio_gpu_gl_ss.add(when: ['CONFIG_VIRTIO_GPU'],
+ if_true: [files('virtio-gpu-rutabaga.c'), rutabaga])
+  else
+virtio_gpu_gl_ss.add([files('virtio-gpu-rutabaga-stubs.c')])
+  endif
+
+  hw_display_modules += {'virtio-gpu-gl': virtio_gpu_gl_ss}
 endif
 
 if config_all_devices.has_key('CONFIG_VIRTIO_PCI')
@@ -89,9 +106,10 @@ if config_all_devices.has_key('CONFIG_VIRTIO_PCI')
 if_true: files('vhost-user-gpu-pci.c'))
   hw_display_modules += {'virtio-gpu-pci': virtio_gpu_pci_ss}
 
-  if virgl.found() and opengl.found()
+
+  if virgl_found or rutabaga_found
 virtio_gpu_pci_gl_ss = ss.source_set()
-virtio_gpu_pci_gl_ss.add(when: ['CONFIG_VIRTIO_GPU', 'CONFIG_VIRTIO_PCI', 
virgl, opengl],
+virtio_gpu_pci_gl_ss.add(when: ['CONFIG_VIRTIO_GPU', 'CONFIG_VIRTIO_PCI'],
  if_true: [files('virtio-gpu-pci-gl.c'), pixman])
 hw_display_modules += {'virtio-gpu-pci-gl': virtio_gpu_pci_gl_ss}
   endif
@@ -108,8 +126,12 @@ if config_all_devices.has_key('CONFIG_VIRTIO_VGA')
   hw_display_modules += {'virtio-vga': virtio_vga_ss}
 
   virtio_vga_gl_ss = ss.source_set()
-  virtio_vga_gl_ss.add(when: ['CONFIG_VIRTIO_VGA', virgl, opengl],
-   if_true: [files('virtio-vga-gl.c'), pixman])
+
+  if virgl_found or rutabaga_found
+virtio_vga_gl_ss.add(when: ['CONFIG_VIRTIO_VGA'],
+ if_true: [files('virtio-vga-gl.c'), pixman])
+  endif
+
   virtio_vga_gl_ss.add(when: 'CONFIG_ACPI', if_true: files('acpi-vga.c'),
 if_false: files('acpi-vga-stub.c'))
   hw_display_modules += {'virtio-vga-gl': virtio_vga_gl_ss}
diff --git a/hw/display/virtio-gpu-rutabaga-stubs.c 
b/hw/display/virtio-gpu-rutabaga-stubs.c
new file mode 100644
index 00..26c38d3892
--- /dev/null
+++ b/hw/display/virtio-gpu-rutabaga-stubs.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "qemu/osdep.h"
+#include "hw/virtio/virtio-gpu.h"
+
+void virtio_gpu_rutabaga_device_realize(DeviceState *qdev, Error **errp)
+{
+/* nothing (stub) */
+}
diff --git a/hw/display/virtio-gpu-virgl-stubs.c 
b/hw/display/virtio-gpu-virgl-stubs.c
new file mode 100644
index 00..b29e35f990
--- /dev/null
+++ b/hw/display/virtio-gpu-virgl-stubs.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "qemu/osdep.h"
+#include "hw/virtio/virtio-gpu.h"
+
+void virtio_gpu_virgl_device_realize(DeviceState *qdev, Error **errp)
+{
+/* nothing (stub) */
+}
diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index a35ade3608..034c71e8f5 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -300,7 +300,10 @@ int 

[RFC PATCH 13/13] HACK: schedule fence return on main AIO context

2023-04-20 Thread Gurchetan Singh
Both gfxstream and cross-domain (and even newer versions of
virglrenderer: see VIRGL_RENDERER_ASYNC_FENCE_CB) like to signal
fence completion on threads ("callback threads") that are
different from the thread that processes the command queue
("main thread").

This is generally possible with locking, and this is what we do
in crosvm and other virtio-gpu 1.1 implementations.  However, on
QEMU a deadlock is observed if virtio_gpu_ctrl_response_nodata(..)
[used in the fence callback] is used from a thread that is not the
main thread.

The reason is the main thread takes the big QEMU lock (bql) somewhere
when processing the command queue, and virtio_gpu_ctrl_response_nodata(..)
needs that lock.  If you add in the lock needed to protect g->fenceq
from concurrent access by the main thread and the callback threads,
you can end up with deadlocks.

It's possible to work around this by scheduling the return of the fence
descriptors via aio_bh_schedule_oneshot_full(..), but that somewhat
negates the rationale for the asynchronous callbacks.

I also played around with aio_context_acquire()/aio_context_release(),
but it doesn't seem to help.

Is signaling the virtio_queue outside of the main thread possible?  If
so, how?

Signed-off-by: Gurchetan Singh 
---
 hw/display/virtio-gpu-rutabaga.c | 29 ++---
 include/hw/virtio/virtio-gpu.h   |  1 +
 2 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/hw/display/virtio-gpu-rutabaga.c b/hw/display/virtio-gpu-rutabaga.c
index 196267aac2..5c296aeef1 100644
--- a/hw/display/virtio-gpu-rutabaga.c
+++ b/hw/display/virtio-gpu-rutabaga.c
@@ -31,6 +31,11 @@ static int virtio_gpu_rutabaga_init(VirtIOGPU *g);
 
 #define CHECK_RESULT(result, cmd) CHECK(result == 0, cmd)
 
+struct rutabaga_aio_data {
+struct VirtIOGPUGL *virtio_gpu;
+struct rutabaga_fence fence;
+};
+
 static void
 virtio_gpu_rutabaga_update_cursor(VirtIOGPU *g, struct virtio_gpu_scanout *s,
   uint32_t resource_id)
@@ -823,10 +828,11 @@ virtio_gpu_rutabaga_process_cmd(VirtIOGPU *g,
 }
 
 static void
-virtio_gpu_rutabaga_fence_cb(uint64_t user_data,
- struct rutabaga_fence fence_data)
+virtio_gpu_rutabaga_aio_cb(void *opaque)
 {
-VirtIOGPU *g = (VirtIOGPU *)(void*)(uintptr_t)user_data;
+struct rutabaga_aio_data *data =  (struct rutabaga_aio_data *)opaque;
+VirtIOGPU *g = (VirtIOGPU *)data->virtio_gpu;
+struct rutabaga_fence fence_data = data->fence;
 struct virtio_gpu_ctrl_command *cmd, *tmp;
 
 bool signaled_ctx_specific = fence_data.flags & 
RUTABAGA_FLAG_INFO_RING_IDX;
@@ -856,6 +862,22 @@ virtio_gpu_rutabaga_fence_cb(uint64_t user_data,
 QTAILQ_REMOVE(>fenceq, cmd, next);
 g_free(cmd);
 }
+
+g_free(data);
+}
+
+static void
+virtio_gpu_rutabaga_fence_cb(uint64_t user_data,
+ struct rutabaga_fence fence_data) {
+struct rutabaga_aio_data *data;
+VirtIOGPU *g = (VirtIOGPU *)(void*)(uintptr_t)user_data;
+GET_VIRTIO_GPU_GL(g);
+
+data = g_new0(struct rutabaga_aio_data, 1);
+data->virtio_gpu = virtio_gpu;
+data->fence = fence_data;
+aio_bh_schedule_oneshot_full(virtio_gpu->ctx, virtio_gpu_rutabaga_aio_cb,
+ (void *)data, "aio");
 }
 
 static int virtio_gpu_rutabaga_init(VirtIOGPU *g)
@@ -912,6 +934,7 @@ static int virtio_gpu_rutabaga_init(VirtIOGPU *g)
 free(channels.channels);
 }
 
+virtio_gpu->ctx = qemu_get_aio_context();
 return result;
 }
 
diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h
index 034c71e8f5..b33ad0c68f 100644
--- a/include/hw/virtio/virtio-gpu.h
+++ b/include/hw/virtio/virtio-gpu.h
@@ -225,6 +225,7 @@ struct VirtIOGPUGL {
 char *wayland_socket_path;
 uint32_t num_capsets;
 void *rutabaga;
+AioContext *ctx;
 };
 
 struct VhostUserGPU {
-- 
2.40.0.634.g4ca3ef3211-goog




I've made a qemu appimage

2023-04-20 Thread Lucas Machado Zainote
Hi there! I have made a QEMU AppImage that works on most Linux
distros. If you are interested, you can use mine as an example to ship
your software for most Linux distros, all in a single file, working
perfectly. On my GitHub you may find commands to use this AppImage.

https://github.com/lucasmz1

And of course I want to ask your permission to redistribute this
application on my GitHub page.

Thanks for the great application you've developed.

Thank you!


Re: [PATCH] target/riscv: add Ventana's Veyron V1 CPU

2023-04-20 Thread Alistair Francis
On Tue, Apr 18, 2023 at 10:37 PM Daniel Henrique Barboza
 wrote:
>
> From: Rahul Pathak 
>
> Add a virtual CPU for Ventana's first CPU named veyron-v1. It runs
> exclusively for the rv64 target. It's tested with the 'virt' board.
>
> CPU specs and general information can be found here:
>
> https://www.nextplatform.com/2023/02/02/the-first-risc-v-shot-across-the-datacenter-bow/
>
> Signed-off-by: Rahul Pathak 
> Signed-off-by: Mayuresh Chitale 
> Signed-off-by: Daniel Henrique Barboza 

Thanks!

Applied to riscv-to-apply.next

Alistair

> ---
>  target/riscv/cpu-qom.h  |  1 +
>  target/riscv/cpu.c  | 38 +
>  target/riscv/cpu_vendorid.h |  4 
>  3 files changed, 43 insertions(+)
>
> diff --git a/target/riscv/cpu-qom.h b/target/riscv/cpu-qom.h
> index b29090ad86..04af50983e 100644
> --- a/target/riscv/cpu-qom.h
> +++ b/target/riscv/cpu-qom.h
> @@ -41,6 +41,7 @@
>  #define TYPE_RISCV_CPU_SIFIVE_U34   RISCV_CPU_TYPE_NAME("sifive-u34")
>  #define TYPE_RISCV_CPU_SIFIVE_U54   RISCV_CPU_TYPE_NAME("sifive-u54")
>  #define TYPE_RISCV_CPU_THEAD_C906   RISCV_CPU_TYPE_NAME("thead-c906")
> +#define TYPE_RISCV_CPU_VEYRON_V1    RISCV_CPU_TYPE_NAME("veyron-v1")
>  #define TYPE_RISCV_CPU_HOST RISCV_CPU_TYPE_NAME("host")
>
>  #if defined(TARGET_RISCV32)
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index befa64528f..db0875fb43 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -422,6 +422,43 @@ static void rv64_thead_c906_cpu_init(Object *obj)
>  #endif
>  }
>
> +static void rv64_veyron_v1_cpu_init(Object *obj)
> +{
> +CPURISCVState *env = &RISCV_CPU(obj)->env;
> +RISCVCPU *cpu = RISCV_CPU(obj);
> +
> +set_misa(env, MXL_RV64, RVG | RVC | RVS | RVU | RVH);
> +env->priv_ver = PRIV_VERSION_1_12_0;
> +
> +/* Enable ISA extensions */
> +cpu->cfg.mmu = true;
> +cpu->cfg.ext_icbom = true;
> +cpu->cfg.cbom_blocksize = 64;
> +cpu->cfg.cboz_blocksize = 64;
> +cpu->cfg.ext_icboz = true;
> +cpu->cfg.ext_smaia = true;
> +cpu->cfg.ext_ssaia = true;
> +cpu->cfg.ext_sscofpmf = true;
> +cpu->cfg.ext_sstc = true;
> +cpu->cfg.ext_svinval = true;
> +cpu->cfg.ext_svnapot = true;
> +cpu->cfg.ext_svpbmt = true;
> +cpu->cfg.ext_smstateen = true;
> +cpu->cfg.ext_zba = true;
> +cpu->cfg.ext_zbb = true;
> +cpu->cfg.ext_zbc = true;
> +cpu->cfg.ext_zbs = true;
> +cpu->cfg.ext_XVentanaCondOps = true;
> +
> +cpu->cfg.mvendorid = VEYRON_V1_MVENDORID;
> +cpu->cfg.marchid = VEYRON_V1_MARCHID;
> +cpu->cfg.mimpid = VEYRON_V1_MIMPID;
> +
> +#ifndef CONFIG_USER_ONLY
> +set_satp_mode_max_supported(cpu, VM_1_10_SV48);
> +#endif
> +}
> +
>  static void rv128_base_cpu_init(Object *obj)
>  {
>  if (qemu_tcg_mttcg_enabled()) {
> @@ -1827,6 +1864,7 @@ static const TypeInfo riscv_cpu_type_infos[] = {
>  DEFINE_CPU(TYPE_RISCV_CPU_SIFIVE_U54,   rv64_sifive_u_cpu_init),
>  DEFINE_CPU(TYPE_RISCV_CPU_SHAKTI_C, rv64_sifive_u_cpu_init),
>  DEFINE_CPU(TYPE_RISCV_CPU_THEAD_C906,   rv64_thead_c906_cpu_init),
> +DEFINE_CPU(TYPE_RISCV_CPU_VEYRON_V1,rv64_veyron_v1_cpu_init),
>  DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_BASE128,  rv128_base_cpu_init),
>  #endif
>  };
> diff --git a/target/riscv/cpu_vendorid.h b/target/riscv/cpu_vendorid.h
> index a5aa249bc9..96b6b9c2cb 100644
> --- a/target/riscv/cpu_vendorid.h
> +++ b/target/riscv/cpu_vendorid.h
> @@ -3,4 +3,8 @@
>
>  #define THEAD_VENDOR_ID 0x5b7
>
> +#define VEYRON_V1_MARCHID   0x8000000000010000
> +#define VEYRON_V1_MIMPID0x111
> +#define VEYRON_V1_MVENDORID 0x61f
> +
>  #endif /*  TARGET_RISCV_CPU_VENDORID_H */
> --
> 2.40.0
>
>



Re: [PATCH] target/riscv: add Ventana's Veyron V1 CPU

2023-04-20 Thread Alistair Francis
On Tue, Apr 18, 2023 at 10:37 PM Daniel Henrique Barboza
 wrote:
>
> From: Rahul Pathak 
>
> Add a virtual CPU for Ventana's first CPU named veyron-v1. It runs
> exclusively for the rv64 target. It's tested with the 'virt' board.
>
> CPU specs and general information can be found here:
>
> https://www.nextplatform.com/2023/02/02/the-first-risc-v-shot-across-the-datacenter-bow/
>
> Signed-off-by: Rahul Pathak 
> Signed-off-by: Mayuresh Chitale 
> Signed-off-by: Daniel Henrique Barboza 

Acked-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu-qom.h  |  1 +
>  target/riscv/cpu.c  | 38 +
>  target/riscv/cpu_vendorid.h |  4 
>  3 files changed, 43 insertions(+)
>
> diff --git a/target/riscv/cpu-qom.h b/target/riscv/cpu-qom.h
> index b29090ad86..04af50983e 100644
> --- a/target/riscv/cpu-qom.h
> +++ b/target/riscv/cpu-qom.h
> @@ -41,6 +41,7 @@
>  #define TYPE_RISCV_CPU_SIFIVE_U34   RISCV_CPU_TYPE_NAME("sifive-u34")
>  #define TYPE_RISCV_CPU_SIFIVE_U54   RISCV_CPU_TYPE_NAME("sifive-u54")
>  #define TYPE_RISCV_CPU_THEAD_C906   RISCV_CPU_TYPE_NAME("thead-c906")
> +#define TYPE_RISCV_CPU_VEYRON_V1    RISCV_CPU_TYPE_NAME("veyron-v1")
>  #define TYPE_RISCV_CPU_HOST RISCV_CPU_TYPE_NAME("host")
>
>  #if defined(TARGET_RISCV32)
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index befa64528f..db0875fb43 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -422,6 +422,43 @@ static void rv64_thead_c906_cpu_init(Object *obj)
>  #endif
>  }
>
> +static void rv64_veyron_v1_cpu_init(Object *obj)
> +{
> +CPURISCVState *env = &RISCV_CPU(obj)->env;
> +RISCVCPU *cpu = RISCV_CPU(obj);
> +
> +set_misa(env, MXL_RV64, RVG | RVC | RVS | RVU | RVH);
> +env->priv_ver = PRIV_VERSION_1_12_0;
> +
> +/* Enable ISA extensions */
> +cpu->cfg.mmu = true;
> +cpu->cfg.ext_icbom = true;
> +cpu->cfg.cbom_blocksize = 64;
> +cpu->cfg.cboz_blocksize = 64;
> +cpu->cfg.ext_icboz = true;
> +cpu->cfg.ext_smaia = true;
> +cpu->cfg.ext_ssaia = true;
> +cpu->cfg.ext_sscofpmf = true;
> +cpu->cfg.ext_sstc = true;
> +cpu->cfg.ext_svinval = true;
> +cpu->cfg.ext_svnapot = true;
> +cpu->cfg.ext_svpbmt = true;
> +cpu->cfg.ext_smstateen = true;
> +cpu->cfg.ext_zba = true;
> +cpu->cfg.ext_zbb = true;
> +cpu->cfg.ext_zbc = true;
> +cpu->cfg.ext_zbs = true;
> +cpu->cfg.ext_XVentanaCondOps = true;
> +
> +cpu->cfg.mvendorid = VEYRON_V1_MVENDORID;
> +cpu->cfg.marchid = VEYRON_V1_MARCHID;
> +cpu->cfg.mimpid = VEYRON_V1_MIMPID;
> +
> +#ifndef CONFIG_USER_ONLY
> +set_satp_mode_max_supported(cpu, VM_1_10_SV48);
> +#endif
> +}
> +
>  static void rv128_base_cpu_init(Object *obj)
>  {
>  if (qemu_tcg_mttcg_enabled()) {
> @@ -1827,6 +1864,7 @@ static const TypeInfo riscv_cpu_type_infos[] = {
>  DEFINE_CPU(TYPE_RISCV_CPU_SIFIVE_U54,   rv64_sifive_u_cpu_init),
>  DEFINE_CPU(TYPE_RISCV_CPU_SHAKTI_C, rv64_sifive_u_cpu_init),
>  DEFINE_CPU(TYPE_RISCV_CPU_THEAD_C906,   rv64_thead_c906_cpu_init),
> +DEFINE_CPU(TYPE_RISCV_CPU_VEYRON_V1,rv64_veyron_v1_cpu_init),
>  DEFINE_DYNAMIC_CPU(TYPE_RISCV_CPU_BASE128,  rv128_base_cpu_init),
>  #endif
>  };
> diff --git a/target/riscv/cpu_vendorid.h b/target/riscv/cpu_vendorid.h
> index a5aa249bc9..96b6b9c2cb 100644
> --- a/target/riscv/cpu_vendorid.h
> +++ b/target/riscv/cpu_vendorid.h
> @@ -3,4 +3,8 @@
>
>  #define THEAD_VENDOR_ID 0x5b7
>
> +#define VEYRON_V1_MARCHID   0x8000000000010000
> +#define VEYRON_V1_MIMPID0x111
> +#define VEYRON_V1_MVENDORID 0x61f
> +
>  #endif /*  TARGET_RISCV_CPU_VENDORID_H */
> --
> 2.40.0
>
>



Re: [PATCH RESEND v7 12/12] target/riscv: forbid write_misa() for static CPUs

2023-04-20 Thread Alistair Francis
On Thu, Apr 20, 2023 at 7:22 PM Daniel Henrique Barboza
 wrote:
>
> Static CPUs don't want their extensions changed by user interaction. We
> can prevent it during init by not exposing user facing properties, but
> write_misa() is also capable of disabling/enabling extension during
> runtime.
>
> We have a way of telling whether a CPU is static or not by checking for
> TYPE_RISCV_DYNAMIC_CPU. Use it to make write_misa() a no-op for these
> CPUs.
>
> Signed-off-by: Daniel Henrique Barboza 
> ---
>  target/riscv/cpu.c | 5 +
>  target/riscv/cpu.h | 2 ++
>  target/riscv/csr.c | 5 +
>  3 files changed, 12 insertions(+)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index 4fa720a39d..3cbcf6d320 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -1452,6 +1452,11 @@ static void riscv_cpu_init(Object *obj)
>  #endif /* CONFIG_USER_ONLY */
>  }
>
> +bool riscv_cpu_is_static(RISCVCPU *cpu)
> +{
> +return object_dynamic_cast(OBJECT(cpu), TYPE_RISCV_DYNAMIC_CPU) == NULL;
> +}
> +
>  typedef struct RISCVCPUMisaExtConfig {
>  const char *name;
>  const char *description;
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 1f39edc687..1913ab9d8d 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -587,6 +587,8 @@ G_NORETURN void riscv_raise_exception(CPURISCVState *env,
>  target_ulong riscv_cpu_get_fflags(CPURISCVState *env);
>  void riscv_cpu_set_fflags(CPURISCVState *env, target_ulong);
>
> +bool riscv_cpu_is_static(RISCVCPU *cpu);
> +
>  #include "exec/cpu-all.h"
>
>  FIELD(TB_FLAGS, MEM_IDX, 0, 3)
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index d449da2657..929c5477dd 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -1391,6 +1391,11 @@ static RISCVException write_misa(CPURISCVState *env, 
> int csrno,
>  uint32_t orig_misa_ext = env->misa_ext;
>  Error *local_err = NULL;
>
> +if (riscv_cpu_is_static(cpu)) {
> +/* never write MISA for static CPUs */
> +return RISCV_EXCP_NONE;
> +}

Do we actually need this? We already check misa_w which would be
disabled. What's the harm in allowing someone to manually enable
misa_w and then change the MISA?

Also, it's possible that static CPUs actually support changing the
MISA value at run time.

Alistair

> +
>  if (!riscv_cpu_cfg(env)->misa_w) {
>  /* drop write to misa */
>  return RISCV_EXCP_NONE;
> --
> 2.40.0
>
>



Re: [PATCH RESEND v7 11/12] target/riscv: rework write_misa()

2023-04-20 Thread Alistair Francis
On Thu, Apr 20, 2023 at 7:22 PM Daniel Henrique Barboza
 wrote:
>
> write_misa() must use as much common logic as possible. We want to open
> code just the bits that are exclusive to the CSR write operation and TCG
> internals.
>
> Our validation is done with riscv_cpu_validate_set_extensions(), but we
> need a small tweak first. When enabling RVG we're doing:
>
> env->misa_ext |= RVI | RVM | RVA | RVF | RVD;
> env->misa_ext_mask = env->misa_ext;
>
> This works fine for realize() time but this can potentially overwrite
> env->misa_ext_mask if we reutilize the function for write_misa().
> Instead of doing misa_ext_mask = misa_ext, sum up the RVG extensions in
> misa_ext_mask as well. This won't change realize() time behavior
> (misa_ext_mask is still == misa_ext)  and will ensure that write_misa()
> won't change misa_ext_mask by accident.
>
> After that, rewrite write_misa() to work as follows:
>
> - mask the write using misa_ext_mask to avoid enabling unsupported
>   extensions;
>
> - suppress RVC if the next insn isn't aligned;
>
> - disable RVG if any of RVG dependencies are being disabled by the user;
>
> - assign env->misa_ext and run riscv_cpu_validate_set_extensions(). On
>   error, rollback to the previous values of misa_ext and misa_ext_mask;
>
> - on success, check if there's a chance that misa_ext_mask was
>   overwritten during the process and restore it;

Is this right? If the guest does a combined valid/invalid modification
shouldn't the valid modifications stick?

Alistair

>
> - handle RVF and MSTATUS_FS and continue as usual.
>
> Let's keep write_misa() as experimental for now until this logic gains
> enough mileage.
>
> Signed-off-by: Daniel Henrique Barboza 
> Reviewed-by: Weiwei Li 
> ---
>  target/riscv/cpu.c |  4 ++--
>  target/riscv/cpu.h |  1 +
>  target/riscv/csr.c | 47 --
>  3 files changed, 23 insertions(+), 29 deletions(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index 7d407321aa..4fa720a39d 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -944,7 +944,7 @@ static void riscv_cpu_validate_misa_mxl(RISCVCPU *cpu, 
> Error **errp)
>   * Check consistency between chosen extensions while setting
>   * cpu->cfg accordingly.
>   */
> -static void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
> +void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp)
>  {
>  CPURISCVState *env = >env;
>  Error *local_err = NULL;
> @@ -960,7 +960,7 @@ static void riscv_cpu_validate_set_extensions(RISCVCPU 
> *cpu, Error **errp)
>  cpu->cfg.ext_ifencei = true;
>
>  env->misa_ext |= RVI | RVM | RVA | RVF | RVD;
> -env->misa_ext_mask = env->misa_ext;
> +env->misa_ext_mask |= RVI | RVM | RVA | RVF | RVD;
>  }
>
>  if (riscv_has_ext(env, RVI) && riscv_has_ext(env, RVE)) {
> diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
> index 15423585d0..1f39edc687 100644
> --- a/target/riscv/cpu.h
> +++ b/target/riscv/cpu.h
> @@ -548,6 +548,7 @@ bool riscv_cpu_tlb_fill(CPUState *cs, vaddr address, int 
> size,
>  bool probe, uintptr_t retaddr);
>  char *riscv_isa_string(RISCVCPU *cpu);
>  void riscv_cpu_list(void);
> +void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp);
>
>  #define cpu_list riscv_cpu_list
>  #define cpu_mmu_index riscv_cpu_mmu_index
> diff --git a/target/riscv/csr.c b/target/riscv/csr.c
> index 865ee9efda..d449da2657 100644
> --- a/target/riscv/csr.c
> +++ b/target/riscv/csr.c
> @@ -1387,39 +1387,18 @@ static RISCVException read_misa(CPURISCVState *env, 
> int csrno,
>  static RISCVException write_misa(CPURISCVState *env, int csrno,
>   target_ulong val)
>  {
> +RISCVCPU *cpu = env_archcpu(env);
> +uint32_t orig_misa_ext = env->misa_ext;
> +Error *local_err = NULL;
> +
>  if (!riscv_cpu_cfg(env)->misa_w) {
>  /* drop write to misa */
>  return RISCV_EXCP_NONE;
>  }
>
> -/* 'I' or 'E' must be present */
> -if (!(val & (RVI | RVE))) {
> -/* It is not, drop write to misa */
> -return RISCV_EXCP_NONE;
> -}
> -
> -/* 'E' excludes all other extensions */
> -if (val & RVE) {
> -/*
> - * when we support 'E' we can do "val = RVE;" however
> - * for now we just drop writes if 'E' is present.
> - */
> -return RISCV_EXCP_NONE;
> -}
> -
> -/*
> - * misa.MXL writes are not supported by QEMU.
> - * Drop writes to those bits.
> - */
> -
>  /* Mask extensions that are not supported by this hart */
>  val &= env->misa_ext_mask;
>
> -/* 'D' depends on 'F', so clear 'D' if 'F' is not present */
> -if ((val & RVD) && !(val & RVF)) {
> -val &= ~RVD;
> -}
> -
>  /*
>   * Suppress 'C' if next instruction is not aligned
>   * TODO: this should check next_pc
> @@ -1428,18 +1407,32 @@ static RISCVException 

Re: [PATCH RESEND v7 05/12] target/riscv: Mask the implicitly enabled extensions in isa_string based on priv version

2023-04-20 Thread Alistair Francis
On Thu, Apr 20, 2023 at 7:23 PM Daniel Henrique Barboza
 wrote:
>
> From: Weiwei Li 
>
> Using implicitly enabled extensions such as Zca/Zcf/Zcd instead of their
> super extensions can simplify the extension related check. However, they
> may have higher priv version than their super extensions. So we should mask
> them in the isa_string based on priv version to make them invisible to user
> if the specified priv version is lower than their minimal priv version.
>
> Signed-off-by: Weiwei Li 
> Signed-off-by: Junqiang Wang 
> Reviewed-by: Daniel Henrique Barboza 

Acked-by: Alistair Francis 

Alistair

> ---
>  target/riscv/cpu.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
> index dd35cf378f..9bb0e6b180 100644
> --- a/target/riscv/cpu.c
> +++ b/target/riscv/cpu.c
> @@ -1721,7 +1721,8 @@ static void riscv_isa_string_ext(RISCVCPU *cpu, char 
> **isa_str,
>  int i;
>
>  for (i = 0; i < ARRAY_SIZE(isa_edata_arr); i++) {
> -if (isa_ext_is_enabled(cpu, &isa_edata_arr[i])) {
> +if (cpu->env.priv_ver >= isa_edata_arr[i].min_version &&
> +isa_ext_is_enabled(cpu, &isa_edata_arr[i])) {
>  new = g_strconcat(old, "_", isa_edata_arr[i].name, NULL);
>  g_free(old);
>  old = new;
> --
> 2.40.0
>
>



Re: [PATCH v3] riscv: Make sure an exception is raised if a pte is malformed

2023-04-20 Thread Alistair Francis
On Fri, Apr 21, 2023 at 1:07 AM Alexandre Ghiti  wrote:
>
> As per the specification, in 64-bit, if any of the pte reserved bits
> 60-54 is set an exception should be triggered (see 4.4.1, "Addressing and
> Memory Protection"). In addition, we must check the napot/pbmt bits are
> not set if those extensions are not active.
>
> Reported-by: Andrea Parri 
> Signed-off-by: Alexandre Ghiti 
> Reviewed-by: Alistair Francis 

Thanks!

Applied to riscv-to-apply.next

Alistair

> ---
> Changes in v3:
> - Rebase on top of https://github.com/alistair23/qemu/tree/riscv-to-apply.next
>
> Changes in v2:
> - Handle napot and pbmt exception
>
>  target/riscv/cpu_bits.h   |  1 +
>  target/riscv/cpu_helper.c | 15 +++
>  2 files changed, 12 insertions(+), 4 deletions(-)
>
> diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h
> index fb63b8e125..59f0ffd9e1 100644
> --- a/target/riscv/cpu_bits.h
> +++ b/target/riscv/cpu_bits.h
> @@ -644,6 +644,7 @@ typedef enum {
>  #define PTE_SOFT     0x300 /* Reserved for Software */
>  #define PTE_PBMT     0x6000000000000000ULL /* Page-based memory types */
>  #define PTE_N        0x8000000000000000ULL /* NAPOT translation */
> +#define PTE_RESERVED 0x1FC0000000000000ULL /* Reserved bits */
>  #define PTE_ATTR     (PTE_N | PTE_PBMT) /* All attributes bits */
>
>  /* Page table PPN shift amount */
> diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
> index b68dcfe7b6..57d04385f1 100644
> --- a/target/riscv/cpu_helper.c
> +++ b/target/riscv/cpu_helper.c
> @@ -927,13 +927,20 @@ restart:
>
>  if (riscv_cpu_sxl(env) == MXL_RV32) {
>  ppn = pte >> PTE_PPN_SHIFT;
> -} else if (pbmte || riscv_cpu_cfg(env)->ext_svnapot) {
> -ppn = (pte & (target_ulong)PTE_PPN_MASK) >> PTE_PPN_SHIFT;
>  } else {
> -ppn = pte >> PTE_PPN_SHIFT;
> -if ((pte & ~(target_ulong)PTE_PPN_MASK) >> PTE_PPN_SHIFT) {
> +if (pte & PTE_RESERVED) {
> +return TRANSLATE_FAIL;
> +}
> +
> +if (!pbmte && (pte & PTE_PBMT)) {
>  return TRANSLATE_FAIL;
>  }
> +
> +if (!riscv_cpu_cfg(env)->ext_svnapot && (pte & PTE_N)) {
> +return TRANSLATE_FAIL;
> +}
> +
> +ppn = (pte & (target_ulong)PTE_PPN_MASK) >> PTE_PPN_SHIFT;
>  }
>
>  if (!(pte & PTE_V)) {
> --
> 2.37.2
>
>
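
For readers following the logic, the new checks can be read as a single
standalone predicate. The sketch below is illustrative only (the helper name
is invented; the mask values follow the RISC-V privileged spec: bits 62:61
for PBMT, bit 63 for N, bits 60:54 reserved):

    #include <stdbool.h>
    #include <stdint.h>

    #define PTE_PBMT     0x6000000000000000ULL /* bits 62:61, Svpbmt */
    #define PTE_N        0x8000000000000000ULL /* bit 63, Svnapot */
    #define PTE_RESERVED 0x1FC0000000000000ULL /* bits 60:54, reserved */

    /* Hypothetical helper mirroring the checks added above: returns
     * true if translating through this 64-bit PTE must fail. */
    static bool pte_is_malformed(uint64_t pte, bool pbmte, bool svnapot)
    {
        if (pte & PTE_RESERVED) {
            return true;   /* reserved bits must be zero */
        }
        if (!pbmte && (pte & PTE_PBMT)) {
            return true;   /* PBMT bits require Svpbmt */
        }
        if (!svnapot && (pte & PTE_N)) {
            return true;   /* NAPOT bit requires Svnapot */
        }
        return false;
    }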



[PATCH] hvf: Enable 1G page support

2023-04-20 Thread Alexander Graf
Hvf on x86 only supported 2MiB large pages, but never bothered to strip
out the 1GiB page size capability from -cpu host. With QEMU 8.0.0 this
became a problem because OVMF started to use 1GiB pages by default.

Let's just unconditionally add 1GiB page walk support to the walker.

With this fix applied, I can successfully run OVMF again.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1603
Signed-off-by: Alexander Graf 
Reported-by: Akihiro Suda 
Reported-by: Philippe Mathieu-Daudé 

---

On my test VM, Linux dies later on with issues in interrupt delivery. But
those are unrelated to this patch; I confirmed that I get the same behavior
with 1GiB page support disabled.
---
 target/i386/hvf/x86_mmu.c | 30 --
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/target/i386/hvf/x86_mmu.c b/target/i386/hvf/x86_mmu.c
index 96d117567e..1d860651c6 100644
--- a/target/i386/hvf/x86_mmu.c
+++ b/target/i386/hvf/x86_mmu.c
@@ -38,6 +38,7 @@
 #define LEGACY_PTE_PAGE_MASK    (0xffffffffllu << 12)
 #define PAE_PTE_PAGE_MASK   ((-1llu << 12) & ((1llu << 52) - 1))
 #define PAE_PTE_LARGE_PAGE_MASK ((-1llu << (21)) & ((1llu << 52) - 1))
+#define PAE_PTE_SUPER_PAGE_MASK ((-1llu << (30)) & ((1llu << 52) - 1))
 
 struct gpt_translation {
 target_ulong  gva;
@@ -96,7 +97,7 @@ static bool get_pt_entry(struct CPUState *cpu, struct 
gpt_translation *pt,
 
 /* test page table entry */
 static bool test_pt_entry(struct CPUState *cpu, struct gpt_translation *pt,
-  int level, bool *is_large, bool pae)
+  int level, int *largeness, bool pae)
 {
 uint64_t pte = pt->pte[level];
 
@@ -118,9 +119,9 @@ static bool test_pt_entry(struct CPUState *cpu, struct 
gpt_translation *pt,
 goto exit;
 }
 
-if (1 == level && pte_large_page(pte)) {
+if (level && pte_large_page(pte)) {
 pt->err_code |= MMU_PAGE_PT;
-*is_large = true;
+*largeness = level;
 }
 if (!level) {
 pt->err_code |= MMU_PAGE_PT;
@@ -152,9 +153,18 @@ static inline uint64_t pse_pte_to_page(uint64_t pte)
 return ((pte & 0x1fe000) << 19) | (pte & 0xffc00000);
 }
 
-static inline uint64_t large_page_gpa(struct gpt_translation *pt, bool pae)
+static inline uint64_t large_page_gpa(struct gpt_translation *pt, bool pae,
+  int largeness)
 {
-VM_PANIC_ON(!pte_large_page(pt->pte[1]))
+VM_PANIC_ON(!pte_large_page(pt->pte[largeness]))
+
+/* 1GiB large page  */
+if (pae && largeness == 2) {
+return (pt->pte[2] & PAE_PTE_SUPER_PAGE_MASK) | (pt->gva & 0x3fffffff);
+}
+
+VM_PANIC_ON(largeness != 1)
+
 /* 2Mb large page  */
 if (pae) {
 return (pt->pte[1] & PAE_PTE_LARGE_PAGE_MASK) | (pt->gva & 0x1fffff);
@@ -170,7 +180,7 @@ static bool walk_gpt(struct CPUState *cpu, target_ulong 
addr, int err_code,
  struct gpt_translation *pt, bool pae)
 {
 int top_level, level;
-bool is_large = false;
+int largeness = 0;
 target_ulong cr3 = rvmcs(cpu->hvf->fd, VMCS_GUEST_CR3);
 uint64_t page_mask = pae ? PAE_PTE_PAGE_MASK : LEGACY_PTE_PAGE_MASK;
 
@@ -186,19 +196,19 @@ static bool walk_gpt(struct CPUState *cpu, target_ulong 
addr, int err_code,
 for (level = top_level; level > 0; level--) {
 get_pt_entry(cpu, pt, level, pae);
 
-if (!test_pt_entry(cpu, pt, level - 1, &is_large, pae)) {
+if (!test_pt_entry(cpu, pt, level - 1, &largeness, pae)) {
 return false;
 }
 
-if (is_large) {
+if (largeness) {
 break;
 }
 }
 
-if (!is_large) {
+if (!largeness) {
 pt->gpa = (pt->pte[0] & page_mask) | (pt->gva & 0xfff);
 } else {
-pt->gpa = large_page_gpa(pt, pae);
+pt->gpa = large_page_gpa(pt, pae, largeness);
 }
 
 return true;
-- 
2.39.2 (Apple Git-143)
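
To make the new address math concrete: for each page size the walker keeps
the PTE's page-frame bits and fills in the low bits from the guest virtual
address. A hedged, standalone rendering (the function name is made up for
illustration):

    #include <stdint.h>

    #define PAE_PTE_PAGE_MASK       ((-1llu << 12) & ((1llu << 52) - 1))
    #define PAE_PTE_LARGE_PAGE_MASK ((-1llu << 21) & ((1llu << 52) - 1))
    #define PAE_PTE_SUPER_PAGE_MASK ((-1llu << 30) & ((1llu << 52) - 1))

    /* Compose a guest physical address from a PAE PTE and the
     * page-offset bits of the guest virtual address. */
    static uint64_t pae_gpa(uint64_t pte, uint64_t gva, int largeness)
    {
        switch (largeness) {
        case 2:  /* 1GiB page: the low 30 bits are the offset */
            return (pte & PAE_PTE_SUPER_PAGE_MASK) | (gva & 0x3fffffffULL);
        case 1:  /* 2MiB page: the low 21 bits */
            return (pte & PAE_PTE_LARGE_PAGE_MASK) | (gva & 0x1fffffULL);
        default: /* 4KiB page: the low 12 bits */
            return (pte & PAE_PTE_PAGE_MASK) | (gva & 0xfffULL);
        }
    }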




[PATCH 1/8] accel: Replace `target_ulong` with `vaddr` in TB/TLB

2023-04-20 Thread Anton Johansson via
Changes pc and cs_base in TranslationBlock from target_ulong to vaddr.
Auxiliary structs and tb_*()/tlb_*() functions that depend on this
change are also updated to take a vaddr for guest virtual addresses.

Signed-off-by: Anton Johansson 
---
 accel/stubs/tcg-stub.c   |   2 +-
 accel/tcg/cpu-exec.c |  49 +-
 accel/tcg/cputlb.c   | 179 +--
 accel/tcg/internal.h |   6 +-
 accel/tcg/tb-hash.h  |  12 +--
 accel/tcg/tb-jmp-cache.h |   2 +-
 accel/tcg/tb-maint.c |   2 +-
 accel/tcg/translate-all.c|  15 +--
 include/exec/cpu-defs.h  |   4 +-
 include/exec/cpu_ldst.h  |   6 +-
 include/exec/exec-all.h  |  82 
 include/qemu/plugin-memory.h |   2 +-
 12 files changed, 181 insertions(+), 180 deletions(-)

diff --git a/accel/stubs/tcg-stub.c b/accel/stubs/tcg-stub.c
index 813695b402..0998e601ad 100644
--- a/accel/stubs/tcg-stub.c
+++ b/accel/stubs/tcg-stub.c
@@ -18,7 +18,7 @@ void tb_flush(CPUState *cpu)
 {
 }
 
-void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
+void tlb_set_dirty(CPUState *cpu, vaddr vaddr)
 {
 }
 
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 8370c92c05..426e3fbea7 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -169,8 +169,8 @@ uint32_t curr_cflags(CPUState *cpu)
 }
 
 struct tb_desc {
-target_ulong pc;
-target_ulong cs_base;
+vaddr pc;
+vaddr cs_base;
 CPUArchState *env;
 tb_page_addr_t page_addr0;
 uint32_t flags;
@@ -195,7 +195,7 @@ static bool tb_lookup_cmp(const void *p, const void *d)
 return true;
 } else {
 tb_page_addr_t phys_page1;
-target_ulong virt_page1;
+vaddr virt_page1;
 
 /*
  * We know that the first page matched, and an otherwise valid TB
@@ -216,8 +216,8 @@ static bool tb_lookup_cmp(const void *p, const void *d)
 return false;
 }
 
-static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
-  target_ulong cs_base, uint32_t flags,
+static TranslationBlock *tb_htable_lookup(CPUState *cpu, vaddr pc,
+  vaddr cs_base, uint32_t flags,
   uint32_t cflags)
 {
 tb_page_addr_t phys_pc;
@@ -241,8 +241,8 @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, 
target_ulong pc,
 }
 
 /* Might cause an exception, so have a longjmp destination ready */
-static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
-  target_ulong cs_base,
+static inline TranslationBlock *tb_lookup(CPUState *cpu, vaddr pc,
+  vaddr cs_base,
   uint32_t flags, uint32_t cflags)
 {
 TranslationBlock *tb;
@@ -297,13 +297,13 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, 
target_ulong pc,
 return tb;
 }
 
-static void log_cpu_exec(target_ulong pc, CPUState *cpu,
+static void log_cpu_exec(vaddr pc, CPUState *cpu,
  const TranslationBlock *tb)
 {
 if (qemu_log_in_addr_range(pc)) {
 qemu_log_mask(CPU_LOG_EXEC,
-  "Trace %d: %p [" TARGET_FMT_lx
-  "/" TARGET_FMT_lx "/%08x/%08x] %s\n",
+  "Trace %d: %p [%" VADDR_PRIx
+  "/%" VADDR_PRIx "/%08x/%08x] %s\n",
   cpu->cpu_index, tb->tc.ptr, tb->cs_base, pc,
   tb->flags, tb->cflags, lookup_symbol(pc));
 
@@ -327,7 +327,7 @@ static void log_cpu_exec(target_ulong pc, CPUState *cpu,
 }
 }
 
-static bool check_for_breakpoints_slow(CPUState *cpu, target_ulong pc,
+static bool check_for_breakpoints_slow(CPUState *cpu, vaddr pc,
uint32_t *cflags)
 {
 CPUBreakpoint *bp;
@@ -393,7 +393,7 @@ static bool check_for_breakpoints_slow(CPUState *cpu, 
target_ulong pc,
 return false;
 }
 
-static inline bool check_for_breakpoints(CPUState *cpu, target_ulong pc,
+static inline bool check_for_breakpoints(CPUState *cpu, vaddr pc,
  uint32_t *cflags)
 {
return unlikely(!QTAILQ_EMPTY(&cpu->breakpoints)) &&
@@ -412,10 +412,11 @@ const void *HELPER(lookup_tb_ptr)(CPUArchState *env)
 {
 CPUState *cpu = env_cpu(env);
 TranslationBlock *tb;
-target_ulong cs_base, pc;
+vaddr cs_base = 0, pc = 0;
 uint32_t flags, cflags;
 
-cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
+cpu_get_tb_cpu_state(env, (target_ulong *) &pc,
+ (target_ulong *) &cs_base, &flags);
 
 cflags = curr_cflags(cpu);
 if (check_for_breakpoints(cpu, pc, )) {
@@ -488,10 +489,10 @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int 
*tb_exit)
 cc->set_pc(cpu, last_tb->pc);
 }
 if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
-target_ulong pc = log_pc(cpu, last_tb);

[PATCH 8/8] tcg: Replace target_ulong with vaddr in tcg_gen_code()

2023-04-20 Thread Anton Johansson via
Signed-off-by: Anton Johansson 
---
 include/tcg/tcg.h | 2 +-
 tcg/tcg.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index 5cfaa53938..b8dbc66610 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -852,7 +852,7 @@ void tcg_register_thread(void);
 void tcg_prologue_init(TCGContext *s);
 void tcg_func_start(TCGContext *s);
 
-int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start);
+int tcg_gen_code(TCGContext *s, TranslationBlock *tb, vaddr pc_start);
 
 void tb_target_set_jmp_target(const TranslationBlock *, int,
   uintptr_t, uintptr_t);
diff --git a/tcg/tcg.c b/tcg/tcg.c
index bb52bc060b..3823c3156a 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -4922,7 +4922,7 @@ int64_t tcg_cpu_exec_time(void)
 #endif
 
 
-int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
+int tcg_gen_code(TCGContext *s, TranslationBlock *tb, vaddr pc_start)
 {
 #ifdef CONFIG_PROFILER
 TCGProfile *prof = &s->prof;
-- 
2.39.1




[PATCH 4/8] accel/tcg: Replace target_ulong with vaddr in helper_unaligned_*()

2023-04-20 Thread Anton Johansson via
Updates helper_unaligned_[ld|st] to take the store/load address as a
vaddr instead of a target_ulong.

Signed-off-by: Anton Johansson 
---
 accel/tcg/user-exec.c  | 4 ++--
 include/tcg/tcg-ldst.h | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index 7ec49933fb..d0673de771 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -904,12 +904,12 @@ static void validate_memop(MemOpIdx oi, MemOp expected)
 #endif
 }
 
-void helper_unaligned_ld(CPUArchState *env, target_ulong addr)
+void helper_unaligned_ld(CPUArchState *env, vaddr addr)
 {
 cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_LOAD, GETPC());
 }
 
-void helper_unaligned_st(CPUArchState *env, target_ulong addr)
+void helper_unaligned_st(CPUArchState *env, vaddr addr)
 {
 cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_STORE, GETPC());
 }
diff --git a/include/tcg/tcg-ldst.h b/include/tcg/tcg-ldst.h
index 2ba22bd5fe..cad7782e2e 100644
--- a/include/tcg/tcg-ldst.h
+++ b/include/tcg/tcg-ldst.h
@@ -72,8 +72,8 @@ void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, 
uint64_t val,
 
 #else
 
-G_NORETURN void helper_unaligned_ld(CPUArchState *env, target_ulong addr);
-G_NORETURN void helper_unaligned_st(CPUArchState *env, target_ulong addr);
+G_NORETURN void helper_unaligned_ld(CPUArchState *env, vaddr addr);
+G_NORETURN void helper_unaligned_st(CPUArchState *env, vaddr addr);
 
 #endif /* CONFIG_SOFTMMU */
 #endif /* TCG_LDST_H */
-- 
2.39.1




[PATCH 0/8] Start replacing target_ulong with vaddr

2023-04-20 Thread Anton Johansson via
This is a first patchset in removing target_ulong from non-target/
directories.  As use of target_ulong is spread across the codebase we
are attempting to target as few maintainers as possible with each
patchset in order to ease reviewing.

The following instances of target_ulong remain in accel/ and tcg/
- atomic helpers (atomic_common.c.inc), cpu_atomic_*()
  (atomic_template.h,) and cpu_[st|ld]*()
  (cputlb.c/ldst_common.c.inc) are only used in target/ and can
  be pulled out into a separate target-specific file;

- Calls to cpu_get_tb_cpu_state() cast pc and cs_base to
  target_ulong in order to avoid having to touch the target/
  directory in this patchset;

- walk_memory_regions() is used in user-exec.c and
  linux-user/elfload.c;

- helper_ret_*_mmu() is used in tcg/*/tcg-target.c.inc and
  tcg/tci.c;

- Functions in translate-all.c dealing with TCGContext.gen_insn_data
  will be switched off target_ulong once gen_insn_data and
  TARGET_INSN_START_WORDS have been dealt with;

- kvm_find_sw_breakpoint() in kvm-all.c used in target/;

- The last address when looking up page flags for an interval
  [start,last] in pageflags_find()/pageflags_next() is still
  target_long.  I'm not sure why this is required.  This only
  seems relevant on 32-bit guests since the addresses passed
  to the interval tree are uint64_t's. Are we relying on a
  sign extension here for some reason?

- CPUTLBEntry and functions that read from it will be left for a
  later date;

Smaller patchsets implementing above changes will follow.

Finally, the grand goal is to allow for heterogeneous QEMU binaries
consisting of multiple frontends.

RFC: https://lists.nongnu.org/archive/html/qemu-devel/2022-12/msg04518.html

Anton Johansson (8):
  accel: Replace `target_ulong` with `vaddr` in TB/TLB
  accel: Replace target_ulong with vaddr in probe_*()
  accel/tcg: Replace target_ulong with vaddr in *_mmu_lookup()
  accel/tcg: Replace target_ulong with vaddr in helper_unaligned_*()
  accel/tcg: Replace target_ulong with vaddr in translator_*()
  accel/tcg: Replace target_ulong with vaddr in page_*()
  cpu: Replace target_ulong with vaddr in tb_invalidate_phys_addr()
  tcg: Replace target_ulong with vaddr in tcg_gen_code()

 accel/stubs/tcg-stub.c   |   6 +-
 accel/tcg/cpu-exec.c |  49 -
 accel/tcg/cputlb.c   | 195 +--
 accel/tcg/internal.h |   6 +-
 accel/tcg/tb-hash.h  |  12 +--
 accel/tcg/tb-jmp-cache.h |   2 +-
 accel/tcg/tb-maint.c |   2 +-
 accel/tcg/translate-all.c|  15 +--
 accel/tcg/translator.c   |  10 +-
 accel/tcg/user-exec.c|  59 ++-
 cpu.c|   2 +-
 include/exec/cpu-all.h   |  10 +-
 include/exec/cpu-defs.h  |   4 +-
 include/exec/cpu_ldst.h  |   6 +-
 include/exec/exec-all.h  |  98 +-
 include/exec/translate-all.h |   2 +-
 include/exec/translator.h|   6 +-
 include/qemu/plugin-memory.h |   2 +-
 include/tcg/tcg-ldst.h   |   4 +-
 include/tcg/tcg.h|   2 +-
 tcg/tcg.c|   2 +-
 21 files changed, 247 insertions(+), 247 deletions(-)

--
2.39.1
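
As a rough, self-contained illustration of why vaddr can replace
target_ulong in these interfaces: QEMU defines vaddr as an unconditionally
64-bit type with fixed format macros, so one declaration and one format
string serve 32-bit and 64-bit guests alike (a distilled sketch, not QEMU
source):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Always wide enough for any guest virtual address, unlike
     * target_ulong, whose width tracks the compiled target. */
    typedef uint64_t vaddr;
    #define VADDR_PRIx PRIx64

    static void log_trace(int cpu_index, vaddr pc)
    {
        /* One format string for every target, where TARGET_FMT_lx
         * differs between 32-bit and 64-bit builds. */
        printf("Trace %d: [%" VADDR_PRIx "]\n", cpu_index, pc);
    }

    int main(void)
    {
        log_trace(0, 0xffffffff80001000ULL);
        return 0;
    }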



[PATCH 3/8] accel/tcg: Replace target_ulong with vaddr in *_mmu_lookup()

2023-04-20 Thread Anton Johansson via
Update atomic_mmu_lookup() and cpu_mmu_lookup() to take the guest
virtual address as a vaddr instead of a target_ulong.

Signed-off-by: Anton Johansson 
---
 accel/tcg/cputlb.c| 4 ++--
 accel/tcg/user-exec.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index f35da2fc3b..bf4858c116 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1753,7 +1753,7 @@ bool tlb_plugin_lookup(CPUState *cpu, vaddr addr, int 
mmu_idx,
  *
  * @prot may be PAGE_READ, PAGE_WRITE, or PAGE_READ|PAGE_WRITE.
  */
-static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
+static void *atomic_mmu_lookup(CPUArchState *env, vaddr addr,
MemOpIdx oi, int size, int prot,
uintptr_t retaddr)
 {
@@ -1762,7 +1762,7 @@ static void *atomic_mmu_lookup(CPUArchState *env, 
target_ulong addr,
 int a_bits = get_alignment_bits(mop);
 uintptr_t index;
 CPUTLBEntry *tlbe;
-target_ulong tlb_addr;
+vaddr tlb_addr;
 void *hostaddr;
 CPUTLBEntryFull *full;
 
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index 9d49fe95e4..7ec49933fb 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -914,7 +914,7 @@ void helper_unaligned_st(CPUArchState *env, target_ulong 
addr)
 cpu_loop_exit_sigbus(env_cpu(env), addr, MMU_DATA_STORE, GETPC());
 }
 
-static void *cpu_mmu_lookup(CPUArchState *env, target_ulong addr,
+static void *cpu_mmu_lookup(CPUArchState *env, vaddr addr,
 MemOpIdx oi, uintptr_t ra, MMUAccessType type)
 {
 MemOp mop = get_memop(oi);
@@ -1226,7 +1226,7 @@ uint64_t cpu_ldq_code(CPUArchState *env, abi_ptr ptr)
  *
  * @prot may be PAGE_READ, PAGE_WRITE, or PAGE_READ|PAGE_WRITE.
  */
-static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
+static void *atomic_mmu_lookup(CPUArchState *env, vaddr addr,
MemOpIdx oi, int size, int prot,
uintptr_t retaddr)
 {
-- 
2.39.1




[PATCH 2/8] accel: Replace target_ulong with vaddr in probe_*()

2023-04-20 Thread Anton Johansson via
Functions for probing memory accesses (and functions that call these)
are updated to take a vaddr for guest virtual addresses over
target_ulong.

Signed-off-by: Anton Johansson 
---
 accel/stubs/tcg-stub.c  |  4 ++--
 accel/tcg/cputlb.c  | 12 ++--
 accel/tcg/user-exec.c   |  8 
 include/exec/exec-all.h | 14 +++---
 4 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/accel/stubs/tcg-stub.c b/accel/stubs/tcg-stub.c
index 0998e601ad..a9e7a2d5b4 100644
--- a/accel/stubs/tcg-stub.c
+++ b/accel/stubs/tcg-stub.c
@@ -26,14 +26,14 @@ void tcg_flush_jmp_cache(CPUState *cpu)
 {
 }
 
-int probe_access_flags(CPUArchState *env, target_ulong addr, int size,
+int probe_access_flags(CPUArchState *env, vaddr addr, int size,
MMUAccessType access_type, int mmu_idx,
bool nonfault, void **phost, uintptr_t retaddr)
 {
  g_assert_not_reached();
 }
 
-void *probe_access(CPUArchState *env, target_ulong addr, int size,
+void *probe_access(CPUArchState *env, vaddr addr, int size,
MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
 {
  /* Handled by hardware accelerator. */
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 7400b860aa..f35da2fc3b 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1516,7 +1516,7 @@ static void notdirty_write(CPUState *cpu, vaddr 
mem_vaddr, unsigned size,
 }
 }
 
-static int probe_access_internal(CPUArchState *env, target_ulong addr,
+static int probe_access_internal(CPUArchState *env, vaddr addr,
  int fault_size, MMUAccessType access_type,
  int mmu_idx, bool nonfault,
  void **phost, CPUTLBEntryFull **pfull,
@@ -1524,7 +1524,7 @@ static int probe_access_internal(CPUArchState *env, 
target_ulong addr,
 {
 uintptr_t index = tlb_index(env, mmu_idx, addr);
 CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
-target_ulong tlb_addr, page_addr;
+vaddr tlb_addr, page_addr;
 size_t elt_ofs;
 int flags;
 
@@ -1585,7 +1585,7 @@ static int probe_access_internal(CPUArchState *env, 
target_ulong addr,
 return flags;
 }
 
-int probe_access_full(CPUArchState *env, target_ulong addr, int size,
+int probe_access_full(CPUArchState *env, vaddr addr, int size,
   MMUAccessType access_type, int mmu_idx,
   bool nonfault, void **phost, CPUTLBEntryFull **pfull,
   uintptr_t retaddr)
@@ -1602,7 +1602,7 @@ int probe_access_full(CPUArchState *env, target_ulong 
addr, int size,
 return flags;
 }
 
-int probe_access_flags(CPUArchState *env, target_ulong addr, int size,
+int probe_access_flags(CPUArchState *env, vaddr addr, int size,
MMUAccessType access_type, int mmu_idx,
bool nonfault, void **phost, uintptr_t retaddr)
 {
@@ -1623,7 +1623,7 @@ int probe_access_flags(CPUArchState *env, target_ulong 
addr, int size,
 return flags;
 }
 
-void *probe_access(CPUArchState *env, target_ulong addr, int size,
+void *probe_access(CPUArchState *env, vaddr addr, int size,
MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
 {
 CPUTLBEntryFull *full;
@@ -1682,7 +1682,7 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
  * NOTE: This function will trigger an exception if the page is
  * not executable.
  */
-tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, target_ulong addr,
+tb_page_addr_t get_page_addr_code_hostp(CPUArchState *env, vaddr addr,
 void **hostp)
 {
 CPUTLBEntryFull *full;
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index a7e0c3e2f4..9d49fe95e4 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -721,7 +721,7 @@ int page_unprotect(target_ulong address, uintptr_t pc)
 return current_tb_invalidated ? 2 : 1;
 }
 
-static int probe_access_internal(CPUArchState *env, target_ulong addr,
+static int probe_access_internal(CPUArchState *env, vaddr addr,
  int fault_size, MMUAccessType access_type,
  bool nonfault, uintptr_t ra)
 {
@@ -759,7 +759,7 @@ static int probe_access_internal(CPUArchState *env, 
target_ulong addr,
 cpu_loop_exit_sigsegv(env_cpu(env), addr, access_type, maperr, ra);
 }
 
-int probe_access_flags(CPUArchState *env, target_ulong addr, int size,
+int probe_access_flags(CPUArchState *env, vaddr addr, int size,
MMUAccessType access_type, int mmu_idx,
bool nonfault, void **phost, uintptr_t ra)
 {
@@ -771,7 +771,7 @@ int probe_access_flags(CPUArchState *env, target_ulong 
addr, int size,
 return flags;
 }
 
-void *probe_access(CPUArchState *env, target_ulong addr, int size,
+void *probe_access(CPUArchState *env, vaddr addr, int size,
MMUAccessType access_type, int mmu_idx, 

[PATCH 7/8] cpu: Replace target_ulong with vaddr in tb_invalidate_phys_addr()

2023-04-20 Thread Anton Johansson via
Signed-off-by: Anton Johansson 
---
 cpu.c   | 2 +-
 include/exec/exec-all.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpu.c b/cpu.c
index 849bac062c..c245727ca6 100644
--- a/cpu.c
+++ b/cpu.c
@@ -293,7 +293,7 @@ void list_cpus(const char *optarg)
 }
 
 #if defined(CONFIG_USER_ONLY)
-void tb_invalidate_phys_addr(target_ulong addr)
+void tb_invalidate_phys_addr(vaddr addr)
 {
 mmap_lock();
 tb_invalidate_phys_page(addr);
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index c6cb3fcb8a..a00c298e4b 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -673,7 +673,7 @@ uint32_t curr_cflags(CPUState *cpu);
 
 /* TranslationBlock invalidate API */
 #if defined(CONFIG_USER_ONLY)
-void tb_invalidate_phys_addr(target_ulong addr);
+void tb_invalidate_phys_addr(vaddr addr);
 #else
 void tb_invalidate_phys_addr(AddressSpace *as, hwaddr addr, MemTxAttrs attrs);
 #endif
-- 
2.39.1




[PATCH 6/8] accel/tcg: Replace target_ulong with vaddr in page_*()

2023-04-20 Thread Anton Johansson via
Use vaddr for guest virtual addresses for functions dealing with page
flags.

Signed-off-by: Anton Johansson 
---
 accel/tcg/user-exec.c| 43 ++--
 include/exec/cpu-all.h   | 10 -
 include/exec/translate-all.h |  2 +-
 3 files changed, 27 insertions(+), 28 deletions(-)

diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index d0673de771..d3626c16d7 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -144,7 +144,7 @@ typedef struct PageFlagsNode {
 
 static IntervalTreeRoot pageflags_root;
 
-static PageFlagsNode *pageflags_find(target_ulong start, target_long last)
+static PageFlagsNode *pageflags_find(vaddr start, target_long last)
 {
 IntervalTreeNode *n;
 
@@ -152,7 +152,7 @@ static PageFlagsNode *pageflags_find(target_ulong start, 
target_long last)
 return n ? container_of(n, PageFlagsNode, itree) : NULL;
 }
 
-static PageFlagsNode *pageflags_next(PageFlagsNode *p, target_ulong start,
+static PageFlagsNode *pageflags_next(PageFlagsNode *p, vaddr start,
  target_long last)
 {
 IntervalTreeNode *n;
@@ -205,7 +205,7 @@ void page_dump(FILE *f)
 walk_memory_regions(f, dump_region);
 }
 
-int page_get_flags(target_ulong address)
+int page_get_flags(vaddr address)
 {
 PageFlagsNode *p = pageflags_find(address, address);
 
@@ -228,7 +228,7 @@ int page_get_flags(target_ulong address)
 }
 
 /* A subroutine of page_set_flags: insert a new node for [start,last]. */
-static void pageflags_create(target_ulong start, target_ulong last, int flags)
+static void pageflags_create(vaddr start, vaddr last, int flags)
 {
 PageFlagsNode *p = g_new(PageFlagsNode, 1);
 
@@ -239,13 +239,13 @@ static void pageflags_create(target_ulong start, 
target_ulong last, int flags)
 }
 
 /* A subroutine of page_set_flags: remove everything in [start,last]. */
-static bool pageflags_unset(target_ulong start, target_ulong last)
+static bool pageflags_unset(vaddr start, vaddr last)
 {
 bool inval_tb = false;
 
 while (true) {
 PageFlagsNode *p = pageflags_find(start, last);
-target_ulong p_last;
+vaddr p_last;
 
 if (!p) {
 break;
@@ -284,8 +284,7 @@ static bool pageflags_unset(target_ulong start, 
target_ulong last)
  * A subroutine of page_set_flags: nothing overlaps [start,last],
  * but check adjacent mappings and maybe merge into a single range.
  */
-static void pageflags_create_merge(target_ulong start, target_ulong last,
-   int flags)
+static void pageflags_create_merge(vaddr start, vaddr last, int flags)
 {
 PageFlagsNode *next = NULL, *prev = NULL;
 
@@ -336,11 +335,11 @@ static void pageflags_create_merge(target_ulong start, 
target_ulong last,
 #define PAGE_STICKY  (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)
 
 /* A subroutine of page_set_flags: add flags to [start,last]. */
-static bool pageflags_set_clear(target_ulong start, target_ulong last,
+static bool pageflags_set_clear(vaddr start, vaddr last,
 int set_flags, int clear_flags)
 {
 PageFlagsNode *p;
-target_ulong p_start, p_last;
+vaddr p_start, p_last;
 int p_flags, merge_flags;
 bool inval_tb = false;
 
@@ -480,7 +479,7 @@ static bool pageflags_set_clear(target_ulong start, 
target_ulong last,
  * The flag PAGE_WRITE_ORG is positioned automatically depending
  * on PAGE_WRITE.  The mmap_lock should already be held.
  */
-void page_set_flags(target_ulong start, target_ulong last, int flags)
+void page_set_flags(vaddr start, vaddr last, int flags)
 {
 bool reset = false;
 bool inval_tb = false;
@@ -520,9 +519,9 @@ void page_set_flags(target_ulong start, target_ulong last, 
int flags)
 }
 }
 
-int page_check_range(target_ulong start, target_ulong len, int flags)
+int page_check_range(vaddr start, vaddr len, int flags)
 {
-target_ulong last;
+vaddr last;
 int locked;  /* tri-state: =0: unlocked, +1: global, -1: local */
 int ret;
 
@@ -601,7 +600,7 @@ int page_check_range(target_ulong start, target_ulong len, 
int flags)
 void page_protect(tb_page_addr_t address)
 {
 PageFlagsNode *p;
-target_ulong start, last;
+vaddr start, last;
 int prot;
 
 assert_memory_lock();
@@ -642,7 +641,7 @@ void page_protect(tb_page_addr_t address)
  * immediately exited. (We can only return 2 if the 'pc' argument is
  * non-zero.)
  */
-int page_unprotect(target_ulong address, uintptr_t pc)
+int page_unprotect(vaddr address, uintptr_t pc)
 {
 PageFlagsNode *p;
 bool current_tb_invalidated;
@@ -676,7 +675,7 @@ int page_unprotect(target_ulong address, uintptr_t pc)
 }
 #endif
 } else {
-target_ulong start, len, i;
+vaddr start, len, i;
 int prot;
 
 if (qemu_host_page_size <= TARGET_PAGE_SIZE) {
@@ -691,7 +690,7 @@ int page_unprotect(target_ulong address, uintptr_t pc)
 prot = 0;
 
 for (i = 0; i 

[PATCH 5/8] accel/tcg: Replace target_ulong with vaddr in translator_*()

2023-04-20 Thread Anton Johansson via
Use vaddr for guest virtual address in translator_use_goto_tb() and
translator_loop().

Signed-off-by: Anton Johansson 
---
 accel/tcg/translator.c| 10 +-
 include/exec/translator.h |  6 +++---
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 7bda43ff61..58737eff97 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -18,7 +18,7 @@
 #include "exec/plugin-gen.h"
 #include "exec/replay-core.h"
 
-bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest)
+bool translator_use_goto_tb(DisasContextBase *db, vaddr dest)
 {
 /* Suppress goto_tb if requested. */
 if (tb_cflags(db->tb) & CF_NO_GOTO_TB) {
@@ -30,8 +30,8 @@ bool translator_use_goto_tb(DisasContextBase *db, 
target_ulong dest)
 }
 
 void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
- target_ulong pc, void *host_pc,
- const TranslatorOps *ops, DisasContextBase *db)
+ vaddr pc, void *host_pc, const TranslatorOps *ops,
+ DisasContextBase *db)
 {
 uint32_t cflags = tb_cflags(tb);
 bool plugin_enabled;
@@ -137,10 +137,10 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, 
int *max_insns,
 }
 
 static void *translator_access(CPUArchState *env, DisasContextBase *db,
-   target_ulong pc, size_t len)
+   vaddr pc, size_t len)
 {
 void *host;
-target_ulong base, end;
+vaddr base, end;
 TranslationBlock *tb;
 
 tb = db->tb;
diff --git a/include/exec/translator.h b/include/exec/translator.h
index 797fef7515..fa3c2ba529 100644
--- a/include/exec/translator.h
+++ b/include/exec/translator.h
@@ -147,8 +147,8 @@ typedef struct TranslatorOps {
  * - When too many instructions have been translated.
  */
 void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
- target_ulong pc, void *host_pc,
- const TranslatorOps *ops, DisasContextBase *db);
+ vaddr pc, void *host_pc, const TranslatorOps *ops,
+ DisasContextBase *db);
 
 /**
  * translator_use_goto_tb
@@ -158,7 +158,7 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, 
int *max_insns,
  * Return true if goto_tb is allowed between the current TB
  * and the destination PC.
  */
-bool translator_use_goto_tb(DisasContextBase *db, target_ulong dest);
+bool translator_use_goto_tb(DisasContextBase *db, vaddr dest);
 
 /*
  * Translator Load Functions
-- 
2.39.1




Re: [PATCH] async: Suppress GCC13 false positive in aio_bh_poll()

2023-04-20 Thread Daniel Henrique Barboza




On 4/20/23 18:07, Daniel Henrique Barboza wrote:



On 4/20/23 17:29, Cédric Le Goater wrote:

From: Cédric Le Goater 

GCC13 reports an error :

../util/async.c: In function ‘aio_bh_poll’:
include/qemu/queue.h:303:22: error: storing the address of local variable 
‘slice’ in ‘*ctx.bh_slice_list.sqh_last’ [-Werror=dangling-pointer=]
   303 | (head)->sqh_last = &(elm)->field.sqe_next; 
 \
   | ~^~~~
../util/async.c:169:5: note: in expansion of macro ‘QSIMPLEQ_INSERT_TAIL’
   169 | QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
   | ^~~~
../util/async.c:161:17: note: ‘slice’ declared here
   161 | BHListSlice slice;
   | ^
../util/async.c:161:17: note: ‘ctx’ declared here

But the local variable 'slice' is removed from the global context list
in the following loop of the same routine. Add a pragma to silence GCC.

Cc: Stefan Hajnoczi 
Cc: Paolo Bonzini 
Cc: Daniel P. Berrangé 
Signed-off-by: Cédric Le Goater 
---



Reviewed-by: Daniel Henrique Barboza 
Tested-by: Daniel Henrique Barboza 


If no one opposes I'll queue this patch, and the following 2 already reviewed
patches, in ppc-next:

[PATCH for-8.0 v2 3/3] target/ppc: Fix helper_pminsn() prototype
[PATCH for-8.0 v2 2/3] target/s390x: Fix float_comp_to_cc() prototype



Nevermind, these 2 patches are already applied. We're missing just this one.



Daniel




The reason is that I updated to Fedora 38 too soon and became aggravated by
these GCC13 false positives.



Thanks,


Daniel




  util/async.c | 14 ++
  1 file changed, 14 insertions(+)

diff --git a/util/async.c b/util/async.c
index 21016a1ac7..856e1a8a33 100644
--- a/util/async.c
+++ b/util/async.c
@@ -164,7 +164,21 @@ int aio_bh_poll(AioContext *ctx)
  /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue().  */
  QSLIST_MOVE_ATOMIC(&bh_list, &ctx->bh_list);
+
+    /*
+ * GCC13 [-Werror=dangling-pointer=] complains that the local variable
+ * 'slice' is being stored in the global 'ctx->bh_slice_list' but the
+ * list is emptied before this function returns.
+ */
+#if !defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpragmas"
+#pragma GCC diagnostic ignored "-Wdangling-pointer="
+#endif
  QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
+#if !defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
  while ((s = QSIMPLEQ_FIRST(&ctx->bh_slice_list))) {
  QEMUBH *bh;




Re: [PATCH v2 03/13] ram.c: Dont change param->block in the compress thread

2023-04-20 Thread Juan Quintela
Lukas Straub  wrote:
> Instead introduce an extra parameter to trigger the compress thread.
> Now, when the compress thread is done, we know what RAMBlock and
> offset it did compress.
>
> This will be used in the next commits to move save_page_header()
> out of compress code.
>
> Signed-off-by: Lukas Straub 

Reviewed-by: Juan Quintela 

Much better, thanks.




Re: [PATCH v2 02/13] ram.c: Let the compress threads return a CompressResult enum

2023-04-20 Thread Juan Quintela
Lukas Straub  wrote:
> This will be used in the next commits to move save_page_header()
> out of compress code.
>
> Signed-off-by: Lukas Straub 

Reviewed-by: Juan Quintela 




Re: [PATCH v2 01/13] qtest/migration-test.c: Add postcopy tests with compress enabled

2023-04-20 Thread Juan Quintela
Lukas Straub  wrote:
> On Thu, 20 Apr 2023 12:20:25 +0200
> Juan Quintela  wrote:
>
>> Lukas Straub  wrote:
>> > Add postcopy tests with compress enabled to ensure nothing breaks
>> > with the refactoring in the next commits.
>> >
>> > preempt+compress is blocked, so no test needed for that case.
>> >
>> > Signed-off-by: Lukas Straub   
>> 
>> Reviewed-by: Juan Quintela 
>> 
>> And I wanted to removed the old compression code and it gets new users.  
>> Sniff.
>
> Who knows how many compress threads users are out there...

Not too much.
We broke it during development and nobody found it.

And the reason that I wrote the multifd-zlib compression code was
because I was not able to get a migration-test working with compression,
so 

> By the way, I'm not against deprecating compress threads in the long
> run. I'm already working on (cleanly :)) adding colo support with
> multifd.

Ok, then I will still put the deprecate comment there.


>> > ---
>> >  tests/qtest/migration-test.c | 83 +++-
>> >  1 file changed, 53 insertions(+), 30 deletions(-)
>> >
>> > diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c
>> > index 1f2a019ce0..930cb4f29d 100644
>> > --- a/tests/qtest/migration-test.c
>> > +++ b/tests/qtest/migration-test.c
>> > @@ -1127,6 +1127,36 @@ test_migrate_tls_x509_finish(QTestState *from,
>> >  #endif /* CONFIG_TASN1 */
>> >  #endif /* CONFIG_GNUTLS */
>> >
>> > +static void *
>> > +test_migrate_compress_start(QTestState *from,
>> > +QTestState *to)
>> > +{
>> > +migrate_set_parameter_int(from, "compress-level", 1);
>> > +migrate_set_parameter_int(from, "compress-threads", 4);
>> > +migrate_set_parameter_bool(from, "compress-wait-thread", true);
>> > +migrate_set_parameter_int(to, "decompress-threads", 4);
>> > +
>> > +migrate_set_capability(from, "compress", true);
>> > +migrate_set_capability(to, "compress", true);
>> > +
>> > +return NULL;
>> > +}  
>> 
>> Independently of this patch, we need to change this test to use 4
>> compression tests and 3 decompression or anything that is not the same
>> number in both sides.
>> 
>> I was complaining about this and when I arrived to the end of the path
>> found that this was code movement.
>> 
>> Later, Juan.
>> 
>
> Oops, forgot to mention, the test is based on this patch
> https://lore.kernel.org/qemu-devel/2f4abb67cf5f3e1591b276462a93bdd20bbc.1680618040.git.lukasstra...@web.de/
>
> Will probably carry the patch with this series then. So you mean 4
> compress _threads_ and 3 decompress _threads_?

Yeap.

Later, Juan.
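
For reference, the asymmetric thread counts requested here would only touch
the parameter values in the start hook quoted earlier; a hedged sketch
reusing the migration-test helpers from that patch:

    static void *
    test_migrate_compress_start(QTestState *from,
                                QTestState *to)
    {
        migrate_set_parameter_int(from, "compress-level", 1);
        /* deliberately mismatched counts on the two sides */
        migrate_set_parameter_int(from, "compress-threads", 4);
        migrate_set_parameter_bool(from, "compress-wait-thread", true);
        migrate_set_parameter_int(to, "decompress-threads", 3);

        migrate_set_capability(from, "compress", true);
        migrate_set_capability(to, "compress", true);

        return NULL;
    }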




Re: [PATCH] async: Suppress GCC13 false positive in aio_bh_poll()

2023-04-20 Thread Daniel Henrique Barboza




On 4/20/23 17:29, Cédric Le Goater wrote:

From: Cédric Le Goater 

GCC13 reports an error :

../util/async.c: In function ‘aio_bh_poll’:
include/qemu/queue.h:303:22: error: storing the address of local variable 
‘slice’ in ‘*ctx.bh_slice_list.sqh_last’ [-Werror=dangling-pointer=]
   303 | (head)->sqh_last = &(elm)->field.sqe_next; 
 \
   | ~^~~~
../util/async.c:169:5: note: in expansion of macro ‘QSIMPLEQ_INSERT_TAIL’
   169 | QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
   | ^~~~
../util/async.c:161:17: note: ‘slice’ declared here
   161 | BHListSlice slice;
   | ^
../util/async.c:161:17: note: ‘ctx’ declared here

But the local variable 'slice' is removed from the global context list
in the following loop of the same routine. Add a pragma to silence GCC.

Cc: Stefan Hajnoczi 
Cc: Paolo Bonzini 
Cc: Daniel P. Berrangé 
Signed-off-by: Cédric Le Goater 
---



Reviewed-by: Daniel Henrique Barboza 
Tested-by: Daniel Henrique Barboza 


If no one opposes I'll queue this patch, and the following 2 already reviewed
patches, in ppc-next:

[PATCH for-8.0 v2 3/3] target/ppc: Fix helper_pminsn() prototype
[PATCH for-8.0 v2 2/3] target/s390x: Fix float_comp_to_cc() prototype


The reason is that I updated to Fedora 38 too soon and became aggravated by
these GCC13 false positives.



Thanks,


Daniel




  util/async.c | 14 ++
  1 file changed, 14 insertions(+)

diff --git a/util/async.c b/util/async.c
index 21016a1ac7..856e1a8a33 100644
--- a/util/async.c
+++ b/util/async.c
@@ -164,7 +164,21 @@ int aio_bh_poll(AioContext *ctx)
  
  /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue().  */

  QSLIST_MOVE_ATOMIC(&bh_list, &ctx->bh_list);
+
+/*
+ * GCC13 [-Werror=dangling-pointer=] complains that the local variable
+ * 'slice' is being stored in the global 'ctx->bh_slice_list' but the
+ * list is emptied before this function returns.
+ */
+#if !defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpragmas"
+#pragma GCC diagnostic ignored "-Wdangling-pointer="
+#endif
  QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
+#if !defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
  
  while ((s = QSIMPLEQ_FIRST(&ctx->bh_slice_list))) {

  QEMUBH *bh;




Re: [PATCH v2 3/4] build: move COLO under CONFIG_REPLICATION

2023-04-20 Thread Dr. David Alan Gilbert
* Vladimir Sementsov-Ogievskiy (vsement...@yandex-team.ru) wrote:
> We don't allow use of the x-colo capability when replication is not
> configured. So there is no reason to build COLO when replication is
> disabled; it's unusable in this case.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  hmp-commands.hx|  2 ++
>  migration/colo.c   |  6 +
>  migration/meson.build  |  6 +++--
>  migration/migration-hmp-cmds.c |  2 ++
>  migration/migration.c  | 19 +++---
>  net/meson.build|  5 +++-
>  qapi/migration.json| 12 ++---
>  stubs/colo.c   | 47 ++
>  stubs/meson.build  |  1 +
>  9 files changed, 78 insertions(+), 22 deletions(-)
>  create mode 100644 stubs/colo.c
> 
> diff --git a/hmp-commands.hx b/hmp-commands.hx
> index bb85ee1d26..fbd0932232 100644
> --- a/hmp-commands.hx
> +++ b/hmp-commands.hx
> @@ -1035,6 +1035,7 @@ SRST
>migration (or once already in postcopy).
>  ERST
>  
> +#ifdef CONFIG_REPLICATION
>  {
>  .name   = "x_colo_lost_heartbeat",
>  .args_type  = "",
> @@ -1043,6 +1044,7 @@ ERST
>"a failover or takeover is needed.",
>  .cmd = hmp_x_colo_lost_heartbeat,
>  },
> +#endif

We seem to be inconsistent about whether the ifdef includes the
SRST/ERST doc section; some ifdefs do and some don't; and thus
it depends whether or not you want the command documented
even though it's compiled out.

I think it's probably OK, but maybe worth reconsidering:

Acked-by: Dr. David Alan Gilbert 

>  SRST
>  ``x_colo_lost_heartbeat``
> diff --git a/migration/colo.c b/migration/colo.c
> index 0716e64689..089c491d70 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -196,6 +196,12 @@ COLOMode get_colo_mode(void)
>  }
>  }
>  
> +bool migrate_colo_enabled(void)
> +{
> +MigrationState *s = migrate_get_current();
> +return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO];
> +}
> +
>  void colo_do_failover(void)
>  {
>  /* Make sure VM stopped while failover happened. */
> diff --git a/migration/meson.build b/migration/meson.build
> index 0d1bb9f96e..3fccf79f12 100644
> --- a/migration/meson.build
> +++ b/migration/meson.build
> @@ -13,8 +13,6 @@ softmmu_ss.add(files(
>'block-dirty-bitmap.c',
>'channel.c',
>'channel-block.c',
> -  'colo-failover.c',
> -  'colo.c',
>'exec.c',
>'fd.c',
>'global_state.c',
> @@ -29,6 +27,10 @@ softmmu_ss.add(files(
>'threadinfo.c',
>  ), gnutls)
>  
> +if get_option('replication').allowed()
> +  softmmu_ss.add(files('colo-failover.c', 'colo.c'))
> +endif
> +
>  softmmu_ss.add(when: rdma, if_true: files('rdma.c'))
>  if get_option('live_block_migration').allowed()
>softmmu_ss.add(files('block.c'))
> diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c
> index 72519ea99f..4601c48f41 100644
> --- a/migration/migration-hmp-cmds.c
> +++ b/migration/migration-hmp-cmds.c
> @@ -640,6 +640,7 @@ void hmp_migrate_start_postcopy(Monitor *mon, const QDict 
> *qdict)
>  hmp_handle_error(mon, err);
>  }
>  
> +#ifdef CONFIG_REPLICATION
>  void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict *qdict)
>  {
>  Error *err = NULL;
> @@ -647,6 +648,7 @@ void hmp_x_colo_lost_heartbeat(Monitor *mon, const QDict 
> *qdict)
>  qmp_x_colo_lost_heartbeat();
>  hmp_handle_error(mon, err);
>  }
> +#endif
>  
>  typedef struct HMPMigrationStatus {
>  QEMUTimer *timer;
> diff --git a/migration/migration.c b/migration/migration.c
> index bda4789193..2382958364 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -165,7 +165,9 @@ 
> INITIALIZE_MIGRATE_CAPS_SET(check_caps_background_snapshot,
>  MIGRATION_CAPABILITY_RDMA_PIN_ALL,
>  MIGRATION_CAPABILITY_COMPRESS,
>  MIGRATION_CAPABILITY_XBZRLE,
> +#ifdef CONFIG_REPLICATION
>  MIGRATION_CAPABILITY_X_COLO,
> +#endif
>  MIGRATION_CAPABILITY_VALIDATE_UUID,
>  MIGRATION_CAPABILITY_ZERO_COPY_SEND);
>  
> @@ -1329,15 +1331,6 @@ static bool migrate_caps_check(bool *cap_list,
>  }
>  #endif
>  
> -#ifndef CONFIG_REPLICATION
> -if (cap_list[MIGRATION_CAPABILITY_X_COLO]) {
> -error_setg(errp, "QEMU compiled without replication module"
> -   " can't enable COLO");
> -error_append_hint(errp, "Please enable replication before COLO.\n");
> -return false;
> -}
> -#endif
> -
>  if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
>  /* This check is reasonably expensive, so only when it's being
>   * set the first time, also it's only the destination that needs
> @@ -3577,12 +3570,6 @@ fail:
>MIGRATION_STATUS_FAILED);
>  }
>  
> -bool migrate_colo_enabled(void)
> -{
> -MigrationState *s = migrate_get_current();
> -return s->enabled_capabilities[MIGRATION_CAPABILITY_X_COLO];
> -}
> -
>  typedef enum MigThrError {
>  /* No error 

Re: [PATCH v2 28/43] migration: Move migrate_use_tls() to options.c

2023-04-20 Thread Juan Quintela
Fabiano Rosas  wrote:
> Juan Quintela  writes:
>
>  
>> +/*
>> + * pseudo capabilities
>> + *
>> + * This are functions that are used in a similar way that capabilities
>> + * check, but they are not a capability.
>
> s/This/These/
> s/that capabilities/to capabilities/
>

Fixed, thanks.




Re: [PATCH 9/9] docs/style: call out the use of GUARD macros

2023-04-20 Thread Juan Quintela
Alex Bennée  wrote:
> Their use makes our code safer so we should mention them.
>
> Signed-off-by: Alex Bennée 


Reviewed-by: Juan Quintela 




Re: [PATCH 8/9] docs/devel: mention the spacing requirement for QOM

2023-04-20 Thread Juan Quintela
Alex Bennée  wrote:
> We have a more complete document on QOM but we should at least mention
> the style requirements in the style guide.
>
> Signed-off-by: Alex Bennée 
> Cc: Mark Cave-Ayland 

Reviewed-by: Juan Quintela 




Re: [PATCH 7/9] docs/devel: make a statement about includes

2023-04-20 Thread Juan Quintela
Alex Bennée  wrote:
> While not explicitly disallowing header macro abuse (because that
> would make us hypocrites) let's at least address some things to think
> about.
>
> Signed-off-by: Alex Bennée 
> ---
>  docs/devel/style.rst | 14 ++
>  1 file changed, 14 insertions(+)
>
> diff --git a/docs/devel/style.rst b/docs/devel/style.rst
> index 68aa776930..5bc6f2f095 100644
> --- a/docs/devel/style.rst
> +++ b/docs/devel/style.rst
> @@ -300,6 +300,20 @@ putting those into qemu/typedefs.h instead of including 
> the header.
>  
>  Cyclic inclusion is forbidden.
>  
> +Generative Includes
> +-------------------
> +
> +QEMU makes fairly extensive use of the macro pre-processor to
> +instantiate multiple similar functions. While such abuse of the macro
> +processor isn't discouraged, it can make debugging and code navigation
> +harder. You should consider carefully if the same effect can be
> +achieved by making it easy for the compiler to constant fold or using
> +python scripting to generate grep friendly code.
> +
> +If you do use template header files they should be named with the
> +``.c.inc`` or ``.h.inc`` suffix to make it clear they are being
> +included for expansion.
> +
>  C types
>  =======

Reviewed-by: Juan Quintela 

Fair enough.
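
As an illustration of the ".c.inc" convention the patch describes, a
made-up template expanded at two widths could look like this (not from
QEMU; glue() is defined inline here for self-containment):

    /* op_template.c.inc -- expanded once per inclusion */
    static inline DATA_TYPE glue(load_le_, DATA_SIZE)(const uint8_t *p)
    {
        DATA_TYPE val = 0;
        for (int i = 0; i < DATA_SIZE; i++) {
            val |= (DATA_TYPE)p[i] << (i * 8);
        }
        return val;
    }

    /* user.c */
    #include <stdint.h>

    #define xglue(a, b) a##b
    #define glue(a, b)  xglue(a, b)

    #define DATA_TYPE uint16_t
    #define DATA_SIZE 2
    #include "op_template.c.inc"   /* instantiates load_le_2() */
    #undef DATA_TYPE
    #undef DATA_SIZE

    #define DATA_TYPE uint32_t
    #define DATA_SIZE 4
    #include "op_template.c.inc"   /* instantiates load_le_4() */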




Re: [PATCH 6/9] docs/system: remove excessive punctuation from guest-loader docs

2023-04-20 Thread Juan Quintela
Alex Bennée  wrote:
> A possessive its needs no ' whereas the contraction of it is does.
>
> Signed-off-by: Alex Bennée 

Reviewed-by: Juan Quintela 




Re: [PATCH 2/9] qemu-options.hx: Update descriptions of memory options for NUMA node

2023-04-20 Thread Juan Quintela
Alex Bennée  wrote:
> From: Yohei Kojima 
>
> This commit adds the following description:
> 1. `memdev` option is recommended over `mem` option (see [1,2])
> 2. users must specify memory for all NUMA nodes (see [2])
>
> This commit also separates descriptions for `mem` and `memdev` into two
> paragraphs. The old doc describes legacy `mem` option first, and it was
> a bit confusing.
>
> Related documentation:
> [1] https://wiki.qemu.org/ChangeLog/5.1#Incompatible_changes
> [2] https://www.qemu.org/docs/master/about/removed-features.html
>
> Signed-off-by: Yohei Kojima 
> Message-Id: 
> 

Reviewed-by: Juan Quintela 
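
For reference, the recommended memdev form looks like this on the command
line (ids and sizes are illustrative); each NUMA node gets its own backend
object, and memory must be specified for every node:

    -object memory-backend-ram,id=ram0,size=2G \
    -object memory-backend-ram,id=ram1,size=2G \
    -numa node,nodeid=0,memdev=ram0 \
    -numa node,nodeid=1,memdev=ram1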




Re: [PATCH 1/9] docs/devel/kconfig.rst: Fix incorrect markup

2023-04-20 Thread Juan Quintela
Alex Bennée  wrote:
> From: Peter Maydell 
>
> In rST markup syntax, the inline markup (*italics*, **bold** and
> ``monospaced``) must be separated from the surrounding text by
> non-word characters, otherwise it is not interpreted as markup.
> To force interpretation as markup in the middle of a word,
> you need to use a backslash-escaped space (which will not
> appear as a space in the output).
>
> Fix a missing backslash-space in this file, which meant that the ``
> after "select" was output literally and the monospacing was
> incorrectly extended all the way to the end of the next monospaced
> word.
>
> Signed-off-by: Peter Maydell 
> Message-Id: <20230411105424.3994585-1-peter.mayd...@linaro.org>

Reviewed-by: Juan Quintela 




Re: [PATCH] migration: Minor control flow simplification

2023-04-20 Thread Juan Quintela
Eric Blake  wrote:
> No need to declare a temporary variable.
>
> Suggested-by: Juan Quintela 
> Fixes: 1df36e8c6289 ("migration: Handle block device inactivation failures 
> better")
> Signed-off-by: Eric Blake 
> ---

Reviewed-by: Juan Quintela 

Queued one way or another.




Re: [PATCH] async: Suppress GCC13 false positive in aio_bh_poll()

2023-04-20 Thread Cédric Le Goater

+ Φλ

On 4/20/23 22:29, Cédric Le Goater wrote:

From: Cédric Le Goater 

GCC13 reports an error :

../util/async.c: In function ‘aio_bh_poll’:
include/qemu/queue.h:303:22: error: storing the address of local variable 
‘slice’ in ‘*ctx.bh_slice_list.sqh_last’ [-Werror=dangling-pointer=]
   303 | (head)->sqh_last = &(elm)->field.sqe_next; 
 \
   | ~^~~~
../util/async.c:169:5: note: in expansion of macro ‘QSIMPLEQ_INSERT_TAIL’
   169 | QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
   | ^~~~
../util/async.c:161:17: note: ‘slice’ declared here
   161 | BHListSlice slice;
   | ^
../util/async.c:161:17: note: ‘ctx’ declared here

But the local variable 'slice' is removed from the global context list
in the following loop of the same routine. Add a pragma to silence GCC.

Cc: Stefan Hajnoczi 
Cc: Paolo Bonzini 
Cc: Daniel P. Berrangé 
Signed-off-by: Cédric Le Goater 
---
  util/async.c | 14 ++
  1 file changed, 14 insertions(+)

diff --git a/util/async.c b/util/async.c
index 21016a1ac7..856e1a8a33 100644
--- a/util/async.c
+++ b/util/async.c
@@ -164,7 +164,21 @@ int aio_bh_poll(AioContext *ctx)
  
  /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue().  */

  QSLIST_MOVE_ATOMIC(&bh_list, &ctx->bh_list);
+
+/*
+ * GCC13 [-Werror=dangling-pointer=] complains that the local variable
+ * 'slice' is being stored in the global 'ctx->bh_slice_list' but the
+ * list is emptied before this function returns.
+ */
+#if !defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpragmas"
+#pragma GCC diagnostic ignored "-Wdangling-pointer="
+#endif
  QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
+#if !defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
  
  while ((s = QSIMPLEQ_FIRST(&ctx->bh_slice_list))) {

  QEMUBH *bh;





[PATCH] async: Suppress GCC13 false positive in aio_bh_poll()

2023-04-20 Thread Cédric Le Goater
From: Cédric Le Goater 

GCC13 reports an error :

../util/async.c: In function ‘aio_bh_poll’:
include/qemu/queue.h:303:22: error: storing the address of local variable 
‘slice’ in ‘*ctx.bh_slice_list.sqh_last’ [-Werror=dangling-pointer=]
  303 | (head)->sqh_last = &(elm)->field.sqe_next;  
\
  | ~^~~~
../util/async.c:169:5: note: in expansion of macro ‘QSIMPLEQ_INSERT_TAIL’
  169 | QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
  | ^~~~
../util/async.c:161:17: note: ‘slice’ declared here
  161 | BHListSlice slice;
  | ^
../util/async.c:161:17: note: ‘ctx’ declared here

But the local variable 'slice' is removed from the global context list
in the following loop of the same routine. Add a pragma to silence GCC.

Cc: Stefan Hajnoczi 
Cc: Paolo Bonzini 
Cc: Daniel P. Berrangé 
Signed-off-by: Cédric Le Goater 
---
 util/async.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/util/async.c b/util/async.c
index 21016a1ac7..856e1a8a33 100644
--- a/util/async.c
+++ b/util/async.c
@@ -164,7 +164,21 @@ int aio_bh_poll(AioContext *ctx)
 
 /* Synchronizes with QSLIST_INSERT_HEAD_ATOMIC in aio_bh_enqueue().  */
 QSLIST_MOVE_ATOMIC(&bh_list, &ctx->bh_list);
+
+/*
+ * GCC13 [-Werror=dangling-pointer=] complains that the local variable
+ * 'slice' is being stored in the global 'ctx->bh_slice_list' but the
+ * list is emptied before this function returns.
+ */
+#if !defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpragmas"
+#pragma GCC diagnostic ignored "-Wdangling-pointer="
+#endif
 QSIMPLEQ_INSERT_TAIL(&ctx->bh_slice_list, &slice, next);
+#if !defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
 
 while ((s = QSIMPLEQ_FIRST(&ctx->bh_slice_list))) {
 QEMUBH *bh;
-- 
2.40.0
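
For anyone who wants to study the diagnostic outside QEMU, the pattern
distills to a stack object temporarily linked into a global list. A minimal
sketch follows; whether GCC 13 actually warns can depend on the
optimization level (e.g. -O2 with -Wall):

    struct node { struct node *next; };
    struct list { struct node *head; };

    static struct list global_list;

    static void push(struct list *l, struct node *n)
    {
        n->next = l->head;
        l->head = n;        /* the address of a local may be stored here */
    }

    static void pop(struct list *l)
    {
        l->head = l->head->next;
    }

    int process(void)
    {
        struct node slice;  /* stack object, like BHListSlice */
        push(&global_list, &slice);
        /* ... consume the list ... */
        pop(&global_list);  /* unlinked before the local goes away */
        return 0;
    }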




[PATCH trivial] spelling: information

2023-04-20 Thread Michael Tokarev
3 trivial fixes: 2 .json comments which go into
executables, and 1 .h file comment.

Signed-off-by: Michael Tokarev 

diff --git a/include/ui/clipboard.h b/include/ui/clipboard.h
--- a/include/ui/clipboard.h
+++ b/include/ui/clipboard.h
@@ -173 +173 @@ void qemu_clipboard_peer_release(QemuClipboardPeer *peer,
- * Return the current clipboard data & owner informations.
+ * Return the current clipboard data & owner information.

diff --git a/qapi/cryptodev.json b/qapi/cryptodev.json
--- a/qapi/cryptodev.json
+++ b/qapi/cryptodev.json
@@ -71 +71 @@
-# @client: the additional infomation of the crypto device
+# @client: the additional information of the crypto device

diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json
--- a/qga/qapi-schema.json
+++ b/qga/qapi-schema.json
@@ -894 +894 @@
-# NVMe smart informations, based on NVMe specification,
+# NVMe smart information, based on NVMe specification,



Re: [PATCH 2/2] docs/interop: Delete qmp-intro.txt

2023-04-20 Thread Eric Blake
On Thu, Apr 20, 2023 at 04:03:52PM +0100, Peter Maydell wrote:
> qmp-intro.txt is quite small and provides very little information
> that isn't already in the documentation elsewhere.  Fold the example
> command lines into qemu-options.hx, and delete the now-unneeded plain
> text document.
> 
> While we're touching the qemu-options.hx documentation text,
> wordsmith it a little bit and improve the rST formatting.
> 
> Signed-off-by: Peter Maydell 
> ---
>  docs/interop/qmp-intro.txt | 88 --
>  qemu-options.hx| 26 ---
>  2 files changed, 21 insertions(+), 93 deletions(-)
>  delete mode 100644 docs/interop/qmp-intro.txt

Reviewed-by: Eric Blake 

>  DEF("mon", HAS_ARG, QEMU_OPTION_mon, \
>  "-mon [chardev=]name[,mode=readline|control][,pretty[=on|off]]\n", 
> QEMU_ARCH_ALL)
>  SRST
>  ``-mon [chardev=]name[,mode=readline|control][,pretty[=on|off]]``
> -Setup monitor on chardev name. ``mode=control`` configures 
> -a QMP monitor (a JSON RPC-style protocol) and it is not the
> -same as HMP, the human monitor that has a "(qemu)" prompt.
> +Set up a monitor connected to the chardev ``name``.
> +QEMU supports two monitors: the Human Monitor Protocol
> +(HMP; for human interaction), and the QEMU Monitor Protocol
> +(QMP; a JSON RPC-style protocol).
> +The default is HMP; ``mode=control`` selects QMP instead.
>  ``pretty`` is only valid when ``mode=control``, 

Is the pre-existing trailing space on this line important to rST?

>  turning on JSON pretty printing to ease
>  human reading and debugging.
> +
> +For example::
> +
> +  -chardev socket,id=mon1,host=localhost,port=,server=on,wait=off \
> +  -mon chardev=mon1,mode=control,pretty=on
> +
> +enables the QMP monitor on localhost port  with pretty-printing.
>  ERST
>  
>  DEF("debugcon", HAS_ARG, QEMU_OPTION_debugcon, \
> -- 
> 2.34.1
> 
> 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org




Re: [PATCH 8/9] docs/devel: mention the spacing requirement for QOM

2023-04-20 Thread Mark Cave-Ayland

On 20/04/2023 16:57, Alex Bennée wrote:


We have a more complete document on QOM but we should at least mention
the style requirements in the style guide.

Signed-off-by: Alex Bennée 
Cc: Mark Cave-Ayland 
---
  docs/devel/qom.rst   |  2 ++
  docs/devel/style.rst | 29 +
  2 files changed, 31 insertions(+)

diff --git a/docs/devel/qom.rst b/docs/devel/qom.rst
index 3e34b07c98..c9237950d0 100644
--- a/docs/devel/qom.rst
+++ b/docs/devel/qom.rst
@@ -1,3 +1,5 @@
+.. _qom:
+
  ===========================
  The QEMU Object Model (QOM)
  ===========================
diff --git a/docs/devel/style.rst b/docs/devel/style.rst
index 5bc6f2f095..0bd01f3fca 100644
--- a/docs/devel/style.rst
+++ b/docs/devel/style.rst
@@ -628,6 +628,35 @@ are still some caveats to beware of
  QEMU Specific Idioms
  
  
+QEMU Object Model Declarations

+==============================
+
+The QEMU Object Model (QOM) provides a framework for handling objects
+in the base C language. The first declaration of a storage or class
+structure should always be the parent and leave a visual space between
+that declaration and the new code.
+
+.. code-block:: c
+
+typedef struct MyDeviceState {
+DeviceState parent_obj;
+
+/* Properties */
+int prop_a;
+char *prob_b;
+/* Other stuff */
+int internal_state;
+} MyDeviceState;
+
+typedef struct MyDeviceClass {
+ObjectClass parent_class;


This one should be DeviceClass in this particular example.


+void (*new_fn1)(void);
+bool (*new_fn2)(CPUState *);
+   } MyDeviceClass;
+
+See :ref:`qom` for more details.


A couple of points:

1) It is probably worth removing the typedefs given that they are handled by the 
various QOM macros


2) There should be mention of the fixed names "parent_obj" and "parent_class" 
for
the first declaration.

How about something like this:


QEMU Object Model Declarations
==============================

The QEMU Object Model (QOM) provides a framework for handling objects
in the base C language. The first declaration of a storage or class
structure should always be the parent and leave a visual space between
that declaration and the new code.

For a storage structure the first declaration should always be called
"parent_obj" and for a class structure the first member should always
be called "parent_class" as below:

.. code-block:: c

   struct MyDeviceState {
       DeviceState parent_obj;

       /* Properties */
       int prop_a;
       char *prob_b;
       /* Other stuff */
       int internal_state;
   };

   struct MyDeviceClass {
       DeviceClass parent_class;

       void (*new_fn1)(void);
       bool (*new_fn2)(CPUState *);
   };

Note that there is no need to provide typedefs for QOM structures since these are 
generated automatically by the QOM declaration macros. See :ref:`qom` for more details.



ATB,

Mark.
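
As a concrete illustration of the point about generated typedefs, a short
sketch (not part of the patch under review; TYPE_MY_DEVICE and the members
are illustrative) showing how OBJECT_DECLARE_TYPE() from qom/object.h
supplies the typedefs and cast macros, leaving only the bare structs to
write:

    /* in a header inside the QEMU tree */
    #include "qom/object.h"
    #include "hw/qdev-core.h"

    #define TYPE_MY_DEVICE "my-device"
    OBJECT_DECLARE_TYPE(MyDeviceState, MyDeviceClass, MY_DEVICE)

    struct MyDeviceState {
        DeviceState parent_obj;    /* fixed name for the instance parent */

        int prop_a;
    };

    struct MyDeviceClass {
        DeviceClass parent_class;  /* fixed name for the class parent */

        void (*new_fn1)(void);
    };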



Re: [PATCH v2 30/43] migration: Create migrate_max_bandwidth() function

2023-04-20 Thread Fabiano Rosas
Juan Quintela  writes:

> Signed-off-by: Juan Quintela 

Reviewed-by: Fabiano Rosas 



Re: [PATCH v2 29/43] migration: Move migrate_postcopy() to options.c

2023-04-20 Thread Fabiano Rosas
Juan Quintela  writes:

> Signed-off-by: Juan Quintela 

Reviewed-by: Fabiano Rosas 



Re: [PATCH v2 03/43] migration: Create migration_cap_set()

2023-04-20 Thread Juan Quintela
Fabiano Rosas  wrote:
> Juan Quintela  writes:
>
>> And remove the convoluted use of qmp_migrate_set_capabilities() to
>> enable/disable MIGRATION_CAPABILITY_BLOCK.
>>
>> Signed-off-by: Juan Quintela 
>> ---
>>  migration/migration.c | 34 --
>>  1 file changed, 16 insertions(+), 18 deletions(-)
>>
>> diff --git a/migration/migration.c b/migration/migration.c
>> index 74f28cdca6..4bf5df4778 100644
>> --- a/migration/migration.c
>> +++ b/migration/migration.c
>> @@ -1913,25 +1913,24 @@ void migrate_set_state(int *state, int old_state, 
>> int new_state)
>>  }
>>  }
>>  
>> -static MigrationCapabilityStatus *migrate_cap_add(MigrationCapability index,
>> -  bool state)
>> +static bool migrate_cap_set(int cap, bool value, Error **errp)
>
> Just a nit, the commit message says migration_cap_set.

Good catch. Fixed.

> Reviewed-by: Fabiano Rosas 

Thanks.
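
For context, since the hunk above stops at the function signature, here is a
sketch of what the full helper plausibly looks like, reconstructed from the
surrounding series rather than quoted from the patch itself:

    static bool migrate_cap_set(int cap, bool value, Error **errp)
    {
        MigrationState *s = migrate_get_current();
        bool new_caps[MIGRATION_CAPABILITY__MAX];

        /* Capabilities cannot change while a migration is in flight. */
        if (migration_is_running(s->state)) {
            error_setg(errp, QERR_MIGRATION_ACTIVE);
            return false;
        }

        /* Validate the flipped capability against the existing set. */
        memcpy(new_caps, s->capabilities, sizeof(new_caps));
        new_caps[cap] = value;
        if (!migrate_caps_check(s->capabilities, new_caps, errp)) {
            return false;
        }

        s->capabilities[cap] = value;
        return true;
    }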




Re: [RFC PATCH v1 00/26] migration: File based migration with multifd and fixed-ram

2023-04-20 Thread Peter Xu
On Thu, Apr 20, 2023 at 10:02:43AM +0100, Daniel P. Berrangé wrote:
> On Wed, Apr 19, 2023 at 03:07:19PM -0400, Peter Xu wrote:
> > On Wed, Apr 19, 2023 at 06:12:05PM +0100, Daniel P. Berrangé wrote:
> > > On Tue, Apr 18, 2023 at 03:26:45PM -0400, Peter Xu wrote:
> > > > On Tue, Apr 18, 2023 at 05:58:44PM +0100, Daniel P. Berrangé wrote:
> > > > > Libvirt has multiple APIs where it currently uses its migrate-to-file
> > > > > approach
> > > > > 
> > > > >   * virDomainManagedSave()
> > > > > 
> > > > > This saves VM state to a libvirt managed file, stops the VM,
> > > > > and the file state is auto-restored on next request to start
> > > > > the VM, and the file deleted. The VM CPUs are stopped during
> > > > > both save + restore phases.
> > > > > 
> > > > >   * virDomainSave/virDomainRestore
> > > > > 
> > > > > The former saves VM state to a file specified by the mgmt 
> > > > > app/user.
> > > > > A later call to virDomainRestore starts the VM using that saved
> > > > > state. The mgmt app / user can delete the file state, or re-use
> > > > > it many times as they desire. The VM CPUs are stopped during both
> > > > > save + restore phase
> > > > > 
> > > > >   * virDomainSnapshotXXX
> > > > > 
> > > > > This family of APIs takes snapshots of the VM disks, optionally
> > > > > also including the full VM state to a separate file. The snapshots
> > > > > can later be restored. The VM CPUs remain running during the
> > > > > save phase, but are stopped during restore phase
> > > > 
> > > > For this one IMHO it'll be good if Libvirt can consider leveraging the 
> > > > new
> > > > background-snapshot capability (QEMU 6.0+, so not very new..).  Or is 
> > > > there
> > > > perhaps any reason why a generic migrate:fd approach is better?
> > > 
> > > I'm not sure I fully understand the implications of 'background-snapshot'?
> > > 
> > > Based on what the QAPI comment says, it sounds potentially interesting,
> > > as conceptually it would be nicer to have the memory / state snapshot
> > > represent the VM at the point where we started the snapshot operation,
> > > rather than where we finished the snapshot operation.
> > > 
> > > It would not solve the performance problems that the work in this thread
> > > was intended to address though.  With large VMs (100's of GB of RAM),
> > > saving all the RAM state to disk takes a very long time, regardless of
> > > whether the VM vCPUs are paused or running.
> > 
> > I think it solves the performance problem by copying each guest
> > page only once, even if the guest is running.
> 
> I think we're talking about different performance problems.
> 
> What you describe here is about ensuring the snapshot is of finite size
> and completes in linear time, by ensuring each page is written only
> once.
> 
> What I'm talking about is being able to parallelize the writing of all
> RAM, so if a single thread can't saturate the storage, using multiple
> threads will make the overall process faster, even when we're only
> writing each page once.

It depends on how much we want it.  Here the live snapshot scenario could
probably leverage the same multi-threading framework as the VM suspend case,
because it can assume all the pages are static and each is saved only once.

But I agree it's at least not there yet, so we can directly leverage
multifd for now.

> 
> > Different from mostly all the rest of "migrate" use cases, background
> > snapshot does not use the generic dirty tracking at all (for KVM that's
> > get-dirty-log), instead it uses userfaultfd wr-protects, so that when
> > taking the snapshot all the guest pages will be protected once.
> 
> Oh, so that means this 'background-snapshot' feature only works on
> Linux, and only when permissions allow it. The migration parameter
> probably should be marked with 'CONFIG_LINUX' in the QAPI schema
> to make it clear this is a non-portable feature.

Indeed, I can have a follow-up patch for this.  But it'll be the same as
some other features, like postcopy (and all its sub-features, including
postcopy-blocktime and postcopy-preempt)?

> 
> > It guarantees the best efficiency of creating a snapshot with VM running,
> > afaict.  I sincerely think Libvirt should have someone investigating and
> > see whether virDomainSnapshotXXX() can be implemented by this cap rather
> > than the default migration.
> 
> Since the background-snapshot feature is not universally available,
> it will only ever be possible to use it as an optional enhancement
> with virDomainSnapshotXXX, we'll need the portable impl to be the
> default / fallback.

I am actually curious how a live snapshot can be implemented correctly
without something like background snapshot.  I raised this question in
another reply here:

https://lore.kernel.org/all/ZDWBSuGDU9IMohEf@x1n/

I was using fixed-ram and vm suspend as example, but I assume it applies to
any live snapshot that is based on 
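
For background on the userfaultfd write-protect mechanism Peter describes, a
minimal sketch of the raw kernel interface (Linux 5.7+ for UFFDIO_WRITEPROTECT;
QEMU wraps this in its own helpers, and error handling is trimmed here):

    #include <fcntl.h>
    #include <linux/userfaultfd.h>
    #include <stddef.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Write-protect a RAM region so the first write to each page faults,
     * letting a snapshot thread copy the page out before it changes. */
    static int snapshot_wp_region(void *addr, size_t len)
    {
        int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
        struct uffdio_api api = { .api = UFFD_API };
        struct uffdio_register reg = {
            .range = { .start = (unsigned long)addr, .len = len },
            .mode = UFFDIO_REGISTER_MODE_WP,
        };
        struct uffdio_writeprotect wp = {
            .range = { .start = (unsigned long)addr, .len = len },
            .mode = UFFDIO_WRITEPROTECT_MODE_WP,
        };

        if (uffd < 0 ||
            ioctl(uffd, UFFDIO_API, &api) < 0 ||
            ioctl(uffd, UFFDIO_REGISTER, &reg) < 0 ||
            ioctl(uffd, UFFDIO_WRITEPROTECT, &wp) < 0) {
            return -1;
        }

        /* Write faults are now reported via uffd; after copying a page,
         * clear protection for it with another UFFDIO_WRITEPROTECT call
         * whose mode omits UFFDIO_WRITEPROTECT_MODE_WP. */
        return uffd;
    }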

Re: [PATCH v2 28/43] migration: Move migrate_use_tls() to options.c

2023-04-20 Thread Fabiano Rosas
Juan Quintela  writes:

> Once there, rename it to migrate_tls() and make it return bool for
> consistency.
>
> Signed-off-by: Juan Quintela 
> ---
>  migration/migration.c |  9 -
>  migration/migration.h |  2 --
>  migration/options.c   | 16 +++-
>  migration/options.h   |  9 +
>  migration/tls.c   |  3 ++-
>  5 files changed, 26 insertions(+), 13 deletions(-)
>
> diff --git a/migration/migration.c b/migration/migration.c
> index 2191437b15..bbc9a07fd7 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -2251,15 +2251,6 @@ bool migrate_postcopy(void)
>  return migrate_postcopy_ram() || migrate_dirty_bitmaps();
>  }
>  
> -int migrate_use_tls(void)
> -{
> -MigrationState *s;
> -
> -s = migrate_get_current();
> -
> -return s->parameters.tls_creds && *s->parameters.tls_creds;
> -}
> -
>  /* migration thread support */
>  /*
>   * Something bad happened to the RP stream, mark an error
> diff --git a/migration/migration.h b/migration/migration.h
> index 3ae938b19c..2099470e8e 100644
> --- a/migration/migration.h
> +++ b/migration/migration.h
> @@ -449,8 +449,6 @@ MigrationState *migrate_get_current(void);
>  
>  bool migrate_postcopy(void);
>  
> -int migrate_use_tls(void);
> -
>  uint64_t ram_get_total_transferred_pages(void);
>  
>  /* Sending on the return path - generic and then for each message type */
> diff --git a/migration/options.c b/migration/options.c
> index a111d0d43f..6db221157f 100644
> --- a/migration/options.c
> +++ b/migration/options.c
> @@ -204,6 +204,20 @@ bool migrate_zero_copy_send(void)
>  
>  return s->capabilities[MIGRATION_CAPABILITY_ZERO_COPY_SEND];
>  }
> +
> +/* pseudo capabilities */
> +
> +bool migrate_tls(void)
> +{
> +MigrationState *s;
> +
> +s = migrate_get_current();
> +
> +return s->parameters.tls_creds && *s->parameters.tls_creds;
> +}
> +
> +
> +
>  typedef enum WriteTrackingSupport {
>  WT_SUPPORT_UNKNOWN = 0,
>  WT_SUPPORT_ABSENT,
> @@ -353,7 +367,7 @@ bool migrate_caps_check(bool *old_caps, bool *new_caps, 
> Error **errp)
>   new_caps[MIGRATION_CAPABILITY_COMPRESS] ||
>   new_caps[MIGRATION_CAPABILITY_XBZRLE] ||
>   migrate_multifd_compression() ||
> - migrate_use_tls())) {
> + migrate_tls())) {
>  error_setg(errp,
> "Zero copy only available for non-compressed non-TLS 
> multifd migration");
>  return false;
> diff --git a/migration/options.h b/migration/options.h
> index 99f6bbd7a1..c91d5cbef0 100644
> --- a/migration/options.h
> +++ b/migration/options.h
> @@ -38,6 +38,15 @@ bool migrate_xbzrle(void);
>  bool migrate_zero_blocks(void);
>  bool migrate_zero_copy_send(void);
>  
> +/*
> + * pseudo capabilities
> + *
> + * This are functions that are used in a similar way that capabilities
> + * check, but they are not a capability.

s/This/These/
s/that capabilities/to capabilities/

> + */
> +
> +bool migrate_tls(void);
> +
>  /* capabilities helpers */
>  
>  bool migrate_caps_check(bool *old_caps, bool *new_caps, Error **errp);
> diff --git a/migration/tls.c b/migration/tls.c
> index 4d2166a209..acd38e0b62 100644
> --- a/migration/tls.c
> +++ b/migration/tls.c
> @@ -22,6 +22,7 @@
>  #include "channel.h"
>  #include "migration.h"
>  #include "tls.h"
> +#include "options.h"
>  #include "crypto/tlscreds.h"
>  #include "qemu/error-report.h"
>  #include "qapi/error.h"
> @@ -165,7 +166,7 @@ void migration_tls_channel_connect(MigrationState *s,
>  
>  bool migrate_channel_requires_tls_upgrade(QIOChannel *ioc)
>  {
> -if (!migrate_use_tls()) {
> +if (!migrate_tls()) {
>  return false;
>  }
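
As an aside on the "pseudo capability" naming used by this series: the
helpers keep the bool-returning shape of a capability getter while computing
the answer from other state. A condensed side-by-side, taken from hunks
quoted in this and the neighbouring threads:

    /* Real capability: reads a MIGRATION_CAPABILITY_* flag directly. */
    bool migrate_postcopy_ram(void)
    {
        MigrationState *s = migrate_get_current();

        return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM];
    }

    /* Pseudo capabilities: same shape, but derived. migrate_postcopy()
     * combines two capabilities; migrate_tls() tests a parameter. */
    bool migrate_postcopy(void)
    {
        return migrate_postcopy_ram() || migrate_dirty_bitmaps();
    }

    bool migrate_tls(void)
    {
        MigrationState *s = migrate_get_current();

        return s->parameters.tls_creds && *s->parameters.tls_creds;
    }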



Re: [PATCH v2 27/43] migration: Create migrate_cpu_throttle_tailslow() function

2023-04-20 Thread Fabiano Rosas
Juan Quintela  writes:

> Signed-off-by: Juan Quintela 

Reviewed-by: Fabiano Rosas 



Re: [PATCH v2 26/43] migration: Create migrate_cpu_throttle_increment() function

2023-04-20 Thread Fabiano Rosas
Juan Quintela  writes:

> Signed-off-by: Juan Quintela 

Reviewed-by: Fabiano Rosas 



Re: [PATCH v2 25/43] migration: Create migrate_cpu_throttle_initial() to option.c

2023-04-20 Thread Fabiano Rosas
Juan Quintela  writes:

> Signed-off-by: Juan Quintela 

Reviewed-by: Fabiano Rosas 



Re: [PATCH] coverity: physmem: use simple assertions instead of modelling

2023-04-20 Thread Vladimir Sementsov-Ogievskiy

On 16.03.23 00:22, Paolo Bonzini wrote:

On 3/15/23 15:28, Vladimir Sementsov-Ogievskiy wrote:

On 22.02.23 18:57, Peter Maydell wrote:

On Wed, 22 Feb 2023 at 14:19, Stefan Hajnoczi  wrote:


On Wed, 15 Feb 2023 at 15:22, Vladimir Sementsov-Ogievskiy
 wrote:


ping

[add Stefan]


I'm not familiar with the Coverity models. Peter Maydell is the maintainer.


We haven't run Coverity scans since September last year.


What's the problem with it? May I help somehow?


The container broke when libslirp was removed, and I've been procrastinating 
fixing it. 

Paolo


Hi!

I see Coverity works again. Could we give this patch a try?

Locally, I now run Coverity on master, on master with the model dropped
(half of my patch), and with my full patch.

The model that this patch drops fixes 94 issues. The assertion I propose
fixes the same 94 issues plus two more resource leaks.

The model that this patch drops also introduces 4 issues. The assertion I
propose introduces no new issues.

Of course, my local setup is different from QEMU Coverity cloud run.

--
Best regards,
Vladimir
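
To make the technique concrete, a minimal hypothetical sketch (names
invented, not taken from the patch) of trading an out-of-tree Coverity model
for an in-tree assertion that states the same invariant:

    #include <assert.h>
    #include <stddef.h>

    static char bounce[4096];

    /* Before: a separate Coverity model file told the analyzer that len
     * never exceeds the buffer, suppressing overflow reports. After: the
     * assertion expresses the invariant in the real source, so the
     * analyzer (and human readers) get the same guarantee with no model
     * file to keep in sync. */
    void *bounce_buffer_get(size_t len)
    {
        assert(len <= sizeof(bounce));
        return bounce;
    }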




Re: [PATCH v2 24/43] migration: Move migrate_announce_params() to option.c

2023-04-20 Thread Fabiano Rosas
Juan Quintela  writes:

> Signed-off-by: Juan Quintela 

Reviewed-by: Fabiano Rosas 

> ---
>  migration/migration.c | 14 --
>  migration/options.c   | 19 +++
>  2 files changed, 19 insertions(+), 14 deletions(-)
>
> diff --git a/migration/migration.c b/migration/migration.c
> index dbb89c2e7b..2191437b15 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -954,20 +954,6 @@ MigrationParameters *qmp_query_migrate_parameters(Error 
> **errp)
>  return params;
>  }
>  
> -AnnounceParameters *migrate_announce_params(void)
> -{
> -static AnnounceParameters ap;
> -
> -MigrationState *s = migrate_get_current();
> -
> -ap.initial = s->parameters.announce_initial;
> -ap.max = s->parameters.announce_max;
> -ap.rounds = s->parameters.announce_rounds;
> -ap.step = s->parameters.announce_step;
> -
> -return &ap;
> -}
> -
>  /*
>   * Return true if we're already in the middle of a migration
>   * (i.e. any of the active or setup states)
> diff --git a/migration/options.c b/migration/options.c
> index 2cb04fbbd1..ed9d2a226f 100644
> --- a/migration/options.c
> +++ b/migration/options.c
> @@ -16,6 +16,7 @@
>  #include "qapi/qapi-commands-migration.h"
>  #include "qapi/qmp/qerror.h"
>  #include "sysemu/runstate.h"
> +#include "migration/misc.h"
>  #include "migration.h"
>  #include "ram.h"
>  #include "options.h"
> @@ -589,3 +590,21 @@ uint64_t migrate_xbzrle_cache_size(void)
>  
>  return s->parameters.xbzrle_cache_size;
>  }
> +
> +/* parameters helpers */
> +
> +AnnounceParameters *migrate_announce_params(void)
> +{
> +static AnnounceParameters ap;
> +
> +MigrationState *s = migrate_get_current();
> +
> +ap.initial = s->parameters.announce_initial;
> +ap.max = s->parameters.announce_max;
> +ap.rounds = s->parameters.announce_rounds;
> +ap.step = s->parameters.announce_step;
> +
> +return &ap;
> +}
> +
> +

Extra whitespace here^



Re: [PATCH v2 23/43] migration: Create migrate_max_cpu_throttle()

2023-04-20 Thread Fabiano Rosas
Juan Quintela  writes:

> Signed-off-by: Juan Quintela 

Reviewed-by: Fabiano Rosas 



Re: [PATCH v2 22/43] migration: Create migrate_checkpoint_delay()

2023-04-20 Thread Fabiano Rosas
Juan Quintela  writes:

> Signed-off-by: Juan Quintela 

Reviewed-by: Fabiano Rosas 



Re: [PATCH v3] migration: move migration_global_dump() to migration-hmp-cmds.c

2023-04-20 Thread Peter Xu
On Thu, Apr 20, 2023 at 02:10:38PM +0200, Juan Quintela wrote:
> It is only used there, so we can make it static.
> Once there, remove spice.h since it is not used.
> 
> Signed-off-by: Juan Quintela 

Reviewed-by: Peter Xu 

-- 
Peter Xu




Re: [PATCH v2 21/43] migration: Create migrate_throttle_trigger_threshold()

2023-04-20 Thread Fabiano Rosas
Juan Quintela  writes:

> Signed-off-by: Juan Quintela 
> ---
>  migration/options.c | 9 +
>  migration/options.h | 1 +
>  migration/ram.c | 3 +--
>  3 files changed, 11 insertions(+), 2 deletions(-)
>
> diff --git a/migration/options.c b/migration/options.c
> index 2b6d88b4b9..b9f3815f7e 100644
> --- a/migration/options.c
> +++ b/migration/options.c
> @@ -554,6 +554,15 @@ int migrate_multifd_zstd_level(void)
>  return s->parameters.multifd_zstd_level;
>  }
>  
> +uint8_t migrate_throttle_trigger_threshold(void)
> +{
> +MigrationState *s;
> +
> +s = migrate_get_current();
> +
> +return s->parameters.throttle_trigger_threshold;
> +}
> +
>  uint64_t migrate_xbzrle_cache_size(void)
>  {
>  MigrationState *s;
> diff --git a/migration/options.h b/migration/options.h
> index 96d5a8e6e4..aa54443353 100644
> --- a/migration/options.h
> +++ b/migration/options.h
> @@ -55,6 +55,7 @@ int migrate_multifd_channels(void);
>  MultiFDCompression migrate_multifd_compression(void);
>  int migrate_multifd_zlib_level(void);
>  int migrate_multifd_zstd_level(void);
> +uint8_t migrate_throttle_trigger_threshold(void);
>  uint64_t migrate_xbzrle_cache_size(void);
>  
>  #endif
> diff --git a/migration/ram.c b/migration/ram.c
> index 7f28588dde..68801012ba 100644
> --- a/migration/ram.c
> +++ b/migration/ram.c
> @@ -1179,8 +1179,7 @@ static void migration_update_rates(RAMState *rs, 
> int64_t end_time)
>  
>  static void migration_trigger_throttle(RAMState *rs)
>  {
> -MigrationState *s = migrate_get_current();
> -uint64_t threshold = s->parameters.throttle_trigger_threshold;
> +uint64_t threshold = migrate_throttle_trigger_threshold();
>  uint64_t bytes_xfer_period =
>  stat64_get(&ram_counters.transferred) - rs->bytes_xfer_prev;
>  uint64_t bytes_dirty_period = rs->num_dirty_pages_period * 
> TARGET_PAGE_SIZE;

Reviewed-by: Fabiano Rosas 
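
To spell out what the threshold means, a simplified sketch of the comparison
in migration_trigger_throttle() (an approximation of the quoted hunk, not the
verbatim code):

    #include <stdbool.h>
    #include <stdint.h>

    static bool throttle_needed(uint64_t bytes_xfer_period,
                                uint64_t bytes_dirty_period,
                                uint8_t threshold_pct)
    {
        /* Auto-converge throttles the guest when the bytes dirtied in a
         * sample period exceed threshold_pct percent of the bytes
         * transferred in that period. */
        uint64_t dirty_threshold = bytes_xfer_period * threshold_pct / 100;

        return bytes_dirty_period > dirty_threshold;
    }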



Re: [PATCH v2 13/43] migration: Create migrate_rdma_pin_all() function

2023-04-20 Thread Fabiano Rosas
Juan Quintela  writes:

> Signed-off-by: Juan Quintela 
> Reviewed-by: Vladimir Sementsov-Ogievskiy 
> ---
>  migration/options.c | 7 +++
>  migration/options.h | 1 +
>  migration/rdma.c| 6 +++---
>  3 files changed, 11 insertions(+), 3 deletions(-)
>
> diff --git a/migration/options.c b/migration/options.c
> index 2003e413da..9c9b8e5863 100644
> --- a/migration/options.c
> +++ b/migration/options.c
> @@ -138,6 +138,13 @@ bool migrate_postcopy_ram(void)
>  return s->capabilities[MIGRATION_CAPABILITY_POSTCOPY_RAM];
>  }
>  
> +bool migrate_rdma_pin_all(void)
> +{
> +MigrationState *s = migrate_get_current();
> +
> +return s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL];
> +}
> +
>  bool migrate_release_ram(void)
>  {
>  MigrationState *s;
> diff --git a/migration/options.h b/migration/options.h
> index 316efd1063..25c002b37a 100644
> --- a/migration/options.h
> +++ b/migration/options.h
> @@ -30,6 +30,7 @@ bool migrate_pause_before_switchover(void);
>  bool migrate_postcopy_blocktime(void);
>  bool migrate_postcopy_preempt(void);
>  bool migrate_postcopy_ram(void);
> +bool migrate_rdma_pin_all(void);
>  bool migrate_release_ram(void);
>  bool migrate_return_path(void);
>  bool migrate_validate_uuid(void);
> diff --git a/migration/rdma.c b/migration/rdma.c
> index bf55e2f163..3e7b68c482 100644
> --- a/migration/rdma.c
> +++ b/migration/rdma.c
> @@ -35,6 +35,7 @@
>  #include 
>  #include "trace.h"
>  #include "qom/object.h"
> +#include "options.h"
>  #include 
>  
>  /*
> @@ -4178,8 +4179,7 @@ void rdma_start_outgoing_migration(void *opaque,
>  goto err;
>  }
>  
> -ret = qemu_rdma_source_init(rdma,
> -s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp);
> +ret = qemu_rdma_source_init(rdma,migrate_rdma_pin_all(), errp);

Missing a space after the comma here.

>  
>  if (ret) {
>  goto err;
> @@ -4201,7 +4201,7 @@ void rdma_start_outgoing_migration(void *opaque,
>  }
>  
>  ret = qemu_rdma_source_init(rdma_return_path,
> -s->capabilities[MIGRATION_CAPABILITY_RDMA_PIN_ALL], errp);
> +migrate_rdma_pin_all(), errp);
>  
>  if (ret) {
>  goto return_path_err;


