Re: [PATCH RFC 23/32] python//machine.py: reorder __init__

2020-05-14 Thread Philippe Mathieu-Daudé

On 5/14/20 7:53 AM, John Snow wrote:

Put the init arg handling all at the top, and mostly in order (deviating
when one is dependent on another), and put what is effectively runtime
state declaration at the bottom.

Signed-off-by: John Snow 
---
  python/qemu/lib/machine.py | 29 +
  1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/python/qemu/lib/machine.py b/python/qemu/lib/machine.py
index 6a4aea7725..beb31be453 100644
--- a/python/qemu/lib/machine.py
+++ b/python/qemu/lib/machine.py
@@ -80,38 +80,43 @@ def __init__(self, binary, args=None, wrapper=None, 
name=None,
  @param socket_scm_helper: helper program, required for send_fd_scm()
  @note: Qemu process is not started until launch() is used.
  '''
+# Direct user configuration
+
+self._binary = binary
+
  if args is None:
  args = []
+# Copy mutable input: we will be modifying our copy
+self._args = list(args)
+
  if wrapper is None:
  wrapper = []
-if name is None:
-name = "qemu-%d" % os.getpid()
-if sock_dir is None:
-sock_dir = test_dir
-self._name = name
+self._wrapper = wrapper
+
+self._name = name or "qemu-%d" % os.getpid()
+self._test_dir = test_dir
+self._sock_dir = sock_dir or self._test_dir
+self._socket_scm_helper = socket_scm_helper
+
  if monitor_address is not None:
  self._monitor_address = monitor_address
  self._remove_monitor_sockfile = False
  else:
  self._monitor_address = os.path.join(
-sock_dir, f"{name}-monitor.sock"
+self._sock_dir, f"{self._name}-monitor.sock"
  )
  self._remove_monitor_sockfile = True
+
+# Runstate
  self._qemu_log_path = None
  self._qemu_log_file = None
  self._popen = None
-self._binary = binary
-self._args = list(args) # Force copy args in case we modify them
-self._wrapper = wrapper
  self._events = []
  self._iolog = None
-self._socket_scm_helper = socket_scm_helper
  self._qmp_set = True   # Enable QMP monitor by default.
  self._qmp = None
  self._qemu_full_args = None
-self._test_dir = test_dir
  self._temp_dir = None
-self._sock_dir = sock_dir
  self._launched = False
  self._machine = None
  self._console_index = 0



Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH RFC 21/32] python//machine.py: remove logging configuration

2020-05-14 Thread Philippe Mathieu-Daudé

On 5/14/20 7:53 AM, John Snow wrote:

Python 3.5 and above do not print a warning when logging is not
configured. As a library, it's best practice to leave logging
configuration to the client executable.

Signed-off-by: John Snow 
---
  python/qemu/lib/machine.py | 3 ---
  1 file changed, 3 deletions(-)

diff --git a/python/qemu/lib/machine.py b/python/qemu/lib/machine.py
index c31bf7cabb..e92afe8649 100644
--- a/python/qemu/lib/machine.py
+++ b/python/qemu/lib/machine.py
@@ -110,9 +110,6 @@ def __init__(self, binary, args=None, wrapper=None, 
name=None,
  self._console_socket = None
  self._remove_files = []
  
-# just in case logging wasn't configured by the main script:

-logging.basicConfig()
-
  def __enter__(self):
  return self
  



Reviewed-by: Philippe Mathieu-Daudé 




[PATCH RFC 27/32] python//machine.py: Add _qmp access shim

2020-05-14 Thread John Snow
Like many other Optional[] types, it's not always a given that this
object will be set. Wrap it in a type-shim that raises a meaningful
error and will always return a concrete type.

Signed-off-by: John Snow 
---
 python/qemu/lib/machine.py | 12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/python/qemu/lib/machine.py b/python/qemu/lib/machine.py
index 34e6b6f9e9..dfa8449b62 100644
--- a/python/qemu/lib/machine.py
+++ b/python/qemu/lib/machine.py
@@ -117,7 +117,7 @@ def __init__(self, binary, args=None, wrapper=None, 
name=None,
 self._events = []
 self._iolog = None
 self._qmp_set = True   # Enable QMP monitor by default.
-self._qmp = None
+self._qmp_connection: Optional[qmp.QEMUMonitorProtocol] = None
 self._qemu_full_args = None
 self._temp_dir = None
 self._launched = False
@@ -283,7 +283,7 @@ def _pre_launch(self):
 if self._remove_monitor_sockfile:
 assert isinstance(self._monitor_address, str)
 self._remove_files.append(self._monitor_address)
-self._qmp = qmp.QEMUMonitorProtocol(
+self._qmp_connection = qmp.QEMUMonitorProtocol(
 self._monitor_address,
 server=True,
 nickname=self._name
@@ -416,7 +416,13 @@ def set_qmp_monitor(self, enabled=True):
 self._qmp_set = True
 else:
 self._qmp_set = False
-self._qmp = None
+self._qmp_connection = None
+
+@property
+def _qmp(self) -> qmp.QEMUMonitorProtocol:
+if self._qmp_connection is None:
+raise QEMUMachineError("Attempt to access QMP with no connection")
+return self._qmp_connection
 
 @classmethod
 def _qmp_args(cls, _conv_keys: bool = True, **args: Any) -> Dict[str, Any]:
-- 
2.21.1




Re: [PATCH v3 03/10] iotests/283: make executable

2020-05-14 Thread Philippe Mathieu-Daudé

On 4/21/20 9:35 AM, Vladimir Sementsov-Ogievskiy wrote:

All other test files are executable, except for this one. Fix that.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  tests/qemu-iotests/283 | 0
  1 file changed, 0 insertions(+), 0 deletions(-)
  mode change 100644 => 100755 tests/qemu-iotests/283

diff --git a/tests/qemu-iotests/283 b/tests/qemu-iotests/283
old mode 100644
new mode 100755



Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH v2 5/6] target/ppc: Fix arguments to ppc_radix64_partition_scoped_xlate()

2020-05-14 Thread Cédric Le Goater
On 5/14/20 12:57 AM, Greg Kurz wrote:
> The last two arguments have the bool type. Also, we shouldn't raise an
> exception when using gdbstub.
> 
> This was found while reading the code. Since it only affects the powernv
> machine, I didn't dig further to find an actual bug.
> 
> Fixes: d04ea940c597 "target/ppc: Add support for Radix partition-scoped 
> translation"
> Signed-off-by: Greg Kurz 

Reviewed-by: Cédric Le Goater 

> ---
>  target/ppc/mmu-radix64.c |6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
> index fb7dfe25ba6f..7ce37cb778db 100644
> --- a/target/ppc/mmu-radix64.c
> +++ b/target/ppc/mmu-radix64.c
> @@ -339,7 +339,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU 
> *cpu, int rwx,
>   */
>  ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, prtbe_addr,
>   pate, _raddr, _prot,
> - _page_size, 1, 1);
> + _page_size, true,
> + cause_excp);
>  if (ret) {
>  return ret;
>  }
> @@ -378,7 +379,8 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU 
> *cpu, int rwx,
>  do {
>  ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, pte_addr,
>   pate, _raddr, _prot,
> - _page_size, 1, 1);
> + _page_size, true,
> + cause_excp);
>  if (ret) {
>  return ret;
>  }
> 




Re: [PATCH v2 2/6] target/ppc: Pass const pointer to ppc_radix64_get_fully_qualified_addr()

2020-05-14 Thread Cédric Le Goater
On 5/14/20 12:56 AM, Greg Kurz wrote:
> This doesn't require write access to the CPU registers.
> 
> Signed-off-by: Greg Kurz 

Reviewed-by: Cédric Le Goater 

> ---
>  target/ppc/mmu-radix64.c |3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
> index 1404e53deca8..c76879f65b78 100644
> --- a/target/ppc/mmu-radix64.c
> +++ b/target/ppc/mmu-radix64.c
> @@ -28,7 +28,8 @@
>  #include "mmu-radix64.h"
>  #include "mmu-book3s-v3.h"
>  
> -static bool ppc_radix64_get_fully_qualified_addr(CPUPPCState *env, vaddr 
> eaddr,
> +static bool ppc_radix64_get_fully_qualified_addr(const CPUPPCState *env,
> + vaddr eaddr,
>   uint64_t *lpid, uint64_t 
> *pid)
>  {
>  if (msr_hv) { /* MSR[HV] -> Hypervisor/bare metal */
> 




Re: [PATCH] vhost-user: add support for VHOST_USER_SET_STATUS

2020-05-14 Thread Jason Wang



On 2020/5/14 下午3:33, Maxime Coquelin wrote:

It is useful for the Vhost-user backend to know
about the Virtio device status updates,
especially when the driver sets the DRIVER_OK bit.

With that information, there is no more need to make
hazardous assumptions about when the driver is done
with the device configuration.

Signed-off-by: Maxime Coquelin 
---

This patch applies on top of Cindy's "vDPA support in qemu"
series, which introduces the .vhost_set_state vhost-backend
ops.

  docs/interop/vhost-user.rst | 12 
  hw/net/vhost_net.c  | 10 +-
  hw/virtio/vhost-user.c  | 35 +++
  3 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
index 3b1b6602c7..f108de7458 100644
--- a/docs/interop/vhost-user.rst
+++ b/docs/interop/vhost-user.rst
@@ -815,6 +815,7 @@ Protocol features
#define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD   12
#define VHOST_USER_PROTOCOL_F_RESET_DEVICE 13
#define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14
+  #define VHOST_USER_PROTOCOL_F_STATUS   15
  
  Master message types

  
@@ -1263,6 +1264,17 @@ Master message types
  
The state.num field is currently reserved and must be set to 0.
  
+``VHOST_USER_SET_STATUS``

+  :id: 36
+  :equivalent ioctl: VHOST_VDPA_SET_STATUS
+  :slave payload: N/A
+  :master payload: ``u64``
+
+  When the ``VHOST_USER_PROTOCOL_F_STATUS`` protocol feature has been
+  successfully negotiated, this message is submitted by the master to
+  notify the backend with updated device status as defined in the Virtio
+  specification.
+
  Slave message types
  ---
  
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c

index 463e333531..37f3156dbc 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -517,10 +517,10 @@ int vhost_set_state(NetClientState *nc, int state)
  {
  struct vhost_net *net = get_vhost_net(nc);
  struct vhost_dev *hdev = >dev;
-if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
-if (hdev->vhost_ops->vhost_set_state) {
-return hdev->vhost_ops->vhost_set_state(hdev, state);
- }
-}
+
+if (hdev->vhost_ops->vhost_set_state) {
+return hdev->vhost_ops->vhost_set_state(hdev, state);
+}
+
  return 0;
  }
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index ec21e8fbe8..b7e52d97fc 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -59,6 +59,7 @@ enum VhostUserProtocolFeature {
  VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
  VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
  VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
+VHOST_USER_PROTOCOL_F_STATUS = 15,
  VHOST_USER_PROTOCOL_F_MAX
  };
  
@@ -100,6 +101,7 @@ typedef enum VhostUserRequest {

  VHOST_USER_SET_INFLIGHT_FD = 32,
  VHOST_USER_GPU_SET_SOCKET = 33,
  VHOST_USER_RESET_DEVICE = 34,
+VHOST_USER_SET_STATUS = 36,
  VHOST_USER_MAX
  } VhostUserRequest;
  
@@ -1886,6 +1888,38 @@ static int vhost_user_set_inflight_fd(struct vhost_dev *dev,

  return 0;
  }
  
+static int vhost_user_set_state(struct vhost_dev *dev, int state)

+{
+bool reply_supported = virtio_has_feature(dev->protocol_features,
+  VHOST_USER_PROTOCOL_F_REPLY_ACK);
+
+VhostUserMsg msg = {
+.hdr.request = VHOST_USER_SET_STATUS,
+.hdr.flags = VHOST_USER_VERSION,
+.hdr.size = sizeof(msg.payload.u64),
+.payload.u64 = (uint64_t)state,
+};
+
+if (!virtio_has_feature(dev->protocol_features,
+VHOST_USER_PROTOCOL_F_STATUS)) {
+return -1;
+}
+
+if (reply_supported) {
+msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
+}
+
+if (vhost_user_write(dev, , NULL, 0) < 0) {
+return -1;
+}
+
+if (reply_supported) {
+return process_message_reply(dev, );
+}
+
+return 0;
+}



Interesting, I wonder how vm stop will be handled in this case.

In the case of the vDPA kernel, we probably don't want to mirror the virtio 
device status to the vdpa device status directly. QEMU may stop the 
vhost-vdpa device by, e.g., resetting the vdpa device, but in the meantime the 
guest should not detect such a condition in the virtio device status.


So in the new version of vDPA support, we probably need to do:

static int vhost_vdpa_set_state(struct vhost_dev *dev, bool started)
{
    if (started) {
    uint8_t status = 0;

    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
    vhost_vdpa_call(dev, VHOST_VDPA_GET_STATUS, );

    return !(status & VIRTIO_CONFIG_S_DRIVER_OK);
    } else {
    vhost_vdpa_reset_device(dev);
    vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
   VIRTIO_CONFIG_S_DRIVER);
    return 0;
    }
}

And vhost_set_state() will be called from vhost_dev_start()/stop().

Does this work for 

proposal: deprecate -readconfig/-writeconfig

2020-05-14 Thread Paolo Bonzini
IMHO configuration files are in general a failed experiment.  In
practice, they do not add much value over just a shell script because
they don't allow configuring all QEMU options, they are very much fixed
(by their nature).  I think it's more or less agreed that they are not
solving any problem for higher-level management stacks as well; those
would prefer to configure the VM via QMP or another API.

So, any objections to deprecating -readconfig and -writeconfig?

Thanks,

Paolo




[PATCH RFC 29/32] python//qtest.py: Check before accessing _qtest

2020-05-14 Thread John Snow
It can be None; so add assertions or exceptions where appropriate to
guard the access accordingly.

Signed-off-by: John Snow 
---
 python/qemu/lib/qtest.py | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/python/qemu/lib/qtest.py b/python/qemu/lib/qtest.py
index a8be0c782f..05c63a1d58 100644
--- a/python/qemu/lib/qtest.py
+++ b/python/qemu/lib/qtest.py
@@ -126,7 +126,8 @@ def _pre_launch(self):
 super()._pre_launch()
 self._qtest = QEMUQtestProtocol(self._qtest_path, server=True)
 
-def _post_launch(self):
+def _post_launch(self) -> None:
+assert self._qtest is not None
 super()._post_launch()
 self._qtest.accept()
 
@@ -134,6 +135,13 @@ def _post_shutdown(self):
 super()._post_shutdown()
 self._remove_if_exists(self._qtest_path)
 
-def qtest(self, cmd):
-'''Send a qtest command to guest'''
+def qtest(self, cmd: str) -> str:
+"""
+Send a qtest command to the guest.
+
+:param cmd: qtest command to send
+:return: qtest server response
+"""
+if self._qtest is None:
+raise RuntimeError("qtest socket not available")
 return self._qtest.cmd(cmd)
-- 
2.21.1




Re: [RESEND PATCH v3 1/1] ppc/spapr: Add hotremovable flag on DIMM LMBs on drmem_v2

2020-05-14 Thread Cédric Le Goater
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -446,7 +446,8 @@ static int spapr_dt_dynamic_memory_v2(SpaprMachineState 
>> *spapr, void *fdt,
>>  g_assert(drc);
>>  elem = spapr_get_drconf_cell(size / lmb_size, addr,
>>   spapr_drc_index(drc), node,
>> - SPAPR_LMB_FLAGS_ASSIGNED);
>> + (SPAPR_LMB_FLAGS_ASSIGNED |
>> +  SPAPR_LMB_FLAGS_HOTREMOVABLE);


This is missing a ')'

C.



Re: [PATCH RFC 16/32] python//qmp.py: re-absorb MonitorResponseError

2020-05-14 Thread Philippe Mathieu-Daudé

On 5/14/20 7:53 AM, John Snow wrote:

When I initially split this out, I considered this more of a machine
error than a QMP protocol error, but I think that's misguided.

Move this back to qmp.py and name it QMPResponseError. Convert
qmp.command() to use this exception type.

Signed-off-by: John Snow 


Reviewed-by: Philippe Mathieu-Daudé 


---
  python/qemu/lib/machine.py| 15 +--
  python/qemu/lib/qmp.py| 17 +++--
  scripts/render_block_graph.py |  4 ++--
  3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/python/qemu/lib/machine.py b/python/qemu/lib/machine.py
index 2f94c851ed..c31bf7cabb 100644
--- a/python/qemu/lib/machine.py
+++ b/python/qemu/lib/machine.py
@@ -48,19 +48,6 @@ class QEMUMachineAddDeviceError(QEMUMachineError):
  """
  
  
-class MonitorResponseError(qmp.QMPError):

-"""
-Represents erroneous QMP monitor reply
-"""
-def __init__(self, reply):
-try:
-desc = reply["error"]["desc"]
-except KeyError:
-desc = reply
-super().__init__(desc)
-self.reply = reply
-
-
  class QEMUMachine:
  """
  A QEMU VM
@@ -433,7 +420,7 @@ def command(self, cmd, conv_keys=True, **args):
  if reply is None:
  raise qmp.QMPError("Monitor is closed")
  if "error" in reply:
-raise MonitorResponseError(reply)
+raise qmp.QMPResponseError(reply)
  return reply["return"]
  
  def get_qmp_event(self, wait=False):

diff --git a/python/qemu/lib/qmp.py b/python/qemu/lib/qmp.py
index 911da59888..82f86b4e45 100644
--- a/python/qemu/lib/qmp.py
+++ b/python/qemu/lib/qmp.py
@@ -61,6 +61,19 @@ class QMPTimeoutError(QMPError):
  """
  
  
+class QMPResponseError(QMPError):

+"""
+Represents erroneous QMP monitor reply
+"""
+def __init__(self, reply: QMPMessage):
+try:
+desc = reply['error']['desc']
+except KeyError:
+desc = reply
+super().__init__(desc)
+self.reply = reply
+
+
  class QEMUMonitorProtocol:
  """
  Provide an API to connect to QEMU via QEMU Monitor Protocol (QMP) and then
@@ -250,8 +263,8 @@ def command(self, cmd, **kwds):
  Build and send a QMP command to the monitor, report errors if any
  """
  ret = self.cmd(cmd, kwds)
-if "error" in ret:
-raise Exception(ret['error']['desc'])
+if 'error' in ret:
+raise QMPResponseError(ret)
  return ret['return']
  
  def pull_event(self, wait=False):

diff --git a/scripts/render_block_graph.py b/scripts/render_block_graph.py
index 8048d9fbbe..332ab49a91 100755
--- a/scripts/render_block_graph.py
+++ b/scripts/render_block_graph.py
@@ -26,7 +26,7 @@
  
  sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'python'))

  from qemu.lib import QEMUMonitorProtocol
-from qemu.lib.machine import MonitorResponseError
+from qemu.lib.qmp import QMPResponseError
  
  
  def perm(arr):

@@ -103,7 +103,7 @@ def command(self, cmd):
  reply = json.loads(subprocess.check_output(ar))
  
  if 'error' in reply:

-raise MonitorResponseError(reply)
+raise QEMUResponseError(reply)
  
  return reply['return']
  






Re: [PATCH 0/4] target/i386: miscellaneous x87 fixes

2020-05-14 Thread no-reply
Patchew URL: 
https://patchew.org/QEMU/alpine.deb.2.21.2005132345100.11...@digraph.polyomino.org.uk/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Message-id: alpine.deb.2.21.2005132345100.11...@digraph.polyomino.org.uk
Subject: [PATCH 0/4] target/i386: miscellaneous x87 fixes
Type: series

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 - [tag update]  patchew/20200513011648.166876-1-ebl...@redhat.com -> 
patchew/20200513011648.166876-1-ebl...@redhat.com
 - [tag update]  patchew/20200514035230.25756-1-js...@redhat.com -> 
patchew/20200514035230.25756-1-js...@redhat.com
 * [new tag] patchew/20200514055403.18902-1-js...@redhat.com -> 
patchew/20200514055403.18902-1-js...@redhat.com
Switched to a new branch 'test'
9abdf27 target/i386: fix fbstp handling of out-of-range values
53fdfa9 target/i386: fix fbstp handling of negative zero
350fd81 target/i386: fix fxam handling of invalid encodings
0cd240c target/i386: fix floating-point load-constant rounding

=== OUTPUT BEGIN ===
1/4 Checking commit 0cd240c60f14 (target/i386: fix floating-point load-constant 
rounding)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#107: 
new file mode 100644

ERROR: Use of volatile is usually wrong, please add a comment
#117: FILE: tests/tcg/i386/test-i386-fldcst.c:6:
+volatile long double ld_res;

ERROR: spaces required around that '+' (ctx:VxV)
#129: FILE: tests/tcg/i386/test-i386-fldcst.c:18:
+if (ld_res != 0x3.5269e12f346e2bf8p+0L) {
^

ERROR: spaces required around that '+' (ctx:VxV)
#138: FILE: tests/tcg/i386/test-i386-fldcst.c:27:
+if (ld_res != 0x3.5269e12f346e2bf8p+0L) {
^

ERROR: spaces required around that '+' (ctx:VxV)
#147: FILE: tests/tcg/i386/test-i386-fldcst.c:36:
+if (ld_res != 0x3.5269e12f346e2bf8p+0L) {
^

ERROR: spaces required around that '+' (ctx:VxV)
#156: FILE: tests/tcg/i386/test-i386-fldcst.c:45:
+if (ld_res != 0x3.5269e12f346e2bfcp+0L) {
^

ERROR: spaces required around that '+' (ctx:VxV)
#166: FILE: tests/tcg/i386/test-i386-fldcst.c:55:
+if (ld_res != 0x1.71547652b82fe178p+0L) {
^

ERROR: spaces required around that '+' (ctx:VxV)
#175: FILE: tests/tcg/i386/test-i386-fldcst.c:64:
+if (ld_res != 0x1.71547652b82fe176p+0L) {
^

ERROR: spaces required around that '+' (ctx:VxV)
#184: FILE: tests/tcg/i386/test-i386-fldcst.c:73:
+if (ld_res != 0x1.71547652b82fe176p+0L) {
^

ERROR: spaces required around that '+' (ctx:VxV)
#193: FILE: tests/tcg/i386/test-i386-fldcst.c:82:
+if (ld_res != 0x1.71547652b82fe178p+0L) {
^

ERROR: spaces required around that '+' (ctx:VxV)
#203: FILE: tests/tcg/i386/test-i386-fldcst.c:92:
+if (ld_res != 0x3.243f6a8885a308d4p+0L) {
^

ERROR: spaces required around that '+' (ctx:VxV)
#212: FILE: tests/tcg/i386/test-i386-fldcst.c:101:
+if (ld_res != 0x3.243f6a8885a308dp+0L) {
   ^

ERROR: spaces required around that '+' (ctx:VxV)
#221: FILE: tests/tcg/i386/test-i386-fldcst.c:110:
+if (ld_res != 0x3.243f6a8885a308dp+0L) {
   ^

ERROR: spaces required around that '+' (ctx:VxV)
#230: FILE: tests/tcg/i386/test-i386-fldcst.c:119:
+if (ld_res != 0x3.243f6a8885a308d4p+0L) {
^

ERROR: spaces required around that '-' (ctx:VxV)
#240: FILE: tests/tcg/i386/test-i386-fldcst.c:129:
+if (ld_res != 0x4.d104d427de7fbcc8p-4L) {
^

ERROR: spaces required around that '-' (ctx:VxV)
#249: FILE: tests/tcg/i386/test-i386-fldcst.c:138:
+if (ld_res != 0x4.d104d427de7fbccp-4L) {
   ^

ERROR: spaces required around that '-' (ctx:VxV)
#258: FILE: tests/tcg/i386/test-i386-fldcst.c:147:
+if (ld_res != 0x4.d104d427de7fbccp-4L) {
   ^

ERROR: spaces required around that '-' (ctx:VxV)
#267: FILE: tests/tcg/i386/test-i386-fldcst.c:156:
+if (ld_res != 0x4.d104d427de7fbcc8p-4L) {
^

ERROR: spaces required around that '-' (ctx:VxV)
#277: FILE: tests/tcg/i386/test-i386-fldcst.c:166:
+if (ld_res != 0xb.17217f7d1cf79acp-4L) {
   ^

ERROR: spaces required around that '-' (ctx:VxV)
#286: FILE: 

Re: [PATCH v4 5/9] blockdev: Split off basic bitmap operations for qemu-img

2020-05-14 Thread Vladimir Sementsov-Ogievskiy

13.05.2020 04:16, Eric Blake wrote:

Upcoming patches want to add some basic bitmap manipulation abilities
to qemu-img.  But blockdev.o is too heavyweight to link into qemu-img
(among other things, it would drag in block jobs and transaction
support - qemu-img does offline manipulation, where atomicity is less
important because there are no concurrent modifications to compete
with), so it's time to split off the bare bones of what we will need
into a new file block/monitor/bitmap-qmp-cmds.o.

This is sufficient to expose 6 QMP commands for use by qemu-img (add,
remove, clear, enable, disable, merge), as well as move the three
helper functions touched in the previous patch.  Regarding
MAINTAINERS, the new file is automatically part of block core, but
also makes sense as related to other dirty bitmap files.

Signed-off-by: Eric Blake 
Reviewed-by: Max Reitz 
---
  Makefile.objs   |   3 +-
  block/monitor/bitmap-qmp-cmds.c | 323 
  blockdev.c  | 284 
  MAINTAINERS |   1 +
  block/monitor/Makefile.objs |   1 +
  5 files changed, 326 insertions(+), 286 deletions(-)
  create mode 100644 block/monitor/bitmap-qmp-cmds.c

diff --git a/Makefile.objs b/Makefile.objs
index a7c967633acf..99774cfd2545 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -13,9 +13,8 @@ chardev-obj-y = chardev/

  authz-obj-y = authz/

-block-obj-y = nbd/
+block-obj-y = block/ block/monitor/ nbd/ scsi/
  block-obj-y += block.o blockjob.o job.o
-block-obj-y += block/ scsi/
  block-obj-y += qemu-io-cmds.o
  block-obj-$(CONFIG_REPLICATION) += replication.o

diff --git a/block/monitor/bitmap-qmp-cmds.c b/block/monitor/bitmap-qmp-cmds.c
new file mode 100644
index ..748e1e682483
--- /dev/null
+++ b/block/monitor/bitmap-qmp-cmds.c


Hmm, shouldn't transaction bitmap actions be moved here too? Maybe not in 
this series..


@@ -0,0 +1,323 @@
+/*
+ * QEMU host block device bitmaps


This conflicts a bit with the fact that they are not block-device level but 
node-level.

Maybe just "Block dirty bitmap qmp commands"?


+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard


Does it really apply here? block-dirty-bitmap-add was added in 2015.. Maybe 
Red Hat and Virtuozzo copyrights would be more appropriate.


+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+
+#include "sysemu/blockdev.h"
+#include "block/block.h"
+#include "block/block_int.h"
+#include "qapi/qapi-commands-block.h"
+#include "qapi/error.h"


compiles for me with only four:

  #include "qemu/osdep.h"
 
  #include "block/block_int.h"

  #include "qapi/qapi-commands-block.h"
  #include "qapi/error.h"

with at least extra includes dropped:
Reviewed-by: Vladimir Sementsov-Ogievskiy 

--
Best regards,
Vladimir



Re: [PATCH v2 0/1] qemu-nbd: Close inherited stderr

2020-05-14 Thread Raphael Pour
[...] introduced with e6df58a5, stderr won't get closed if the fork
option is __not__ set.

On 5/14/20 8:31 AM, Raphael Pour wrote:
> introduced with e6df58a5, stderr won't get closed if the fork option is
> set.

-- 
Hetzner Online GmbH
Am Datacenter-Park 1
08223 Falkenstein/Vogtland
raphael.p...@hetzner.com
www.hetzner.com

Registergericht Ansbach, HRB 6089
Geschäftsführer: Martin Hetzner, Stephan Konvickova, Günther Müller



signature.asc
Description: OpenPGP digital signature


[PATCH v2 1/1] qemu-nbd: Close inherited stderr

2020-05-14 Thread Raphael Pour
Close inherited stderr of the parent if fork_process is false.
Otherwise no one will close it. (introduced by e6df58a5)
---
 qemu-nbd.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/qemu-nbd.c b/qemu-nbd.c
index 4aa005004e..a324d21c5e 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -916,7 +916,13 @@ int main(int argc, char **argv)
 } else if (pid == 0) {
 close(stderr_fd[0]);
 
-old_stderr = dup(STDERR_FILENO);
+/* Remember parents stderr only if the fork option is set.
+ * The child won't close this inherited fd otherwise.
+ */
+if (fork_process) {
+  old_stderr = dup(STDERR_FILENO);
+}
+
 ret = qemu_daemon(1, 0);
 
 /* Temporarily redirect stderr to the parent's pipe...  */
-- 
2.25.4




Re: [PATCH v2 3/6] target/ppc: Don't initialize some local variables in ppc_radix64_xlate()

2020-05-14 Thread Cédric Le Goater
On 5/14/20 12:57 AM, Greg Kurz wrote:
> It is the job of the ppc_radix64_get_fully_qualified_addr() function
> which is called at the beginning of ppc_radix64_xlate() to set both
> lpid *and* pid. It doesn't buy us anything to initialize them first.
> 
> Worse, a bug in ppc_radix64_get_fully_qualified_addr(), eg. failing to
> set either lpid or pid, would be undetectable by static analysis tools
> like coverity.
> 
> Some recent versions of gcc (eg. gcc-9.3.1-2.fc30) may still think
> that lpid or pid is used uninitialized though, so this also adds
> default cases in the switch statements to make it clear this cannot
> happen.
> 
> Signed-off-by: Greg Kurz 

Reviewed-by: Cédric Le Goater 

> ---
>  target/ppc/mmu-radix64.c |6 +-
>  1 file changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
> index c76879f65b78..07f956c9864f 100644
> --- a/target/ppc/mmu-radix64.c
> +++ b/target/ppc/mmu-radix64.c
> @@ -50,6 +50,8 @@ static bool ppc_radix64_get_fully_qualified_addr(const 
> CPUPPCState *env,
>  *lpid = 0;
>  *pid = 0;
>  break;
> +default:
> +g_assert_not_reached();
>  }
>  } else {  /* !MSR[HV] -> Guest */
>  switch (eaddr & R_EADDR_QUADRANT) {
> @@ -64,6 +66,8 @@ static bool ppc_radix64_get_fully_qualified_addr(const 
> CPUPPCState *env,
>  *lpid = env->spr[SPR_LPIDR];
>  *pid = 0; /* pid set to 0 -> addresses guest operating system */
>  break;
> +default:
> +g_assert_not_reached();
>  }
>  }
>  
> @@ -433,7 +437,7 @@ static int ppc_radix64_xlate(PowerPCCPU *cpu, vaddr 
> eaddr, int rwx,
>   bool cause_excp)
>  {
>  CPUPPCState *env = >env;
> -uint64_t lpid = 0, pid = 0;
> +uint64_t lpid, pid;
>  ppc_v3_pate_t pate;
>  int psize, prot;
>  hwaddr g_raddr;
> 




[PATCH v2 0/1] qemu-nbd: Close inherited stderr

2020-05-14 Thread Raphael Pour
Hello,

introduced with e6df58a5, stderr won't get closed if the fork option is
set. This causes other processes reading stderr to block indefinitely or
crash while relying on EOF.

v2:
  - Instead of closing the inherited stderr in the child, avoid the dup 
in the parent if the fork option is not set.

Raphael Pour (1):
  qemu-nbd: Close inherited stderr

 qemu-nbd.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

-- 
2.25.4




Re: [PATCH v4 6/9] qemu-img: Add bitmap sub-command

2020-05-14 Thread Vladimir Sementsov-Ogievskiy

13.05.2020 04:16, Eric Blake wrote:

Include actions for --add, --remove, --clear, --enable, --disable, and
--merge (note that --clear is a bit of fluff, because the same can be
accomplished by removing a bitmap and then adding a new one in its
place, but it matches what QMP commands exist).  Listing is omitted,
because it does not require a bitmap name and because it was already
possible with 'qemu-img info'.  A single command line can play one or
more bitmap commands in sequence on the same bitmap name (although all
added bitmaps share the same granularity, and all merged bitmaps
come from the same source file).  Merge defaults to other bitmaps in
the primary image, but can also be told to merge bitmaps from a
distinct image.

While this supports --image-opts for the file being modified, I did
not think it worth the extra complexity to support that for the source
file in a cross-file merges.  Likewise, I chose to have --merge only
take a single source rather than following the QMP support for
multiple merges in one go (although you can still use more than one
--merge in the command line); in part because qemu-img is offline and
therefore atomicity is not an issue.

Upcoming patches will add iotest coverage of these commands while
also testing other features.

Signed-off-by: Eric Blake 
Reviewed-by: Max Reitz 


I'm sorry for asking this only now, on v4, but still: why do we need it? We can 
instead run the qemu binary (or even the new qemu-storage-daemon) and just use the 
existing qmp commands. Is there a real benefit in developing qemu-img and maintaining 
two interfaces for the same thing? Of course, just running qmp commands from a terminal 
is a lot less comfortable than a single qemu-img command. But maybe we need 
some wrapper which makes it simple to run one qmp command on an image?

It's simple to make a python wrapper working like

qemu-qmp block-dirty-bitmap-add '{node: self, name: bitmap0, persistent: true}' 
/path/to/x.qcow2


--
Best regards,
Vladimir



[PATCH] vhost-user: add support for VHOST_USER_SET_STATUS

2020-05-14 Thread Maxime Coquelin
It is useful for the Vhost-user backend to know
about the Virtio device status updates,
especially when the driver sets the DRIVER_OK bit.

With that information, there is no more need to make
hazardous assumptions about when the driver is done
with the device configuration.

Signed-off-by: Maxime Coquelin 
---

This patch applies on top of Cindy's "vDPA support in qemu"
series, which introduces the .vhost_set_state vhost-backend
ops.

 docs/interop/vhost-user.rst | 12 
 hw/net/vhost_net.c  | 10 +-
 hw/virtio/vhost-user.c  | 35 +++
 3 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
index 3b1b6602c7..f108de7458 100644
--- a/docs/interop/vhost-user.rst
+++ b/docs/interop/vhost-user.rst
@@ -815,6 +815,7 @@ Protocol features
   #define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD   12
   #define VHOST_USER_PROTOCOL_F_RESET_DEVICE 13
   #define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14
+  #define VHOST_USER_PROTOCOL_F_STATUS   15
 
 Master message types
 
@@ -1263,6 +1264,17 @@ Master message types
 
   The state.num field is currently reserved and must be set to 0.
 
+``VHOST_USER_SET_STATUS``
+  :id: 36
+  :equivalent ioctl: VHOST_VDPA_SET_STATUS
+  :slave payload: N/A
+  :master payload: ``u64``
+
+  When the ``VHOST_USER_PROTOCOL_F_STATUS`` protocol feature has been
+  successfully negotiated, this message is submitted by the master to
+  notify the backend with updated device status as defined in the Virtio
+  specification.
+
 Slave message types
 ---
 
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 463e333531..37f3156dbc 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -517,10 +517,10 @@ int vhost_set_state(NetClientState *nc, int state)
 {
 struct vhost_net *net = get_vhost_net(nc);
 struct vhost_dev *hdev = &net->dev;
-if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
-if (hdev->vhost_ops->vhost_set_state) {
-return hdev->vhost_ops->vhost_set_state(hdev, state);
- }
-}
+
+if (hdev->vhost_ops->vhost_set_state) {
+return hdev->vhost_ops->vhost_set_state(hdev, state);
+}
+
 return 0;
 }
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index ec21e8fbe8..b7e52d97fc 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -59,6 +59,7 @@ enum VhostUserProtocolFeature {
 VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
 VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
 VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
+VHOST_USER_PROTOCOL_F_STATUS = 15,
 VHOST_USER_PROTOCOL_F_MAX
 };
 
@@ -100,6 +101,7 @@ typedef enum VhostUserRequest {
 VHOST_USER_SET_INFLIGHT_FD = 32,
 VHOST_USER_GPU_SET_SOCKET = 33,
 VHOST_USER_RESET_DEVICE = 34,
+VHOST_USER_SET_STATUS = 36,
 VHOST_USER_MAX
 } VhostUserRequest;
 
@@ -1886,6 +1888,38 @@ static int vhost_user_set_inflight_fd(struct vhost_dev 
*dev,
 return 0;
 }
 
+static int vhost_user_set_state(struct vhost_dev *dev, int state)
+{
+bool reply_supported = virtio_has_feature(dev->protocol_features,
+  VHOST_USER_PROTOCOL_F_REPLY_ACK);
+
+VhostUserMsg msg = {
+.hdr.request = VHOST_USER_SET_STATUS,
+.hdr.flags = VHOST_USER_VERSION,
+.hdr.size = sizeof(msg.payload.u64),
+.payload.u64 = (uint64_t)state,
+};
+
+if (!virtio_has_feature(dev->protocol_features,
+VHOST_USER_PROTOCOL_F_STATUS)) {
+return -1;
+}
+
+if (reply_supported) {
+msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
+}
+
+if (vhost_user_write(dev, &msg, NULL, 0) < 0) {
+return -1;
+}
+
+if (reply_supported) {
+return process_message_reply(dev, &msg);
+}
+
+return 0;
+}
+
 bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp)
 {
 if (user->chr) {
@@ -1947,4 +1981,5 @@ const VhostOps user_ops = {
 .vhost_backend_mem_section_filter = vhost_user_mem_section_filter,
 .vhost_get_inflight_fd = vhost_user_get_inflight_fd,
 .vhost_set_inflight_fd = vhost_user_set_inflight_fd,
+.vhost_set_state = vhost_user_set_state,
 };
-- 
2.25.4




[PATCH RFC 30/32] python/qemu/lib: make 'args' style arguments immutable

2020-05-14 Thread John Snow
These arguments don't need to be mutable and aren't really used as
such. Clarify their types as immutable and adjust code to match where
necessary.

In general, it's probably best not to accept a user-defined mutable
object and store it as internal object state unless there's a strong
justification for doing so. Instead, try to use generic types as input
with empty tuples as the default, and coerce to list where necessary.

Signed-off-by: John Snow 
---
 python/qemu/lib/machine.py | 30 +-
 python/qemu/lib/qtest.py   | 16 
 2 files changed, 29 insertions(+), 17 deletions(-)

diff --git a/python/qemu/lib/machine.py b/python/qemu/lib/machine.py
index fb1a02b53c..ec2bb28b86 100644
--- a/python/qemu/lib/machine.py
+++ b/python/qemu/lib/machine.py
@@ -18,6 +18,7 @@
 #
 
 import errno
+from itertools import chain
 import logging
 import os
 import subprocess
@@ -29,6 +30,8 @@
 Dict,
 List,
 Optional,
+Sequence,
+Tuple,
 Type,
 )
 from types import TracebackType
@@ -67,8 +70,12 @@ class QEMUMachine:
 # vm is guaranteed to be shut down here
 """
 
-def __init__(self, binary, args=None, wrapper=None, name=None,
- test_dir="/var/tmp",
+def __init__(self,
+ binary: str,
+ args: Sequence[str] = (),
+ wrapper: Sequence[str] = (),
+ name: Optional[str] = None,
+ test_dir: str = "/var/tmp",
  monitor_address: Optional[SocketAddrT] = None,
  socket_scm_helper=None, sock_dir=None):
 '''
@@ -86,14 +93,7 @@ def __init__(self, binary, args=None, wrapper=None, 
name=None,
 # Direct user configuration
 
 self._binary = binary
-
-if args is None:
-args = []
-# Copy mutable input: we will be modifying our copy
 self._args = list(args)
-
-if wrapper is None:
-wrapper = []
 self._wrapper = wrapper
 
 self._name = name or "qemu-%d" % os.getpid()
@@ -118,7 +118,7 @@ def __init__(self, binary, args=None, wrapper=None, 
name=None,
 self._iolog = None
 self._qmp_set = True   # Enable QMP monitor by default.
 self._qmp_connection: Optional[qmp.QEMUMonitorProtocol] = None
-self._qemu_full_args = None
+self._qemu_full_args: Tuple[str, ...] = ()
 self._temp_dir = None
 self._launched = False
 self._machine = None
@@ -323,7 +323,7 @@ def launch(self):
 raise QEMUMachineError('VM already launched')
 
 self._iolog = None
-self._qemu_full_args = None
+self._qemu_full_args = ()
 try:
 self._launch()
 self._launched = True
@@ -343,8 +343,12 @@ def _launch(self):
 """
 devnull = open(os.path.devnull, 'rb')
 self._pre_launch()
-self._qemu_full_args = (self._wrapper + [self._binary] +
-self._base_args + self._args)
+self._qemu_full_args = tuple(
+chain(self._wrapper,
+  [self._binary],
+  self._base_args,
+  self._args)
+)
 LOG.debug('VM launch command: %r', ' '.join(self._qemu_full_args))
 self._popen = subprocess.Popen(self._qemu_full_args,
stdin=devnull,
diff --git a/python/qemu/lib/qtest.py b/python/qemu/lib/qtest.py
index 05c63a1d58..ae4661d4d3 100644
--- a/python/qemu/lib/qtest.py
+++ b/python/qemu/lib/qtest.py
@@ -22,6 +22,7 @@
 from typing import (
 List,
 Optional,
+Sequence,
 TextIO,
 )
 
@@ -103,8 +104,13 @@ class QEMUQtestMachine(QEMUMachine):
 A QEMU VM, with a qtest socket available.
 """
 
-def __init__(self, binary, args=None, name=None, test_dir="/var/tmp",
- socket_scm_helper=None, sock_dir=None):
+def __init__(self,
+ binary: str,
+ args: Sequence[str] = (),
+ name: Optional[str] = None,
+ test_dir: str = "/var/tmp",
+ socket_scm_helper: Optional[str] = None,
+ sock_dir: Optional[str] = None):
 if name is None:
 name = "qemu-%d" % os.getpid()
 if sock_dir is None:
@@ -118,8 +124,10 @@ def __init__(self, binary, args=None, name=None, 
test_dir="/var/tmp",
 @property
 def _base_args(self) -> List[str]:
 args = super()._base_args
-args.extend(['-qtest', 'unix:path=' + self._qtest_path,
- '-accel', 'qtest'])
+args.extend([
+'-qtest', f"unix:path={self._qtest_path}",
+'-accel', 'qtest'
+])
 return args
 
 def _pre_launch(self):
-- 
2.21.1




[PATCH RFC 16/32] python//qmp.py: re-absorb MonitorResponseError

2020-05-14 Thread John Snow
When I initially split this out, I considered this more of a machine
error than a QMP protocol error, but I think that's misguided.

Move this back to qmp.py and name it QMPResponseError. Convert
qmp.command() to use this exception type.

Signed-off-by: John Snow 
---
 python/qemu/lib/machine.py| 15 +--
 python/qemu/lib/qmp.py| 17 +++--
 scripts/render_block_graph.py |  4 ++--
 3 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/python/qemu/lib/machine.py b/python/qemu/lib/machine.py
index 2f94c851ed..c31bf7cabb 100644
--- a/python/qemu/lib/machine.py
+++ b/python/qemu/lib/machine.py
@@ -48,19 +48,6 @@ class QEMUMachineAddDeviceError(QEMUMachineError):
 """
 
 
-class MonitorResponseError(qmp.QMPError):
-"""
-Represents erroneous QMP monitor reply
-"""
-def __init__(self, reply):
-try:
-desc = reply["error"]["desc"]
-except KeyError:
-desc = reply
-super().__init__(desc)
-self.reply = reply
-
-
 class QEMUMachine:
 """
 A QEMU VM
@@ -433,7 +420,7 @@ def command(self, cmd, conv_keys=True, **args):
 if reply is None:
 raise qmp.QMPError("Monitor is closed")
 if "error" in reply:
-raise MonitorResponseError(reply)
+raise qmp.QMPResponseError(reply)
 return reply["return"]
 
 def get_qmp_event(self, wait=False):
diff --git a/python/qemu/lib/qmp.py b/python/qemu/lib/qmp.py
index 911da59888..82f86b4e45 100644
--- a/python/qemu/lib/qmp.py
+++ b/python/qemu/lib/qmp.py
@@ -61,6 +61,19 @@ class QMPTimeoutError(QMPError):
 """
 
 
+class QMPResponseError(QMPError):
+"""
+Represents erroneous QMP monitor reply
+"""
+def __init__(self, reply: QMPMessage):
+try:
+desc = reply['error']['desc']
+except KeyError:
+desc = reply
+super().__init__(desc)
+self.reply = reply
+
+
 class QEMUMonitorProtocol:
 """
 Provide an API to connect to QEMU via QEMU Monitor Protocol (QMP) and then
@@ -250,8 +263,8 @@ def command(self, cmd, **kwds):
 Build and send a QMP command to the monitor, report errors if any
 """
 ret = self.cmd(cmd, kwds)
-if "error" in ret:
-raise Exception(ret['error']['desc'])
+if 'error' in ret:
+raise QMPResponseError(ret)
 return ret['return']
 
 def pull_event(self, wait=False):
diff --git a/scripts/render_block_graph.py b/scripts/render_block_graph.py
index 8048d9fbbe..332ab49a91 100755
--- a/scripts/render_block_graph.py
+++ b/scripts/render_block_graph.py
@@ -26,7 +26,7 @@
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'python'))
 from qemu.lib import QEMUMonitorProtocol
-from qemu.lib.machine import MonitorResponseError
+from qemu.lib.qmp import QMPResponseError
 
 
 def perm(arr):
@@ -103,7 +103,7 @@ def command(self, cmd):
 reply = json.loads(subprocess.check_output(ar))
 
 if 'error' in reply:
-raise MonitorResponseError(reply)
+raise QEMUResponseError(reply)
 
 return reply['return']
 
-- 
2.21.1




[PATCH RFC 18/32] python//qmp.py: add casts to JSON deserialization

2020-05-14 Thread John Snow
mypy and python type hints are not powerful enough to properly describe
JSON messages in Python 3.6. The best we can do, generally, is describe
them as Dict[str, Any].

Add casts to coerce this type for static analysis; but do NOT enforce
this type at runtime in any way.

Note: Python 3.8 adds a TypedDict construct which allows for the
description of more arbitrary Dictionary shapes. There is a third-party
module, "Pydantic", which is compatible with 3.6 that can be used
instead of the JSON library that parses JSON messages to fully-typed
Python objects, and may be preferable in some cases.

(That is well beyond the scope of this commit or series.)

Signed-off-by: John Snow 
---
 python/qemu/lib/qmp.py | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/python/qemu/lib/qmp.py b/python/qemu/lib/qmp.py
index 0036204218..e460234f2e 100644
--- a/python/qemu/lib/qmp.py
+++ b/python/qemu/lib/qmp.py
@@ -13,6 +13,7 @@
 import logging
 from typing import (
 Any,
+cast,
 Dict,
 Optional,
 TextIO,
@@ -129,7 +130,10 @@ def __json_read(self, only_event=False):
 data = self.__sockfile.readline()
 if not data:
 return None
-resp = json.loads(data)
+# By definition, any JSON received from QMP is a QMPMessage,
+# and we are asserting only at static analysis time that it
+# has a particular shape.
+resp = cast(QMPMessage, json.loads(data))
 if 'event' in resp:
 self.logger.debug("<<< %s", resp)
 self.__events.append(resp)
@@ -261,7 +265,7 @@ def command(self, cmd, **kwds):
 ret = self.cmd(cmd, kwds)
 if 'error' in ret:
 raise QMPResponseError(ret)
-return ret['return']
+return cast(QMPReturnValue, ret['return'])
 
 def pull_event(self, wait=False):
 """
-- 
2.21.1




[PATCH RFC 20/32] python//qmp.py: assert sockfile is not None

2020-05-14 Thread John Snow
In truth, if you don't do this, you'll just get a TypeError
exception. Now, you'll get an AssertionError.

Is this tangibly better? No.
Does mypy complain less? Yes.

Signed-off-by: John Snow 
---
 python/qemu/lib/qmp.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/python/qemu/lib/qmp.py b/python/qemu/lib/qmp.py
index 5fb16f4b42..1aefc00c93 100644
--- a/python/qemu/lib/qmp.py
+++ b/python/qemu/lib/qmp.py
@@ -132,6 +132,7 @@ def __negotiate_capabilities(self):
 raise QMPCapabilitiesError
 
 def __json_read(self, only_event=False):
+assert self.__sockfile is not None
 while True:
 data = self.__sockfile.readline()
 if not data:
-- 
2.21.1




[PATCH RFC 19/32] python//qmp.py: add QMPProtocolError

2020-05-14 Thread John Snow
In the case that we receive a reply but are unable to understand it, use
this exception name to indicate that case.

Signed-off-by: John Snow 
---
 python/qemu/lib/qmp.py | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/python/qemu/lib/qmp.py b/python/qemu/lib/qmp.py
index e460234f2e..5fb16f4b42 100644
--- a/python/qemu/lib/qmp.py
+++ b/python/qemu/lib/qmp.py
@@ -62,6 +62,12 @@ class QMPTimeoutError(QMPError):
 """
 
 
+class QMPProtocolError(QMPError):
+"""
+QMP protocol error; unexpected response
+"""
+
+
 class QMPResponseError(QMPError):
 """
 Represents erroneous QMP monitor reply
@@ -265,6 +271,10 @@ def command(self, cmd, **kwds):
 ret = self.cmd(cmd, kwds)
 if 'error' in ret:
 raise QMPResponseError(ret)
+if 'return' not in ret:
+raise QMPProtocolError(
+"'return' key not found in QMP response '{}'".format(str(ret))
+)
 return cast(QMPReturnValue, ret['return'])
 
 def pull_event(self, wait=False):
-- 
2.21.1




[PATCH RFC 32/32] python/qemu/lib: Add mypy type annotations

2020-05-14 Thread John Snow
These should all be purely annotations with no changes in behavior at
all.

Signed-off-by: John Snow 
---
 python/qemu/lib/accel.py   |  8 ++--
 python/qemu/lib/machine.py | 94 +-
 python/qemu/lib/qmp.py | 44 ++
 python/qemu/lib/qtest.py   | 27 ++-
 4 files changed, 99 insertions(+), 74 deletions(-)

diff --git a/python/qemu/lib/accel.py b/python/qemu/lib/accel.py
index 7fabe62920..4325114e51 100644
--- a/python/qemu/lib/accel.py
+++ b/python/qemu/lib/accel.py
@@ -17,6 +17,7 @@
 import logging
 import os
 import subprocess
+from typing import List, Optional
 
 LOG = logging.getLogger(__name__)
 
@@ -29,7 +30,7 @@
 }
 
 
-def list_accel(qemu_bin):
+def list_accel(qemu_bin: str) -> List[str]:
 """
 List accelerators enabled in the QEMU binary.
 
@@ -49,7 +50,8 @@ def list_accel(qemu_bin):
 return [acc.strip() for acc in out.splitlines()[1:]]
 
 
-def kvm_available(target_arch=None, qemu_bin=None):
+def kvm_available(target_arch: Optional[str] = None,
+  qemu_bin: Optional[str] = None) -> bool:
 """
 Check if KVM is available using the following heuristic:
   - Kernel module is present in the host;
@@ -72,7 +74,7 @@ def kvm_available(target_arch=None, qemu_bin=None):
 return True
 
 
-def tcg_available(qemu_bin):
+def tcg_available(qemu_bin: str) -> bool:
 """
 Check if TCG is available.
 
diff --git a/python/qemu/lib/machine.py b/python/qemu/lib/machine.py
index ec2bb28b86..c84db2bf8f 100644
--- a/python/qemu/lib/machine.py
+++ b/python/qemu/lib/machine.py
@@ -27,6 +27,7 @@
 import tempfile
 from typing import (
 Any,
+BinaryIO,
 Dict,
 List,
 Optional,
@@ -37,7 +38,7 @@
 from types import TracebackType
 
 from . import qmp
-from .qmp import SocketAddrT, QMPMessage
+from .qmp import QMPMessage, QMPReturnValue, SocketAddrT
 
 LOG = logging.getLogger(__name__)
 
@@ -60,7 +61,7 @@ class QEMUMachineAddDeviceError(QEMUMachineError):
 
 class QEMUMachine:
 """
-A QEMU VM
+A QEMU VM.
 
 Use this object as a context manager to ensure
 the QEMU process terminates::
@@ -77,7 +78,8 @@ def __init__(self,
  name: Optional[str] = None,
  test_dir: str = "/var/tmp",
  monitor_address: Optional[SocketAddrT] = None,
- socket_scm_helper=None, sock_dir=None):
+ socket_scm_helper: Optional[str] = None,
+ sock_dir: Optional[str] = None):
 '''
 Initialize a QEMUMachine
 
@@ -111,27 +113,27 @@ def __init__(self,
 self._remove_monitor_sockfile = True
 
 # Runstate
-self._qemu_log_path = None
-self._qemu_log_file = None
+self._qemu_log_path: Optional[str] = None
+self._qemu_log_file: Optional[BinaryIO] = None
 self._popen: Optional['subprocess.Popen[bytes]'] = None
-self._events = []
-self._iolog = None
+self._events: List[QMPMessage] = []
+self._iolog: Optional[str] = None
 self._qmp_set = True   # Enable QMP monitor by default.
 self._qmp_connection: Optional[qmp.QEMUMonitorProtocol] = None
 self._qemu_full_args: Tuple[str, ...] = ()
-self._temp_dir = None
+self._temp_dir: Optional[str] = None
 self._launched = False
-self._machine = None
+self._machine: Optional[str] = None
 self._console_index = 0
 self._console_set = False
-self._console_device_type = None
+self._console_device_type: Optional[str] = None
 self._console_address = os.path.join(
 self._sock_dir, f"{self._name}-console.sock"
 )
-self._console_socket = None
-self._remove_files = []
+self._console_socket: Optional[socket.socket] = None
+self._remove_files: List[str] = []
 
-def __enter__(self):
+def __enter__(self) -> 'QEMUMachine':
 return self
 
 def __exit__(self,
@@ -140,14 +142,15 @@ def __exit__(self,
  exc_tb: Optional[TracebackType]) -> None:
 self.shutdown()
 
-def add_monitor_null(self):
+def add_monitor_null(self) -> None:
 """
 This can be used to add an unused monitor instance.
 """
 self._args.append('-monitor')
 self._args.append('null')
 
-def add_fd(self, fd, fdset, opaque, opts=''):
+def add_fd(self, fd: int, fdset: int,
+   opaque: str, opts: str = '') -> 'QEMUMachine':
 """
 Pass a file descriptor to the VM
 """
@@ -166,7 +169,8 @@ def add_fd(self, fd, fdset, opaque, opts=''):
 self._args.append(','.join(options))
 return self
 
-def send_fd_scm(self, fd=None, file_path=None):
+def send_fd_scm(self, fd: Optional[int] = None,
+file_path: Optional[str] = None) -> int:
 """
 Send an fd or file_path to socket_scm_helper.
 
@@ -210,7 +214,7 @@ def 

[PATCH RFC 31/32] python/qemu: add mypy to Pipfile

2020-05-14 Thread John Snow
Signed-off-by: John Snow 
---
 python/Pipfile  |  1 +
 python/Pipfile.lock | 37 -
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/python/Pipfile b/python/Pipfile
index e396e56e06..80bd6c3188 100644
--- a/python/Pipfile
+++ b/python/Pipfile
@@ -6,6 +6,7 @@ verify_ssl = true
 [dev-packages]
 pylint = "==2.5.0"
 flake8 = "*"
+mypy = "*"
 
 [packages]
 
diff --git a/python/Pipfile.lock b/python/Pipfile.lock
index 15cd8a918f..085f23fe8a 100644
--- a/python/Pipfile.lock
+++ b/python/Pipfile.lock
@@ -1,7 +1,7 @@
 {
 "_meta": {
 "hash": {
-"sha256": 
"58115144ace0f646b5c62da260fb4867ac0a0e485de3f5b0a56c7854afa21f5b"
+"sha256": 
"17a30ead8719d80349ff5473bda3133fd4559a4e83d3f9d669631ac4b2171502"
 },
 "pipfile-spec": 6,
 "requires": {
@@ -79,6 +79,33 @@
 ],
 "version": "==0.6.1"
 },
+"mypy": {
+"hashes": [
+
"sha256:15b948e1302682e3682f11f50208b726a246ab4e6c1b39f9264a8796bb416aa2",
+
"sha256:219a3116ecd015f8dca7b5d2c366c973509dfb9a8fc97ef044a36e3da66144a1",
+
"sha256:3b1fc683fb204c6b4403a1ef23f0b1fac8e4477091585e0c8c54cbdf7d7bb164",
+
"sha256:3beff56b453b6ef94ecb2996bea101a08f1f8a9771d3cbf4988a61e4d9973761",
+
"sha256:7687f6455ec3ed7649d1ae574136835a4272b65b3ddcf01ab8704ac65616c5ce",
+
"sha256:7ec45a70d40ede1ec7ad7f95b3c94c9cf4c186a32f6bacb1795b60abd2f9ef27",
+
"sha256:86c857510a9b7c3104cf4cde1568f4921762c8f9842e987bc03ed4f160925754",
+
"sha256:8a627507ef9b307b46a1fea9513d5c98680ba09591253082b4c48697ba05a4ae",
+
"sha256:8dfb69fbf9f3aeed18afffb15e319ca7f8da9642336348ddd6cab2713ddcf8f9",
+
"sha256:a34b577cdf6313bf24755f7a0e3f3c326d5c1f4fe7422d1d06498eb25ad0c600",
+
"sha256:a8ffcd53cb5dfc131850851cc09f1c44689c2812d0beb954d8138d4f5fc17f65",
+
"sha256:b90928f2d9eb2f33162405f32dde9f6dcead63a0971ca8a1b50eb4ca3e35ceb8",
+
"sha256:c56ffe22faa2e51054c5f7a3bc70a370939c2ed4de308c690e7949230c995913",
+
"sha256:f91c7ae919bbc3f96cd5e5b2e786b2b108343d1d7972ea130f7de27fdd547cf3"
+],
+"index": "pypi",
+"version": "==0.770"
+},
+"mypy-extensions": {
+"hashes": [
+
"sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d",
+
"sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"
+],
+"version": "==0.4.3"
+},
 "pycodestyle": {
 "hashes": [
 
"sha256:95a2219d12372f05704562a14ec30bc76b05a5b297b21a5dfe3f6fac3491ae56",
@@ -142,6 +169,14 @@
 "markers": "implementation_name == 'cpython' and python_version < 
'3.8'",
 "version": "==1.4.1"
 },
+"typing-extensions": {
+"hashes": [
+
"sha256:6e95524d8a547a91e08f404ae485bbb71962de46967e1b71a0cb89af24e761c5",
+
"sha256:79ee589a3caca649a9bfd2a8de4709837400dfa00b6cc81962a1e6a1815969ae",
+
"sha256:f8d2bd89d25bc39dabe7d23df520442fa1d8969b82544370e03d88b5a591c392"
+],
+"version": "==3.7.4.2"
+},
 "wrapt": {
 "hashes": [
 
"sha256:b62ffa81fb85f4332a4f609cab4ac40709470da05643a082ec1eb88e6d9b97d7"
-- 
2.21.1




Re: [PATCH RFC 19/32] python//qmp.py: add QMPProtocolError

2020-05-14 Thread Philippe Mathieu-Daudé

On 5/14/20 7:53 AM, John Snow wrote:

In the case that we receive a reply but are unable to understand it, use
this exception name to indicate that case.

Signed-off-by: John Snow 
---
  python/qemu/lib/qmp.py | 10 ++
  1 file changed, 10 insertions(+)

diff --git a/python/qemu/lib/qmp.py b/python/qemu/lib/qmp.py
index e460234f2e..5fb16f4b42 100644
--- a/python/qemu/lib/qmp.py
+++ b/python/qemu/lib/qmp.py
@@ -62,6 +62,12 @@ class QMPTimeoutError(QMPError):
  """
  
  
+class QMPProtocolError(QMPError):

+"""
+QMP protocol error; unexpected response
+"""
+
+
  class QMPResponseError(QMPError):
  """
  Represents erroneous QMP monitor reply
@@ -265,6 +271,10 @@ def command(self, cmd, **kwds):
  ret = self.cmd(cmd, kwds)
  if 'error' in ret:
  raise QMPResponseError(ret)
+if 'return' not in ret:
+raise QMPProtocolError(
+"'return' key not found in QMP response '{}'".format(str(ret))
+)
  return cast(QMPReturnValue, ret['return'])
  
  def pull_event(self, wait=False):




Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH RFC 29/32] python//qtest.py: Check before accessing _qtest

2020-05-14 Thread Philippe Mathieu-Daudé

On 5/14/20 7:54 AM, John Snow wrote:

It can be None; so add assertions or exceptions where appropriate to
guard the access accordingly.

Signed-off-by: John Snow 
---
  python/qemu/lib/qtest.py | 14 +++---
  1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/python/qemu/lib/qtest.py b/python/qemu/lib/qtest.py
index a8be0c782f..05c63a1d58 100644
--- a/python/qemu/lib/qtest.py
+++ b/python/qemu/lib/qtest.py
@@ -126,7 +126,8 @@ def _pre_launch(self):
  super()._pre_launch()
  self._qtest = QEMUQtestProtocol(self._qtest_path, server=True)
  
-def _post_launch(self):

+def _post_launch(self) -> None:
+assert self._qtest is not None
  super()._post_launch()
  self._qtest.accept()
  
@@ -134,6 +135,13 @@ def _post_shutdown(self):

  super()._post_shutdown()
  self._remove_if_exists(self._qtest_path)
  
-def qtest(self, cmd):

-'''Send a qtest command to guest'''
+def qtest(self, cmd: str) -> str:
+"""
+Send a qtest command to the guest.
+
+:param cmd: qtest command to send
+:return: qtest server response
+"""
+if self._qtest is None:
+raise RuntimeError("qtest socket not available")
  return self._qtest.cmd(cmd)



Reviewed-by: Philippe Mathieu-Daudé 




[Bug 1686980] Re: qemu is very slow when adding 16,384 virtio-scsi drives

2020-05-14 Thread Thomas Huth
Is this faster nowadays if you use the new -blockdev parameter instead
of -drive?

** Changed in: qemu
   Status: New => Incomplete

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1686980

Title:
  qemu is very slow when adding 16,384 virtio-scsi drives

Status in QEMU:
  Incomplete

Bug description:
  qemu runs very slowly when adding many virtio-scsi drives.  I have
  attached a small reproducer shell script which demonstrates this.

  Using perf shows the following stack trace taking all the time:

  72.42%71.15%  qemu-system-x86  qemu-system-x86_64   [.] drive_get
  |  
   --72.32%--drive_get
 |  
  --1.24%--__irqentry_text_start
|  
 --1.22%--smp_apic_timer_interrupt
   |  

--1.00%--local_apic_timer_interrupt
  |  
   
--1.00%--hrtimer_interrupt
 |  
  
--0.83%--__hrtimer_run_queues

|  

 --0.64%--tick_sched_timer

  21.70%21.34%  qemu-system-x86  qemu-system-x86_64   [.] 
blk_legacy_dinfo
  |
  ---blk_legacy_dinfo

   3.65% 3.59%  qemu-system-x86  qemu-system-x86_64   [.] blk_next
  |
  ---blk_next

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1686980/+subscriptions



Re: [PATCH v5 00/12] Cadence GEM Fixes

2020-05-14 Thread Jason Wang



On 2020/5/12 下午10:54, Sai Pavan Boddu wrote:

Hi,

Following patch series fixes issues with priority queues,
Adds JUMBO Frame support,
Makes Debug statements compilable &
Fixes related to multicast frames.

Changes for V2:
Fixed build failure on fedora docker machine
Fix buggy debug print to use sized integer casting
Changes for V3:
1/10: Fixed debug statements to use %u and %zd
  Remove rxoffset for buffer address
2/10: Add inline functions to get tx/rx queue base address.
4/10: fix read only mask
5/10: Move packet buffers to CadenceGEMState
6/10: Add JUMBO MAX LEN register
Changes for V4:
7/11: Fix up the existing code style in register defines
8/11: jumbo-max-len property sets the default value of jumbo frame
  Add frame length checks for tx and rx
Changes for V5:
8/11: Add a cap on jumbo frame size and print guest errors if exceeded.
   Move jumbo-max-len property into static properties section.

Sai Pavan Boddu (11):
   net: cadence_gem: Fix debug statements
   net: cadence_gem: Fix the queue address update during wrap around
   net: cadence_gem: Fix irq update w.r.t queue
   net: cadence_gem: Define access permission for interrupt registers
   net: cadence_gem: Set ISR according to queue in use
   net: cadence_gem: Move tx/rx packet buffert to CadenceGEMState
   net: cadence_gem: Fix up code style
   net: cadence_gem: Add support for jumbo frames
   net: cadnece_gem: Update irq_read_clear field of designcfg_debug1 reg
   net: cadence_gem: Update the reset value for interrupt mask register
   net: cadence_gem: TX_LAST bit should be set by guest

Tong Ho (1):
   net: cadence_gem: Fix RX address filtering

  hw/net/cadence_gem.c | 458 ---
  include/hw/net/cadence_gem.h |   6 +
  2 files changed, 265 insertions(+), 199 deletions(-)



Applied.

Thanks





Re: [RESEND PATCH v3 1/1] ppc/spapr: Add hotremovable flag on DIMM LMBs on drmem_v2

2020-05-14 Thread David Gibson
On Thu, May 14, 2020 at 08:05:17AM +0200, Cédric Le Goater wrote:
> >> --- a/hw/ppc/spapr.c
> >> +++ b/hw/ppc/spapr.c
> >> @@ -446,7 +446,8 @@ static int 
> >> spapr_dt_dynamic_memory_v2(SpaprMachineState *spapr, void *fdt,
> >>  g_assert(drc);
> >>  elem = spapr_get_drconf_cell(size / lmb_size, addr,
> >>   spapr_drc_index(drc), node,
> >> - SPAPR_LMB_FLAGS_ASSIGNED);
> >> + (SPAPR_LMB_FLAGS_ASSIGNED |
> >> +  SPAPR_LMB_FLAGS_HOTREMOVABLE);
> 
> 
> This is missing a ')'

So it is.  I've corrected this in my tree, but please do compile test
your patches before sending.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [RFC v2] migration: Add migrate-set-bitmap-node-mapping

2020-05-14 Thread Max Reitz
On 13.05.20 22:09, Vladimir Sementsov-Ogievskiy wrote:
> 13.05.2020 17:56, Max Reitz wrote:
>> This command allows mapping block node names to aliases for the purpose
>> of block dirty bitmap migration.
>>
>> This way, management tools can use different node names on the source
>> and destination and pass the mapping of how bitmaps are to be
>> transferred to qemu (on the source, the destination, or even both with
>> arbitrary aliases in the migration stream).
>>
>> Suggested-by: Vladimir Sementsov-Ogievskiy 
>> Signed-off-by: Max Reitz 
>> ---
>> Branch: https://github.com/XanClic/qemu.git
>> migration-bitmap-mapping-rfc-v2
>> Branch: https://git.xanclic.moe/XanClic/qemu.git
>> migration-bitmap-mapping-rfc-v2
>>
>> (Sorry, v1 was just broken.  This one should work better.)
>>
>> Vladimir has proposed something like this in April:
>> https://lists.nongnu.org/archive/html/qemu-block/2020-04/msg00171.html
>>
>> Now I’ve been asked by my manager to look at this, so I decided to just
>> write a patch to see how it’d play out.
> 
> Great! Sometimes I remember about this thing, but never start
> implementing :)
> 
>>
>> This is an RFC, because I’d like to tack on tests to the final version,
>> but I’m not sure whether I can come up with something before the end of
>> the week (and I’ll be on PTO for the next two weeks).
>>
>> Also, I don’t know whether migration/block-dirty-bitmap.c is the best
>> place to put qmp_migrate_set_bitmap_mapping(), but it appears we already
>> have some QMP handlers in migration/, so I suppose it isn’t too bad.
>> ---
>>   qapi/migration.json    | 36 
>>   migration/block-dirty-bitmap.c | 60 --
>>   2 files changed, 94 insertions(+), 2 deletions(-)
>>
>> diff --git a/qapi/migration.json b/qapi/migration.json
>> index d5000558c6..97037ea635 100644
>> --- a/qapi/migration.json
>> +++ b/qapi/migration.json
>> @@ -1621,3 +1621,39 @@
>>   ##
>>   { 'event': 'UNPLUG_PRIMARY',
>>     'data': { 'device-id': 'str' } }
>> +
>> +##
>> +# @MigrationBlockNodeMapping:
>> +#
>> +# Maps a block node name to an alias for migration.
>> +#
>> +# @node-name: A block node name.
>> +#
>> +# @alias: An alias name for migration (for example the node name on
>> +# the opposite site).
>> +#
>> +# Since: 5.1
>> +##
>> +{ 'struct': 'MigrationBlockNodeMapping',
>> +  'data': {
>> +  'node-name': 'str',
>> +  'alias': 'str'
>> +  } }
>> +
>> +##
>> +# @migrate-set-bitmap-node-mapping:
>> +#
>> +# Maps block node names to arbitrary aliases for the purpose of dirty
>> +# bitmap migration.  Such aliases may for example be the corresponding
>> +# node names on the opposite site.
>> +#
>> +# By default, every node name is mapped to itself.
>> +#
>> +# @mapping: The mapping; must be one-to-one, but not necessarily
>> +#   complete.  Any mapping not given will be reset to the
>> +#   default (i.e. the identity mapping).
>> +#
>> +# Since: 5.1
>> +##
>> +{ 'command': 'migrate-set-bitmap-node-mapping',
>> +  'data': { 'mapping': ['MigrationBlockNodeMapping'] } }
> 
> Hm. I like it, it's simpler and clearer than what I was thinking about.
> 
> 1. So, you decided to make only node-mapping, not bitmap-mapping, so we
> can't rename bitmaps in-flight and can't migrate bitmaps from one node
> to several and vice versa. I think it's OK, nothing good in such
> possibilities, and this simplifies things.

If it turns out that we’d want it, I suppose we can also still always
extend MigrationBlockNodeMapping by another mapping array for bitmaps.

> 2. If I understand correctly, defaulting to node-name matching doesn't make
> real sense for libvirt. But on the other hand, libvirt should not be
> considered the only user of QEMU. Still, the default seems unsafe.
> Could we make it optional? Or add an option to disable this default for
> absolutely strict behavior?

It was my understanding that libvirt (which should know about all
bitmaps on all nodes) would and could ensure itself that all nodes are
mapped according to what it needs.  (But that’s why Peter is CC’d, to
get his input.)

But your idea seems simple, so why not.

> Maybe add a parameter
> 
> fallback: node-name | error | drop
> 
> where
>   node-name: use node-name as an alias, if found bitmap on the node not
> mentioned in @mapping [should not be useful for libvirt, but may be for
> others]
>   error: just error-out if such bitmap found [libvirt should use it, I
> propose it as a default value for @fallback]

You mean error out during migration?  Hm.  I suppose that’s OK, if some
mapping erroneously isn’t set and the node name doesn’t exist in the
destination, we’ll error out, too, so...

Shouldn’t be too difficult to implement, just put the enum in
dirty_bitmap_mig_state, and then do what it says when no entry can be
found in the mapping QDict.

>   drop: just ignore such bitmap - it will be lost [just an idea, I
> doubt that it is really useful]
> 
> ===
> 
> Also, we 

Re: [PATCH v6 19/20] hw/block/nvme: do cmb/pmr init as part of pci init

2020-05-14 Thread Philippe Mathieu-Daudé

On 5/14/20 6:46 AM, Klaus Jensen wrote:

From: Klaus Jensen 


Having the patch subject duplicated in the body eases review (not all email
clients display the subject close to the content):


"Do cmb/pmr init as part of pci init."

Reviewed-by: Philippe Mathieu-Daudé 



Signed-off-by: Klaus Jensen 
Reviewed-by: Maxim Levitsky 
---
  hw/block/nvme.c | 12 ++--
  1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 7254b66ae199..2addcc86034a 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1527,6 +1527,12 @@ static void nvme_init_pci(NvmeCtrl *n, PCIDevice 
*pci_dev)
  pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY |
   PCI_BASE_ADDRESS_MEM_TYPE_64, >iomem);
  msix_init_exclusive_bar(pci_dev, n->params.max_ioqpairs + 1, 4, NULL);
+
+if (n->params.cmb_size_mb) {
+nvme_init_cmb(n, pci_dev);
+} else if (n->pmrdev) {
+nvme_init_pmr(n, pci_dev);
+}
  }
  
  static void nvme_realize(PCIDevice *pci_dev, Error **errp)

@@ -1588,12 +1594,6 @@ static void nvme_realize(PCIDevice *pci_dev, Error 
**errp)
  n->bar.vs = 0x00010200;
  n->bar.intmc = n->bar.intms = 0;
  
-if (n->params.cmb_size_mb) {

-nvme_init_cmb(n, pci_dev);
-} else if (n->pmrdev) {
-nvme_init_pmr(n, pci_dev);
-}
-
  for (i = 0; i < n->num_namespaces; i++) {
  nvme_init_namespace(n, >namespaces[i], _err);
  if (local_err) {






Re: [PATCH RFC 14/32] python//qmp.py: use True/False for non/blocking modes

2020-05-14 Thread Philippe Mathieu-Daudé

On 5/14/20 7:53 AM, John Snow wrote:

The type system doesn't want integers.

Signed-off-by: John Snow 
---
  python/qemu/lib/qmp.py | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/qemu/lib/qmp.py b/python/qemu/lib/qmp.py
index b91c9d5c1c..a634c4e26c 100644
--- a/python/qemu/lib/qmp.py
+++ b/python/qemu/lib/qmp.py
@@ -120,14 +120,14 @@ def __get_events(self, wait=False):
  """
  
  # Check for new events regardless and pull them into the cache:

-self.__sock.setblocking(0)
+self.__sock.setblocking(False)
  try:
  self.__json_read()
  except OSError as err:
  if err.errno == errno.EAGAIN:
  # No data available
  pass
-self.__sock.setblocking(1)
+self.__sock.setblocking(True)
  
  # Wait for new events, if needed.

  # if wait is 0.0, this means "no wait" and is also implicitly false.



Reviewed-by: Philippe Mathieu-Daudé 




Re: [PATCH v10 14/14] iotests: use python logging for iotests.log()

2020-05-14 Thread John Snow



On 3/31/20 9:44 AM, Kevin Wolf wrote:
> Am 31.03.2020 um 02:00 hat John Snow geschrieben:
>> We can turn logging on/off globally instead of per-function.
>>
>> Remove use_log from run_job, and use python logging to turn on
>> diffable output when we run through a script entry point.
>>
>> iotest 245 changes output order due to buffering reasons.
>>
>>
>> An extended note on python logging:
>>
>> A NullHandler is added to `qemu.iotests` to stop output from being
>> generated if this code is used as a library without configuring logging.
>> A NullHandler is only needed at the root, so a duplicate handler is not
>> needed for `qemu.iotests.diff_io`.
>>
>> When logging is not configured, messages at the 'WARNING' levels or
>> above are printed with default settings. The NullHandler stops this from
>> occurring, which is considered good hygiene for code used as a library.
>>
>> See https://docs.python.org/3/howto/logging.html#library-config
>>
>> When logging is actually enabled (always at the behest of an explicit
>> call by a client script), a root logger is implicitly created at the
>> root, which allows messages to propagate upwards and be handled/emitted
>> from the root logger with default settings.
>>
>> When we want iotest logging, we attach a handler to the
>> qemu.iotests.diff_io logger and disable propagation to avoid possible
>> double-printing.
>>
>> For more information on python logging infrastructure, I highly
>> recommend downloading the pip package `logging_tree`, which provides
>> convenient visualizations of the hierarchical logging configuration
>> under different circumstances.
>>
>> See https://pypi.org/project/logging_tree/ for more information.
>>
>> Signed-off-by: John Snow 
>> Reviewed-by: Max Reitz 
> 
> Should we enable logger if -d is given?
> 
> Previously we had:
> 
> $ ./check -d -T -raw 281
> [...]
> 281 not run: not suitable for this image format: raw
> 281  not run[15:39:03] [15:39:04]not suitable for 
> this image format: raw
> Not run: 281
> 
> After this series, the first line of output from notrun() is missing.
> Not that I think it's important to have the line, but as long as we
> bother to call logger.warning(), I thought that maybe we want to be able
> to actually see the effect of it somewhere?
> 
> Kevin
> 

Uh, okay. So this is weirder than I thought it was going to be!

So, if you move the debug configuration up above the _verify calls,
you'll see the message printed out to the debug stream:

DEBUG:qemu.iotests:iotests debugging messages active
WARNING:qemu.iotests:281 not run: not suitable for this image format: raw

...but if you omit the `-d` flag, the message vanishes into a black
hole. Did it always work like that ...?

(I'll keep looking. --js)




Re: [PATCH] python: remove more instances of sys.version_info

2020-05-14 Thread Philippe Mathieu-Daudé

On 5/14/20 5:52 AM, John Snow wrote:

We guarantee 3.5+ everywhere; remove more dead checks. In general, try
to avoid using version checks and instead prefer to attempt behavior
when possible.

Signed-off-by: John Snow 
---
  scripts/analyze-migration.py |  5 -
  scripts/decodetree.py| 25 +---
  scripts/qmp/qmp-shell|  3 ---
  tests/docker/docker.py   |  5 +++--
  tests/qemu-iotests/nbd-fault-injector.py |  5 +
  5 files changed, 13 insertions(+), 30 deletions(-)

diff --git a/scripts/analyze-migration.py b/scripts/analyze-migration.py
index 96a31d3974..95838cbff3 100755
--- a/scripts/analyze-migration.py
+++ b/scripts/analyze-migration.py
@@ -25,11 +25,6 @@
  import sys
  
  
-MIN_PYTHON = (3, 2)

-if sys.version_info < MIN_PYTHON:
-sys.exit("Python %s.%s or later is required.\n" % MIN_PYTHON)
-
-
  def mkdir_p(path):
  try:
  os.makedirs(path)
diff --git a/scripts/decodetree.py b/scripts/decodetree.py
index 46ab917807..f9d204aa36 100755
--- a/scripts/decodetree.py
+++ b/scripts/decodetree.py
@@ -75,13 +75,6 @@ def output(*args):
  output_fd.write(a)
  
  
-if sys.version_info >= (3, 4):

-re_fullmatch = re.fullmatch
-else:
-def re_fullmatch(pat, str):
-return re.match('^' + pat + '$', str)
-
-
  def output_autogen():
  output('/* This file is autogenerated by scripts/decodetree.py.  */\n\n')
  
@@ -428,18 +421,18 @@ def parse_field(lineno, name, toks):

  width = 0
  func = None
  for t in toks:
-if re_fullmatch('!function=' + re_ident, t):
+if re.fullmatch('!function=' + re_ident, t):
  if func:
  error(lineno, 'duplicate function')
  func = t.split('=')
  func = func[1]
  continue
  
-if re_fullmatch('[0-9]+:s[0-9]+', t):

+if re.fullmatch('[0-9]+:s[0-9]+', t):
  # Signed field extract
  subtoks = t.split(':s')
  sign = True
-elif re_fullmatch('[0-9]+:[0-9]+', t):
+elif re.fullmatch('[0-9]+:[0-9]+', t):
  # Unsigned field extract
  subtoks = t.split(':')
  sign = False
@@ -488,11 +481,11 @@ def parse_arguments(lineno, name, toks):
  flds = []
  extern = False
  for t in toks:
-if re_fullmatch('!extern', t):
+if re.fullmatch('!extern', t):
  extern = True
  anyextern = True
  continue
-if not re_fullmatch(re_ident, t):
+if not re.fullmatch(re_ident, t):
  error(lineno, 'invalid argument set token "{0}"'.format(t))
  if t in flds:
  error(lineno, 'duplicate argument "{0}"'.format(t))
@@ -621,13 +614,13 @@ def parse_generic(lineno, is_format, name, toks):
  continue
  
  # 'Foo=%Bar' imports a field with a different name.

-if re_fullmatch(re_ident + '=%' + re_ident, t):
+if re.fullmatch(re_ident + '=%' + re_ident, t):
  (fname, iname) = t.split('=%')
  flds = add_field_byname(lineno, flds, fname, iname)
  continue
  
  # 'Foo=number' sets an argument field to a constant value

-if re_fullmatch(re_ident + '=[+-]?[0-9]+', t):
+if re.fullmatch(re_ident + '=[+-]?[0-9]+', t):
  (fname, value) = t.split('=')
  value = int(value)
  flds = add_field(lineno, flds, fname, ConstField(value))
@@ -635,7 +628,7 @@ def parse_generic(lineno, is_format, name, toks):
  
  # Pattern of 0s, 1s, dots and dashes indicate required zeros,

  # required ones, or dont-cares.
-if re_fullmatch('[01.-]+', t):
+if re.fullmatch('[01.-]+', t):
  shift = len(t)
  fms = t.replace('0', '1')
  fms = fms.replace('.', '0')
@@ -652,7 +645,7 @@ def parse_generic(lineno, is_format, name, toks):
  fixedmask = (fixedmask << shift) | fms
  undefmask = (undefmask << shift) | ubm
  # Otherwise, fieldname:fieldwidth
-elif re_fullmatch(re_ident + ':s?[0-9]+', t):
+elif re.fullmatch(re_ident + ':s?[0-9]+', t):
  (fname, flen) = t.split(':')
  sign = False
  if flen[0] == 's':
diff --git a/scripts/qmp/qmp-shell b/scripts/qmp/qmp-shell
index a01d31de1e..c5eef06f3f 100755
--- a/scripts/qmp/qmp-shell
+++ b/scripts/qmp/qmp-shell
@@ -77,9 +77,6 @@ import re
  sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'python'))
  from qemu import qmp
  
-if sys.version_info[0] == 2:


:)

Reviewed-by: Philippe Mathieu-Daudé 


-input = raw_input
-
  class QMPCompleter(list):
  def complete(self, text, state):
  for cmd in self:
diff --git a/tests/docker/docker.py b/tests/docker/docker.py
index d8268c..5a9735db78 100755
--- a/tests/docker/docker.py
+++ b/tests/docker/docker.py
@@ -258,12 +258,13 @@ def 

Re: [PATCH v2 0/6] target/ppc: Various clean-up and fixes for radix64

2020-05-14 Thread David Gibson
On Thu, May 14, 2020 at 12:56:42AM +0200, Greg Kurz wrote:
> First three patches of this series are simple cleanups. The other
> ones fix some regressions introduced by Cedric's recent addition
> of partition-scoped translation.

Applied to ppc-for-5.1, thanks.

> 
> Changes since v1:
> - fix build break in patch 3
> - introduce guest_visible argument in patch 6
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


[Bug 1878501] Re: qemu-i386 does not define AT_SYSINFO

2020-05-14 Thread Laurent Vivier
Richard,

this problem seems related to the work you have already done on vsyscalls:

  b26491b4d4f8 ("linux-user/i386: Emulate x86_64 vsyscalls")

I don't know if we should support AT_SYSINFO or consider this as a bug
of the target libc.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1878501

Title:
  qemu-i386 does not define AT_SYSINFO

Status in QEMU:
  New

Bug description:
  qemu-i386 does not define the AT_SYSINFO auxval when running i386
  Linux binaries.

  On most libcs, this is properly handled, but this is mandatory for the
  i686 Bionic (Android) libc or it will segfault.

  This is due to a blind assumption that getauxval(AT_SYSINFO) will
  return a valid function pointer:

  The code varies from version to version, but it looks like this:

  void *__libc_sysinfo;
  // mangled as _Z19__libc_init_sysinfov
  void __libc_init_sysinfo() {
bool dummy;
// __bionic_getauxval = getauxval
__libc_sysinfo = reinterpret_cast<void*>(__bionic_getauxval(AT_SYSINFO, 
dummy));
  }

  A simple way to reproduce is to compile a basic C program against the
  NDK:

  int main(void) { return 0; }

  $ i686-linux-android-clang -static empty.c -o empty
  $ qemu-i386 -cpu max ./empty
  qemu: uncaught target signal 11 (Segmentation fault) - core dumped
  Segmentation fault

  The place where it segfaults is misleading: It will, at least on the
  current NDK, crash on __set_thread_area, this is due to it calling a
  function pointer to __libc_sysinfo returned by __kernel_syscall.

  QEMU 4.1.1 (aarch64)
  Pixel 2 XL via Termux

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1878501/+subscriptions



[PATCH RFC 28/32] python//machine.py: fix _popen access

2020-05-14 Thread John Snow
As always, Optional[T] causes problems with unchecked access. Add a
helper that asserts the pipe is present before we attempt to talk with
it.

Signed-off-by: John Snow 
---
 python/qemu/lib/machine.py | 18 --
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/python/qemu/lib/machine.py b/python/qemu/lib/machine.py
index dfa8449b62..fb1a02b53c 100644
--- a/python/qemu/lib/machine.py
+++ b/python/qemu/lib/machine.py
@@ -113,7 +113,7 @@ def __init__(self, binary, args=None, wrapper=None, 
name=None,
 # Runstate
 self._qemu_log_path = None
 self._qemu_log_file = None
-self._popen = None
+self._popen: Optional['subprocess.Popen[bytes]'] = None
 self._events = []
 self._iolog = None
 self._qmp_set = True   # Enable QMP monitor by default.
@@ -225,6 +225,12 @@ def is_running(self):
 """Returns true if the VM is running."""
 return self._popen is not None and self._popen.poll() is None
 
+@property
+def _subp(self) -> 'subprocess.Popen[bytes]':
+if self._popen is None:
+raise QEMUMachineError('Subprocess pipe not present')
+return self._popen
+
 def exitcode(self):
 """Returns the exit code if possible, or None."""
 if self._popen is None:
@@ -235,7 +241,7 @@ def get_pid(self):
 """Returns the PID of the running process, or None."""
 if not self.is_running():
 return None
-return self._popen.pid
+return self._subp.pid
 
 def _load_io_log(self):
 if self._qemu_log_path is not None:
@@ -352,7 +358,7 @@ def wait(self):
 """
 Wait for the VM to power off
 """
-self._popen.wait()
+self._subp.wait()
 if self._qmp:
 self._qmp.close()
 self._load_io_log()
@@ -371,11 +377,11 @@ def _issue_shutdown(self, has_quit: bool = False) -> None:
 self._qmp.close()
 
 try:
-self._popen.wait(timeout=3)
+self._subp.wait(timeout=3)
 except subprocess.TimeoutExpired:
-self._popen.kill()
+self._subp.kill()
 
-self._popen.wait()
+self._subp.wait()
 
 def shutdown(self, has_quit: bool = False) -> None:
 """
-- 
2.21.1




[PATCH RFC 24/32] python//machine.py: Don't modify state in _base_args()

2020-05-14 Thread John Snow
Don't append to the _remove_files list during _base_args; instead do so
during _launch. Rework _base_args as a @property to help facilitate
this impression.

This has the additional benefit of making the type of _console_address
easier to analyze statically.

Signed-off-by: John Snow 
---
 python/qemu/lib/machine.py | 16 ++--
 python/qemu/lib/qtest.py   | 11 ---
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/python/qemu/lib/machine.py b/python/qemu/lib/machine.py
index beb31be453..8548c7c32d 100644
--- a/python/qemu/lib/machine.py
+++ b/python/qemu/lib/machine.py
@@ -25,6 +25,7 @@
 import socket
 import tempfile
 from typing import (
+List,
 Optional,
 Type,
 )
@@ -122,7 +123,9 @@ def __init__(self, binary, args=None, wrapper=None, 
name=None,
 self._console_index = 0
 self._console_set = False
 self._console_device_type = None
-self._console_address = None
+self._console_address = os.path.join(
+self._sock_dir, f"{self._name}-console.sock"
+)
 self._console_socket = None
 self._remove_files = []
 
@@ -237,7 +240,8 @@ def _load_io_log(self):
 with open(self._qemu_log_path, "r") as iolog:
 self._iolog = iolog.read()
 
-def _base_args(self):
+@property
+def _base_args(self) -> List[str]:
 args = ['-display', 'none', '-vga', 'none']
 
 if self._qmp_set:
@@ -255,9 +259,6 @@ def _base_args(self):
 for _ in range(self._console_index):
 args.extend(['-serial', 'null'])
 if self._console_set:
-self._console_address = os.path.join(self._sock_dir,
- self._name + "-console.sock")
-self._remove_files.append(self._console_address)
 chardev = ('socket,id=console,path=%s,server,nowait' %
self._console_address)
 args.extend(['-chardev', chardev])
@@ -273,6 +274,9 @@ def _pre_launch(self):
 self._qemu_log_path = os.path.join(self._temp_dir, self._name + ".log")
 self._qemu_log_file = open(self._qemu_log_path, 'wb')
 
+if self._console_set:
+self._remove_files.append(self._console_address)
+
 if self._qmp_set:
 if self._remove_monitor_sockfile:
 assert isinstance(self._monitor_address, str)
@@ -332,7 +336,7 @@ def _launch(self):
 devnull = open(os.path.devnull, 'rb')
 self._pre_launch()
 self._qemu_full_args = (self._wrapper + [self._binary] +
-self._base_args() + self._args)
+self._base_args + self._args)
 LOG.debug('VM launch command: %r', ' '.join(self._qemu_full_args))
 self._popen = subprocess.Popen(self._qemu_full_args,
stdin=devnull,
diff --git a/python/qemu/lib/qtest.py b/python/qemu/lib/qtest.py
index 4c88590eb0..a8be0c782f 100644
--- a/python/qemu/lib/qtest.py
+++ b/python/qemu/lib/qtest.py
@@ -19,7 +19,11 @@
 
 import socket
 import os
-from typing import Optional, TextIO
+from typing import (
+List,
+Optional,
+TextIO,
+)
 
 from .machine import QEMUMachine
 
@@ -111,8 +115,9 @@ def __init__(self, binary, args=None, name=None, 
test_dir="/var/tmp",
 self._qtest = None
 self._qtest_path = os.path.join(sock_dir, name + "-qtest.sock")
 
-def _base_args(self):
-args = super()._base_args()
+@property
+def _base_args(self) -> List[str]:
+args = super()._base_args
 args.extend(['-qtest', 'unix:path=' + self._qtest_path,
  '-accel', 'qtest'])
 return args
-- 
2.21.1




[PATCH RFC 12/32] python/qemu/lib: fix socket.makefile() typing

2020-05-14 Thread John Snow
Note:

A bug in typeshed (https://github.com/python/typeshed/issues/3977)
misinterprets the type of makefile(). Work around this by explicitly
stating that we are opening a text-mode file.

Signed-off-by: John Snow 
---
 python/qemu/lib/qmp.py   | 10 +++---
 python/qemu/lib/qtest.py | 12 
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/python/qemu/lib/qmp.py b/python/qemu/lib/qmp.py
index 6ae7693965..73d49050ed 100644
--- a/python/qemu/lib/qmp.py
+++ b/python/qemu/lib/qmp.py
@@ -11,6 +11,10 @@
 import errno
 import socket
 import logging
+from typing import (
+Optional,
+TextIO,
+)
 
 
 class QMPError(Exception):
@@ -61,7 +65,7 @@ def __init__(self, address, server=False, nickname=None):
 self.__events = []
 self.__address = address
 self.__sock = self.__get_sock()
-self.__sockfile = None
+self.__sockfile: Optional[TextIO] = None
 self._nickname = nickname
 if self._nickname:
 self.logger = logging.getLogger('QMP').getChild(self._nickname)
@@ -157,7 +161,7 @@ def connect(self, negotiate=True):
 @raise QMPCapabilitiesError if fails to negotiate capabilities
 """
 self.__sock.connect(self.__address)
-self.__sockfile = self.__sock.makefile()
+self.__sockfile = self.__sock.makefile(mode='r')
 if negotiate:
 return self.__negotiate_capabilities()
 return None
@@ -180,7 +184,7 @@ def accept(self, timeout=15.0):
 """
 self.__sock.settimeout(timeout)
 self.__sock, _ = self.__sock.accept()
-self.__sockfile = self.__sock.makefile()
+self.__sockfile = self.__sock.makefile(mode='r')
 return self.__negotiate_capabilities()
 
 def cmd_obj(self, qmp_cmd):
diff --git a/python/qemu/lib/qtest.py b/python/qemu/lib/qtest.py
index 7943487c2b..4c88590eb0 100644
--- a/python/qemu/lib/qtest.py
+++ b/python/qemu/lib/qtest.py
@@ -19,6 +19,7 @@
 
 import socket
 import os
+from typing import Optional, TextIO
 
 from .machine import QEMUMachine
 
@@ -40,7 +41,7 @@ class QEMUQtestProtocol:
 def __init__(self, address, server=False):
 self._address = address
 self._sock = self._get_sock()
-self._sockfile = None
+self._sockfile: Optional[TextIO] = None
 if server:
 self._sock.bind(self._address)
 self._sock.listen(1)
@@ -59,7 +60,7 @@ def connect(self):
 @raise socket.error on socket connection errors
 """
 self._sock.connect(self._address)
-self._sockfile = self._sock.makefile()
+self._sockfile = self._sock.makefile(mode='r')
 
 def accept(self):
 """
@@ -68,7 +69,7 @@ def accept(self):
 @raise socket.error on socket connection errors
 """
 self._sock, _ = self._sock.accept()
-self._sockfile = self._sock.makefile()
+self._sockfile = self._sock.makefile(mode='r')
 
 def cmd(self, qtest_cmd):
 """
@@ -76,6 +77,7 @@ def cmd(self, qtest_cmd):
 
 @param qtest_cmd: qtest command text to be sent
 """
+assert self._sockfile is not None
 self._sock.sendall((qtest_cmd + "\n").encode('utf-8'))
 resp = self._sockfile.readline()
 return resp
@@ -83,7 +85,9 @@ def cmd(self, qtest_cmd):
 def close(self):
 """Close this socket."""
 self._sock.close()
-self._sockfile.close()
+if self._sockfile:
+self._sockfile.close()
+self._sockfile = None
 
 def settimeout(self, timeout):
 """Set a timeout, in seconds."""
-- 
2.21.1




[PATCH RFC 25/32] python//machine.py: Handle None events in event_wait

2020-05-14 Thread John Snow
If the timeout is 0, we can get None back. Handle this explicitly.

Signed-off-by: John Snow 
---
 python/qemu/lib/machine.py | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/python/qemu/lib/machine.py b/python/qemu/lib/machine.py
index 8548c7c32d..61ee3a0e81 100644
--- a/python/qemu/lib/machine.py
+++ b/python/qemu/lib/machine.py
@@ -32,7 +32,7 @@
 from types import TracebackType
 
 from . import qmp
-from .qmp import SocketAddrT
+from .qmp import SocketAddrT, QMPMessage
 
 LOG = logging.getLogger(__name__)
 
@@ -519,6 +519,8 @@ def _match(event):
 return True
 return False
 
+event: Optional[QMPMessage]
+
 # Search cached events
 for event in self._events:
 if _match(event):
@@ -528,6 +530,8 @@ def _match(event):
 # Poll for new events
 while True:
 event = self._qmp.pull_event(wait=timeout)
+if event is None:
+break
 if _match(event):
 return event
 self._events.append(event)
-- 
2.21.1




Re: [PATCH v2 4/6] target/ppc: Add missing braces in ppc_radix64_partition_scoped_xlate()

2020-05-14 Thread Cédric Le Goater
On 5/14/20 12:57 AM, Greg Kurz wrote:
> As per CODING_STYLE.
> 
> Fixes: d04ea940c597 "target/ppc: Add support for Radix partition-scoped 
> translation"
> Signed-off-by: Greg Kurz 

Reviewed-by: Cédric Le Goater 

> ---
>  target/ppc/mmu-radix64.c |3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
> index 07f956c9864f..fb7dfe25ba6f 100644
> --- a/target/ppc/mmu-radix64.c
> +++ b/target/ppc/mmu-radix64.c
> @@ -286,8 +286,9 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU 
> *cpu, int rwx,
>pate.dw0 & PRTBE_R_RPDS, h_raddr, h_page_size,
>, _cause, _addr) ||
>  ppc_radix64_check_prot(cpu, rwx, pte, _cause, h_prot, true)) {
> -if (pde_addr) /* address being translated was that of a guest pde */
> +if (pde_addr) { /* address being translated was that of a guest pde 
> */
>  fault_cause |= DSISR_PRTABLE_FAULT;
> +}
>  if (cause_excp) {
>  ppc_radix64_raise_hsi(cpu, rwx, eaddr, g_raddr, fault_cause);
>  }
> 




Re: [PATCH v2 6/6] target/ppc: Don't update radix PTE R/C bits with gdbstub

2020-05-14 Thread Cédric Le Goater
On 5/14/20 12:57 AM, Greg Kurz wrote:
> gdbstub shouldn't silently change guest visible state when doing address
> translation. Since the R/C bits can only be updated when handling a MMU
> fault, let's reuse the cause_excp flag and rename it to guest_visible.
> While here drop a not very useful comment.
> 
> This was found while reading the code. I could verify that this affects
> both powernv and pseries, but I failed to observe any actual bug.
> 
> Fixes: d04ea940c597 "target/ppc: Add support for Radix partition-scoped 
> translation"
> Signed-off-by: Greg Kurz 

Reviewed-by: Cédric Le Goater 

> ---
>  target/ppc/mmu-radix64.c |   39 +--
>  1 file changed, 21 insertions(+), 18 deletions(-)
> 
> diff --git a/target/ppc/mmu-radix64.c b/target/ppc/mmu-radix64.c
> index 7ce37cb778db..0d3922537c4c 100644
> --- a/target/ppc/mmu-radix64.c
> +++ b/target/ppc/mmu-radix64.c
> @@ -274,7 +274,7 @@ static int ppc_radix64_partition_scoped_xlate(PowerPCCPU 
> *cpu, int rwx,
>ppc_v3_pate_t pate,
>hwaddr *h_raddr, int *h_prot,
>int *h_page_size, bool 
> pde_addr,
> -  bool cause_excp)
> +  bool guest_visible)
>  {
>  int fault_cause = 0;
>  hwaddr pte_addr;
> @@ -289,14 +289,15 @@ static int 
> ppc_radix64_partition_scoped_xlate(PowerPCCPU *cpu, int rwx,
>  if (pde_addr) { /* address being translated was that of a guest pde 
> */
>  fault_cause |= DSISR_PRTABLE_FAULT;
>  }
> -if (cause_excp) {
> +if (guest_visible) {
>  ppc_radix64_raise_hsi(cpu, rwx, eaddr, g_raddr, fault_cause);
>  }
>  return 1;
>  }
>  
> -/* Update Reference and Change Bits */
> -ppc_radix64_set_rc(cpu, rwx, pte, pte_addr, h_prot);
> +if (guest_visible) {
> +ppc_radix64_set_rc(cpu, rwx, pte, pte_addr, h_prot);
> +}
>  
>  return 0;
>  }
> @@ -305,7 +306,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU 
> *cpu, int rwx,
>  vaddr eaddr, uint64_t pid,
>  ppc_v3_pate_t pate, hwaddr 
> *g_raddr,
>  int *g_prot, int *g_page_size,
> -bool cause_excp)
> +bool guest_visible)
>  {
>  CPUState *cs = CPU(cpu);
>  CPUPPCState *env = >env;
> @@ -319,7 +320,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU 
> *cpu, int rwx,
>  size = 1ULL << ((pate.dw1 & PATE1_R_PRTS) + 12);
>  if (offset >= size) {
>  /* offset exceeds size of the process table */
> -if (cause_excp) {
> +if (guest_visible) {
>  ppc_radix64_raise_si(cpu, rwx, eaddr, DSISR_NOPTE);
>  }
>  return 1;
> @@ -340,7 +341,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU 
> *cpu, int rwx,
>  ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, prtbe_addr,
>   pate, _raddr, _prot,
>   _page_size, true,
> - cause_excp);
> + guest_visible);
>  if (ret) {
>  return ret;
>  }
> @@ -360,7 +361,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU 
> *cpu, int rwx,
>  _cause, _addr);
>  if (ret) {
>  /* No valid PTE */
> -if (cause_excp) {
> +if (guest_visible) {
>  ppc_radix64_raise_si(cpu, rwx, eaddr, fault_cause);
>  }
>  return ret;
> @@ -380,7 +381,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU 
> *cpu, int rwx,
>  ret = ppc_radix64_partition_scoped_xlate(cpu, 0, eaddr, pte_addr,
>   pate, _raddr, _prot,
>   _page_size, true,
> - cause_excp);
> + guest_visible);
>  if (ret) {
>  return ret;
>  }
> @@ -389,7 +390,7 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU 
> *cpu, int rwx,
>   , g_page_size, , 
> _cause);
>  if (ret) {
>  /* No valid pte */
> -if (cause_excp) {
> +if (guest_visible) {
>  ppc_radix64_raise_si(cpu, rwx, eaddr, fault_cause);
>  }
>  return ret;
> @@ -406,13 +407,15 @@ static int ppc_radix64_process_scoped_xlate(PowerPCCPU 

Re: [RFC v2] migration: Add migrate-set-bitmap-node-mapping

2020-05-14 Thread Max Reitz
On 13.05.20 18:11, Eric Blake wrote:
> On 5/13/20 9:56 AM, Max Reitz wrote:
>> This command allows mapping block node names to aliases for the purpose
>> of block dirty bitmap migration.
>>
>> This way, management tools can use different node names on the source
>> and destination and pass the mapping of how bitmaps are to be
>> transferred to qemu (on the source, the destination, or even both with
>> arbitrary aliases in the migration stream).
>>
>> Suggested-by: Vladimir Sementsov-Ogievskiy 
>> Signed-off-by: Max Reitz 
>> ---
> 
>> @@ -713,6 +731,44 @@ static bool dirty_bitmap_has_postcopy(void *opaque)
>>   return true;
>>   }
>>   +void
>> qmp_migrate_set_bitmap_node_mapping(MigrationBlockNodeMappingList
>> *mapping,
>> + Error **errp)
>> +{
>> +    QDict *in_mapping = qdict_new();
>> +    QDict *out_mapping = qdict_new();
>> +
>> +    for (; mapping; mapping = mapping->next) {
>> +    MigrationBlockNodeMapping *entry = mapping->value;
>> +
>> +    if (qdict_haskey(out_mapping, entry->node_name)) {
>> +    error_setg(errp, "Cannot map node name '%s' twice",
>> +   entry->node_name);
>> +    goto fail;
>> +    }
> 
> Can we call this command more than once?  Is it cumulative (call it once
> to set mapping for "a", second time to also set mapping for "b"), or
> should it reset (second call wipes out all mappings from first call, any
> mappings that must exist must be passed in the final call)?

I tried to make it clear in the documentation:

> +# @mapping: The mapping; must be one-to-one, but not necessarily
> +#   complete.  Any mapping not given will be reset to the
> +#   default (i.e. the identity mapping).

So everything that isn’t set in the second call is reset.  I thought
about what you proposed (because I guess that’s the most intuitive
idea), but after consideration I didn’t see why we’d need different
behavior, so it would only serve to make the code more complicated.

Max

> The idea makes sense, and the interface seems usable.  It's nice that
> either source, destination, or both sides of migration can use it (which
> helps in upgrade vs. downgrade scenarios).



signature.asc
Description: OpenPGP digital signature


Re: [PATCH v2 4/5] vhost: check vring address before calling unmap

2020-05-14 Thread Jason Wang



On 2020/5/13 下午5:36, Dima Stepanov wrote:

On Wed, May 13, 2020 at 11:00:38AM +0800, Jason Wang wrote:

On 2020/5/12 下午5:08, Dima Stepanov wrote:

On Tue, May 12, 2020 at 11:26:11AM +0800, Jason Wang wrote:

On 2020/5/11 下午5:11, Dima Stepanov wrote:

On Mon, May 11, 2020 at 11:05:58AM +0800, Jason Wang wrote:

On 2020/4/30 下午9:36, Dima Stepanov wrote:

Since disconnect can happen at any time during initialization not all
vring buffers (for instance used vring) can be initialized successfully.
If the buffer was not initialized then vhost_memory_unmap call will lead
to SIGSEGV. Add checks for the vring address value before calling unmap.
Also add assert() in the vhost_memory_unmap() routine.

Signed-off-by: Dima Stepanov
---
  hw/virtio/vhost.c | 27 +--
  1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index ddbdc53..3ee50c4 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -314,6 +314,8 @@ static void vhost_memory_unmap(struct vhost_dev *dev, void 
*buffer,
 hwaddr len, int is_write,
 hwaddr access_len)
  {
+assert(buffer);
+
  if (!vhost_dev_has_iommu(dev)) {
  cpu_physical_memory_unmap(buffer, len, is_write, access_len);
  }
@@ -1132,12 +1134,25 @@ static void vhost_virtqueue_stop(struct vhost_dev *dev,
  vhost_vq_index);
  }
-vhost_memory_unmap(dev, vq->used, virtio_queue_get_used_size(vdev, idx),
-   1, virtio_queue_get_used_size(vdev, idx));
-vhost_memory_unmap(dev, vq->avail, virtio_queue_get_avail_size(vdev, idx),
-   0, virtio_queue_get_avail_size(vdev, idx));
-vhost_memory_unmap(dev, vq->desc, virtio_queue_get_desc_size(vdev, idx),
-   0, virtio_queue_get_desc_size(vdev, idx));
+/*
+ * Since the vhost-user disconnect can happen during initialization
+ * check if vring was initialized, before making unmap.
+ */
+if (vq->used) {
+vhost_memory_unmap(dev, vq->used,
+   virtio_queue_get_used_size(vdev, idx),
+   1, virtio_queue_get_used_size(vdev, idx));
+}
+if (vq->avail) {
+vhost_memory_unmap(dev, vq->avail,
+   virtio_queue_get_avail_size(vdev, idx),
+   0, virtio_queue_get_avail_size(vdev, idx));
+}
+if (vq->desc) {
+vhost_memory_unmap(dev, vq->desc,
+   virtio_queue_get_desc_size(vdev, idx),
+   0, virtio_queue_get_desc_size(vdev, idx));
+}

Any reason for not checking hdev->started instead? vhost_dev_start() will set it
to true if virtqueues were correctly mapped.

Thanks

Well i see it a little bit different:
  - vhost_dev_start() sets hdev->started to true before starting
virtqueues
  - vhost_virtqueue_start() maps all the memory
If we hit the vhost disconnect at the start of the
vhost_virtqueue_start(), for instance for this call:
   r = dev->vhost_ops->vhost_set_vring_base(dev, &state);
Then we will call vhost_user_blk_disconnect:
   vhost_user_blk_disconnect()->
 vhost_user_blk_stop()->
   vhost_dev_stop()->
 vhost_virtqueue_stop()
As a result we will enter this routine with hdev->started still
set to true, but with the used/avail/desc fields still uninitialized and
set to 0.

I may miss something, but consider both vhost_dev_start() and
vhost_user_blk_disconnect() were serialized in main loop. Can this really
happen?

Yes, consider the case when we start the vhost-user-blk device:
   vhost_dev_start->
 vhost_virtqueue_start
And we got a disconnect in the middle of vhost_virtqueue_start()
routine, for instance:
   1000 vq->num = state.num = virtio_queue_get_num(vdev, idx);
   1001 r = dev->vhost_ops->vhost_set_vring_num(dev, &state);
   1002 if (r) {
   1003 VHOST_OPS_DEBUG("vhost_set_vring_num failed");
   1004 return -errno;
   1005 }
   --> Here we got a disconnect <--
   1006
   1007 state.num = virtio_queue_get_last_avail_idx(vdev, idx);
   1008 r = dev->vhost_ops->vhost_set_vring_base(dev, &state);
   1009 if (r) {
   1010 VHOST_OPS_DEBUG("vhost_set_vring_base failed");
   1011 return -errno;
   1012 }
As a result call to vhost_set_vring_base will call the disconnect
routine. The backtrace log for SIGSEGV is as follows:
   Thread 4 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
   [Switching to Thread 0x72ea9700 (LWP 183150)]
   0x74d60840 in ?? () from /lib/x86_64-linux-gnu/libc.so.6
   (gdb) bt
   #0  0x74d60840 in ?? () from /lib/x86_64-linux-gnu/libc.so.6
   #1  0x5590fd90 in flatview_write_continue (fv=0x7fffec4a2600,
   addr=0, attrs=..., ptr=0x0, len=1028, addr1=0,
   l=1028, mr=0x56b1b310) at ./exec.c:3142
   #2  0x5590fe98 in flatview_write 

[Bug 1805256] Re: qemu-img hangs on rcu_call_ready_event logic in Aarch64 when converting images

2020-05-14 Thread Andrew Cloke
** Changed in: kunpeng920
   Status: Triaged => In Progress

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1805256

Title:
  qemu-img hangs on rcu_call_ready_event logic in Aarch64 when
  converting images

Status in kunpeng920:
  In Progress
Status in kunpeng920 ubuntu-18.04 series:
  Triaged
Status in kunpeng920 ubuntu-18.04-hwe series:
  Triaged
Status in kunpeng920 ubuntu-19.10 series:
  Triaged
Status in kunpeng920 ubuntu-20.04 series:
  Triaged
Status in kunpeng920 upstream-kernel series:
  Fix Committed
Status in QEMU:
  Fix Released
Status in qemu package in Ubuntu:
  In Progress
Status in qemu source package in Bionic:
  In Progress
Status in qemu source package in Disco:
  In Progress
Status in qemu source package in Eoan:
  In Progress
Status in qemu source package in Focal:
  In Progress

Bug description:
  [Impact]

  * QEMU locking primitives might face a race condition in QEMU Async
  I/O bottom halves scheduling. This leads to a deadlock, causing either
  QEMU or one of its tools to hang indefinitely.

  [Test Case]

  * qemu-img convert -f qcow2 -O qcow2 ./disk01.qcow2 ./output.qcow2

  Hangs indefinitely approximately 30% of the runs in Aarch64.

  [Regression Potential]

  * This is a change to a core part of QEMU: The AIO scheduling. It
  works like a "kernel" scheduler, whereas kernel schedules OS tasks,
  the QEMU AIO code is responsible to schedule QEMU coroutines or event
  listeners callbacks.

  * There was a long discussion upstream about primitives and Aarch64.
  After quite some time Paolo released this patch and it solves the
  issue. Tested platforms were: amd64 and aarch64 based on his commit
  log.

  * Christian suggests that this fix stay a little longer in -proposed to
  make sure it won't cause any regressions.

  * dannf suggests we also check for performance regressions; e.g. how
  long it takes to convert a cloud image on high-core systems.

  [Other Info]

   * Original Description below:

  Command:

  qemu-img convert -f qcow2 -O qcow2 ./disk01.qcow2 ./output.qcow2

  Hangs indefinitely approximately 30% of the runs.

  

  Workaround:

  qemu-img convert -m 1 -f qcow2 -O qcow2 ./disk01.qcow2 ./output.qcow2

  Run "qemu-img convert" with "a single coroutine" to avoid this issue.

  

  (gdb) thread 1
  ...
  (gdb) bt
  #0 0xbf1ad81c in __GI_ppoll
  #1 0xaabcf73c in ppoll
  #2 qemu_poll_ns
  #3 0xaabd0764 in os_host_main_loop_wait
  #4 main_loop_wait
  ...

  (gdb) thread 2
  ...
  (gdb) bt
  #0 syscall ()
  #1 0xaabd41cc in qemu_futex_wait
  #2 qemu_event_wait (ev=ev@entry=0xaac86ce8 <rcu_call_ready_event>)
  #3 0xaabed05c in call_rcu_thread
  #4 0xaabd34c8 in qemu_thread_start
  #5 0xbf25c880 in start_thread
  #6 0xbf1b6b9c in thread_start ()

  (gdb) thread 3
  ...
  (gdb) bt
  #0 0xbf11aa20 in __GI___sigtimedwait
  #1 0xbf2671b4 in __sigwait
  #2 0xaabd1ddc in sigwait_compat
  #3 0xaabd34c8 in qemu_thread_start
  #4 0xbf25c880 in start_thread
  #5 0xbf1b6b9c in thread_start

  

  (gdb) run
  Starting program: /usr/bin/qemu-img convert -f qcow2 -O qcow2
  ./disk01.ext4.qcow2 ./output.qcow2

  [New Thread 0xbec5ad90 (LWP 72839)]
  [New Thread 0xbe459d90 (LWP 72840)]
  [New Thread 0xbdb57d90 (LWP 72841)]
  [New Thread 0xacac9d90 (LWP 72859)]
  [New Thread 0xa7ffed90 (LWP 72860)]
  [New Thread 0xa77fdd90 (LWP 72861)]
  [New Thread 0xa6ffcd90 (LWP 72862)]
  [New Thread 0xa67fbd90 (LWP 72863)]
  [New Thread 0xa5ffad90 (LWP 72864)]

  [Thread 0xa5ffad90 (LWP 72864) exited]
  [Thread 0xa6ffcd90 (LWP 72862) exited]
  [Thread 0xa77fdd90 (LWP 72861) exited]
  [Thread 0xbdb57d90 (LWP 72841) exited]
  [Thread 0xa67fbd90 (LWP 72863) exited]
  [Thread 0xacac9d90 (LWP 72859) exited]
  [Thread 0xa7ffed90 (LWP 72860) exited]

  
  """

  All the tasks left are blocked in a system call, so no task left to call
  qemu_futex_wake() to unblock thread #2 (in futex()), which would unblock
  thread #1 (doing poll() in a pipe with thread #2).

  Those 7 threads exit before disk conversion is complete (sometimes in
  the beginning, sometimes at the end).

  

  On the HiSilicon D06 system - a 96 core NUMA arm64 box - qemu-img
  frequently hangs (~50% of the time) with this command:

  qemu-img convert -f qcow2 -O qcow2 /tmp/cloudimg /tmp/cloudimg2

  Where "cloudimg" is a standard qcow2 Ubuntu cloud image. This
  qcow2->qcow2 conversion happens to be something uvtool does every time
  it fetches images.

  Once hung, attaching gdb gives the following backtrace:

  (gdb) bt
  #0  0xae4f8154 in __GI_ppoll (fds=0xe8a67dc0, 
nfds=187650274213760,
  timeout=<optimized out>, timeout@entry=0x0, sigmask=0xc123b950)
  at ../sysdeps/unix/sysv/linux/ppoll.c:39
  #1  0xbbefaf00 in ppoll (__ss=0x0, __timeout=0x0, 

Re: Assertion failure through vring_split_desc_read

2020-05-14 Thread Philippe Mathieu-Daudé

On 5/14/20 1:24 AM, John Snow wrote:



On 5/10/20 11:51 PM, Alexander Bulekov wrote:

Hello,
While fuzzing, I found an input that triggers an assertion failure
through virtio-rng -> vring_split_desc_read. Maybe this is related to:
Message-ID: <20200511033001.dzvtbdhl3oz5p...@mozz.bu.edu>
Assertion failure through virtio_lduw_phys_cached

#8 0x7fe6a9acf091 in __assert_fail 
/build/glibc-GwnBeO/glibc-2.30/assert/assert.c:101:3
#9 0x564cbe7d96fd in address_space_read_cached include/exec/memory.h:2423:5
#10 0x564cbe7e79c5 in vring_split_desc_read hw/virtio/virtio.c:236:5
#11 0x564cbe7e84ce in virtqueue_split_read_next_desc hw/virtio/virtio.c:929:5
#12 0x564cbe78f86b in virtqueue_split_get_avail_bytes hw/virtio/virtio.c:1009:18
#13 0x564cbe78ab22 in virtqueue_get_avail_bytes hw/virtio/virtio.c:1208:9
#14 0x564cc08aade1 in get_request_size hw/virtio/virtio-rng.c:40:5
#15 0x564cc08aa20b in virtio_rng_process hw/virtio/virtio-rng.c:115:12
#16 0x564cc08a8c48 in virtio_rng_set_status hw/virtio/virtio-rng.c:172:5
#17 0x564cbe7a50be in virtio_set_status hw/virtio/virtio.c:1876:9
#18 0x564cc08d1b8f in virtio_pci_common_write hw/virtio/virtio-pci.c:1245:9

I can reproduce it in a qemu 5.0 build using these qtest commands:
https://paste.debian.net/plain/1146089
(not including them here, as some are quite long)

wget https://paste.debian.net/plain/1146089 -O qtest-trace; 
~/Development/qemu/build/i386-softmmu/qemu-system-i386 -M pc-q35-5.0  -device 
virtio-rng-pci,addr=04.0 -display none -nodefaults -nographic -qtest stdio < 
qtest-trace

Please let me know if I can provide any further info.
-Alex



Do you have a writeup somewhere of how you are approaching fuzzing and
how you've found this pile of bugs so far?


There is docs/devel/fuzzing.txt:

https://git.qemu.org/?p=qemu.git;a=blob;f=docs/devel/fuzzing.txt;hb=v5.0.0



Might make for a good blog post.


Good idea!



--js







[PATCH RFC 21/32] python//machine.py: remove logging configuration

2020-05-14 Thread John Snow
Python 3.5 and above do not print a warning when logging is not
configured. As a library, it's best practice to leave logging
configuration to the client executable.

Signed-off-by: John Snow 
---
 python/qemu/lib/machine.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/python/qemu/lib/machine.py b/python/qemu/lib/machine.py
index c31bf7cabb..e92afe8649 100644
--- a/python/qemu/lib/machine.py
+++ b/python/qemu/lib/machine.py
@@ -110,9 +110,6 @@ def __init__(self, binary, args=None, wrapper=None, 
name=None,
 self._console_socket = None
 self._remove_files = []
 
-# just in case logging wasn't configured by the main script:
-logging.basicConfig()
-
 def __enter__(self):
 return self
 
-- 
2.21.1




[PATCH RFC 22/32] python//machine.py: Fix monitor address typing

2020-05-14 Thread John Snow
Prior to this, it's difficult for mypy to intuit what the concrete type
of the monitor address is; it has difficulty inferring the type across
two variables.

Create _monitor_address as a property that always returns a valid
address to simplify static type analysis.

To preserve our ability to clean up, use a simple boolean to indicate
whether or not we should try to clean up the sock file after execution.

Signed-off-by: John Snow 
---
 python/qemu/lib/machine.py | 45 --
 1 file changed, 29 insertions(+), 16 deletions(-)

diff --git a/python/qemu/lib/machine.py b/python/qemu/lib/machine.py
index e92afe8649..6a4aea7725 100644
--- a/python/qemu/lib/machine.py
+++ b/python/qemu/lib/machine.py
@@ -24,10 +24,14 @@
 import shutil
 import socket
 import tempfile
-from typing import Optional, Type
+from typing import (
+Optional,
+Type,
+)
 from types import TracebackType
 
 from . import qmp
+from .qmp import SocketAddrT
 
 LOG = logging.getLogger(__name__)
 
@@ -61,7 +65,8 @@ class QEMUMachine:
 """
 
 def __init__(self, binary, args=None, wrapper=None, name=None,
- test_dir="/var/tmp", monitor_address=None,
+ test_dir="/var/tmp",
+ monitor_address: Optional[SocketAddrT] = None,
  socket_scm_helper=None, sock_dir=None):
 '''
 Initialize a QEMUMachine
@@ -84,8 +89,14 @@ def __init__(self, binary, args=None, wrapper=None, 
name=None,
 if sock_dir is None:
 sock_dir = test_dir
 self._name = name
-self._monitor_address = monitor_address
-self._vm_monitor = None
+if monitor_address is not None:
+self._monitor_address = monitor_address
+self._remove_monitor_sockfile = False
+else:
+self._monitor_address = os.path.join(
+sock_dir, f"{name}-monitor.sock"
+)
+self._remove_monitor_sockfile = True
 self._qemu_log_path = None
 self._qemu_log_file = None
 self._popen = None
@@ -223,15 +234,17 @@ def _load_io_log(self):
 
 def _base_args(self):
 args = ['-display', 'none', '-vga', 'none']
+
 if self._qmp_set:
 if isinstance(self._monitor_address, tuple):
-moncdev = "socket,id=mon,host=%s,port=%s" % (
-self._monitor_address[0],
-self._monitor_address[1])
+moncdev = "socket,id=mon,host={},port={}".format(
+*self._monitor_address
+)
 else:
-moncdev = 'socket,id=mon,path=%s' % self._vm_monitor
+moncdev = f"socket,id=mon,path={self._monitor_address}"
 args.extend(['-chardev', moncdev, '-mon',
  'chardev=mon,mode=control'])
+
 if self._machine is not None:
 args.extend(['-machine', self._machine])
 for _ in range(self._console_index):
@@ -256,14 +269,14 @@ def _pre_launch(self):
 self._qemu_log_file = open(self._qemu_log_path, 'wb')
 
 if self._qmp_set:
-if self._monitor_address is not None:
-self._vm_monitor = self._monitor_address
-else:
-self._vm_monitor = os.path.join(self._sock_dir,
-self._name + "-monitor.sock")
-self._remove_files.append(self._vm_monitor)
-self._qmp = qmp.QEMUMonitorProtocol(self._vm_monitor, server=True,
-nickname=self._name)
+if self._remove_monitor_sockfile:
+assert isinstance(self._monitor_address, str)
+self._remove_files.append(self._monitor_address)
+self._qmp = qmp.QEMUMonitorProtocol(
+self._monitor_address,
+server=True,
+nickname=self._name
+)
 
 def _post_launch(self):
 if self._qmp:
-- 
2.21.1




[PATCH RFC 17/32] python//qmp.py: Do not return None from cmd_obj

2020-05-14 Thread John Snow
This makes typing the qmp library difficult, as it necessitates wrapping
Optional[] around the type for every return type up the stack. At some
point, it becomes difficult to discern or remember why it's None instead
of the expected object.

Use the python exception system to tell us exactly why we didn't get an
object. Remove this special-cased return.

Signed-off-by: John Snow 
---
 python/qemu/lib/qmp.py | 14 +-
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/python/qemu/lib/qmp.py b/python/qemu/lib/qmp.py
index 82f86b4e45..0036204218 100644
--- a/python/qemu/lib/qmp.py
+++ b/python/qemu/lib/qmp.py
@@ -224,22 +224,18 @@ def accept(self, timeout=15.0):
 self.__sockfile = self.__sock.makefile(mode='r')
 return self.__negotiate_capabilities()
 
-def cmd_obj(self, qmp_cmd):
+def cmd_obj(self, qmp_cmd: QMPMessage) -> QMPMessage:
 """
 Send a QMP command to the QMP Monitor.
 
 @param qmp_cmd: QMP command to be sent as a Python dict
-@return QMP response as a Python dict or None if the connection has
-been closed
+@return QMP response as a Python dict
 """
 self.logger.debug(">>> %s", qmp_cmd)
-try:
-self.__sock.sendall(json.dumps(qmp_cmd).encode('utf-8'))
-except OSError as err:
-if err.errno == errno.EPIPE:
-return None
-raise err
+self.__sock.sendall(json.dumps(qmp_cmd).encode('utf-8'))
 resp = self.__json_read()
+if resp is None:
+raise QMPConnectError("Unexpected empty reply from server")
 self.logger.debug("<<< %s", resp)
 return resp
 
-- 
2.21.1




[PATCH RFC 23/32] python//machine.py: reorder __init__

2020-05-14 Thread John Snow
Put the init arg handling all at the top, and mostly in order (deviating
when one is dependent on another), and put what is effectively runtime
state declaration at the bottom.

Signed-off-by: John Snow 
---
 python/qemu/lib/machine.py | 29 +
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/python/qemu/lib/machine.py b/python/qemu/lib/machine.py
index 6a4aea7725..beb31be453 100644
--- a/python/qemu/lib/machine.py
+++ b/python/qemu/lib/machine.py
@@ -80,38 +80,43 @@ def __init__(self, binary, args=None, wrapper=None, 
name=None,
 @param socket_scm_helper: helper program, required for send_fd_scm()
 @note: Qemu process is not started until launch() is used.
 '''
+# Direct user configuration
+
+self._binary = binary
+
 if args is None:
 args = []
+# Copy mutable input: we will be modifying our copy
+self._args = list(args)
+
 if wrapper is None:
 wrapper = []
-if name is None:
-name = "qemu-%d" % os.getpid()
-if sock_dir is None:
-sock_dir = test_dir
-self._name = name
+self._wrapper = wrapper
+
+self._name = name or "qemu-%d" % os.getpid()
+self._test_dir = test_dir
+self._sock_dir = sock_dir or self._test_dir
+self._socket_scm_helper = socket_scm_helper
+
 if monitor_address is not None:
 self._monitor_address = monitor_address
 self._remove_monitor_sockfile = False
 else:
 self._monitor_address = os.path.join(
-sock_dir, f"{name}-monitor.sock"
+self._sock_dir, f"{self._name}-monitor.sock"
 )
 self._remove_monitor_sockfile = True
+
+# Runstate
 self._qemu_log_path = None
 self._qemu_log_file = None
 self._popen = None
-self._binary = binary
-self._args = list(args) # Force copy args in case we modify them
-self._wrapper = wrapper
 self._events = []
 self._iolog = None
-self._socket_scm_helper = socket_scm_helper
 self._qmp_set = True   # Enable QMP monitor by default.
 self._qmp = None
 self._qemu_full_args = None
-self._test_dir = test_dir
 self._temp_dir = None
-self._sock_dir = sock_dir
 self._launched = False
 self._machine = None
 self._console_index = 0
-- 
2.21.1




Re: [PATCH RFC 11/32] python/qemu/lib: remove Python2 style super() calls

2020-05-14 Thread Philippe Mathieu-Daudé

On 5/14/20 7:53 AM, John Snow wrote:

Use the Python3 style instead.

Signed-off-by: John Snow 


Reviewed-by: Philippe Mathieu-Daudé 


---
  python/qemu/lib/machine.py |  2 +-
  python/qemu/lib/qtest.py   | 15 +++
  2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/python/qemu/lib/machine.py b/python/qemu/lib/machine.py
index 4b260fa2cb..b2f0412197 100644
--- a/python/qemu/lib/machine.py
+++ b/python/qemu/lib/machine.py
@@ -55,7 +55,7 @@ def __init__(self, reply):
  desc = reply["error"]["desc"]
  except KeyError:
  desc = reply
-super(MonitorResponseError, self).__init__(desc)
+super().__init__(desc)
  self.reply = reply
  
  
diff --git a/python/qemu/lib/qtest.py b/python/qemu/lib/qtest.py

index 53d814c064..7943487c2b 100644
--- a/python/qemu/lib/qtest.py
+++ b/python/qemu/lib/qtest.py
@@ -101,29 +101,28 @@ def __init__(self, binary, args=None, name=None, 
test_dir="/var/tmp",
  name = "qemu-%d" % os.getpid()
  if sock_dir is None:
  sock_dir = test_dir
-super(QEMUQtestMachine,
-  self).__init__(binary, args, name=name, test_dir=test_dir,
- socket_scm_helper=socket_scm_helper,
- sock_dir=sock_dir)
+super().__init__(binary, args, name=name, test_dir=test_dir,
+ socket_scm_helper=socket_scm_helper,
+ sock_dir=sock_dir)
  self._qtest = None
  self._qtest_path = os.path.join(sock_dir, name + "-qtest.sock")
  
  def _base_args(self):

-args = super(QEMUQtestMachine, self)._base_args()
+args = super()._base_args()
  args.extend(['-qtest', 'unix:path=' + self._qtest_path,
   '-accel', 'qtest'])
  return args
  
  def _pre_launch(self):

-super(QEMUQtestMachine, self)._pre_launch()
+super()._pre_launch()
  self._qtest = QEMUQtestProtocol(self._qtest_path, server=True)
  
  def _post_launch(self):

-super(QEMUQtestMachine, self)._post_launch()
+super()._post_launch()
  self._qtest.accept()
  
  def _post_shutdown(self):

-super(QEMUQtestMachine, self)._post_shutdown()
+super()._post_shutdown()
  self._remove_if_exists(self._qtest_path)
  
  def qtest(self, cmd):







Re: [PATCH 0/3] s390x: improve documentation

2020-05-14 Thread Cornelia Huck
On Tue,  5 May 2020 15:50:22 +0200
Cornelia Huck  wrote:

> The documentation for the s390x system emulation target still has quite
> a bit of room for improvement, so I started adding some device documentation.
> 
> I'm not quite happy with the long command/output lines in the 3270 and
> vfio-ccw sections, but don't know how to make that more readable. Suggestions
> welcome.
> 
> Cornelia Huck (3):
>   docs/s390x: document the virtual css
>   docs/s390x: document 3270
>   docs/s390x: document vfio-ccw
> 
>  docs/system/s390x/3270.rst | 32 +
>  docs/system/s390x/css.rst  | 64 ++
>  docs/system/s390x/vfio-ccw.rst | 58 ++
>  docs/system/target-s390x.rst   |  3 ++
>  4 files changed, 157 insertions(+)
>  create mode 100644 docs/system/s390x/3270.rst
>  create mode 100644 docs/system/s390x/css.rst
>  create mode 100644 docs/system/s390x/vfio-ccw.rst
> 

Anybody feel like taking a look?




Re: [PATCH v2 1/6] target/ppc: Pass const pointer to ppc_radix64_get_prot_amr()

2020-05-14 Thread Cédric Le Goater
On 5/14/20 12:56 AM, Greg Kurz wrote:
> This doesn't require write access to the CPU structure.
> 
> Signed-off-by: Greg Kurz 

Reviewed-by: Cédric Le Goater 

> ---
>  target/ppc/mmu-radix64.h |4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/target/ppc/mmu-radix64.h b/target/ppc/mmu-radix64.h
> index 96228546aa85..f28c5794d071 100644
> --- a/target/ppc/mmu-radix64.h
> +++ b/target/ppc/mmu-radix64.h
> @@ -55,9 +55,9 @@ static inline int ppc_radix64_get_prot_eaa(uint64_t pte)
> (pte & R_PTE_EAA_X ? PAGE_EXEC : 0);
>  }
>  
> -static inline int ppc_radix64_get_prot_amr(PowerPCCPU *cpu)
> +static inline int ppc_radix64_get_prot_amr(const PowerPCCPU *cpu)
>  {
> -CPUPPCState *env = &cpu->env;
> +const CPUPPCState *env = &cpu->env;
>  int amr = env->spr[SPR_AMR] >> 62; /* We only care about key0 AMR63:62 */
>  int iamr = env->spr[SPR_IAMR] >> 62; /* We only care about key0 
> IAMR63:62 */
>  
> 




Re: [PATCH v6 18/20] hw/block/nvme: factor out pmr setup

2020-05-14 Thread Philippe Mathieu-Daudé

On 5/14/20 6:46 AM, Klaus Jensen wrote:

From: Klaus Jensen 

Signed-off-by: Klaus Jensen 
Reviewed-by: Maxim Levitsky 
---
  hw/block/nvme.c | 95 ++---
  1 file changed, 51 insertions(+), 44 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index d71a5f142d51..7254b66ae199 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -58,6 +58,7 @@
  #define NVME_REG_SIZE 0x1000
  #define NVME_DB_SIZE  4
  #define NVME_CMB_BIR 2
+#define NVME_PMR_BIR 2
  
  #define NVME_GUEST_ERR(trace, fmt, ...) \

  do { \
@@ -1463,6 +1464,55 @@ static void nvme_init_cmb(NvmeCtrl *n, PCIDevice 
*pci_dev)
   PCI_BASE_ADDRESS_MEM_PREFETCH, &n->ctrl_mem);
  }
  
+static void nvme_init_pmr(NvmeCtrl *n, PCIDevice *pci_dev)

+{
+/* Controller Capabilities register */
+NVME_CAP_SET_PMRS(n->bar.cap, 1);
+
+/* PMR Capabities register */
+n->bar.pmrcap = 0;
+NVME_PMRCAP_SET_RDS(n->bar.pmrcap, 0);
+NVME_PMRCAP_SET_WDS(n->bar.pmrcap, 0);
+NVME_PMRCAP_SET_BIR(n->bar.pmrcap, NVME_PMR_BIR);
+NVME_PMRCAP_SET_PMRTU(n->bar.pmrcap, 0);
+/* Turn on bit 1 support */
+NVME_PMRCAP_SET_PMRWBM(n->bar.pmrcap, 0x02);
+NVME_PMRCAP_SET_PMRTO(n->bar.pmrcap, 0);
+NVME_PMRCAP_SET_CMSS(n->bar.pmrcap, 0);
+
+/* PMR Control register */
+n->bar.pmrctl = 0;
+NVME_PMRCTL_SET_EN(n->bar.pmrctl, 0);
+
+/* PMR Status register */
+n->bar.pmrsts = 0;
+NVME_PMRSTS_SET_ERR(n->bar.pmrsts, 0);
+NVME_PMRSTS_SET_NRDY(n->bar.pmrsts, 0);
+NVME_PMRSTS_SET_HSTS(n->bar.pmrsts, 0);
+NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 0);
+
+/* PMR Elasticity Buffer Size register */
+n->bar.pmrebs = 0;
+NVME_PMREBS_SET_PMRSZU(n->bar.pmrebs, 0);
+NVME_PMREBS_SET_RBB(n->bar.pmrebs, 0);
+NVME_PMREBS_SET_PMRWBZ(n->bar.pmrebs, 0);
+
+/* PMR Sustained Write Throughput register */
+n->bar.pmrswtp = 0;
+NVME_PMRSWTP_SET_PMRSWTU(n->bar.pmrswtp, 0);
+NVME_PMRSWTP_SET_PMRSWTV(n->bar.pmrswtp, 0);
+
+/* PMR Memory Space Control register */
+n->bar.pmrmsc = 0;
+NVME_PMRMSC_SET_CMSE(n->bar.pmrmsc, 0);
+NVME_PMRMSC_SET_CBA(n->bar.pmrmsc, 0);
+
+pci_register_bar(pci_dev, NVME_PMRCAP_BIR(n->bar.pmrcap),
+ PCI_BASE_ADDRESS_SPACE_MEMORY |
+ PCI_BASE_ADDRESS_MEM_TYPE_64 |
+ PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmrdev->mr);
+}
+
  static void nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev)
  {
  uint8_t *pci_conf = pci_dev->config;
@@ -1541,50 +1591,7 @@ static void nvme_realize(PCIDevice *pci_dev, Error 
**errp)
  if (n->params.cmb_size_mb) {
  nvme_init_cmb(n, pci_dev);
  } else if (n->pmrdev) {
-/* Controller Capabilities register */
-NVME_CAP_SET_PMRS(n->bar.cap, 1);
-
-/* PMR Capabities register */
-n->bar.pmrcap = 0;
-NVME_PMRCAP_SET_RDS(n->bar.pmrcap, 0);
-NVME_PMRCAP_SET_WDS(n->bar.pmrcap, 0);
-NVME_PMRCAP_SET_BIR(n->bar.pmrcap, 2);
-NVME_PMRCAP_SET_PMRTU(n->bar.pmrcap, 0);
-/* Turn on bit 1 support */
-NVME_PMRCAP_SET_PMRWBM(n->bar.pmrcap, 0x02);
-NVME_PMRCAP_SET_PMRTO(n->bar.pmrcap, 0);
-NVME_PMRCAP_SET_CMSS(n->bar.pmrcap, 0);
-
-/* PMR Control register */
-n->bar.pmrctl = 0;
-NVME_PMRCTL_SET_EN(n->bar.pmrctl, 0);
-
-/* PMR Status register */
-n->bar.pmrsts = 0;
-NVME_PMRSTS_SET_ERR(n->bar.pmrsts, 0);
-NVME_PMRSTS_SET_NRDY(n->bar.pmrsts, 0);
-NVME_PMRSTS_SET_HSTS(n->bar.pmrsts, 0);
-NVME_PMRSTS_SET_CBAI(n->bar.pmrsts, 0);
-
-/* PMR Elasticity Buffer Size register */
-n->bar.pmrebs = 0;
-NVME_PMREBS_SET_PMRSZU(n->bar.pmrebs, 0);
-NVME_PMREBS_SET_RBB(n->bar.pmrebs, 0);
-NVME_PMREBS_SET_PMRWBZ(n->bar.pmrebs, 0);
-
-/* PMR Sustained Write Throughput register */
-n->bar.pmrswtp = 0;
-NVME_PMRSWTP_SET_PMRSWTU(n->bar.pmrswtp, 0);
-NVME_PMRSWTP_SET_PMRSWTV(n->bar.pmrswtp, 0);
-
-/* PMR Memory Space Control register */
-n->bar.pmrmsc = 0;
-NVME_PMRMSC_SET_CMSE(n->bar.pmrmsc, 0);
-NVME_PMRMSC_SET_CBA(n->bar.pmrmsc, 0);
-
-pci_register_bar(pci_dev, NVME_PMRCAP_BIR(n->bar.pmrcap),
-PCI_BASE_ADDRESS_SPACE_MEMORY | PCI_BASE_ADDRESS_MEM_TYPE_64 |
-PCI_BASE_ADDRESS_MEM_PREFETCH, &n->pmrdev->mr);
+nvme_init_pmr(n, pci_dev);
  }
  
  for (i = 0; i < n->num_namespaces; i++) {




Reviewed-by: Philippe Mathieu-Daudé 




[PATCH RFC 15/32] python//qmp.py: Define common types

2020-05-14 Thread John Snow
Define some common types that we'll need to annotate a lot of other
functions going forward.

Signed-off-by: John Snow 
---
 python/qemu/lib/qmp.py | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/python/qemu/lib/qmp.py b/python/qemu/lib/qmp.py
index a634c4e26c..911da59888 100644
--- a/python/qemu/lib/qmp.py
+++ b/python/qemu/lib/qmp.py
@@ -12,13 +12,31 @@
 import socket
 import logging
 from typing import (
+Any,
+Dict,
 Optional,
 TextIO,
 Type,
+Tuple,
+Union,
 )
 from types import TracebackType
 
 
+# QMPMessage is a QMP Message of any kind.
+# e.g. {'yee': 'haw'}
+#
+# QMPReturnValue is the inner value of return values only.
+# {'return': {}} is the QMPMessage,
+# {} is the QMPReturnValue.
+QMPMessage = Dict[str, Any]
+QMPReturnValue = Dict[str, Any]
+
+InternetAddrT = Tuple[str, str]
+UnixAddrT = str
+SocketAddrT = Union[InternetAddrT, UnixAddrT]
+
+
 class QMPError(Exception):
 """
 QMP base exception
-- 
2.21.1




[PATCH RFC 26/32] python//machine.py: use qmp.command

2020-05-14 Thread John Snow
machine.py and qmp.py both do the same thing here; refactor machine.py
to use qmp.py's functionality more directly.

Signed-off-by: John Snow 
---
 python/qemu/lib/machine.py | 26 +++---
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/python/qemu/lib/machine.py b/python/qemu/lib/machine.py
index 61ee3a0e81..34e6b6f9e9 100644
--- a/python/qemu/lib/machine.py
+++ b/python/qemu/lib/machine.py
@@ -25,6 +25,8 @@
 import socket
 import tempfile
 from typing import (
+Any,
+Dict,
 List,
 Optional,
 Type,
@@ -416,17 +418,23 @@ def set_qmp_monitor(self, enabled=True):
 self._qmp_set = False
 self._qmp = None
 
-def qmp(self, cmd, conv_keys=True, **args):
-"""
-Invoke a QMP command and return the response dict
-"""
+@classmethod
+def _qmp_args(cls, _conv_keys: bool = True, **args: Any) -> Dict[str, Any]:
 qmp_args = dict()
 for key, value in args.items():
-if conv_keys:
+if _conv_keys:
 qmp_args[key.replace('_', '-')] = value
 else:
 qmp_args[key] = value
+return qmp_args
 
+def qmp(self, cmd: str,
+conv_keys: bool = True,
+**args: Any) -> QMPMessage:
+"""
+Invoke a QMP command and return the response dict
+"""
+qmp_args = self._qmp_args(conv_keys, **args)
 return self._qmp.cmd(cmd, args=qmp_args)
 
 def command(self, cmd, conv_keys=True, **args):
@@ -435,12 +443,8 @@ def command(self, cmd, conv_keys=True, **args):
 On success return the response dict.
 On failure raise an exception.
 """
-reply = self.qmp(cmd, conv_keys, **args)
-if reply is None:
-raise qmp.QMPError("Monitor is closed")
-if "error" in reply:
-raise qmp.QMPResponseError(reply)
-return reply["return"]
+qmp_args = self._qmp_args(conv_keys, **args)
+return self._qmp.command(cmd, **qmp_args)
 
 def get_qmp_event(self, wait=False):
 """
-- 
2.21.1




[PATCH RFC 09/32] python/qemu: add pylint to Pipfile

2020-05-14 Thread John Snow
A bug in pylint 2.5.1 and 2.5.2 causes false positives for
relative imports. This version is pinned at 2.5.0 until a fix is
available.

Signed-off-by: John Snow 
---
 python/Pipfile  |   1 +
 python/Pipfile.lock | 123 
 2 files changed, 124 insertions(+)
 create mode 100644 python/Pipfile.lock

diff --git a/python/Pipfile b/python/Pipfile
index 9534830b5e..ddb2b5a518 100644
--- a/python/Pipfile
+++ b/python/Pipfile
@@ -4,6 +4,7 @@ url = "https://pypi.org/simple"
 verify_ssl = true
 
 [dev-packages]
+pylint = "==2.5.0"
 
 [packages]
 
diff --git a/python/Pipfile.lock b/python/Pipfile.lock
new file mode 100644
index 00..e6faa832e4
--- /dev/null
+++ b/python/Pipfile.lock
@@ -0,0 +1,123 @@
+{
+"_meta": {
+"hash": {
+"sha256": 
"7815dedfd7481b645389153dd45e9adb82c72956d0efc74d8f087497624b75e4"
+},
+"pipfile-spec": 6,
+"requires": {
+"python_version": "3.6"
+},
+"sources": [
+{
+"name": "pypi",
+"url": "https://pypi.org/simple",
+"verify_ssl": true
+}
+]
+},
+"default": {},
+"develop": {
+"astroid": {
+"hashes": [
+
"sha256:4c17cea3e592c21b6e222f673868961bad77e1f985cb1694ed077475a89229c1",
+
"sha256:d8506842a3faf734b81599c8b98dcc423de863adcc1999248480b18bd31a0f38"
+],
+"version": "==2.4.1"
+},
+"isort": {
+"hashes": [
+
"sha256:54da7e92468955c4fceacd0c86bd0ec997b0e1ee80d97f67c35a78b719dccab1",
+
"sha256:6e811fcb295968434526407adb8796944f1988c5b65e8139058f2014cbe100fd"
+],
+"version": "==4.3.21"
+},
+"lazy-object-proxy": {
+"hashes": [
+
"sha256:0c4b206227a8097f05c4dbdd323c50edf81f15db3b8dc064d08c62d37e1a504d",
+
"sha256:194d092e6f246b906e8f70884e620e459fc54db3259e60cf69a4d66c3fda3449",
+
"sha256:1be7e4c9f96948003609aa6c974ae59830a6baecc5376c25c92d7d697e684c08",
+
"sha256:4677f594e474c91da97f489fea5b7daa17b5517190899cf213697e48d3902f5a",
+
"sha256:48dab84ebd4831077b150572aec802f303117c8cc5c871e182447281ebf3ac50",
+
"sha256:5541cada25cd173702dbd99f8e22434105456314462326f06dba3e180f203dfd",
+
"sha256:59f79fef100b09564bc2df42ea2d8d21a64fdcda64979c0fa3db7bdaabaf6239",
+
"sha256:8d859b89baf8ef7f8bc6b00aa20316483d67f0b1cbf422f5b4dc56701c8f2ffb",
+
"sha256:9254f4358b9b541e3441b007a0ea0764b9d056afdeafc1a5569eee1cc6c1b9ea",
+
"sha256:9651375199045a358eb6741df3e02a651e0330be090b3bc79f6d0de31a80ec3e",
+
"sha256:97bb5884f6f1cdce0099f86b907aa41c970c3c672ac8b9c8352789e103cf3156",
+
"sha256:9b15f3f4c0f35727d3a0fba4b770b3c4ebbb1fa907dbcc046a1d2799f3edd142",
+
"sha256:a2238e9d1bb71a56cd710611a1614d1194dc10a175c1e08d75e1a7bcc250d442",
+
"sha256:a6ae12d08c0bf9909ce12385803a543bfe99b95fe01e752536a60af2b7797c62",
+
"sha256:ca0a928a3ddbc5725be2dd1cf895ec0a254798915fb3a36af0964a0a4149e3db",
+
"sha256:cb2c7c57005a6804ab66f106ceb8482da55f5314b7fcb06551db1edae4ad1531",
+
"sha256:d74bb8693bf9cf75ac3b47a54d716bbb1a92648d5f781fc799347cfc95952383",
+
"sha256:d945239a5639b3ff35b70a88c5f2f491913eb94871780ebfabb2568bd58afc5a",
+
"sha256:eba7011090323c1dadf18b3b689845fd96a61ba0a1dfbd7f24b921398affc357",
+
"sha256:efa1909120ce98bbb3777e8b6f92237f5d5c8ea6758efea36a473e1d38f7d3e4",
+
"sha256:f3900e8a5de27447acbf900b4750b0ddfd7ec1ea7fbaf11dfa911141bc522af0"
+],
+"version": "==1.4.3"
+},
+"mccabe": {
+"hashes": [
+
"sha256:ab8a6258860da4b6677da4bd2fe5dc2c659cff31b3ee4f7f5d64e79735b80d42",
+
"sha256:dd8d182285a0fe56bace7f45b5e7d1a6ebcbf524e8f3bd87eb0f125271b8831f"
+],
+"version": "==0.6.1"
+},
+"pylint": {
+"hashes": [
+
"sha256:588e114e3f9a1630428c35b7dd1c82c1c93e1b0e78ee312ae4724c5e1a1e0245",
+
"sha256:bd556ba95a4cf55a1fc0004c00cf4560b1e70598a54a74c6904d933c8f3bd5a8"
+],
+"index": "pypi",
+"version": "==2.5.0"
+},
+"six": {
+"hashes": [
+
"sha256:236bdbdce46e6e6a3d61a337c0f8b763ca1e8717c03b369e87a7ec7ce1319c0a",
+
"sha256:8f3cd2e254d8f793e7f3d6d9df77b92252b52637291d0f0da013c76ea2724b6c"
+],
+"version": "==1.14.0"
+},
+"toml": {
+"hashes": [
+
"sha256:229f81c57791a41d65e399fc06bf0848bab550a9dfd5ed66df18ce5f05e73d5c",
+

[PATCH RFC 14/32] python//qmp.py: use True/False for non/blocking modes

2020-05-14 Thread John Snow
The type system doesn't want integers.

Signed-off-by: John Snow 
---
 python/qemu/lib/qmp.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/qemu/lib/qmp.py b/python/qemu/lib/qmp.py
index b91c9d5c1c..a634c4e26c 100644
--- a/python/qemu/lib/qmp.py
+++ b/python/qemu/lib/qmp.py
@@ -120,14 +120,14 @@ def __get_events(self, wait=False):
 """
 
 # Check for new events regardless and pull them into the cache:
-self.__sock.setblocking(0)
+self.__sock.setblocking(False)
 try:
 self.__json_read()
 except OSError as err:
 if err.errno == errno.EAGAIN:
 # No data available
 pass
-self.__sock.setblocking(1)
+self.__sock.setblocking(True)
 
 # Wait for new events, if needed.
 # if wait is 0.0, this means "no wait" and is also implicitly false.
-- 
2.21.1




Re: [PATCH v2 5/5] vhost: add device started check in migration set log

2020-05-14 Thread Jason Wang



On 2020/5/13 下午5:47, Dima Stepanov wrote:

 case CHR_EVENT_CLOSED:
 /* a close event may happen during a read/write, but vhost
  * code assumes the vhost_dev remains setup, so delay the
  * stop & clear to idle.
  * FIXME: better handle failure in vhost code, remove bh
  */
 if (s->watch) {
 AioContext *ctx = qemu_get_current_aio_context();

 g_source_remove(s->watch);
 s->watch = 0;
 qemu_chr_fe_set_handlers(>chr, NULL, NULL, NULL, NULL,
  NULL, NULL, false);

 aio_bh_schedule_oneshot(ctx, chr_closed_bh, opaque);
 }
 break;

I think it's time we dropped the FIXME and moved the handling to common
code. Jason? Marc-André?

I agree. Just to confirm, do you prefer bh or doing changes like what is
done in this series? It looks to me like bh would result in simpler code.

Could it be a good idea just to perform the disconnect in the char device but
postpone the clean up to the vhost-user-blk (or any other vhost-user
device) itself? So we are moving the postpone logic and decision from
the char device to the vhost-user device. One of the ideas I have is as
follows:
   - Put ourselves in the INITIALIZATION state
   - Start these vhost-user "handshake" commands
   - If we get a disconnect error, perform the disconnect, but don't clean up
 the device (it will be cleaned up on the rollback). It can be done by
 checking the state in the vhost_user_..._disconnect routine or something like it



Did you see any issue with just using the aio bh as Michael posted above?

Then we don't need to deal with the silent vhost_dev_stop() and we will
have code that is much easier to understand.


Thanks



   - vhost-user command returns error back to the _start() routine
   - Rollback in one place in the start() routine, by calling this
 postponed clean up for the disconnect






Re: Questionable aspects of QEMU Error's design

2020-05-14 Thread Vladimir Sementsov-Ogievskiy

28.04.2020 08:20, Vladimir Sementsov-Ogievskiy wrote:

27.04.2020 18:36, Markus Armbruster wrote:

Markus Armbruster  writes:


Markus Armbruster  writes:


QEMU's Error was patterned after GLib's GError.  Differences include:

[...]

* Return value conventions

   Common: non-void functions return a distinct error value on failure
   when such a value can be defined.  Patterns:

   - Functions returning non-null pointers on success return null pointer
 on failure.

   - Functions returning non-negative integers on success return a
 negative error code on failure.

   Different: GLib discourages void functions, because these lead to
   awkward error checking code.  We have tons of them, and tons of
   awkward error checking code:

 Error *err = NULL;
 frobnicate(arg, &err);
 if (err) {
 ... recover ...
 error_propagate(errp, err);
 }

   instead of

 if (!frobnicate(arg, errp)) {
 ... recover ...
 }

   Can also lead to pointless creation of Error objects.

   I consider this a design mistake.  Can we still fix it?  We have more
   than 2000 void functions taking an Error ** parameter...

   Transforming code that receives and checks for errors with Coccinelle
   shouldn't be hard.  Transforming code that returns errors seems more
   difficult.  We need to transform explicit and implicit return to
   either return true or return false, depending on what we did to the
   @errp parameter on the way to the return.  Hmm.

[...]

To figure out what functions with an Error ** parameter return, I used
Coccinelle to find such function definitions and print the return types.
Summary of results:

    2155 void
 873 signed integer
 494 pointer
 153 bool
  33 unsigned integer
   6 enum
    -
    3714 total

I then used Coccinelle to find checked calls of void functions (passing
&error_fatal or &error_abort is not considered "checking" here).  These
calls become simpler if we make the functions return a useful value.  I
found a bit under 600 direct calls, and some 50 indirect calls.

Most frequent direct calls:

 127 object_property_set_bool
  27 qemu_opts_absorb_qdict
  16 visit_type_str
  14 visit_type_int
  10 visit_type_uint32

Let's have a closer look at object_property_set() & friends.  Out of
almost 1000 calls, some 150 are checked.  While I'm sure many of the
unchecked calls can't actually fail, I am concerned some unchecked calls
can.

If we adopt the convention to return a value that indicates success /
failure, we should consider converting object.h to it sooner rather than
later.

Please understand these are rough numbers from quick & dirty scripts.


FYI, I'm working on converting QemuOpts, QAPI visitors and QOM.  I keep
running into bugs.  So far:

 [PATCH v2 for-5.1 0/9] qemu-option: Fix corner cases and clean up
 [PATCH for-5.1 0/5] qobject: Minor spring cleaning
 [PATCH v2 00/14] Miscellaneous error handling fixes
 [PATCH 0/4] Subject: [PATCH 0/4] smbus: SPD fixes
 [PATCH 0/3] fuzz: Probably there is a better way to do this
 [PATCH v2 00/15] qapi: Spring cleaning
 [PATCH 00/11] More miscellaneous error handling fixes

I got another one coming for QOM and qdev before I can post the
conversion.

Vladimir, since the conversion will mess with error_propagate(), I'd
like to get it in before your auto-propagation work.



OK, just let me know when to regenerate the series, it's not hard.



Hi! Is all that merged? Should I resend now?

--
Best regards,
Vladimir



Re: RFC: use VFIO over a UNIX domain socket to implement device offloading

2020-05-14 Thread Alex Williamson
On Thu, 14 May 2020 09:32:15 -0700
John G Johnson  wrote:

>   Thanos and I have made some changes to the doc in response to the
> feedback we’ve received.  The biggest difference is that it is less reliant
> on the reader being familiar with the current VFIO implementation.  We’d
> appreciate any additional feedback you could give on the changes.  Thanks
> in advance.
> 
>   Thanos and JJ
> 
> 
> The link remains the same:
> 
> https://docs.google.com/document/d/1FspkL0hVEnZqHbdoqGLUpyC38rSk_7HhY471TsVwyK8/edit?usp=sharing

Hi,

I'm confused by VFIO_USER_ADD_MEMORY_REGION vs VFIO_USER_IOMMU_MAP_DMA.
The former seems intended to provide the server with access to the
entire GPA space, while the latter indicates an IOVA to GPA mapping of
those regions.  Doesn't this break the basic isolation of a vIOMMU?
This essentially says to me "here's all the guest memory, but please
only access these regions for which we're providing DMA mappings".
That invites abuse.

Also regarding VFIO_USER_ADD_MEMORY_REGION, it's not clear to me how
"an array of file descriptors will be sent as part of the message
meta-data" works.  Also consider s/SUB/DEL/.  Why is the Device ID in
the table specified as 0?  How does a client learn their Device ID?

VFIO_USER_DEVICE_GET_REGION_INFO (or anything else making use of a
capability chain), the cap_offset and next pointers within the chain
need to specify what their offset is relative to (ie. the start of the
packet, the start of the vfio compatible data structure, etc).  I
assume the latter for client compatibility.

Also on REGION_INFO, offset is specified as "the base offset to be
given to the mmap() call for regions with the MMAP attribute".  Base
offset from what?  Is the mmap performed on the socket fd?  Do we not
allow read/write, we need to use VFIO_USER_MMIO_READ/WRITE instead?
Why do we specify "MMIO" in those operations versus simply "REGION"?
Are we arbitrarily excluding support for I/O port regions or device
specific regions?  If these commands replace direct read and write to
an fd offset, how is PCI config space handled?

VFIO_USER_MMIO_READ specifies the count field is zero and the reply
will include the count specifying the amount of data read.  How does
the client specify how much data to read?  Via message size?

VFIO_USER_DMA_READ/WRITE, is the address a GPA or IOVA?  IMO the device
should only ever have access via IOVA, which implies a DMA mapping
exists for the device.  Can you provide an example of why we need these
commands since there seems little point to this interface if a device
cannot directly interact with VM memory.

The IOMMU commands should be unnecessary, a vIOMMU should be
transparent to the server by virtue that the device only knows about
IOVA mappings accessible to the device.  Requiring the client to expose
all memory to the server implies that the server must always be trusted.

Interrupt info format, s/type/index/, s/vector/subindex/

In addition to the unused ioctls, the entire concept of groups and
containers are not found in this specification.  To some degree that
makes sense and even mdevs and typically SR-IOV VFs have a 1:1 device
to group relationship.  However, the container is very much involved in
the development of migration support, where it's the container that
provides dirty bitmaps.  Since we're doing map and unmap without that
container concept here, perhaps we'd equally apply those APIs to this
same socket.  Thanks,

Alex




[PATCH v2 00/17] target/mips: FPU and other cleanups and improvements

2020-05-14 Thread Aleksandar Markovic
This series contains mostly cosmetic FPU cleanups aimed to
make source code recognition easier for tools like gdb, gcov,
callgrind, and others.

There is also a patch that refactors conversion from ieee to
mips fp exception flags. This refactoring will improve the
performance of almost all fp-related mips instructions, albeit
very modestly (less than one percent).

Finally, there is a patch that frees mips_malta.c from
checkpatch warnings.

v1->v2:

  - added more demacroing

Aleksandar Markovic (17):
  target/mips: fpu: Demacro ADD.
  target/mips: fpu: Demacro SUB.
  target/mips: fpu: Demacro MUL.
  target/mips: fpu: Demacro DIV.
  target/mips: fpu: Remove now unused macro FLOAT_BINOP
  target/mips: fpu: Demacro MADD.
  target/mips: fpu: Demacro MSUB.
  target/mips: fpu: Demacro NMADD.
  target/mips: fpu: Demacro NMSUB.
  target/mips: fpu: Remove now unused UNFUSED_FMA and FLOAT_FMA macros
  target/mips: fpu: Demacro CLASS.
  target/mips: fpu: Remove now unused FLOAT_CLASS macro
  target/mips: fpu: Demacro RINT.
  target/mips: fpu: Remove now unused FLOAT_RINT macro
  target/mips: fpu: Name better paired-single variables
  target/mips: fpu: Refactor conversion from ieee to mips exception
flags
  hw/mips: Convert Malta "ifdef 0"-ed code to comments

 hw/mips/mips_malta.c |  20 +-
 target/mips/fpu_helper.c | 658 +++
 target/mips/internal.h   |   1 -
 target/mips/msa_helper.c |  77 +++--
 4 files changed, 517 insertions(+), 239 deletions(-)

-- 
2.20.1




[PATCH Kernel v20 3/8] vfio iommu: Cache pgsize_bitmap in struct vfio_iommu

2020-05-14 Thread Kirti Wankhede
Calculate and cache pgsize_bitmap when iommu->domain_list is updated
and iommu->external_domain is set for mdev device.
Add iommu->lock protection when cached pgsize_bitmap is accessed.

Signed-off-by: Kirti Wankhede 
Reviewed-by: Neo Jia 
---
 drivers/vfio/vfio_iommu_type1.c | 88 +++--
 1 file changed, 49 insertions(+), 39 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index fa735047b04d..de17787ffece 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -69,6 +69,7 @@ struct vfio_iommu {
struct rb_root  dma_list;
struct blocking_notifier_head notifier;
unsigned intdma_avail;
+   uint64_tpgsize_bitmap;
boolv2;
boolnesting;
 };
@@ -805,15 +806,14 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, 
struct vfio_dma *dma)
iommu->dma_avail++;
 }
 
-static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu)
+static void vfio_pgsize_bitmap(struct vfio_iommu *iommu)
 {
struct vfio_domain *domain;
-   unsigned long bitmap = ULONG_MAX;
 
-   mutex_lock(>lock);
+   iommu->pgsize_bitmap = ULONG_MAX;
+
list_for_each_entry(domain, >domain_list, next)
-   bitmap &= domain->domain->pgsize_bitmap;
-   mutex_unlock(>lock);
+   iommu->pgsize_bitmap &= domain->domain->pgsize_bitmap;
 
/*
 * In case the IOMMU supports page sizes smaller than PAGE_SIZE
@@ -823,12 +823,10 @@ static unsigned long vfio_pgsize_bitmap(struct vfio_iommu 
*iommu)
 * granularity while iommu driver can use the sub-PAGE_SIZE size
 * to map the buffer.
 */
-   if (bitmap & ~PAGE_MASK) {
-   bitmap &= PAGE_MASK;
-   bitmap |= PAGE_SIZE;
+   if (iommu->pgsize_bitmap & ~PAGE_MASK) {
+   iommu->pgsize_bitmap &= PAGE_MASK;
+   iommu->pgsize_bitmap |= PAGE_SIZE;
}
-
-   return bitmap;
 }
 
 static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
@@ -839,19 +837,28 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
size_t unmapped = 0;
int ret = 0, retries = 0;
 
-   mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1;
+   mutex_lock(>lock);
+
+   mask = ((uint64_t)1 << __ffs(iommu->pgsize_bitmap)) - 1;
+
+   if (unmap->iova & mask) {
+   ret = -EINVAL;
+   goto unlock;
+   }
+
+   if (!unmap->size || unmap->size & mask) {
+   ret = -EINVAL;
+   goto unlock;
+   }
 
-   if (unmap->iova & mask)
-   return -EINVAL;
-   if (!unmap->size || unmap->size & mask)
-   return -EINVAL;
if (unmap->iova + unmap->size - 1 < unmap->iova ||
-   unmap->size > SIZE_MAX)
-   return -EINVAL;
+   unmap->size > SIZE_MAX) {
+   ret = -EINVAL;
+   goto unlock;
+   }
 
WARN_ON(mask & PAGE_MASK);
 again:
-   mutex_lock(>lock);
 
/*
 * vfio-iommu-type1 (v1) - User mappings were coalesced together to
@@ -930,6 +937,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
blocking_notifier_call_chain(>notifier,
VFIO_IOMMU_NOTIFY_DMA_UNMAP,
_unmap);
+   mutex_lock(>lock);
goto again;
}
unmapped += dma->size;
@@ -1045,24 +1053,28 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
if (map->size != size || map->vaddr != vaddr || map->iova != iova)
return -EINVAL;
 
-   mask = ((uint64_t)1 << __ffs(vfio_pgsize_bitmap(iommu))) - 1;
-
-   WARN_ON(mask & PAGE_MASK);
-
/* READ/WRITE from device perspective */
if (map->flags & VFIO_DMA_MAP_FLAG_WRITE)
prot |= IOMMU_WRITE;
if (map->flags & VFIO_DMA_MAP_FLAG_READ)
prot |= IOMMU_READ;
 
-   if (!prot || !size || (size | iova | vaddr) & mask)
-   return -EINVAL;
+   mutex_lock(>lock);
 
-   /* Don't allow IOVA or virtual address wrap */
-   if (iova + size - 1 < iova || vaddr + size - 1 < vaddr)
-   return -EINVAL;
+   mask = ((uint64_t)1 << __ffs(iommu->pgsize_bitmap)) - 1;
 
-   mutex_lock(>lock);
+   WARN_ON(mask & PAGE_MASK);
+
+   if (!prot || !size || (size | iova | vaddr) & mask) {
+   ret = -EINVAL;
+   goto out_unlock;
+   }
+
+   /* Don't allow IOVA or virtual address wrap */
+   if (iova + size - 1 < iova || vaddr + size - 1 < vaddr) {
+   ret = -EINVAL;
+   goto out_unlock;
+   }
 
if (vfio_find_dma(iommu, iova, size)) {
ret = -EEXIST;
@@ -1668,6 +1680,7 @@ static 

[PATCH Kernel v20 8/8] vfio: Selective dirty page tracking if IOMMU backed device pins pages

2020-05-14 Thread Kirti Wankhede
Added a check such that only singleton IOMMU groups can pin pages.
From the point when vendor driver pins any pages, consider IOMMU group
dirty page scope to be limited to pinned pages.

To avoid walking the list often, added flag
pinned_page_dirty_scope to indicate whether the dirty page scope of all
the vfio_groups for each vfio_domain in the domain_list is limited to
pinned pages. This flag is updated on the first pinned pages request for
that IOMMU group and on attaching/detaching a group.

Signed-off-by: Kirti Wankhede 
Reviewed-by: Neo Jia 
---
 drivers/vfio/vfio.c |  13 +++--
 drivers/vfio/vfio_iommu_type1.c | 104 
 include/linux/vfio.h|   4 +-
 3 files changed, 109 insertions(+), 12 deletions(-)

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 765e0e5d83ed..580099afeaff 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -85,6 +85,7 @@ struct vfio_group {
atomic_topened;
wait_queue_head_t   container_q;
boolnoiommu;
+   unsigned intdev_counter;
struct kvm  *kvm;
struct blocking_notifier_head   notifier;
 };
@@ -555,6 +556,7 @@ struct vfio_device *vfio_group_create_device(struct 
vfio_group *group,
 
mutex_lock(>device_lock);
list_add(>group_next, >device_list);
+   group->dev_counter++;
mutex_unlock(>device_lock);
 
return device;
@@ -567,6 +569,7 @@ static void vfio_device_release(struct kref *kref)
struct vfio_group *group = device->group;
 
list_del(>group_next);
+   group->dev_counter--;
mutex_unlock(>device_lock);
 
dev_set_drvdata(device->dev, NULL);
@@ -1945,6 +1948,9 @@ int vfio_pin_pages(struct device *dev, unsigned long 
*user_pfn, int npage,
if (!group)
return -ENODEV;
 
+   if (group->dev_counter > 1)
+   return -EINVAL;
+
ret = vfio_group_add_container_user(group);
if (ret)
goto err_pin_pages;
@@ -1952,7 +1958,8 @@ int vfio_pin_pages(struct device *dev, unsigned long 
*user_pfn, int npage,
container = group->container;
driver = container->iommu_driver;
if (likely(driver && driver->ops->pin_pages))
-   ret = driver->ops->pin_pages(container->iommu_data, user_pfn,
+   ret = driver->ops->pin_pages(container->iommu_data,
+group->iommu_group, user_pfn,
 npage, prot, phys_pfn);
else
ret = -ENOTTY;
@@ -2050,8 +2057,8 @@ int vfio_group_pin_pages(struct vfio_group *group,
driver = container->iommu_driver;
if (likely(driver && driver->ops->pin_pages))
ret = driver->ops->pin_pages(container->iommu_data,
-user_iova_pfn, npage,
-prot, phys_pfn);
+group->iommu_group, user_iova_pfn,
+npage, prot, phys_pfn);
else
ret = -ENOTTY;
 
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 3edb3c3e6170..a52c4ae8907b 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -73,6 +73,7 @@ struct vfio_iommu {
boolv2;
boolnesting;
booldirty_page_tracking;
+   boolpinned_page_dirty_scope;
 };
 
 struct vfio_domain {
@@ -100,6 +101,7 @@ struct vfio_group {
struct iommu_group  *iommu_group;
struct list_headnext;
boolmdev_group; /* An mdev group */
+   boolpinned_page_dirty_scope;
 };
 
 struct vfio_iova {
@@ -143,6 +145,10 @@ struct vfio_regions {
 
 static int put_pfn(unsigned long pfn, int prot);
 
+static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu,
+  struct iommu_group *iommu_group);
+
+static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu);
 /*
  * This code handles mapping and unmapping of user data buffers
  * into DMA'ble space using the IOMMU
@@ -590,11 +596,13 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, 
dma_addr_t iova,
 }
 
 static int vfio_iommu_type1_pin_pages(void *iommu_data,
+ struct iommu_group *iommu_group,
  unsigned long *user_pfn,
  int npage, int prot,
  unsigned long *phys_pfn)
 {
struct vfio_iommu *iommu = iommu_data;
+   struct vfio_group *group;
int i, j, ret;
unsigned long remote_vaddr;
struct vfio_dma *dma;
@@ 

[PATCH Kernel v20 6/8] vfio iommu: Update UNMAP_DMA ioctl to get dirty bitmap before unmap

2020-05-14 Thread Kirti Wankhede
DMA mapped pages, including those pinned by mdev vendor drivers, might
get unpinned and unmapped while migration is active and device is still
running. For example, in pre-copy phase while guest driver could access
those pages, host device or vendor driver can dirty these mapped pages.
Such pages should be marked dirty so as to maintain memory consistency
for a user making use of dirty page tracking.

To get the bitmap during unmap, the user should allocate memory for the
bitmap, set it to all zeros, set the size of the allocated memory, set the
page size to be considered for the bitmap and set the flag
VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP.

Signed-off-by: Kirti Wankhede 
Reviewed-by: Neo Jia 
---
 drivers/vfio/vfio_iommu_type1.c | 77 ++---
 include/uapi/linux/vfio.h   | 10 ++
 2 files changed, 75 insertions(+), 12 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index b76d3b14abfd..a1dc57bcece5 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -195,11 +195,15 @@ static void vfio_unlink_dma(struct vfio_iommu *iommu, 
struct vfio_dma *old)
 static int vfio_dma_bitmap_alloc(struct vfio_dma *dma, size_t pgsize)
 {
uint64_t npages = dma->size / pgsize;
+   size_t bitmap_size;
 
if (npages > DIRTY_BITMAP_PAGES_MAX)
return -EINVAL;
 
-   dma->bitmap = kvzalloc(DIRTY_BITMAP_BYTES(npages), GFP_KERNEL);
+   /* Allocate extra 64 bits which are used for bitmap manipulation */
+   bitmap_size = DIRTY_BITMAP_BYTES(npages) + sizeof(u64);
+
+   dma->bitmap = kvzalloc(bitmap_size, GFP_KERNEL);
if (!dma->bitmap)
return -ENOMEM;
 
@@ -999,23 +1003,25 @@ static int verify_bitmap_size(uint64_t npages, uint64_t 
bitmap_size)
 }
 
 static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
-struct vfio_iommu_type1_dma_unmap *unmap)
+struct vfio_iommu_type1_dma_unmap *unmap,
+struct vfio_bitmap *bitmap)
 {
-   uint64_t mask;
struct vfio_dma *dma, *dma_last = NULL;
-   size_t unmapped = 0;
+   size_t unmapped = 0, pgsize;
int ret = 0, retries = 0;
+   unsigned long pgshift;
 
mutex_lock(>lock);
 
-   mask = ((uint64_t)1 << __ffs(iommu->pgsize_bitmap)) - 1;
+   pgshift = __ffs(iommu->pgsize_bitmap);
+   pgsize = (size_t)1 << pgshift;
 
-   if (unmap->iova & mask) {
+   if (unmap->iova & (pgsize - 1)) {
ret = -EINVAL;
goto unlock;
}
 
-   if (!unmap->size || unmap->size & mask) {
+   if (!unmap->size || unmap->size & (pgsize - 1)) {
ret = -EINVAL;
goto unlock;
}
@@ -1026,9 +1032,15 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
goto unlock;
}
 
-   WARN_ON(mask & PAGE_MASK);
-again:
+   /* When dirty tracking is enabled, allow only min supported pgsize */
+   if ((unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) &&
+   (!iommu->dirty_page_tracking || (bitmap->pgsize != pgsize))) {
+   ret = -EINVAL;
+   goto unlock;
+   }
 
+   WARN_ON((pgsize - 1) & PAGE_MASK);
+again:
/*
 * vfio-iommu-type1 (v1) - User mappings were coalesced together to
 * avoid tracking individual mappings.  This means that the granularity
@@ -1066,6 +1078,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
ret = -EINVAL;
goto unlock;
}
+
dma = vfio_find_dma(iommu, unmap->iova + unmap->size - 1, 0);
if (dma && dma->iova + dma->size != unmap->iova + unmap->size) {
ret = -EINVAL;
@@ -1083,6 +1096,23 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
if (dma->task->mm != current->mm)
break;
 
+   if ((unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) &&
+   (dma_last != dma)) {
+
+   /*
+* mark all pages dirty if all pages are pinned and
+* mapped
+*/
+   if (dma->iommu_mapped)
+   bitmap_set(dma->bitmap, 0,
+  dma->size >> pgshift);
+
+   ret = update_user_bitmap(bitmap->data, dma,
+unmap->iova, pgsize);
+   if (ret)
+   break;
+   }
+
if (!RB_EMPTY_ROOT(>pfn_list)) {
struct vfio_iommu_type1_dma_unmap nb_unmap;
 
@@ -2447,17 +2477,40 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
 
} else if (cmd == VFIO_IOMMU_UNMAP_DMA) {
struct vfio_iommu_type1_dma_unmap unmap;
-   long ret;

Re: [PATCH v2 0/9] pc-bios: s390x: Cleanup part 1

2020-05-14 Thread no-reply
Patchew URL: 
https://patchew.org/QEMU/20200514123729.156283-1-fran...@linux.ibm.com/



Hi,

This series seems to have some coding style problems. See output below for
more information:

Message-id: 20200514123729.156283-1-fran...@linux.ibm.com
Subject: [PATCH v2 0/9] pc-bios: s390x: Cleanup part 1
Type: series

=== TEST SCRIPT BEGIN ===
#!/bin/bash
git rev-parse base > /dev/null || exit 0
git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram
./scripts/checkpatch.pl --mailback base..
=== TEST SCRIPT END ===

Switched to a new branch 'test'
5049518 pc-bios: s390x: Make u32 ptr check explicit
10aee2e pc-bios: s390x: Replace 0x00 with 0x0 or 0
168421e pc-bios: s390x: Use ebcdic2ascii table
51051af pc-bios: s390x: Move panic() into header and add infinite loop
9cf4c1c pc-bios: s390x: Use PSW masks where possible
e886137 pc-bios: s390x: Rename and use PSW_MASK_ZMODE constant
b10800a pc-bios: s390x: Get rid of magic offsets into the lowcore
a105085 pc-bios: s390x: Consolidate timing functions into time.h
3a83ebd pc-bios: s390x: cio.c cleanup and compile fix

=== OUTPUT BEGIN ===
1/9 Checking commit 3a83ebd32644 (pc-bios: s390x: cio.c cleanup and compile fix)
ERROR: code indent should never use tabs
#66: FILE: pc-bios/s390-ccw/cio.c:319:
+.pfch = 1,^I/* QEMU's cio implementation requires prefetch */$

ERROR: code indent should never use tabs
#67: FILE: pc-bios/s390-ccw/cio.c:320:
+.c64 = 1,^I/* QEMU's cio implementation requires 64-bit idaws */$

ERROR: code indent should never use tabs
#68: FILE: pc-bios/s390-ccw/cio.c:321:
+.lpm = 0xFF,^I/* All paths allowed */$

total: 3 errors, 0 warnings, 63 lines checked

Patch 1/9 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

2/9 Checking commit a105085b3794 (pc-bios: s390x: Consolidate timing functions 
into time.h)
WARNING: added, moved or deleted file(s), does MAINTAINERS need updating?
#102: 
new file mode 100644

total: 0 errors, 1 warnings, 167 lines checked

Patch 2/9 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
3/9 Checking commit b10800afbdc8 (pc-bios: s390x: Get rid of magic offsets into 
the lowcore)
ERROR: spaces required around that ':' (ctx:VxV)
#29: FILE: pc-bios/s390-ccw/cio.h:127:
+__u16 cssid:8;
^

ERROR: spaces required around that ':' (ctx:VxV)
#30: FILE: pc-bios/s390-ccw/cio.h:128:
+__u16 reserved:4;
   ^

ERROR: spaces required around that ':' (ctx:VxV)
#31: FILE: pc-bios/s390-ccw/cio.h:129:
+__u16 m:1;
^

ERROR: spaces required around that ':' (ctx:VxV)
#32: FILE: pc-bios/s390-ccw/cio.h:130:
+__u16 ssid:2;
   ^

ERROR: spaces required around that ':' (ctx:VxV)
#33: FILE: pc-bios/s390-ccw/cio.h:131:
+__u16 one:1;
  ^

total: 5 errors, 0 warnings, 37 lines checked

Patch 3/9 has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

4/9 Checking commit e886137b0e3c (pc-bios: s390x: Rename and use PSW_MASK_ZMODE 
constant)
5/9 Checking commit 9cf4c1c4626a (pc-bios: s390x: Use PSW masks where possible)
6/9 Checking commit 51051afe9a9e (pc-bios: s390x: Move panic() into header and 
add infinite loop)
7/9 Checking commit 168421e3c66e (pc-bios: s390x: Use ebcdic2ascii table)
8/9 Checking commit 10aee2ea877c (pc-bios: s390x: Replace 0x00 with 0x0 or 0)
9/9 Checking commit 5049518e6552 (pc-bios: s390x: Make u32 ptr check explicit)
=== OUTPUT END ===

Test command exited with code: 1


The full log is available at
http://patchew.org/logs/20200514123729.156283-1-fran...@linux.ibm.com/testing.checkpatch/?type=message.
---
Email generated automatically by Patchew [https://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [PATCH] msix: allow qword MSI-X table accesses

2020-05-14 Thread Philippe Mathieu-Daudé

On 5/14/20 5:16 PM, Michael S. Tsirkin wrote:

PCI spec says:

For all accesses to MSI-X Table and MSI-X PBA fields, software must use
aligned full DWORD or aligned full QWORD transactions; otherwise, the
result is undefined.

However, since MSI-X was converted to use memory API, QEMU
started blocking qword transactions, only allowing DWORD
ones. Guests do not seem to use QWORD accesses, but let's
be spec compliant.

Fixes: 95524ae8dc8f ("msix: convert to memory API")


9 years =)

Reviewed-by: Philippe Mathieu-Daudé 


Signed-off-by: Michael S. Tsirkin 
---
  hw/pci/msix.c | 6 ++
  1 file changed, 6 insertions(+)

diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index 29187898f2..e6a5559038 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -199,6 +199,9 @@ static const MemoryRegionOps msix_table_mmio_ops = {
  .endianness = DEVICE_LITTLE_ENDIAN,
  .valid = {
  .min_access_size = 4,
+.max_access_size = 8,
+},
+.impl = {
  .max_access_size = 4,
  },
  };
@@ -227,6 +230,9 @@ static const MemoryRegionOps msix_pba_mmio_ops = {
  .endianness = DEVICE_LITTLE_ENDIAN,
  .valid = {
  .min_access_size = 4,
+.max_access_size = 8,
+},
+.impl = {
  .max_access_size = 4,
  },
  };






Re: [PATCH v2 4/5] migration/block-dirty-bitmap: fix bitmaps migration during mirror job

2020-05-14 Thread Eric Blake

On 12/19/19 2:51 AM, Vladimir Sementsov-Ogievskiy wrote:

Important thing for bitmap migration is to select destination block
node to obtain the migrated bitmap.

Prepatch, on source we use bdrv_get_device_or_node_name() to identify
the node, and on target we do bdrv_lookup_bs.
bdrv_get_device_or_node_name() returns blk name only for direct
children of blk. So, bitmaps of direct children of blks are migrated by
blk name and others - by node name.

Libvirt is currently unprepared for bitmap migration by node-name, as
node-names are mostly auto-generated. So actually only migration by blk
name works.


It depends on whether -blockdev is in use.  With -blockdev, libvirt 
should be supplying a node name everywhere, without, it is only device 
names.




Now, consider classic libvirt migrations assisted by mirror block job:
mirror block job inserts a filter, so our source is not a direct child of
blk, and bitmaps are migrated by node-names. And this just doesn't work.


Does Max' work to improve seeing through filters fix this?



Let's fix it by allowing use of blk-name even if some implicit filters are
inserted.

Note that we possibly want to allow skipping explicit filters too, but
this is another story.

Note2: we, of course, can't skip filters and use blk name to migrate
bitmaps in filtered node by blk name for this blk if these filters have
named bitmaps which should be migrated.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1652424


That bug has been marked CLOSED in the meantime, but it appears to be 
only because libvirt is now using -blockdev rather than the older drive, 
while the problem affects drive usage.



Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  migration/block-dirty-bitmap.c | 39 +-
  1 file changed, 38 insertions(+), 1 deletion(-)




Okay, after reading some more history on this project (the curse of 
coming up to speed after volunteering to become a co-maintainer), it 
looks like Max's idea replaces this patch altogether.  How much of the 
rest of the series is still important?


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3226
Virtualization:  qemu.org | libvirt.org




Re: [PATCH v2] bitmaps: Update maintainer

2020-05-14 Thread John Snow



On 5/14/20 2:00 PM, Eric Blake wrote:
> Dirty bitmaps are important to incremental backups, including exposure
> over NBD where I'm already maintainer.  Also, I'm aware that lately I
> have been doing as much code/review on bitmaps as John Snow who is
> trying to scale back in order to focus elsewhere; and many of the
> recent patches have come from Vladimir, who is also interested in
> taking on maintainer duties, but would like to start with
> co-maintainership.  Therefore, it's time to revamp the ownership of
> this category, as agreed between the three of us.

Great!

> 
> Signed-off-by: Eric Blake 
> ---
> 
> v2: further tweak to maintainership, update T: listing
> 
>  MAINTAINERS | 7 ---
>  1 file changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/MAINTAINERS b/MAINTAINERS
> index d11f3cb97613..ae23062a51ac 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -2001,8 +2001,9 @@ F: qapi/transaction.json
>  T: git https://repo.or.cz/qemu/armbru.git block-next
> 
>  Dirty Bitmaps
> -M: John Snow 
> -R: Vladimir Sementsov-Ogievskiy 
> +M: Eric Blake 
> +M: Vladimir Sementsov-Ogievskiy 
> +R: John Snow 
>  L: qemu-bl...@nongnu.org
>  S: Supported
>  F: include/qemu/hbitmap.h
> @@ -2013,7 +2014,7 @@ F: migration/block-dirty-bitmap.c
>  F: util/hbitmap.c
>  F: tests/test-hbitmap.c
>  F: docs/interop/bitmaps.rst
> -T: git https://github.com/jnsnow/qemu.git bitmaps
> +T: git https://repo.or.cz/qemu/ericb.git bitmaps
> 
>  Character device backends
>  M: Marc-André Lureau 
> 

Acked-by: John Snow 
Reviewed-by: John Snow 

You'll want to work out repo access betwixt yourselves, but I'll leave
that detail for you to work out.

Thank you,
--js




Re: [PATCH 1/1] virtio-9pfs: don't truncate response

2020-05-14 Thread Stefano Stabellini
On Thu, 14 May 2020, Christian Schoenebeck wrote:
> Commit SHA-1 16724a173049ac29c7b5ade741da93a0f46edff7 introduced
> truncating the response to the currently available transport buffer size,
> which was supposed to fix a 9pfs error on Xen boot where transport buffer
> might still be smaller than required for response.
> 
> Unfortunately this change broke small reads (with less than 12 bytes).
> 
> To fix this introduced bug for virtio at least, let's revert this change
> for the virtio transport. Unlike with Xen, we should never come into
> this situation with virtio that the available transport buffer would be
> too small for delivering any response to client. So truncating the buffer
> is not necessary with virtio in the first place.
> 
> This bug still needs to be addressed for Xen appropriately though.
> 
> Fixes: 16724a173049ac29c7b5ade741da93a0f46edff7 (for virtio only)
> Fixes: https://bugs.launchpad.net/bugs/1877688 (for virtio only)
> Signed-off-by: Christian Schoenebeck 

Reviewed-by: Stefano Stabellini 


> ---
>  hw/9pfs/virtio-9p-device.c | 7 ++-
>  1 file changed, 2 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/9pfs/virtio-9p-device.c b/hw/9pfs/virtio-9p-device.c
> index 536447a355..bb6154945a 100644
> --- a/hw/9pfs/virtio-9p-device.c
> +++ b/hw/9pfs/virtio-9p-device.c
> @@ -154,16 +154,13 @@ static void virtio_init_in_iov_from_pdu(V9fsPDU *pdu, 
> struct iovec **piov,
>  VirtQueueElement *elem = v->elems[pdu->idx];
>  size_t buf_size = iov_size(elem->in_sg, elem->in_num);
>  
> -if (buf_size < P9_IOHDRSZ) {
> +if (buf_size < *size) {
>  VirtIODevice *vdev = VIRTIO_DEVICE(v);
>  
>  virtio_error(vdev,
> - "VirtFS reply type %d needs %zu bytes, buffer has %zu, 
> less than minimum",
> + "VirtFS reply type %d needs %zu bytes, buffer has %zu",
>   pdu->id + 1, *size, buf_size);
>  }
> -if (buf_size < *size) {
> -*size = buf_size;
> -}
>  
>  *piov = elem->in_sg;
>  *pniov = elem->in_num;
> -- 
> 2.20.1
> 



[PATCH 0/1] virtio-9pfs: don't truncate response

2020-05-14 Thread Christian Schoenebeck
The following patch reverts
SHA-1 16724a173049ac29c7b5ade741da93a0f46edff7 for the virtio backend.

Greg, it is intended as a quick fix for
https://bugs.launchpad.net/bugs/1877688 at least for virtio, for the
case the appropriate fix on Xen side might still take a while. Because
this bug is too serious to let it rest for too long.

In case Stefano comes up with a fix for Xen soon, you might just ignore
this patch and just revert SHA-1 16724a173049ac29c7b5ade741da93a0f46edff7
entirely instead of course.

Christian Schoenebeck (1):
  virtio-9pfs: don't truncate response

 hw/9pfs/virtio-9p-device.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

-- 
2.20.1




[PATCH v2 09/17] target/mips: fpu: Demacro NMSUB.

2020-05-14 Thread Aleksandar Markovic
This is just a cosmetic change to enable tools like gcov, gdb,
callgrind, etc. to better display involved source code.

Signed-off-by: Aleksandar Markovic 
---
 target/mips/fpu_helper.c | 44 +++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/target/mips/fpu_helper.c b/target/mips/fpu_helper.c
index d4c065f281..927bac24ac 100644
--- a/target/mips/fpu_helper.c
+++ b/target/mips/fpu_helper.c
@@ -1495,7 +1495,6 @@ uint64_t helper_float_ ## name ## _ps(CPUMIPSState *env,  
   \
 update_fcr31(env, GETPC());  \
 return ((uint64_t)fsth0 << 32) | fst0;   \
 }
-FLOAT_FMA(nmsub, float_muladd_negate_result | float_muladd_negate_c)
 #undef FLOAT_FMA
 
 uint64_t helper_float_madd_d(CPUMIPSState *env, uint64_t fst0,
@@ -1619,6 +1618,49 @@ uint64_t helper_float_nmadd_ps(CPUMIPSState *env, 
uint64_t fdt0,
 return ((uint64_t)fsth0 << 32) | fstl0;
 }
 
+uint64_t helper_float_nmsub_d(CPUMIPSState *env, uint64_t fst0,
+ uint64_t fst1, uint64_t fst2)
+{
+fst0 = float64_mul(fst0, fst1, &env->active_fpu.fp_status);
+fst0 = float64_sub(fst0, fst2, &env->active_fpu.fp_status);
+fst0 = float64_chs(fst0);
+
+update_fcr31(env, GETPC());
+return fst0;
+}
+
+uint32_t helper_float_nmsub_s(CPUMIPSState *env, uint32_t fst0,
+ uint32_t fst1, uint32_t fst2)
+{
+fst0 = float32_mul(fst0, fst1, &env->active_fpu.fp_status);
+fst0 = float32_sub(fst0, fst2, &env->active_fpu.fp_status);
+fst0 = float32_chs(fst0);
+
+update_fcr31(env, GETPC());
+return fst0;
+}
+
+uint64_t helper_float_nmsub_ps(CPUMIPSState *env, uint64_t fdt0,
+  uint64_t fdt1, uint64_t fdt2)
+{
+uint32_t fstl0 = fdt0 & 0XFFFFFFFF;
+uint32_t fsth0 = fdt0 >> 32;
+uint32_t fstl1 = fdt1 & 0XFFFFFFFF;
+uint32_t fsth1 = fdt1 >> 32;
+uint32_t fstl2 = fdt2 & 0XFFFFFFFF;
+uint32_t fsth2 = fdt2 >> 32;
+
+fstl0 = float32_mul(fstl0, fstl1, &env->active_fpu.fp_status);
+fstl0 = float32_sub(fstl0, fstl2, &env->active_fpu.fp_status);
+fstl0 = float32_chs(fstl0);
+fsth0 = float32_mul(fsth0, fsth1, &env->active_fpu.fp_status);
+fsth0 = float32_sub(fsth0, fsth2, &env->active_fpu.fp_status);
+fsth0 = float32_chs(fsth0);
+
+update_fcr31(env, GETPC());
+return ((uint64_t)fsth0 << 32) | fstl0;
+}
+
 
 #define FLOAT_FMADDSUB(name, bits, muladd_arg)  \
 uint ## bits ## _t helper_float_ ## name(CPUMIPSState *env, \
-- 
2.20.1




[PATCH v2 17/17] hw/mips: Convert Malta "ifdef 0"-ed code to comments

2020-05-14 Thread Aleksandar Markovic
Checkpatch complains about "#ifdef 0". Convert the corresponding
dead code to comments. In the future, these cases could be converted
to some no-nonsense logging/tracing.

Signed-off-by: Aleksandar Markovic 
CC: Philippe Mathieu-Daudé 
---
 hw/mips/mips_malta.c | 20 
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c
index e4c4de1b4e..f91fa34b06 100644
--- a/hw/mips/mips_malta.c
+++ b/hw/mips/mips_malta.c
@@ -427,10 +427,12 @@ static uint64_t malta_fpga_read(void *opaque, hwaddr addr,
 break;
 
 default:
-#if 0
-printf("malta_fpga_read: Bad register offset 0x" TARGET_FMT_lx "\n",
-   addr);
-#endif
+/*
+ * Possible logging:
+ *
+ *printf("malta_fpga_read: Bad register offset 0x" TARGET_FMT_lx "\n",
+ *   addr);
+ */
 break;
 }
 return val;
@@ -515,10 +517,12 @@ static void malta_fpga_write(void *opaque, hwaddr addr,
 break;
 
 default:
-#if 0
-printf("malta_fpga_write: Bad register offset 0x" TARGET_FMT_lx "\n",
-   addr);
-#endif
+/*
+ * Possible logging:
+ *
+ *printf("malta_fpga_write: Bad register offset 0x" TARGET_FMT_lx "\n",
+ *   addr);
+ */
 break;
 }
 }
-- 
2.20.1




[PATCH v4 2/2] Makefile: remove old compatibility gunks

2020-05-14 Thread Claudio Fontana
Signed-off-by: Claudio Fontana 
Reviewed-by: Markus Armbruster 
Reviewed-by: Philippe Mathieu-Daudé 
---
 Makefile | 6 --
 1 file changed, 6 deletions(-)

diff --git a/Makefile b/Makefile
index 36a6454295..2873d59ea2 100644
--- a/Makefile
+++ b/Makefile
@@ -562,12 +562,6 @@ slirp/all: .git-submodule-status
CC="$(CC)" AR="$(AR)"   LD="$(LD)" RANLIB="$(RANLIB)"   \
CFLAGS="$(QEMU_CFLAGS) $(CFLAGS)" LDFLAGS="$(QEMU_LDFLAGS)")
 
-# Compatibility gunk to keep make working across the rename of targets
-# for recursion, to be removed some time after 4.1.
-subdir-dtc: dtc/all
-subdir-capstone: capstone/all
-subdir-slirp: slirp/all
-
 $(filter %/all, $(TARGET_DIRS_RULES)): libqemuutil.a $(common-obj-y) \
$(qom-obj-y)
 
-- 
2.16.4




Re: [PATCH v10 14/14] iotests: use python logging for iotests.log()

2020-05-14 Thread John Snow



On 5/14/20 6:06 AM, Kevin Wolf wrote:
> Am 14.05.2020 um 08:24 hat John Snow geschrieben:
>> On 3/31/20 9:44 AM, Kevin Wolf wrote:
>>> Am 31.03.2020 um 02:00 hat John Snow geschrieben:
 We can turn logging on/off globally instead of per-function.

 Remove use_log from run_job, and use python logging to turn on
 diffable output when we run through a script entry point.

 iotest 245 changes output order due to buffering reasons.


 An extended note on python logging:

 A NullHandler is added to `qemu.iotests` to stop output from being
 generated if this code is used as a library without configuring logging.
 A NullHandler is only needed at the root, so a duplicate handler is not
 needed for `qemu.iotests.diff_io`.

 When logging is not configured, messages at the 'WARNING' levels or
 above are printed with default settings. The NullHandler stops this from
 occurring, which is considered good hygiene for code used as a library.

 See https://docs.python.org/3/howto/logging.html#library-config

 When logging is actually enabled (always at the behest of an explicit
 call by a client script), a root logger is implicitly created at the
 root, which allows messages to propagate upwards and be handled/emitted
 from the root logger with default settings.

 When we want iotest logging, we attach a handler to the
 qemu.iotests.diff_io logger and disable propagation to avoid possible
 double-printing.

 For more information on python logging infrastructure, I highly
 recommend downloading the pip package `logging_tree`, which provides
 convenient visualizations of the hierarchical logging configuration
 under different circumstances.

 See https://pypi.org/project/logging_tree/ for more information.

 Signed-off-by: John Snow 
 Reviewed-by: Max Reitz 
>>>
>>> Should we enable logger if -d is given?
>>>
>>> Previously we had:
>>>
>>> $ ./check -d -T -raw 281
>>> [...]
>>> 281 not run: not suitable for this image format: raw
>>> 281  not run[15:39:03] [15:39:04]not suitable 
>>> for this image format: raw
>>> Not run: 281
>>>
>>> After this series, the first line of output from notrun() is missing.
>>> Not that I think it's important to have the line, but as long as we
>>> bother to call logger.warning(), I thought that maybe we want to be able
>>> to actually see the effect of it somewhere?
>>>
>>> Kevin
>>>
>>
>> Uh, okay. So this is weirder than I thought it was going to be!
>>
>> So, if you move the debug configuration up above the _verify calls,
>> you'll see the message printed out to the debug stream:
>>
>> DEBUG:qemu.iotests:iotests debugging messages active
>> WARNING:qemu.iotests:281 not run: not suitable for this image format: raw
>>
>> ...but if you omit the `-d` flag, the message vanishes into a black
>> hole. Did it always work like that ...?
> 
> Yes, this is how it used to work. It's a result of ./check only printing
> the test output with -d, and such log messages are basically just test
> output.
> 
> And I think it's exactly what we want: Without -d, you want only the
> summary, i.e. a single line that says "pass", "fail" or "notrun",
> potentially with a small note at the end of the line, but that's it.
> 

OK, maybe. So I guess what happens here is that if you don't use -d, the
output gets redirected to file, and that file is summarily deleted.

Your phrase "but as long as we bother to call logger.warning(), I
thought that maybe we want to be able to actually see the effect of it
somewhere" stuck with me -- I think you're right.

I kind of do expect that if I call a function called warning() that it's
gonna do some damage. principle of least surprise, etc.

So two things:

(1) Maybe the iotest logger ought to always use stderr, and we should
see any calls to warning() or error() even when debugging is off.

(2) These skip notifications are not warnings, they are informational
and can be disabled when `-d` is omitted. (Especially because they are
represented through another channel.)

--js


(I'll send the fixup for the simpler thing first, and you can take or
leave the second thing.)




Re: [PATCH] hw/ide: Make IDEDMAOps handlers take a const IDEDMA pointer

2020-05-14 Thread John Snow



On 5/12/20 3:49 PM, Philippe Mathieu-Daudé wrote:
> Handlers don't need to modify the IDEDMA structure.
> Make it const.
> 
> Signed-off-by: Philippe Mathieu-Daudé 

I'll trust your judgment. As long as it still compiles and passes
qtests, I'm happy if you're happy.

Acked-by: John Snow 

> ---
>  include/hw/ide/internal.h | 12 ++--
>  hw/ide/ahci.c | 18 +-
>  hw/ide/core.c |  6 +++---
>  hw/ide/macio.c|  6 +++---
>  hw/ide/pci.c  | 12 ++--
>  5 files changed, 27 insertions(+), 27 deletions(-)
> 
> diff --git a/include/hw/ide/internal.h b/include/hw/ide/internal.h
> index 55da35d768..1a7869e85d 100644
> --- a/include/hw/ide/internal.h
> +++ b/include/hw/ide/internal.h
> @@ -322,12 +322,12 @@ typedef enum { IDE_HD, IDE_CD, IDE_CFATA } IDEDriveKind;
>  
>  typedef void EndTransferFunc(IDEState *);
>  
> -typedef void DMAStartFunc(IDEDMA *, IDEState *, BlockCompletionFunc *);
> -typedef void DMAVoidFunc(IDEDMA *);
> -typedef int DMAIntFunc(IDEDMA *, bool);
> -typedef int32_t DMAInt32Func(IDEDMA *, int32_t len);
> -typedef void DMAu32Func(IDEDMA *, uint32_t);
> -typedef void DMAStopFunc(IDEDMA *, bool);
> +typedef void DMAStartFunc(const IDEDMA *, IDEState *, BlockCompletionFunc *);
> +typedef void DMAVoidFunc(const IDEDMA *);
> +typedef int DMAIntFunc(const IDEDMA *, bool);
> +typedef int32_t DMAInt32Func(const IDEDMA *, int32_t len);
> +typedef void DMAu32Func(const IDEDMA *, uint32_t);
> +typedef void DMAStopFunc(const IDEDMA *, bool);
>  
>  struct unreported_events {
>  bool eject_request;
> diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
> index 13d91e109a..168d34e9f2 100644
> --- a/hw/ide/ahci.c
> +++ b/hw/ide/ahci.c
> @@ -44,7 +44,7 @@ static int handle_cmd(AHCIState *s, int port, uint8_t slot);
>  static void ahci_reset_port(AHCIState *s, int port);
>  static bool ahci_write_fis_d2h(AHCIDevice *ad);
>  static void ahci_init_d2h(AHCIDevice *ad);
> -static int ahci_dma_prepare_buf(IDEDMA *dma, int32_t limit);
> +static int ahci_dma_prepare_buf(const IDEDMA *dma, int32_t limit);
>  static bool ahci_map_clb_address(AHCIDevice *ad);
>  static bool ahci_map_fis_address(AHCIDevice *ad);
>  static void ahci_unmap_clb_address(AHCIDevice *ad);
> @@ -1338,7 +1338,7 @@ out:
>  }
>  
>  /* Transfer PIO data between RAM and device */
> -static void ahci_pio_transfer(IDEDMA *dma)
> +static void ahci_pio_transfer(const IDEDMA *dma)
>  {
>  AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
>  IDEState *s = &ad->port.ifs[0];
> @@ -1397,7 +1397,7 @@ out:
>  }
>  }
>  
> -static void ahci_start_dma(IDEDMA *dma, IDEState *s,
> +static void ahci_start_dma(const IDEDMA *dma, IDEState *s,
> BlockCompletionFunc *dma_cb)
>  {
>  AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
> @@ -1406,7 +1406,7 @@ static void ahci_start_dma(IDEDMA *dma, IDEState *s,
>  dma_cb(s, 0);
>  }
>  
> -static void ahci_restart_dma(IDEDMA *dma)
> +static void ahci_restart_dma(const IDEDMA *dma)
>  {
>  /* Nothing to do, ahci_start_dma already resets s->io_buffer_offset.  */
>  }
> @@ -1415,7 +1415,7 @@ static void ahci_restart_dma(IDEDMA *dma)
>   * IDE/PIO restarts are handled by the core layer, but NCQ commands
>   * need an extra kick from the AHCI HBA.
>   */
> -static void ahci_restart(IDEDMA *dma)
> +static void ahci_restart(const IDEDMA *dma)
>  {
>  AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
>  int i;
> @@ -1432,7 +1432,7 @@ static void ahci_restart(IDEDMA *dma)
>   * Called in DMA and PIO R/W chains to read the PRDT.
>   * Not shared with NCQ pathways.
>   */
> -static int32_t ahci_dma_prepare_buf(IDEDMA *dma, int32_t limit)
> +static int32_t ahci_dma_prepare_buf(const IDEDMA *dma, int32_t limit)
>  {
>  AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
>  IDEState *s = &ad->port.ifs[0];
> @@ -1453,7 +1453,7 @@ static int32_t ahci_dma_prepare_buf(IDEDMA *dma, 
> int32_t limit)
>   * Called via dma_buf_commit, for both DMA and PIO paths.
>   * sglist destruction is handled within dma_buf_commit.
>   */
> -static void ahci_commit_buf(IDEDMA *dma, uint32_t tx_bytes)
> +static void ahci_commit_buf(const IDEDMA *dma, uint32_t tx_bytes)
>  {
>  AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
>  
> @@ -1461,7 +1461,7 @@ static void ahci_commit_buf(IDEDMA *dma, uint32_t 
> tx_bytes)
>  ad->cur_cmd->status = cpu_to_le32(tx_bytes);
>  }
>  
> -static int ahci_dma_rw_buf(IDEDMA *dma, bool is_write)
> +static int ahci_dma_rw_buf(const IDEDMA *dma, bool is_write)
>  {
>  AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
>  IDEState *s = &ad->port.ifs[0];
> @@ -1486,7 +1486,7 @@ static int ahci_dma_rw_buf(IDEDMA *dma, bool is_write)
>  return 1;
>  }
>  
> -static void ahci_cmd_done(IDEDMA *dma)
> +static void ahci_cmd_done(const IDEDMA *dma)
>  {
>  AHCIDevice *ad = DO_UPCAST(AHCIDevice, dma, dma);
>  
> diff --git a/hw/ide/core.c b/hw/ide/core.c
> index 689bb36409..d997a78e47 100644

[PATCH Kernel v20 0/8] Add UAPIs to support migration for VFIO devices

2020-05-14 Thread Kirti Wankhede
Hi,

This patch set adds:
* IOCTL VFIO_IOMMU_DIRTY_PAGES to get dirty pages bitmap with
  respect to IOMMU container rather than per device. All pages pinned by
  vendor driver through vfio_pin_pages external API has to be marked as
  dirty during  migration. When IOMMU capable device is present in the
  container and all pages are pinned and mapped, then all pages are marked
  dirty.
  When there are CPU writes, CPU dirty page tracking can identify dirtied
  pages, but any page pinned by vendor driver can also be written by
  device. As of now there is no device which has hardware support for
  dirty page tracking. So all pages which are pinned should be considered
  as dirty.
  This ioctl is also used to start/stop dirty pages tracking for pinned and
  unpinned pages while migration is active.

* Updated IOCTL VFIO_IOMMU_UNMAP_DMA to get dirty pages bitmap before
  unmapping IO virtual address range.
  With vIOMMU, during pre-copy phase of migration, while CPUs are still
  running, IO virtual address unmap can happen while device still keeping
  reference of guest pfns. Those pages should be reported as dirty before
  unmap, so that VFIO user space application can copy content of those
  pages from source to destination.

* Patch 8 detect if IOMMU capable device driver is smart to report pages
  to be marked dirty by pinning pages using vfio_pin_pages() API.


Yet TODO:
Since there is no device which has hardware support for system memory
dirty bitmap tracking, right now there is no other API from vendor driver
to VFIO IOMMU module to report dirty pages. In future, when such hardware
support will be implemented, an API will be required such that vendor
driver could report dirty pages to VFIO module during migration phases.

Adding revision history from previous QEMU patch set to understand KABI
changes done till now

v19 -> v20
- Fixed ioctl to get dirty bitmap to get bitmap of multiple vfio_dmas
- Fixed unmap ioctl to get dirty bitmap of multiple vfio_dmas.
- Removed flag definition from migration capability.

v18 -> v19
- Updated migration capability with supported page sizes bitmap for dirty
  page tracking and  maximum bitmap size supported by kernel module.
- Added patch to calculate and cache pgsize_bitmap when iommu->domain_list
  is updated.
- Removed extra buffers added in previous version for bitmap manipulation
  and optimised the code.

v17 -> v18
- Add migration capability to the capability chain for VFIO_IOMMU_GET_INFO
  ioctl
- Updated UMAP_DMA ioctl to return bitmap of multiple vfio_dma

v16 -> v17
- Fixed errors reported by kbuild test robot  on i386

v15 -> v16
- Minor edits and nit picks (Auger Eric)
- On copying bitmap to user, re-populated bitmap only for pinned pages,
  excluding unmapped pages and CPU dirtied pages.
- Patches are on tag: next-20200318 and 1-3 patches from Yan's series
  https://lkml.org/lkml/2020/3/12/1255

v14 -> v15
- Minor edits and nit picks.
- In the verification of user allocated bitmap memory, added check of
   maximum size.
- Patches are on tag: next-20200318 and 1-3 patches from Yan's series
  https://lkml.org/lkml/2020/3/12/1255

v13 -> v14
- Added struct vfio_bitmap to kabi. updated structure
  vfio_iommu_type1_dirty_bitmap_get and vfio_iommu_type1_dma_unmap.
- All small changes suggested by Alex.
- Patches are on tag: next-20200318 and 1-3 patches from Yan's series
  https://lkml.org/lkml/2020/3/12/1255

v12 -> v13
- Changed bitmap allocation in vfio_iommu_type1 to per vfio_dma
- Changed VFIO_IOMMU_DIRTY_PAGES ioctl behaviour to be per vfio_dma range.
- Changed vfio_iommu_type1_dirty_bitmap structure to have separate data
  field.

v11 -> v12
- Changed bitmap allocation in vfio_iommu_type1.
- Remove atomicity of ref_count.
- Updated comments for migration device state structure about error
  reporting.
- Nit picks from v11 reviews

v10 -> v11
- Fix pin pages API to free vpfn if it is marked as unpinned tracking page.
- Added proposal to detect if IOMMU capable device calls external pin pages
  API to mark pages dirty.
- Nit picks from v10 reviews

v9 -> v10:
- Updated existing VFIO_IOMMU_UNMAP_DMA ioctl to get dirty pages bitmap
  during unmap while migration is active
- Added flag in VFIO_IOMMU_GET_INFO to indicate driver support dirty page
  tracking.
- If iommu_mapped, mark all pages dirty.
- Added unpinned pages tracking while migration is active.
- Updated comments for migration device state structure with bit
  combination table and state transition details.

v8 -> v9:
- Split patch set in 2 sets, Kernel and QEMU.
- Dirty pages bitmap is queried from IOMMU container rather than from
  vendor driver for per device. Added 2 ioctls to achieve this.

v7 -> v8:
- Updated comments for KABI
- Added BAR address validation check during PCI device's config space load
  as suggested by Dr. David Alan Gilbert.
- Changed vfio_migration_set_state() to set or clear device state flags.
- Some nit fixes.

v6 -> v7:
- Fix build failures.

v5 -> v6:
- Fix build 

[PATCH Kernel v20 1/8] vfio: UAPI for migration interface for device state

2020-05-14 Thread Kirti Wankhede
- Defined MIGRATION region type and sub-type.

- Defined vfio_device_migration_info structure which will be placed at the
  0th offset of migration region to get/set VFIO device related
  information. Defined members of structure and usage on read/write access.

- Defined device states and state transition details.

- Defined sequence to be followed while saving and resuming VFIO device.

Signed-off-by: Kirti Wankhede 
Reviewed-by: Neo Jia 
---
 include/uapi/linux/vfio.h | 228 ++
 1 file changed, 228 insertions(+)

diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 015516bcfaa3..ad9bb5af3463 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -305,6 +305,7 @@ struct vfio_region_info_cap_type {
 #define VFIO_REGION_TYPE_PCI_VENDOR_MASK   (0xffff)
 #define VFIO_REGION_TYPE_GFX(1)
 #define VFIO_REGION_TYPE_CCW   (2)
+#define VFIO_REGION_TYPE_MIGRATION  (3)
 
 /* sub-types for VFIO_REGION_TYPE_PCI_* */
 
@@ -379,6 +380,233 @@ struct vfio_region_gfx_edid {
 /* sub-types for VFIO_REGION_TYPE_CCW */
 #define VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD  (1)
 
+/* sub-types for VFIO_REGION_TYPE_MIGRATION */
+#define VFIO_REGION_SUBTYPE_MIGRATION   (1)
+
+/*
+ * The structure vfio_device_migration_info is placed at the 0th offset of
+ * the VFIO_REGION_SUBTYPE_MIGRATION region to get and set VFIO device related
+ * migration information. Field accesses from this structure are only supported
+ * at their native width and alignment. Otherwise, the result is undefined and
+ * vendor drivers should return an error.
+ *
+ * device_state: (read/write)
+ *  - The user application writes to this field to inform the vendor driver
+ *about the device state to be transitioned to.
+ *  - The vendor driver should take the necessary actions to change the
+ *device state. After successful transition to a given state, the
+ *vendor driver should return success on write(device_state, state)
+ *system call. If the device state transition fails, the vendor driver
+ *should return an appropriate -errno for the fault condition.
+ *  - On the user application side, if the device state transition fails,
+ *   that is, if write(device_state, state) returns an error, read
+ *   device_state again to determine the current state of the device from
+ *   the vendor driver.
+ *  - The vendor driver should return previous state of the device unless
+ *the vendor driver has encountered an internal error, in which case
+ *the vendor driver may report the device_state 
VFIO_DEVICE_STATE_ERROR.
+ *  - The user application must use the device reset ioctl to recover the
+ *device from VFIO_DEVICE_STATE_ERROR state. If the device is
+ *indicated to be in a valid device state by reading device_state, the
+ *user application may attempt to transition the device to any valid
+ *state reachable from the current state or terminate itself.
+ *
+ *  device_state consists of 3 bits:
+ *  - If bit 0 is set, it indicates the _RUNNING state. If bit 0 is clear,
+ *it indicates the _STOP state. When the device state is changed to
+ *_STOP, driver should stop the device before write() returns.
+ *  - If bit 1 is set, it indicates the _SAVING state, which means that the
+ *driver should start gathering device state information that will be
+ *provided to the VFIO user application to save the device's state.
+ *  - If bit 2 is set, it indicates the _RESUMING state, which means that
+ *the driver should prepare to resume the device. Data provided through
+ *the migration region should be used to resume the device.
+ *  Bits 3 - 31 are reserved for future use. To preserve them, the user
+ *  application should perform a read-modify-write operation on this
+ *  field when modifying the specified bits.
+ *
+ *  +--- _RESUMING
+ *  |+-- _SAVING
+ *  ||+- _RUNNING
+ *  |||
+ *  000b => Device Stopped, not saving or resuming
+ *  001b => Device running, which is the default state
+ *  010b => Stop the device & save the device state, stop-and-copy state
+ *  011b => Device running and save the device state, pre-copy state
+ *  100b => Device stopped and the device state is resuming
+ *  101b => Invalid state
+ *  110b => Error state
+ *  111b => Invalid state
+ *
+ * State transitions:
+ *
+ *  _RESUMING  _RUNNINGPre-copyStop-and-copy   _STOP
+ *(100b) (001b) (011b)(010b)   (000b)
+ * 0. Running or default state
+ * |
+ *
+ * 1. Normal Shutdown (optional)
+ * |->|
+ *
+ * 2. Save the state or suspend
+ * |->|-->|
+ *
+ * 3. 

RE: [PATCH 6/6] migration/colo.c: Move colo_notify_compares_event to the right place

2020-05-14 Thread Zhanghailiang
> -Original Message-
> From: Lukas Straub [mailto:lukasstra...@web.de]
> Sent: Thursday, May 14, 2020 10:31 PM
> To: Zhanghailiang 
> Cc: qemu-devel ; Zhang Chen
> ; Juan Quintela ; Dr. David
> Alan Gilbert 
> Subject: Re: [PATCH 6/6] migration/colo.c: Move
> colo_notify_compares_event to the right place
> 
> On Thu, 14 May 2020 13:27:30 +
> Zhanghailiang  wrote:
> 
> > Cc: Zhang Chen 
> >
> > >
> > > If the secondary has to failover during checkpointing, it still is
> > > in the old state (i.e. different state than primary). Thus we can't
> > > expose the primary state until after the checkpoint is sent.
> > >
> >
> > Hmm, do you mean we should not flush the net packages to client
> > connection until checkpointing Process almost success because it may fail
> during checkpointing ?
> 
> No.
> If the primary fails/crashes during checkpointing, the secondary is still in
> different state than the primary because it didn't receive the full 
> checkpoint.
> We can release the miscompared packets only after both primary and
> secondary are in the same state.
> 
> Example:
> 1. Client opens a TCP connection, sends SYN.
> 2. Primary accepts the connection with SYN-ACK, but due to
> nondeterministic execution the secondary is delayed.
> 3. Checkpoint happens, primary releases the SYN-ACK packet but then
> crashes while sending the checkpoint.
> 4. The Secondary fails over. At this point it is still in the old state where 
> it
> hasn't sent the SYN-ACK packet.
> 5. The client responds with ACK to the SYN-ACK packet.
> 6. Because it doesn't know the connection, the secondary responds with RST,
> connection reset.
> 

Good example. For this patch, it is OK, I will add reviewed-by in your original
patch.


> Regards,
> Lukas Straub
> 
> > > This fixes sporadic connection reset of client connections during 
> > > failover.
> > >
> > > Signed-off-by: Lukas Straub 
> > > ---
> > >  migration/colo.c | 12 ++--
> > >  1 file changed, 6 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/migration/colo.c b/migration/colo.c index
> > > a69782efc5..a3fc21e86e 100644
> > > --- a/migration/colo.c
> > > +++ b/migration/colo.c
> > > @@ -430,12 +430,6 @@ static int
> > > colo_do_checkpoint_transaction(MigrationState *s,
> > >  goto out;
> > >  }
> > >
> > > -qemu_event_reset(&s->colo_checkpoint_event);
> > > -colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT,
> > > &local_err);
> > > -if (local_err) {
> > > -goto out;
> > > -}
> > > -
> > >  /* Disable block migration */
> > >  migrate_set_block_enabled(false, &local_err);
> > >  qemu_mutex_lock_iothread();
> > > @@ -494,6 +488,12 @@ static int
> > > colo_do_checkpoint_transaction(MigrationState *s,
> > >  goto out;
> > >  }
> > >
> > > +qemu_event_reset(&s->colo_checkpoint_event);
> > > +colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT,
> > > &local_err);
> > > +if (local_err) {
> > > +goto out;
> > > +}
> > > +
> > >  colo_receive_check_message(s->rp_state.from_dst_file,
> > > COLO_MESSAGE_VMSTATE_LOADED,
> &local_err);
> > >  if (local_err) {
> > > --
> > > 2.20.1




Re: [RFC 16/18] use errp for gmpo kvm_init

2020-05-14 Thread David Gibson
On Thu, May 14, 2020 at 06:09:46PM +0100, Dr. David Alan Gilbert wrote:
> * David Gibson (da...@gibson.dropbear.id.au) wrote:
[snip]
> > @@ -649,20 +649,20 @@ static int sev_kvm_init(GuestMemoryProtection *gmpo)
> >  devname = object_property_get_str(OBJECT(sev), "sev-device", NULL);
> >  sev->sev_fd = open(devname, O_RDWR);
> >  if (sev->sev_fd < 0) {
> > -error_report("%s: Failed to open %s '%s'", __func__,
> > - devname, strerror(errno));
> > -}
> > -g_free(devname);
> > -if (sev->sev_fd < 0) {
> > +g_free(devname);
> > +error_setg(errp, "%s: Failed to open %s '%s'", __func__,
> > +   devname, strerror(errno));
> > +g_free(devname);
> 
> You seem to have double free'd devname - would g_autofree work here?

Oops, fixed.  I'm not really familiar with the g_autofree stuff as
yet, so, maybe?

I also entirely forgot to write a non-placeholder commit message for
this patch.  Oops.

> other than that, looks OK to me.



> 
> Dave
> 
> >  goto err;
> >  }
> > +g_free(devname);
> >  
> > >  ret = sev_platform_ioctl(sev->sev_fd, SEV_PLATFORM_STATUS, &status,
> > >   &fw_error);
> >  if (ret) {
> > -error_report("%s: failed to get platform status ret=%d "
> > - "fw_error='%d: %s'", __func__, ret, fw_error,
> > - fw_error_to_str(fw_error));
> > +error_setg(errp, "%s: failed to get platform status ret=%d "
> > +   "fw_error='%d: %s'", __func__, ret, fw_error,
> > +   fw_error_to_str(fw_error));
> >  goto err;
> >  }
> >  sev->build_id = status.build;
> > @@ -672,14 +672,14 @@ static int sev_kvm_init(GuestMemoryProtection *gmpo)
> >  trace_kvm_sev_init();
> > >  ret = sev_ioctl(sev->sev_fd, KVM_SEV_INIT, NULL, &fw_error);
> >  if (ret) {
> > -error_report("%s: failed to initialize ret=%d fw_error=%d '%s'",
> > - __func__, ret, fw_error, fw_error_to_str(fw_error));
> > +error_setg(errp, "%s: failed to initialize ret=%d fw_error=%d 
> > '%s'",
> > +   __func__, ret, fw_error, fw_error_to_str(fw_error));
> >  goto err;
> >  }
> >  
> >  ret = sev_launch_start(sev);
> >  if (ret) {
> > -error_report("%s: failed to create encryption context", __func__);
> > +error_setg(errp, "%s: failed to create encryption context", 
> > __func__);
> >  goto err;
> >  }
> >  

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [RFC 16/18] use errp for gmpo kvm_init

2020-05-14 Thread David Gibson
On Thu, May 14, 2020 at 06:09:46PM +0100, Dr. David Alan Gilbert wrote:
> Dave:
>   You've got some screwy mail headers here, the qemu-devel@nongnu.-rg is
> the best one and the p...@us.redhat.com is weird as well.

Yeah, apparently I forgot how to type when I entered my git-publish
command line :/.

> 
> * David Gibson (da...@gibson.dropbear.id.au) wrote:
> > ---
> >  accel/kvm/kvm-all.c|  4 +++-
> >  include/exec/guest-memory-protection.h |  2 +-
> >  target/i386/sev.c  | 32 +-
> >  3 files changed, 20 insertions(+), 18 deletions(-)
> > 
> > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> > index 5451728425..392ab02867 100644
> > --- a/accel/kvm/kvm-all.c
> > +++ b/accel/kvm/kvm-all.c
> > @@ -2045,9 +2045,11 @@ static int kvm_init(MachineState *ms)
> >  if (ms->gmpo) {
> >  GuestMemoryProtectionClass *gmpc =
> >  GUEST_MEMORY_PROTECTION_GET_CLASS(ms->gmpo);
> > +Error *local_err = NULL;
> >  
> > -ret = gmpc->kvm_init(ms->gmpo);
> > > +ret = gmpc->kvm_init(ms->gmpo, &local_err);
> >  if (ret < 0) {
> > +error_report_err(local_err);
> >  goto err;
> >  }
> >  }
> > diff --git a/include/exec/guest-memory-protection.h 
> > b/include/exec/guest-memory-protection.h
> > index 7d959b4910..2a88475136 100644
> > --- a/include/exec/guest-memory-protection.h
> > +++ b/include/exec/guest-memory-protection.h
> > @@ -32,7 +32,7 @@ typedef struct GuestMemoryProtection 
> > GuestMemoryProtection;
> >  typedef struct GuestMemoryProtectionClass {
> >  InterfaceClass parent;
> >  
> > -int (*kvm_init)(GuestMemoryProtection *);
> > +int (*kvm_init)(GuestMemoryProtection *, Error **);
> >  int (*encrypt_data)(GuestMemoryProtection *, uint8_t *, uint64_t);
> >  } GuestMemoryProtectionClass;
> >  
> > diff --git a/target/i386/sev.c b/target/i386/sev.c
> > index 2051fae0c1..82f16b2f3b 100644
> > --- a/target/i386/sev.c
> > +++ b/target/i386/sev.c
> > @@ -617,7 +617,7 @@ sev_vm_state_change(void *opaque, int running, RunState 
> > state)
> >  }
> >  }
> >  
> > -static int sev_kvm_init(GuestMemoryProtection *gmpo)
> > +static int sev_kvm_init(GuestMemoryProtection *gmpo, Error **errp)
> >  {
> >  SevGuestState *sev = SEV_GUEST(gmpo);
> >  char *devname;
> > @@ -633,14 +633,14 @@ static int sev_kvm_init(GuestMemoryProtection *gmpo)
> >  host_cbitpos = ebx & 0x3f;
> >  
> >  if (host_cbitpos != sev->cbitpos) {
> > -error_report("%s: cbitpos check failed, host '%d' requested '%d'",
> > - __func__, host_cbitpos, sev->cbitpos);
> > +error_setg(errp, "%s: cbitpos check failed, host '%d' requested 
> > '%d'",
> > +   __func__, host_cbitpos, sev->cbitpos);
> >  goto err;
> >  }
> >  
> >  if (sev->reduced_phys_bits < 1) {
> > -error_report("%s: reduced_phys_bits check failed, it should be 
> > >=1,"
> > - " requested '%d'", __func__, sev->reduced_phys_bits);
> > +error_setg(errp, "%s: reduced_phys_bits check failed, it should be 
> > >=1,"
> > +   " requested '%d'", __func__, sev->reduced_phys_bits);
> >  goto err;
> >  }
> >  
> > @@ -649,20 +649,20 @@ static int sev_kvm_init(GuestMemoryProtection *gmpo)
> >  devname = object_property_get_str(OBJECT(sev), "sev-device", NULL);
> >  sev->sev_fd = open(devname, O_RDWR);
> >  if (sev->sev_fd < 0) {
> > -error_report("%s: Failed to open %s '%s'", __func__,
> > - devname, strerror(errno));
> > -}
> > -g_free(devname);
> > -if (sev->sev_fd < 0) {
> > +g_free(devname);
> > +error_setg(errp, "%s: Failed to open %s '%s'", __func__,
> > +   devname, strerror(errno));
> > +g_free(devname);
> 
> You seem to have double free'd devname - would g_autofree work here?
> 
> other than that, looks OK to me.
> 
> Dave
> 
> >  goto err;
> >  }
> > +g_free(devname);
> >  
> >  ret = sev_platform_ioctl(sev->sev_fd, SEV_PLATFORM_STATUS, ,
> >   _error);
> >  if (ret) {
> > -error_report("%s: failed to get platform status ret=%d "
> > - "fw_error='%d: %s'", __func__, ret, fw_error,
> > - fw_error_to_str(fw_error));
> > +error_setg(errp, "%s: failed to get platform status ret=%d "
> > +   "fw_error='%d: %s'", __func__, ret, fw_error,
> > +   fw_error_to_str(fw_error));
> >  goto err;
> >  }
> >  sev->build_id = status.build;
> > @@ -672,14 +672,14 @@ static int sev_kvm_init(GuestMemoryProtection *gmpo)
> >  trace_kvm_sev_init();
> >  ret = sev_ioctl(sev->sev_fd, KVM_SEV_INIT, NULL, _error);
> >  if (ret) {
> > -error_report("%s: failed to initialize ret=%d fw_error=%d '%s'",
> > - __func__, ret, fw_error, 

[Bug 1856335] Re: Cache Layout wrong on many Zen Arch CPUs

2020-05-14 Thread Jan Klos
The problem is that disabled cores are not taken into account. ALL Zen2
CPUs have an L3 cache group per CCX and every CCX has 4 cores; the problem
is that some cores in each CCX (1 for 6 and 12-core CPUs, 2 for 3100)
are disabled for some models, but they still use their core ids (as can
be seen in virsh capabilities | grep "cpu id" output in above comments).
Looking at target/i386/cpu.c:5529, this is not taken into account.

Maybe the cleanest way to fix this is to emulate the host topology by
also skipping disabled core ids in the VM? That way, die offset will
actually match the real host CPU topology...

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1856335

Title:
  Cache Layout wrong on many Zen Arch CPUs

Status in QEMU:
  New

Bug description:
  AMD CPUs have L3 cache per 2, 3 or 4 cores. Currently, TOPOEXT seems
  to always map the cache as if it were a 4-core-per-CCX CPU, which is
  incorrect, and costs upwards of 30% performance (more realistically 10%)
  in L3 cache layout aware applications.

  Example on a 4-CCX CPU (1950X /w 8 Cores and no SMT):

    
  EPYC-IBPB
  AMD
  

  In windows, coreinfo reports correctly:

    Unified Cache 1, Level 3,8 MB, Assoc  16, LineSize  64
    Unified Cache 6, Level 3,8 MB, Assoc  16, LineSize  64

  On a 3-CCX CPU (3960X /w 6 cores and no SMT):

   
  EPYC-IBPB
  AMD
  

  in windows, coreinfo reports incorrectly:

  --  Unified Cache  1, Level 3,8 MB, Assoc  16, LineSize  64
  **  Unified Cache  6, Level 3,8 MB, Assoc  16, LineSize  64

  Validated against 3.0, 3.1, 4.1 and 4.2 versions of qemu-kvm.

  With newer Qemu there is a fix (that does behave correctly) in using the dies 
parameter:
   

  The problem is that the dies are exposed differently than how AMD does
  it natively: they are exposed to Windows as sockets, which means that
  if you are not a business user, you can't ever have a machine with
  more than two CCX (6 cores), as consumer versions of Windows only
  support two sockets. (Should this be reported as a separate bug?)

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1856335/+subscriptions



Re: [PATCH Kernel v20 5/8] vfio iommu: Implementation of ioctl for dirty pages tracking

2020-05-14 Thread Alex Williamson
On Fri, 15 May 2020 02:07:44 +0530
Kirti Wankhede  wrote:

> VFIO_IOMMU_DIRTY_PAGES ioctl performs three operations:
> - Start dirty pages tracking while migration is active
> - Stop dirty pages tracking.
> - Get dirty pages bitmap. It's user space application's responsibility to
>   copy content of dirty pages from source to destination during migration.
> 
> To prevent DoS attack, memory for bitmap is allocated per vfio_dma
> structure. Bitmap size is calculated considering smallest supported page
> size. Bitmap is allocated for all vfio_dmas when dirty logging is enabled
> 
> Bitmap is populated for already pinned pages when bitmap is allocated for
> a vfio_dma with the smallest supported page size. Update bitmap from
> pinning functions when tracking is enabled. When user application queries
> bitmap, check if requested page size is same as page size used to
> populate bitmap. If it is equal, copy bitmap, but if not equal, return
> error.
> 
> Signed-off-by: Kirti Wankhede 
> Reviewed-by: Neo Jia 
> 
> Fixed error reported by build bot by changing pgsize type from uint64_t
> to size_t.
> Reported-by: kbuild test robot 
> ---
>  drivers/vfio/vfio_iommu_type1.c | 294 
> +++-
>  1 file changed, 288 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index de17787ffece..b76d3b14abfd 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -72,6 +72,7 @@ struct vfio_iommu {
>   uint64_tpgsize_bitmap;
>   boolv2;
>   boolnesting;
> + booldirty_page_tracking;
>  };
>  
>  struct vfio_domain {
> @@ -92,6 +93,7 @@ struct vfio_dma {
>   boollock_cap;   /* capable(CAP_IPC_LOCK) */
>   struct task_struct  *task;
>   struct rb_root  pfn_list;   /* Ex-user pinned pfn list */
> + unsigned long   *bitmap;
>  };
>  
>  struct vfio_group {
> @@ -126,6 +128,19 @@ struct vfio_regions {
>  #define IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)  \
>   (!list_empty(>domain_list))
>  
> +#define DIRTY_BITMAP_BYTES(n)(ALIGN(n, BITS_PER_TYPE(u64)) / 
> BITS_PER_BYTE)
> +
> +/*
> + * Input argument of number of bits to bitmap_set() is unsigned integer, 
> which
> + * further casts to signed integer for unaligned multi-bit operation,
> + * __bitmap_set().
> + * Then maximum bitmap size supported is 2^31 bits divided by 2^3 bits/byte,
> + * that is 2^28 (256 MB) which maps to 2^31 * 2^12 = 2^43 (8TB) on 4K page
> + * system.
> + */
> +#define DIRTY_BITMAP_PAGES_MAX((u64)INT_MAX)
> +#define DIRTY_BITMAP_SIZE_MAX 
> DIRTY_BITMAP_BYTES(DIRTY_BITMAP_PAGES_MAX)
> +
>  static int put_pfn(unsigned long pfn, int prot);
>  
>  /*
> @@ -176,6 +191,74 @@ static void vfio_unlink_dma(struct vfio_iommu *iommu, 
> struct vfio_dma *old)
>   rb_erase(>node, >dma_list);
>  }
>  
> +
> +static int vfio_dma_bitmap_alloc(struct vfio_dma *dma, size_t pgsize)
> +{
> + uint64_t npages = dma->size / pgsize;
> +
> + if (npages > DIRTY_BITMAP_PAGES_MAX)
> + return -EINVAL;
> +
> + dma->bitmap = kvzalloc(DIRTY_BITMAP_BYTES(npages), GFP_KERNEL);
> + if (!dma->bitmap)
> + return -ENOMEM;
> +
> + return 0;
> +}
> +
> +static void vfio_dma_bitmap_free(struct vfio_dma *dma)
> +{
> + kfree(dma->bitmap);
> + dma->bitmap = NULL;
> +}
> +
> +static void vfio_dma_populate_bitmap(struct vfio_dma *dma, size_t pgsize)
> +{
> + struct rb_node *p;
> +
> + for (p = rb_first(>pfn_list); p; p = rb_next(p)) {
> + struct vfio_pfn *vpfn = rb_entry(p, struct vfio_pfn, node);
> +
> + bitmap_set(dma->bitmap, (vpfn->iova - dma->iova) / pgsize, 1);
> + }
> +}
> +
> +static int vfio_dma_bitmap_alloc_all(struct vfio_iommu *iommu, size_t pgsize)
> +{
> + struct rb_node *n = rb_first(>dma_list);
> +
> + for (; n; n = rb_next(n)) {
> + struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
> + int ret;
> +
> + ret = vfio_dma_bitmap_alloc(dma, pgsize);
> + if (ret) {
> + struct rb_node *p = rb_prev(n);
> +
> + for (; p; p = rb_prev(p)) {
> + struct vfio_dma *dma = rb_entry(n,
> + struct vfio_dma, node);
> +
> + vfio_dma_bitmap_free(dma);
> + }
> + return ret;
> + }
> + vfio_dma_populate_bitmap(dma, pgsize);
> + }
> + return 0;
> +}
> +
> +static void vfio_dma_bitmap_free_all(struct vfio_iommu *iommu)
> +{
> + struct rb_node *n = rb_first(>dma_list);
> +
> + for (; n; n = rb_next(n)) {
> + struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
> +
> +  

Re: [PATCH v3 1/2] chardev: enable distinct input for -chardev file

2020-05-14 Thread Markus Armbruster
Alexander Bulekov  writes:

> char-file already supports distinct paths for input/output but it was
> only possible to specify a distinct input through QMP. With this change,
> we can also specify a distinct input with the -chardev file argument:
> qemu -chardev file,id=char1,path=/out/file,pathin=/in/file
>
> Signed-off-by: Alexander Bulekov 
> Reviewed-by: Stefan Hajnoczi 
> Reviewed-by: Darren Kenny 
> ---
>  chardev/char-file.c | 5 +
>  chardev/char.c  | 3 +++
>  qemu-options.hx | 7 +--
>  3 files changed, 13 insertions(+), 2 deletions(-)
[...]
> diff --git a/qemu-options.hx b/qemu-options.hx
> index 292d4e7c0c..488961099b 100644
> --- a/qemu-options.hx
> +++ b/qemu-options.hx
> @@ -2938,7 +2938,7 @@ DEF("chardev", HAS_ARG, QEMU_OPTION_chardev,
>  "-chardev 
> vc,id=id[[,width=width][,height=height]][[,cols=cols][,rows=rows]]\n"
>  " [,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
>  "-chardev ringbuf,id=id[,size=size][,logfile=PATH][,logappend=on|off]\n"
> -"-chardev 
> file,id=id,path=path[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
> +"-chardev 
> file,id=id,path=path[,pathin=PATH][,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
>  "-chardev 
> pipe,id=id,path=path[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
>  #ifdef _WIN32
>  "-chardev console,id=id[,mux=on|off][,logfile=PATH][,logappend=on|off]\n"
> @@ -3137,13 +3137,16 @@ The available backends are:
>  Create a ring buffer with fixed size ``size``. size must be a power
>  of two and defaults to ``64K``.
>  
> -``-chardev file,id=id,path=path``
> +``-chardev file,id=id,path=path[,pathin=pathin]``
>  Log all traffic received from the guest to a file.
>  
>  ``path`` specifies the path of the file to be opened. This file will
>  be created if it does not already exist, and overwritten if it does.
>  ``path`` is required.
>  
> +``pathin`` specifies a separate file as the input to the chardev. If
> +``pathin`` is omitted, ``path`` is used for both input and output
> +
>  ``-chardev pipe,id=id,path=path``
>  Create a two-way connection to the guest. The behaviour differs
>  slightly between Windows hosts and other hosts:

"pathin" is ugly.  What about "path-input"?




Re: [PATCH Kernel v20 6/8] vfio iommu: Update UNMAP_DMA ioctl to get dirty bitmap before unmap

2020-05-14 Thread Alex Williamson
On Fri, 15 May 2020 02:07:45 +0530
Kirti Wankhede  wrote:

> DMA mapped pages, including those pinned by mdev vendor drivers, might
> get unpinned and unmapped while migration is active and device is still
> running. For example, in pre-copy phase while guest driver could access
> those pages, host device or vendor driver can dirty these mapped pages.
> Such pages should be marked dirty so as to maintain memory consistency
> for a user making use of dirty page tracking.
> 
> To get bitmap during unmap, user should allocate memory for bitmap, set
> it all zeros, set size of allocated memory, set page size to be
> considered for bitmap and set flag VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP.
> 
> Signed-off-by: Kirti Wankhede 
> Reviewed-by: Neo Jia 
> ---
>  drivers/vfio/vfio_iommu_type1.c | 77 
> ++---
>  include/uapi/linux/vfio.h   | 10 ++
>  2 files changed, 75 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index b76d3b14abfd..a1dc57bcece5 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -195,11 +195,15 @@ static void vfio_unlink_dma(struct vfio_iommu *iommu, 
> struct vfio_dma *old)
>  static int vfio_dma_bitmap_alloc(struct vfio_dma *dma, size_t pgsize)
>  {
>   uint64_t npages = dma->size / pgsize;
> + size_t bitmap_size;
>  
>   if (npages > DIRTY_BITMAP_PAGES_MAX)
>   return -EINVAL;
>  
> - dma->bitmap = kvzalloc(DIRTY_BITMAP_BYTES(npages), GFP_KERNEL);
> + /* Allocate extra 64 bits which are used for bitmap manipulation */
> + bitmap_size = DIRTY_BITMAP_BYTES(npages) + sizeof(u64);
> +
> + dma->bitmap = kvzalloc(bitmap_size, GFP_KERNEL);
>   if (!dma->bitmap)
>   return -ENOMEM;
>  
> @@ -999,23 +1003,25 @@ static int verify_bitmap_size(uint64_t npages, 
> uint64_t bitmap_size)
>  }
>  
>  static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
> -  struct vfio_iommu_type1_dma_unmap *unmap)
> +  struct vfio_iommu_type1_dma_unmap *unmap,
> +  struct vfio_bitmap *bitmap)
>  {
> - uint64_t mask;
>   struct vfio_dma *dma, *dma_last = NULL;
> - size_t unmapped = 0;
> + size_t unmapped = 0, pgsize;
>   int ret = 0, retries = 0;
> + unsigned long pgshift;
>  
>   mutex_lock(>lock);
>  
> - mask = ((uint64_t)1 << __ffs(iommu->pgsize_bitmap)) - 1;
> + pgshift = __ffs(iommu->pgsize_bitmap);
> + pgsize = (size_t)1 << pgshift;
>  
> - if (unmap->iova & mask) {
> + if (unmap->iova & (pgsize - 1)) {
>   ret = -EINVAL;
>   goto unlock;
>   }
>  
> - if (!unmap->size || unmap->size & mask) {
> + if (!unmap->size || unmap->size & (pgsize - 1)) {
>   ret = -EINVAL;
>   goto unlock;
>   }
> @@ -1026,9 +1032,15 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
>   goto unlock;
>   }
>  
> - WARN_ON(mask & PAGE_MASK);
> -again:
> + /* When dirty tracking is enabled, allow only min supported pgsize */
> + if ((unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) &&
> + (!iommu->dirty_page_tracking || (bitmap->pgsize != pgsize))) {
> + ret = -EINVAL;
> + goto unlock;
> + }
>  
> + WARN_ON((pgsize - 1) & PAGE_MASK);
> +again:
>   /*
>* vfio-iommu-type1 (v1) - User mappings were coalesced together to
>* avoid tracking individual mappings.  This means that the granularity
> @@ -1066,6 +1078,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
>   ret = -EINVAL;
>   goto unlock;
>   }
> +
>   dma = vfio_find_dma(iommu, unmap->iova + unmap->size - 1, 0);
>   if (dma && dma->iova + dma->size != unmap->iova + unmap->size) {
>   ret = -EINVAL;
> @@ -1083,6 +1096,23 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
>   if (dma->task->mm != current->mm)
>   break;
>  
> + if ((unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) &&
> + (dma_last != dma)) {
> +
> + /*
> +  * mark all pages dirty if all pages are pinned and
> +  * mapped
> +  */
> + if (dma->iommu_mapped)
> + bitmap_set(dma->bitmap, 0,
> +dma->size >> pgshift);

Nit: all the callers of update_user_bitmap() precede the call with this
identical operation, so we should probably push it into the function
itself.

> +
> + ret = update_user_bitmap(bitmap->data, dma,
> +  unmap->iova, pgsize);
> + if (ret)
> + break;
> + }
> +

As noted last 

RE: About migration/colo issue

2020-05-14 Thread Zhanghailiang
Hi Zhang Chen,

From your tracing log, it seems to hang in colo_flush_ram_cache()?
Does it run into an infinite loop there?
I'll test it by using the new qemu.

Thanks,
Hailiang

From: Zhang, Chen [mailto:chen.zh...@intel.com]
Sent: Friday, May 15, 2020 11:16 AM
To: Zhanghailiang ; Dr . David Alan Gilbert 
; qemu-devel ; Li Zhijian 

Cc: Jason Wang ; Lukas Straub 
Subject: About migration/colo issue

Hi Hailiang/Dave.

I found an urgent problem in current upstream code: COLO will get stuck on secondary 
checkpoint and later.
The guest will get stuck because of this issue.
I have bisected the upstream code; this issue is caused by Hailiang's optimization patch:

From 0393031a16735835a441b6d6e0495a1bd14adb90 Mon Sep 17 00:00:00 2001
From: zhanghailiang 
mailto:zhang.zhanghaili...@huawei.com>>
Date: Mon, 24 Feb 2020 14:54:10 +0800
Subject: [PATCH] COLO: Optimize memory back-up process

This patch will reduce the downtime of VM for the initial process,
Previously, we copied all these memory in preparing stage of COLO
while we need to stop VM, which is a time-consuming process.
Here we optimize it by a trick, back-up every page while in migration
process while COLO is enabled, though it affects the speed of the
migration, but it obviously reduce the downtime of back-up all SVM'S
memory in COLO preparing stage.

Signed-off-by: zhanghailiang 
mailto:zhang.zhanghaili...@huawei.com>>
Message-Id: 
<20200224065414.36524-5-zhang.zhanghaili...@huawei.com>
Signed-off-by: Dr. David Alan Gilbert 
mailto:dgilb...@redhat.com>>
  minor typo fixes

Hailiang, do you have time to look into it?


The detail log:
Primary node:
13322@1589511271.917346:colo_receive_message
 Receive 'checkpoint-ready' message
{"timestamp": {"seconds": 1589511271, "microseconds": 917406}, "event": 
"RESUME"}
13322@1589511271.917842:colo_vm_state_change
 Change 'stop' => 'run'
13322@1589511291.243346:colo_send_message
 Send 'checkpoint-request' message
13322@1589511291.243978:colo_receive_message
 Receive 'checkpoint-reply' message
{"timestamp": {"seconds": 1589511291, "microseconds": 244096}, "event": "STOP"}
13322@1589511291.24:colo_vm_state_change
 Change 'run' => 'stop'
13322@1589511291.244561:colo_send_message
 Send 'vmstate-send' message
13322@1589511291.258594:colo_send_message
 Send 'vmstate-size' message
13322@1589511305.412479:colo_receive_message
 Receive 'vmstate-received' message
13322@1589511309.031826:colo_receive_message
 Receive 'vmstate-loaded' message
{"timestamp": {"seconds": 1589511309, "microseconds": 31862}, "event": "RESUME"}
13322@1589511309.033075:colo_vm_state_change
 Change 'stop' => 'run'
{"timestamp": {"seconds": 1589511311, "microseconds": 111617}, "event": 
"VNC_CONNECTED", "data": {"server": {"auth": "none", "family": "ipv4", 
"service": "5907", "host": "0.0.0.0", "websocket": false}, "client": {"family": 
"ipv4", "service": "51564", "host": "10.239.13.19", "websocket": false}}}
{"timestamp": {"seconds": 1589511311, "microseconds": 116197}, "event": 
"VNC_INITIALIZED", "data": {"server": {"auth": "none", "family": "ipv4", 
"service": "5907", "host": "0.0.0.0", "websocket": false}, "client": {"family": 
"ipv4", "service": "51564", "host": "10.239.13.19", "websocket": false}}}
13322@1589511311.243271:colo_send_message
 Send 'checkpoint-request' message
13322@1589511311.351361:colo_receive_message
 Receive 'checkpoint-reply' message
{"timestamp": {"seconds": 1589511311, "microseconds": 351439}, "event": "STOP"}
13322@1589511311.415779:colo_vm_state_change
 Change 'run' => 'stop'
13322@1589511311.416001:colo_send_message
 Send 'vmstate-send' message
13322@1589511311.418620:colo_send_message
 Send 'vmstate-size' message

Secondary node:
{"timestamp": {"seconds": 1589510920, "microseconds": 778207}, "event": 
"RESUME"}
23619@1589510920.778835:colo_vm_state_change
 Change 'stop' => 'run'
23619@1589510920.778891:colo_send_message
 Send 'checkpoint-ready' message
23619@1589510940.105539:colo_receive_message
 Receive 'checkpoint-request' message
{"timestamp": {"seconds": 

RE: [PATCH 6/6] migration/colo.c: Move colo_notify_compares_event to the right place

2020-05-14 Thread Zhanghailiang
Reviewed-by: zhanghailiang 

> -Original Message-
> From: Lukas Straub [mailto:lukasstra...@web.de]
> Sent: Monday, May 11, 2020 7:11 PM
> To: qemu-devel 
> Cc: Zhanghailiang ; Juan Quintela
> ; Dr. David Alan Gilbert 
> Subject: [PATCH 6/6] migration/colo.c: Move colo_notify_compares_event
> to the right place
> 
> If the secondary has to failover during checkpointing, it still is in the old 
> state
> (i.e. different state than primary). Thus we can't expose the primary state
> until after the checkpoint is sent.
> 
> This fixes sporadic connection reset of client connections during failover.
> 
> Signed-off-by: Lukas Straub 
> ---
>  migration/colo.c | 12 ++--
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
> diff --git a/migration/colo.c b/migration/colo.c index
> a69782efc5..a3fc21e86e 100644
> --- a/migration/colo.c
> +++ b/migration/colo.c
> @@ -430,12 +430,6 @@ static int
> colo_do_checkpoint_transaction(MigrationState *s,
>  goto out;
>  }
> 
> -qemu_event_reset(>colo_checkpoint_event);
> -colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT,
> _err);
> -if (local_err) {
> -goto out;
> -}
> -
>  /* Disable block migration */
>  migrate_set_block_enabled(false, _err);
>  qemu_mutex_lock_iothread();
> @@ -494,6 +488,12 @@ static int
> colo_do_checkpoint_transaction(MigrationState *s,
>  goto out;
>  }
> 
> +qemu_event_reset(>colo_checkpoint_event);
> +colo_notify_compares_event(NULL, COLO_EVENT_CHECKPOINT,
> _err);
> +if (local_err) {
> +goto out;
> +}
> +
>  colo_receive_check_message(s->rp_state.from_dst_file,
> COLO_MESSAGE_VMSTATE_LOADED, _err);
>  if (local_err) {
> --
> 2.20.1



About migration/colo issue

2020-05-14 Thread Zhang, Chen
Hi Hailiang/Dave.

I found an urgent problem in current upstream code: COLO will get stuck on secondary 
checkpoint and later.
The guest will get stuck because of this issue.
I have bisected the upstream code; this issue is caused by Hailiang's optimization patch:

From 0393031a16735835a441b6d6e0495a1bd14adb90 Mon Sep 17 00:00:00 2001
From: zhanghailiang 
Date: Mon, 24 Feb 2020 14:54:10 +0800
Subject: [PATCH] COLO: Optimize memory back-up process

This patch will reduce the downtime of VM for the initial process,
Previously, we copied all these memory in preparing stage of COLO
while we need to stop VM, which is a time-consuming process.
Here we optimize it by a trick, back-up every page while in migration
process while COLO is enabled, though it affects the speed of the
migration, but it obviously reduce the downtime of back-up all SVM'S
memory in COLO preparing stage.

Signed-off-by: zhanghailiang 
Message-Id: <20200224065414.36524-5-zhang.zhanghaili...@huawei.com>
Signed-off-by: Dr. David Alan Gilbert 
  minor typo fixes

Hailiang, do you have time to look into it?


The detail log:
Primary node:
13322@1589511271.917346:colo_receive_message Receive 'checkpoint-ready' message
{"timestamp": {"seconds": 1589511271, "microseconds": 917406}, "event": 
"RESUME"}
13322@1589511271.917842:colo_vm_state_change Change 'stop' => 'run'
13322@1589511291.243346:colo_send_message Send 'checkpoint-request' message
13322@1589511291.243978:colo_receive_message Receive 'checkpoint-reply' message
{"timestamp": {"seconds": 1589511291, "microseconds": 244096}, "event": "STOP"}
13322@1589511291.24:colo_vm_state_change Change 'run' => 'stop'
13322@1589511291.244561:colo_send_message Send 'vmstate-send' message
13322@1589511291.258594:colo_send_message Send 'vmstate-size' message
13322@1589511305.412479:colo_receive_message Receive 'vmstate-received' message
13322@1589511309.031826:colo_receive_message Receive 'vmstate-loaded' message
{"timestamp": {"seconds": 1589511309, "microseconds": 31862}, "event": "RESUME"}
13322@1589511309.033075:colo_vm_state_change Change 'stop' => 'run'
{"timestamp": {"seconds": 1589511311, "microseconds": 111617}, "event": 
"VNC_CONNECTED", "data": {"server": {"auth": "none", "family": "ipv4", 
"service": "5907", "host": "0.0.0.0", "websocket": false}, "client": {"family": 
"ipv4", "service": "51564", "host": "10.239.13.19", "websocket": false}}}
{"timestamp": {"seconds": 1589511311, "microseconds": 116197}, "event": 
"VNC_INITIALIZED", "data": {"server": {"auth": "none", "family": "ipv4", 
"service": "5907", "host": "0.0.0.0", "websocket": false}, "client": {"family": 
"ipv4", "service": "51564", "host": "10.239.13.19", "websocket": false}}}
13322@1589511311.243271:colo_send_message Send 'checkpoint-request' message
13322@1589511311.351361:colo_receive_message Receive 'checkpoint-reply' message
{"timestamp": {"seconds": 1589511311, "microseconds": 351439}, "event": "STOP"}
13322@1589511311.415779:colo_vm_state_change Change 'run' => 'stop'
13322@1589511311.416001:colo_send_message Send 'vmstate-send' message
13322@1589511311.418620:colo_send_message Send 'vmstate-size' message

Secondary node:
{"timestamp": {"seconds": 1589510920, "microseconds": 778207}, "event": 
"RESUME"}
23619@1589510920.778835:colo_vm_state_change Change 'stop' => 'run'
23619@1589510920.778891:colo_send_message Send 'checkpoint-ready' message
23619@1589510940.105539:colo_receive_message Receive 'checkpoint-request' 
message
{"timestamp": {"seconds": 1589510940, "microseconds": 105712}, "event": "STOP"}
23619@1589510940.105917:colo_vm_state_change Change 'run' => 'stop'
23619@1589510940.105971:colo_send_message Send 'checkpoint-reply' message
23619@1589510940.106767:colo_receive_message Receive 'vmstate-send' message
23619@1589510940.122808:colo_flush_ram_cache_begin dirty_pages 2456
23619@1589510953.618672:colo_flush_ram_cache_end
23619@1589510953.945083:colo_receive_message Receive 'vmstate-size' message
23619@1589510954.274816:colo_send_message Send 'vmstate-received' message
qemu-system-x86_64: warning: TSC frequency mismatch between VM (2792980 kHz) 
and host (2925999 kHz), and TSC scaling unavailable
{"timestamp": {"seconds": 1589510957, "microseconds": 754184}, "event": 
"RESUME"}
23619@1589510957.894113:colo_vm_state_change Change 'stop' => 'run'
23619@1589510957.894162:colo_send_message Send 'vmstate-loaded' message
23619@1589510960.105977:colo_receive_message Receive 'checkpoint-request' 
message
{"timestamp": {"seconds": 1589510960, "microseconds": 106148}, "event": "STOP"}
23619@1589510960.213773:colo_vm_state_change Change 'run' => 'stop'
23619@1589510960.213797:colo_send_message Send 'checkpoint-reply' message
23619@1589510960.278771:colo_receive_message Receive 'vmstate-send' message
23619@1589510960.423268:colo_flush_ram_cache_begin dirty_pages 25









Re: [PATCH Kernel v20 0/8] Add UAPIs to support migration for VFIO devices

2020-05-14 Thread Alex Williamson
Hi Yan & Intel folks,

I'm starting to run out of comments on this series, where are you with
porting GVT-g migration to this API?  Are there remaining blocking
issues?  Are we satisfied that the API is sufficient to support vIOMMU
now?  Thanks,

Alex


On Fri, 15 May 2020 02:07:39 +0530
Kirti Wankhede  wrote:

> Hi,
> 
> This patch set adds:
> * IOCTL VFIO_IOMMU_DIRTY_PAGES to get dirty pages bitmap with
>   respect to IOMMU container rather than per device. All pages pinned by
>   vendor driver through vfio_pin_pages external API has to be marked as
>   dirty during  migration. When IOMMU capable device is present in the
>   container and all pages are pinned and mapped, then all pages are marked
>   dirty.
>   When there are CPU writes, CPU dirty page tracking can identify dirtied
>   pages, but any page pinned by vendor driver can also be written by
>   device. As of now there is no device which has hardware support for
>   dirty page tracking. So all pages which are pinned should be considered
>   as dirty.
>   This ioctl is also used to start/stop dirty pages tracking for pinned and
>   unpinned pages while migration is active.
> 
> * Updated IOCTL VFIO_IOMMU_UNMAP_DMA to get dirty pages bitmap before
>   unmapping IO virtual address range.
>   With vIOMMU, during pre-copy phase of migration, while CPUs are still
>   running, IO virtual address unmap can happen while device still keeping
>   reference of guest pfns. Those pages should be reported as dirty before
>   unmap, so that VFIO user space application can copy content of those
>   pages from source to destination.
> 
> * Patch 8 detect if IOMMU capable device driver is smart to report pages
>   to be marked dirty by pinning pages using vfio_pin_pages() API.
> 
> 
> Yet TODO:
> Since there is no device which has hardware support for system memory
> dirty bitmap tracking, right now there is no other API from vendor driver
> to VFIO IOMMU module to report dirty pages. In future, when such hardware
> support will be implemented, an API will be required such that vendor
> driver could report dirty pages to VFIO module during migration phases.
> 
> Adding revision history from previous QEMU patch set to understand KABI
> changes done till now
> 
> v19 -> v20
> - Fixed ioctl to get dirty bitmap to get bitmap of multiple vfio_dmas
> - Fixed unmap ioctl to get dirty bitmap of multiple vfio_dmas.
> - Removed flag definition from migration capability.
> 
> v18 -> v19
> - Updated migration capability with supported page sizes bitmap for dirty
>   page tracking and  maximum bitmap size supported by kernel module.
> - Added patch to calculate and cache pgsize_bitmap when iommu->domain_list
>   is updated.
> - Removed extra buffers added in previous version for bitmap manipulation
>   and optimised the code.
> 
> v17 -> v18
> - Add migration capability to the capability chain for VFIO_IOMMU_GET_INFO
>   ioctl
> - Updated UNMAP_DMA ioctl to return bitmap of multiple vfio_dma
> 
> v16 -> v17
> - Fixed errors reported by kbuild test robot  on i386
> 
> v15 -> v16
> - Minor edits and nit picks (Auger Eric)
> - On copying bitmap to user, re-populated bitmap only for pinned pages,
>   excluding unmapped pages and CPU dirtied pages.
> - Patches are on tag: next-20200318 and 1-3 patches from Yan's series
>   https://lkml.org/lkml/2020/3/12/1255
> 
> v14 -> v15
> - Minor edits and nit picks.
> - In the verification of user allocated bitmap memory, added check of
>maximum size.
> - Patches are on tag: next-20200318 and 1-3 patches from Yan's series
>   https://lkml.org/lkml/2020/3/12/1255
> 
> v13 -> v14
> - Added struct vfio_bitmap to kabi. updated structure
>   vfio_iommu_type1_dirty_bitmap_get and vfio_iommu_type1_dma_unmap.
> - All small changes suggested by Alex.
> - Patches are on tag: next-20200318 and 1-3 patches from Yan's series
>   https://lkml.org/lkml/2020/3/12/1255
> 
> v12 -> v13
> - Changed bitmap allocation in vfio_iommu_type1 to per vfio_dma
> - Changed VFIO_IOMMU_DIRTY_PAGES ioctl behaviour to be per vfio_dma range.
> - Changed vfio_iommu_type1_dirty_bitmap structure to have separate data
>   field.
> 
> v11 -> v12
> - Changed bitmap allocation in vfio_iommu_type1.
> - Remove atomicity of ref_count.
> - Updated comments for migration device state structure about error
>   reporting.
> - Nit picks from v11 reviews
> 
> v10 -> v11
> - Fix pin pages API to free vpfn if it is marked as unpinned tracking page.
> - Added proposal to detect if IOMMU capable device calls external pin pages
>   API to mark pages dirty.
> - Nit picks from v10 reviews
> 
> v9 -> v10:
> - Updated existing VFIO_IOMMU_UNMAP_DMA ioctl to get dirty pages bitmap
>   during unmap while migration is active
> - Added flag in VFIO_IOMMU_GET_INFO to indicate driver support dirty page
>   tracking.
> - If iommu_mapped, mark all pages dirty.
> - Added unpinned pages tracking while migration is active.
> - Updated comments for migration device state structure with bit
>   combination 

RE: About migration/colo issue

2020-05-14 Thread Zhang, Chen


From: Zhanghailiang 
Sent: Friday, May 15, 2020 11:29 AM
To: Zhang, Chen ; Dr . David Alan Gilbert 
; qemu-devel ; Li Zhijian 

Cc: Jason Wang ; Lukas Straub 
Subject: RE: About migration/colo issue

Hi Zhang Chen,

From your tracing log, it seems to hang in colo_flush_ram_cache()?
Does it run into an infinite loop there?

Maybe, I haven't looked in depth.

I'll test it by using the new qemu.

Thanks

Thanks,
Hailiang

From: Zhang, Chen [mailto:chen.zh...@intel.com]
Sent: Friday, May 15, 2020 11:16 AM
To: Zhanghailiang 
mailto:zhang.zhanghaili...@huawei.com>>; Dr . 
David Alan Gilbert mailto:dgilb...@redhat.com>>; 
qemu-devel mailto:qemu-devel@nongnu.org>>; Li Zhijian 
mailto:lizhij...@cn.fujitsu.com>>
Cc: Jason Wang mailto:jasow...@redhat.com>>; Lukas Straub 
mailto:lukasstra...@web.de>>
Subject: About migration/colo issue

Hi Hailiang/Dave.

I found an urgent problem in current upstream code: COLO will get stuck on secondary 
checkpoint and later.
The guest will get stuck because of this issue.
I have bisected the upstream code; this issue is caused by Hailiang's optimization patch:

From 0393031a16735835a441b6d6e0495a1bd14adb90 Mon Sep 17 00:00:00 2001
From: zhanghailiang 
mailto:zhang.zhanghaili...@huawei.com>>
Date: Mon, 24 Feb 2020 14:54:10 +0800
Subject: [PATCH] COLO: Optimize memory back-up process

This patch will reduce the downtime of VM for the initial process,
Previously, we copied all these memory in preparing stage of COLO
while we need to stop VM, which is a time-consuming process.
Here we optimize it by a trick, back-up every page while in migration
process while COLO is enabled, though it affects the speed of the
migration, but it obviously reduce the downtime of back-up all SVM'S
memory in COLO preparing stage.

Signed-off-by: zhanghailiang 
mailto:zhang.zhanghaili...@huawei.com>>
Message-Id: 
<20200224065414.36524-5-zhang.zhanghaili...@huawei.com>
Signed-off-by: Dr. David Alan Gilbert 
mailto:dgilb...@redhat.com>>
  minor typo fixes

Hailiang, do you have time to look into it?


The detail log:
Primary node:
13322@1589511271.917346:colo_receive_message
 Receive 'checkpoint-ready' message
{"timestamp": {"seconds": 1589511271, "microseconds": 917406}, "event": 
"RESUME"}
13322@1589511271.917842:colo_vm_state_change
 Change 'stop' => 'run'
13322@1589511291.243346:colo_send_message
 Send 'checkpoint-request' message
13322@1589511291.243978:colo_receive_message
 Receive 'checkpoint-reply' message
{"timestamp": {"seconds": 1589511291, "microseconds": 244096}, "event": "STOP"}
13322@1589511291.24:colo_vm_state_change
 Change 'run' => 'stop'
13322@1589511291.244561:colo_send_message
 Send 'vmstate-send' message
13322@1589511291.258594:colo_send_message
 Send 'vmstate-size' message
13322@1589511305.412479:colo_receive_message
 Receive 'vmstate-received' message
13322@1589511309.031826:colo_receive_message
 Receive 'vmstate-loaded' message
{"timestamp": {"seconds": 1589511309, "microseconds": 31862}, "event": "RESUME"}
13322@1589511309.033075:colo_vm_state_change
 Change 'stop' => 'run'
{"timestamp": {"seconds": 1589511311, "microseconds": 111617}, "event": 
"VNC_CONNECTED", "data": {"server": {"auth": "none", "family": "ipv4", 
"service": "5907", "host": "0.0.0.0", "websocket": false}, "client": {"family": 
"ipv4", "service": "51564", "host": "10.239.13.19", "websocket": false}}}
{"timestamp": {"seconds": 1589511311, "microseconds": 116197}, "event": 
"VNC_INITIALIZED", "data": {"server": {"auth": "none", "family": "ipv4", 
"service": "5907", "host": "0.0.0.0", "websocket": false}, "client": {"family": 
"ipv4", "service": "51564", "host": "10.239.13.19", "websocket": false}}}
13322@1589511311.243271:colo_send_message
 Send 'checkpoint-request' message
13322@1589511311.351361:colo_receive_message
 Receive 'checkpoint-reply' message
{"timestamp": {"seconds": 1589511311, "microseconds": 351439}, "event": "STOP"}
13322@1589511311.415779:colo_vm_state_change
 Change 'run' => 'stop'
13322@1589511311.416001:colo_send_message
 Send 'vmstate-send' message
13322@1589511311.418620:colo_send_message
 Send 'vmstate-size' message

Secondary node:
{"timestamp": {"seconds": 1589510920, "microseconds": 

Re: [Virtio-fs] [PATCH] virtiofsd: remove symlink fallbacks

2020-05-14 Thread Liu Bo
On Thu, May 14, 2020 at 04:07:36PM +0200, Miklos Szeredi wrote:
> Path lookup in the kernel has special rules for looking up magic symlinks
> under /proc.  If a filesystem operation is instructed to follow symlinks
> (e.g. via AT_SYMLINK_FOLLOW or lack of AT_SYMLINK_NOFOLLOW), and the final
> component is such a proc symlink, then the target of the magic symlink is
> used for the operation, even if the target itself is a symlink.  I.e. path
> lookup is always terminated after following a final magic symlink.
>

Hi Miklos,

Are the special rules mentioned above supported only by recent kernel versions,
or have they been there since day one of Linux?

thanks,
liubo

> I was erroneously assuming that in the above case the target symlink would
> also be followed, and so workarounds were added for a couple of operations
> to handle the symlink case.  Since the symlink can be handled simply by
> following the proc symlink, these workarounds are not needed.
> 
> Also remove the "norace" option, which disabled the workarounds.
> 
> Commit bdfd66788349 ("virtiofsd: Fix xattr operations") already dealt with
> the same issue for xattr operations.
> 
> Signed-off-by: Miklos Szeredi 
> ---
>  tools/virtiofsd/passthrough_ll.c | 175 ++-
>  1 file changed, 6 insertions(+), 169 deletions(-)
> 
> diff --git a/tools/virtiofsd/passthrough_ll.c 
> b/tools/virtiofsd/passthrough_ll.c
> index 3ba1d9098460..2ce7c96085bf 100644
> --- a/tools/virtiofsd/passthrough_ll.c
> +++ b/tools/virtiofsd/passthrough_ll.c
> @@ -140,7 +140,6 @@ enum {
>  struct lo_data {
>  pthread_mutex_t mutex;
>  int debug;
> -int norace;
>  int writeback;
>  int flock;
>  int posix_lock;
> @@ -176,7 +175,6 @@ static const struct fuse_opt lo_opts[] = {
>  { "cache=none", offsetof(struct lo_data, cache), CACHE_NONE },
>  { "cache=auto", offsetof(struct lo_data, cache), CACHE_AUTO },
>  { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
> -{ "norace", offsetof(struct lo_data, norace), 1 },
>  { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 },
>  { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 },
>  FUSE_OPT_END
> @@ -592,136 +590,6 @@ static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
>  fuse_reply_attr(req, , lo->timeout);
>  }
>  
> -/*
> - * Increments parent->nlookup and caller must release refcount using
> - * lo_inode_put().
> - */
> -static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
> -  char path[PATH_MAX], struct lo_inode **parent)
> -{
> -char procname[64];
> -char *last;
> -struct stat stat;
> -struct lo_inode *p;
> -int retries = 2;
> -int res;
> -
> -retry:
> -sprintf(procname, "%i", inode->fd);
> -
> -res = readlinkat(lo->proc_self_fd, procname, path, PATH_MAX);
> -if (res < 0) {
> -fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__);
> -goto fail_noretry;
> -}
> -
> -if (res >= PATH_MAX) {
> -fuse_log(FUSE_LOG_WARNING, "%s: readlink overflowed\n", __func__);
> -goto fail_noretry;
> -}
> -path[res] = '\0';
> -
> -last = strrchr(path, '/');
> -if (last == NULL) {
> -/* Shouldn't happen */
> -fuse_log(
> -FUSE_LOG_WARNING,
> -"%s: INTERNAL ERROR: bad path read from proc\n", __func__);
> -goto fail_noretry;
> -}
> -if (last == path) {
> -p = >root;
> -pthread_mutex_lock(>mutex);
> -p->nlookup++;
> -g_atomic_int_inc(>refcount);
> -pthread_mutex_unlock(>mutex);
> -} else {
> -*last = '\0';
> -res = fstatat(AT_FDCWD, last == path ? "/" : path, , 0);
> -if (res == -1) {
> -if (!retries) {
> -fuse_log(FUSE_LOG_WARNING,
> - "%s: failed to stat parent: %m\n", __func__);
> -}
> -goto fail;
> -}
> -p = lo_find(lo, );
> -if (p == NULL) {
> -if (!retries) {
> -fuse_log(FUSE_LOG_WARNING,
> - "%s: failed to find parent\n", __func__);
> -}
> -goto fail;
> -}
> -}
> -last++;
> -res = fstatat(p->fd, last, , AT_SYMLINK_NOFOLLOW);
> -if (res == -1) {
> -if (!retries) {
> -fuse_log(FUSE_LOG_WARNING,
> - "%s: failed to stat last\n", __func__);
> -}
> -goto fail_unref;
> -}
> -if (stat.st_dev != inode->key.dev || stat.st_ino != inode->key.ino) {
> -if (!retries) {
> -fuse_log(FUSE_LOG_WARNING,
> - "%s: failed to match last\n", __func__);
> -}
> -goto fail_unref;
> -}
> -*parent = p;
> -memmove(path, last, strlen(last) + 1);
> -
> -return 0;
> -
> -fail_unref:
> -unref_inode_lolocked(lo, p, 1);
> -lo_inode_put(lo, );
> -fail:
> -if 

Re: [PATCH] vhost-user: add support for VHOST_USER_SET_STATUS

2020-05-14 Thread Jason Wang



On 2020/5/14 下午6:14, Maxime Coquelin wrote:


On 5/14/20 9:53 AM, Jason Wang wrote:

On 2020/5/14 下午3:33, Maxime Coquelin wrote:

It is useful for the Vhost-user backend to know
about the Virtio device status updates,
especially when the driver sets the DRIVER_OK bit.

With that information, there is no more need to make hazardous
assumptions about when the driver is done with the
device configuration.

Signed-off-by: Maxime Coquelin 
---

This patch applies on top of Cindy's "vDPA support in qemu"
series, which introduces the .vhost_set_state vhost-backend
ops.

   docs/interop/vhost-user.rst | 12 
   hw/net/vhost_net.c  | 10 +-
   hw/virtio/vhost-user.c  | 35 +++
   3 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst
index 3b1b6602c7..f108de7458 100644
--- a/docs/interop/vhost-user.rst
+++ b/docs/interop/vhost-user.rst
@@ -815,6 +815,7 @@ Protocol features
     #define VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD   12
     #define VHOST_USER_PROTOCOL_F_RESET_DEVICE 13
     #define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14
+  #define VHOST_USER_PROTOCOL_F_STATUS   15
     Master message types
   
@@ -1263,6 +1264,17 @@ Master message types
       The state.num field is currently reserved and must be set to 0.
   +``VHOST_USER_SET_STATUS``
+  :id: 36
+  :equivalent ioctl: VHOST_VDPA_SET_STATUS
+  :slave payload: N/A
+  :master payload: ``u64``
+
+  When the ``VHOST_USER_PROTOCOL_F_STATUS`` protocol feature has been
+  successfully negotiated, this message is submitted by the master to
+  notify the backend with updated device status as defined in the Virtio
+  specification.
+
   Slave message types
   ---
   diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 463e333531..37f3156dbc 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -517,10 +517,10 @@ int vhost_set_state(NetClientState *nc, int state)
   {
   struct vhost_net *net = get_vhost_net(nc);
   struct vhost_dev *hdev = >dev;
-    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
-    if (hdev->vhost_ops->vhost_set_state) {
-    return hdev->vhost_ops->vhost_set_state(hdev, state);
- }
-    }
+
+    if (hdev->vhost_ops->vhost_set_state) {
+    return hdev->vhost_ops->vhost_set_state(hdev, state);
+    }
+
   return 0;
   }
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index ec21e8fbe8..b7e52d97fc 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -59,6 +59,7 @@ enum VhostUserProtocolFeature {
   VHOST_USER_PROTOCOL_F_HOST_NOTIFIER = 11,
   VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD = 12,
   VHOST_USER_PROTOCOL_F_RESET_DEVICE = 13,
+    VHOST_USER_PROTOCOL_F_STATUS = 15,
   VHOST_USER_PROTOCOL_F_MAX
   };
   @@ -100,6 +101,7 @@ typedef enum VhostUserRequest {
   VHOST_USER_SET_INFLIGHT_FD = 32,
   VHOST_USER_GPU_SET_SOCKET = 33,
   VHOST_USER_RESET_DEVICE = 34,
+    VHOST_USER_SET_STATUS = 36,
   VHOST_USER_MAX
   } VhostUserRequest;
   @@ -1886,6 +1888,38 @@ static int vhost_user_set_inflight_fd(struct
vhost_dev *dev,
   return 0;
   }
   +static int vhost_user_set_state(struct vhost_dev *dev, int state)
+{
+    bool reply_supported = virtio_has_feature(dev->protocol_features,
+
VHOST_USER_PROTOCOL_F_REPLY_ACK);
+
+    VhostUserMsg msg = {
+    .hdr.request = VHOST_USER_SET_STATUS,
+    .hdr.flags = VHOST_USER_VERSION,
+    .hdr.size = sizeof(msg.payload.u64),
+    .payload.u64 = (uint64_t)state,
+    };
+
+    if (!virtio_has_feature(dev->protocol_features,
+    VHOST_USER_PROTOCOL_F_STATUS)) {
+    return -1;
+    }
+
+    if (reply_supported) {
+    msg.hdr.flags |= VHOST_USER_NEED_REPLY_MASK;
+    }
+
+    if (vhost_user_write(dev, , NULL, 0) < 0) {
+    return -1;
+    }
+
+    if (reply_supported) {
+    return process_message_reply(dev, );
+    }
+
+    return 0;
+}


Interesting, I wonder how vm stop will be handled in this case.

For now, my DPDK series only use DRIVER_OK to help determine when the
driver is done with the initialization. For VM stop, it still relies on
GET_VRING_BASE.

GET_VRING_BASE arrives before the DRIVER_OK bit is cleared in the tests I've
done (logs from backend side):

VHOST_CONFIG: read message VHOST_USER_GET_VRING_BASE

destroy port /tmp/vhost-user1, did: 0
VHOST_CONFIG: vring base idx:0 file:41
VHOST_CONFIG: read message VHOST_USER_GET_VRING_BASE
VHOST_CONFIG: vring base idx:1 file:0
VHOST_CONFIG: read message VHOST_USER_SET_STATUS
VHOST_CONFIG: New device status(0x000b):
-ACKNOWLEDGE: 1
-DRIVER: 1
-FEATURES_OK: 1
-DRIVER_OK: 0
-DEVICE_NEED_RESET: 0
-FAILED: 0



Then it looks like a function duplication, e.g. the backend could be stopped 
either via GET_VRING_BASE or VHOST_USER_SET_STATUS(0).


And I guess 

[Bug 1877716] Re: Win10 guest unusable after a few minutes

2020-05-14 Thread Anatol Pomozov
Thank you Stefan for the fixes. Once these patches land in the upstream
repo I'll pull them into the Arch package and re-enable io_uring.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1877716

Title:
  Win10 guest unusable after a few minutes

Status in QEMU:
  New

Bug description:
  On Arch Linux, the recent qemu package update seems to misbehave on
  some systems. In my case, my Windows 10 guest runs fine for around 5
  minutes and then start to get really sluggish, even unresponsive. It
  needs to be forced off. I could reproduce this on a minimal VM with no
  passthrough, although my current testing setup involves an nvme pcie
  passthrough.

  I bisected it to the following commit which rapidly starts to run sluggishly 
on my setup:
  https://github.com/qemu/qemu/commit/73fd282e7b6dd4e4ea1c3bbb3d302c8db51e4ccf

  I've run the previous commit (
  https://github.com/qemu/qemu/commit/b321051cf48ccc2d3d832af111d688f2282f089b
  ) for the entire night without an issue so far.

  I believe this might be a duplicate of
  https://bugs.launchpad.net/qemu/+bug/1873032 , although I'm not sure.

  Linux cc 5.6.10-arch1-1 #1 SMP PREEMPT Sat, 02 May 2020 19:11:54 +0000 x86_64 
GNU/Linux
  AMD Ryzen 7 2700X Eight-Core Processor

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1877716/+subscriptions



[PATCH v2 14/17] target/mips: fpu: Remove now unused FLOAT_RINT macro

2020-05-14 Thread Aleksandar Markovic
After demacroing RINT.<D|S>, this macro is not needed anymore.

Signed-off-by: Aleksandar Markovic 
---
 target/mips/fpu_helper.c | 13 -
 1 file changed, 13 deletions(-)

diff --git a/target/mips/fpu_helper.c b/target/mips/fpu_helper.c
index dae1331f23..56ba49104e 100644
--- a/target/mips/fpu_helper.c
+++ b/target/mips/fpu_helper.c
@@ -1102,19 +1102,6 @@ uint64_t helper_float_rsqrt1_ps(CPUMIPSState *env, 
uint64_t fdt0)
 return ((uint64_t)fsth2 << 32) | fst2;
 }
 
-#define FLOAT_RINT(name, bits)  \
-uint ## bits ## _t helper_float_ ## name(CPUMIPSState *env, \
- uint ## bits ## _t fs) \
-{   \
-uint ## bits ## _t fdret;   \
-\
-fdret = float ## bits ## _round_to_int(fs, >active_fpu.fp_status); \
-update_fcr31(env, GETPC()); \
-return fdret;   \
-}
-
-#undef FLOAT_RINT
-
 uint64_t helper_float_rint_d(CPUMIPSState *env, uint64_t fs)
 {
 uint64_t fdret;
-- 
2.20.1




[PATCH v2 08/17] target/mips: fpu: Demacro NMADD.<D|S|PS>

2020-05-14 Thread Aleksandar Markovic
This is just a cosmetic change to enable tools like gcov, gdb,
callgrind, etc. to better display involved source code.

Signed-off-by: Aleksandar Markovic 
---
 target/mips/fpu_helper.c | 44 +++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/target/mips/fpu_helper.c b/target/mips/fpu_helper.c
index e37fc4075d..d4c065f281 100644
--- a/target/mips/fpu_helper.c
+++ b/target/mips/fpu_helper.c
@@ -1495,7 +1495,6 @@ uint64_t helper_float_ ## name ## _ps(CPUMIPSState *env,  
   \
 update_fcr31(env, GETPC());  \
 return ((uint64_t)fsth0 << 32) | fst0;   \
 }
-FLOAT_FMA(nmadd, float_muladd_negate_result)
 FLOAT_FMA(nmsub, float_muladd_negate_result | float_muladd_negate_c)
 #undef FLOAT_FMA
 
@@ -1577,6 +1576,49 @@ uint64_t helper_float_msub_ps(CPUMIPSState *env, 
uint64_t fdt0,
 return ((uint64_t)fsth0 << 32) | fstl0;
 }
 
+uint64_t helper_float_nmadd_d(CPUMIPSState *env, uint64_t fst0,
+ uint64_t fst1, uint64_t fst2)
+{
+fst0 = float64_mul(fst0, fst1, >active_fpu.fp_status);
+fst0 = float64_add(fst0, fst2, >active_fpu.fp_status);
+fst0 = float64_chs(fst0);
+
+update_fcr31(env, GETPC());
+return fst0;
+}
+
+uint32_t helper_float_nmadd_s(CPUMIPSState *env, uint32_t fst0,
+ uint32_t fst1, uint32_t fst2)
+{
+fst0 = float32_mul(fst0, fst1, >active_fpu.fp_status);
+fst0 = float32_add(fst0, fst2, >active_fpu.fp_status);
+fst0 = float32_chs(fst0);
+
+update_fcr31(env, GETPC());
+return fst0;
+}
+
+uint64_t helper_float_nmadd_ps(CPUMIPSState *env, uint64_t fdt0,
+  uint64_t fdt1, uint64_t fdt2)
+{
+uint32_t fstl0 = fdt0 & 0X;
+uint32_t fsth0 = fdt0 >> 32;
+uint32_t fstl1 = fdt1 & 0X;
+uint32_t fsth1 = fdt1 >> 32;
+uint32_t fstl2 = fdt2 & 0X;
+uint32_t fsth2 = fdt2 >> 32;
+
+fstl0 = float32_mul(fstl0, fstl1, >active_fpu.fp_status);
+fstl0 = float32_add(fstl0, fstl2, >active_fpu.fp_status);
+fstl0 = float32_chs(fstl0);
+fsth0 = float32_mul(fsth0, fsth1, >active_fpu.fp_status);
+fsth0 = float32_add(fsth0, fsth2, >active_fpu.fp_status);
+fsth0 = float32_chs(fsth0);
+
+update_fcr31(env, GETPC());
+return ((uint64_t)fsth0 << 32) | fstl0;
+}
+
 
 #define FLOAT_FMADDSUB(name, bits, muladd_arg)  \
 uint ## bits ## _t helper_float_ ## name(CPUMIPSState *env, \
-- 
2.20.1




[PATCH v2 15/17] target/mips: fpu: Name better paired-single variables

2020-05-14 Thread Aleksandar Markovic
Use consistently 'l' and 'h' for low and high halves.

Signed-off-by: Aleksandar Markovic 
---
 target/mips/fpu_helper.c | 62 
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/target/mips/fpu_helper.c b/target/mips/fpu_helper.c
index 56ba49104e..dbb8ca5692 100644
--- a/target/mips/fpu_helper.c
+++ b/target/mips/fpu_helper.c
@@ -1059,14 +1059,14 @@ uint32_t helper_float_recip1_s(CPUMIPSState *env, 
uint32_t fst0)
 
 uint64_t helper_float_recip1_ps(CPUMIPSState *env, uint64_t fdt0)
 {
-uint32_t fst2;
+uint32_t fstl2;
 uint32_t fsth2;
 
-fst2 = float32_div(float32_one, fdt0 & 0X,
-   >active_fpu.fp_status);
+fstl2 = float32_div(float32_one, fdt0 & 0X,
+>active_fpu.fp_status);
 fsth2 = float32_div(float32_one, fdt0 >> 32, >active_fpu.fp_status);
 update_fcr31(env, GETPC());
-return ((uint64_t)fsth2 << 32) | fst2;
+return ((uint64_t)fsth2 << 32) | fstl2;
 }
 
 uint64_t helper_float_rsqrt1_d(CPUMIPSState *env, uint64_t fdt0)
@@ -1091,15 +1091,15 @@ uint32_t helper_float_rsqrt1_s(CPUMIPSState *env, 
uint32_t fst0)
 
 uint64_t helper_float_rsqrt1_ps(CPUMIPSState *env, uint64_t fdt0)
 {
-uint32_t fst2;
+uint32_t fstl2;
 uint32_t fsth2;
 
-fst2 = float32_sqrt(fdt0 & 0X, >active_fpu.fp_status);
+fstl2 = float32_sqrt(fdt0 & 0X, >active_fpu.fp_status);
 fsth2 = float32_sqrt(fdt0 >> 32, >active_fpu.fp_status);
-fst2 = float32_div(float32_one, fst2, >active_fpu.fp_status);
+fstl2 = float32_div(float32_one, fstl2, >active_fpu.fp_status);
 fsth2 = float32_div(float32_one, fsth2, >active_fpu.fp_status);
 update_fcr31(env, GETPC());
-return ((uint64_t)fsth2 << 32) | fst2;
+return ((uint64_t)fsth2 << 32) | fstl2;
 }
 
 uint64_t helper_float_rint_d(CPUMIPSState *env, uint64_t fs)
@@ -1367,19 +1367,19 @@ uint32_t helper_float_recip2_s(CPUMIPSState *env, 
uint32_t fst0, uint32_t fst2)
 
 uint64_t helper_float_recip2_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t 
fdt2)
 {
-uint32_t fst0 = fdt0 & 0X;
+uint32_t fstl0 = fdt0 & 0X;
 uint32_t fsth0 = fdt0 >> 32;
-uint32_t fst2 = fdt2 & 0X;
+uint32_t fstl2 = fdt2 & 0X;
 uint32_t fsth2 = fdt2 >> 32;
 
-fst2 = float32_mul(fst0, fst2, >active_fpu.fp_status);
+fstl2 = float32_mul(fstl0, fstl2, >active_fpu.fp_status);
 fsth2 = float32_mul(fsth0, fsth2, >active_fpu.fp_status);
-fst2 = float32_chs(float32_sub(fst2, float32_one,
+fstl2 = float32_chs(float32_sub(fstl2, float32_one,
>active_fpu.fp_status));
 fsth2 = float32_chs(float32_sub(fsth2, float32_one,
>active_fpu.fp_status));
 update_fcr31(env, GETPC());
-return ((uint64_t)fsth2 << 32) | fst2;
+return ((uint64_t)fsth2 << 32) | fstl2;
 }
 
 uint64_t helper_float_rsqrt2_d(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2)
@@ -1404,51 +1404,51 @@ uint32_t helper_float_rsqrt2_s(CPUMIPSState *env, 
uint32_t fst0, uint32_t fst2)
 
 uint64_t helper_float_rsqrt2_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t 
fdt2)
 {
-uint32_t fst0 = fdt0 & 0X;
+uint32_t fstl0 = fdt0 & 0X;
 uint32_t fsth0 = fdt0 >> 32;
-uint32_t fst2 = fdt2 & 0X;
+uint32_t fstl2 = fdt2 & 0X;
 uint32_t fsth2 = fdt2 >> 32;
 
-fst2 = float32_mul(fst0, fst2, >active_fpu.fp_status);
+fstl2 = float32_mul(fstl0, fstl2, >active_fpu.fp_status);
 fsth2 = float32_mul(fsth0, fsth2, >active_fpu.fp_status);
-fst2 = float32_sub(fst2, float32_one, >active_fpu.fp_status);
+fstl2 = float32_sub(fstl2, float32_one, >active_fpu.fp_status);
 fsth2 = float32_sub(fsth2, float32_one, >active_fpu.fp_status);
-fst2 = float32_chs(float32_div(fst2, FLOAT_TWO32,
+fstl2 = float32_chs(float32_div(fstl2, FLOAT_TWO32,
>active_fpu.fp_status));
 fsth2 = float32_chs(float32_div(fsth2, FLOAT_TWO32,
>active_fpu.fp_status));
 update_fcr31(env, GETPC());
-return ((uint64_t)fsth2 << 32) | fst2;
+return ((uint64_t)fsth2 << 32) | fstl2;
 }
 
 uint64_t helper_float_addr_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt1)
 {
-uint32_t fst0 = fdt0 & 0X;
+uint32_t fstl0 = fdt0 & 0X;
 uint32_t fsth0 = fdt0 >> 32;
-uint32_t fst1 = fdt1 & 0X;
+uint32_t fstl1 = fdt1 & 0X;
 uint32_t fsth1 = fdt1 >> 32;
-uint32_t fst2;
+uint32_t fstl2;
 uint32_t fsth2;
 
-fst2 = float32_add(fst0, fsth0, >active_fpu.fp_status);
-fsth2 = float32_add(fst1, fsth1, >active_fpu.fp_status);
+fstl2 = float32_add(fstl0, fsth0, >active_fpu.fp_status);
+fsth2 = float32_add(fstl1, fsth1, >active_fpu.fp_status);
 update_fcr31(env, GETPC());
-return ((uint64_t)fsth2 << 32) | fst2;
+return ((uint64_t)fsth2 << 

Re: [PATCH for-5.1 V3 2/7] hw/mips: Implement the kvm_type() hook in MachineClass

2020-05-14 Thread Aleksandar Markovic
нед, 3. мај 2020. у 12:24 Huacai Chen  је написао/ла:
>
> MIPS has two types of KVM: TE & VZ, and TE is the default type. Now we
> can't create a VZ guest in QEMU because it lacks the kvm_type() hook in
> MachineClass. Besides, libvirt uses a null-machine to detect the kvm
> capability, so by default it will return "KVM not supported" on a VZ
> platform. Thus, null-machine also needs the kvm_type() hook.
>
> Signed-off-by: Huacai Chen 
> Co-developed-by: Jiaxun Yang 
> ---

Reviewed-by: Aleksandar Markovic 

>  hw/core/Makefile.objs  |  2 +-
>  hw/core/null-machine.c |  4 
>  hw/mips/Makefile.objs  |  2 +-
>  hw/mips/common.c   | 31 +++
>  include/hw/mips/mips.h |  3 +++
>  5 files changed, 40 insertions(+), 2 deletions(-)
>  create mode 100644 hw/mips/common.c
>
> diff --git a/hw/core/Makefile.objs b/hw/core/Makefile.objs
> index 1d540ed..b5672f4 100644
> --- a/hw/core/Makefile.objs
> +++ b/hw/core/Makefile.objs
> @@ -17,11 +17,11 @@ common-obj-$(CONFIG_SOFTMMU) += vm-change-state-handler.o
>  common-obj-$(CONFIG_SOFTMMU) += qdev-properties-system.o
>  common-obj-$(CONFIG_SOFTMMU) += sysbus.o
>  common-obj-$(CONFIG_SOFTMMU) += machine.o
> -common-obj-$(CONFIG_SOFTMMU) += null-machine.o
>  common-obj-$(CONFIG_SOFTMMU) += loader.o
>  common-obj-$(CONFIG_SOFTMMU) += machine-hmp-cmds.o
>  common-obj-$(CONFIG_SOFTMMU) += numa.o
>  common-obj-$(CONFIG_SOFTMMU) += clock-vmstate.o
> +obj-$(CONFIG_SOFTMMU) += null-machine.o
>  obj-$(CONFIG_SOFTMMU) += machine-qmp-cmds.o
>
>  common-obj-$(CONFIG_EMPTY_SLOT) += empty_slot.o
> diff --git a/hw/core/null-machine.c b/hw/core/null-machine.c
> index cb47d9d..94a36f9 100644
> --- a/hw/core/null-machine.c
> +++ b/hw/core/null-machine.c
> @@ -17,6 +17,7 @@
>  #include "sysemu/sysemu.h"
>  #include "exec/address-spaces.h"
>  #include "hw/core/cpu.h"
> +#include "hw/mips/mips.h"
>
>  static void machine_none_init(MachineState *mch)
>  {
> @@ -50,6 +51,9 @@ static void machine_none_machine_init(MachineClass *mc)
>  mc->max_cpus = 1;
>  mc->default_ram_size = 0;
>  mc->default_ram_id = "ram";
> +#ifdef TARGET_MIPS
> +mc->kvm_type = mips_kvm_type;
> +#endif
>  }
>
>  DEFINE_MACHINE("none", machine_none_machine_init)
> diff --git a/hw/mips/Makefile.objs b/hw/mips/Makefile.objs
> index 525809a..2f7795b 100644
> --- a/hw/mips/Makefile.objs
> +++ b/hw/mips/Makefile.objs
> @@ -1,4 +1,4 @@
> -obj-y += addr.o mips_int.o
> +obj-y += addr.o common.o mips_int.o
>  obj-$(CONFIG_R4K) += mips_r4k.o
>  obj-$(CONFIG_MALTA) += gt64xxx_pci.o mips_malta.o
>  obj-$(CONFIG_MIPSSIM) += mips_mipssim.o
> diff --git a/hw/mips/common.c b/hw/mips/common.c
> new file mode 100644
> index 000..0e33bd0
> --- /dev/null
> +++ b/hw/mips/common.c
> @@ -0,0 +1,31 @@
> +/*
> + * Common MIPS routines
> + *
> + * Copyright (c) 2020 Huacai Chen (che...@lemote.com)
> + * This code is licensed under the GNU GPL v2.
> + */
> +
> +#include 
> +#include "qemu/osdep.h"
> +#include "qemu-common.h"
> +#include "hw/boards.h"
> +#include "hw/mips/mips.h"
> +#include "sysemu/kvm_int.h"
> +
> +int mips_kvm_type(MachineState *machine, const char *vm_type)
> +{
> +int r;
> +KVMState *s = KVM_STATE(machine->accelerator);
> +
> +r = kvm_check_extension(s, KVM_CAP_MIPS_VZ);
> +if (r > 0) {
> +return KVM_VM_MIPS_VZ;
> +}
> +
> +r = kvm_check_extension(s, KVM_CAP_MIPS_TE);
> +if (r > 0) {
> +return KVM_VM_MIPS_TE;
> +}
> +
> +return -1;
> +}
> diff --git a/include/hw/mips/mips.h b/include/hw/mips/mips.h
> index 0af4c3d..2ac0580 100644
> --- a/include/hw/mips/mips.h
> +++ b/include/hw/mips/mips.h
> @@ -20,4 +20,7 @@ void rc4030_dma_write(void *dma, uint8_t *buf, int len);
>
>  DeviceState *rc4030_init(rc4030_dma **dmas, IOMMUMemoryRegion **dma_mr);
>
> +/* common.c */
> +int mips_kvm_type(MachineState *machine, const char *vm_type);
> +
>  #endif
> --
> 2.7.0
>



[PATCH] es1370: check total frame count against current frame

2020-05-14 Thread P J P
From: Prasad J Pandit 

A guest user may set channel frame count via es1370_write()
such that, in es1370_transfer_audio(), total frame count
'size' is less than the number of frames that are processed
'cnt'.

int cnt = d->frame_cnt >> 16;
int size = d->frame_cnt & 0xffff;

if (size < cnt), it results in incorrect calculations leading
to OOB access issue(s). Add check to avoid it.

Reported-by: Ren Ding 
Reported-by: Hanqing Zhao 
Signed-off-by: Prasad J Pandit 
---
 hw/audio/es1370.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/hw/audio/es1370.c b/hw/audio/es1370.c
index 89c4dabcd4..5f8a83ff56 100644
--- a/hw/audio/es1370.c
+++ b/hw/audio/es1370.c
@@ -643,6 +643,9 @@ static void es1370_transfer_audio (ES1370State *s, struct 
chan *d, int loop_sel,
 int csc_bytes = (csc + 1) << d->shift;
 int cnt = d->frame_cnt >> 16;
 int size = d->frame_cnt & 0x;
+if (size < cnt) {
+return;
+}
 int left = ((size - cnt + 1) << 2) + d->leftover;
 int transferred = 0;
 int temp = MIN (max, MIN (left, csc_bytes));
@@ -651,7 +654,7 @@ static void es1370_transfer_audio (ES1370State *s, struct 
chan *d, int loop_sel,
 addr += (cnt << 2) + d->leftover;
 
 if (index == ADC_CHANNEL) {
-while (temp) {
+while (temp > 0) {
 int acquired, to_copy;
 
 to_copy = MIN ((size_t) temp, sizeof (tmpbuf));
@@ -669,7 +672,7 @@ static void es1370_transfer_audio (ES1370State *s, struct 
chan *d, int loop_sel,
 else {
 SWVoiceOut *voice = s->dac_voice[index];
 
-while (temp) {
+while (temp > 0) {
 int copied, to_copy;
 
 to_copy = MIN ((size_t) temp, sizeof (tmpbuf));
-- 
2.25.4




  1   2   3   4   5   >