date:20190221

[Qemu-devel] [PULL 01/15] kbd-state: don't block auto-repeat events

2019-02-21 Thread Gerd Hoffmann

Signed-off-by: Gerd Hoffmann 
Reviewed-by: Daniel P. Berrangé 
Message-id: 20190220100235.20914-1-kra...@redhat.com
---
 ui/kbd-state.c | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/ui/kbd-state.c b/ui/kbd-state.c
index ac14add70eab..f3ab2d7a665d 100644
--- a/ui/kbd-state.c
+++ b/ui/kbd-state.c
@@ -42,14 +42,18 @@ void qkbd_state_key_event(QKbdState *kbd, QKeyCode qcode, 
bool down)
 {
 bool state = test_bit(qcode, kbd->keys);
 
-if (state == down) {
+if (down == false  /* got key-up event   */ &&
+state == false /* key is not pressed */) {
 /*
- * Filter out events which don't change the keyboard state.
+ * Filter out suspicious key-up events.
  *
- * Most notably this allows to simply send along all key-up
- * events, and this function will filter out everything where
- * the corresponding key-down event wasn't send to the guest,
- * for example due to being a host hotkey.
+ * This allows simply sending along all key-up events, and
+ * this function will filter out everything where the
+ * corresponding key-down event wasn't sent to the guest, for
+ * example due to being a host hotkey.
+ *
+ * Note that key-down events on already pressed keys are *not*
+ * suspicious, those are keyboard autorepeat events.
  */
 return;
 }
-- 
2.9.3

[Qemu-devel] [PULL 05/15] char/spice: trigger HUP event

2019-02-21 Thread Gerd Hoffmann

From: Marc-André Lureau 

Inform the front-end of disconnected state (spice client
disconnected).

This will wakeup the source handler immediately, so it can detect the
disconnection asap.

Signed-off-by: Marc-André Lureau 
Tested-by: Victor Toso 
Message-id: 20190221110703.5775-2-marcandre.lur...@redhat.com
Signed-off-by: Gerd Hoffmann 
---
 chardev/spice.c | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/chardev/spice.c b/chardev/spice.c
index 173c257949c0..c2baeb5461fa 100644
--- a/chardev/spice.c
+++ b/chardev/spice.c
@@ -148,15 +148,25 @@ static void vmc_unregister_interface(SpiceChardev *scd)
 static gboolean spice_char_source_prepare(GSource *source, gint *timeout)
 {
 SpiceCharSource *src = (SpiceCharSource *)source;
+Chardev *chr = CHARDEV(src->scd);
 
 *timeout = -1;
 
+if (!chr->be_open) {
+return true;
+}
+
 return !src->scd->blocked;
 }
 
 static gboolean spice_char_source_check(GSource *source)
 {
 SpiceCharSource *src = (SpiceCharSource *)source;
+Chardev *chr = CHARDEV(src->scd);
+
+if (!chr->be_open) {
+return true;
+}
 
 return !src->scd->blocked;
 }
@@ -164,9 +174,12 @@ static gboolean spice_char_source_check(GSource *source)
 static gboolean spice_char_source_dispatch(GSource *source,
 GSourceFunc callback, gpointer user_data)
 {
+SpiceCharSource *src = (SpiceCharSource *)source;
+Chardev *chr = CHARDEV(src->scd);
 GIOFunc func = (GIOFunc)callback;
+GIOCondition cond = chr->be_open ? G_IO_OUT : G_IO_HUP;
 
-return func(NULL, G_IO_OUT, user_data);
+return func(NULL, cond, user_data);
 }
 
 static GSourceFuncs SpiceCharSourceFuncs = {
-- 
2.9.3

[Qemu-devel] [PULL 02/15] spice: set device address and device display ID in QXL interface

2019-02-21 Thread Gerd Hoffmann

From: Lukáš Hrázký 

Calls the new SPICE QXL interface function spice_qxl_set_device_info to
set the hardware address of the graphics device represented by the QXL
interface (e.g. a PCI path) and the device display IDs (the IDs of the
device's monitors that belong to this QXL interface).

Also stops using the deprecated spice_qxl_set_max_monitors, the new
interface function replaces it.

Signed-off-by: Lukáš Hrázký 
Message-Id: <20190215150919.8263-1-lhra...@redhat.com>
Signed-off-by: Gerd Hoffmann 
---
 include/ui/spice-display.h |  4 
 hw/display/qxl.c   | 14 -
 ui/spice-core.c| 51 ++
 ui/spice-display.c | 11 ++
 4 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/include/ui/spice-display.h b/include/ui/spice-display.h
index 87a84a59d4e0..53c3612c3202 100644
--- a/include/ui/spice-display.h
+++ b/include/ui/spice-display.h
@@ -179,3 +179,7 @@ void qemu_spice_wakeup(SimpleSpiceDisplay *ssd);
 void qemu_spice_display_start(void);
 void qemu_spice_display_stop(void);
 int qemu_spice_display_is_running(SimpleSpiceDisplay *ssd);
+
+bool qemu_spice_fill_device_address(QemuConsole *con,
+char *device_address,
+size_t size);
diff --git a/hw/display/qxl.c b/hw/display/qxl.c
index da8fd5a40a14..c8ce5781e037 100644
--- a/hw/display/qxl.c
+++ b/hw/display/qxl.c
@@ -276,7 +276,8 @@ static void qxl_spice_monitors_config_async(PCIQXLDevice 
*qxl, int replay)
 QXL_COOKIE_TYPE_POST_LOAD_MONITORS_CONFIG,
 0));
 } else {
-#if SPICE_SERVER_VERSION >= 0x000c06 /* release 0.12.6 */
+/* >= release 0.12.6, < release 0.14.2 */
+#if SPICE_SERVER_VERSION >= 0x000c06 && SPICE_SERVER_VERSION < 0x000e02
 if (qxl->max_outputs) {
 spice_qxl_set_max_monitors(&qxl->ssd.qxl, qxl->max_outputs);
 }
@@ -2188,6 +2189,17 @@ static void qxl_realize_common(PCIQXLDevice *qxl, Error 
**errp)
SPICE_INTERFACE_QXL_MAJOR, SPICE_INTERFACE_QXL_MINOR);
 return;
 }
+
+#if SPICE_SERVER_VERSION >= 0x000e02 /* release 0.14.2 */
+char device_address[256] = "";
+if (qemu_spice_fill_device_address(qxl->vga.con, device_address, 256)) {
+spice_qxl_set_device_info(&qxl->ssd.qxl,
+  device_address,
+  0,
+  qxl->max_outputs);
+}
+#endif
+
 qemu_add_vm_change_state_handler(qxl_vm_change_state_handler, qxl);
 
 qxl->update_irq = qemu_bh_new(qxl_update_irq_bh, qxl);
diff --git a/ui/spice-core.c b/ui/spice-core.c
index a40fb2c00dab..37fae3c42405 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -34,6 +34,7 @@
 #include "qemu/option.h"
 #include "migration/misc.h"
 #include "hw/hw.h"
+#include "hw/pci/pci_bus.h"
 #include "ui/spice-display.h"
 
 /* core bits */
@@ -863,6 +864,56 @@ bool qemu_spice_have_display_interface(QemuConsole *con)
 return false;
 }
 
+/*
+ * Recursively (in reverse order) appends addresses of PCI devices as it moves
+ * up in the PCI hierarchy.
+ *
+ * @returns true on success, false when the buffer wasn't large enough
+ */
+static bool append_pci_address(char *buf, size_t buf_size, const PCIDevice 
*pci)
+{
+PCIBus *bus = pci_get_bus(pci);
+/*
+ * equivalent to if (!pci_bus_is_root(bus)), but the function is not built
+ * with PCI_CONFIG=n, avoid using an #ifdef by checking directly
+ */
+if (bus->parent_dev != NULL) {
+append_pci_address(buf, buf_size, bus->parent_dev);
+}
+
+size_t len = strlen(buf);
+ssize_t written = snprintf(buf + len, buf_size - len, "/%02x.%x",
+PCI_SLOT(pci->devfn), PCI_FUNC(pci->devfn));
+
+return written > 0 && written < buf_size - len;
+}
+
+bool qemu_spice_fill_device_address(QemuConsole *con,
+char *device_address,
+size_t size)
+{
+DeviceState *dev = DEVICE(object_property_get_link(OBJECT(con),
+   "device",
+   &error_abort));
+PCIDevice *pci = (PCIDevice *) object_dynamic_cast(OBJECT(dev),
+   TYPE_PCI_DEVICE);
+
+if (pci == NULL) {
+warn_report("Setting device address of a display device to SPICE: "
+"Not a PCI device.");
+return false;
+}
+
+strncpy(device_address, "pci/", size);
+if (!append_pci_address(device_address, size, pci)) {
+warn_report("Setting device address of a display device to SPICE: "
+"Too many PCI devices in the chain.");
+return false;
+}
+
+return true;
+}
+
 int qemu_spice_add_display_interface(QXLInstance *qxlin, QemuConsole *con)
 {
 if (g_slist_find(spice_consoles, con)) {
diff --git a/ui/spice-display.c

[Qemu-devel] [PULL 12/15] build-sys: add gio-2.0 check

2019-02-21 Thread Gerd Hoffmann

From: Marc-André Lureau 

GIO is required for the "-display spice-app" backend.

Signed-off-by: Marc-André Lureau 
Tested-by: Victor Toso 
Message-id: 20190221110703.5775-9-marcandre.lur...@redhat.com
Signed-off-by: Gerd Hoffmann 
---
 configure | 13 +
 1 file changed, 13 insertions(+)

diff --git a/configure b/configure
index a61682c3c727..05d72f1c565b 100755
--- a/configure
+++ b/configure
@@ -3503,6 +3503,14 @@ for i in $glib_modules; do
 fi
 done
 
+if $pkg_config --atleast-version=$glib_req_ver gio-2.0; then
+gio=yes
+gio_cflags=$($pkg_config --cflags gio-2.0)
+gio_libs=$($pkg_config --libs gio-2.0)
+else
+gio=no
+fi
+
 # Sanity check that the current size_t matches the
 # size that glib thinks it should be. This catches
 # problems on multi-arch where people try to build
@@ -6520,6 +6528,11 @@ if test "$gtk" = "yes" ; then
 echo "CONFIG_GTK_GL=y" >> $config_host_mak
   fi
 fi
+if test "$gio" = "yes" ; then
+echo "CONFIG_GIO=y" >> $config_host_mak
+echo "GIO_CFLAGS=$gio_cflags" >> $config_host_mak
+echo "GIO_LIBS=$gio_libs" >> $config_host_mak
+fi
 echo "CONFIG_TLS_PRIORITY=\"$tls_priority\"" >> $config_host_mak
 if test "$gnutls" = "yes" ; then
   echo "CONFIG_GNUTLS=y" >> $config_host_mak
-- 
2.9.3

[Qemu-devel] [PULL 07/15] spice: avoid spice runtime assert

2019-02-21 Thread Gerd Hoffmann

From: Marc-André Lureau 

The Spice server doesn't like to be started or stopped twice. It
aborts with:

(process:6191): Spice-ERROR **: 19:29:35.912: 
red-worker.c:623:handle_dev_start: assertion `!worker->running' failed

It's easy to avoid that situation since qemu spice_display_is_running
tracks the server state.

After the commit "spice: do not stop spice if VM is paused", it will
be possible to pause and resume the VM, and this will call
qemu_spice_display_start() twice. The easiest is to add a check for
spice_display_is_running with this patch to avoid the assert.

Signed-off-by: Marc-André Lureau 
Tested-by: Victor Toso 
Message-id: 20190221110703.5775-4-marcandre.lur...@redhat.com
Signed-off-by: Gerd Hoffmann 
---
 ui/spice-core.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/ui/spice-core.c b/ui/spice-core.c
index 37fae3c42405..784fddff7d45 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -972,12 +972,20 @@ int qemu_spice_display_add_client(int csock, int 
skipauth, int tls)
 
 void qemu_spice_display_start(void)
 {
+if (spice_display_is_running) {
+return;
+}
+
 spice_display_is_running = true;
 spice_server_vm_start(spice_server);
 }
 
 void qemu_spice_display_stop(void)
 {
+if (!spice_display_is_running) {
+return;
+}
+
 spice_server_vm_stop(spice_server);
 spice_display_is_running = false;
 }
-- 
2.9.3

[Qemu-devel] [PULL 09/15] spice: do not stop spice if VM is paused

2019-02-21 Thread Gerd Hoffmann

From: Marc-André Lureau 

spice_server_vm_start/stop() was added to help migration state (commit
f5bb039c6d97ef3e664094eab3c9a4dc1824ed73).

However, a paused VM could keep running the spice server. This will
allow a Spice client to keep sending commands to a spice chardev. This
makes it possible to stop/cont a VM from a Spice monitor port. Character
devices (vdagent/usb/smartcard/..) should not read from Spice when the
VM is paused.

Signed-off-by: Marc-André Lureau 
Tested-by: Victor Toso 
Message-id: 20190221110703.5775-6-marcandre.lur...@redhat.com
Signed-off-by: Gerd Hoffmann 
---
 ui/spice-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ui/spice-core.c b/ui/spice-core.c
index 1cc996027719..4d384974ca34 100644
--- a/ui/spice-core.c
+++ b/ui/spice-core.c
@@ -628,7 +628,7 @@ static void vm_change_state_handler(void *opaque, int 
running,
 {
 if (running) {
 qemu_spice_display_start();
-} else {
+} else if (state != RUN_STATE_PAUSED) {
 qemu_spice_display_stop();
 }
 }
-- 
2.9.3

[Qemu-devel] [PULL 00/15] Ui 20190222 patches

2019-02-21 Thread Gerd Hoffmann

The following changes since commit 2e68b8620637a4ee8c79b5724144b726af1e261b:

  Merge remote-tracking branch 'remotes/dgibson/tags/ppc-for-4.0-20190219' into 
staging (2019-02-18 16:20:13 +0000)

are available in the git repository at:

  git://git.kraxel.org/qemu tags/ui-20190222-pull-request

for you to fetch changes up to d8aec9d9f129e6879ae0669623981892deff86f2:

  display: add -display spice-app launching a Spice client (2019-02-22 07:42:59 
+0100)


ui: add support for -display spice-app
ui: gtk+sdl bugfixes.



Gerd Hoffmann (2):
  kbd-state: don't block auto-repeat events
  sdl2: drop qemu_input_event_send_key_qcode call

Lukáš Hrázký (1):
  spice: set device address and device display ID in QXL interface

Marc-André Lureau (11):
  char/spice: trigger HUP event
  char/spice: discard write() if backend is disconnected
  spice: avoid spice runtime assert
  spice: merge options lists
  spice: do not stop spice if VM is paused
  char: move SpiceChardev and open_spice_port() to spice.h header
  char: register spice ports after spice started
  build-sys: add gio-2.0 check
  qapi: document DisplayType enum
  spice: use a default name for the server
  display: add -display spice-app launching a Spice client

Thomas Huth (1):
  ui/gtk: Fix the license information

 configure  |  13 +++
 include/chardev/spice.h|  27 ++
 include/ui/spice-display.h |   4 +
 chardev/spice.c|  60 --
 hw/display/qxl.c   |  14 +++-
 ui/gtk.c   |  30 +++
 ui/kbd-state.c |  16 ++--
 ui/sdl2-input.c|   3 -
 ui/spice-app.c | 202 +
 ui/spice-core.c|  64 +-
 ui/spice-display.c |  11 +++
 chardev/trace-events   |   1 +
 qapi/ui.json   |  32 ++-
 qemu-options.hx|   5 ++
 ui/Makefile.objs   |   5 ++
 15 files changed, 433 insertions(+), 54 deletions(-)
 create mode 100644 include/chardev/spice.h
 create mode 100644 ui/spice-app.c

-- 
2.9.3

[Qemu-devel] [PULL 13/15] qapi: document DisplayType enum

2019-02-21 Thread Gerd Hoffmann

From: Marc-André Lureau 

Signed-off-by: Marc-André Lureau 
Tested-by: Victor Toso 
Message-id: 20190221110703.5775-10-marcandre.lur...@redhat.com
Signed-off-by: Gerd Hoffmann 
---
 qapi/ui.json | 25 +
 1 file changed, 25 insertions(+)

diff --git a/qapi/ui.json b/qapi/ui.json
index 7d9c4bddaf12..7702ddf583a1 100644
--- a/qapi/ui.json
+++ b/qapi/ui.json
@@ -1085,6 +1085,31 @@
 #
 # Display (user interface) type.
 #
+# @default: The default user interface, selecting from the first available
+#   of gtk, sdl, cocoa, and vnc.
+#
+# @none: No user interface or video output display. The guest will
+#still see an emulated graphics card, but its output will not
+#be displayed to the QEMU user.
+#
+# @gtk: The GTK user interface.
+#
+# @sdl: The SDL user interface.
+#
+# @egl-headless: No user interface, offload GL operations to a local
+#DRI device. Graphical display need to be paired with
+#VNC or Spice. (Since 3.1)
+#
+# @curses: Display video output via curses.  For graphics device
+#  models which support a text mode, QEMU can display this
+#  output using a curses/ncurses interface. Nothing is
+#  displayed when the graphics device is in graphical mode or
+#  if the graphics device does not support a text
+#  mode. Generally only the VGA device models support text
+#  mode.
+#
+# @cocoa: The Cocoa user interface.
+#
 # Since: 2.12
 #
 ##
-- 
2.9.3

Re: [Qemu-devel] Testing sysbus devices

2019-02-21 Thread Markus Armbruster

Stephen Checkoway  writes:

>> On Feb 20, 2019, at 03:55, Laszlo Ersek  wrote:
>> 
>> I would strongly prefer if the guest-side view wouldn't change at all.
>
> It sounds like sector protection isn't something you want and it's not

László is content with the status quo, but I'm not.

> something I currently need so unless that changes, I probably won't do
> anything with it.

Pity.

> My goal is merely to implement some missing flash functionality that I
> need to emulate some hardware that I have. My plan for doing this is
> to not change any defaults (except for a few bug fixes) while doing
> so. I'm happy for the qemu community to take as much or as little as
> it finds useful.

Understand.

> I'll send a patch series for review in the normal fashion, but if
> anyone wants to see my in-progress work, including tests, the diff is
> available here
> .
>
> For my own edification, I'm curious how you're currently dealing with
> some regions of flash that are protected. I believe Markus mentioned
> using multiple flash devices. Are you overlapping the address ranges?

UEFI wants to store some persistent state in flash memory.  Real PCs
have a single flash chip with a suitable part configured to be writable
for firmware.

Since our flash device models can't do that (yet?), we worked around the
missing functionality by exposing two separate flash chips to guests,
one read-only, one writable for firmware.  The two are adjacent, no gap,
with the boundary aligned to 4KiB (page size).

Our track record for doing whatever real hardware does has been okay.
The track record for our own good-enough inventions less so.  I'm not
claiming this one is about to explode into our faces.  Still, I'd like
to clean it up if practical.  If not for PCs (say because complications
for OVMF render that less than practical), then at least for other, less
encumbered machines.

Would be nice if you could pitch in a bit.

Way, way more than you ever wanted to know on configuring flash for PCs:

Subject: Configuring pflash devices for OVMF firmware
Message-ID: <87y378n5iy@dusky.pond.sub.org>
https://lists.nongnu.org/archive/html/qemu-devel/2019-01/msg06606.html

> The current pflash_cfi02.c code assumes, but doesn't check that both
> the total size of the chip as well as the size of each sector is a
> power of two. If you wanted say 7 MB of read/write flash and 1 MB of
> read-only flash, qemu might be willing to create a device with say 7
> MB of storage, but it will definitely misbehave. (I added a check for
> that here
> .)

Awesome.  The magic setup code in hw/i386/pc_sysfw.c will happily create
any size that's a multiple of 4KiB.  The current sizes are 128KiB
writable (power of two, good) and 2MiB - 128KiB for read-only (very much
not a power of two, possibly bad).

Can you tell us a bit more about what exactly can go wrong?

Re: [Qemu-devel] [Bug 1817239] [NEW] add '--targets' option to qemu-binfmt-conf.sh

2019-02-21 Thread Laurent Vivier

On 22/02/2019 04:31, Launchpad Bug Tracker wrote:
> I'd like to ask for the addition of option '--targets' to scripts/qemu-
> binfmt-conf.sh, in order to allow registering the interpreters for the
> given list of architectures only, instead of using all of the ones
> defined in qemu_target_list. The following is a possible patch that
> implements it:
> 
>   qemu-binfmt-conf.sh | 9 -
>   1 file changed, 8 insertions(+), 1 deletion(-)

Please send your patch to the qemu-devel mailing list.

I think it's a good idea but we should homogenize "--debian" and 
"--systemd": remove the parameter from  "--systemd" to generate by 
default all the targets, and allow the user to provide a subset of the 
targets using the newly introduced "--target" parameter.

Thanks,
Laurent

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1817239

Title:
  add '--targets' option to qemu-binfmt-conf.sh

Status in QEMU:
  New

Bug description:
  I'd like to ask for the addition of option '--targets' to scripts
  /qemu-binfmt-conf.sh, in order to allow registering the interpreters
  for the given list of architectures only, instead of using all of the
  ones defined in qemu_target_list. The following is a possible patch
  that implements it:

   qemu-binfmt-conf.sh | 9 -
   1 file changed, 8 insertions(+), 1 deletion(-)

  diff --git a/qemu-binfmt-conf.sh b/qemu-binfmt-conf.sh
  index b5a1674..be4a19b 100644
  --- a/qemu-binfmt-conf.sh
  +++ b/qemu-binfmt-conf.sh
  @@ -170,6 +170,7 @@ usage() {
   Usage: qemu-binfmt-conf.sh [--qemu-path PATH][--debian][--systemd CPU]
  [--help][--credential yes|no][--exportdir PATH]
  [--persistent yes|no][--qemu-suffix SUFFIX]
  +   [--targets TARGETS]

  Configure binfmt_misc to use qemu interpreter

  @@ -189,6 +190,8 @@ Usage: qemu-binfmt-conf.sh [--qemu-path 
PATH][--debian][--systemd CPU]
  --persistent:  if yes, the interpreter is loaded when binfmt is
 configured and remains in memory. All future uses
 are cloned from the open file.
  +   --targets: comma-separated list of targets. If provided, only
  +  the targets in the list are registered.

   To import templates with update-binfmts, use :

  @@ -324,7 +327,7 @@ CREDENTIAL=no
   PERSISTENT=no
   QEMU_SUFFIX=""

  -options=$(getopt -o ds:Q:S:e:hc:p: -l 
debian,systemd:,qemu-path:,qemu-suffix:,exportdir:,help,credential:,persistent: 
-- "$@")
  +options=$(getopt -o ds:Q:S:e:hc:p:t: -l 
debian,systemd:,qemu-path:,qemu-suffix:,exportdir:,help,credential:,persistent:,targets:
 -- "$@")
   eval set -- "$options"

   while true ; do
  @@ -380,6 +383,10 @@ while true ; do
   shift
   PERSISTENT="$1"
   ;;
  +-t|--targets)
  +shift
  +qemu_target_list="$(echo "$1" | tr ',' ' ')"
  +;;
   *)
   break
   ;;
  --
  2.20.1

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1817239/+subscriptions

Re: [Qemu-devel] [PATCH v7 03/17] hw/arm/virt: Split the memory map description

2019-02-21 Thread Heyi Guo


Hi Eric,

Can't we still use one single memory map and update the base of every entry 
following VIRT_MEM? So that we don't need to split memory map or the 
enumeration definition, neither do we need to copy a15memmap into the extended 
memmap.

Thanks,

Heyi


On 2019/2/21 6:39, Eric Auger wrote:

In the prospect to introduce an extended memory map supporting more
RAM, let's split the memory map array into two parts:

- the former a15memmap contains regions below and including the RAM
- extended_memmap, only initialized with entries located after the RAM.
   Only the size of the region is initialized there since their base
   address will be dynamically computed, depending on the top of the
   RAM (initial RAM at the moment), with same alignment as their size.

This new split will allow to grow the RAM size without changing the
description of the high regions.

The patch also moves the memory map setup into machvirt_init().
The rationale is the memory map will be soon affected by the
kvm_type() call that happens after virt_instance_init() and
before machvirt_init().

The memory map is unchanged (the top of the initial RAM still is
256GiB). Then come the high IO regions with same layout as before.

Signed-off-by: Eric Auger 
Reviewed-by: Peter Maydell 

---
v6 -> v7:
- s/a15memmap/base_memmap
- slight rewording of the commit message
- add "if there is less than 256GiB of RAM then the floating area
   starts at the 256GiB mark" in the comment associated to the floating
   memory map
- Added Peter's R-b

v5 -> v6
- removal of many macros in units.h
- introduce the virt_set_memmap helper
- new computation for offsets of high IO regions
- add comments
---
  hw/arm/virt.c | 48 +--
  include/hw/arm/virt.h | 14 +
  2 files changed, 52 insertions(+), 10 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index a1955e7764..12039a0367 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -29,6 +29,7 @@
   */
  
  #include "qemu/osdep.h"

+#include "qemu/units.h"
  #include "qapi/error.h"
  #include "hw/sysbus.h"
  #include "hw/arm/arm.h"
@@ -121,7 +122,7 @@
   * Note that devices should generally be placed at multiples of 0x10000,
   * to accommodate guests using 64K pages.
   */
-static const MemMapEntry a15memmap[] = {
+static const MemMapEntry base_memmap[] = {
  /* Space up to 0x8000000 is reserved for a boot ROM */
  [VIRT_FLASH] =  {  0, 0x08000000 },
  [VIRT_CPUPERIPHS] = { 0x08000000, 0x00020000 },
@@ -149,11 +150,21 @@ static const MemMapEntry a15memmap[] = {
  [VIRT_PCIE_PIO] =   { 0x3eff0000, 0x00010000 },
  [VIRT_PCIE_ECAM] =  { 0x3f000000, 0x01000000 },
  [VIRT_MEM] ={ 0x40000000, RAMLIMIT_BYTES },
+};
+
+/*
+ * Highmem IO Regions: This memory map is floating, located after the RAM.
+ * Each IO region offset will be dynamically computed, depending on the
+ * top of the RAM, so that its base get the same alignment as the size,
+ * ie. a 512GiB region will be aligned on a 512GiB boundary. If there is
+ * less than 256GiB of RAM, the floating area starts at the 256GiB mark.
+ */
+static MemMapEntry extended_memmap[] = {
  /* Additional 64 MB redist region (can contain up to 512 redistributors) 
*/
-[VIRT_HIGH_GIC_REDIST2] =   { 0x4000000000ULL, 0x4000000 },
-[VIRT_HIGH_PCIE_ECAM] = { 0x4010000000ULL, 0x10000000 },
-/* Second PCIe window, 512GB wide at the 512GB boundary */
-[VIRT_HIGH_PCIE_MMIO] = { 0x8000000000ULL, 0x8000000000ULL },
+[VIRT_HIGH_GIC_REDIST2] =   { 0x0, 64 * MiB },
+[VIRT_HIGH_PCIE_ECAM] = { 0x0, 256 * MiB },
+/* Second PCIe window */
+[VIRT_HIGH_PCIE_MMIO] = { 0x0, 512 * GiB },
  };
  
  static const int a15irqmap[] = {

@@ -1354,6 +1365,30 @@ static uint64_t virt_cpu_mp_affinity(VirtMachineState 
*vms, int idx)
  return arm_cpu_mp_affinity(idx, clustersz);
  }
  
+static void virt_set_memmap(VirtMachineState *vms)

+{
+hwaddr base;
+int i;
+
+vms->memmap = extended_memmap;
+
+for (i = 0; i < ARRAY_SIZE(base_memmap); i++) {
+vms->memmap[i] = base_memmap[i];
+}
+
+vms->high_io_base = 256 * GiB; /* Top of the legacy initial RAM region */
+base = vms->high_io_base;
+
+for (i = VIRT_LOWMEMMAP_LAST; i < ARRAY_SIZE(extended_memmap); i++) {
+hwaddr size = extended_memmap[i].size;
+
+base = ROUND_UP(base, size);
+vms->memmap[i].base = base;
+vms->memmap[i].size = size;
+base += size;
+}
+}
+
  static void machvirt_init(MachineState *machine)
  {
  VirtMachineState *vms = VIRT_MACHINE(machine);
@@ -1368,6 +1403,8 @@ static void machvirt_init(MachineState *machine)
  bool firmware_loaded = bios_name || drive_get(IF_PFLASH, 0, 0);
  bool aarch64 = true;
  
+virt_set_memmap(vms);

+
  /* We can probe only here because during property set
   * KVM is not available yet
   */
@@ -1843,7 +1880,6

Re: [Qemu-devel] [PATCH 05/10] ppc405_boards: Don't size flash memory to match backing image

2019-02-21 Thread Markus Armbruster

David Gibson  writes:

> On Thu, Feb 21, 2019 at 05:31:30PM +0100, Markus Armbruster wrote:
>> Alex Bennée  writes:
>> 
>> > Markus Armbruster  writes:
>> >
>> >> Machine "ref405ep" maps its flash memory at address 2^32 - image size.
>> >> Image size is rounded up to the next multiple of 64KiB.  Useless,
>> >> because pflash_cfi02_realize() fails with "failed to read the initial
>> >> flash content" unless the rounding is a no-op.
>> >>
>> >> If the image size exceeds 0x80000 Bytes, we overlap first SRAM, then
>> >> other stuff.  No idea how that would play out, but a useful outcome
>> >> seems unlikely.
>> >>
>> >> Map the flash memory at fixed address 0xFFF80000 with size 512KiB,
>> >> regardless of image size, to match the physical hardware.
>> >>
>> >> Machine "taihu" maps its boot flash memory similarly.  The code even
>> >> has a comment /* XXX: should check that size is 2MB */, followed by
>> >> disabled code to adjust the size to 2MiB regardless of image size.
>> >>
>> >> Its code to map its application flash memory looks the same, except
>> >> there the XXX comment asks for 32MiB, and the code to adjust the size
>> >> isn't disabled.  Note that pflash_cfi02_realize() fails with "failed
>> >> to read the initial flash content" for images smaller than 32MiB.
>> >>
>> >> Map the boot flash memory at fixed address 0xFFE00000 with size 2MiB,
>> >> to match the physical hardware.  Delete dead code from application
>> >> flash mapping, and simplify some.
>> >>
>> >> Cc: David Gibson 
>> >> Signed-off-by: Markus Armbruster 
>> >> ---
>> >>  hw/ppc/ppc405_boards.c | 53 +-
>> >>  1 file changed, 16 insertions(+), 37 deletions(-)
>> >>
>> >> diff --git a/hw/ppc/ppc405_boards.c b/hw/ppc/ppc405_boards.c
>> >> index f47b15f10e..728154aebb 100644
>> >> --- a/hw/ppc/ppc405_boards.c
>> >> +++ b/hw/ppc/ppc405_boards.c
>> >> @@ -158,7 +158,7 @@ static void ref405ep_init(MachineState *machine)
>> >>  target_ulong kernel_base, initrd_base;
>> >>  long kernel_size, initrd_size;
>> >>  int linux_boot;
>> >> -int fl_idx, fl_sectors, len;
>> >> +int len;
>> >>  DriveInfo *dinfo;
>> >>  MemoryRegion *sysmem = get_system_memory();
>> >>
>> >> @@ -185,26 +185,19 @@ static void ref405ep_init(MachineState *machine)
>> >>  #ifdef DEBUG_BOARD_INIT
>> >>  printf("%s: register BIOS\n", __func__);
>> >>  #endif
>> >> -fl_idx = 0;
>> >>  #ifdef USE_FLASH_BIOS
>> >> -dinfo = drive_get(IF_PFLASH, 0, fl_idx);
>> >> +dinfo = drive_get(IF_PFLASH, 0, 0);
>> >>  if (dinfo) {
>> >> -BlockBackend *blk = blk_by_legacy_dinfo(dinfo);
>> >> -
>> >> -bios_size = blk_getlength(blk);
>> >> -fl_sectors = (bios_size + 65535) >> 16;
>> >>  #ifdef DEBUG_BOARD_INIT
>> >> -printf("Register parallel flash %d size %lx"
>> >> -   " at addr %lx '%s' %d\n",
>> >> -   fl_idx, bios_size, -bios_size,
>> >> -   blk_name(blk), fl_sectors);
>> >> +printf("Register parallel flash\n");
>> >>  #endif
>> >> -pflash_cfi02_register((uint32_t)(-bios_size),
>> >> +bios_size = 0x80000;
>> >
>> >  bios_size = 8 * MiB?
>> 
>> The next line has base address 0xFFF80000.  I picked 0x80000 to make
>> 0xFFF80000 + 0x80000 == 0 mod 2^32 more obvious.
>> 
>> If I change 0x80000 to 8 * MiB, the size is more obvious, but "at end of
>> 32 bit address space" less so.
>> 
>> If I additionally change the base address back to ((uint32_t)-bios_size,
>> "at end of 32 bit address space" is obvious again, but the actual base
>> address less so.
>
> I have a weak preference for ((uint32_t)-bios_size), with bios_size =
> 8 * MiB.
>
>> 
>> I don't really care myself.  David, you're the maintainer, do you have a
>> preference?
>> 
>> >> +pflash_cfi02_register(0xFFF80000,
>> >>NULL, "ef405ep.bios", bios_size,
>> >> -  blk, 65536, fl_sectors, 1,
>> >> +  dinfo ? blk_by_legacy_dinfo(dinfo) : NULL,
>> >> +  65536, bios_size / 65536, 1,
>> >
>> > 64 * KiB?
>> 
>> David, same question (two additional instances below).
>
> Here I think 64 * KiB would be nice in each of those places.  Again,
> only a weak preference.

Your weak preference is enough to tip my scales.  Thanks!

Re: [Qemu-devel] [PATCH 02/10] pflash: Macro PFLASH_BUG() is used just once, expand

2019-02-21 Thread Markus Armbruster

Philippe Mathieu-Daudé  writes:

> On 2/21/19 10:38 AM, Peter Maydell wrote:
>> On Thu, 21 Feb 2019 at 09:22, Markus Armbruster  wrote:
>>> Double-checking... you want me to keep goto reset_flash, like this:
>>>
>>> @@ -623,8 +617,8 @@ static void pflash_write(PFlashCFI01 *pfl, hwaddr 
>>> offset,
>>>  pfl->wcycle = 0;
>>>  pfl->status |= 0x80;
>>>  } else {
>>> -DPRINTF("%s: unknown command for \"write block\"\n", 
>>> __func__);
>>> -PFLASH_BUG("Write block confirm");
>>> +qemu_log_mask(LOG_GUEST_ERROR,
>>> +  "unknown command for \"write block\"\n");
>>>  goto reset_flash;
>>>  }
>>>  break;
>> 
>> Yes. (We seem to handle most kinds of guest errors in programming
>> the flash by reset_flash.)
>
> Oh I missed the context of the patch here.
>
> So for the case of the Multi-WRITE command (0xe8):

Since I'm a clueless idiot on pflash, I need to process your argument
real slow, so I can write a commit message that doesn't document my
cluelessness forever.

We have a little state machine, and its state is encoded in
pfl->wcycle, pfl->cmd, pfl->counter.  I'm going to show it as

(value of pfl->wcycle, value of pfl->cmd, value of pfl->counter)

for brevity.

We start with (0, don't care, don't care).

A guest write sends us a width, an address, and a value.
pflash_mem_write_with_attrs() does permission checking, and
pflash_write() the actual work.  We enter it with @offset, @value and
@width holding the message.

cmd = value;

trace_pflash_write(offset, value, width, pfl->wcycle);
if (!pfl->wcycle) {
/* Set the device in I/O access mode */
memory_region_rom_device_set_romd(&pfl->mem, false);
}

@cmd is @value truncated to 8 bits.

> 1/ On first write cycle we have
>
>   - address = flash_page_address (we store it in pfl->counter)
>   - data = flash_command (0xe8: enter Multi-WRITE)

switch (pfl->wcycle) {
case 0:
/* read mode */
switch (cmd) {
[...]
case 0xe8: /* Write to buffer */
DPRINTF("%s: Write to buffer\n", __func__);
pfl->status |= 0x80; /* Ready! */
break;
[...]
pfl->wcycle++;
pfl->cmd = cmd;
break;

Transition from (0, don't care, don't care) to (1, 0xE8, don't care).

I can't see "we store it in pfl->counter".

Note that the address (passed in @offset) is entirely ignored.

> 2/ Second cycle:
>
>   - address = flash_page_address
> We should check it matches flash_page_address
> of cycle 1/, but we don't.
>   - data: N
>
> "N is the number of elements (bytes / words / double words),
> minus one, to be written to the write buffer. Expected count
> ranges are N = 00h to N = 7Fh (e.g., 1 to 128 bytes) in 8-bit
> mode, N = 00h to N = 003Fh in 16-bit mode, and N = 00h to
> N = 1Fh in 32-bit mode. Bus cycles 3 and higher are for writing
> data into the write buffer. The confirm command (D0h) is
> expected after exactly N + 1 write cycles; any other command at
> that point in the sequence will prevent the transfer of the
> buffer to the array (the write will be aborted)."
>
> Instead of starting to write the data in a buffer, we write it
> directly to the block backend.

case 1:
switch (pfl->cmd) {
[...]
case 0xe8:
/* Mask writeblock size based on device width, or bank width if
 * device width not specified.
 */
if (pfl->device_width) {
value = extract32(value, 0, pfl->device_width * 8);
} else {
value = extract32(value, 0, pfl->bank_width * 8);
}
DPRINTF("%s: block write of %x bytes\n", __func__, value);
pfl->counter = value;
pfl->wcycle++;
break;
[...]
}
break;

Transition from (1, 0xE8, don't care) to (2, 0xE8, N), where N is passed
in @value.

Again, the address (passed in @offset) is ignored.

Nothing is written to the block backend, yet.

> Instead of starting to write from cycle 3+, we write now in 2,
> and keep cycle count == 2 (pfl->wcycle) until all data is
> written, where we increment at 3.

case 2:
switch (pfl->cmd) {
case 0xe8: /* Block write */
if (!pfl->ro) {
pflash_data_write(pfl, offset, value, width, be);
} else {
pfl->status |= 0x10; /* Programming error */
}

Write to memory, with pflash_data_write(), but don't flush to the
backend, yet.  This is (guest-visibly!) wrong.  It's not quite "instead
of starting to write the data in a buffer, we write it directly to the
block backend."

Note that we happily accept any address and width.  I suspect we should
only accept consecutive addresses and consistent width.

pfl->status |= 0x80;

Re: [Qemu-devel] [PATCH v6 1/7] vhost-user: Support transferring inflight buffer between qemu and backend

2019-02-21 Thread Yongji Xie

On Fri, 22 Feb 2019 at 14:21, Michael S. Tsirkin  wrote:
>
> On Fri, Feb 22, 2019 at 10:47:03AM +0800, Yongji Xie wrote:
> > > > +
> > > > +To track inflight I/O, the queue region should be processed as follows:
> > > > +
> > > > +When receiving available buffers from the driver:
> > > > +
> > > > +1. Get the next available head-descriptor index from available 
> > > > ring, i
> > > > +
> > > > +2. Set desc[i].inflight to 1
> > > > +
> > > > +When supplying used buffers to the driver:
> > > > +
> > > > +1. Get corresponding used head-descriptor index, i
> > > > +
> > > > +2. Set desc[i].next to process_head
> > > > +
> > > > +3. Set process_head to i
> > > > +
> > > > +4. Steps 1,2,3 may be performed repeatedly if batching is possible
> > > > +
> > > > +5. Increase the idx value of used ring by the size of the batch
> > > > +
> > > > +6. Set the inflight field of each DescStateSplit entry in the 
> > > > batch to 0
> > > > +
> > > > +7. Set used_idx to the idx value of used ring
> > > > +
> > > > +When reconnecting:
> > > > +
> > > > +1. If the value of used_idx does not match the idx value of used 
> > > > ring,
> > > > +
> > > > +(a) Subtract the value of used_idx from the idx value of used 
> > > > ring to get
> > > > +the number of in-progress DescStateSplit entries
> > > > +
> > > > +(b) Set the inflight field of the in-progress DescStateSplit 
> > > > entries which
> > > > +start from process_head to 0
> > > > +
> > > > +(c) Set used_idx to the idx value of used ring
> > > > +
> > > > +2. Resubmit each inflight DescStateSplit entry
> > >
> > > I re-read a couple of time and I still don't understand what it says.
> > >
> > > For simplicity consider split ring. So we want a list of heads that are
> > > outstanding. Fair enough. Now device finishes a head. What now? I needs
> > > to drop head from the list. But list is unidirectional (just next, no
> > > prev). So how can you drop an entry from the middle?
> > >
> >
> > The process_head is only used when slave crash between increasing the
> > idx value of used ring and updating used_idx. We use it to find the
> > in-progress DescStateSplit entries before the crash and complete them
> > when reconnecting. Make sure guest and slave have the same view for
> > inflight I/Os.
> >
>
> But I don't understand how does the described process help do it?
>

For example, we need to submit descriptors A, B, C to driver in a batch.

Firstly, we will link those descriptors like:

process_head->A->B->C(A)

Then, we need to update idx value of used vring to mark those
descriptors as used:

_vring.used->idx += 3(B)

At last, clear the inflight field of those descriptors and update
used_idx field:

A.inflight = 0; B.inflight = 0; C.inflight = 0;(C)

used_idx = _vring.used->idx;(D)

After (B), the guest can consume the descriptors A,B,C. So we must make
sure the inflight field of A,B,C is cleared when reconnecting to avoid
re-submitting used descriptors. If the slave crashes during (C), the
inflight field of A,B,C may be incorrect. To detect that case, we can
check whether used_idx matches _vring.used->idx. And through process_head,
we can get the in-progress descriptors A,B,C and clear their inflight
field again when reconnecting.

>
> > In other case, the inflight field is enough to track inflight I/O.
> > When reconnecting, we go through all DescStateSplit entries and
> > re-submit the entry whose inflight field is equal to 1.
>
> What I don't understand is how do we know the order
> in which they have to be resubmitted. Reordering
> operations would be a big problem, won't it?
>

In previous patch, I record avail_idx for each DescStateSplit entry to
preserve the order. Is it useful to fix this?

>
> Let's say I fetch descriptors A, B, C and start
> processing. how does memory look?

A.inflight = 1, C.inflight = 1, B.inflight = 1

> Now I finished B and marked it used. How does
> memory look?
>

A.inflight = 1, C.inflight = 1, B.inflight = 0, process_head = B

> I also wonder how do you address a crash between
> marking descriptor used and clearing inflight.
> Will you redo the descriptor? Is it always safe?
> What if it's a write?
>

It's safe. We can get the in-progress descriptors through process_head
and clear their inflight field when reconnecting.

Thanks,
Yongji

Re: [Qemu-devel] [PATCH 4/4] iothread: push gcontext earlier in the thread_fn

2019-02-21 Thread Peter Xu

On Fri, Feb 22, 2019 at 07:37:02AM +0100, Marc-André Lureau wrote:
> Hi
> 
> On Fri, Feb 22, 2019 at 4:14 AM Peter Xu  wrote:
> >
> > We were pushing the context until right before running the gmainloop.
> > Now since we have everything unconditionally, we can move this
> > earlier.
> >
> > One benefit is that now it's done even before init_done_sem, so as
> > long as the iothread user calls iothread_create() and completes, we
> > know that the thread stack is ready.
> >
> 
> This will change the default context in the iothread, for code running
> there. This may not be a good idea. Until now, only sources dispatched
> from iothread_get_g_main_context() would have default context
> associated to it.
> 
> I don't know if the current behaviour is intentional, but it has some
> logic. With this change, you may create hidden races, by changing the
> default context of sources to the iothread.

Yes I agree that the behavior will be changed in this patch that even
if the iothread user does not use the gcontext they'll also have the
context set.  I would think it should be ok because IMHO events hooked
onto the aio context should not depend on the gcontext, but indeed I'd
like to get some confirmation from others, especially the block layer.

Stefan?

Thanks,

-- 
Peter Xu

Re: [Qemu-devel] [PATCH 2/4] iothread: create the gcontext onconditionally

2019-02-21 Thread Peter Xu

On Fri, Feb 22, 2019 at 07:29:09AM +0100, Marc-André Lureau wrote:

[...]

> > diff --git a/iothread.c b/iothread.c
> > index 6e297e9ef1..6fa87876e0 100644
> > --- a/iothread.c
> > +++ b/iothread.c
> > @@ -65,7 +65,7 @@ static void *iothread_run(void *opaque)
> >   * We must check the running state again in case it was
> >   * changed in previous aio_poll()
> >   */
> > -if (iothread->running && atomic_read(&iothread->worker_context)) {
> > +if (iothread->running && atomic_read(&iothread->run_gcontext)) {
> >  GMainLoop *loop;
> >
> >  g_main_context_push_thread_default(iothread->worker_context);
> > @@ -114,6 +114,8 @@ static void iothread_instance_init(Object *obj)
> >  iothread->poll_max_ns = IOTHREAD_POLL_MAX_NS_DEFAULT;
> >  iothread->thread_id = -1;
> >  qemu_sem_init(&iothread->init_done_sem, 0);
> > +/* By default, we don't run gcontext */
> > +atomic_set(&iothread->run_gcontext, 0);
> 
> I think that initialization isn't really necessary, your call.

True; it's more a hint for readers who suddenly jumped into
iothread_run.  Thanks for being kind, since I would prefer to have it
for now. ;)

[...]

> looks good otherwise,
> Reviewed-by: Marc-André Lureau 

Thanks for the quick review,

-- 
Peter Xu

Re: [Qemu-devel] [PATCH 1/4] iothread: replace init_done_cond with a semaphore

2019-02-21 Thread Peter Xu

On Fri, Feb 22, 2019 at 07:25:16AM +0100, Marc-André Lureau wrote:
> Hi
> 
> On Fri, Feb 22, 2019 at 4:14 AM Peter Xu  wrote:
> >
> > Only sending an init-done message using lock+cond seems an overkill to
> > me.  Replacing it with a simpler semaphore.
> >
> > Meanwhile, init the semaphore unconditionally, then we can destroy it
> > unconditionally too in finalize which seems cleaner.
> >
> > Signed-off-by: Peter Xu 
> 
> The lock is also protecting thread_id.

IMHO it's fine because thread_id is only changed at the beginning of
iothread_run where the caller will definitely wait for the thread_id
to be generated.  Here qemu_sem_post() should at least contain one
write memory barrier there to make sure the waker will read the
correct value after sem_wait() and then later on thread_id is never
changed.

Regards,

-- 
Peter Xu

Re: [Qemu-devel] [PATCH 4/4] iothread: push gcontext earlier in the thread_fn

2019-02-21 Thread Marc-André Lureau

Hi

On Fri, Feb 22, 2019 at 4:14 AM Peter Xu  wrote:
>
> We were pushing the context until right before running the gmainloop.
> Now since we have everything unconditionally, we can move this
> earlier.
>
> One benefit is that now it's done even before init_done_sem, so as
> long as the iothread user calls iothread_create() and completes, we
> know that the thread stack is ready.
>

This will change the default context in the iothread, for code running
there. This may not be a good idea. Until now, only sources dispatched
from iothread_get_g_main_context() would have default context
associated to it.

I don't know if the current behaviour is intentional, but it has some
logic. With this change, you may create hidden races, by changing the
default context of sources to the iothread.

> Signed-off-by: Peter Xu 
> ---
>  iothread.c | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/iothread.c b/iothread.c
> index 9abdbace66..7b7cba5d04 100644
> --- a/iothread.c
> +++ b/iothread.c
> @@ -53,7 +53,7 @@ static void *iothread_run(void *opaque)
>  IOThread *iothread = opaque;
>
>  rcu_register_thread();
> -
> +g_main_context_push_thread_default(iothread->worker_context);
>  my_iothread = iothread;
>  iothread->thread_id = qemu_get_thread_id();
>  qemu_sem_post(&iothread->init_done_sem);
> @@ -66,12 +66,11 @@ static void *iothread_run(void *opaque)
>   * changed in previous aio_poll()
>   */
>  if (iothread->running && atomic_read(&iothread->run_gcontext)) {
> -g_main_context_push_thread_default(iothread->worker_context);
>  g_main_loop_run(iothread->main_loop);
> -g_main_context_pop_thread_default(iothread->worker_context);
>  }
>  }
>
> +g_main_context_pop_thread_default(iothread->worker_context);
>  rcu_unregister_thread();
>  return NULL;
>  }
> --
> 2.17.1
>

Re: [Qemu-devel] [PATCH 3/4] iothread: create main loop unconditionally

2019-02-21 Thread Marc-André Lureau

On Fri, Feb 22, 2019 at 4:14 AM Peter Xu  wrote:
>
> Since we have the gcontext always there, create the main loop
> altogether.  The iothread_run() is even cleaner.
>
> Signed-off-by: Peter Xu 

Reviewed-by: Marc-André Lureau 

> ---
>  iothread.c | 12 +++-
>  1 file changed, 3 insertions(+), 9 deletions(-)
>
> diff --git a/iothread.c b/iothread.c
> index 6fa87876e0..9abdbace66 100644
> --- a/iothread.c
> +++ b/iothread.c
> @@ -66,17 +66,8 @@ static void *iothread_run(void *opaque)
>   * changed in previous aio_poll()
>   */
>  if (iothread->running && atomic_read(&iothread->run_gcontext)) {
> -GMainLoop *loop;
> -
>  g_main_context_push_thread_default(iothread->worker_context);
> -iothread->main_loop =
> -g_main_loop_new(iothread->worker_context, TRUE);
> -loop = iothread->main_loop;
> -
>  g_main_loop_run(iothread->main_loop);
> -iothread->main_loop = NULL;
> -g_main_loop_unref(loop);
> -
>  g_main_context_pop_thread_default(iothread->worker_context);
>  }
>  }
> @@ -141,6 +132,8 @@ static void iothread_instance_finalize(Object *obj)
>  if (iothread->worker_context) {
>  g_main_context_unref(iothread->worker_context);
>  iothread->worker_context = NULL;
> +g_main_loop_unref(iothread->main_loop);
> +iothread->main_loop = NULL;
>  }
>  qemu_sem_destroy(&iothread->init_done_sem);
>  }
> @@ -153,6 +146,7 @@ static void iothread_init_gcontext(IOThread *iothread)
>  source = aio_get_g_source(iothread_get_aio_context(iothread));
>  g_source_attach(source, iothread->worker_context);
>  g_source_unref(source);
> +iothread->main_loop = g_main_loop_new(iothread->worker_context, TRUE);
>  }
>
>  static void iothread_complete(UserCreatable *obj, Error **errp)
> --
> 2.17.1
>

Re: [Qemu-devel] [PATCH 2/4] iothread: create the gcontext onconditionally

2019-02-21 Thread Marc-André Lureau

Hi

On Fri, Feb 22, 2019 at 4:14 AM Peter Xu  wrote:
>
> In existing code we create the gcontext dynamically at the first
> access of the gcontext from caller.  That can bring some complexity
> and potential races during using iothread.  Since the context itself
> is not that big a resource, and we won't have millions of iothread,
> let's simply create the gcontext unconditionally.
>
> This will also be a preparation work further to move the thread
> context push operation earlier than before (now it's only pushed right
> before we want to start running the gmainloop).
>
> Removing the g_once since it's not necessary, while introducing a new
> run_gcontext boolean to show whether we want to run the gcontext.
>
> Signed-off-by: Peter Xu 
> ---
>  include/sysemu/iothread.h |  2 +-
>  iothread.c| 43 +++
>  2 files changed, 22 insertions(+), 23 deletions(-)
>
> diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h
> index 50411ba54a..5f6240d5cb 100644
> --- a/include/sysemu/iothread.h
> +++ b/include/sysemu/iothread.h
> @@ -24,9 +24,9 @@ typedef struct {
>
>  QemuThread thread;
>  AioContext *ctx;
> +bool run_gcontext;  /* whether we should run gcontext */
>  GMainContext *worker_context;
>  GMainLoop *main_loop;
> -GOnce once;
>  QemuSemaphore init_done_sem; /* is thread init done? */
>  bool stopping;  /* has iothread_stop() been called? */
>  bool running;   /* should iothread_run() continue? */
> diff --git a/iothread.c b/iothread.c
> index 6e297e9ef1..6fa87876e0 100644
> --- a/iothread.c
> +++ b/iothread.c
> @@ -65,7 +65,7 @@ static void *iothread_run(void *opaque)
>   * We must check the running state again in case it was
>   * changed in previous aio_poll()
>   */
> -if (iothread->running && atomic_read(&iothread->worker_context)) {
> +if (iothread->running && atomic_read(&iothread->run_gcontext)) {
>  GMainLoop *loop;
>
>  g_main_context_push_thread_default(iothread->worker_context);
> @@ -114,6 +114,8 @@ static void iothread_instance_init(Object *obj)
>  iothread->poll_max_ns = IOTHREAD_POLL_MAX_NS_DEFAULT;
>  iothread->thread_id = -1;
>  qemu_sem_init(&iothread->init_done_sem, 0);
> +/* By default, we don't run gcontext */
> +atomic_set(&iothread->run_gcontext, 0);

I think that initialization isn't really necessary, your call.

>  }
>
>  static void iothread_instance_finalize(Object *obj)
> @@ -143,6 +145,16 @@ static void iothread_instance_finalize(Object *obj)
>  qemu_sem_destroy(&iothread->init_done_sem);
>  }
>
> +static void iothread_init_gcontext(IOThread *iothread)
> +{
> +GSource *source;
> +
> +iothread->worker_context = g_main_context_new();
> +source = aio_get_g_source(iothread_get_aio_context(iothread));
> +g_source_attach(source, iothread->worker_context);
> +g_source_unref(source);
> +}
> +
>  static void iothread_complete(UserCreatable *obj, Error **errp)
>  {
>  Error *local_error = NULL;
> @@ -157,6 +169,12 @@ static void iothread_complete(UserCreatable *obj, Error 
> **errp)
>  return;
>  }
>
> +/*
> + * Init one GMainContext for the iothread unconditionally, even if
> + * it's not used
> + */
> +iothread_init_gcontext(iothread);
> +
>  aio_context_set_poll_params(iothread->ctx,
>  iothread->poll_max_ns,
>  iothread->poll_grow,
> @@ -169,8 +187,6 @@ static void iothread_complete(UserCreatable *obj, Error 
> **errp)
>  return;
>  }
>
> -iothread->once = (GOnce) G_ONCE_INIT;
> -
>  /* This assumes we are called from a thread with useful CPU affinity for 
> us
>   * to inherit.
>   */
> @@ -333,27 +349,10 @@ IOThreadInfoList *qmp_query_iothreads(Error **errp)
>  return head;
>  }
>
> -static gpointer iothread_g_main_context_init(gpointer opaque)
> -{
> -AioContext *ctx;
> -IOThread *iothread = opaque;
> -GSource *source;
> -
> -iothread->worker_context = g_main_context_new();
> -
> -ctx = iothread_get_aio_context(iothread);
> -source = aio_get_g_source(ctx);
> -g_source_attach(source, iothread->worker_context);
> -g_source_unref(source);
> -
> -aio_notify(iothread->ctx);
> -return NULL;
> -}
> -
>  GMainContext *iothread_get_g_main_context(IOThread *iothread)
>  {
> -g_once(&iothread->once, iothread_g_main_context_init, iothread);
> -
> +atomic_set(&iothread->run_gcontext, 1);
> +aio_notify(iothread->ctx);
>  return iothread->worker_context;
>  }
>
> --
> 2.17.1
>

looks good otherwise,
Reviewed-by: Marc-André Lureau

Re: [Qemu-devel] [PATCH 1/4] iothread: replace init_done_cond with a semaphore

2019-02-21 Thread Marc-André Lureau

Hi

On Fri, Feb 22, 2019 at 4:14 AM Peter Xu  wrote:
>
> Only sending an init-done message using lock+cond seems an overkill to
> me.  Replacing it with a simpler semaphore.
>
> Meanwhile, init the semaphore unconditionally, then we can destroy it
> unconditionally too in finalize which seems cleaner.
>
> Signed-off-by: Peter Xu 

The lock is also protecting thread_id.

> ---
>  include/sysemu/iothread.h |  3 +--
>  iothread.c| 17 -
>  2 files changed, 5 insertions(+), 15 deletions(-)
>
> diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h
> index 8a7ac2c528..50411ba54a 100644
> --- a/include/sysemu/iothread.h
> +++ b/include/sysemu/iothread.h
> @@ -27,8 +27,7 @@ typedef struct {
>  GMainContext *worker_context;
>  GMainLoop *main_loop;
>  GOnce once;
> -QemuMutex init_done_lock;
> -QemuCond init_done_cond;/* is thread initialization done? */
> +QemuSemaphore init_done_sem; /* is thread init done? */
>  bool stopping;  /* has iothread_stop() been called? */
>  bool running;   /* should iothread_run() continue? */
>  int thread_id;
> diff --git a/iothread.c b/iothread.c
> index e615b7ae52..6e297e9ef1 100644
> --- a/iothread.c
> +++ b/iothread.c
> @@ -55,10 +55,8 @@ static void *iothread_run(void *opaque)
>  rcu_register_thread();
>
>  my_iothread = iothread;
> -qemu_mutex_lock(&iothread->init_done_lock);
>  iothread->thread_id = qemu_get_thread_id();
> -qemu_cond_signal(&iothread->init_done_cond);
> -qemu_mutex_unlock(&iothread->init_done_lock);
> +qemu_sem_post(&iothread->init_done_sem);
>
>  while (iothread->running) {
>  aio_poll(iothread->ctx, true);
> @@ -115,6 +113,7 @@ static void iothread_instance_init(Object *obj)
>
>  iothread->poll_max_ns = IOTHREAD_POLL_MAX_NS_DEFAULT;
>  iothread->thread_id = -1;
> +qemu_sem_init(&iothread->init_done_sem, 0);
>  }
>
>  static void iothread_instance_finalize(Object *obj)
> @@ -123,10 +122,6 @@ static void iothread_instance_finalize(Object *obj)
>
>  iothread_stop(iothread);
>
> -if (iothread->thread_id != -1) {
> -qemu_cond_destroy(&iothread->init_done_cond);
> -qemu_mutex_destroy(&iothread->init_done_lock);
> -}
>  /*
>   * Before glib2 2.33.10, there is a glib2 bug that GSource context
>   * pointer may not be cleared even if the context has already been
> @@ -145,6 +140,7 @@ static void iothread_instance_finalize(Object *obj)
>  g_main_context_unref(iothread->worker_context);
>  iothread->worker_context = NULL;
>  }
> +qemu_sem_destroy(&iothread->init_done_sem);
>  }
>
>  static void iothread_complete(UserCreatable *obj, Error **errp)
> @@ -173,8 +169,6 @@ static void iothread_complete(UserCreatable *obj, Error 
> **errp)
>  return;
>  }
>
> -qemu_mutex_init(&iothread->init_done_lock);
> -qemu_cond_init(&iothread->init_done_cond);
>  iothread->once = (GOnce) G_ONCE_INIT;
>
>  /* This assumes we are called from a thread with useful CPU affinity for 
> us
> @@ -188,12 +182,9 @@ static void iothread_complete(UserCreatable *obj, Error 
> **errp)
>  g_free(name);
>
>  /* Wait for initialization to complete */
> -qemu_mutex_lock(&iothread->init_done_lock);
>  while (iothread->thread_id == -1) {
> -qemu_cond_wait(&iothread->init_done_cond,
> -   &iothread->init_done_lock);
> +qemu_sem_wait(&iothread->init_done_sem);
>  }
> -qemu_mutex_unlock(&iothread->init_done_lock);
>  }
>
>  typedef struct {
> --
> 2.17.1
>

[Qemu-devel] [PATCH 1/2] target/hppa: Do not return freed temporary

2019-02-21 Thread Richard Henderson

For priv levels 1 & 2, we were doing so from do_ibranch_priv.

Signed-off-by: Richard Henderson 
---
 target/hppa/translate.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/target/hppa/translate.c b/target/hppa/translate.c
index b4fd307b77..dad8ce563c 100644
--- a/target/hppa/translate.c
+++ b/target/hppa/translate.c
@@ -2007,16 +2007,15 @@ static TCGv_reg do_ibranch_priv(DisasContext *ctx, 
TCGv_reg offset)
 /* Privilege 0 is maximum and is allowed to decrease.  */
 return offset;
 case 3:
-/* Privilege 3 is minimum and is never allowed increase.  */
+/* Privilege 3 is minimum and is never allowed to increase.  */
 dest = get_temp(ctx);
 tcg_gen_ori_reg(dest, offset, 3);
 break;
 default:
-dest = tcg_temp_new();
+dest = get_temp(ctx);
 tcg_gen_andi_reg(dest, offset, -4);
 tcg_gen_ori_reg(dest, dest, ctx->privilege);
 tcg_gen_movcond_reg(TCG_COND_GTU, dest, dest, offset, dest, offset);
-tcg_temp_free(dest);
 break;
 }
 return dest;
-- 
2.17.2

Re: [Qemu-devel] [PATCH v6 1/7] vhost-user: Support transferring inflight buffer between qemu and backend

2019-02-21 Thread Michael S. Tsirkin

On Fri, Feb 22, 2019 at 10:47:03AM +0800, Yongji Xie wrote:
> > > +
> > > +To track inflight I/O, the queue region should be processed as follows:
> > > +
> > > +When receiving available buffers from the driver:
> > > +
> > > +1. Get the next available head-descriptor index from available ring, 
> > > i
> > > +
> > > +2. Set desc[i].inflight to 1
> > > +
> > > +When supplying used buffers to the driver:
> > > +
> > > +1. Get corresponding used head-descriptor index, i
> > > +
> > > +2. Set desc[i].next to process_head
> > > +
> > > +3. Set process_head to i
> > > +
> > > +4. Steps 1,2,3 may be performed repeatedly if batching is possible
> > > +
> > > +5. Increase the idx value of used ring by the size of the batch
> > > +
> > > +6. Set the inflight field of each DescStateSplit entry in the batch 
> > > to 0
> > > +
> > > +7. Set used_idx to the idx value of used ring
> > > +
> > > +When reconnecting:
> > > +
> > > +1. If the value of used_idx does not match the idx value of used 
> > > ring,
> > > +
> > > +(a) Subtract the value of used_idx from the idx value of used 
> > > ring to get
> > > +the number of in-progress DescStateSplit entries
> > > +
> > > +(b) Set the inflight field of the in-progress DescStateSplit 
> > > entries which
> > > +start from process_head to 0
> > > +
> > > +(c) Set used_idx to the idx value of used ring
> > > +
> > > +2. Resubmit each inflight DescStateSplit entry
> >
> > I re-read a couple of time and I still don't understand what it says.
> >
> > For simplicity consider split ring. So we want a list of heads that are
> > outstanding. Fair enough. Now device finishes a head. What now? I needs
> > to drop head from the list. But list is unidirectional (just next, no
> > prev). So how can you drop an entry from the middle?
> >
> 
> The process_head is only used when slave crash between increasing the
> idx value of used ring and updating used_idx. We use it to find the
> in-progress DescStateSplit entries before the crash and complete them
> when reconnecting. Make sure guest and slave have the same view for
> inflight I/Os.
> 

But I don't understand how does the described process help do it?


> In other case, the inflight field is enough to track inflight I/O.
> When reconnecting, we go through all DescStateSplit entries and
> re-submit the entry whose inflight field is equal to 1.

What I don't understand is how do we know the order
in which they have to be resubmitted. Reordering
operations would be a big problem, won't it?


Let's say I fetch descriptors A, B, C and start
processing. how does memory look?
Now I finished B and marked it used. How does
memory look?

I also wonder how do you address a crash between
marking descriptor used and clearing inflight.
Will you redo the descriptor? Is it always safe?
What if it's a write?

Re: [Qemu-devel] [PATCH] hw/ppc: Use object_initialize_child for correct reference counting

2019-02-21 Thread Thomas Huth

On 21/02/2019 19.14, Philippe Mathieu-Daudé wrote:
> On 2/21/19 12:24 PM, Thomas Huth wrote:
>> Both functions, object_initialize() and object_property_add_child() increase
>> the reference counter of the new object, so one of the references has to be
>> dropped afterwards to get the reference counting right. Otherwise the child
>> object will not be properly cleaned up when the parent gets destroyed.
>> Thus let's use now object_initialize_child() instead to get the reference
>> counting here right.
>>
>> Suggested-by: Eduardo Habkost 
>> Signed-off-by: Thomas Huth 
>> ---
>>  hw/ppc/pnv.c | 12 ++--
>>  hw/ppc/pnv_psi.c |  4 ++--
>>  hw/ppc/spapr.c   |  6 +++---
>>  3 files changed, 11 insertions(+), 11 deletions(-)
>>
>> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
>> index da54086..9e03e9c 100644
>> --- a/hw/ppc/pnv.c
>> +++ b/hw/ppc/pnv.c
>> @@ -736,18 +736,18 @@ static void pnv_chip_power8_instance_init(Object *obj)
>>  {
>>  Pnv8Chip *chip8 = PNV8_CHIP(obj);
>>  
>> -object_initialize(&chip8->psi, sizeof(chip8->psi), TYPE_PNV_PSI);
>> -object_property_add_child(obj, "psi", OBJECT(&chip8->psi), NULL);
>> +object_initialize_child(obj, "psi",  &chip8->psi, sizeof(chip8->psi),
>> +TYPE_PNV_PSI, &error_abort, NULL);
>>  object_property_add_const_link(OBJECT(&chip8->psi), "xics",
>> OBJECT(qdev_get_machine()), 
>> &error_abort);
>>  
>> -object_initialize(&chip8->lpc, sizeof(chip8->lpc), TYPE_PNV_LPC);
>> -object_property_add_child(obj, "lpc", OBJECT(&chip8->lpc), NULL);
>> +object_initialize_child(obj, "lpc",  &chip8->lpc, sizeof(chip8->lpc),
>> +TYPE_PNV_LPC, &error_abort, NULL);
>>  object_property_add_const_link(OBJECT(&chip8->lpc), "psi",
>> OBJECT(&chip8->psi), &error_abort);
>>  
>> -object_initialize(&chip8->occ, sizeof(chip8->occ), TYPE_PNV_OCC);
>> -object_property_add_child(obj, "occ", OBJECT(&chip8->occ), NULL);
>> +object_initialize_child(obj, "occ",  &chip8->occ, sizeof(chip8->occ),
>> +TYPE_PNV_OCC, &error_abort, NULL);
>>  object_property_add_const_link(OBJECT(&chip8->occ), "psi",
>> OBJECT(&chip8->psi), &error_abort);
>>  }
>> diff --git a/hw/ppc/pnv_psi.c b/hw/ppc/pnv_psi.c
>> index 8ced095..44bc0cb 100644
>> --- a/hw/ppc/pnv_psi.c
>> +++ b/hw/ppc/pnv_psi.c
>> @@ -444,8 +444,8 @@ static void pnv_psi_init(Object *obj)
>>  {
>>  PnvPsi *psi = PNV_PSI(obj);
>>  
>> -object_initialize(&psi->ics, sizeof(psi->ics), TYPE_ICS_SIMPLE);
>> -object_property_add_child(obj, "ics-psi", OBJECT(&psi->ics), NULL);
>> +object_initialize_child(obj, "ics-psi",  &psi->ics, sizeof(psi->ics),
>> +TYPE_ICS_SIMPLE, &error_abort, NULL);
>>  }
>>  
>>  static const uint8_t irq_to_xivr[] = {
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index abf9ebc..6c58dca 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -1696,9 +1696,9 @@ static void spapr_create_nvram(sPAPRMachineState 
>> *spapr)
>>  
>>  static void spapr_rtc_create(sPAPRMachineState *spapr)
>>  {
>> -object_initialize(&spapr->rtc, sizeof(spapr->rtc), TYPE_SPAPR_RTC);
>> -object_property_add_child(OBJECT(spapr), "rtc", OBJECT(&spapr->rtc),
>> -  &error_fatal);
>> +object_initialize_child(OBJECT(spapr), "rtc",
>> +&spapr->rtc, sizeof(spapr->rtc), TYPE_SPAPR_RTC,
>> +&error_fatal, NULL);
>>  object_property_set_bool(OBJECT(&spapr->rtc), true, "realized",
>>   &error_fatal);
>>  object_property_add_alias(OBJECT(spapr), "rtc-time", 
>> OBJECT(&spapr->rtc),
>>
> 
> What about intc/spapr_xive.c?

Good hint. I missed that one since it's in hw/intc, not in hw/ppc...

David, could you please squash this on top:

diff --git a/hw/intc/spapr_xive.c b/hw/intc/spapr_xive.c
index 290a290..ac5f5ef 100644
--- a/hw/intc/spapr_xive.c
+++ b/hw/intc/spapr_xive.c
@@ -244,13 +244,12 @@ static void spapr_xive_instance_init(Object *obj)
 {
 sPAPRXive *xive = SPAPR_XIVE(obj);
 
-object_initialize(&xive->source, sizeof(xive->source), TYPE_XIVE_SOURCE);
-object_property_add_child(obj, "source", OBJECT(&xive->source), NULL);
+object_initialize_child(obj, "source", &xive->source, sizeof(xive->source),
+TYPE_XIVE_SOURCE, &error_abort, NULL);
 
-object_initialize(&xive->end_source, sizeof(xive->end_source),
-  TYPE_XIVE_END_SOURCE);
-object_property_add_child(obj, "end_source", OBJECT(&xive->end_source),
-  NULL);
+object_initialize_child(obj, "end_source", &xive->end_source,
+sizeof(xive->end_source), TYPE_XIVE_END_SOURCE,
+&error_abort, NULL);
 }
 
 static void spapr_xive_realize(DeviceState *dev, Error **errp)


... or shall I rather send a v2 or a separate patch for this?

 Thomas

[Qemu-devel] [PATCH 0/2] target/hppa: Minor fix and improvement

2019-02-21 Thread Richard Henderson

The first patch is via inspection.  Linux only uses priv
levels 0 and 3, and I suspect that HP-UX is the same.

The second patch comes from examining traces.


r~


Richard Henderson (2):
  target/hppa: Do not return freed temporary
  target/hppa: Optimize blr r0,rn

 target/hppa/translate.c | 21 -
 1 file changed, 12 insertions(+), 9 deletions(-)

-- 
2.17.2

[Qemu-devel] [PATCH] fixup! display: add -display spice-app launching a Spice client

2019-02-21 Thread Marc-André Lureau

---
 ui/spice-app.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ui/spice-app.c b/ui/spice-app.c
index 4f5229f3ee..925b27b708 100644
--- a/ui/spice-app.c
+++ b/ui/spice-app.c
@@ -181,6 +181,8 @@ static void spice_app_display_init(DisplayState *ds, 
DisplayOptions *opts)
 g_app_info_launch_default_for_uri(uri, NULL, &err);
 if (err) {
 error_report("Failed to launch %s URI: %s", uri, err->message);
+error_report("You need a capable Spice client, "
+ "such as virt-viewer 8.0");
 exit(1);
 }
 g_free(uri);
-- 
2.21.0.rc1

[Qemu-devel] [PATCH 2/2] target/hppa: Optimize blr r0,rn

2019-02-21 Thread Richard Henderson

We can eliminate an extra TB in this case, which merely
loads a "return address" into rn.

Signed-off-by: Richard Henderson 
---
 target/hppa/translate.c | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/target/hppa/translate.c b/target/hppa/translate.c
index dad8ce563c..dc5636fe94 100644
--- a/target/hppa/translate.c
+++ b/target/hppa/translate.c
@@ -3488,12 +3488,16 @@ static bool trans_b_gate(DisasContext *ctx, arg_b_gate 
*a)
 
 static bool trans_blr(DisasContext *ctx, arg_blr *a)
 {
-TCGv_reg tmp = get_temp(ctx);
-
-tcg_gen_shli_reg(tmp, load_gpr(ctx, a->x), 3);
-tcg_gen_addi_reg(tmp, tmp, ctx->iaoq_f + 8);
-/* The computation here never changes privilege level.  */
-return do_ibranch(ctx, tmp, a->l, a->n);
+if (a->x) {
+TCGv_reg tmp = get_temp(ctx);
+tcg_gen_shli_reg(tmp, load_gpr(ctx, a->x), 3);
+tcg_gen_addi_reg(tmp, tmp, ctx->iaoq_f + 8);
+/* The computation here never changes privilege level.  */
+return do_ibranch(ctx, tmp, a->l, a->n);
+} else {
+/* BLR R0,RX is a good way to load PC+8 into RX.  */
+return do_dbranch(ctx, ctx->iaoq_f + 8, a->l, a->n);
+}
 }
 
 static bool trans_bv(DisasContext *ctx, arg_bv *a)
-- 
2.17.2

[Qemu-devel] [PATCH] tcg/ppc: Add vector opcodes

2019-02-21 Thread Richard Henderson

This requires VSX, not just Altivec, so Power7 or later.

Signed-off-by: Richard Henderson 
---

At present there are no tunables that can avoid the 64-bit element
load/store requirement.  As with requiring AVX1 for x86 hosts, I'm
not sure it's worth inventing such a tunable for pre-power7 hosts.

Tested vs aarch64 risu test cases.  It's probably worth testing
this vs Mark's target/ppc conversion.


r~

---
 tcg/ppc/tcg-target.h |  31 +-
 tcg/ppc/tcg-target.opc.h |   3 +
 tcg/ppc/tcg-target.inc.c | 604 +++
 3 files changed, 577 insertions(+), 61 deletions(-)
 create mode 100644 tcg/ppc/tcg-target.opc.h

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 52c1bb04b1..0b9943e7e6 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -31,7 +31,7 @@
 # define TCG_TARGET_REG_BITS  32
 #endif
 
-#define TCG_TARGET_NB_REGS 32
+#define TCG_TARGET_NB_REGS 64
 #define TCG_TARGET_INSN_UNIT_SIZE 4
 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
 
@@ -45,12 +45,22 @@ typedef enum {
 TCG_REG_R24, TCG_REG_R25, TCG_REG_R26, TCG_REG_R27,
 TCG_REG_R28, TCG_REG_R29, TCG_REG_R30, TCG_REG_R31,
 
+TCG_REG_V0,  TCG_REG_V1,  TCG_REG_V2,  TCG_REG_V3,
+TCG_REG_V4,  TCG_REG_V5,  TCG_REG_V6,  TCG_REG_V7,
+TCG_REG_V8,  TCG_REG_V9,  TCG_REG_V10, TCG_REG_V11,
+TCG_REG_V12, TCG_REG_V13, TCG_REG_V14, TCG_REG_V15,
+TCG_REG_V16, TCG_REG_V17, TCG_REG_V18, TCG_REG_V19,
+TCG_REG_V20, TCG_REG_V21, TCG_REG_V22, TCG_REG_V23,
+TCG_REG_V24, TCG_REG_V25, TCG_REG_V26, TCG_REG_V27,
+TCG_REG_V28, TCG_REG_V29, TCG_REG_V30, TCG_REG_V31,
+
 TCG_REG_CALL_STACK = TCG_REG_R1,
 TCG_AREG0 = TCG_REG_R27
 } TCGReg;
 
 extern bool have_isa_2_06;
 extern bool have_isa_3_00;
+extern bool have_isa_2_07_vsx;
 
 /* optional instructions automatically implemented */
 #define TCG_TARGET_HAS_ext8u_i320 /* andi */
@@ -124,6 +134,25 @@ extern bool have_isa_3_00;
 #define TCG_TARGET_HAS_mulsh_i641
 #endif
 
+/* VSX required over ALTIVEC to perform 64-bit loads.  */
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_HAS_v64  have_isa_2_07_vsx
+#define TCG_TARGET_HAS_v128 have_isa_2_07_vsx
+#define TCG_TARGET_HAS_v256 0
+#endif
+
+#define TCG_TARGET_HAS_andc_vec 1
+#define TCG_TARGET_HAS_orc_vec  1
+#define TCG_TARGET_HAS_not_vec  1
+#define TCG_TARGET_HAS_neg_vec  0
+#define TCG_TARGET_HAS_shi_vec  0
+#define TCG_TARGET_HAS_shs_vec  0
+#define TCG_TARGET_HAS_shv_vec  0
+#define TCG_TARGET_HAS_cmp_vec  1
+#define TCG_TARGET_HAS_mul_vec  1
+#define TCG_TARGET_HAS_sat_vec  1
+#define TCG_TARGET_HAS_minmax_vec   1
+
 void flush_icache_range(uintptr_t start, uintptr_t stop);
 void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
 
diff --git a/tcg/ppc/tcg-target.opc.h b/tcg/ppc/tcg-target.opc.h
new file mode 100644
index 00..4816a6c3d4
--- /dev/null
+++ b/tcg/ppc/tcg-target.opc.h
@@ -0,0 +1,3 @@
+/* Target-specific opcodes for host vector expansion.  These will be
+   emitted by tcg_expand_vec_op.  For those familiar with GCC internals,
+   consider these to be UNSPEC with names.  */
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index 773690f1d9..d0782e2eb2 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -42,6 +42,9 @@
 # define TCG_REG_TMP1   TCG_REG_R12
 #endif
 
+#define TCG_VEC_TMP1TCG_REG_V0
+#define TCG_VEC_TMP2TCG_REG_V1
+
 #define TCG_REG_TB TCG_REG_R31
 #define USE_REG_TB (TCG_TARGET_REG_BITS == 64)
 
@@ -63,6 +66,7 @@ static tcg_insn_unit *tb_ret_addr;
 
 bool have_isa_2_06;
 bool have_isa_3_00;
+bool have_isa_2_07_vsx;
 
 #define HAVE_ISA_2_06  have_isa_2_06
 #define HAVE_ISEL  have_isa_2_06
@@ -72,39 +76,15 @@ bool have_isa_3_00;
 #endif
 
 #ifdef CONFIG_DEBUG_TCG
-static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = {
-"r0",
-"r1",
-"r2",
-"r3",
-"r4",
-"r5",
-"r6",
-"r7",
-"r8",
-"r9",
-"r10",
-"r11",
-"r12",
-"r13",
-"r14",
-"r15",
-"r16",
-"r17",
-"r18",
-"r19",
-"r20",
-"r21",
-"r22",
-"r23",
-"r24",
-"r25",
-"r26",
-"r27",
-"r28",
-"r29",
-"r30",
-"r31"
+static const char tcg_target_reg_names[TCG_TARGET_NB_REGS][4] = {
+"r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",
+"r8",  "r9",  "r10", "r11", "r12", "r13", "r14", "r15",
+"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
+"r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",
+"v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",
+"v8",  "v9",  "v10", "v11", "v12", "v13", "v14", "v15",
+"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
+"v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
 };
 #endif
 
@@ -139,6 +119,26 @@ static const int tcg_target_reg_alloc_order[] = {
 TCG_REG_R5,

Re: [Qemu-devel] [PATCH] hw/arm: Use object_initialize_child for correct reference counting

2019-02-21 Thread Markus Armbruster

Philippe Mathieu-Daudé  writes:

> As Thomas Huth explained:
> "Both functions, object_initialize() and object_property_add_child()
> increase the reference counter of the new object, so one of the
> references has to be dropped afterwards to get the reference counting
> right. Otherwise the child object will not be properly cleaned up
> when the parent gets destroyed.
> Thus let's use now object_initialize_child() instead to get the
> reference counting here right."
>
> This patch was generated using the following Coccinelle script:
>
>  @use_object_initialize_child@
>  identifier parent_obj;
>  expression child;
>  expression propname;
>  expression child_type;
>  expression errp;
>  @@
>  (
>  -object_initialize(, sizeof(child), child_type);
>  -object_property_add_child(parent_obj, propname, OBJECT(), NULL);
>  +object_initialize_child(parent_obj, propname,  , sizeof(child),
>  +child_type, _abort, NULL);
>  |
>  -object_initialize(, sizeof(child), child_type);
>  -object_property_add_child(parent_obj, propname, OBJECT(), errp);
>  +object_initialize_child(parent_obj, propname,  , sizeof(child),
>  +child_type, errp, NULL);
>  )
>
> and a bit of manual fix-up for overly long lines.
>
> Suggested-by: Eduardo Habkost 
> Inspired-by: Thomas Huth 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  hw/arm/aspeed_soc.c  | 43 ++--
>  hw/arm/bcm2835_peripherals.c | 41 +-
>  hw/arm/digic.c   |  4 ++--
>  3 files changed, 45 insertions(+), 43 deletions(-)
>
> diff --git a/hw/arm/aspeed_soc.c b/hw/arm/aspeed_soc.c
> index a27233d487..81665f2948 100644
> --- a/hw/arm/aspeed_soc.c
> +++ b/hw/arm/aspeed_soc.c
> @@ -106,11 +106,11 @@ static void aspeed_soc_init(Object *obj)
>  AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
>  int i;
>  
> -object_initialize(>cpu, sizeof(s->cpu), sc->info->cpu_type);
> -object_property_add_child(obj, "cpu", OBJECT(>cpu), NULL);
> +object_initialize_child(obj, "cpu", >cpu, sizeof(s->cpu),
> +sc->info->cpu_type, _abort, NULL);

This flips from "ignore errors" to "abort on error".  Quite probably an
improvement, but should be mentioned and justified in the commit
message.

[...]

Re: [Qemu-devel] [PATCH v3 11/11] display: add -display spice-app launching a Spice client

2019-02-21 Thread Gerd Hoffmann

  Hi,

> Would something like that be helpful?
> 
> diff --git a/ui/spice-app.c b/ui/spice-app.c
> index 4f5229f3ee..69f92e440c 100644
> --- a/ui/spice-app.c
> +++ b/ui/spice-app.c
> @@ -181,6 +181,7 @@ static void spice_app_display_init(DisplayState
> *ds, DisplayOptions *opts)
>  g_app_info_launch_default_for_uri(uri, NULL, );
>  if (err) {
>  error_report("Failed to launch %s URI: %s", uri, err->message);
> +error_report("You need a capable Spice client, such as
> virt-viewer 8.0");

Yep, looks good to me.  Can you send that as incremental patch?

thanks,
  Gerd

[Qemu-devel] [PATCH v3 2/3] vfio/display: add xres + yres properties

2019-02-21 Thread Gerd Hoffmann

This allows configuring the display resolution which the vgpu should use.
The information will be passed to the guest using EDID, so the mdev
driver must support the vfio edid region for this to work.

Signed-off-by: Gerd Hoffmann 
---
 hw/vfio/pci.h |  2 ++
 hw/vfio/display.c | 12 ++--
 hw/vfio/pci.c | 12 
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index b1ae4c07549a..c11c3f167070 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -149,6 +149,8 @@ typedef struct VFIOPCIDevice {
 #define VFIO_FEATURE_ENABLE_IGD_OPREGION \
 (1 << VFIO_FEATURE_ENABLE_IGD_OPREGION_BIT)
 OnOffAuto display;
+uint32_t display_xres;
+uint32_t display_yres;
 int32_t bootindex;
 uint32_t igd_gms;
 OffAutoPCIBAR msix_relo;
diff --git a/hw/vfio/display.c b/hw/vfio/display.c
index f59fcc487128..8bf7c890be96 100644
--- a/hw/vfio/display.c
+++ b/hw/vfio/display.c
@@ -46,8 +46,8 @@ static void vfio_display_edid_update(VFIOPCIDevice *vdev, 
bool enabled,
 qemu_edid_info edid = {
 .maxx  = dpy->edid_regs->max_xres,
 .maxy  = dpy->edid_regs->max_yres,
-.prefx = prefx,
-.prefy = prefy,
+.prefx = prefx ?: vdev->display_xres,
+.prefy = prefy ?: vdev->display_yres,
 };
 
 dpy->edid_regs->link_state = VFIO_DEVICE_GFX_LINK_STATE_DOWN;
@@ -128,6 +128,14 @@ static void vfio_display_edid_init(VFIOPCIDevice *vdev)
 pread_field(fd, dpy->edid_info, dpy->edid_regs, max_yres);
 dpy->edid_blob = g_malloc0(dpy->edid_regs->edid_max_size);
 
+/* if xres + yres properties are unset use the maximum resolution */
+if (!vdev->display_xres) {
+vdev->display_xres = dpy->edid_regs->max_xres;
+}
+if (!vdev->display_yres) {
+vdev->display_yres = dpy->edid_regs->max_yres;
+}
+
 vfio_display_edid_update(vdev, true, 0, 0);
 return;
 
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index dd12f363915d..504019c4582b 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3068,6 +3068,16 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 error_setg(errp, "ramfb=on requires display=on");
 goto out_teardown;
 }
+if (vdev->display_xres || vdev->display_yres) {
+if (vdev->dpy == NULL) {
+error_setg(errp, "xres and yres properties require display=on");
+goto out_teardown;
+}
+if (vdev->dpy->edid_regs == NULL) {
+error_setg(errp, "xres and yres properties need edid support");
+goto out_teardown;
+}
+}
 
 vfio_register_err_notifier(vdev);
 vfio_register_req_notifier(vdev);
@@ -3182,6 +3192,8 @@ static Property vfio_pci_dev_properties[] = {
 DEFINE_PROP_STRING("sysfsdev", VFIOPCIDevice, vbasedev.sysfsdev),
 DEFINE_PROP_ON_OFF_AUTO("display", VFIOPCIDevice,
 display, ON_OFF_AUTO_OFF),
+DEFINE_PROP_UINT32("xres", VFIOPCIDevice, display_xres, 0),
+DEFINE_PROP_UINT32("yres", VFIOPCIDevice, display_yres, 0),
 DEFINE_PROP_UINT32("x-intx-mmap-timeout-ms", VFIOPCIDevice,
intx.mmap_timeout, 1100),
 DEFINE_PROP_BIT("x-vga", VFIOPCIDevice, features,
-- 
2.9.3

[Qemu-devel] [PATCH v3 1/3] vfio/display: add edid support.

2019-02-21 Thread Gerd Hoffmann

This patch adds EDID support to the vfio display (aka vgpu) code.
When supported by the mdev driver qemu will generate an EDID blob
and pass it on using the new vfio edid region.  The EDID blob will
be updated on UI changes (i.e. window resize), so the guest can
adapt.

Signed-off-by: Gerd Hoffmann 
---
 include/hw/vfio/vfio-common.h |   3 +
 hw/vfio/display.c | 127 ++
 hw/vfio/trace-events  |   7 +++
 3 files changed, 137 insertions(+)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 7624c9f511c4..5f7f709b95f1 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -148,6 +148,9 @@ typedef struct VFIODMABuf {
 typedef struct VFIODisplay {
 QemuConsole *con;
 RAMFBState *ramfb;
+struct vfio_region_info *edid_info;
+struct vfio_region_gfx_edid *edid_regs;
+uint8_t *edid_blob;
 struct {
 VFIORegion buffer;
 DisplaySurface *surface;
diff --git a/hw/vfio/display.c b/hw/vfio/display.c
index dead30e626cb..f59fcc487128 100644
--- a/hw/vfio/display.c
+++ b/hw/vfio/display.c
@@ -15,15 +15,139 @@
 #include 
 
 #include "sysemu/sysemu.h"
+#include "hw/display/edid.h"
 #include "ui/console.h"
 #include "qapi/error.h"
 #include "pci.h"
+#include "trace.h"
 
 #ifndef DRM_PLANE_TYPE_PRIMARY
 # define DRM_PLANE_TYPE_PRIMARY 1
 # define DRM_PLANE_TYPE_CURSOR  2
 #endif
 
+#define pread_field(_fd, _reg, _ptr, _fld)  \
+if (sizeof(_ptr->_fld) !=   \
+pread(_fd, &(_ptr->_fld), sizeof(_ptr->_fld),   \
+  _reg->offset + offsetof(typeof(*_ptr), _fld)))\
+goto err;
+#define pwrite_field(_fd, _reg, _ptr, _fld) \
+if (sizeof(_ptr->_fld) !=   \
+pwrite(_fd, &(_ptr->_fld), sizeof(_ptr->_fld),  \
+   _reg->offset + offsetof(typeof(*_ptr), _fld)))   \
+goto err;
+
+
+static void vfio_display_edid_update(VFIOPCIDevice *vdev, bool enabled,
+ int prefx, int prefy)
+{
+VFIODisplay *dpy = vdev->dpy;
+int fd = vdev->vbasedev.fd;
+qemu_edid_info edid = {
+.maxx  = dpy->edid_regs->max_xres,
+.maxy  = dpy->edid_regs->max_yres,
+.prefx = prefx,
+.prefy = prefy,
+};
+
+dpy->edid_regs->link_state = VFIO_DEVICE_GFX_LINK_STATE_DOWN;
+pwrite_field(fd, dpy->edid_info, dpy->edid_regs, link_state);
+trace_vfio_display_edid_link_down();
+
+if (!enabled) {
+return;
+}
+
+if (edid.maxx && edid.prefx > edid.maxx) {
+edid.prefx = edid.maxx;
+}
+if (edid.maxy && edid.prefy > edid.maxy) {
+edid.prefy = edid.maxy;
+}
+qemu_edid_generate(dpy->edid_blob,
+   dpy->edid_regs->edid_max_size,
+   );
+trace_vfio_display_edid_update(edid.prefx, edid.prefy);
+
+dpy->edid_regs->edid_size = qemu_edid_size(dpy->edid_blob);
+pwrite_field(fd, dpy->edid_info, dpy->edid_regs, edid_size);
+if (pwrite(fd, dpy->edid_blob, dpy->edid_regs->edid_size,
+   dpy->edid_info->offset + dpy->edid_regs->edid_offset)
+!= dpy->edid_regs->edid_size) {
+goto err;
+}
+
+dpy->edid_regs->link_state = VFIO_DEVICE_GFX_LINK_STATE_UP;
+pwrite_field(fd, dpy->edid_info, dpy->edid_regs, link_state);
+trace_vfio_display_edid_link_up();
+return;
+
+err:
+trace_vfio_display_edid_write_error();
+return;
+}
+
+static int vfio_display_edid_ui_info(void *opaque, uint32_t idx,
+ QemuUIInfo *info)
+{
+VFIOPCIDevice *vdev = opaque;
+VFIODisplay *dpy = vdev->dpy;
+
+if (!dpy->edid_regs) {
+return 0;
+}
+
+if (info->width && info->height) {
+vfio_display_edid_update(vdev, true, info->width, info->height);
+} else {
+vfio_display_edid_update(vdev, false, 0, 0);
+}
+
+return 0;
+}
+
+static void vfio_display_edid_init(VFIOPCIDevice *vdev)
+{
+VFIODisplay *dpy = vdev->dpy;
+int fd = vdev->vbasedev.fd;
+int ret;
+
+ret = vfio_get_dev_region_info(>vbasedev,
+   VFIO_REGION_TYPE_GFX,
+   VFIO_REGION_SUBTYPE_GFX_EDID,
+   >edid_info);
+if (ret) {
+return;
+}
+
+trace_vfio_display_edid_available();
+dpy->edid_regs = g_new0(struct vfio_region_gfx_edid, 1);
+pread_field(fd, dpy->edid_info, dpy->edid_regs, edid_offset);
+pread_field(fd, dpy->edid_info, dpy->edid_regs, edid_max_size);
+pread_field(fd, dpy->edid_info, dpy->edid_regs, max_xres);
+pread_field(fd, dpy->edid_info, dpy->edid_regs, max_yres);
+dpy->edid_blob = g_malloc0(dpy->edid_regs->edid_max_size);
+
+vfio_display_edid_update(vdev, true, 0, 0);
+return;
+

[Qemu-devel] [PATCH v3 3/3] vfio/display: delay link up event

2019-02-21 Thread Gerd Hoffmann

Kick the display link up event with a 0.1 sec delay,
so the guest has a chance to notice the link down first.

Signed-off-by: Gerd Hoffmann 
---
 include/hw/vfio/vfio-common.h |  1 +
 hw/vfio/display.c | 26 +++---
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 5f7f709b95f1..b65a2f051886 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -151,6 +151,7 @@ typedef struct VFIODisplay {
 struct vfio_region_info *edid_info;
 struct vfio_region_gfx_edid *edid_regs;
 uint8_t *edid_blob;
+QEMUTimer *edid_link_timer;
 struct {
 VFIORegion buffer;
 DisplaySurface *surface;
diff --git a/hw/vfio/display.c b/hw/vfio/display.c
index 8bf7c890be96..971e801dc05c 100644
--- a/hw/vfio/display.c
+++ b/hw/vfio/display.c
@@ -38,6 +38,21 @@
 goto err;
 
 
+static void vfio_display_edid_link_up(void *opaque)
+{
+VFIOPCIDevice *vdev = opaque;
+VFIODisplay *dpy = vdev->dpy;
+int fd = vdev->vbasedev.fd;
+
+dpy->edid_regs->link_state = VFIO_DEVICE_GFX_LINK_STATE_UP;
+pwrite_field(fd, dpy->edid_info, dpy->edid_regs, link_state);
+trace_vfio_display_edid_link_up();
+return;
+
+err:
+trace_vfio_display_edid_write_error();
+}
+
 static void vfio_display_edid_update(VFIOPCIDevice *vdev, bool enabled,
  int prefx, int prefy)
 {
@@ -50,6 +65,7 @@ static void vfio_display_edid_update(VFIOPCIDevice *vdev, 
bool enabled,
 .prefy = prefy ?: vdev->display_yres,
 };
 
+timer_del(dpy->edid_link_timer);
 dpy->edid_regs->link_state = VFIO_DEVICE_GFX_LINK_STATE_DOWN;
 pwrite_field(fd, dpy->edid_info, dpy->edid_regs, link_state);
 trace_vfio_display_edid_link_down();
@@ -77,9 +93,8 @@ static void vfio_display_edid_update(VFIOPCIDevice *vdev, 
bool enabled,
 goto err;
 }
 
-dpy->edid_regs->link_state = VFIO_DEVICE_GFX_LINK_STATE_UP;
-pwrite_field(fd, dpy->edid_info, dpy->edid_regs, link_state);
-trace_vfio_display_edid_link_up();
+timer_mod(dpy->edid_link_timer,
+  qemu_clock_get_ms(QEMU_CLOCK_REALTIME) + 100);
 return;
 
 err:
@@ -136,6 +151,9 @@ static void vfio_display_edid_init(VFIOPCIDevice *vdev)
 vdev->display_yres = dpy->edid_regs->max_yres;
 }
 
+dpy->edid_link_timer = timer_new_ms(QEMU_CLOCK_REALTIME,
+vfio_display_edid_link_up, vdev);
+
 vfio_display_edid_update(vdev, true, 0, 0);
 return;
 
@@ -154,6 +172,8 @@ static void vfio_display_edid_exit(VFIODisplay *dpy)
 
 g_free(dpy->edid_regs);
 g_free(dpy->edid_blob);
+timer_del(dpy->edid_link_timer);
+timer_free(dpy->edid_link_timer);
 }
 
 static void vfio_display_update_cursor(VFIODMABuf *dmabuf,
-- 
2.9.3

[Qemu-devel] [PATCH v3 0/3] vfio/display: add edid support.

2019-02-21 Thread Gerd Hoffmann

The 5.0 linux kernel header update finally landed in master.  So this
series has no unmerged dependencies any more.  Rebasing and re-sending
for merge.

This series adds EDID support to the qemu vfio display code.  Various
display-related information -- most importantly the display resolution
which should be used -- is passed to the guest that way.  The (initial)
display resolution can be set using the new xres and yres properties.
When supported by the UI it will also be updated on window resizes.

v3:
 - change xres+yres property error handling.
 - swap one leftover fprintf for a tracepoint.

Gerd Hoffmann (3):
  vfio/display: add edid support.
  vfio/display: add xres + yres properties
  vfio/display: delay link up event

 hw/vfio/pci.h |   2 +
 include/hw/vfio/vfio-common.h |   4 ++
 hw/vfio/display.c | 155 ++
 hw/vfio/pci.c |  12 
 hw/vfio/trace-events  |   7 ++
 5 files changed, 180 insertions(+)

-- 
2.9.3

Re: [Qemu-devel] [PATCH v2 1/3] vfio/display: add edid support.

2019-02-21 Thread Gerd Hoffmann

  Hi,

> This is the vdagent using installation like this:
> 
> https://www.ovirt.org/develop/infra/testing/spice.html
> 
> ie. vdservice install, net start vdservice?

If the page says so, probably.

I've tested the agent setup with linux only, where everything happens
automatically, you only have to make sure spice-vdagent.rpm is installed.

> I'm not seeing anything magically change when I do that.  I do have the
> default serial and redirection devices installed by virt-manager:

This one too?


  
  


cheers,
  Gerd

[Qemu-devel] [PULL 2/2] hw/vfio/common: Refactor container initialization

2019-02-21 Thread Alex Williamson

From: Eric Auger 

We introduce the vfio_init_container() helper.
It computes the highest usable iommu type and then
sets the container and the iommu type.

Its usage in vfio_connect_container() makes the code
ready for addition of new iommu types.

Signed-off-by: Eric Auger 
Reviewed-by: Greg Kurz 
Signed-off-by: Alex Williamson 
---
 hw/vfio/common.c |  114 +-
 1 file changed, 70 insertions(+), 44 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 9c3796e7db43..df2b4721bffb 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1054,6 +1054,60 @@ static void vfio_put_address_space(VFIOAddressSpace 
*space)
 }
 }
 
+/*
+ * vfio_get_iommu_type - selects the richest iommu_type (v2 first)
+ */
+static int vfio_get_iommu_type(VFIOContainer *container,
+   Error **errp)
+{
+int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
+  VFIO_SPAPR_TCE_v2_IOMMU, VFIO_SPAPR_TCE_IOMMU };
+int i;
+
+for (i = 0; i < ARRAY_SIZE(iommu_types); i++) {
+if (ioctl(container->fd, VFIO_CHECK_EXTENSION, iommu_types[i])) {
+return iommu_types[i];
+}
+}
+error_setg(errp, "No available IOMMU models");
+return -EINVAL;
+}
+
+static int vfio_init_container(VFIOContainer *container, int group_fd,
+   Error **errp)
+{
+int iommu_type, ret;
+
+iommu_type = vfio_get_iommu_type(container, errp);
+if (iommu_type < 0) {
+return iommu_type;
+}
+
+ret = ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, >fd);
+if (ret) {
+error_setg_errno(errp, errno, "Failed to set group container");
+return -errno;
+}
+
+while (ioctl(container->fd, VFIO_SET_IOMMU, iommu_type)) {
+if (iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
+/*
+ * On sPAPR, despite the IOMMU subdriver always advertises v1 and
+ * v2, the running platform may not support v2 and there is no
+ * way to guess it until an IOMMU group gets added to the 
container.
+ * So in case it fails with v2, try v1 as a fallback.
+ */
+iommu_type = VFIO_SPAPR_TCE_IOMMU;
+continue;
+}
+error_setg_errno(errp, errno, "Failed to set iommu for container");
+return -errno;
+}
+
+container->iommu_type = iommu_type;
+return 0;
+}
+
 static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
   Error **errp)
 {
@@ -1119,25 +1173,17 @@ static int vfio_connect_container(VFIOGroup *group, 
AddressSpace *as,
 container->fd = fd;
 QLIST_INIT(>giommu_list);
 QLIST_INIT(>hostwin_list);
-if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU) ||
-ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU)) {
-bool v2 = !!ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU);
-struct vfio_iommu_type1_info info;
 
-ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, );
-if (ret) {
-error_setg_errno(errp, errno, "failed to set group container");
-ret = -errno;
-goto free_container_exit;
-}
+ret = vfio_init_container(container, group->fd, errp);
+if (ret) {
+goto free_container_exit;
+}
 
-container->iommu_type = v2 ? VFIO_TYPE1v2_IOMMU : VFIO_TYPE1_IOMMU;
-ret = ioctl(fd, VFIO_SET_IOMMU, container->iommu_type);
-if (ret) {
-error_setg_errno(errp, errno, "failed to set iommu for container");
-ret = -errno;
-goto free_container_exit;
-}
+switch (container->iommu_type) {
+case VFIO_TYPE1v2_IOMMU:
+case VFIO_TYPE1_IOMMU:
+{
+struct vfio_iommu_type1_info info;
 
 /*
  * FIXME: This assumes that a Type1 IOMMU can map any 64-bit
@@ -1155,30 +1201,13 @@ static int vfio_connect_container(VFIOGroup *group, 
AddressSpace *as,
 }
 vfio_host_win_add(container, 0, (hwaddr)-1, info.iova_pgsizes);
 container->pgsizes = info.iova_pgsizes;
-} else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU) ||
-   ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_v2_IOMMU)) {
+break;
+}
+case VFIO_SPAPR_TCE_v2_IOMMU:
+case VFIO_SPAPR_TCE_IOMMU:
+{
 struct vfio_iommu_spapr_tce_info info;
-bool v2 = !!ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_v2_IOMMU);
-
-ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, );
-if (ret) {
-error_setg_errno(errp, errno, "failed to set group container");
-ret = -errno;
-goto free_container_exit;
-}
-container->iommu_type =
-v2 ? VFIO_SPAPR_TCE_v2_IOMMU : VFIO_SPAPR_TCE_IOMMU;
-ret = ioctl(fd, VFIO_SET_IOMMU, container->iommu_type);
-if (ret) {
-container->iommu_type =

[Qemu-devel] [PULL 1/2] vfio/common: Work around kernel overflow bug in DMA unmap

2019-02-21 Thread Alex Williamson

A kernel bug was introduced in v4.15 via commit 71a7d3d78e3c which
adds a test for address space wrap-around in the vfio DMA unmap path.
Unfortunately due to overflow, the kernel detects an unmap of the last
page in the 64-bit address space as a wrap-around.  In QEMU, a Q35
guest with VT-d emulation and guest IOMMU enabled will attempt to make
such an unmap request during VM system reset, triggering an error:

  qemu-kvm: VFIO_UNMAP_DMA: -22
  qemu-kvm: vfio_dma_unmap(0x561f059948f0, 0xfef00000, 0xffffffff01100000) = 
-22 (Invalid argument)

Here the IOVA start address (0xfef00000) and the size parameter
(0xffffffff01100000) add to exactly 2^64, triggering the bug.  A
kernel fix is queued for the Linux v5.0 release to address this.

This patch implements a workaround to retry the unmap, excluding the
final page of the range when we detect an unmap failing which matches
the requirements for this issue.  This is expected to be a safe and
complete workaround as the VT-d address space does not extend to the
full 64-bit space and therefore the last page should never be mapped.

This workaround can be removed once all kernels with this bug are
sufficiently deprecated.

Link: https://bugzilla.redhat.com/show_bug.cgi?id=1662291
Reported-by: Pei Zhang 
Debugged-by: Peter Xu 
Reviewed-by: Peter Xu 
Reviewed-by: Cornelia Huck 
Signed-off-by: Alex Williamson 
---
 hw/vfio/common.c |   20 +++-
 hw/vfio/trace-events |1 +
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 4262b80c4450..9c3796e7db43 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -220,7 +220,25 @@ static int vfio_dma_unmap(VFIOContainer *container,
 .size = size,
 };
 
-if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, )) {
+while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, )) {
+/*
+ * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
+ * v4.15) where an overflow in its wrap-around check prevents us from
+ * unmapping the last page of the address space.  Test for the error
+ * condition and re-try the unmap excluding the last page.  The
+ * expectation is that we've never mapped the last page anyway and this
+ * unmap request comes via vIOMMU support which also makes it unlikely
+ * that this page is used.  This bug was introduced well after type1 v2
+ * support was introduced, so we shouldn't need to test for v1.  A fix
+ * is queued for kernel v5.0 so this workaround can be removed once
+ * affected kernels are sufficiently deprecated.
+ */
+if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) &&
+container->iommu_type == VFIO_TYPE1v2_IOMMU) {
+trace_vfio_dma_unmap_overflow_workaround();
+unmap.size -= 1ULL << ctz64(container->pgsizes);
+continue;
+}
 error_report("VFIO_UNMAP_DMA: %d", -errno);
 return -errno;
 }
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index f41ca96160bf..ed2f333ad726 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -110,6 +110,7 @@ vfio_region_mmaps_set_enabled(const char *name, bool 
enabled) "Region %s mmaps e
 vfio_region_sparse_mmap_header(const char *name, int index, int nr_areas) 
"Device %s region %d: %d sparse mmap entries"
 vfio_region_sparse_mmap_entry(int i, unsigned long start, unsigned long end) 
"sparse entry %d [0x%lx - 0x%lx]"
 vfio_get_dev_region(const char *name, int index, uint32_t type, uint32_t 
subtype) "%s index %d, %08x/%0x8"
+vfio_dma_unmap_overflow_workaround(void) ""
 
 # hw/vfio/platform.c
 vfio_platform_base_device_init(char *name, int groupid) "%s belongs to group 
#%d"

[Qemu-devel] [PULL 0/2] vfio updates 2019-02-21

2019-02-21 Thread Alex Williamson

The following changes since commit fc3dbb90f2eb069801bfb4cfe9cbc83cf9c5f4a9:

  Merge remote-tracking branch 'remotes/jnsnow/tags/bitmaps-pull-request' into 
staging (2019-02-21 13:09:33 +0000)

are available in the Git repository at:

  git://github.com/awilliam/qemu-vfio.git tags/vfio-updates-20190221.0

for you to fetch changes up to 2b6326c0bf2c686ae83d6904899cb80e9ad7a6fb:

  hw/vfio/common: Refactor container initialization (2019-02-21 21:07:03 -0700)


VFIO updates 2019-02-21

 - Workaround kernel overflow bug in vfio type1 DMA unmap
   (Alex Williamson)

 - Refactor vfio container initialization (Eric Auger)


Alex Williamson (1):
  vfio/common: Work around kernel overflow bug in DMA unmap

Eric Auger (1):
  hw/vfio/common: Refactor container initialization

 hw/vfio/common.c | 134 ++-
 hw/vfio/trace-events |   1 +
 2 files changed, 90 insertions(+), 45 deletions(-)

Re: [Qemu-devel] [PATCH] hw/arm: Use object_initialize_child for correct reference counting

2019-02-21 Thread Thomas Huth

On 21/02/2019 19.38, Philippe Mathieu-Daudé wrote:
> As Thomas Huth explained:
> "Both functions, object_initialize() and object_property_add_child()
> increase the reference counter of the new object, so one of the
> references has to be dropped afterwards to get the reference counting
> right. Otherwise the child object will not be properly cleaned up
> when the parent gets destroyed.
> Thus let's use now object_initialize_child() instead to get the
> reference counting here right."
> 
> This patch was generated using the following Coccinelle script:
> 
>  @use_object_initialize_child@
>  identifier parent_obj;
>  expression child;
>  expression propname;
>  expression child_type;
>  expression errp;
>  @@
>  (
>  -object_initialize(, sizeof(child), child_type);
>  -object_property_add_child(parent_obj, propname, OBJECT(), NULL);
>  +object_initialize_child(parent_obj, propname,  , sizeof(child),
>  +child_type, _abort, NULL);
>  |
>  -object_initialize(, sizeof(child), child_type);
>  -object_property_add_child(parent_obj, propname, OBJECT(), errp);
>  +object_initialize_child(parent_obj, propname,  , sizeof(child),
>  +child_type, errp, NULL);
>  )
> 
> and a bit of manual fix-up for overly long lines.
> 
> Suggested-by: Eduardo Habkost 
> Inspired-by: Thomas Huth 
> Signed-off-by: Philippe Mathieu-Daudé 
> ---
>  hw/arm/aspeed_soc.c  | 43 ++--
>  hw/arm/bcm2835_peripherals.c | 41 +-
>  hw/arm/digic.c   |  4 ++--
>  3 files changed, 45 insertions(+), 43 deletions(-)
> 
> diff --git a/hw/arm/aspeed_soc.c b/hw/arm/aspeed_soc.c
> index a27233d487..81665f2948 100644
> --- a/hw/arm/aspeed_soc.c
> +++ b/hw/arm/aspeed_soc.c
> @@ -106,11 +106,11 @@ static void aspeed_soc_init(Object *obj)
>  AspeedSoCClass *sc = ASPEED_SOC_GET_CLASS(s);
>  int i;
>  
> -object_initialize(>cpu, sizeof(s->cpu), sc->info->cpu_type);
> -object_property_add_child(obj, "cpu", OBJECT(>cpu), NULL);
> +object_initialize_child(obj, "cpu", >cpu, sizeof(s->cpu),
> +sc->info->cpu_type, _abort, NULL);
>  
> -object_initialize(>scu, sizeof(s->scu), TYPE_ASPEED_SCU);
> -object_property_add_child(obj, "scu", OBJECT(>scu), NULL);
> +object_initialize_child(obj, "scu", >scu, sizeof(s->scu),
> +TYPE_ASPEED_SCU, _abort, NULL);
>  qdev_set_parent_bus(DEVICE(>scu), sysbus_get_default());
>  qdev_prop_set_uint32(DEVICE(>scu), "silicon-rev",
>   sc->info->silicon_rev);
> @@ -121,35 +121,35 @@ static void aspeed_soc_init(Object *obj)
>  object_property_add_alias(obj, "hw-prot-key", OBJECT(>scu),
>"hw-prot-key", _abort);
>  
> -object_initialize(>vic, sizeof(s->vic), TYPE_ASPEED_VIC);
> -object_property_add_child(obj, "vic", OBJECT(>vic), NULL);
> +object_initialize_child(obj, "vic", >vic, sizeof(s->vic),
> +TYPE_ASPEED_VIC, _abort, NULL);
>  qdev_set_parent_bus(DEVICE(>vic), sysbus_get_default());
>  
> -object_initialize(>timerctrl, sizeof(s->timerctrl), 
> TYPE_ASPEED_TIMER);
> -object_property_add_child(obj, "timerctrl", OBJECT(>timerctrl), NULL);
> +object_initialize_child(obj, "timerctrl", >timerctrl,
> +sizeof(s->timerctrl), TYPE_ASPEED_TIMER,
> +_abort, NULL);
>  object_property_add_const_link(OBJECT(>timerctrl), "scu",
> OBJECT(>scu), _abort);
>  qdev_set_parent_bus(DEVICE(>timerctrl), sysbus_get_default());
>  
> -object_initialize(>i2c, sizeof(s->i2c), TYPE_ASPEED_I2C);
> -object_property_add_child(obj, "i2c", OBJECT(>i2c), NULL);
> +object_initialize_child(obj, "i2c", >i2c, sizeof(s->i2c),
> +TYPE_ASPEED_I2C, _abort, NULL);
>  qdev_set_parent_bus(DEVICE(>i2c), sysbus_get_default());
>  
> -object_initialize(>fmc, sizeof(s->fmc), sc->info->fmc_typename);
> -object_property_add_child(obj, "fmc", OBJECT(>fmc), NULL);
> +object_initialize_child(obj, "fmc", >fmc, sizeof(s->fmc),
> +sc->info->fmc_typename, _abort, NULL);
>  qdev_set_parent_bus(DEVICE(>fmc), sysbus_get_default());
>  object_property_add_alias(obj, "num-cs", OBJECT(>fmc), "num-cs",
>_abort);
>  
>  for (i = 0; i < sc->info->spis_num; i++) {
> -object_initialize(>spi[i], sizeof(s->spi[i]),
> -  sc->info->spi_typename[i]);
> -object_property_add_child(obj, "spi[*]", OBJECT(>spi[i]), NULL);
> +object_initialize_child(obj, "spi[*]", >spi[i], sizeof(s->spi[i]),
> +sc->info->spi_typename[i], _abort, 
> NULL);
>  qdev_set_parent_bus(DEVICE(>spi[i]), sysbus_get_default());
>  }
>  
> -

Re: [Qemu-devel] [PATCH] virtio-net: do not start queues that are not enabled by the guest

2019-02-21 Thread Michael S. Tsirkin

On Thu, Feb 21, 2019 at 10:10:08PM -0500, Michael S. Tsirkin wrote:
> On Fri, Feb 22, 2019 at 11:04:05AM +0800, Jason Wang wrote:
> > 
> > On 2019/2/22 上午9:35, Michael S. Tsirkin wrote:
> > > On Thu, Feb 21, 2019 at 05:40:22PM +0800, Jason Wang wrote:
> > > > On 2019/2/21 下午4:18, Yuri Benditovich wrote:
> > > > 
> > > >  For 1.0 device, we can fix the queue_enable, but for 0.9x 
> > > > device how do
> > > >  you enable one specific queue in this case? (setting status?)
> > > > 
> > > > 
> > > >  Do I understand correctly that for 0.9 device in some cases the 
> > > > device will
> > > >  receive feature _MQ set, but will not receive 
> > > > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET?
> > > >  Or the problem is different?
> > > > 
> > > > 
> > > > Let me clarify, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET is used to control the
> > > > number of queue pairs used by device for doing transmission and 
> > > > reception. It
> > > > was not used to enable or disable a virtqueue.
> > > > 
> > > > For 1.0 device, we should use queue_enable in pci cfg to enable and 
> > > > disable
> > > > queue:
> > > > 
> > > > 
> > > > We could do:
> > > > 
> > > > 1) allocate memory and set queue_enable for vq0
> > > > 
> > > > 2) allocate memory and set queue_enable for vq1
> > > > 
> > > > 3) Set vq pairs to 1
> > > > 
> > > > 4) allocate memory and set queue_enable for vq2
> > > > 
> > > > 5) allocate memory and set queue_enable for vq3
> > > > 
> > > > 6) set vq pairs to 2
> > > 
> > > I do not think spec allows this.
> > > 
> > > 
> > > The driver MUST follow this sequence to initialize a device:
> > > 1. Reset the device.
> > > 2. Set the ACKNOWLEDGE status bit: the guest OS has noticed the device.
> > > 3. Set the DRIVER status bit: the guest OS knows how to drive the device.
> > > 4. Read device feature bits, and write the subset of feature bits 
> > > understood by the OS and driver to the
> > > device. During this step the driver MAY read (but MUST NOT write) the 
> > > device-specific configuration
> > > fields to check that it can support the device before accepting it.
> > > 5. Set the FEATURES_OK status bit. The driver MUST NOT accept new feature 
> > > bits after this step.
> > > 6. Re-read device status to ensure the FEATURES_OK bit is still set: 
> > > otherwise, the device does not
> > > support our subset of features and the device is unusable.
> > > 7. Perform device-specific setup, including discovery of virtqueues for 
> > > the device, optional per-bus setup,
> > > reading and possibly writing the device’s virtio configuration space, and 
> > > population of virtqueues.
> > > 8. Set the DRIVER_OK status bit. At this point the device is “live”.
> > > 
> > > 
> > > Thus vqs are setup at step 7.
> > > 
> > > # of vq pairs are set up through a command which is a special
> > > buffer, and spec says:
> > > 
> > > The driver MUST NOT send any buffer available notifications to the device 
> > > before setting DRIVER_OK.
> > 
> > 
> > So you meant write to queue_enable is forbidden after DRIVER_OK (though it's
> > not very clear to me from the  spec). And if a driver want to enable new
> > queues, it must reset the device?
> 
> 
> That's my reading.  What do you think?

Btw some legacy drivers might violate this by adding buffers
before driver_ok.

> 
> > 
> > > 
> > > 
> > > > But this requires a proper implementation for queue_enable for vhost 
> > > > which is
> > > > missed in qemu and probably what you really want to do.
> > > > 
> > > > but for 0.9x device, there's no such way to do this. That's the issue.
> > > 0.9x there's no queue enable, assumption is PA!=0 means VQ has
> > > been enabled.
> > > 
> > > 
> > > > So
> > > > driver must allocate all queues before starting the device, otherwise 
> > > > there's
> > > > no way to enable it afterwards.
> > > 
> > > As per spec queues must be allocated before DRIVER_OK.
> > > 
> > > That is universal.
> > 
> > 
> > If I understand correctly, this is not what is done by current windows
> > drivers.
> > 
> > Thanks
> > 
> > 
> > > 
> > > > There're tricks to make it work like what is
> > > > done in your patch, but it depends on a specific implementation like 
> > > > qemu which
> > > > is sub-optimal.
> > > > 
> > > > 
> > > > 
> > > > 
> > > >  A fundamental question is what prevents you from just 
> > > > initialization all
> > > >  queues during driver start? It looks to me this save lots of 
> > > > efforts
> > > >  than allocating queue dynamically.
> > > > 
> > > > 
> > > >  This is not so trivial in Windows driver, as it does not have 
> > > > objects for queues
> > > >  that it does not use. Linux driver first of all allocates all the
> > > >  queues and then
> > > >  adds Rx/Tx to those it will use. Windows driver first decides how 
> > > > many queues
> > > >  it will use then allocates objects for them and initializes them 
> > > > from zero to
> > > >  fully functional state.
> > >

[Qemu-devel] [Bug 1817239] [NEW] add '--targets' option to qemu-binfmt-conf.sh

2019-02-21 Thread umarcor

Public bug reported:

I'd like to ask for the addition of option '--targets' to scripts/qemu-
binfmt-conf.sh, in order to allow registering the interpreters for the
given list of architectures only, instead of using all of the ones
defined in qemu_target_list. The following is a possible patch that
implements it:

 qemu-binfmt-conf.sh | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/qemu-binfmt-conf.sh b/qemu-binfmt-conf.sh
index b5a1674..be4a19b 100644
--- a/qemu-binfmt-conf.sh
+++ b/qemu-binfmt-conf.sh
@@ -170,6 +170,7 @@ usage() {
 Usage: qemu-binfmt-conf.sh [--qemu-path PATH][--debian][--systemd CPU]
[--help][--credential yes|no][--exportdir PATH]
[--persistent yes|no][--qemu-suffix SUFFIX]
+   [--targets TARGETS]

Configure binfmt_misc to use qemu interpreter

@@ -189,6 +190,8 @@ Usage: qemu-binfmt-conf.sh [--qemu-path 
PATH][--debian][--systemd CPU]
--persistent:  if yes, the interpreter is loaded when binfmt is
   configured and remains in memory. All future uses
   are cloned from the open file.
+   --targets: comma-separated list of targets. If provided, only
+  the targets in the list are registered.

 To import templates with update-binfmts, use :

@@ -324,7 +327,7 @@ CREDENTIAL=no
 PERSISTENT=no
 QEMU_SUFFIX=""

-options=$(getopt -o ds:Q:S:e:hc:p: -l 
debian,systemd:,qemu-path:,qemu-suffix:,exportdir:,help,credential:,persistent: 
-- "$@")
+options=$(getopt -o ds:Q:S:e:hc:p:t: -l 
debian,systemd:,qemu-path:,qemu-suffix:,exportdir:,help,credential:,persistent:,targets:
 -- "$@")
 eval set -- "$options"

 while true ; do
@@ -380,6 +383,10 @@ while true ; do
 shift
 PERSISTENT="$1"
 ;;
+-t|--targets)
+shift
+qemu_target_list="$(echo "$1" | tr ',' ' ')"
+;;
 *)
 break
 ;;
--
2.20.1

** Affects: qemu
 Importance: Undecided
 Status: New

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1817239

Title:
  add '--targets' option to qemu-binfmt-conf.sh

Status in QEMU:
  New

Bug description:
  I'd like to ask for the addition of option '--targets' to scripts
  /qemu-binfmt-conf.sh, in order to allow registering the interpreters
  for the given list of architectures only, instead of using all of the
  ones defined in qemu_target_list. The following is a possible patch
  that implements it:

   qemu-binfmt-conf.sh | 9 -
   1 file changed, 8 insertions(+), 1 deletion(-)

  diff --git a/qemu-binfmt-conf.sh b/qemu-binfmt-conf.sh
  index b5a1674..be4a19b 100644
  --- a/qemu-binfmt-conf.sh
  +++ b/qemu-binfmt-conf.sh
  @@ -170,6 +170,7 @@ usage() {
   Usage: qemu-binfmt-conf.sh [--qemu-path PATH][--debian][--systemd CPU]
  [--help][--credential yes|no][--exportdir PATH]
  [--persistent yes|no][--qemu-suffix SUFFIX]
  +   [--targets TARGETS]

  Configure binfmt_misc to use qemu interpreter

  @@ -189,6 +190,8 @@ Usage: qemu-binfmt-conf.sh [--qemu-path 
PATH][--debian][--systemd CPU]
  --persistent:  if yes, the interpreter is loaded when binfmt is
 configured and remains in memory. All future uses
 are cloned from the open file.
  +   --targets: comma-separated list of targets. If provided, only
  +  the targets in the list are registered.

   To import templates with update-binfmts, use :

  @@ -324,7 +327,7 @@ CREDENTIAL=no
   PERSISTENT=no
   QEMU_SUFFIX=""

  -options=$(getopt -o ds:Q:S:e:hc:p: -l 
debian,systemd:,qemu-path:,qemu-suffix:,exportdir:,help,credential:,persistent: 
-- "$@")
  +options=$(getopt -o ds:Q:S:e:hc:p:t: -l 
debian,systemd:,qemu-path:,qemu-suffix:,exportdir:,help,credential:,persistent:,targets:
 -- "$@")
   eval set -- "$options"

   while true ; do
  @@ -380,6 +383,10 @@ while true ; do
   shift
   PERSISTENT="$1"
   ;;
  +-t|--targets)
  +shift
  +qemu_target_list="$(echo "$1" | tr ',' ' ')"
  +;;
   *)
   break
   ;;
  --
  2.20.1

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1817239/+subscriptions

Re: [Qemu-devel] [PATCH 1/4] iothread: wait until the glib context is acquired

2019-02-21 Thread Peter Xu

On Thu, Feb 21, 2019 at 11:39:32AM +0100, Marc-André Lureau wrote:
> Hi
> 
> On Thu, Feb 21, 2019 at 9:00 AM Peter Xu  wrote:
> >
> > On Wed, Feb 20, 2019 at 05:06:25PM +0100, Marc-André Lureau wrote:
> > > Another thread may acquire the glib context (temporarily) before
> > > g_main_context_push_thread_default().
> > >
> > > This can happen with the following qemu_chr_fe_set_handlers()
> > > modifications.
> > >
> > > Unfortunately, g_main_context_wait() is deprecated in glib
> > > 2.58 (apparently it was a broken interface). Use a polling loop.
> > >
> > > Signed-off-by: Marc-André Lureau 
> > > ---
> > >  iothread.c | 7 +++
> > >  1 file changed, 7 insertions(+)
> > >
> > > diff --git a/iothread.c b/iothread.c
> > > index e615b7ae52..93cc3aa875 100644
> > > --- a/iothread.c
> > > +++ b/iothread.c
> > > @@ -70,6 +70,11 @@ static void *iothread_run(void *opaque)
> > >  if (iothread->running && atomic_read(>worker_context)) 
> > > {
> > >  GMainLoop *loop;
> > >
> > > +/* we may race with another thread acquiring the context */
> > > +while (!g_main_context_acquire(iothread->worker_context)) {
> > > +g_usleep(1);
> > > +}
> >
> > Could you help explain why need this explicitly?  Since AFAIU
> > g_main_loop_run() below will do context acquire too so IIUC you're
> > taking it twice (while g_main_context_acquire should allow it to
> > happen, though)?
> >
> 
> We call g_main_context_push_thread_default() before run(). It will
> fail if the context is not acquirable.

Thanks for explaining.  It wasn't obvious to me.

I've posted another series to refactor iothread a bit and it should be
able to drop this patch if based on that series (that series should
even remove code instead of adding new).  Please feel free to have a
look, or give it a shot:

  [PATCH 0/4] iothread: create gcontext unconditionally

Regards,

-- 
Peter Xu

[Qemu-devel] [PATCH 4/4] iothread: push gcontext earlier in the thread_fn

2019-02-21 Thread Peter Xu

We used to delay pushing the context until right before running the
gmainloop.  Now that everything is created unconditionally, we can do
the push earlier.

One benefit is that now it's done even before init_done_sem, so as
long as the iothread user calls iothread_create() and completes, we
know that the thread stack is ready.

Signed-off-by: Peter Xu 
---
 iothread.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/iothread.c b/iothread.c
index 9abdbace66..7b7cba5d04 100644
--- a/iothread.c
+++ b/iothread.c
@@ -53,7 +53,7 @@ static void *iothread_run(void *opaque)
 IOThread *iothread = opaque;
 
 rcu_register_thread();
-
+g_main_context_push_thread_default(iothread->worker_context);
 my_iothread = iothread;
 iothread->thread_id = qemu_get_thread_id();
 qemu_sem_post(>init_done_sem);
@@ -66,12 +66,11 @@ static void *iothread_run(void *opaque)
  * changed in previous aio_poll()
  */
 if (iothread->running && atomic_read(>run_gcontext)) {
-g_main_context_push_thread_default(iothread->worker_context);
 g_main_loop_run(iothread->main_loop);
-g_main_context_pop_thread_default(iothread->worker_context);
 }
 }
 
+g_main_context_pop_thread_default(iothread->worker_context);
 rcu_unregister_thread();
 return NULL;
 }
-- 
2.17.1

[Qemu-devel] [PATCH 0/4] iothread: create gcontext unconditionally

2019-02-21 Thread Peter Xu

When I first read the iothread code, the gcontext confused me for
quite a while.  Meanwhile, I've been tackling some races due to
this complexity as well.  How much will we pay for creating the gcontext
unconditionally?  Do we really need this flexibility (or is it really
a flexibility after all)?  I don't see much gain in the existing code, but
I might be wrong.  Anyway, I wrote this patchset to see what the list
thinks about it.

This series directly originates from previous discussion with
Marc-Andre where there's a slightly hacky way to try to acquire the
gcontext:

https://lists.gnu.org/archive/html/qemu-devel/2019-02/msg05460.html

Now with this series logically above patch is not needed any more.
Please read patch 4 for more information.

And if this patchset can survive... how about running gcontext
directly in iothread_run()?  I believe there could be a bit more
things to clean but I'll see.

Make check passes for me.

Comments welcomed.  Thanks,

Peter Xu (4):
  iothread: replace init_done_cond with a semaphore
  iothread: create the gcontext unconditionally
  iothread: create main loop unconditionally
  iothread: push gcontext earlier in the thread_fn

 include/sysemu/iothread.h |  5 +--
 iothread.c| 77 +++
 2 files changed, 32 insertions(+), 50 deletions(-)

-- 
2.17.1

[Qemu-devel] [PATCH 2/4] iothread: create the gcontext unconditionally

2019-02-21 Thread Peter Xu

In existing code we create the gcontext dynamically at the first
access of the gcontext from caller.  That can bring some complexity
and potential races during using iothread.  Since the context itself
is not that big a resource, and we won't have millions of iothread,
let's simply create the gcontext unconditionally.

This is also preparation work for moving the thread context push
operation earlier than before (currently it is only pushed right
before we want to start running the gmainloop).

Removing the g_once since it's not necessary, while introducing a new
run_gcontext boolean to show whether we want to run the gcontext.

Signed-off-by: Peter Xu 
---
 include/sysemu/iothread.h |  2 +-
 iothread.c| 43 +++
 2 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h
index 50411ba54a..5f6240d5cb 100644
--- a/include/sysemu/iothread.h
+++ b/include/sysemu/iothread.h
@@ -24,9 +24,9 @@ typedef struct {
 
 QemuThread thread;
 AioContext *ctx;
+bool run_gcontext;  /* whether we should run gcontext */
 GMainContext *worker_context;
 GMainLoop *main_loop;
-GOnce once;
 QemuSemaphore init_done_sem; /* is thread init done? */
 bool stopping;  /* has iothread_stop() been called? */
 bool running;   /* should iothread_run() continue? */
diff --git a/iothread.c b/iothread.c
index 6e297e9ef1..6fa87876e0 100644
--- a/iothread.c
+++ b/iothread.c
@@ -65,7 +65,7 @@ static void *iothread_run(void *opaque)
  * We must check the running state again in case it was
  * changed in previous aio_poll()
  */
-if (iothread->running && atomic_read(>worker_context)) {
+if (iothread->running && atomic_read(>run_gcontext)) {
 GMainLoop *loop;
 
 g_main_context_push_thread_default(iothread->worker_context);
@@ -114,6 +114,8 @@ static void iothread_instance_init(Object *obj)
 iothread->poll_max_ns = IOTHREAD_POLL_MAX_NS_DEFAULT;
 iothread->thread_id = -1;
 qemu_sem_init(>init_done_sem, 0);
+/* By default, we don't run gcontext */
+atomic_set(>run_gcontext, 0);
 }
 
 static void iothread_instance_finalize(Object *obj)
@@ -143,6 +145,16 @@ static void iothread_instance_finalize(Object *obj)
 qemu_sem_destroy(>init_done_sem);
 }
 
+static void iothread_init_gcontext(IOThread *iothread)
+{
+GSource *source;
+
+iothread->worker_context = g_main_context_new();
+source = aio_get_g_source(iothread_get_aio_context(iothread));
+g_source_attach(source, iothread->worker_context);
+g_source_unref(source);
+}
+
 static void iothread_complete(UserCreatable *obj, Error **errp)
 {
 Error *local_error = NULL;
@@ -157,6 +169,12 @@ static void iothread_complete(UserCreatable *obj, Error 
**errp)
 return;
 }
 
+/*
+ * Init one GMainContext for the iothread unconditionally, even if
+ * it's not used
+ */
+iothread_init_gcontext(iothread);
+
 aio_context_set_poll_params(iothread->ctx,
 iothread->poll_max_ns,
 iothread->poll_grow,
@@ -169,8 +187,6 @@ static void iothread_complete(UserCreatable *obj, Error 
**errp)
 return;
 }
 
-iothread->once = (GOnce) G_ONCE_INIT;
-
 /* This assumes we are called from a thread with useful CPU affinity for us
  * to inherit.
  */
@@ -333,27 +349,10 @@ IOThreadInfoList *qmp_query_iothreads(Error **errp)
 return head;
 }
 
-static gpointer iothread_g_main_context_init(gpointer opaque)
-{
-AioContext *ctx;
-IOThread *iothread = opaque;
-GSource *source;
-
-iothread->worker_context = g_main_context_new();
-
-ctx = iothread_get_aio_context(iothread);
-source = aio_get_g_source(ctx);
-g_source_attach(source, iothread->worker_context);
-g_source_unref(source);
-
-aio_notify(iothread->ctx);
-return NULL;
-}
-
 GMainContext *iothread_get_g_main_context(IOThread *iothread)
 {
-g_once(>once, iothread_g_main_context_init, iothread);
-
+atomic_set(>run_gcontext, 1);
+aio_notify(iothread->ctx);
 return iothread->worker_context;
 }
 
-- 
2.17.1

[Qemu-devel] [PATCH 1/4] iothread: replace init_done_cond with a semaphore

2019-02-21 Thread Peter Xu

Using lock+cond only to send an init-done message seems like overkill to
me.  Replace it with a simpler semaphore.

Meanwhile, init the semaphore unconditionally, then we can destroy it
unconditionally too in finalize which seems cleaner.

Signed-off-by: Peter Xu 
---
 include/sysemu/iothread.h |  3 +--
 iothread.c| 17 -
 2 files changed, 5 insertions(+), 15 deletions(-)

diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h
index 8a7ac2c528..50411ba54a 100644
--- a/include/sysemu/iothread.h
+++ b/include/sysemu/iothread.h
@@ -27,8 +27,7 @@ typedef struct {
 GMainContext *worker_context;
 GMainLoop *main_loop;
 GOnce once;
-QemuMutex init_done_lock;
-QemuCond init_done_cond;/* is thread initialization done? */
+QemuSemaphore init_done_sem; /* is thread init done? */
 bool stopping;  /* has iothread_stop() been called? */
 bool running;   /* should iothread_run() continue? */
 int thread_id;
diff --git a/iothread.c b/iothread.c
index e615b7ae52..6e297e9ef1 100644
--- a/iothread.c
+++ b/iothread.c
@@ -55,10 +55,8 @@ static void *iothread_run(void *opaque)
 rcu_register_thread();
 
 my_iothread = iothread;
-qemu_mutex_lock(>init_done_lock);
 iothread->thread_id = qemu_get_thread_id();
-qemu_cond_signal(>init_done_cond);
-qemu_mutex_unlock(>init_done_lock);
+qemu_sem_post(>init_done_sem);
 
 while (iothread->running) {
 aio_poll(iothread->ctx, true);
@@ -115,6 +113,7 @@ static void iothread_instance_init(Object *obj)
 
 iothread->poll_max_ns = IOTHREAD_POLL_MAX_NS_DEFAULT;
 iothread->thread_id = -1;
+qemu_sem_init(>init_done_sem, 0);
 }
 
 static void iothread_instance_finalize(Object *obj)
@@ -123,10 +122,6 @@ static void iothread_instance_finalize(Object *obj)
 
 iothread_stop(iothread);
 
-if (iothread->thread_id != -1) {
-qemu_cond_destroy(>init_done_cond);
-qemu_mutex_destroy(>init_done_lock);
-}
 /*
  * Before glib2 2.33.10, there is a glib2 bug that GSource context
  * pointer may not be cleared even if the context has already been
@@ -145,6 +140,7 @@ static void iothread_instance_finalize(Object *obj)
 g_main_context_unref(iothread->worker_context);
 iothread->worker_context = NULL;
 }
+qemu_sem_destroy(>init_done_sem);
 }
 
 static void iothread_complete(UserCreatable *obj, Error **errp)
@@ -173,8 +169,6 @@ static void iothread_complete(UserCreatable *obj, Error 
**errp)
 return;
 }
 
-qemu_mutex_init(>init_done_lock);
-qemu_cond_init(>init_done_cond);
 iothread->once = (GOnce) G_ONCE_INIT;
 
 /* This assumes we are called from a thread with useful CPU affinity for us
@@ -188,12 +182,9 @@ static void iothread_complete(UserCreatable *obj, Error 
**errp)
 g_free(name);
 
 /* Wait for initialization to complete */
-qemu_mutex_lock(>init_done_lock);
 while (iothread->thread_id == -1) {
-qemu_cond_wait(>init_done_cond,
-   >init_done_lock);
+qemu_sem_wait(>init_done_sem);
 }
-qemu_mutex_unlock(>init_done_lock);
 }
 
 typedef struct {
-- 
2.17.1

[Qemu-devel] [PATCH 3/4] iothread: create main loop unconditionally

2019-02-21 Thread Peter Xu

Since the gcontext is now always there, create the main loop
unconditionally as well.  iothread_run() becomes even cleaner.

Signed-off-by: Peter Xu 
---
 iothread.c | 12 +++-
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/iothread.c b/iothread.c
index 6fa87876e0..9abdbace66 100644
--- a/iothread.c
+++ b/iothread.c
@@ -66,17 +66,8 @@ static void *iothread_run(void *opaque)
  * changed in previous aio_poll()
  */
 if (iothread->running && atomic_read(>run_gcontext)) {
-GMainLoop *loop;
-
 g_main_context_push_thread_default(iothread->worker_context);
-iothread->main_loop =
-g_main_loop_new(iothread->worker_context, TRUE);
-loop = iothread->main_loop;
-
 g_main_loop_run(iothread->main_loop);
-iothread->main_loop = NULL;
-g_main_loop_unref(loop);
-
 g_main_context_pop_thread_default(iothread->worker_context);
 }
 }
@@ -141,6 +132,8 @@ static void iothread_instance_finalize(Object *obj)
 if (iothread->worker_context) {
 g_main_context_unref(iothread->worker_context);
 iothread->worker_context = NULL;
+g_main_loop_unref(iothread->main_loop);
+iothread->main_loop = NULL;
 }
 qemu_sem_destroy(>init_done_sem);
 }
@@ -153,6 +146,7 @@ static void iothread_init_gcontext(IOThread *iothread)
 source = aio_get_g_source(iothread_get_aio_context(iothread));
 g_source_attach(source, iothread->worker_context);
 g_source_unref(source);
+iothread->main_loop = g_main_loop_new(iothread->worker_context, TRUE);
 }
 
 static void iothread_complete(UserCreatable *obj, Error **errp)
-- 
2.17.1

Re: [Qemu-devel] [PATCH] virtio-net: do not start queues that are not enabled by the guest

2019-02-21 Thread Michael S. Tsirkin

On Fri, Feb 22, 2019 at 11:04:05AM +0800, Jason Wang wrote:
> 
> On 2019/2/22 上午9:35, Michael S. Tsirkin wrote:
> > On Thu, Feb 21, 2019 at 05:40:22PM +0800, Jason Wang wrote:
> > > On 2019/2/21 下午4:18, Yuri Benditovich wrote:
> > > 
> > >  For 1.0 device, we can fix the queue_enable, but for 0.9x device 
> > > how do
> > >  you enable one specific queue in this case? (setting status?)
> > > 
> > > 
> > >  Do I understand correctly that for 0.9 device in some cases the 
> > > device will
> > >  receive feature _MQ set, but will not receive 
> > > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET?
> > >  Or the problem is different?
> > > 
> > > 
> > > Let me clarify, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET is used to control the
> > > number of queue pairs used by device for doing transmission and 
> > > reception. It
> > > was not used to enable or disable a virtqueue.
> > > 
> > > For 1.0 device, we should use queue_enable in pci cfg to enable and 
> > > disable
> > > queue:
> > > 
> > > 
> > > We could do:
> > > 
> > > 1) allocate memory and set queue_enable for vq0
> > > 
> > > 2) allocate memory and set queue_enable for vq1
> > > 
> > > 3) Set vq pairs to 1
> > > 
> > > 4) allocate memory and set queue_enable for vq2
> > > 
> > > 5) allocate memory and set queue_enable for vq3
> > > 
> > > 6) set vq pairs to 2
> > 
> > I do not think spec allows this.
> > 
> > 
> > The driver MUST follow this sequence to initialize a device:
> > 1. Reset the device.
> > 2. Set the ACKNOWLEDGE status bit: the guest OS has noticed the device.
> > 3. Set the DRIVER status bit: the guest OS knows how to drive the device.
> > 4. Read device feature bits, and write the subset of feature bits 
> > understood by the OS and driver to the
> > device. During this step the driver MAY read (but MUST NOT write) the 
> > device-specific configuration
> > fields to check that it can support the device before accepting it.
> > 5. Set the FEATURES_OK status bit. The driver MUST NOT accept new feature 
> > bits after this step.
> > 6. Re-read device status to ensure the FEATURES_OK bit is still set: 
> > otherwise, the device does not
> > support our subset of features and the device is unusable.
> > 7. Perform device-specific setup, including discovery of virtqueues for the 
> > device, optional per-bus setup,
> > reading and possibly writing the device’s virtio configuration space, and 
> > population of virtqueues.
> > 8. Set the DRIVER_OK status bit. At this point the device is “live”.
> > 
> > 
> > Thus vqs are setup at step 7.
> > 
> > # of vq pairs are set up through a command which is a special
> > buffer, and spec says:
> > 
> > The driver MUST NOT send any buffer available notifications to the device 
> > before setting DRIVER_OK.
> 
> 
> So you meant write to queue_enable is forbidden after DRIVER_OK (though it's
> not very clear to me from the  spec). And if a driver want to enable new
> queues, it must reset the device?


That's my reading.  What do you think?


> 
> > 
> > 
> > > But this requires a proper implementation for queue_enable for vhost 
> > > which is
> > > missed in qemu and probably what you really want to do.
> > > 
> > > but for 0.9x device, there's no such way to do this. That's the issue.
> > 0.9x there's no queue enable, assumption is PA!=0 means VQ has
> > been enabled.
> > 
> > 
> > > So
> > > driver must allocate all queues before starting the device, otherwise 
> > > there's
> > > no way to enable it afterwards.
> > 
> > As per spec queues must be allocated before DRIVER_OK.
> > 
> > That is universal.
> 
> 
> If I understand correctly, this is not what is done by current windows
> drivers.
> 
> Thanks
> 
> 
> > 
> > > There're tricks to make it work like what is
> > > done in your patch, but it depends on a specific implementation like qemu 
> > > which
> > > is sub-optimal.
> > > 
> > > 
> > > 
> > > 
> > >  A fundamental question is what prevents you from just 
> > > initialization all
> > >  queues during driver start? It looks to me this save lots of 
> > > efforts
> > >  than allocating queue dynamically.
> > > 
> > > 
> > >  This is not so trivial in Windows driver, as it does not have 
> > > objects for queues
> > >  that it does not use. Linux driver first of all allocates all the
> > >  queues and then
> > >  adds Rx/Tx to those it will use. Windows driver first decides how 
> > > many queues
> > >  it will use then allocates objects for them and initializes them 
> > > from zero to
> > >  fully functional state.
> > > 
> > > 
> > > Well, you just need to allocate some memory for the virtqueue, there's no 
> > > need
> > > to make it visible to the rest until it was enabled.
> > > 
> > > Thanks
> > > 
> > > 
> > > 
> > >

[Qemu-devel] [PULL 17/26] contrib/vhost-user-blk: fix the compilation issue

2019-02-21 Thread Michael S. Tsirkin

From: Changpeng Liu 

Signed-off-by: Changpeng Liu 
Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Stefano Garzarella 
Reviewed-by: Michael S. Tsirkin 
Message-Id: <1547615970-23545-2-git-send-email-changpeng@intel.com>
[PMD: this patch was first (incorrectly) introduced as a56de056c91f8]
Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20190212140621.17009-3-phi...@redhat.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Peter Xu 
---
 contrib/vhost-user-blk/vhost-user-blk.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/contrib/vhost-user-blk/vhost-user-blk.c 
b/contrib/vhost-user-blk/vhost-user-blk.c
index 5c2092e13a..43583f2659 100644
--- a/contrib/vhost-user-blk/vhost-user-blk.c
+++ b/contrib/vhost-user-blk/vhost-user-blk.c
@@ -20,6 +20,10 @@
 #include "contrib/libvhost-user/libvhost-user-glib.h"
 #include "contrib/libvhost-user/libvhost-user.h"
 
+#if defined(__linux__)
+#include 
+#include 
+#endif
 
 struct virtio_blk_inhdr {
 unsigned char status;
@@ -521,7 +525,7 @@ vub_get_blocksize(int fd)
 
 #if defined(__linux__) && defined(BLKSSZGET)
 if (ioctl(fd, BLKSSZGET, ) == 0) {
-return blocklen;
+return blocksize;
 }
 #endif
 
-- 
MST

[Qemu-devel] [PULL 10/26] roms: add the edk2 project as a git submodule

2019-02-21 Thread Michael S. Tsirkin

From: Laszlo Ersek 

The roms/edk2 submodule can help with three goals:
- build the OVMF and ArmVirtQemu virtual UEFI firmware platforms (to be
  implemented later),
- build the EfiRom tool on the fly, which is used in roms/Makefile, for
  building the "efirom" target,
- build UEFI test applications (to be run in guests), for qtest support.

Edk2 commit 85588389222a3636baf0f9ed8227f2434af4c3f9 stands for the latest
"stable tag", namely "edk2-stable201811".

The edk2 repository tracks some binary files that should not be removed by
QEMU's top-level "make clean"; exempt the full pathnames from the "find"
command.

Cc: "Michael S. Tsirkin" 
Cc: Ard Biesheuvel 
Cc: Gerd Hoffmann 
Cc: Igor Mammedov 
Cc: Philippe Mathieu-Daudé 
Cc: Shannon Zhao 
Signed-off-by: Laszlo Ersek 
Reviewed-by: Gerd Hoffmann 
Message-Id: <20190204160325.4914-2-ler...@redhat.com>
Reviewed-by: Igor Mammedov 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Philippe Mathieu-Daudé 
Tested-by: Philippe Mathieu-Daudé 
---
 Makefile| 6 +-
 .gitmodules | 3 +++
 roms/edk2   | 1 +
 3 files changed, 9 insertions(+), 1 deletion(-)
 create mode 16 roms/edk2

diff --git a/Makefile b/Makefile
index a6de28677f..21ceb8d0ed 100644
--- a/Makefile
+++ b/Makefile
@@ -607,7 +607,11 @@ clean:
rm -f config.mak op-i386.h opc-i386.h gen-op-i386.h op-arm.h opc-arm.h 
gen-op-arm.h
rm -f qemu-options.def
rm -f *.msi
-   find . \( -name '*.so' -o -name '*.dll' -o -name '*.mo' -o -name 
'*.[oda]' \) -type f -exec rm {} +
+   find . \( -name '*.so' -o -name '*.dll' -o -name '*.mo' -o -name 
'*.[oda]' \) -type f \
+   ! -path ./roms/edk2/ArmPkg/Library/GccLto/liblto-aarch64.a \
+   ! -path ./roms/edk2/ArmPkg/Library/GccLto/liblto-arm.a \
+   ! -path ./roms/edk2/BaseTools/Source/Python/UPT/Dll/sqlite3.dll 
\
+   -exec rm {} +
rm -f $(filter-out %.tlb,$(TOOLS)) $(HELPERS-y) qemu-ga TAGS cscope.* 
*.pod *~ */*~
rm -f fsdev/*.pod scsi/*.pod
rm -f qemu-img-cmds.h
diff --git a/.gitmodules b/.gitmodules
index 6b91176098..ceafb0ee29 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -49,3 +49,6 @@
 [submodule "tests/fp/berkeley-softfloat-3"]
path = tests/fp/berkeley-softfloat-3
url = https://github.com/cota/berkeley-softfloat-3
+[submodule "roms/edk2"]
+   path = roms/edk2
+   url = https://github.com/tianocore/edk2.git
diff --git a/roms/edk2 b/roms/edk2
new file mode 16
index 00..8558838922
--- /dev/null
+++ b/roms/edk2
@@ -0,0 +1 @@
+Subproject commit 85588389222a3636baf0f9ed8227f2434af4c3f9
-- 
MST

Re: [Qemu-devel] [PATCH] virtio-net: do not start queues that are not enabled by the guest

2019-02-21 Thread Jason Wang




On 2019/2/22 上午9:35, Michael S. Tsirkin wrote:

On Thu, Feb 21, 2019 at 05:40:22PM +0800, Jason Wang wrote:

On 2019/2/21 下午4:18, Yuri Benditovich wrote:

 For 1.0 device, we can fix the queue_enable, but for 0.9x device how do
 you enable one specific queue in this case? (setting status?)


 Do I understand correctly that for 0.9 device in some cases the device will
 receive feature _MQ set, but will not receive 
VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET?
 Or the problem is different?


Let me clarify, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET is used to control the
number of queue pairs used by device for doing transmission and reception. It
was not used to enable or disable a virtqueue.

For 1.0 device, we should use queue_enable in pci cfg to enable and disable
queue:


We could do:

1) allocate memory and set queue_enable for vq0

2) allocate memory and set queue_enable for vq1

3) Set vq pairs to 1

4) allocate memory and set queue_enable for vq2

5) allocate memory and set queue_enable for vq3

6) set vq pairs to 2


I do not think spec allows this.


The driver MUST follow this sequence to initialize a device:
1. Reset the device.
2. Set the ACKNOWLEDGE status bit: the guest OS has noticed the device.
3. Set the DRIVER status bit: the guest OS knows how to drive the device.
4. Read device feature bits, and write the subset of feature bits understood by 
the OS and driver to the
device. During this step the driver MAY read (but MUST NOT write) the 
device-specific configuration
fields to check that it can support the device before accepting it.
5. Set the FEATURES_OK status bit. The driver MUST NOT accept new feature bits 
after this step.
6. Re-read device status to ensure the FEATURES_OK bit is still set: otherwise, 
the device does not
support our subset of features and the device is unusable.
7. Perform device-specific setup, including discovery of virtqueues for the 
device, optional per-bus setup,
reading and possibly writing the device’s virtio configuration space, and 
population of virtqueues.
8. Set the DRIVER_OK status bit. At this point the device is “live”.


Thus vqs are setup at step 7.

# of vq pairs are set up through a command which is a special
buffer, and spec says:

The driver MUST NOT send any buffer available notifications to the device 
before setting DRIVER_OK.



So you mean that writing to queue_enable is forbidden after DRIVER_OK (though 
it's not very clear to me from the spec). And if a driver wants to 
enable new queues, it must reset the device?







But this requires a proper implementation for queue_enable for vhost which is
missing in qemu and probably what you really want to do.

but for 0.9x device, there's no such way to do this. That's the issue.

0.9x there's no queue enable, assumption is PA!=0 means VQ has
been enabled.



So
driver must allocate all queues before starting the device, otherwise there's
no way to enable it afterwards.


As per spec queues must be allocated before DRIVER_OK.

That is universal.



If I understand correctly, this is not what is done by current windows 
drivers.


Thanks





There're tricks to make it work like what is
done in your patch, but it depends on a specific implementation like qemu which
is sub-optimal.




 A fundamental question is what prevents you from just initialization 
all
 queues during driver start? It looks to me this save lots of efforts
 than allocating queue dynamically.


 This is not so trivial in Windows driver, as it does not have objects for 
queues
 that it does not use. Linux driver first of all allocates all the
 queues and then
 adds Rx/Tx to those it will use. Windows driver first decides how many 
queues
 it will use then allocates objects for them and initializes them from zero 
to
 fully functional state.


Well, you just need to allocate some memory for the virtqueue, there's no need
to make it visible to the rest until it was enabled.

Thanks

[Qemu-devel] [PULL 13/26] tests/uefi-test-tools: add build scripts

2019-02-21 Thread Michael S. Tsirkin

From: Laszlo Ersek 

Introduce the following build scripts under "tests/uefi-test-tools":

* "build.sh" builds a single module (a UEFI application) from
  UefiTestToolsPkg, for a single QEMU emulation target.

  "build.sh" relies on cross-compilers when the emulation target and the
  build host architecture don't match. The cross-compiler prefix is
  computed according to a fixed, Linux-specific pattern. No attempt is
  made to copy or reimplement the GNU Make magic from "qemu/roms/Makefile"
  for cross-compiler prefix determination. The reason is that the build
  host OSes that are officially supported by edk2, and those that are
  supported by QEMU, intersect only in Linux. (Note that the UNIXGCC
  toolchain is being removed from edk2,
  .)

* "Makefile" currently builds the "UefiTestToolsPkg/BiosTablesTest"
  application, for arm, aarch64, i386, and x86_64, with the help of
  "build.sh".

  "Makefile" turns each resultant UEFI executable into a UEFI-bootable,
  qcow2-compressed ISO image. The ISO images are output as
  "tests/data/uefi-boot-images/bios-tables-test.<target>.iso.qcow2".

  Each ISO image should be passed to QEMU as follows:

-drive id=boot-cd,if=none,readonly,format=qcow2,file=$ISO \
-device virtio-scsi-pci,id=scsi0 \
-device scsi-cd,drive=boot-cd,bus=scsi0.0,bootindex=0 \

  "Makefile" assumes that "mkdosfs", "mtools", and "genisoimage" are
  present.

Cc: "Michael S. Tsirkin" 
Cc: Ard Biesheuvel 
Cc: Gerd Hoffmann 
Cc: Igor Mammedov 
Cc: Philippe Mathieu-Daudé 
Cc: Shannon Zhao 
Signed-off-by: Laszlo Ersek 
Reviewed-by: Philippe Mathieu-Daudé 
Tested-by: Philippe Mathieu-Daudé 
Message-Id: <20190204160325.4914-5-ler...@redhat.com>
Reviewed-by: Igor Mammedov 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 tests/uefi-test-tools/.gitignore |   3 +
 tests/uefi-test-tools/Makefile   | 106 ++
 tests/uefi-test-tools/build.sh   | 145 +++
 3 files changed, 254 insertions(+)
 create mode 100644 tests/uefi-test-tools/.gitignore
 create mode 100644 tests/uefi-test-tools/Makefile
 create mode 100755 tests/uefi-test-tools/build.sh

diff --git a/tests/uefi-test-tools/.gitignore b/tests/uefi-test-tools/.gitignore
new file mode 100644
index 00..9f246701de
--- /dev/null
+++ b/tests/uefi-test-tools/.gitignore
@@ -0,0 +1,3 @@
+Build
+Conf
+log
diff --git a/tests/uefi-test-tools/Makefile b/tests/uefi-test-tools/Makefile
new file mode 100644
index 00..1d78bc14d5
--- /dev/null
+++ b/tests/uefi-test-tools/Makefile
@@ -0,0 +1,106 @@
+# Makefile for the test helper UEFI applications that run in guests.
+#
+# Copyright (C) 2019, Red Hat, Inc.
+#
+# This program and the accompanying materials are licensed and made available
+# under the terms and conditions of the BSD License that accompanies this
+# distribution. The full text of the license may be found at
+# .
+#
+# THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, WITHOUT
+# WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
+
+edk2_dir  := ../../roms/edk2
+images_dir:= ../data/uefi-boot-images
+emulation_targets := arm aarch64 i386 x86_64
+uefi_binaries := bios-tables-test
+intermediate_suffixes := .efi .fat .iso.raw
+
+images: $(foreach binary,$(uefi_binaries), \
+   $(foreach target,$(emulation_targets), \
+   $(images_dir)/$(binary).$(target).iso.qcow2))
+
+# Preserve all intermediate targets if the build succeeds.
+# - Intermediate targets help with development & debugging.
+# - Preserving intermediate targets also keeps spurious changes out of the
+#   final build products, in case the user re-runs "make" without any changes
+#   to the UEFI source code. Normally, the intermediate files would have been
+#   removed by the last "make" invocation, hence the re-run would rebuild them
+#   from the unchanged UEFI sources. Unfortunately, the "mkdosfs" and
+#   "genisoimage" utilities embed timestamp-based information in their outputs,
+#   which causes git to report differences for the tracked qcow2 ISO images.
+.SECONDARY: $(foreach binary,$(uefi_binaries), \
+   $(foreach target,$(emulation_targets), \
+   $(foreach suffix,$(intermediate_suffixes), \
+   Build/$(binary).$(target)$(suffix
+
+# In the pattern rules below, the stem (%, $*) stands for
+# "$(binary).$(target)".
+
+# Convert the raw ISO image to a qcow2 one, enabling compression, and using a
+# small cluster size. This allows for small binary files under git control,
+# hence for small binary patches.
+$(images_dir)/%.iso.qcow2: Build/%.iso.raw
+   mkdir -p -- $(images_dir)
+   $${QTEST_QEMU_IMG:-qemu-img} convert -f raw -O qcow2 -c \
+   -o cluster_size=512 -- $< $@
+
+# Embed the "UEFI system partition" into an

[Qemu-devel] [PULL 12/26] tests: introduce "uefi-test-tools" with the BiosTablesTest UEFI app

2019-02-21 Thread Michael S. Tsirkin

From: Laszlo Ersek 

The "bios-tables-test" program in QEMU's test suite locates the RSD PTR
ACPI table in guest RAM, and (chasing pointers to other ACPI tables)
performs various sanity checks on the QEMU-generated and
firmware-installed tables.

Currently this set of test cases doesn't work with UEFI guests. The ACPI
spec defines distinct methods for OSPM to locate the RSD PTR on
traditional BIOS vs. UEFI platforms, and the UEFI method is more difficult
to implement from the hypervisor side with just raw guest memory access.

Add a UEFI application (to be booted in the UEFI guest) that populates a
small, MB-aligned structure in guest RAM. The structure begins with a
signature GUID. The hypervisor should loop over all MB-aligned pages in
guest RAM until one matches the signature GUID at offset 0, at which point
the hypervisor can fetch the RSDP address field(s) from the structure.

QEMU's test logic currently spins on a pre-determined guest address, until
that address assumes a magic value. The method described in this patch is
conceptually the same ("busy loop until match is found"), except there is
no hard-coded address. This plays a lot more nicely with UEFI guest
firmware (we'll be able to use the normal page allocation UEFI service).
Given the size of EFI_GUID (16 bytes -- 128 bits), mismatches should be
astronomically unlikely. In addition, given the typical guest RAM size for
such tests (128 MB), there are 128 locations to check in one iteration of
the "outer" loop, which shouldn't introduce an intolerable delay after the
guest stores the RSDP address(es), and then the GUID.

The GUID that the hypervisor should search for is

  AB87A6B1-2034-BDA0-71BD-375007757785

Expressed as a byte array:

 {
   0xb1, 0xa6, 0x87, 0xab,
   0x34, 0x20,
   0xa0, 0xbd,
   0x71, 0xbd, 0x37, 0x50, 0x07, 0x75, 0x77, 0x85
 }

Note that in the patch, we define "gBiosTablesTestGuid" with all bits
inverted. This is a simple method to prevent the UEFI binary, which
incorporates "gBiosTablesTestGuid", from matching the actual GUID in guest
RAM.

The UEFI application is written against the edk2 framework, which was
introduced earlier as a git submodule. The next patch will provide build
scripts for maintainers.

The source code follows the edk2 coding style, and is licensed under the
2-clause BSDL (in case someone would like to include UefiTestToolsPkg
content in a different edk2 platform).

The "UefiTestToolsPkg.dsc" platform description file resolves the used
edk2 library classes to instances (= library implementations) such that
the UEFI binaries inherit no platform dependencies. They are expected to
run on any system that conforms to the UEFI-2.3.1 spec (which was released
in 2012). The arch-specific build options are carried over from edk2's
ArmVirtPkg and OvmfPkg platforms.

Cc: "Michael S. Tsirkin" 
Cc: Ard Biesheuvel 
Cc: Gerd Hoffmann 
Cc: Igor Mammedov 
Cc: Philippe Mathieu-Daudé 
Cc: Shannon Zhao 
Signed-off-by: Laszlo Ersek 
Message-Id: <20190204160325.4914-4-ler...@redhat.com>
Reviewed-by: Igor Mammedov 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 .../Include/Guid/BiosTablesTest.h |  67 +
 .../BiosTablesTest/BiosTablesTest.c   | 130 ++
 tests/uefi-test-tools/LICENSE |  25 
 .../BiosTablesTest/BiosTablesTest.inf |  41 ++
 .../UefiTestToolsPkg/UefiTestToolsPkg.dec |  27 
 .../UefiTestToolsPkg/UefiTestToolsPkg.dsc |  69 ++
 6 files changed, 359 insertions(+)
 create mode 100644 
tests/uefi-test-tools/UefiTestToolsPkg/Include/Guid/BiosTablesTest.h
 create mode 100644 
tests/uefi-test-tools/UefiTestToolsPkg/BiosTablesTest/BiosTablesTest.c
 create mode 100644 tests/uefi-test-tools/LICENSE
 create mode 100644 
tests/uefi-test-tools/UefiTestToolsPkg/BiosTablesTest/BiosTablesTest.inf
 create mode 100644 tests/uefi-test-tools/UefiTestToolsPkg/UefiTestToolsPkg.dec
 create mode 100644 tests/uefi-test-tools/UefiTestToolsPkg/UefiTestToolsPkg.dsc

diff --git 
a/tests/uefi-test-tools/UefiTestToolsPkg/Include/Guid/BiosTablesTest.h 
b/tests/uefi-test-tools/UefiTestToolsPkg/Include/Guid/BiosTablesTest.h
new file mode 100644
index 00..0b72c61254
--- /dev/null
+++ b/tests/uefi-test-tools/UefiTestToolsPkg/Include/Guid/BiosTablesTest.h
@@ -0,0 +1,67 @@
+/** @file
+  Expose the address(es) of the ACPI RSD PTR table(s) in a MB-aligned structure
+  to the hypervisor.
+
+  The hypervisor locates the MB-aligned structure based on the signature GUID
+  that is at offset 0 in the structure. Once the RSD PTR address(es) are
+  retrieved, the hypervisor may perform various ACPI checks.
+
+  This feature is a development aid, for supporting ACPI table unit tests in
+  hypervisors. Do not enable in production builds.
+
+  Copyright (C) 2019, Red Hat, Inc.
+
+  This program and the accompanying materials are licensed and made available
+  under the terms and conditions of the BSD License that accompanies this
+

[Qemu-devel] [PATCH v3 2/3] target/arm: Rebuild hflags at el changes and MSR writes

2019-02-21 Thread Richard Henderson

Now setting, but not relying upon, env->hflags.

Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
v2: Fixed partial conversion to assignment to env->hflags.
---
 target/arm/internals.h |  1 +
 linux-user/syscall.c   |  1 +
 target/arm/cpu.c   |  1 +
 target/arm/helper-a64.c|  3 +++
 target/arm/helper.c|  2 ++
 target/arm/machine.c   |  1 +
 target/arm/op_helper.c |  1 +
 target/arm/translate-a64.c |  6 +-
 target/arm/translate.c | 14 --
 9 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/target/arm/internals.h b/target/arm/internals.h
index 8c1b813364..235f4fafec 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -970,5 +970,6 @@ ARMVAParameters aa64_va_parameters(CPUARMState *env, 
uint64_t va,
 
 uint32_t rebuild_hflags_a32(CPUARMState *env, int el);
 uint32_t rebuild_hflags_a64(CPUARMState *env, int el);
+void rebuild_hflags_any(CPUARMState *env);
 
 #endif
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 5bbb72f3d5..123f342bdc 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -9691,6 +9691,7 @@ static abi_long do_syscall1(void *cpu_env, int num, 
abi_long arg1,
 aarch64_sve_narrow_vq(env, vq);
 }
 env->vfp.zcr_el[1] = vq - 1;
+arm_rebuild_hflags(env);
 ret = vq * 16;
 }
 return ret;
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index edf6e0e1f1..e4da513eb3 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -390,6 +390,7 @@ static void arm_cpu_reset(CPUState *s)
 
 hw_breakpoint_update_all(cpu);
 hw_watchpoint_update_all(cpu);
+arm_rebuild_hflags(env);
 }
 
 bool arm_cpu_exec_interrupt(CPUState *cs, int interrupt_request)
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index 70850e564d..17200f1288 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -995,6 +995,7 @@ void HELPER(exception_return)(CPUARMState *env, uint64_t 
new_pc)
 } else {
 env->regs[15] = new_pc & ~0x3;
 }
+env->hflags = rebuild_hflags_a32(env, new_el);
 qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
   "AArch32 EL%d PC 0x%" PRIx32 "\n",
   cur_el, new_el, env->regs[15]);
@@ -1006,10 +1007,12 @@ void HELPER(exception_return)(CPUARMState *env, 
uint64_t new_pc)
 }
 aarch64_restore_sp(env, new_el);
 env->pc = new_pc;
+env->hflags = rebuild_hflags_a64(env, new_el);
 qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
   "AArch64 EL%d PC 0x%" PRIx64 "\n",
   cur_el, new_el, env->pc);
 }
+
 /*
  * Note that cur_el can never be 0.  If new_el is 0, then
  * el0_a64 is return_to_aa64, else el0_a64 is ignored.
diff --git a/target/arm/helper.c b/target/arm/helper.c
index 29486a09f6..1140739d6b 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -9201,6 +9201,7 @@ static void take_aarch32_exception(CPUARMState *env, int 
new_mode,
 env->regs[14] = env->regs[15] + offset;
 }
 env->regs[15] = newpc;
+env->hflags = rebuild_hflags_a32(env, arm_current_el(env));
 }
 
 static void arm_cpu_do_interrupt_aarch32_hyp(CPUState *cs)
@@ -9546,6 +9547,7 @@ static void arm_cpu_do_interrupt_aarch64(CPUState *cs)
 
 pstate_write(env, PSTATE_DAIF | new_mode);
 env->aarch64 = 1;
+env->hflags = rebuild_hflags_a64(env, new_el);
 aarch64_restore_sp(env, new_el);
 
 env->pc = addr;
diff --git a/target/arm/machine.c b/target/arm/machine.c
index 124192bfc2..e944d6b736 100644
--- a/target/arm/machine.c
+++ b/target/arm/machine.c
@@ -743,6 +743,7 @@ static int cpu_post_load(void *opaque, int version_id)
 if (!kvm_enabled()) {
 pmu_op_finish(>env);
 }
+arm_rebuild_hflags(>env);
 
 return 0;
 }
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index c998eadfaa..f82eeae7e4 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -571,6 +571,7 @@ void HELPER(cpsr_write_eret)(CPUARMState *env, uint32_t val)
  */
 env->regs[15] &= (env->thumb ? ~1 : ~3);
 
+env->hflags = rebuild_hflags_a32(env, arm_current_el(env));
 qemu_mutex_lock_iothread();
 arm_call_el_change_hook(arm_env_get_cpu(env));
 qemu_mutex_unlock_iothread();
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index af8e4fd4be..a786c7ef5f 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -1841,11 +1841,15 @@ static void handle_sys(DisasContext *s, uint32_t insn, 
bool isread,
 /* I/O operations must end the TB here (whether read or write) */
 gen_io_end();
 s->base.is_jmp = DISAS_UPDATE;
-} else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
+}
+if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
 /* We

[Qemu-devel] [PATCH v3 3/3] target/arm: Rely on hflags correct in cpu_get_tb_cpu_state

2019-02-21 Thread Richard Henderson

This is the payoff.

From perf record -g data of ubuntu 18 boot and shutdown:

BEFORE:

-   23.02% 2.82%  qemu-system-aar  [.] helper_lookup_tb_ptr
   - 20.22% helper_lookup_tb_ptr
  + 10.05% tb_htable_lookup
  - 9.13% cpu_get_tb_cpu_state
   3.20% aa64_va_parameters_both
   0.55% fp_exception_el

-   11.66% 4.74%  qemu-system-aar  [.] cpu_get_tb_cpu_state
   - 6.96% cpu_get_tb_cpu_state
3.63% aa64_va_parameters_both
0.60% fp_exception_el
0.53% sve_exception_el

AFTER:

-   16.40% 3.40%  qemu-system-aar  [.] helper_lookup_tb_ptr
   - 13.03% helper_lookup_tb_ptr
  + 11.19% tb_htable_lookup
0.55% cpu_get_tb_cpu_state

 0.98% 0.71%  qemu-system-aar  [.] cpu_get_tb_cpu_state

 0.87% 0.24%  qemu-system-aar  [.] rebuild_hflags_a64

Before, helper_lookup_tb_ptr is the second hottest function in the
application, consuming almost a quarter of the runtime.  Within the
entire execution, cpu_get_tb_cpu_state consumes about 12%.

After, helper_lookup_tb_ptr has dropped to the fourth hottest function,
with consumption dropping to a sixth of the runtime.  Within the
entire execution, cpu_get_tb_cpu_state has dropped below 1%, and the
supporting function to rebuild hflags also consumes about 1%.

Assertions are retained for --enable-debug-tcg.

Tested-by: Alex Bennée 
Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
v2: Retain asserts for future debugging.
---
 target/arm/helper.c | 20 +++-
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/target/arm/helper.c b/target/arm/helper.c
index 1140739d6b..0d19333be0 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -14027,19 +14027,29 @@ void HELPER(rebuild_hflags_a64)(CPUARMState *env, 
uint32_t el)
 void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
   target_ulong *cs_base, uint32_t *pflags)
 {
-int current_el = arm_current_el(env);
-uint32_t flags;
+uint32_t flags = env->hflags;
 uint32_t pstate_for_ss;
 
+#ifdef CONFIG_DEBUG_TCG
+{
+int el = arm_current_el(env);
+uint32_t check_flags;
+if (is_a64(env)) {
+check_flags = rebuild_hflags_a64(env, el);
+} else {
+check_flags = rebuild_hflags_a32(env, el);
+}
+g_assert_cmphex(flags, ==, check_flags);
+}
+#endif
+
 *cs_base = 0;
-if (is_a64(env)) {
+if (FIELD_EX32(flags, TBFLAG_ANY, AARCH64_STATE)) {
 *pc = env->pc;
-flags = rebuild_hflags_a64(env, current_el);
 flags = FIELD_DP32(flags, TBFLAG_A64, BTYPE, env->btype);
 pstate_for_ss = env->pstate;
 } else {
 *pc = env->regs[15];
-flags = rebuild_hflags_a32(env, current_el);
 flags = FIELD_DP32(flags, TBFLAG_A32, THUMB, env->thumb);
 flags = FIELD_DP32(flags, TBFLAG_A32, CONDEXEC, env->condexec_bits);
 flags = FIELD_DP32(flags, TBFLAG_A32, VECLEN, env->vfp.vec_len);
-- 
2.17.2

Re: [Qemu-devel] [PATCH v6 1/7] vhost-user: Support transferring inflight buffer between qemu and backend

2019-02-21 Thread Yongji Xie

On Fri, 22 Feb 2019 at 01:27, Michael S. Tsirkin  wrote:
>
> On Mon, Feb 18, 2019 at 06:27:42PM +0800, elohi...@gmail.com wrote:
> > From: Xie Yongji 
> >
> > This patch introduces two new messages VHOST_USER_GET_INFLIGHT_FD
> > and VHOST_USER_SET_INFLIGHT_FD to support transferring a shared
> > buffer between qemu and backend.
> >
> > Firstly, qemu uses VHOST_USER_GET_INFLIGHT_FD to get the
> > shared buffer from backend. Then qemu should send it back
> > through VHOST_USER_SET_INFLIGHT_FD each time we start vhost-user.
> >
> > This shared buffer is used to track inflight I/O by backend.
> > Qemu should retrieve a new one when vm reset.
> >
> > Signed-off-by: Xie Yongji 
> > Signed-off-by: Chai Wen 
> > Signed-off-by: Zhang Yu 
> > ---
> >  docs/interop/vhost-user.txt   | 264 ++
> >  hw/virtio/vhost-user.c| 107 
> >  hw/virtio/vhost.c |  96 +++
> >  include/hw/virtio/vhost-backend.h |  10 ++
> >  include/hw/virtio/vhost.h |  18 ++
> >  5 files changed, 495 insertions(+)
> >
> > diff --git a/docs/interop/vhost-user.txt b/docs/interop/vhost-user.txt
> > index c2194711d9..61c6d0e415 100644
> > --- a/docs/interop/vhost-user.txt
> > +++ b/docs/interop/vhost-user.txt
> > @@ -142,6 +142,17 @@ Depending on the request type, payload can be:
> > Offset: a 64-bit offset of this area from the start of the
> > supplied file descriptor
> >
> > + * Inflight description
> > +   -
> > +   | mmap size | mmap offset | num queues | queue size |
> > +   -
> > +
> > +   mmap size: a 64-bit size of area to track inflight I/O
> > +   mmap offset: a 64-bit offset of this area from the start
> > +of the supplied file descriptor
> > +   num queues: a 16-bit number of virtqueues
> > +   queue size: a 16-bit size of virtqueues
> > +
> >  In QEMU the vhost-user message is implemented with the following struct:
> >
> >  typedef struct VhostUserMsg {
> > @@ -157,6 +168,7 @@ typedef struct VhostUserMsg {
> >  struct vhost_iotlb_msg iotlb;
> >  VhostUserConfig config;
> >  VhostUserVringArea area;
> > +VhostUserInflight inflight;
> >  };
> >  } QEMU_PACKED VhostUserMsg;
> >
> > @@ -175,6 +187,7 @@ the ones that do:
> >   * VHOST_USER_GET_PROTOCOL_FEATURES
> >   * VHOST_USER_GET_VRING_BASE
> >   * VHOST_USER_SET_LOG_BASE (if VHOST_USER_PROTOCOL_F_LOG_SHMFD)
> > + * VHOST_USER_GET_INFLIGHT_FD (if VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)
> >
> >  [ Also see the section on REPLY_ACK protocol extension. ]
> >
> > @@ -188,6 +201,7 @@ in the ancillary data:
> >   * VHOST_USER_SET_VRING_CALL
> >   * VHOST_USER_SET_VRING_ERR
> >   * VHOST_USER_SET_SLAVE_REQ_FD
> > + * VHOST_USER_SET_INFLIGHT_FD (if VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD)
> >
> >  If Master is unable to send the full message or receives a wrong reply it 
> > will
> >  close the connection. An optional reconnection mechanism can be 
> > implemented.
> > @@ -382,6 +396,235 @@ If VHOST_USER_PROTOCOL_F_SLAVE_SEND_FD protocol 
> > feature is negotiated,
> >  slave can send file descriptors (at most 8 descriptors in each message)
> >  to master via ancillary data using this fd communication channel.
> >
> > +Inflight I/O tracking
> > +-
> > +
> > +To support reconnecting after restart or crash, slave may need to resubmit
> > +inflight I/Os. If virtqueue is processed in order, we can easily achieve
> > +that by getting the inflight descriptors from descriptor table (split 
> > virtqueue)
> > +or descriptor ring (packed virtqueue). However, it can't work when we 
> > process
> > +descriptors out-of-order because some entries which store the information 
> > of
> > +inflight descriptors in available ring (split virtqueue) or descriptor
> > +ring (packed virtqueue) might be overrided by new entries. To solve this
> > +problem, slave need to allocate an extra buffer to store this information 
> > of inflight
> > +descriptors and share it with master for persistent. 
> > VHOST_USER_GET_INFLIGHT_FD and
> > +VHOST_USER_SET_INFLIGHT_FD are used to transfer this buffer between master
> > +and slave. And the format of this buffer is described below:
> > +
> > +---
> > +| queue0 region | queue1 region | ... | queueN region |
> > +---
> > +
> > +N is the number of available virtqueues. Slave could get it from num queues
> > +field of VhostUserInflight.
> > +
> > +For split virtqueue, queue region can be implemented as:
> > +
> > +typedef struct DescStateSplit {
> > +/* Indicate whether this descriptor is inflight or not.
> > + * Only available for head-descriptor. */
> > +uint8_t inflight;
> > +
> > +/* Padding */
> > +uint8_t padding;
> > +
> > +/* Link to the last processed entry */
> > +uint16_t next;

[Qemu-devel] [PULL 14/26] tests/data: introduce "uefi-boot-images" with the "bios-tables-test" ISOs

2019-02-21 Thread Michael S. Tsirkin

From: Laszlo Ersek 

Add UEFI-bootable qcow2-compressed ISO images built from:

  tests/uefi-test-tools/UefiTestToolsPkg/BiosTablesTest

Cc: "Michael S. Tsirkin" 
Cc: Ard Biesheuvel 
Cc: Gerd Hoffmann 
Cc: Igor Mammedov 
Cc: Philippe Mathieu-Daudé 
Cc: Shannon Zhao 
Signed-off-by: Laszlo Ersek 
Message-Id: <20190204160325.4914-6-ler...@redhat.com>
Reviewed-by: Igor Mammedov 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 .../bios-tables-test.aarch64.iso.qcow2  | Bin 0 -> 11776 bytes
 .../bios-tables-test.arm.iso.qcow2  | Bin 0 -> 11776 bytes
 .../bios-tables-test.i386.iso.qcow2 | Bin 0 -> 12800 bytes
 .../bios-tables-test.x86_64.iso.qcow2   | Bin 0 -> 13312 bytes
 4 files changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 
tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2
 create mode 100644 tests/data/uefi-boot-images/bios-tables-test.arm.iso.qcow2
 create mode 100644 tests/data/uefi-boot-images/bios-tables-test.i386.iso.qcow2
 create mode 100644 
tests/data/uefi-boot-images/bios-tables-test.x86_64.iso.qcow2

diff --git a/tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2 
b/tests/data/uefi-boot-images/bios-tables-test.aarch64.iso.qcow2
new file mode 100644
index 
..ac0b7b1b8f8977977afcb1d43f6870d863fd3777
GIT binary patch
literal 11776
zcmeHtWmsF?wr(J}yHngPxD_unxVr_n;#S1S&)H+Hm3O{FW->>THOEYrs*LOn!p8uyH{~`#1|InfT?jZc(us;#z
z5Bbjk?$N>88hEMwi17e`xs8jP7p19<+kY1K+YV#q=;Gq;c@vaeC;3nulU=JiuqUl-{pVe$^WkZU;X~X
zv;0;6cln=qfq&)y#~%By?2umu)WY9>Xj*^J^$$MQJO8!C%*}%n1Tr^2JUT^H{B;M$xL#~em
ztnskm0j;mAFuqXYN!$I1Xi-Jn3gP343H_|b3=b4IeF%7F_rnwyG4o|{rVrE!<{ZM*
zKsNt@t%Ex-5ogD_4QwNmWycwd1?SuNZBY?UJ5xg4$
zTF7H5Yv%N8l5S_JNJp5*Xh+*6izZYORYXh)|PW|RbAoN^aDG-d$c=3v(p0_p1m
zp~%ob#+k=M9>tspo(A_P1Hd99$b_qcs9^x=rtuH}5E%gjtGh#GxQjY3fA~vRdppvz
z7(14G;+kETI9Gct!+aCZhBeWl(-ty2N_UAj02^WhaF|jvyQ9Ez5(OzulO1dna|8
z)nWS_#vTS><_itP{UC>GI0<9P3j!}|@u7r+0019*ZYqA^avct~s?eaQu!Nwfp!Y$`
zh%_OvAwVFk^{}^w=1r}Xn!y7)DUOa2v|ZQNZH@mZHS}@NH=Gqf9X-porNFi
zbJwxiuUl1}Sn&{MZce}gP+8k}Qx^^Y@oERZ}XJl(TfM%L$egSF$yAE|s)2a<;{O
z<*TRUFCkGRX=;SRp)H|m>`#wHf@4ZT5g^C(nvaIspp!&%8xcbtXDG_{f;D{#R
z=7<7L2bXAylt_RON@{KXN?^
z89{?AZLhxvz{8dtE1VZHQ1cYTRLNP)yq@gYVthaZ=YR4Fw()~Tv)LI6y
zOve)9U%du@du6mA|MYtO+LHdGLk>#GF)JDx^5d}aK)^|?DJAK%!-NV?JhaC}&M``Gm=0uEN)l=^A|i9}
z=ySkkN}*PAAQ6HJVuv+t9^f?FNX}H28mKW06D0#Hz7Nw{Fp+g@Ug8VDi7g$D(z*D%
zyQ%-$my>_KI`LmUw1$5w{?hqlU-*mqpY*pK{r`Dg`4ji#H(~)GfS-b${2Orvej{GN
zZzPZs2mBNwR>&_Te)Ahi_Qij3GV|Y@{Ny)Mc~x}$>y`wpK?b8Ot8$p9^uUgSQ?-JGwQEg6af@wT?(B^j}-tKc8{Apx
zhJT1*55zS$UsbIQjltM5GTq*n*lC?={g8OU&+P%Vm7(yULFu2Jqlx`d999SJj}7$T
zd~s~ARN)*(uPr!hH$QGLov2g<^DZl1X^QRbSEj5^gfpcg+~)6-CdU%qc}SI`6yL5e
z%ld2=?I#Nso!;%%9M!t2)ln9tZ@fvA
zGTdiGEa08h)_8GPX5wTNj3E_!R3m;SaqH#*a5emxhoIIF!xcs)`Muv!`MU?<_p~Se~GMjCQ`tKFKpw>Vsv@v}Iz#x9YH`qw^!R^}{!b*6;Yr
zygjSeMzPt;y-2VP6EdzNA%d_Qr`)Le5%eR-mMr$AX48te=ZfIAq0?J4jEer|;8YRz
zifF-Uc4ctwDR@DPehr%Tt`;FzMEIsiimw)1(~N@;n(;6si*E64|0U%$)s%u6
z&;|wW$z(}=@93SQ_Dz1aC)OBLN|WtqikK*V~u)izA!0P5`-
zuh{B>xyVWHZ4K$r==?)d{e_!sQFie{tX$Vcv~rU!s!+o~a|4eFLtR!%gp~=o
zJF|`JV9R~DFStnPhNSf$uEIp}WB7Vn*qXST_2h*u9}=z?V^ui8lB#$j2X?vyc(im>
zPqVM_c<5*at)*?AyKBeYWv4O6oLZ%Wa1)#z0=4^Wb)^$kKwwt_oi+SJg
z7K;k6aoJrBU*KGuzT@D-*B_gkjnY^tTfIy!AT$XzS9vopH8)4
zWeG07MOJIpG^4;JcyI&2NR|4X(94Z-4J=Zu|kIS2y?)fUP^<*yG)JsA9>ZgU=
zNUOVsMT$DkZ@gS>G&!8lv5PYc(h72~Wzj_$X1%{b<`W1rSHcB$?-bbxss!1a16%}d
zcH+Yc3|?s~sZUFt_Id(bS-r8E>fsWCB7aIJl;>0!I*qa^9#4SOkHl9?cd
z0FS=BTUtwwnHrq+mC#2S~t@{=tn7$)8ni56O_1J%*fdLN0xbIvj5`BdjBgra{Fm
zpWUGMq5V79%|Kv5_Ja!U!fwwyfG8G2%tGV4`W2h*~lL-Mn
zc(424N%OYaLC`Kso~W1CF-OtljjpPHDey|Nqt;y|tWsOe(xKmOIW>+qbk}u^?_)v_
zY8gx3ewu6e%4Mj^I<6(Soq9;d=*N5B5%q_BJMnIQavn2vZ^u6T~OqU>V56B
zkdz)`Z$d68Uxe{%tbB;N$oeY9`l;Igf28Y|`zi>fYRtPwV3+K<#i<$-1l-J)C$gN$>#r`_}X3T=v8@uEn;BO!G$I
z95Yb5Gswrgn?iNx^T#ItCwkEcraq^Y?jD}fZ88me9&|fgTEafbU7wUhSI
zO|~Q=A<`V^J(H}52=l_%Ors3j2cJV8icvnKW@?lc(;Ce>-5*3AWHNoTQ|-SV1kOYV

[Qemu-devel] [PULL 00/26] pci, pc, virtio: fixes, cleanups, tests

2019-02-21 Thread Michael S. Tsirkin

The following changes since commit fc3dbb90f2eb069801bfb4cfe9cbc83cf9c5f4a9:

  Merge remote-tracking branch 'remotes/jnsnow/tags/bitmaps-pull-request' into 
staging (2019-02-21 13:09:33 +0000)

are available in the Git repository at:

  git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream

for you to fetch changes up to 1f8c04f18d2ee2f6ec88217dfd547ab38d2be5c5:

  pci: Sanity test minimum downstream LNKSTA (2019-02-21 12:28:41 -0500)


pci, pc, virtio: fixes, cleanups, tests

Lots of work on tests: BiosTablesTest UEFI app,
vhost-user testing for non-Linux hosts.
Misc cleanups and fixes all over the place

Signed-off-by: Michael S. Tsirkin 


Alex Williamson (1):
  pci: Sanity test minimum downstream LNKSTA

Alexey Kardashevskiy (1):
  pci: Move NVIDIA vendor id to the rest of ids

Changpeng Liu (1):
  contrib/vhost-user-blk: fix the compilation issue

Daniel P. Berrangé (1):
  hw/smbios: fix offset of type 3 sku field

David Gibson (5):
  virtio-balloon: Remove unnecessary MADV_WILLNEED on deflate
  virtio-balloon: Corrections to address verification
  virtio-balloon: Rework ballon_page() interface
  virtio-balloon: Use ram_block_discard_range() instead of raw madvise()
  virtio-balloon: Safely handle BALLOON_PAGE_SIZE < host page size

Laszlo Ersek (5):
  roms: add the edk2 project as a git submodule
  roms: build the EfiRom utility from the roms/edk2 submodule
  tests: introduce "uefi-test-tools" with the BiosTablesTest UEFI app
  tests/uefi-test-tools: add build scripts
  tests/data: introduce "uefi-boot-images" with the "bios-tables-test" ISOs

Paolo Bonzini (9):
  vhost-net: move stubs to a separate file
  vhost-net-user: add stubs for when no virtio-net device is present
  vhost: restrict Linux dependency to kernel vhost
  vhost-user: support cross-endian vnet headers
  vhost-net: compile it on all targets that have virtio-net.
  vhost-net: revamp configure logic
  vhost-user-test: create a main loop per TestServer
  vhost-user-test: small changes to init_hugepagefs
  vhost-user-test: create a temporary directory per TestServer

Peter Xu (1):
  i386/kvm: ignore masked irqs when update msi routes

Philippe Mathieu-Daudé (1):
  Revert "contrib/vhost-user-blk: fix the compilation issue"

Wei Yang (1):
  pc-dimm: use same mechanism for [get|set]_addr

 configure  | 102 -
 Makefile   |   6 +-
 default-configs/virtio.mak |   4 +-
 include/exec/poison.h  |   1 -
 include/hw/firmware/smbios.h   |   1 +
 include/hw/pci/pci_ids.h   |   2 +
 include/hw/virtio/virtio-balloon.h |   3 +
 .../UefiTestToolsPkg/Include/Guid/BiosTablesTest.h |  67 +
 hw/mem/pc-dimm.c   |   4 +-
 hw/net/vhost_net-stub.c|  92 
 hw/net/vhost_net.c |  85 +--
 hw/pci/pcie.c  |  13 +-
 hw/smbios/smbios.c |   1 +
 hw/vfio/pci-quirks.c   |   2 -
 hw/virtio/vhost-backend.c  |  12 +-
 hw/virtio/vhost-user.c |  13 +-
 hw/virtio/vhost.c  |   2 +-
 hw/virtio/virtio-balloon.c | 102 ++---
 net/net.c  |   2 +-
 net/vhost-user-stub.c  |  23 +++
 net/vhost-user.c   |  13 ++
 .../BiosTablesTest/BiosTablesTest.c| 130 +
 tests/vhost-user-test.c| 160 +++--
 .gitmodules|   3 +
 backends/Makefile.objs |   5 +-
 hw/net/Makefile.objs   |   4 +-
 hw/virtio/Makefile.objs|   8 +-
 net/Makefile.objs  |   4 +-
 roms/Makefile  |  13 +-
 roms/edk2  |   1 +
 tests/Makefile.include |   5 +-
 .../bios-tables-test.aarch64.iso.qcow2 | Bin 0 -> 11776 bytes
 .../bios-tables-test.arm.iso.qcow2 | Bin 0 -> 11776 bytes
 .../bios-tables-test.i386.iso.qcow2| Bin 0 -> 12800 bytes
 .../bios-tables-test.x86_64.iso.qcow2  | Bin 0 -> 13312 bytes
 tests/uefi-test-tools/.gitignore   |   3 +
 tests/uefi-test-tools/LICENSE  |  25 
 tests/uefi-test-tools/Makefile | 106

[Qemu-devel] [PATCH v3 1/3] target/arm: Split out recompute_hflags et al

2019-02-21 Thread Richard Henderson

We will use these to minimize the computation for every call to
cpu_get_tb_cpu_state.  For now, the env->hflags variable is not used.

Reviewed-by: Alex Bennée 
Signed-off-by: Richard Henderson 
---
v3: Do not cache VECLEN, VECSTRIDE, VFPEN.
Move HANDLER and STACKCHECK to rebuild_hflags_a32.
---
 target/arm/cpu.h   |  28 +++--
 target/arm/helper.h|   3 +
 target/arm/internals.h |   3 +
 target/arm/helper.c| 254 -
 4 files changed, 175 insertions(+), 113 deletions(-)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 84ae6849c2..30532bf53e 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -240,6 +240,9 @@ typedef struct CPUARMState {
 uint32_t pstate;
 uint32_t aarch64; /* 1 if CPU is in aarch64 state; inverse of PSTATE.nRW */
 
+/* Cached TBFLAGS state.  See below for which bits are included.  */
+uint32_t hflags;
+
 /* Frequently accessed CPSR bits are stored separately for efficiency.
This contains all the other bits.  Use cpsr_{read,write} to access
the whole CPSR.  */
@@ -3065,25 +3068,28 @@ static inline bool 
arm_cpu_data_is_big_endian(CPUARMState *env)
 
 #include "exec/cpu-all.h"
 
-/* Bit usage in the TB flags field: bit 31 indicates whether we are
+/*
+ * Bit usage in the TB flags field: bit 31 indicates whether we are
  * in 32 or 64 bit mode. The meaning of the other bits depends on that.
  * We put flags which are shared between 32 and 64 bit mode at the top
  * of the word, and flags which apply to only one mode at the bottom.
+ *
+ * Unless otherwise noted, these bits are cached in env->hflags.
  */
 FIELD(TBFLAG_ANY, AARCH64_STATE, 31, 1)
 FIELD(TBFLAG_ANY, MMUIDX, 28, 3)
 FIELD(TBFLAG_ANY, SS_ACTIVE, 27, 1)
-FIELD(TBFLAG_ANY, PSTATE_SS, 26, 1)
+FIELD(TBFLAG_ANY, PSTATE_SS, 26, 1) /* Not cached. */
 /* Target EL if we take a floating-point-disabled exception */
 FIELD(TBFLAG_ANY, FPEXC_EL, 24, 2)
 FIELD(TBFLAG_ANY, BE_DATA, 23, 1)
 
 /* Bit usage when in AArch32 state: */
-FIELD(TBFLAG_A32, THUMB, 0, 1)
-FIELD(TBFLAG_A32, VECLEN, 1, 3)
-FIELD(TBFLAG_A32, VECSTRIDE, 4, 2)
-FIELD(TBFLAG_A32, VFPEN, 7, 1)
-FIELD(TBFLAG_A32, CONDEXEC, 8, 8)
+FIELD(TBFLAG_A32, THUMB, 0, 1)  /* Not cached. */
+FIELD(TBFLAG_A32, VECLEN, 1, 3) /* Not cached. */
+FIELD(TBFLAG_A32, VECSTRIDE, 4, 2)  /* Not cached. */
+FIELD(TBFLAG_A32, VFPEN, 7, 1)  /* Not cached. */
+FIELD(TBFLAG_A32, CONDEXEC, 8, 8)   /* Not cached. */
 FIELD(TBFLAG_A32, SCTLR_B, 16, 1)
 /* We store the bottom two bits of the CPAR as TB flags and handle
  * checks on the other bits at runtime
@@ -3105,7 +3111,7 @@ FIELD(TBFLAG_A64, SVEEXC_EL, 2, 2)
 FIELD(TBFLAG_A64, ZCR_LEN, 4, 4)
 FIELD(TBFLAG_A64, PAUTH_ACTIVE, 8, 1)
 FIELD(TBFLAG_A64, BT, 9, 1)
-FIELD(TBFLAG_A64, BTYPE, 10, 2)
+FIELD(TBFLAG_A64, BTYPE, 10, 2) /* Not cached. */
 FIELD(TBFLAG_A64, TBID, 12, 2)
 
 static inline bool bswap_code(bool sctlr_b)
@@ -3190,6 +3196,12 @@ void arm_register_pre_el_change_hook(ARMCPU *cpu, 
ARMELChangeHookFn *hook,
 void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook, void
 *opaque);
 
+/**
+ * arm_rebuild_hflags:
+ * Rebuild the cached TBFLAGS for arbitrary changed processor state.
+ */
+void arm_rebuild_hflags(CPUARMState *env);
+
 /**
  * aa32_vfp_dreg:
  * Return a pointer to the Dn register within env in 32-bit mode.
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 923e8e1525..bbc1a48089 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -89,6 +89,9 @@ DEF_HELPER_4(msr_banked, void, env, i32, i32, i32)
 DEF_HELPER_2(get_user_reg, i32, env, i32)
 DEF_HELPER_3(set_user_reg, void, env, i32, i32)
 
+DEF_HELPER_FLAGS_2(rebuild_hflags_a32, TCG_CALL_NO_RWG, void, env, i32)
+DEF_HELPER_FLAGS_2(rebuild_hflags_a64, TCG_CALL_NO_RWG, void, env, i32)
+
 DEF_HELPER_1(vfp_get_fpscr, i32, env)
 DEF_HELPER_2(vfp_set_fpscr, void, env, i32)
 
diff --git a/target/arm/internals.h b/target/arm/internals.h
index a4bd1becb7..8c1b813364 100644
--- a/target/arm/internals.h
+++ b/target/arm/internals.h
@@ -968,4 +968,7 @@ ARMVAParameters aa64_va_parameters_both(CPUARMState *env, 
uint64_t va,
 ARMVAParameters aa64_va_parameters(CPUARMState *env, uint64_t va,
ARMMMUIdx mmu_idx, bool data);
 
+uint32_t rebuild_hflags_a32(CPUARMState *env, int el);
+uint32_t rebuild_hflags_a64(CPUARMState *env, int el);
+
 #endif
diff --git a/target/arm/helper.c b/target/arm/helper.c
index a018eb23fe..29486a09f6 100644
--- a/target/arm/helper.c
+++ b/target/arm/helper.c
@@ -13886,139 +13886,183 @@ ARMMMUIdx arm_stage1_mmu_idx(CPUARMState *env)
 }
 #endif
 
+static uint32_t common_hflags(CPUARMState *env, int el, ARMMMUIdx mmu_idx,
+  int fp_el, uint32_t flags)
+{
+flags = FIELD_DP32(flags, TBFLAG_ANY, FPEXC_EL, fp_el);
+flags = FIELD_DP32(flags, TBFLAG_ANY, MMUIDX,
+   arm_to_core_mmu_idx(mmu_idx));
+if

[Qemu-devel] [PATCH v3 0/3] target/arm: Reduce overhead of cpu_get_tb_cpu_state

2019-02-21 Thread Richard Henderson

Changes since v2:
  * Do not cache VECLEN, VECSTRIDE, VFPEN.
These variables come from VFP_FPSCR and VFP_FPEXC, not from
system control registers.
  * Move HANDLER and STACKCHECK to rebuild_hflags_a32,
instead of building them in rebuild_hflags_common.

Changes since v1:
  * Apparently I had started a last-minute API change, and failed to
convert all of the users, and also failed to re-test afterward.
  * Retain assertions for --enable-debug-tcg.


r~



Richard Henderson (3):
  target/arm: Split out recompute_hflags et al
  target/arm: Rebuild hflags at el changes and MSR writes
  target/arm: Rely on hflags correct in cpu_get_tb_cpu_state

 target/arm/cpu.h   |  28 ++--
 target/arm/helper.h|   3 +
 target/arm/internals.h |   4 +
 linux-user/syscall.c   |   1 +
 target/arm/cpu.c   |   1 +
 target/arm/helper-a64.c|   3 +
 target/arm/helper.c| 266 ++---
 target/arm/machine.c   |   1 +
 target/arm/op_helper.c |   1 +
 target/arm/translate-a64.c |   6 +-
 target/arm/translate.c |  14 +-
 11 files changed, 212 insertions(+), 116 deletions(-)

-- 
2.17.2

[Qemu-devel] [PULL 16/26] Revert "contrib/vhost-user-blk: fix the compilation issue"

2019-02-21 Thread Michael S. Tsirkin

From: Philippe Mathieu-Daudé 

Commit a56de056c91f8 squashed the following two unrelated commits
at once:

- "contrib/vhost-user-blk: fix the compilation issue"
  (Message-Id: 1547615970-23545-2-git-send-email-changpeng@intel.com)
- "i386/kvm: ignore masked irqs when update msi routes"
  (Message-Id: 20190116030815.27273-5-pet...@redhat.com)

While the git history remains bisectable, having a commit that changes
MSI/MSIX code but describes it as "fix vhost-user-blk compilation" is
rather confusing.
Revert the offending commit to properly apply both patches separately.

Reported-by: Peter Xu 
Fixes: a56de056c91f8
Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20190212140621.17009-2-phi...@redhat.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Peter Xu 
---
 contrib/vhost-user-blk/vhost-user-blk.c |  6 +-
 target/i386/kvm.c   | 14 +++---
 2 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/contrib/vhost-user-blk/vhost-user-blk.c 
b/contrib/vhost-user-blk/vhost-user-blk.c
index 43583f2659..5c2092e13a 100644
--- a/contrib/vhost-user-blk/vhost-user-blk.c
+++ b/contrib/vhost-user-blk/vhost-user-blk.c
@@ -20,10 +20,6 @@
 #include "contrib/libvhost-user/libvhost-user-glib.h"
 #include "contrib/libvhost-user/libvhost-user.h"
 
-#if defined(__linux__)
-#include <linux/fs.h>
-#include <sys/ioctl.h>
-#endif
 
 struct virtio_blk_inhdr {
 unsigned char status;
@@ -525,7 +521,7 @@ vub_get_blocksize(int fd)
 
 #if defined(__linux__) && defined(BLKSSZGET)
 if (ioctl(fd, BLKSSZGET, &blocksize) == 0) {
-return blocksize;
+return blocklen;
 }
 #endif
 
diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index beae1b99da..9af4542fb8 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -3894,7 +3894,7 @@ static QLIST_HEAD(, MSIRouteEntry) msi_route_list = \
 static void kvm_update_msi_routes_all(void *private, bool global,
   uint32_t index, uint32_t mask)
 {
-int cnt = 0, vector;
+int cnt = 0;
 MSIRouteEntry *entry;
 MSIMessage msg;
 PCIDevice *dev;
@@ -3902,19 +3902,11 @@ static void kvm_update_msi_routes_all(void *private, 
bool global,
 /* TODO: explicit route update */
 QLIST_FOREACH(entry, &msi_route_list, list) {
 cnt++;
-vector = entry->vector;
 dev = entry->dev;
-if (msix_enabled(dev) && !msix_is_masked(dev, vector)) {
-msg = msix_get_message(dev, vector);
-} else if (msi_enabled(dev) && !msi_is_masked(dev, vector)) {
-msg = msi_get_message(dev, vector);
-} else {
-/*
- * Either MSI/MSIX is disabled for the device, or the
- * specific message was masked out.  Skip this one.
- */
+if (!msix_enabled(dev) && !msi_enabled(dev)) {
 continue;
 }
+msg = pci_get_msi_message(dev, entry->vector);
 kvm_irqchip_update_msi_route(kvm_state, entry->virq, msg, dev);
 }
 kvm_irqchip_commit_routes(kvm_state);
-- 
MST

[Qemu-devel] [PULL 15/26] pc-dimm: use same mechanism for [get|set]_addr

2019-02-21 Thread Michael S. Tsirkin

From: Wei Yang 

[get|set]_addr are two counterparts to access PCDIMMDevice.addr.

Since we have already set up a property PC_DIMM_ADDR_PROP for this
field and use this mechanism in set_addr, it would be more proper to use
the same mechanism in get_addr.

This patch uses object_property_get_uint() to replace the direct memory
access to make [get|set]_addr with the same mechanism.

Signed-off-by: Wei Yang 

Message-Id: <20190211064629.20186-1-richardw.y...@linux.intel.com>
Reviewed-by: Igor Mammedov 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 hw/mem/pc-dimm.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index 0c9b9e8292..152400b1fc 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -204,9 +204,7 @@ static MemoryRegion *pc_dimm_get_memory_region(PCDIMMDevice 
*dimm, Error **errp)
 
 static uint64_t pc_dimm_md_get_addr(const MemoryDeviceState *md)
 {
-const PCDIMMDevice *dimm = PC_DIMM(md);
-
-return dimm->addr;
+return object_property_get_uint(OBJECT(md), PC_DIMM_ADDR_PROP, 
&error_abort);
 }
 
 static void pc_dimm_md_set_addr(MemoryDeviceState *md, uint64_t addr,
-- 
MST

[Qemu-devel] [PULL 11/26] roms: build the EfiRom utility from the roms/edk2 submodule

2019-02-21 Thread Michael S. Tsirkin

From: Laszlo Ersek 

Building the EfiRom utility from "roms/edk2/BaseTools" should make
"roms/Makefile" more self-contained. Otherwise, we'd call the system-wide
EfiRom for building the combined iPXE option ROMs, but call the sibling
utilities from "roms/edk2/BaseTools" for building "roms/edk2" content.

Cc: "Michael S. Tsirkin" 
Cc: Ard Biesheuvel 
Cc: Gerd Hoffmann 
Cc: Igor Mammedov 
Cc: Philippe Mathieu-Daudé 
Cc: Shannon Zhao 
Signed-off-by: Laszlo Ersek 
Reviewed-by: Gerd Hoffmann 
Reviewed-by: Philippe Mathieu-Daudé 
Tested-by: Philippe Mathieu-Daudé 
Message-Id: <20190204160325.4914-3-ler...@redhat.com>
Reviewed-by: Igor Mammedov 
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
---
 roms/Makefile | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/roms/Makefile b/roms/Makefile
index a6043eff37..78d5dd18c3 100644
--- a/roms/Makefile
+++ b/roms/Makefile
@@ -47,10 +47,7 @@ SEABIOS_EXTRAVERSION="-prebuilt.qemu.org"
 # We need that to combine multiple images (legacy bios,
 # efi ia32, efi x64) into a single rom binary.
 #
-# We try to find it in the path.  You can also pass the location on
-# the command line, i.e. "make EFIROM=/path/to/EfiRom efirom"
-#
-EFIROM ?= $(shell which EfiRom 2>/dev/null)
+EFIROM = edk2/BaseTools/Source/C/bin/EfiRom
 
 default:
@echo "nothing is build by default"
@@ -59,8 +56,7 @@ default:
@echo "  vgabios-- update vgabios binaries (seabios)"
@echo "  sgabios-- update sgabios binaries"
@echo "  pxerom -- update nic roms (bios only)"
-   @echo "  efirom -- update nic roms (bios+efi, this needs"
-   @echo "the EfiRom utility from edk2 / tianocore)"
+   @echo "  efirom -- update nic roms (bios+efi)"
@echo "  slof   -- update slof.bin"
@echo "  skiboot-- update skiboot.lid"
@echo "  u-boot.e500-- update u-boot.e500"
@@ -106,7 +102,7 @@ pxe-rom-%: build-pxe-roms
 
 efirom: $(patsubst %,efi-rom-%,$(pxerom_variants))
 
-efi-rom-%: build-pxe-roms build-efi-roms
+efi-rom-%: build-pxe-roms build-efi-roms $(EFIROM)
$(EFIROM) -f "0x$(VID)" -i "0x$(DID)" -l 0x02 \
-b ipxe/src/bin/$(VID)$(DID).rom \
-ec ipxe/src/bin-i386-efi/$(VID)$(DID).efidrv \
@@ -124,6 +120,8 @@ build-efi-roms: build-pxe-roms
$(patsubst %,bin-i386-efi/%.efidrv,$(pxerom_targets)) \
$(patsubst %,bin-x86_64-efi/%.efidrv,$(pxerom_targets))
 
+$(EFIROM):
+   $(MAKE) -C edk2/BaseTools
 
 slof:
$(MAKE) -C SLOF CROSS=$(powerpc64_cross_prefix) qemu
@@ -150,6 +148,7 @@ clean:
$(MAKE) -C sgabios clean
rm -f sgabios/.depend
$(MAKE) -C ipxe/src veryclean
+   $(MAKE) -C edk2/BaseTools clean
$(MAKE) -C SLOF clean
rm -rf u-boot/build.e500
$(MAKE) -C u-boot-sam460ex distclean
-- 
MST

[Qemu-devel] [PULL 18/26] i386/kvm: ignore masked irqs when update msi routes

2019-02-21 Thread Michael S. Tsirkin

From: Peter Xu 

When we are with intel-iommu device and with IR on, KVM will register
an IEC notifier to detect interrupt updates from the guest and we'll
kick off kvm_update_msi_routes_all() when it happens to make sure
kernel IRQ cache is matching the latest.

Though, kvm_update_msi_routes_all() is buggy in that it ignored the
mask bit of either MSI/MSIX messages and it tries to translate the
message even if the corresponding message was already masked by the
guest driver (hence the MSI/MSIX message will be invalid).

Without this patch, we can receive an error message when we reboot a
guest with both an assigned vfio-pci device and intel-iommu enabled:

  qemu-system-x86_64: vtd_interrupt_remap_msi: MSI address low 32 bit invalid: 
0x0

The error does not affect functionality of the guest since when we
failed to translate we'll just silently continue (which makes sense
since crashing the VM for this seems even worse), but still it's
better to fix it up.

Signed-off-by: Peter Xu 
Reviewed-by: Michael S. Tsirkin 
Message-Id: <20190116030815.27273-5-pet...@redhat.com>
[PMD: this patch was first (incorrectly) introduced as a56de056c91f8]
Signed-off-by: Philippe Mathieu-Daudé 
Message-Id: <20190212140621.17009-4-phi...@redhat.com>
Reviewed-by: Michael S. Tsirkin 
Signed-off-by: Michael S. Tsirkin 
Reviewed-by: Peter Xu 
---
 target/i386/kvm.c | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/target/i386/kvm.c b/target/i386/kvm.c
index 9af4542fb8..beae1b99da 100644
--- a/target/i386/kvm.c
+++ b/target/i386/kvm.c
@@ -3894,7 +3894,7 @@ static QLIST_HEAD(, MSIRouteEntry) msi_route_list = \
 static void kvm_update_msi_routes_all(void *private, bool global,
   uint32_t index, uint32_t mask)
 {
-int cnt = 0;
+int cnt = 0, vector;
 MSIRouteEntry *entry;
 MSIMessage msg;
 PCIDevice *dev;
@@ -3902,11 +3902,19 @@ static void kvm_update_msi_routes_all(void *private, 
bool global,
 /* TODO: explicit route update */
 QLIST_FOREACH(entry, &msi_route_list, list) {
 cnt++;
+vector = entry->vector;
 dev = entry->dev;
-if (!msix_enabled(dev) && !msi_enabled(dev)) {
+if (msix_enabled(dev) && !msix_is_masked(dev, vector)) {
+msg = msix_get_message(dev, vector);
+} else if (msi_enabled(dev) && !msi_is_masked(dev, vector)) {
+msg = msi_get_message(dev, vector);
+} else {
+/*
+ * Either MSI/MSIX is disabled for the device, or the
+ * specific message was masked out.  Skip this one.
+ */
 continue;
 }
-msg = pci_get_msi_message(dev, entry->vector);
 kvm_irqchip_update_msi_route(kvm_state, entry->virq, msg, dev);
 }
 kvm_irqchip_commit_routes(kvm_state);
-- 
MST

Re: [Qemu-devel] [PATCH] virtio-net: do not start queues that are not enabled by the guest

2019-02-21 Thread Michael S. Tsirkin

On Thu, Feb 21, 2019 at 05:40:22PM +0800, Jason Wang wrote:
> 
> On 2019/2/21 下午4:18, Yuri Benditovich wrote:
> 
> For 1.0 device, we can fix the queue_enable, but for 0.9x device how 
> do
> you enable one specific queue in this case? (setting status?)
> 
> 
> Do I understand correctly that for 0.9 device in some cases the device 
> will
> receive feature _MQ set, but will not receive 
> VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET?
> Or the problem is different?
> 
> 
> Let me clarify, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET is used to control the
> number of queue pairs used by device for doing transmission and reception. It
> was not used to enable or disable a virtqueue.
> 
> For 1.0 device, we should use queue_enable in pci cfg to enable and disable
> queue:
> 
> 
> We could do:
> 
> 1) allocate memory and set queue_enable for vq0
> 
> 2) allocate memory and set queue_enable for vq1
> 
> 3) Set vq pairs to 1
> 
> 4) allocate memory and set queue_enable for vq2
> 
> 5) allocate memory and set queue_enable for vq3
> 
> 6) set vq pairs to 2

I do not think spec allows this.

The driver MUST follow this sequence to initialize a device:
1. Reset the device.
2. Set the ACKNOWLEDGE status bit: the guest OS has noticed the device.
3. Set the DRIVER status bit: the guest OS knows how to drive the device.
4. Read device feature bits, and write the subset of feature bits understood by 
the OS and driver to the
device. During this step the driver MAY read (but MUST NOT write) the 
device-specific configuration
fields to check that it can support the device before accepting it.
5. Set the FEATURES_OK status bit. The driver MUST NOT accept new feature bits 
after this step.
6. Re-read device status to ensure the FEATURES_OK bit is still set: otherwise, 
the device does not
support our subset of features and the device is unusable.
7. Perform device-specific setup, including discovery of virtqueues for the 
device, optional per-bus setup,
reading and possibly writing the device’s virtio configuration space, and 
population of virtqueues.
8. Set the DRIVER_OK status bit. At this point the device is “live”.

Thus vqs are setup at step 7.

# of vq pairs are set up through a command which is a special
buffer, and spec says:

The driver MUST NOT send any buffer available notifications to the device 
before setting DRIVER_OK.

> 
> But this requires a proper implementation for queue_enable for vhost which is
> missed in qemu and probably what you really want to do.
> 
> but for 0.9x device, there's no such way to do this. That's the issue.

0.9x there's no queue enable, assumption is PA!=0 means VQ has
been enabled.

> So
> driver must allocate all queues before starting the device, otherwise there's
> no way to enable it afterwards.

As per spec queues must be allocated before DRIVER_OK.

That is universal.

> There're tricks to make it work like what is
> done in your patch, but it depends on a specific implementation like qemu 
> which
> is sub-optimal.
> 
> 
> 
> 
> A fundamental question is what prevents you from just initializing
> all
> queues during driver start? It looks to me this save lots of efforts
> than allocating queue dynamically.
> 
> 
> This is not so trivial in Windows driver, as it does not have objects for 
> queues
> that it does not use. Linux driver first of all allocates all the
> queues and then
> adds Rx/Tx to those it will use. Windows driver first decides how many 
> queues
> it will use then allocates objects for them and initializes them from 
> zero to
> fully functional state.
> 
> 
> Well, you just need to allocate some memory for the virtqueue, there's no need
> to make it visible to the rest until it was enabled.
> 
> Thanks
> 
> 
> 
>

Re: [Qemu-devel] [PATCH v11 7/7] virtio-balloon: VIRTIO_BALLOON_F_FREE_PAGE_HINT

2019-02-21 Thread Wei Wang


On 02/21/2019 06:18 PM, Dr. David Alan Gilbert wrote:

* Wei Wang (wei.w.w...@intel.com) wrote:

On 02/20/2019 09:12 PM, Dr. David Alan Gilbert wrote:

* Wang, Wei W (wei.w.w...@intel.com) wrote:

On Friday, December 14, 2018 7:17 PM, Dr. David Alan Gilbert wrote:

On 12/14/2018 05:56 PM, Dr. David Alan Gilbert wrote:

* Wei Wang (wei.w.w...@intel.com) wrote:

On 12/13/2018 11:45 PM, Dr. David Alan Gilbert wrote:

* Wei Wang (wei.w.w...@intel.com) wrote:

The new feature enables the virtio-balloon device to receive
hints of guest free pages from the free page vq.

A notifier is registered to the migration precopy notifier
chain. The notifier calls free_page_start after the migration
thread syncs the dirty bitmap, so that the free page
optimization starts to clear bits of free pages from the
bitmap. It calls the free_page_stop before the migration
thread syncs the bitmap, which is the end of the current round
of ram save. The free_page_stop is also called to stop the

optimization in the case when there is an error occurred in the process of
ram saving.

Note: balloon will report pages which were free at the time of this

call.

As the reporting happens asynchronously, dirty bit logging
must be enabled before this free_page_start call is made.
Guest reporting must be disabled before the migration dirty bitmap

is synchronized.

Signed-off-by: Wei Wang 
CC: Michael S. Tsirkin 
CC: Dr. David Alan Gilbert 
CC: Juan Quintela 
CC: Peter Xu 

I think I'm OK for this from the migration side, I'd appreciate
someone checking the virtio and aio bits.

I'm not too sure how it gets switched on and off - i.e. if we
get a nice new qemu on a new kernel, what happens when I try and
migrate to the same qemu on an older kernel without these hints?


This feature doesn't rely on the host kernel. Those hints are
reported from the guest kernel.
So migration across different hosts wouldn't affect the use of this

feature.

Please correct me if I didn't get your point.

Ah OK, yes;  now what about migrating from new->old qemu with a new
guest but old machine type?


I think normally, the source QEMU and destination QEMU should have the
same QEMU booting parameter. If the destination QEMU doesn't support
"--device virtio-balloon,free-page-hint=true", which the source QEMU
has, the destination side QEMU will fail to boot, and migration will
not happen then.

Ah that's OK; as long as free-page-hint is false by default that will work fine.

Dave


Hi Dave,

Could we have this feature in QEMU 4.0 (freeze on Mar 12)?

I think so; can you remind me where we're up to:
a) It looks like you've already got the kernel changes merged -
correct?

Yes, they were already merged half year ago.


b) What about the virtio spec changes - where are they upto?

The spec changes are in progress. v1 were posted out, a v2 is in
preparation.


c) Where are the other reviews upto - I think most are reviewed - is
it just 7/7 that is missing the review-by?

7/7 is about the virtio changes, and Michael has given the reviewed-by:
http://lists.nongnu.org/archive/html/qemu-devel/2018-12/msg03732.html

OK, I was going to check with mst for (b) because I prefer it after the
spec changes have been merged, but since mst is OK with it, then we can
merge especially with (a) already merged.


OK, thanks!

Best,
Wei

Re: [Qemu-devel] [PATCH v3 02/25] chardev: Assert IOCanReadHandler can not be negative

2019-02-21 Thread Philippe Mathieu-Daudé

On 2/20/19 12:13 PM, Philippe Mathieu-Daudé wrote:
> On 2/20/19 11:03 AM, Marc-André Lureau wrote:
>> Hi
>>
>> On Wed, Feb 20, 2019 at 2:03 AM Philippe Mathieu-Daudé
>>  wrote:
>>>
>>> The backend should not return a negative length to read.
>>> We will later change the prototype of IOCanReadHandler to return an
>>> unsigned length. Meanwhile make sure the return length is positive.
>>>
>>> Suggested-by: Paolo Bonzini 
>>> Signed-off-by: Philippe Mathieu-Daudé 
>>
>> In such patch, you should do extensive review of existing callbacks,
>> or find a convincing argument that this can't break.
> 
> Argh I missed that.
> 
>> The problem is there are a lot of can_read callbacks, and it's not
>> trivial. The *first* of git-grep is rng_egd_chr_can_read()
>>
>>  57 QSIMPLEQ_FOREACH(req, &s->parent.requests, next) {
>>  58 size += req->size - req->offset;
>>  59 }
>>  60
>>  61 return size;
>>
>> Clearly not obvious if it returns >= 0.
>>
>> Another approach is to look at the caller and the return value
>> handling. If none handle negative values (or would have wrong
>> behaviour with negative values), the assert() is perhaps justified, as
>> it could prevent from doing more harm.
> 
> I'll go and audit all of them.

Actually I already did the work, but it is in the part #2 after this
series, as suggested by Paolo:

https://lists.gnu.org/archive/html/qemu-devel/2018-10/msg02294.html

I'll simply cherry-pick the commit from series #2 before this patch.

Thanks,

Phil.

>>> ---
>>>  chardev/char.c | 5 -
>>>  1 file changed, 4 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/chardev/char.c b/chardev/char.c
>>> index f6d61fa5f8..71ecd32b25 100644
>>> --- a/chardev/char.c
>>> +++ b/chardev/char.c
>>> @@ -159,12 +159,15 @@ int qemu_chr_write(Chardev *s, const uint8_t *buf, 
>>> int len, bool write_all)
>>>  int qemu_chr_be_can_write(Chardev *s)
>>>  {
>>>  CharBackend *be = s->be;
>>> +int receivable_bytes;
>>>
>>>  if (!be || !be->chr_can_read) {
>>>  return 0;
>>>  }
>>>
>>> -return be->chr_can_read(be->opaque);
>>> +receivable_bytes = be->chr_can_read(be->opaque);
>>> +assert(receivable_bytes >= 0);
>>> +return receivable_bytes;
>>>  }
>>>
>>>  void qemu_chr_be_write_impl(Chardev *s, uint8_t *buf, int len)
>>> --
>>> 2.20.1
>>>

Re: [Qemu-devel] [PATCH v8 10/11] authz: add QAuthZPAM object type for authorizing using PAM

2019-02-21 Thread Philippe Mathieu-Daudé

Hi Daniel,

On 2/15/19 4:57 PM, Daniel P. Berrangé wrote:
> From: "Daniel P. Berrange" 
> 
> Add an authorization backend that talks to PAM to check whether the user
> identity is allowed. This only uses the PAM account validation facility,
> which is essentially just a check to see if the provided username is permitted
> access. It doesn't use the authentication or session parts of PAM, since
> that's dealt with by the relevant part of QEMU (eg VNC server).
> 
> Consider starting QEMU with a VNC server and telling it to use TLS with
> x509 client certificates and configuring it to use PAM to validate
> the x509 distinguished name. In this example we're telling it to use PAM
> for the QAuthZ impl with a service name of "qemu-vnc"
> 
>  $ qemu-system-x86_64 \
>  -object tls-creds-x509,id=tls0,dir=/home/berrange/security/qemutls,\
>  endpoint=server,verify-peer=yes \
>  -object authz-pam,id=authz0,service=qemu-vnc \
>  -vnc :1,tls-creds=tls0,tls-authz=authz0
> 
> This requires an /etc/pam/qemu-vnc file to be created with the auth
> rules. A very simple file based whitelist can be setup using
> 
>   $ cat > /etc/pam/qemu-vnc <<EOF
>   account requisite   pam_listfile.so item=user sense=allow file=/etc/qemu/vnc.allow
>   EOF
> 
> The /etc/qemu/vnc.allow file simply contains one username per line. Any
> username not in the file is denied. The usernames in this example are
> the x509 distinguished name from the client's x509 cert.
> 
>   $ cat > /etc/qemu/vnc.allow <<EOF
>   CN=laptop.berrange.com,O=Berrange Home,L=London,ST=London,C=GB
>   EOF
> 
> More interesting would be to configure PAM to use an LDAP backend, so
> that the QEMU authorization check data can be centralized instead of
> requiring each compute host to have a file maintained.
> 
> The main limitation with this PAM module is that the rules apply to all
> QEMU instances on the host. Setting up different rules per VM, would
> require creating a separate PAM service name & config file for every
> guest. An alternative approach for the future might be to not pass in
> the plain username to PAM, but instead combine the VM name or UUID with
> the username. This requires further consideration though.
> 
> Signed-off-by: Daniel P. Berrange 
> ---
>  authz/Makefile.objs |   3 +
>  authz/pamacct.c | 149 
>  authz/trace-events  |   3 +
>  configure   |  37 ++
>  include/authz/pamacct.h | 100 +++
>  qemu-options.hx |  35 ++
>  tests/Makefile.include  |   2 +
>  tests/test-authz-pam.c  | 124 +
>  8 files changed, 453 insertions(+)
>  create mode 100644 authz/pamacct.c
>  create mode 100644 include/authz/pamacct.h
>  create mode 100644 tests/test-authz-pam.c
> 
> diff --git a/authz/Makefile.objs b/authz/Makefile.objs
> index 8351bf181d..ed7b273596 100644
> --- a/authz/Makefile.objs
> +++ b/authz/Makefile.objs
> @@ -2,3 +2,6 @@ authz-obj-y += base.o
>  authz-obj-y += simple.o
>  authz-obj-y += list.o
>  authz-obj-y += listfile.o
> +authz-obj-$(CONFIG_AUTH_PAM) += pamacct.o
> +
> +pamacct.o-libs = -lpam
> diff --git a/authz/pamacct.c b/authz/pamacct.c
> new file mode 100644
> index 0000000000..8fe4c8ee11
> --- /dev/null
> +++ b/authz/pamacct.c
> @@ -0,0 +1,149 @@
> +/*
> + * QEMU PAM authorization driver
> + *
> + * Copyright (c) 2018 Red Hat, Inc.
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see
> <http://www.gnu.org/licenses/>.
> + *
> + */
> +
> +#include "qemu/osdep.h"
> +#include "authz/pamacct.h"
> +#include "authz/trace.h"
> +#include "qom/object_interfaces.h"
> +
> +#include <security/pam_appl.h>
> +
> +
> +static bool qauthz_pam_is_allowed(QAuthZ *authz,
> +  const char *identity,
> +  Error **errp)
> +{
> +QAuthZPAM *pauthz = QAUTHZ_PAM(authz);
> +const struct pam_conv pam_conversation = { 0 };
> +pam_handle_t *pamh = NULL;
> +int ret;
> +
> +trace_qauthz_pam_check(authz, identity, pauthz->service);
> +ret = pam_start(pauthz->service,
> +identity,
> +&pam_conversation,
> +&pamh);
> +if (ret != PAM_SUCCESS) {
> +error_setg(errp, "Unable to start PAM transaction: %s",
> +   pam_strerror(NULL, ret));
> +return

Re: [Qemu-devel] [PULL 8/9] usb: add device checks before redirector calls to usb_ep_get()

2019-02-21 Thread Liam Merwick


On 20/02/2019 11:24, Yuval Shaia wrote:

On Wed, Feb 20, 2019 at 12:13:45PM +0100, Gerd Hoffmann wrote:

From: Liam Merwick 

Add an assert and an explicit check before the two callers to
usb_ep_get() in the USB redirector code to ensure the device
passed in is not NULL.

Signed-off-by: Liam Merwick 
Message-id: 1549460216-25808-9-git-send-email-liam.merw...@oracle.com
Signed-off-by: Gerd Hoffmann 
---
  hw/usb/redirect.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index 18a42d1938..7cb6b120d4 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -1728,6 +1728,7 @@ static void usbredir_ep_info(void *priv,
  USBRedirDevice *dev = priv;
  int i;
  
+assert(dev != NULL);


Suggesting:
 assert(dev)


  for (i = 0; i < MAX_ENDPOINTS; i++) {
  dev->endpoint[i].type = ep_info->type[i];
  dev->endpoint[i].interval = ep_info->interval[i];
@@ -2125,7 +2126,7 @@ static int usbredir_post_load(void *priv, int version_id)
  {
  USBRedirDevice *dev = priv;
  
-if (dev->parser == NULL) {

+if (dev == NULL || dev->parser == NULL) {


Suggesting
 if (!dev || !dev->parser)



The rest of the file tests for '== NULL' so I used that for consistency 
(but in any case, my personal preference is to just use that boolean 
style check with boolean variables).  As it happens, the commits have 
already been pulled.


Regards,
Liam

Re: [Qemu-devel] [PATCH] iotests: handle TypeError for Python3 in test 242

2019-02-21 Thread Cleber Rosa




On 2/18/19 4:25 PM, Philippe Mathieu-Daudé wrote:
> On 2/18/19 9:05 PM, Eric Blake wrote:
>> [adding Eduardo for some python 2-vs-3 advice]
> 
> And Cleber.
> 
>>
>> On 2/18/19 1:59 PM, Andrey Shinkevich wrote:
>>> To write one byte to disk, Python2 may use 'chr' type.
>>> In Python3, conversion to 'byte' type is required.
>>>
>>> Signed-off-by: Andrey Shinkevich 
>>> ---
>>>  tests/qemu-iotests/242 | 9 +++--
>>>  1 file changed, 7 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/tests/qemu-iotests/242 b/tests/qemu-iotests/242
>>> index 16c65ed..6b1f7b8 100755
>>> --- a/tests/qemu-iotests/242
>>> +++ b/tests/qemu-iotests/242
>>> @@ -65,9 +65,14 @@ def toggle_flag(offset):
>>>  with open(disk, "r+b") as f:
>>>  f.seek(offset, 0)
>>>  c = f.read(1)
>>> -toggled = chr(ord(c) ^ bitmap_flag_unknown)
>>> +toggled = ord(c) ^ bitmap_flag_unknown
>>>  f.seek(-1, 1)
>>> -f.write(toggled)
>>> +try:
>>> +# python2
>>> +f.write(chr(toggled))
>>> +except TypeError:
>>> +# python3
>>> +f.write(bytes([toggled]))
>>
>> Looks like it works, but I'm not enough of a python expert to know if
>> there is a more Pythonic elegant approach.
>>

Well, there's no way around the fact that bytes in Python 3 are very
different from bytes in Python 2 (just another name for a string).

What I'd recommend here is to not base the type on the exception, but
choose it depending on the Python version.  Something like:

if sys.version_info.major == 2:
   f.write(chr(toggled))
else:
   f.write(bytes([toggled]))

This is cheaper than raising/catching exceptions, it's self documenting,
and follows the pattern on other tests.

Regards,
- Cleber.

>> If someone else picks it up before my next NBD pull request,
>> Acked-by: Eric Blake 
>>

Re: [Qemu-devel] [PATCH v2 0/3] target/arm: Reduce overhead of cpu_get_tb_cpu_state

2019-02-21 Thread Richard Henderson

On 2/21/19 2:59 PM, Emilio G. Cota wrote:
> Should have done so the first time. Here it is:
> 
> (gdb) r  
> Starting program: /data/src/qemu/build/arm-softmmu/qemu-system-arm -machine 
> type=virt -nographic -m 4096 -netdev user,id=unet,hostfwd=tcp::-:22 -d
> evice virtio-net-device,netdev=unet -drive 
> file=../img/arm/jessie-arm32-die-on-boot.qcow2,id=myblock,index=0,if=none 
> -device virtio-blk-device,drive=m
> yblock -kernel ../img/arm/aarch32-current-linux-kernel-only.img -append 
> console=ttyAMA0\ root=/dev/vda1 -name arm,debug-threads=on -smp 1
> [...]
> VFS: Mounted root (ext4 filesystem) readonly on device 254:1.
> devtmpfs: mounted
> Freeing unused kernel memory: 300K (80669000 - 806b4000)
> **
> ERROR:/data/src/qemu/target/arm/helper.c:14049:cpu_get_tb_cpu_state: 
> assertion failed (flags == check_flags): (0x1008 == 0x10080080)

Thanks, I've now reproduced this.


r~

Re: [Qemu-devel] [PATCH v2 0/3] target/arm: Reduce overhead of cpu_get_tb_cpu_state

2019-02-21 Thread Emilio G. Cota

On Thu, Feb 21, 2019 at 22:36:25 +0000, Alex Bennée wrote:
> Emilio G. Cota  writes:
> > This brings my arm-softmmu bootup+shutdown test to an early death:
> 
> Can you retry with --enable-tcg-debug?

Should have done so the first time. Here it is:

(gdb) r  
Starting program: /data/src/qemu/build/arm-softmmu/qemu-system-arm -machine 
type=virt -nographic -m 4096 -netdev user,id=unet,hostfwd=tcp::-:22 -d
evice virtio-net-device,netdev=unet -drive 
file=../img/arm/jessie-arm32-die-on-boot.qcow2,id=myblock,index=0,if=none 
-device virtio-blk-device,drive=m
yblock -kernel ../img/arm/aarch32-current-linux-kernel-only.img -append 
console=ttyAMA0\ root=/dev/vda1 -name arm,debug-threads=on -smp 1
[...]
VFS: Mounted root (ext4 filesystem) readonly on device 254:1.
devtmpfs: mounted
Freeing unused kernel memory: 300K (80669000 - 806b4000)
**
ERROR:/data/src/qemu/target/arm/helper.c:14049:cpu_get_tb_cpu_state: assertion 
failed (flags == check_flags): (0x1008 == 0x10080080)

Thread 4 "CPU 0/TCG" received signal SIGABRT, Aborted.
[Switching to Thread 0x7fffee2aa700 (LWP 14033)]
__GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
51  ../sysdeps/unix/sysv/linux/raise.c: No such file or directory.
(gdb) bt
#0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
#1  0x74585801 in __GI_abort () at abort.c:79
#2  0x7570f2a5 in g_assertion_message () from 
/usr/lib/x86_64-linux-gnu/libglib-2.0.so.0
#3  0x7570f652 in g_assertion_message_cmpnum () from 
/usr/lib/x86_64-linux-gnu/libglib-2.0.so.0
#4  0x559a339b in cpu_get_tb_cpu_state (env=0x565af060, 
pc=pc@entry=0x7fffee2a98fc, cs_base=cs_base@entry=0x7fffee2a98f8,
pflags=pflags@entry=0x7fffee2a9900) at 
/data/src/qemu/target/arm/helper.c:14049
#5  0x5588fbdb in tb_lookup__cpu_state (cf_mask=524288, 
flags=0x7fffee2a9900, cs_base=0x7fffee2a98f8, pc=0x7fffee2a98fc, cpu=0x0)
at /data/src/qemu/include/exec/tb-lookup.h:28
#6  tb_find (cf_mask=524288, tb_exit=0, last_tb=0x0, cpu=0x0) at 
/data/src/qemu/accel/tcg/cpu-exec.c:404
#7  cpu_exec (cpu=cpu@entry=0x565a6db0) at 
/data/src/qemu/accel/tcg/cpu-exec.c:728
#8  0x5584e49f in tcg_cpu_exec (cpu=0x565a6db0) at 
/data/src/qemu/cpus.c:1429
#9  0x55850623 in qemu_tcg_cpu_thread_fn (arg=arg@entry=0x565a6db0) 
at /data/src/qemu/cpus.c:1733
#10 0x55c83416 in qemu_thread_start (args=) at 
/data/src/qemu/util/qemu-thread-posix.c:502
#11 0x7493d6db in start_thread (arg=0x7fffee2aa700) at 
pthread_create.c:463
#12 0x7466688f in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:95
(gdb)

Thanks,
Emilio

Re: [Qemu-devel] [PATCH] hw/ppc: Use object_initialize_child for correct reference counting

2019-02-21 Thread David Gibson

On Thu, Feb 21, 2019 at 12:24:48PM +0100, Thomas Huth wrote:
> Both functions, object_initialize() and object_property_add_child() increase
> the reference counter of the new object, so one of the references has to be
> dropped afterwards to get the reference counting right. Otherwise the child
> object will not be properly cleaned up when the parent gets destroyed.
> Thus let's use now object_initialize_child() instead to get the reference
> counting here right.
> 
> Suggested-by: Eduardo Habkost 
> Signed-off-by: Thomas Huth 

Nice cleanup for a common pattern, even if it weren't fixing a bug.  Applied.

> ---
>  hw/ppc/pnv.c | 12 ++--
>  hw/ppc/pnv_psi.c |  4 ++--
>  hw/ppc/spapr.c   |  6 +++---
>  3 files changed, 11 insertions(+), 11 deletions(-)
> 
> diff --git a/hw/ppc/pnv.c b/hw/ppc/pnv.c
> index da54086..9e03e9c 100644
> --- a/hw/ppc/pnv.c
> +++ b/hw/ppc/pnv.c
> @@ -736,18 +736,18 @@ static void pnv_chip_power8_instance_init(Object *obj)
>  {
>  Pnv8Chip *chip8 = PNV8_CHIP(obj);
>  
> -object_initialize(>psi, sizeof(chip8->psi), TYPE_PNV_PSI);
> -object_property_add_child(obj, "psi", OBJECT(>psi), NULL);
> +object_initialize_child(obj, "psi",  >psi, sizeof(chip8->psi),
> +TYPE_PNV_PSI, _abort, NULL);
>  object_property_add_const_link(OBJECT(>psi), "xics",
> OBJECT(qdev_get_machine()), _abort);
>  
> -object_initialize(>lpc, sizeof(chip8->lpc), TYPE_PNV_LPC);
> -object_property_add_child(obj, "lpc", OBJECT(>lpc), NULL);
> +object_initialize_child(obj, "lpc",  >lpc, sizeof(chip8->lpc),
> +TYPE_PNV_LPC, _abort, NULL);
>  object_property_add_const_link(OBJECT(>lpc), "psi",
> OBJECT(>psi), _abort);
>  
> -object_initialize(>occ, sizeof(chip8->occ), TYPE_PNV_OCC);
> -object_property_add_child(obj, "occ", OBJECT(>occ), NULL);
> +object_initialize_child(obj, "occ",  >occ, sizeof(chip8->occ),
> +TYPE_PNV_OCC, _abort, NULL);
>  object_property_add_const_link(OBJECT(>occ), "psi",
> OBJECT(>psi), _abort);
>  }
> diff --git a/hw/ppc/pnv_psi.c b/hw/ppc/pnv_psi.c
> index 8ced095..44bc0cb 100644
> --- a/hw/ppc/pnv_psi.c
> +++ b/hw/ppc/pnv_psi.c
> @@ -444,8 +444,8 @@ static void pnv_psi_init(Object *obj)
>  {
>  PnvPsi *psi = PNV_PSI(obj);
>  
> -object_initialize(>ics, sizeof(psi->ics), TYPE_ICS_SIMPLE);
> -object_property_add_child(obj, "ics-psi", OBJECT(>ics), NULL);
> +object_initialize_child(obj, "ics-psi",  >ics, sizeof(psi->ics),
> +TYPE_ICS_SIMPLE, _abort, NULL);
>  }
>  
>  static const uint8_t irq_to_xivr[] = {
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index abf9ebc..6c58dca 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1696,9 +1696,9 @@ static void spapr_create_nvram(sPAPRMachineState *spapr)
>  
>  static void spapr_rtc_create(sPAPRMachineState *spapr)
>  {
> -object_initialize(>rtc, sizeof(spapr->rtc), TYPE_SPAPR_RTC);
> -object_property_add_child(OBJECT(spapr), "rtc", OBJECT(>rtc),
> -  _fatal);
> +object_initialize_child(OBJECT(spapr), "rtc",
> +>rtc, sizeof(spapr->rtc), TYPE_SPAPR_RTC,
> +_fatal, NULL);
>  object_property_set_bool(OBJECT(>rtc), true, "realized",
>_fatal);
>  object_property_add_alias(OBJECT(spapr), "rtc-time", OBJECT(>rtc),

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH v4] ppc: add host-serial and host-model machine attributes

2019-02-21 Thread David Gibson

On Thu, Feb 21, 2019 at 09:25:46AM +0000, Daniel P. Berrangé wrote:
> On Tue, Feb 19, 2019 at 01:55:01PM +1100, David Gibson wrote:
> > On Mon, Feb 18, 2019 at 11:43:49PM +0530, P J P wrote:
> > > From: Prasad J Pandit 
> > > 
> > > On ppc hosts, hypervisor shares following system attributes
> > > 
> > >   - /proc/device-tree/system-id
> > >   - /proc/device-tree/model
> > > 
> > > with a guest. This could lead to information leakage and misuse.[*]
> > > Add machine attributes to control such system information exposure
> > > to a guest.
> > > 
> > > [*] https://wiki.openstack.org/wiki/OSSN/OSSN-0028
> > > 
> > > Reported-by: Daniel P. Berrangé 
> > > Fix-suggested-by: Daniel P. Berrangé 
> > > Signed-off-by: Prasad J Pandit 
> > 
> > Applied to ppc-for-4.0, thanks.
> 
> Could you add the word  "CVE-2019-8934" to the commit message for this
> patch before sending a pull request - either end of subject line, or
> just before the Reported-by line.

Done, thanks.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH v3 3/5] Add migration functions for VFIO devices

2019-02-21 Thread Alex Williamson

On Wed, 20 Feb 2019 02:53:18 +0530
Kirti Wankhede  wrote:

> - Migration function are implemented for VFIO_DEVICE_TYPE_PCI device.
> - Added SaveVMHandlers and implemented all basic functions required for live
>   migration.
> - Added VM state change handler to know running or stopped state of VM.
> - Added migration state change notifier to get notification on migration state
>   change. This state is translated to VFIO device state and conveyed to vendor
>   driver.
> - Whether a VFIO device supports migration is decided based on the migration
>   region query. If the query is successful then migration is supported,
>   else migration is blocked.
> - Structure vfio_device_migration_info is mapped at 0th offset of migration
>   region and should always be trapped by VFIO device's driver. Added both types
>   of access support, trapped or mmapped, for data section of the region.
> - To save device state, read pending_bytes and data_offset using structure
>   vfio_device_migration_info, accordingly copy data from the region.
> - To restore device state, write data_offset and data_size in the structure
>   and write data in the region.
> - To get dirty page bitmap, write start address and pfn count then read count 
> of
>   pfns copied and accordingly read those from the rest of the region or mmaped
>   part of the region. This copy is iterated till page bitmap for all requested
>   pfns are copied.
> 
> Signed-off-by: Kirti Wankhede 
> Reviewed-by: Neo Jia 
> ---
>  hw/vfio/Makefile.objs |   2 +-
>  hw/vfio/migration.c   | 714 
> ++
>  include/hw/vfio/vfio-common.h |  20 ++
>  3 files changed, 735 insertions(+), 1 deletion(-)
>  create mode 100644 hw/vfio/migration.c
> 
> diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs
> index abad8b818c9b..36033d1437c5 100644
> --- a/hw/vfio/Makefile.objs
> +++ b/hw/vfio/Makefile.objs
> @@ -1,4 +1,4 @@
> -obj-y += common.o spapr.o
> +obj-y += common.o spapr.o migration.o
>  obj-$(CONFIG_VFIO_PCI) += pci.o pci-quirks.o display.o
>  obj-$(CONFIG_VFIO_CCW) += ccw.o
>  obj-$(CONFIG_VFIO_PLATFORM) += platform.o
> diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
> new file mode 100644
> index ..d7b6d972c043
> --- /dev/null
> +++ b/hw/vfio/migration.c
> @@ -0,0 +1,714 @@
> +/*
> + * Migration support for VFIO devices
> + *
> + * Copyright NVIDIA, Inc. 2018
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2. See
> + * the COPYING file in the top-level directory.
> + */
> +
> +#include "qemu/osdep.h"
> +#include 
> +
> +#include "hw/vfio/vfio-common.h"
> +#include "cpu.h"
> +#include "migration/migration.h"
> +#include "migration/qemu-file.h"
> +#include "migration/register.h"
> +#include "migration/blocker.h"
> +#include "migration/misc.h"
> +#include "qapi/error.h"
> +#include "exec/ramlist.h"
> +#include "exec/ram_addr.h"
> +#include "pci.h"
> +
> +/*
> + * Flags used as delimiter:
> + * 0x => MSB 32-bit all 1s
> + * 0xef10 => emulated (virtual) function IO

:^)

> + * 0x => 16-bits reserved for flags
> + */
> +#define VFIO_MIG_FLAG_END_OF_STATE  (0xef11ULL)
> +#define VFIO_MIG_FLAG_DEV_CONFIG_STATE  (0xef12ULL)
> +#define VFIO_MIG_FLAG_DEV_SETUP_STATE   (0xef13ULL)
> +#define VFIO_MIG_FLAG_DEV_DATA_STATE(0xef14ULL)
> +
> +static void vfio_migration_region_exit(VFIODevice *vbasedev)
> +{
> +VFIOMigration *migration = vbasedev->migration;
> +
> +if (!migration) {
> +return;
> +}
> +
> +if (migration->region.buffer.size) {
> +vfio_region_exit(>region.buffer);
> +vfio_region_finalize(>region.buffer);
> +}
> +}
> +
> +static int vfio_migration_region_init(VFIODevice *vbasedev)
> +{
> +VFIOMigration *migration = vbasedev->migration;
> +Object *obj = NULL;
> +int ret = -EINVAL;
> +
> +if (!migration) {
> +return ret;
> +}
> +
> +/* Migration support added for PCI device only */
> +if (vbasedev->type == VFIO_DEVICE_TYPE_PCI) {
> +obj = vfio_pci_get_object(vbasedev);
> +}
> +
> +if (!obj) {
> +return ret;
> +}
> +
> +ret = vfio_region_setup(obj, vbasedev, >region.buffer,
> +migration->region.index, "migration");
> +if (ret) {
> +error_report("Failed to setup VFIO migration region %d: %s",
> +  migration->region.index, strerror(-ret));
> +goto err;
> +}
> +
> +if (!migration->region.buffer.size) {
> +ret = -EINVAL;
> +error_report("Invalid region size of VFIO migration region %d: %s",
> + migration->region.index, strerror(-ret));
> +goto err;
> +}
> +
> +if (migration->region.buffer.mmaps) {
> +ret = vfio_region_mmap(>region.buffer);
> +if (ret) {
> +error_report("Failed to mmap VFIO migration region %d: %s",
> +

Re: [Qemu-devel] [PATCH v2 0/3] target/arm: Reduce overhead of cpu_get_tb_cpu_state

2019-02-21 Thread Alex Bennée



Emilio G. Cota  writes:

> On Tue, Feb 19, 2019 at 15:34:18 -0800, Richard Henderson wrote:
>> Changes since v1:
>>   * Apparently I had started a last-minute API change, and failed to
>> convert all of the users, and also failed to re-test afterward.
>>   * Retain assertions for --enable-debug-tcg.
>
> This brings my arm-softmmu bootup+shutdown test to an early death:

Can you retry with --enable-tcg-debug?

>
> [...]
> VFS: Mounted root (ext4 filesystem) readonly on device 254:1.
> devtmpfs: mounted
> Freeing unused kernel memory: 300K (80669000 - 806b4000)
> BUG: unsupported FP instruction in kernel mode
> Internal error: Oops - undefined instruction: 0 [#1] SMP ARM
> Modules linked in:
> CPU: 0 PID: 1 Comm: init Not tainted 4.5.0-ajb #10
> Hardware name: Generic DT based system
> task: eec58000 ti: eec52000 task.ti: eec52000
> PC is at vfp_reload_hw+0xc/0x44
> LR is at __und_usr_fault_32+0x0/0x8
> pc : [<8000ab94>]lr : [<800136c0>]psr: 000c0013
> sp : eec53fb0  ip : 7eb88918  fp : 
> r10: eec520f8  r9 : 8001371c  r8 : 0b00
> r7 : 0001  r6 : eec5204c  r5 : 4000  r4 : 
> r3 : 806e1058  r2 : 76fc1362  r1 : 4000  r0 : ecac8b10
> Flags: nzcv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment user
> Control: 30c5387d  Table: ae4048c0  DAC: fffd
> Process init (pid: 1, stack limit = 0xeec52210)
> Stack: (0xeec53fb0 to 0xeec54000)
> 3fa0: 7eb888f0  003fb0d6 fd90
> 3fc0: 7eb888e0 76fd8050  7eb88ab0  0001223c 76fd8958 7eb88a90
> 3fe0: 7eb88918 7eb888c0 76fbb187 76fc1362 600c0030   
> [<8000ab94>] (vfp_reload_hw) from [<800136c0>] (__und_usr_fault_32+0x0/0x8)
> Code: 0a10 e58ab110 eee85a10 e783a10b (ecba0b20)
> ---[ end trace 26acd422f5b3785f ]---
> Kernel panic - not syncing: Attempted to kill init! exitcode=0x000b
> ---[ end Kernel panic - not syncing: Attempted to kill init! 
> exitcode=0x000b
>
> Thanks,
>
>   Emilio


--
Alex Bennée

Re: [Qemu-devel] [PATCH 05/10] ppc405_boards: Don't size flash memory to match backing image

2019-02-21 Thread David Gibson

On Thu, Feb 21, 2019 at 05:31:30PM +0100, Markus Armbruster wrote:
> Alex Bennée  writes:
> 
> > Markus Armbruster  writes:
> >
> >> Machine "ref405ep" maps its flash memory at address 2^32 - image size.
> >> Image size is rounded up to the next multiple of 64KiB.  Useless,
> >> because pflash_cfi02_realize() fails with "failed to read the initial
> >> flash content" unless the rounding is a no-op.
> >>
> >> If the image size exceeds 0x80000 Bytes, we overlap first SRAM, then
> >> other stuff.  No idea how that would play out, but useful outcomes
> >> seem unlikely.
> >>
> >> Map the flash memory at fixed address 0xFFF80000 with size 512KiB,
> >> regardless of image size, to match the physical hardware.
> >>
> >> Machine "taihu" maps its boot flash memory similarly.  The code even
> >> has a comment /* XXX: should check that size is 2MB */, followed by
> >> disabled code to adjust the size to 2MiB regardless of image size.
> >>
> >> Its code to map its application flash memory looks the same, except
> >> there the XXX comment asks for 32MiB, and the code to adjust the size
> >> isn't disabled.  Note that pflash_cfi02_realize() fails with "failed
> >> to read the initial flash content" for images smaller than 32MiB.
> >>
> >> Map the boot flash memory at fixed address 0xFFE00000 with size 2MiB,
> >> to match the physical hardware.  Delete dead code from application
> >> flash mapping, and simplify some.
> >>
> >> Cc: David Gibson 
> >> Signed-off-by: Markus Armbruster 
> >> ---
> >>  hw/ppc/ppc405_boards.c | 53 +-
> >>  1 file changed, 16 insertions(+), 37 deletions(-)
> >>
> >> diff --git a/hw/ppc/ppc405_boards.c b/hw/ppc/ppc405_boards.c
> >> index f47b15f10e..728154aebb 100644
> >> --- a/hw/ppc/ppc405_boards.c
> >> +++ b/hw/ppc/ppc405_boards.c
> >> @@ -158,7 +158,7 @@ static void ref405ep_init(MachineState *machine)
> >>  target_ulong kernel_base, initrd_base;
> >>  long kernel_size, initrd_size;
> >>  int linux_boot;
> >> -int fl_idx, fl_sectors, len;
> >> +int len;
> >>  DriveInfo *dinfo;
> >>  MemoryRegion *sysmem = get_system_memory();
> >>
> >> @@ -185,26 +185,19 @@ static void ref405ep_init(MachineState *machine)
> >>  #ifdef DEBUG_BOARD_INIT
> >>  printf("%s: register BIOS\n", __func__);
> >>  #endif
> >> -fl_idx = 0;
> >>  #ifdef USE_FLASH_BIOS
> >> -dinfo = drive_get(IF_PFLASH, 0, fl_idx);
> >> +dinfo = drive_get(IF_PFLASH, 0, 0);
> >>  if (dinfo) {
> >> -BlockBackend *blk = blk_by_legacy_dinfo(dinfo);
> >> -
> >> -bios_size = blk_getlength(blk);
> >> -fl_sectors = (bios_size + 65535) >> 16;
> >>  #ifdef DEBUG_BOARD_INIT
> >> -printf("Register parallel flash %d size %lx"
> >> -   " at addr %lx '%s' %d\n",
> >> -   fl_idx, bios_size, -bios_size,
> >> -   blk_name(blk), fl_sectors);
> >> +printf("Register parallel flash\n");
> >>  #endif
> >> -pflash_cfi02_register((uint32_t)(-bios_size),
> >> +bios_size = 0x8;
> >
> >  bios_size = 8 * MiB?
> 
> The next line has base address 0xFFF80000.  I picked 0x80000 to make
> 0xFFF80000 + 0x80000 == 0 mod 2^32 more obvious.
> 
> If I change 0x80000 to 8 * MiB, the size is more obvious, but "at end of
> 32 bit address space" less so.
> 
> If I additionally change the base address back to ((uint32_t)-bios_size,
> "at end of 32 bit address space" is obvious again, but the actual base
> address less so.

I have a weak preference for ((uint32_t)-bios_size), with bios_size =
8 * MiB.

> 
> I don't really care myself.  David, you're the maintainer, do you have a
> preference?
> 
> >> +pflash_cfi02_register(0xFFF8,
> >>NULL, "ef405ep.bios", bios_size,
> >> -  blk, 65536, fl_sectors, 1,
> >> +  dinfo ? blk_by_legacy_dinfo(dinfo) : NULL,
> >> +  65536, bios_size / 65536, 1,
> >
> > 64 * KiB?
> 
> David, same question (two additional instances below).

Here I think 64 * KiB would be nice in each of those places.  Again,
only a weak preference.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH v3 1/5] VFIO KABI for migration interface

2019-02-21 Thread Alex Williamson

On Wed, 20 Feb 2019 02:53:16 +0530
Kirti Wankhede  wrote:

> - Defined MIGRATION region type and sub-type.
> - Used 2 bits to define VFIO device states.
> Bit 0 => 0/1 => _STOPPED/_RUNNING
> Bit 1 => 0/1 => _RESUMING/_SAVING
> Combination of these bits defines VFIO device's state during migration
> _RUNNING => Normal VFIO device running state.
> _STOPPED => VFIO device stopped.
> _SAVING | _RUNNING => vCPUs are running, VFIO device is running but start
>   saving state of device i.e. pre-copy state
> _SAVING | _STOPPED => vCPUs are stopped, VFIO device should be stopped,
>   and save device state, i.e. stop-n-copy state
> _RESUMING => VFIO device resuming state.

Shouldn't we have a non-_RESUMING/_SAVING run state?  If these are
indicating direction of flow, maybe we need two bits:

  00b - None, normal runtime
  01b - Saving
  10b - Resuming
  11b - Invalid/reserved (maybe a Failed state indicator)

> - Defined vfio_device_migration_info structure which will be placed at 0th
>   offset of migration region to get/set VFIO device related information.
>   Defined members of structure and usage on read/write access:
> * device_state: (write only)
> To convey VFIO device state to be transitioned to.

Seems trivial and potentially useful to support read here, we have 30
(or maybe 29) bits yet to define.

> * pending bytes: (read only)
> To get pending bytes yet to be migrated for VFIO device
> * data_offset: (read/write)
> To get or set data offset in migration from where data exist
> during _SAVING and _RESUMING state

What's the use case for writing this?

> * data_size: (write only)
> To convey size of data copied in migration region during _RESUMING
> state

How to know how much is available for read?

> * start_pfn, page_size, total_pfns: (write only)
> To get bitmap of dirty pages from vendor driver from given
> start address for total_pfns.

What would happen if a user wrote in 1MB for page size?  Is the vendor
driver expected to support arbitrary page sizes?  Are we only trying to
convey the page size and would that page size ever be other than
getpagesize()?

> * copied_pfns: (read only)
> To get number of pfns bitmap copied in migration region.
> Vendor driver should copy the bitmap with bits set only for
> pages to be marked dirty in migration region. Vendor driver
> should return 0 if there are 0 pages dirty in requested
> range.

This is useful, but I wonder if it's really a required feature for the
vendor driver.  For instance, with mdev IOMMU support we could wrap an
arbitrary PCI device as mdev, but we don't necessarily have dirty page
tracking.  Would a device need to report -1 here if it wanted to
indicate any page could be dirty if we only know how to collect the
state of the device itself for migration (ie. force the device to be
stopped first).
 
> Migration region looks like:
>  --
> |vfio_device_migration_info|data section  |
> |  | ///  |
>  --
 ^ what's this?

>  ^  ^  ^
>  offset 0-trapped partdata.offset data.size

Isn't data.size above really (data.offset + data.size)?  '.' vs '_'
inconsistency vs above.
 
> Data section is always followed by vfio_device_migration_info
> structure in the region, so data.offset will always be none-0.

This seems exactly backwards from the diagram, data section follows
vfio_device_migration_info.  Also, "non-zero".

> Offset from where data is copied is decided by kernel driver, data

But data_offset is listed as read-write.

> section can be trapped or mapped depending on how kernel driver
> defines data section. If mmapped, then data.offset should be page
> aligned, where as initial section which contain
> vfio_device_migration_info structure might not end at offset which
> is page aligned.
> 
> Signed-off-by: Kirti Wankhede 
> Reviewed-by: Neo Jia 
> ---
>  linux-headers/linux/vfio.h | 65 
> ++
>  1 file changed, 65 insertions(+)
> 
> diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> index 12a7b1dc53c8..1b12a9b95e00 100644
> --- a/linux-headers/linux/vfio.h
> +++ b/linux-headers/linux/vfio.h
> @@ -368,6 +368,71 @@ struct vfio_region_gfx_edid {
>   */
>  #define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1)
>  
> +/* Migration region type and sub-type */
> +#define VFIO_REGION_TYPE_MIGRATION   (2)
> +#define VFIO_REGION_SUBTYPE_MIGRATION(1)
> +
> +/**
> + * Structure vfio_device_migration_info is placed at 0th offset of
> + *

Re: [Qemu-devel] [PATCH v6 2/2] gen_pcie_root_port: Add ACS (Access Control Services) capability

2019-02-21 Thread Alex Williamson

On Thu, 21 Feb 2019 19:13:23 +0100
Knut Omang  wrote:

> Claim ACS support in the generic PCIe root port to allow
> passthrough of individual functions of a device to different
> guests (in a nested virt.setting) with VFIO.
> Without this patch, all functions of a device, such as all VFs of
> an SR/IOV device, will end up in the same IOMMU group.
> A similar situation occurs on Windows with Hyper-V.
> 
> In the single function device case, it also has a small cosmetic
> benefit in that the root port itself is not grouped with
> the device. VFIO handles that situation in that binding rules
> only apply to endpoints, so it does not limit passthrough in
> those cases.
> 
> Signed-off-by: Knut Omang 
> Reviewed-by: Marcel Apfelbaum 
> ---
>  hw/pci-bridge/gen_pcie_root_port.c | 4 
>  hw/pci-bridge/pcie_root_port.c | 4 
>  include/hw/pci/pcie_port.h | 1 +
>  3 files changed, 9 insertions(+)


Reviewed-by: Alex Williamson 

 
> diff --git a/hw/pci-bridge/gen_pcie_root_port.c 
> b/hw/pci-bridge/gen_pcie_root_port.c
> index 9766edb..26bda73 100644
> --- a/hw/pci-bridge/gen_pcie_root_port.c
> +++ b/hw/pci-bridge/gen_pcie_root_port.c
> @@ -20,6 +20,9 @@
>  OBJECT_CHECK(GenPCIERootPort, (obj), TYPE_GEN_PCIE_ROOT_PORT)
>  
>  #define GEN_PCIE_ROOT_PORT_AER_OFFSET   0x100
> +#define GEN_PCIE_ROOT_PORT_ACS_OFFSET \
> +(GEN_PCIE_ROOT_PORT_AER_OFFSET + PCI_ERR_SIZEOF)
> +
>  #define GEN_PCIE_ROOT_PORT_MSIX_NR_VECTOR   1
>  
>  typedef struct GenPCIERootPort {
> @@ -149,6 +152,7 @@ static void gen_rp_dev_class_init(ObjectClass *klass, 
> void *data)
>  rpc->interrupts_init = gen_rp_interrupts_init;
>  rpc->interrupts_uninit = gen_rp_interrupts_uninit;
>  rpc->aer_offset = GEN_PCIE_ROOT_PORT_AER_OFFSET;
> +rpc->acs_offset = GEN_PCIE_ROOT_PORT_ACS_OFFSET;
>  }
>  
>  static const TypeInfo gen_rp_dev_info = {
> diff --git a/hw/pci-bridge/pcie_root_port.c b/hw/pci-bridge/pcie_root_port.c
> index 34ad767..e94d918 100644
> --- a/hw/pci-bridge/pcie_root_port.c
> +++ b/hw/pci-bridge/pcie_root_port.c
> @@ -47,6 +47,7 @@ static void rp_reset(DeviceState *qdev)
>  pcie_cap_deverr_reset(d);
>  pcie_cap_slot_reset(d);
>  pcie_cap_arifwd_reset(d);
> +pcie_acs_reset(d);
>  pcie_aer_root_reset(d);
>  pci_bridge_reset(qdev);
>  pci_bridge_disable_base_limit(d);
> @@ -106,6 +107,9 @@ static void rp_realize(PCIDevice *d, Error **errp)
>  pcie_aer_root_init(d);
>  rp_aer_vector_update(d);
>  
> +if (rpc->acs_offset) {
> +pcie_acs_init(d, rpc->acs_offset);
> +}
>  return;
>  
>  err:
> diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h
> index df242a0..09586f4 100644
> --- a/include/hw/pci/pcie_port.h
> +++ b/include/hw/pci/pcie_port.h
> @@ -78,6 +78,7 @@ typedef struct PCIERootPortClass {
>  int exp_offset;
>  int aer_offset;
>  int ssvid_offset;
> +int acs_offset;/* If nonzero, optional ACS capability offset */
>  int ssid;
>  } PCIERootPortClass;
>

Re: [Qemu-devel] [PATCH v6 1/2] pcie: Add a simple PCIe ACS (Access Control Services) helper function

2019-02-21 Thread Alex Williamson

On Thu, 21 Feb 2019 19:13:22 +0100
Knut Omang  wrote:

> Implementing an ACS capability on downstream ports and multifunction
> endpoints indicates isolation and IOMMU visibility to a finer
> granularity. This creates smaller IOMMU groups in the guest and thus
> more flexibility in assigning endpoints to guest userspace or an L2
> guest.
> 
> Signed-off-by: Knut Omang 
> ---
>  hw/pci/pcie.c  | 38 ++
>  include/hw/pci/pcie.h  |  6 ++
>  include/hw/pci/pcie_regs.h |  4 
>  3 files changed, 48 insertions(+)


Reviewed-by: Alex Williamson 

> 
> diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
> index 230478f..09ebf11 100644
> --- a/hw/pci/pcie.c
> +++ b/hw/pci/pcie.c
> @@ -906,3 +906,41 @@ void pcie_ats_init(PCIDevice *dev, uint16_t offset)
>  
>  pci_set_word(dev->wmask + dev->exp.ats_cap + PCI_ATS_CTRL, 0x800f);
>  }
> +
> +/* ACS (Access Control Services) */
> +void pcie_acs_init(PCIDevice *dev, uint16_t offset)
> +{
> +bool is_downstream = pci_is_express_downstream_port(dev);
> +uint16_t cap_bits = 0;
> +
> +/* For endpoints, only multifunction devs may have an ACS capability: */
> +assert(is_downstream ||
> +   (dev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) ||
> +   PCI_FUNC(dev->devfn));
> +
> +pcie_add_capability(dev, PCI_EXT_CAP_ID_ACS, PCI_ACS_VER, offset,
> +PCI_ACS_SIZEOF);
> +dev->exp.acs_cap = offset;
> +
> +if (is_downstream) {
> +/*
> + * Downstream ports must implement SV, TB, RR, CR, UF, and DT (with
> + * caveats on the latter four that we ignore for simplicity).
> + * Endpoints may also implement a subset of ACS capabilities,
> + * but these are optional if the endpoint does not support
> + * peer-to-peer between functions and thus omitted here.
> + */
> +cap_bits = PCI_ACS_SV | PCI_ACS_TB | PCI_ACS_RR |
> +PCI_ACS_CR | PCI_ACS_UF | PCI_ACS_DT;
> +}
> +
> +pci_set_word(dev->config + offset + PCI_ACS_CAP, cap_bits);
> +pci_set_word(dev->wmask + offset + PCI_ACS_CTRL, cap_bits);
> +}
> +
> +void pcie_acs_reset(PCIDevice *dev)
> +{
> +if (dev->exp.acs_cap) {
> +pci_set_word(dev->config + dev->exp.acs_cap + PCI_ACS_CTRL, 0);
> +}
> +}
> diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
> index 5b82a0d..e30334d 100644
> --- a/include/hw/pci/pcie.h
> +++ b/include/hw/pci/pcie.h
> @@ -79,6 +79,9 @@ struct PCIExpressDevice {
>  
>  /* Offset of ATS capability in config space */
>  uint16_t ats_cap;
> +
> +/* ACS */
> +uint16_t acs_cap;
>  };
>  
>  #define COMPAT_PROP_PCP "power_controller_present"
> @@ -128,6 +131,9 @@ void pcie_add_capability(PCIDevice *dev,
>   uint16_t offset, uint16_t size);
>  void pcie_sync_bridge_lnk(PCIDevice *dev);
>  
> +void pcie_acs_init(PCIDevice *dev, uint16_t offset);
> +void pcie_acs_reset(PCIDevice *dev);
> +
>  void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn);
>  void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t 
> ser_num);
>  void pcie_ats_init(PCIDevice *dev, uint16_t offset);
> diff --git a/include/hw/pci/pcie_regs.h b/include/hw/pci/pcie_regs.h
> index ad4e780..1db86b0 100644
> --- a/include/hw/pci/pcie_regs.h
> +++ b/include/hw/pci/pcie_regs.h
> @@ -175,4 +175,8 @@ typedef enum PCIExpLinkWidth {
>   PCI_ERR_COR_INTERNAL | \
>   PCI_ERR_COR_HL_OVERFLOW)
>  
> +/* ACS */
> +#define PCI_ACS_VER 0x1
> +#define PCI_ACS_SIZEOF  8
> +
>  #endif /* QEMU_PCIE_REGS_H */

Re: [Qemu-devel] [PATCH v2 0/3] target/arm: Reduce overhead of cpu_get_tb_cpu_state

2019-02-21 Thread Emilio G. Cota

On Tue, Feb 19, 2019 at 15:34:18 -0800, Richard Henderson wrote:
> Changes since v1:
>   * Apparently I had started a last-minute API change, and failed to
> convert all of the users, and also failed to re-test afterward.
>   * Retain assertions for --enable-debug-tcg.

This brings my arm-softmmu bootup+shutdown test to an early death:

[...]
VFS: Mounted root (ext4 filesystem) readonly on device 254:1.
devtmpfs: mounted
Freeing unused kernel memory: 300K (80669000 - 806b4000)
BUG: unsupported FP instruction in kernel mode
Internal error: Oops - undefined instruction: 0 [#1] SMP ARM
Modules linked in:
CPU: 0 PID: 1 Comm: init Not tainted 4.5.0-ajb #10
Hardware name: Generic DT based system
task: eec58000 ti: eec52000 task.ti: eec52000
PC is at vfp_reload_hw+0xc/0x44
LR is at __und_usr_fault_32+0x0/0x8
pc : [<8000ab94>]lr : [<800136c0>]psr: 000c0013
sp : eec53fb0  ip : 7eb88918  fp : 
r10: eec520f8  r9 : 8001371c  r8 : 0b00
r7 : 0001  r6 : eec5204c  r5 : 4000  r4 : 
r3 : 806e1058  r2 : 76fc1362  r1 : 4000  r0 : ecac8b10
Flags: nzcv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment user
Control: 30c5387d  Table: ae4048c0  DAC: fffd
Process init (pid: 1, stack limit = 0xeec52210)
Stack: (0xeec53fb0 to 0xeec54000)
3fa0: 7eb888f0  003fb0d6 fd90
3fc0: 7eb888e0 76fd8050  7eb88ab0  0001223c 76fd8958 7eb88a90
3fe0: 7eb88918 7eb888c0 76fbb187 76fc1362 600c0030   
[<8000ab94>] (vfp_reload_hw) from [<800136c0>] (__und_usr_fault_32+0x0/0x8)
Code: 0a10 e58ab110 eee85a10 e783a10b (ecba0b20)
---[ end trace 26acd422f5b3785f ]---
Kernel panic - not syncing: Attempted to kill init! exitcode=0x000b
---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x000b

Thanks,

Emilio

Re: [Qemu-devel] [PATCH 0/1] snip my name and email

2019-02-21 Thread David Kiarie

On Thu, Feb 21, 2019 at 7:09 PM Jan Kiszka  wrote:

> On 21.02.19 17:05, Eric Blake wrote:
> > On 2/21/19 9:53 AM, David Kiarie wrote:
> >> the occurrence of my name and email on the files below may have led to
> >> some confusion in the reporting of a few recent bugs.
> >>
> >> i have therefore chosen to snip it.
> >
> > Dropping an email from the copyright line makes sense; dropping the
> > Copyright declaration altogether is a bit odd (the GPL works only in
> > tandem with a copyright assertion) - but as you are the author of the
> > line and copyright holder of your contributions, I am not in a position
> > to say you are wrong in removing it, only that it looks odd.
> >
>
> Yeah, indeed.
>
> David, also note that you probably have been addressed because
> scripts/get_maintainer.pl will look into the git history of files that
> some
> patch addresses and pick up significant and/or recent contributors from
> there.
> There should be some opt-out statement from that, but I don't recall how.
>
>
Jan, Eblake, i respect your opinion but i still think my original patch
would have been best fit for the simple me.


> Jan
>
> --
> Siemens AG, Corporate Technology, CT RDA IOT SES-DE
> Corporate Competence Center Embedded Linux
>

Re: [Qemu-devel] [PATCH v3] hw/block: better reporting on pflash backing file mismatch

2019-02-21 Thread Philippe Mathieu-Daudé

Hi Alex,

On 2/21/19 7:48 PM, Alex Bennée wrote:
> It looks like there was going to be code to check we had some sort of
> alignment so lets replace it with an actual check. This is a bit more
> useful than the enigmatic "failed to read the initial flash content"
> when we attempt to read the number of bytes the device should have.
> 
> This is a potential confusing stumbling block when you move from using
> -bios to using -drive if=pflash,file=blob,format=raw,readonly for
> loading your firmware code. To mitigate that we automatically pad in
> the read-only case.
> 
> Signed-off-by: Alex Bennée 
> 
> ---
> v3
>   - tweak commit title/commentary
>   - use total_len instead of device_len for checks
>   - if the device is read-only do the padding for them
>   - accept backing_len > total_len (how to warn_report with NULL *errp?)
> ---
>  hw/block/pflash_cfi01.c | 28 +---
>  1 file changed, 21 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c
> index 00c2efd0d7..37d7513c45 100644
> --- a/hw/block/pflash_cfi01.c
> +++ b/hw/block/pflash_cfi01.c
> @@ -714,13 +714,6 @@ static void pflash_cfi01_realize(DeviceState *dev, Error 
> **errp)
>  }
>  device_len = sector_len_per_device * blocks_per_device;
>  
> -/* XXX: to be fixed */
> -#if 0
> -if (total_len != (8 * 1024 * 1024) && total_len != (16 * 1024 * 1024) &&
> -total_len != (32 * 1024 * 1024) && total_len != (64 * 1024 * 1024))
> -return NULL;
> -#endif
> -
>  memory_region_init_rom_device(
>  &pfl->mem, OBJECT(dev),
>  &pflash_cfi01_ops,
> @@ -747,6 +740,27 @@ static void pflash_cfi01_realize(DeviceState *dev, Error 
> **errp)
>  }
>  
>  if (pfl->blk) {
> +/*
> + * Validate the backing store is the right size for pflash
> + * devices. It should be padded to a multiple of the flash
> + * block size. If the device is read-only we can elide the
> + * check and just null pad the region first. If the user
> + * supplies a larger file we silently accept it.
> + */
> +uint64_t backing_len = blk_getlength(pfl->blk);
> +
> +if (backing_len < total_len) {
> +if (pfl->ro) {
> +memset(pfl->storage, 0, total_len);

When you erase a NOR flash sector, all bits are set to 1.
Then you can program a word by setting its bits to 0.
It is not possible to set bits from 0 to 1 on a word boundary,
it is only possible to set bits to 1 by erasing a whole sector (sector
boundary).

The pflash QEMU models are of NOR type (this is different for other flash
technologies).
If we want to set the padded area to the fabric blank state, we should
use the NOR flash erased value of 0xff.

> +total_len = backing_len;
> +} else {
> +error_setg(errp, "device(s) needs %" PRIu64 " bytes, "
> +   "backing file provides only %" PRIu64 " bytes",
> +   total_len, backing_len);
> +return;
> +}
> +}
> +
>  /* read the initial flash content */
>  ret = blk_pread(pfl->blk, 0, pfl->storage, total_len);
>  
>

Re: [Qemu-devel] [PATCH 0/5] QEMU VFIO live migration

2019-02-21 Thread Alex Williamson

Hi Yan,

Thanks for working on this!

On Tue, 19 Feb 2019 16:50:54 +0800
Yan Zhao  wrote:

> This patchset enables VFIO devices to have live migration capability.
> Currently it does not support post-copy phase.
> 
> It follows Alex's comments on last version of VFIO live migration patches,
> including device states, VFIO device state region layout, dirty bitmap's
> query.
> 
> Device Data
> ---
> Device data is divided into three types: device memory, device config,
> and system memory dirty pages produced by device.
> 
> Device config: data like MMIOs, page tables...
> Every device is supposed to possess device config data.
>   Usually device config's size is small (no bigger than 10M), and it

I'm not sure how we can really impose a limit here, it is what it is
for a device.  A smaller state is obviously desirable to reduce
downtime, but some devices could have very large states.

> needs to be loaded in certain strict order.
> Therefore, device config only needs to be saved/loaded in
> stop-and-copy phase.
> The data of device config is held in device config region.
> Size of device config data is smaller than or equal to that of
> device config region.

So the intention here is that this is the last data read from the
device and it's done in one pass, so the region needs to be large
enough to expose all config data at once.  On restore it's the last
data written before switching the device to the run state.

> 
> Device Memory: device's internal memory, standalone and outside system

s/system/VM/

> memory. It is usually very big.

Or it doesn't exist.  Not sure we should be setting expectations since
it will vary per device.

> This kind of data needs to be saved / loaded in pre-copy and
> stop-and-copy phase.
> The data of device memory is held in device memory region.
> Size of device memory is usually larger than that of device
> memory region. qemu needs to save/load it in chunks of size of
> device memory region.
> Not all devices have device memory. For example, IGD only uses system memory.

It seems a little gratuitous to me that this is a separate region or
that this data is handled separately.  All of this data is opaque to
QEMU, so why do we need to separate it?

> System memory dirty pages: If a device produces dirty pages in system
> memory, it is able to get dirty bitmap for certain range of system
> memory. This dirty bitmap is queried in pre-copy and stop-and-copy
> phase in .log_sync callback. By setting dirty bitmap in .log_sync
> callback, dirty pages in system memory will be save/loaded by ram's
> live migration code.
> The dirty bitmap of system memory is held in dirty bitmap region.
> If system memory range is larger than that dirty bitmap region can
> hold, qemu will cut it into several chunks and get dirty bitmap in
> succession.
> 
> 
> Device State Regions
> 
> Vendor driver is required to expose two mandatory regions and another two
> optional regions if it plans to support device state management.
> 
> So, there are up to four regions in total.
> One control region: mandatory.
> Get access via read/write system call.
> Its layout is defined in struct vfio_device_state_ctl
> Three data regions: mmaped into qemu.

Is mmap mandatory?  I would think this would be defined by the mdev
device what access they want to support per region.  We don't want to
impose a more complicated interface if the device doesn't require it.

> device config region: mandatory, holding data of device config
> device memory region: optional, holding data of device memory
> dirty bitmap region: optional, holding bitmap of system memory
> dirty pages
> 
> (The reason why four separate regions are defined is that the unit of mmap
> system call is PAGE_SIZE, i.e. 4k bytes. So one read/write region for
> control and three mmaped regions for data seems better than one big region
> padded and sparse mmaped).

It's not obvious to me how this is better, a big region isn't padded,
there's simply a gap in the file descriptor.  Is having a sub-PAGE_SIZE
gap in a file really of any consequence?  Each region beyond the header
is more than likely larger than PAGE_SIZE, therefore they can be nicely
aligned together.  We still need fields to tell us how much data is
available in each area, so another to tell us the start of each area is
a minor detail.  And I think we still want to allow drivers to specify
which parts of which areas support mmap, so I don't think we're getting
away from sparse mmap support.

> kernel device state interface [1]
> --
> #define VFIO_DEVICE_STATE_INTERFACE_VERSION 1
> #define VFIO_DEVICE_DATA_CAP_DEVICE_MEMORY 1
> #define VFIO_DEVICE_DATA_CAP_SYSTEM_MEMORY 2

If we were to go

Re: [Qemu-devel] [PATCH 06/51] build: switch to Kconfig

2019-02-21 Thread Stefano Garzarella

Il giorno gio 21 feb 2019 alle 19:00 Paolo Bonzini  ha
scritto:

> On 21/02/19 16:44, Stefano Garzarella wrote:
> >> +
> > Hi Paolo,
> > I'm playing with Kconfig but with a simple configuration
> > (./configure --target-list=x86_64-softmmu --disable-docs) the build
> fails:
> > /usr/bin/ld: ../hw/xen/xen-legacy-backend.o: in function
> `xen_be_register_common':
> > /home/stefano/repos/qemu-kconfig/hw/xen/xen-legacy-backend.c:757:
> undefined reference to `xen_9pfs_ops'
> > collect2: error: ld returned 1 exit status
> >
> > Analyzing the Makefile.objs files maybe we should pass the CONFIG_VIRTFS
> from
> > config-host.mak down to Kconfig.
> > I tried this simple patch and it seems to fix the issue:
> >
> > diff --git a/Makefile b/Makefile
> > index df0732a050..bad583b01c 100644
> > --- a/Makefile
> > +++ b/Makefile
> > @@ -336,6 +336,7 @@ MINIKCONF_ARGS = \
> >  CONFIG_XEN=$(CONFIG_XEN) \
> >  CONFIG_OPENGL=$(CONFIG_OPENGL) \
> >  CONFIG_VHOST_USER=$(CONFIG_VHOST_USER) \
> > +CONFIG_VIRTFS=$(CONFIG_VIRTFS) \
> >  CONFIG_LINUX=$(CONFIG_LINUX)
> >
> > I'm not sure if we need to add "config VIRTFS" entry in the
> > Kconfig.host, because it is already defined in hw/9pfs/Kconfig.
>
> Yes, we should remove it from hw/9pfs/Kconfig too.  I had made this
> exact change today in my branch. :)
>
>
Great! :)

Thanks,
Stefano
-- 
Stefano Garzarella
Software Engineer @ Red Hat

Re: [Qemu-devel] [PULL 0/3] Bitmaps patches

2019-02-21 Thread no-reply

Patchew URL: https://patchew.org/QEMU/20190213234907.24173-1-js...@redhat.com/



Hi,

This series failed the docker-mingw@fedora build test. Please find the testing 
commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.

=== TEST SCRIPT BEGIN ===
#!/bin/bash
time make docker-test-mingw@fedora SHOW_ENV=1 J=14
=== TEST SCRIPT END ===

  CC  qapi/qapi-commands.o
  CC  qapi/qapi-commands-block-core.o
/tmp/qemu-test/src/blockdev.c: In function 'qmp_block_dirty_bitmap_add':
/tmp/qemu-test/src/blockdev.c:2868:17: error: expected ';' before '}' token
 goto out
 ^
 ;


The full log is available at
http://patchew.org/logs/20190213234907.24173-1-js...@redhat.com/testing.docker-mingw@fedora/?type=message.
---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [Qemu-devel] [PATCH v3] hw/block: better reporting on pflash backing file mismatch

2019-02-21 Thread Alex Bennée



Laszlo Ersek  writes:

> On 02/21/19 19:48, Alex Bennée wrote:
>> It looks like there was going to be code to check we had some sort of
>> alignment so lets replace it with an actual check. This is a bit more
>> useful than the enigmatic "failed to read the initial flash content"
>> when we attempt to read the number of bytes the device should have.
>>
>> This is a potential confusing stumbling block when you move from using
>> -bios to using -drive if=pflash,file=blob,format=raw,readonly for
>> loading your firmware code. To mitigate that we automatically pad in
>> the read-only case.
>>
>> Signed-off-by: Alex Bennée 
>>
>> ---
>> v3
>>   - tweak commit title/commentary
>>   - use total_len instead of device_len for checks
>>   - if the device is read-only do the padding for them
>>   - accept backing_len > total_len (how to warn_report with NULL *errp?)
>> ---
>>  hw/block/pflash_cfi01.c | 28 +---
>>  1 file changed, 21 insertions(+), 7 deletions(-)
>>
>> diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c
>> index 00c2efd0d7..37d7513c45 100644
>> --- a/hw/block/pflash_cfi01.c
>> +++ b/hw/block/pflash_cfi01.c
>> @@ -714,13 +714,6 @@ static void pflash_cfi01_realize(DeviceState *dev, 
>> Error **errp)
>>  }
>>  device_len = sector_len_per_device * blocks_per_device;
>>
>> -/* XXX: to be fixed */
>> -#if 0
>> -if (total_len != (8 * 1024 * 1024) && total_len != (16 * 1024 * 1024) &&
>> -total_len != (32 * 1024 * 1024) && total_len != (64 * 1024 * 1024))
>> -return NULL;
>> -#endif
>> -
>>  memory_region_init_rom_device(
>>  &pfl->mem, OBJECT(dev),
>>  &pflash_cfi01_ops,
>> @@ -747,6 +740,27 @@ static void pflash_cfi01_realize(DeviceState *dev, 
>> Error **errp)
>>  }
>>
>>  if (pfl->blk) {
>> +/*
>> + * Validate the backing store is the right size for pflash
>> + * devices. It should be padded to a multiple of the flash
>> + * block size. If the device is read-only we can elide the
>> + * check and just null pad the region first. If the user
>> + * supplies a larger file we silently accept it.
>
> (1) I recommend adding "and ignore the tail".
>
>> + */
>> +uint64_t backing_len = blk_getlength(pfl->blk);
>
> (2) Didn't we intend to check for blk_getlength() errors (or assert that
> there would be none)?

Oops, yes I'll fix that.

>
>> +
>> +if (backing_len < total_len) {
>> +if (pfl->ro) {
>> +memset(pfl->storage, 0, total_len);
>
> (3) Should we "optimize" (well, okay, de-pessimize) this to:
>
>   memset((uint8_t*)pfl->storage + backing_len, 0,
>  total_len - backing_len);
>
> ?

I mean in the grand scheme of things it's unlikely to show up in any
benchmarks so I went for simple and easy to get right.

>
>> +total_len = backing_len;
>> +} else {
>> +error_setg(errp, "device(s) needs %" PRIu64 " bytes, "
>
> (4) not too important, I'm just curious: why the optional plural?

I discovered the difference between device_len and total_len and found
(for some reason) the efivars came out as multiple devices.

>
>> +   "backing file provides only %" PRIu64 " bytes",
>> +   total_len, backing_len);
>> +return;
>> +}
>> +}
>> +
>>  /* read the initial flash content */
>>  ret = blk_pread(pfl->blk, 0, pfl->storage, total_len);
>>
>>
>
> I don't feel too strongly about these, so if you disagree, I won't push.
>
> Thanks!
> Laszlo


--
Alex Bennée

Re: [Qemu-devel] Testing sysbus devices

2019-02-21 Thread Stephen Checkoway

> On Feb 20, 2019, at 03:55, Laszlo Ersek  wrote:
> 
> I would strongly prefer if the guest-side view wouldn't change at all.

It sounds like sector protection isn't something you want and it's not 
something I currently need so unless that changes, I probably won't do anything 
with it.

My goal is merely to implement some missing flash functionality that I need to 
emulate some hardware that I have. My plan for doing this is to not change any 
defaults (except for a few bug fixes) while doing so. I'm happy for the qemu 
community to take as much or as little as it finds useful.

I'll send a patch series for review in the normal fashion, but if anyone wants 
to see my in-progress work, including tests, the diff is available here 
.

For my own edification, I'm curious how you're currently dealing with some 
regions of flash that are protected. I believe Markus mentioned using multiple 
flash devices. Are you overlapping the address ranges?

The current pflash_cfi02.c code assumes, but doesn't check that both the total 
size of the chip as well as the size of each sector is a power of two. If you 
wanted say 7 MB of read/write flash and 1 MB of read-only flash, qemu might be 
willing to create a device with say 7 MB of storage, but it will definitely 
misbehave. (I added a check for that here 
.)

Cheers,

Steve

-- 
Stephen Checkoway

Re: [Qemu-devel] [PATCH] qemu-img: implement copy offload (-C) for dd

2019-02-21 Thread Sergio Lopez

On Thu, Feb 21, 2019 at 12:08:12PM -0600, Eric Blake wrote:
> On 2/21/19 11:37 AM, Sergio Lopez wrote:
> > This parameter is analogous to convert's "-C", making use of
> > bdrv_co_copy_range().
> 
> The last time I tried to patch 'qemu-img dd', it was pointed out that it
> already has several bugs (where it is not on feature-parity with real
> dd), and that we REALLY want to make it a syntactic sugar wrapper around
> 'qemu-img convert', rather than duplicating code (which means that
> qemu-img convert needs to make it easier to do arbitrary offsets and
> subsets - although to some extent you can already do that with
> --image-opts and appropriate raw driver wrappers).
> 
> https://lists.gnu.org/archive/html/qemu-devel/2018-08/msg02618.html

Interesting, I wasn't aware of that conversation. It might be a little late
to go through it again, but while I don't have a strong opinion about it, I
do have some reservations about the idea of making 'dd' a frontend for
'convert'.

While I do see the functional similarity of both commands, to me they
are quite different at a semantical level. For 'convert', I do expect it
to do "the right thing" and use the optimal settings (i.e. choosing the
best transfer size) by default, while 'dd' is more of "do whatever the
user told you to do no matter how wrong it is".

Due to these differences, I think turning 'convert' code into something
able to deal with 'dd' semantics would imply adding a considerable
number of conditionals, possibly making it harder to maintain than
keeping it separate.

Sergio (slp).

[Qemu-devel] [PULL 0/1] target/hppa patch queue

2019-02-21 Thread Richard Henderson

The following changes since commit fc3dbb90f2eb069801bfb4cfe9cbc83cf9c5f4a9:

  Merge remote-tracking branch 'remotes/jnsnow/tags/bitmaps-pull-request' into 
staging (2019-02-21 13:09:33 +)

are available in the Git repository at:

  https://github.com/rth7680/qemu.git tags/pull-hppa-20190221

for you to fetch changes up to 368bec88d1916f65050be305f88c10a46075a51c:

  hw/hppa/dino: mask out lower 2 bits of PCI config addr (2019-02-21 10:16:19 
-0800)


Fix dino pci config access.


Sven Schnelle (1):
  hw/hppa/dino: mask out lower 2 bits of PCI config addr

 hw/hppa/dino.c|  27 ---
 pc-bios/hppa-firmware.img | Bin 215936 -> 760040 bytes
 2 files changed, 24 insertions(+), 3 deletions(-)

[Qemu-devel] [PULL 1/2] tcg: Remove TODO file

2019-02-21 Thread Richard Henderson

The last update to this file was 9 years ago.  In the meantime,
4 of the 6 ideas have actually been completed.  The last two do
not actually make sense anymore.

Suggested-by: Thomas Huth 
Signed-off-by: Richard Henderson 
---
 tcg/TODO | 14 --
 1 file changed, 14 deletions(-)
 delete mode 100644 tcg/TODO

diff --git a/tcg/TODO b/tcg/TODO
deleted file mode 100644
index 074784778e..00
--- a/tcg/TODO
+++ /dev/null
@@ -1,14 +0,0 @@
-- Add new instructions such as: clz, ctz, popcnt.
-
-- See if it is worth exporting mul2, mulu2, div2, divu2. 
-
-- Support of globals saved in fixed registers between TBs.
-
-Ideas:
-
-- Move the slow part of the qemu_ld/st ops after the end of the TB.
-
-- Change exception syntax to get closer to QOP system (exception
-  parameters given with a specific instruction).
-
-- Add float and vector support.
-- 
2.17.2

[Qemu-devel] [PULL 17/21] hw/arm/musca: Add PPCs

2019-02-21 Thread Peter Maydell

Many of the devices on the Musca board live behind TrustZone
Peripheral Protection Controllers (PPCs); add models of the
PPCs, using a similar scheme to the MPS2 board models.
This commit wires up the PPCs with "unimplemented device"
stubs behind them in the correct places in the address map.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
---
 hw/arm/musca.c | 289 +
 1 file changed, 289 insertions(+)

diff --git a/hw/arm/musca.c b/hw/arm/musca.c
index cc624c7d160..8774e0b87b7 100644
--- a/hw/arm/musca.c
+++ b/hw/arm/musca.c
@@ -27,8 +27,11 @@
 #include "hw/arm/armsse.h"
 #include "hw/boards.h"
 #include "hw/core/split-irq.h"
+#include "hw/misc/tz-ppc.h"
+#include "hw/misc/unimp.h"
 
 #define MUSCA_NUMIRQ_MAX 96
+#define MUSCA_PPC_MAX 3
 
 typedef enum MuscaType {
 MUSCA_A,
@@ -48,6 +51,24 @@ typedef struct {
 
 ARMSSE sse;
 SplitIRQ cpu_irq_splitter[MUSCA_NUMIRQ_MAX];
+SplitIRQ sec_resp_splitter;
+TZPPC ppc[MUSCA_PPC_MAX];
+MemoryRegion container;
+UnimplementedDeviceState eflash[2];
+UnimplementedDeviceState qspi;
+UnimplementedDeviceState mpc[5];
+UnimplementedDeviceState mhu[2];
+UnimplementedDeviceState pwm[3];
+UnimplementedDeviceState i2s;
+UnimplementedDeviceState uart[2];
+UnimplementedDeviceState i2c[2];
+UnimplementedDeviceState spi;
+UnimplementedDeviceState scc;
+UnimplementedDeviceState timer;
+UnimplementedDeviceState rtc;
+UnimplementedDeviceState pvt;
+UnimplementedDeviceState sdio;
+UnimplementedDeviceState gpio;
 } MuscaMachineState;
 
 #define TYPE_MUSCA_MACHINE "musca"
@@ -68,6 +89,94 @@ typedef struct {
  */
 #define SYSCLK_FRQ 4000
 
+/*
+ * Most of the devices in the Musca board sit behind Peripheral Protection
+ * Controllers. These data structures define the layout of which devices
+ * sit behind which PPCs.
+ * The devfn for each port is a function which creates, configures
+ * and initializes the device, returning the MemoryRegion which
+ * needs to be plugged into the downstream end of the PPC port.
+ */
+typedef MemoryRegion *MakeDevFn(MuscaMachineState *mms, void *opaque,
+const char *name, hwaddr size);
+
+typedef struct PPCPortInfo {
+const char *name;
+MakeDevFn *devfn;
+void *opaque;
+hwaddr addr;
+hwaddr size;
+} PPCPortInfo;
+
+typedef struct PPCInfo {
+const char *name;
+PPCPortInfo ports[TZ_NUM_PORTS];
+} PPCInfo;
+
+static MemoryRegion *make_unimp_dev(MuscaMachineState *mms,
+void *opaque, const char *name, hwaddr 
size)
+{
+/*
+ * Initialize, configure and realize a TYPE_UNIMPLEMENTED_DEVICE,
+ * and return a pointer to its MemoryRegion.
+ */
+UnimplementedDeviceState *uds = opaque;
+
+sysbus_init_child_obj(OBJECT(mms), name, uds,
+  sizeof(UnimplementedDeviceState),
+  TYPE_UNIMPLEMENTED_DEVICE);
+qdev_prop_set_string(DEVICE(uds), "name", name);
+qdev_prop_set_uint64(DEVICE(uds), "size", size);
+object_property_set_bool(OBJECT(uds), true, "realized", &error_fatal);
+return sysbus_mmio_get_region(SYS_BUS_DEVICE(uds), 0);
+}
+
+static MemoryRegion *make_musca_a_devs(MuscaMachineState *mms, void *opaque,
+   const char *name, hwaddr size)
+{
+/*
+ * Create the container MemoryRegion for all the devices that live
+ * behind the Musca-A PPC's single port. These devices don't have a PPC
+ * port each, but we use the PPCPortInfo struct as a convenient way
+ * to describe them. Note that addresses here are relative to the base
+ * address of the PPC port region: 0x40100000, and devices appear both
+ * at the 0x4... NS region and the 0x5... S region.
+ */
+int i;
+MemoryRegion *container = &mms->container;
+
+const PPCPortInfo devices[] = {
+{ "uart0", make_unimp_dev, &mms->uart[0], 0x1000, 0x1000 },
+{ "uart1", make_unimp_dev, &mms->uart[1], 0x2000, 0x1000 },
+{ "spi", make_unimp_dev, &mms->spi, 0x3000, 0x1000 },
+{ "i2c0", make_unimp_dev, &mms->i2c[0], 0x4000, 0x1000 },
+{ "i2c1", make_unimp_dev, &mms->i2c[1], 0x5000, 0x1000 },
+{ "i2s", make_unimp_dev, &mms->i2s, 0x6000, 0x1000 },
+{ "pwm0", make_unimp_dev, &mms->pwm[0], 0x7000, 0x1000 },
+{ "rtc", make_unimp_dev, &mms->rtc, 0x8000, 0x1000 },
+{ "qspi", make_unimp_dev, &mms->qspi, 0xa000, 0x1000 },
+{ "timer", make_unimp_dev, &mms->timer, 0xb000, 0x1000 },
+{ "scc", make_unimp_dev, &mms->scc, 0xc000, 0x1000 },
+{ "pwm1", make_unimp_dev, &mms->pwm[1], 0xe000, 0x1000 },
+{ "pwm2", make_unimp_dev, &mms->pwm[2], 0xf000, 0x1000 },
+{ "gpio", make_unimp_dev, &mms->gpio, 0x10000, 0x1000 },
+{ "mpc0", make_unimp_dev, &mms->mpc[0], 0x12000, 0x1000 },
+{ "mpc1", make_unimp_dev, &mms->mpc[1], 0x13000, 0x1000 },
+};
+
+memory_region_init(container, OBJECT(mms),

Re: [Qemu-devel] [PATCH v3] hw/block: better reporting on pflash backing file mismatch

2019-02-21 Thread Laszlo Ersek

On 02/21/19 19:48, Alex Bennée wrote:
> It looks like there was going to be code to check we had some sort of
> alignment so lets replace it with an actual check. This is a bit more
> useful than the enigmatic "failed to read the initial flash content"
> when we attempt to read the number of bytes the device should have.
> 
> This is a potential confusing stumbling block when you move from using
> -bios to using -drive if=pflash,file=blob,format=raw,readonly for
> loading your firmware code. To mitigate that we automatically pad in
> the read-only case.
> 
> Signed-off-by: Alex Bennée 
> 
> ---
> v3
>   - tweak commit title/commentary
>   - use total_len instead of device_len for checks
>   - if the device is read-only do the padding for them
>   - accept backing_len > total_len (how to warn_report with NULL *errp?)
> ---
>  hw/block/pflash_cfi01.c | 28 +---
>  1 file changed, 21 insertions(+), 7 deletions(-)
> 
> diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c
> index 00c2efd0d7..37d7513c45 100644
> --- a/hw/block/pflash_cfi01.c
> +++ b/hw/block/pflash_cfi01.c
> @@ -714,13 +714,6 @@ static void pflash_cfi01_realize(DeviceState *dev, Error 
> **errp)
>  }
>  device_len = sector_len_per_device * blocks_per_device;
>  
> -/* XXX: to be fixed */
> -#if 0
> -if (total_len != (8 * 1024 * 1024) && total_len != (16 * 1024 * 1024) &&
> -total_len != (32 * 1024 * 1024) && total_len != (64 * 1024 * 1024))
> -return NULL;
> -#endif
> -
>  memory_region_init_rom_device(
>  &pfl->mem, OBJECT(dev),
>  &pflash_cfi01_ops,
> @@ -747,6 +740,27 @@ static void pflash_cfi01_realize(DeviceState *dev, Error 
> **errp)
>  }
>  
>  if (pfl->blk) {
> +/*
> + * Validate the backing store is the right size for pflash
> + * devices. It should be padded to a multiple of the flash
> + * block size. If the device is read-only we can elide the
> + * check and just null pad the region first. If the user
> + * supplies a larger file we silently accept it.

(1) I recommend adding "and ignore the tail".

> + */
> +uint64_t backing_len = blk_getlength(pfl->blk);

(2) Didn't we intend to check for blk_getlength() errors (or assert that
there would be none)?

> +
> +if (backing_len < total_len) {
> +if (pfl->ro) {
> +memset(pfl->storage, 0, total_len);

(3) Should we "optimize" (well, okay, de-pessimize) this to:

  memset((uint8_t*)pfl->storage + backing_len, 0,
 total_len - backing_len);

?

> +total_len = backing_len;
> +} else {
> +error_setg(errp, "device(s) needs %" PRIu64 " bytes, "

(4) not too important, I'm just curious: why the optional plural?

> +   "backing file provides only %" PRIu64 " bytes",
> +   total_len, backing_len);
> +return;
> +}
> +}
> +
>  /* read the initial flash content */
>  ret = blk_pread(pfl->blk, 0, pfl->storage, total_len);
>  
> 

I don't feel too strongly about these, so if you disagree, I won't push.

Thanks!
Laszlo

[Qemu-devel] [PULL 16/21] hw/arm/musca.c: Implement models of the Musca-A and -B1 boards

2019-02-21 Thread Peter Maydell

The Musca-A and Musca-B1 development boards are based on the
SSE-200 subsystem for embedded. Implement an initial skeleton
model of these boards, which are similar but not identical.

This commit creates the board model with the SSE and the IRQ
splitters to wire IRQs up to its two CPUs. As yet there
are no devices and no memory: these will be added later.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
---
 hw/arm/Makefile.objs|   1 +
 hw/arm/musca.c  | 197 
 MAINTAINERS |   6 +
 default-configs/arm-softmmu.mak |   1 +
 4 files changed, 205 insertions(+)
 create mode 100644 hw/arm/musca.c

diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs
index fa40e8d6412..fa57c7c7704 100644
--- a/hw/arm/Makefile.objs
+++ b/hw/arm/Makefile.objs
@@ -35,6 +35,7 @@ obj-$(CONFIG_ASPEED_SOC) += aspeed_soc.o aspeed.o
 obj-$(CONFIG_MPS2) += mps2.o
 obj-$(CONFIG_MPS2) += mps2-tz.o
 obj-$(CONFIG_MSF2) += msf2-soc.o msf2-som.o
+obj-$(CONFIG_MUSCA) += musca.o
 obj-$(CONFIG_ARMSSE) += armsse.o
 obj-$(CONFIG_FSL_IMX7) += fsl-imx7.o mcimx7d-sabre.o
 obj-$(CONFIG_ARM_SMMUV3) += smmu-common.o smmuv3.o
diff --git a/hw/arm/musca.c b/hw/arm/musca.c
new file mode 100644
index 000..cc624c7d160
--- /dev/null
+++ b/hw/arm/musca.c
@@ -0,0 +1,197 @@
+/*
+ * Arm Musca-B1 test chip board emulation
+ *
+ * Copyright (c) 2019 Linaro Limited
+ * Written by Peter Maydell
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License version 2 or
+ *  (at your option) any later version.
+ */
+
+/*
+ * The Musca boards are a reference implementation of a system using
+ * the SSE-200 subsystem for embedded:
+ * 
https://developer.arm.com/products/system-design/development-boards/iot-test-chips-and-boards/musca-a-test-chip-board
+ * 
https://developer.arm.com/products/system-design/development-boards/iot-test-chips-and-boards/musca-b-test-chip-board
+ * We model the A and B1 variants of this board, as described in the TRMs:
+ * 
http://infocenter.arm.com/help/topic/com.arm.doc.101107__00_en/index.html
+ * 
http://infocenter.arm.com/help/topic/com.arm.doc.101312__00_en/index.html
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qapi/error.h"
+#include "exec/address-spaces.h"
+#include "hw/arm/arm.h"
+#include "hw/arm/armsse.h"
+#include "hw/boards.h"
+#include "hw/core/split-irq.h"
+
+#define MUSCA_NUMIRQ_MAX 96
+
+typedef enum MuscaType {
+MUSCA_A,
+MUSCA_B1,
+} MuscaType;
+
+typedef struct {
+MachineClass parent;
+MuscaType type;
+uint32_t init_svtor;
+int sram_addr_width;
+int num_irqs;
+} MuscaMachineClass;
+
+typedef struct {
+MachineState parent;
+
+ARMSSE sse;
+SplitIRQ cpu_irq_splitter[MUSCA_NUMIRQ_MAX];
+} MuscaMachineState;
+
+#define TYPE_MUSCA_MACHINE "musca"
+#define TYPE_MUSCA_A_MACHINE MACHINE_TYPE_NAME("musca-a")
+#define TYPE_MUSCA_B1_MACHINE MACHINE_TYPE_NAME("musca-b1")
+
+#define MUSCA_MACHINE(obj) \
+OBJECT_CHECK(MuscaMachineState, obj, TYPE_MUSCA_MACHINE)
+#define MUSCA_MACHINE_GET_CLASS(obj) \
+OBJECT_GET_CLASS(MuscaMachineClass, obj, TYPE_MUSCA_MACHINE)
+#define MUSCA_MACHINE_CLASS(klass) \
+OBJECT_CLASS_CHECK(MuscaMachineClass, klass, TYPE_MUSCA_MACHINE)
+
+/*
+ * Main SYSCLK frequency in Hz
+ * TODO this should really be different for the two cores, but we
+ * don't model that in our SSE-200 model yet.
+ */
+#define SYSCLK_FRQ 4000
+
+static void musca_init(MachineState *machine)
+{
+MuscaMachineState *mms = MUSCA_MACHINE(machine);
+MuscaMachineClass *mmc = MUSCA_MACHINE_GET_CLASS(mms);
+MachineClass *mc = MACHINE_GET_CLASS(machine);
+MemoryRegion *system_memory = get_system_memory();
+DeviceState *ssedev;
+int i;
+
+assert(mmc->num_irqs <= MUSCA_NUMIRQ_MAX);
+
+if (strcmp(machine->cpu_type, mc->default_cpu_type) != 0) {
+error_report("This board can only be used with CPU %s",
+ mc->default_cpu_type);
+exit(1);
+}
+
+sysbus_init_child_obj(OBJECT(machine), "sse-200", >sse,
+  sizeof(mms->sse), TYPE_SSE200);
+ssedev = DEVICE(>sse);
+object_property_set_link(OBJECT(>sse), OBJECT(system_memory),
+ "memory", _fatal);
+qdev_prop_set_uint32(ssedev, "EXP_NUMIRQ", mmc->num_irqs);
+qdev_prop_set_uint32(ssedev, "init-svtor", mmc->init_svtor);
+qdev_prop_set_uint32(ssedev, "SRAM_ADDR_WIDTH", mmc->sram_addr_width);
+qdev_prop_set_uint32(ssedev, "MAINCLK", SYSCLK_FRQ);
+object_property_set_bool(OBJECT(>sse), true, "realized",
+ _fatal);
+
+/*
+ * We need to create splitters to feed the IRQ inputs
+ * for each CPU in the SSE-200 from each device in the board.
+ */
+for (i = 0; i < mmc->num_irqs; i++) {
+char *name = g_strdup_printf("musca-irq-splitter%d", i);
+

[Qemu-devel] [PULL 2/2] include/exec/helper-head.h: support "const void *" in helper calls

2019-02-21 Thread Richard Henderson

From: David Hildenbrand 

Especially when dealing with out-of-line gvec helpers, it is often
helpful to specify some vector pointers as constant. E.g. when
we have two inputs and one output, marking the two inputs as const
pointers helps to avoid bugs.

Const pointers can be specified via "cptr"; however, they behave in TCG
just like ordinary pointers. We can specify helpers like:

DEF_HELPER_FLAGS_4(gvec_vbperm, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)

void HELPER(gvec_vbperm)(void *v1, const void *v2, const void *v3,
 uint32_t desc)

And make sure that here, only v1 will be written (as long as const is
not cast away, of course).

Signed-off-by: David Hildenbrand 
Message-Id: <20190221093459.22547-1-da...@redhat.com>
Signed-off-by: Richard Henderson 
---
 include/exec/helper-head.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h
index ab4f8b6623..f2519c9741 100644
--- a/include/exec/helper-head.h
+++ b/include/exec/helper-head.h
@@ -30,6 +30,7 @@
 #define dh_alias_f32 i32
 #define dh_alias_f64 i64
 #define dh_alias_ptr ptr
+#define dh_alias_cptr ptr
 #define dh_alias_void void
 #define dh_alias_noreturn noreturn
 #define dh_alias(t) glue(dh_alias_, t)
@@ -43,6 +44,7 @@
 #define dh_ctype_f32 float32
 #define dh_ctype_f64 float64
 #define dh_ctype_ptr void *
+#define dh_ctype_cptr const void *
 #define dh_ctype_void void
 #define dh_ctype_noreturn void QEMU_NORETURN
 #define dh_ctype(t) dh_ctype_##t
@@ -88,6 +90,7 @@
 #define dh_is_64bit_i32 0
 #define dh_is_64bit_i64 1
 #define dh_is_64bit_ptr (sizeof(void *) == 8)
+#define dh_is_64bit_cptr dh_is_64bit_ptr
 #define dh_is_64bit(t) glue(dh_is_64bit_, dh_alias(t))
 
 #define dh_is_signed_void 0
@@ -105,6 +108,7 @@
extension instructions that may be required, e.g. ia64's addp4.  But
for now we don't support any 64-bit targets with 32-bit pointers.  */
 #define dh_is_signed_ptr 0
+#define dh_is_signed_cptr dh_is_signed_ptr
 #define dh_is_signed_env dh_is_signed_ptr
 #define dh_is_signed(t) dh_is_signed_##t
 
@@ -117,6 +121,7 @@
 #define dh_callflag_f32  0
 #define dh_callflag_f64  0
 #define dh_callflag_ptr  0
+#define dh_callflag_cptr dh_callflag_ptr
 #define dh_callflag_void 0
 #define dh_callflag_noreturn TCG_CALL_NO_RETURN
 #define dh_callflag(t) glue(dh_callflag_, dh_alias(t))
-- 
2.17.2

[Qemu-devel] [PULL 20/21] hw/arm/musca: Wire up PL011 UARTs

2019-02-21 Thread Peter Maydell

Wire up the two PL011 UARTs in the Musca board.

Signed-off-by: Peter Maydell 
Reviewed-by: Richard Henderson 
---
 hw/arm/musca.c | 34 +-
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/hw/arm/musca.c b/hw/arm/musca.c
index 378912b7385..23aff43f4bc 100644
--- a/hw/arm/musca.c
+++ b/hw/arm/musca.c
@@ -23,9 +23,11 @@
 #include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "exec/address-spaces.h"
+#include "sysemu/sysemu.h"
 #include "hw/arm/arm.h"
 #include "hw/arm/armsse.h"
 #include "hw/boards.h"
+#include "hw/char/pl011.h"
 #include "hw/core/split-irq.h"
 #include "hw/misc/tz-mpc.h"
 #include "hw/misc/tz-ppc.h"
@@ -69,7 +71,7 @@ typedef struct {
 UnimplementedDeviceState mhu[2];
 UnimplementedDeviceState pwm[3];
 UnimplementedDeviceState i2s;
-UnimplementedDeviceState uart[2];
+PL011State uart[2];
 UnimplementedDeviceState i2c[2];
 UnimplementedDeviceState spi;
 UnimplementedDeviceState scc;
@@ -285,6 +287,28 @@ static MemoryRegion *make_rtc(MuscaMachineState *mms, void 
*opaque,
 return sysbus_mmio_get_region(SYS_BUS_DEVICE(rtc), 0);
 }
 
+static MemoryRegion *make_uart(MuscaMachineState *mms, void *opaque,
+   const char *name, hwaddr size)
+{
+PL011State *uart = opaque;
+int i = uart - >uart[0];
+int irqbase = 7 + i * 6;
+SysBusDevice *s;
+
+sysbus_init_child_obj(OBJECT(mms), name, uart, sizeof(mms->uart[0]),
+  TYPE_PL011);
+qdev_prop_set_chr(DEVICE(uart), "chardev", serial_hd(i));
+object_property_set_bool(OBJECT(uart), true, "realized", _fatal);
+s = SYS_BUS_DEVICE(uart);
+sysbus_connect_irq(s, 0, get_sse_irq_in(mms, irqbase + 5)); /* combined */
+sysbus_connect_irq(s, 1, get_sse_irq_in(mms, irqbase + 0)); /* RX */
+sysbus_connect_irq(s, 2, get_sse_irq_in(mms, irqbase + 1)); /* TX */
+sysbus_connect_irq(s, 3, get_sse_irq_in(mms, irqbase + 2)); /* RT */
+sysbus_connect_irq(s, 4, get_sse_irq_in(mms, irqbase + 3)); /* MS */
+sysbus_connect_irq(s, 5, get_sse_irq_in(mms, irqbase + 4)); /* E */
+return sysbus_mmio_get_region(SYS_BUS_DEVICE(uart), 0);
+}
+
 static MemoryRegion *make_musca_a_devs(MuscaMachineState *mms, void *opaque,
const char *name, hwaddr size)
 {
@@ -300,8 +324,8 @@ static MemoryRegion *make_musca_a_devs(MuscaMachineState 
*mms, void *opaque,
 MemoryRegion *container = >container;
 
 const PPCPortInfo devices[] = {
-{ "uart0", make_unimp_dev, >uart[0], 0x1000, 0x1000 },
-{ "uart1", make_unimp_dev, >uart[1], 0x2000, 0x1000 },
+{ "uart0", make_uart, >uart[0], 0x1000, 0x1000 },
+{ "uart1", make_uart, >uart[1], 0x2000, 0x1000 },
 { "spi", make_unimp_dev, >spi, 0x3000, 0x1000 },
 { "i2c0", make_unimp_dev, >i2c[0], 0x4000, 0x1000 },
 { "i2c1", make_unimp_dev, >i2c[1], 0x5000, 0x1000 },
@@ -460,8 +484,8 @@ static void musca_init(MachineState *machine)
 { "pwm1", make_unimp_dev, >pwm[1], 0x40102000, 0x1000 },
 { "pwm2", make_unimp_dev, >pwm[2], 0x40103000, 0x1000 },
 { "i2s", make_unimp_dev, >i2s, 0x40104000, 0x1000 },
-{ "uart0", make_unimp_dev, >uart[0], 0x40105000, 0x1000 },
-{ "uart1", make_unimp_dev, >uart[1], 0x40106000, 0x1000 },
+{ "uart0", make_uart, >uart[0], 0x40105000, 0x1000 },
+{ "uart1", make_uart, >uart[1], 0x40106000, 0x1000 },
 { "i2c0", make_unimp_dev, >i2c[0], 0x40108000, 0x1000 },
 { "i2c1", make_unimp_dev, >i2c[1], 0x40109000, 0x1000 },
 { "spi", make_unimp_dev, >spi, 0x4010a000, 0x1000 },
-- 
2.20.1

[Qemu-devel] [PULL 07/21] target/arm: Implement ARMv8.3-JSConv

2019-02-21 Thread Peter Maydell

From: Richard Henderson 

Signed-off-by: Richard Henderson 
Message-id: 20190215192302.27855-5-richard.hender...@linaro.org
Reviewed-by: Peter Maydell 
[PMM: fixed a couple of comment typos]
Signed-off-by: Peter Maydell 
---
 target/arm/cpu.h   | 10 +
 target/arm/helper.h|  3 ++
 target/arm/cpu.c   |  1 +
 target/arm/cpu64.c |  2 +
 target/arm/translate-a64.c | 26 +++
 target/arm/translate.c | 10 +
 target/arm/vfp_helper.c| 88 ++
 7 files changed, 140 insertions(+)

diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 84ae6849c2f..1eea1a408b8 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -3273,6 +3273,11 @@ static inline bool isar_feature_aa32_vcma(const 
ARMISARegisters *id)
 return FIELD_EX32(id->id_isar5, ID_ISAR5, VCMA) != 0;
 }
 
+static inline bool isar_feature_aa32_jscvt(const ARMISARegisters *id)
+{
+return FIELD_EX32(id->id_isar6, ID_ISAR6, JSCVT) != 0;
+}
+
 static inline bool isar_feature_aa32_dp(const ARMISARegisters *id)
 {
 return FIELD_EX32(id->id_isar6, ID_ISAR6, DP) != 0;
@@ -3351,6 +3356,11 @@ static inline bool isar_feature_aa64_dp(const 
ARMISARegisters *id)
 return FIELD_EX64(id->id_aa64isar0, ID_AA64ISAR0, DP) != 0;
 }
 
+static inline bool isar_feature_aa64_jscvt(const ARMISARegisters *id)
+{
+return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, JSCVT) != 0;
+}
+
 static inline bool isar_feature_aa64_fcma(const ARMISARegisters *id)
 {
 return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, FCMA) != 0;
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 923e8e15255..747cb64d29f 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -218,6 +218,9 @@ DEF_HELPER_FLAGS_2(rintd_exact, TCG_CALL_NO_RWG, f64, f64, 
ptr)
 DEF_HELPER_FLAGS_2(rints, TCG_CALL_NO_RWG, f32, f32, ptr)
 DEF_HELPER_FLAGS_2(rintd, TCG_CALL_NO_RWG, f64, f64, ptr)
 
+DEF_HELPER_FLAGS_2(vjcvt, TCG_CALL_NO_RWG, i32, f64, env)
+DEF_HELPER_FLAGS_2(fjcvtzs, TCG_CALL_NO_RWG, i64, f64, ptr)
+
 /* neon_helper.c */
 DEF_HELPER_FLAGS_3(neon_qadd_u8, TCG_CALL_NO_RWG, i32, env, i32, i32)
 DEF_HELPER_FLAGS_3(neon_qadd_s8, TCG_CALL_NO_RWG, i32, env, i32, i32)
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index edf6e0e1f1c..8ea6569088d 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -2001,6 +2001,7 @@ static void arm_max_initfn(Object *obj)
 cpu->isar.id_isar5 = t;
 
 t = cpu->isar.id_isar6;
+t = FIELD_DP32(t, ID_ISAR6, JSCVT, 1);
 t = FIELD_DP32(t, ID_ISAR6, DP, 1);
 cpu->isar.id_isar6 = t;
 
diff --git a/target/arm/cpu64.c b/target/arm/cpu64.c
index eff0f164dd0..69e4134f79f 100644
--- a/target/arm/cpu64.c
+++ b/target/arm/cpu64.c
@@ -311,6 +311,7 @@ static void aarch64_max_initfn(Object *obj)
 cpu->isar.id_aa64isar0 = t;
 
 t = cpu->isar.id_aa64isar1;
+t = FIELD_DP64(t, ID_AA64ISAR1, JSCVT, 1);
 t = FIELD_DP64(t, ID_AA64ISAR1, FCMA, 1);
 t = FIELD_DP64(t, ID_AA64ISAR1, APA, 1); /* PAuth, architected only */
 t = FIELD_DP64(t, ID_AA64ISAR1, API, 0);
@@ -344,6 +345,7 @@ static void aarch64_max_initfn(Object *obj)
 cpu->isar.id_isar5 = u;
 
 u = cpu->isar.id_isar6;
+u = FIELD_DP32(u, ID_ISAR6, JSCVT, 1);
 u = FIELD_DP32(u, ID_ISAR6, DP, 1);
 cpu->isar.id_isar6 = u;
 
diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index dbce24fe32c..c56e878787c 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -6526,6 +6526,24 @@ static void handle_fmov(DisasContext *s, int rd, int rn, 
int type, bool itof)
 }
 }
 
+static void handle_fjcvtzs(DisasContext *s, int rd, int rn)
+{
+TCGv_i64 t = read_fp_dreg(s, rn);
+TCGv_ptr fpstatus = get_fpstatus_ptr(false);
+
+gen_helper_fjcvtzs(t, t, fpstatus);
+
+tcg_temp_free_ptr(fpstatus);
+
+tcg_gen_ext32u_i64(cpu_reg(s, rd), t);
+tcg_gen_extrh_i64_i32(cpu_ZF, t);
+tcg_gen_movi_i32(cpu_CF, 0);
+tcg_gen_movi_i32(cpu_NF, 0);
+tcg_gen_movi_i32(cpu_VF, 0);
+
+tcg_temp_free_i64(t);
+}
+
 /* Floating point <-> integer conversions
  *   31   30  29 28   24 23  22  21 20   19 18 16 15 10 9  5 4  0
  * ++---+---+---+--+---+---+-+-+++
@@ -6601,6 +6619,14 @@ static void disas_fp_int_conv(DisasContext *s, uint32_t 
insn)
 handle_fmov(s, rd, rn, type, itof);
 break;
 
+case 0b0010: /* FJCVTZS */
+if (!dc_isar_feature(aa64_jscvt, s)) {
+goto do_unallocated;
+} else if (fp_access_check(s)) {
+handle_fjcvtzs(s, rd, rn);
+}
+break;
+
 default:
 do_unallocated:
 unallocated_encoding(s);
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 64c5fe0df3e..c1175798ac9 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -3718,6

[Qemu-devel] [PULL 0/2] tcg patch queue

2019-02-21 Thread Richard Henderson

The following changes since commit fc3dbb90f2eb069801bfb4cfe9cbc83cf9c5f4a9:

  Merge remote-tracking branch 'remotes/jnsnow/tags/bitmaps-pull-request' into 
staging (2019-02-21 13:09:33 +)

are available in the Git repository at:

  https://github.com/rth7680/qemu.git tags/pull-tcg-20190221

for you to fetch changes up to 8c6edfdd90522caa4fc429144d393aba5b99f584:

  include/exec/helper-head.h: support "const void *" in helper calls 
(2019-02-21 10:22:24 -0800)


Allow const void * as argument to helpers.
Remove obsolete TODO file.


David Hildenbrand (1):
  include/exec/helper-head.h: support "const void *" in helper calls

Richard Henderson (1):
  tcg: Remove TODO file

 include/exec/helper-head.h |  5 +
 tcg/TODO   | 14 --
 2 files changed, 5 insertions(+), 14 deletions(-)
 delete mode 100644 tcg/TODO

Re: [Qemu-devel] [libvirt] Libvirt upstream CI efforts

2019-02-21 Thread Cleber Rosa




On 2/21/19 12:56 PM, Daniel P. Berrangé wrote:
> On Thu, Feb 21, 2019 at 03:39:15PM +0100, Erik Skultety wrote:
>> Hi,
>> I'm starting this thread in order to continue with the ongoing efforts to
>> bring actual integration testing to libvirt. Currently, the status quo is 
>> that
>> we build libvirt (along with our unit test suite) using different 
>> OS-flavoured
>> VMs in ci.centos.org. Andrea put a tremendous amount of work to not only
>> automate the whole process of creating the VMs but also having a way for a
>> dev to re-create the same environment locally without jenkins by using the
>> lcitool.
> 
> Note that it is more than just libvirt on the ci.centos.org host. Our
> current built project list covers libosinfo, libvirt, libvirt-cim,
> libvirt-dbus, libvirt-glib, libvirt-go, libvirt-go-xml, libvirt-ocaml,
> libvirt-perl, libvirt-python, libvirt-sandbox, libvirt-tck, osinfo-db,
> osinfo-db-tools, virt-manager & virt-viewer
> 
> For the C libraries in that list, we've also built & tested for
> mingw32/64. All the projects also build RPMs.
> 
> In addition to ci.centos.org we have Travis CI testing for several
> of the projects - libvirt, libvirt-go, libvirt-go-xml, libvirt-dbus,
> libvirt-rust and libvirt-python. In the libvirt case this uses Docker
> containers, but others just use native Travis environment. Travis is
> the only place we get macOS coverage for libvirt.
> 
> Finally everything is x86-only right now, though I've been working on
> using Debian to build cross-compiler container environments to address
> that limitation.
> 
> We also have patchew scanning libvir-list and running syntax-check
> across patches though it has not been very reliably running in
> recent times which is a shame.
> 
> 
>> #THE LONG STORY SHORT
>> As far as the functional test suite goes, there's an already existing
>> integration with the avocado-vt and a massive number of test cases at [1]
>> which is currently not used for upstream testing, primarily because of the 
>> huge
>> number of test cases (and also many unnecessary legacy test cases).
>> An alternative set of functional test cases is available as part of the
>> libvirt-tck framework [2]. The obvious question now is how can we build upon
>> any of this and introduce proper functional testing of upstream libvirt to 
>> our
>> jenkins environment at ci.centos.org, so I formulated the following 
>> discussion
>> points as I think these are crucial to sort out before we move on to the test
>> suite itself:
>>
>> * Infrastructure/Storage requirements (need for hosting pre-build images?)
>>  - one of the main goals we should strive for with upstream CI is that
>>every developer should be able to run the integration test suite on
>>their own machine (conveniently) prior to submitting their patchset to
>>the list
> 
> Any test suite that developers are expected to run before submissions
> needs to be reasonably fast to run, and above all it needs to be very
> reliable. If it is slow, or wastes time by giving false positives, developers
> will quickly learn to not bother running it.
> 
> This necessarily implies that what developers run will only be a small
> subset of what the CI systems run.
> 
> Developers just need to be able to then reproduce failures from CI
> in some manner locally to debug things after the fact. 
> 
>>  - we need a reproducible environment to ensure that we don't get 
>> different
>>results across different platforms (including ci.centos.org), 
>> therefore
>>we could provide pre-built images with environment already set up to 
>> run
>>the suite in an L1 guest.
>>  - as for performing migration tests, we could utilize nested virt
> 
> Migration testing doesn't fundamentally need nested virt. It just needs two
> separate isolated libvirt instances. From POV of libvirt, we're just testing
> our integration with QEMU, for which it is sufficient to use TCG, not KVM.
> This could be done with any two VMs, or two container environments.
> 
>>  - should we go this way, having some publicly accessible storage to host
>>all the pre-built images is a key problem to solve
>>
>>-> an estimate of how much we're currently using: roughly 130G 
>> from
>>   our 500G allocation at ci.centos.org to store 8 qcow2 images + 
>> 2
>>   freebsd isos
>>
>>-> we're also fairly generous with how much we allocate for a 
>> guest
>>   image as most of the guests don't even use half of the 20G
>>   allocation
>>
>>-> considering sparsifying the pre-built images and compressing 
>> them
>>   + adding a ton of dependencies to run the suite, extending the
>>   pool of distros by including ubuntu 16 + 18, 200-250G is IMHO
>>   quite a generous estimate of our real need
>>
>>-> we need to find a party willing to give us the estimated amount
>>   of

[Qemu-devel] [PULL 21/21] hw/arm/armsse: Make 0x5... alias region work for per-CPU devices

2019-02-21 Thread Peter Maydell

The region 0x40010000 .. 0x4001ffff and its secure-only alias
at 0x5001... are for per-CPU devices. We implement this by
giving each CPU its own container memory region, where the
per-CPU devices live. Unfortunately, the alias region which
makes devices mapped at 0x4... addresses also appear at 0x5...
is only implemented in the overall "all CPUs" container. The
effect of this bug is that the CPU_IDENTITY register block appears
only at 0x4001f000, but not at the 0x5001f000 alias where it should
also appear. Guests (like very recent Arm Trusted Firmware-M)
which try to access it at 0x5001f000 will crash.

Fix this by moving the handling for this alias from the "all CPUs"
container to the per-CPU container. (We leave the aliases for
0x1... and 0x3... in the overall container, because there are
no per-CPU devices there.)

Signed-off-by: Peter Maydell 
Message-id: 20190215180500.6906-1-peter.mayd...@linaro.org
Reviewed-by: Alex Bennée 
---
 include/hw/arm/armsse.h |  2 +-
 hw/arm/armsse.c | 26 --
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/include/hw/arm/armsse.h b/include/hw/arm/armsse.h
index 84879f40dd8..7ef871c7dfe 100644
--- a/include/hw/arm/armsse.h
+++ b/include/hw/arm/armsse.h
@@ -186,7 +186,7 @@ typedef struct ARMSSE {
 MemoryRegion cpu_container[SSE_MAX_CPUS];
 MemoryRegion alias1;
 MemoryRegion alias2;
-MemoryRegion alias3;
+MemoryRegion alias3[SSE_MAX_CPUS];
 MemoryRegion sram[MAX_SRAM_BANKS];
 
 qemu_irq *exp_irqs[SSE_MAX_CPUS];
diff --git a/hw/arm/armsse.c b/hw/arm/armsse.c
index 50da41f64c5..129e7ea7fe0 100644
--- a/hw/arm/armsse.c
+++ b/hw/arm/armsse.c
@@ -110,15 +110,16 @@ static bool irq_is_common[32] = {
 /* 30, 31: reserved */
 };
 
-/* Create an alias region of @size bytes starting at @base
+/*
+ * Create an alias region in @container of @size bytes starting at @base
  * which mirrors the memory starting at @orig.
  */
-static void make_alias(ARMSSE *s, MemoryRegion *mr, const char *name,
-   hwaddr base, hwaddr size, hwaddr orig)
+static void make_alias(ARMSSE *s, MemoryRegion *mr, MemoryRegion *container,
+   const char *name, hwaddr base, hwaddr size, hwaddr orig)
 {
-memory_region_init_alias(mr, NULL, name, >container, orig, size);
+memory_region_init_alias(mr, NULL, name, container, orig, size);
 /* The alias is even lower priority than unimplemented_device regions */
-memory_region_add_subregion_overlap(>container, base, mr, -1500);
+memory_region_add_subregion_overlap(container, base, mr, -1500);
 }
 
 static void irq_status_forwarder(void *opaque, int n, int level)
@@ -607,16 +608,21 @@ static void armsse_realize(DeviceState *dev, Error **errp)
 }
 
 /* Set up the big aliases first */
-make_alias(s, >alias1, "alias 1", 0x1000, 0x1000, 0x);
-make_alias(s, >alias2, "alias 2", 0x3000, 0x1000, 0x2000);
+make_alias(s, >alias1, >container, "alias 1",
+   0x1000, 0x1000, 0x);
+make_alias(s, >alias2, >container,
+   "alias 2", 0x3000, 0x1000, 0x2000);
 /* The 0x5000..0x5fff region is not a pure alias: it has
  * a few extra devices that only appear there (generally the
  * control interfaces for the protection controllers).
  * We implement this by mapping those devices over the top of this
- * alias MR at a higher priority.
+ * alias MR at a higher priority. Some of the devices in this range
+ * are per-CPU, so we must put this alias in the per-cpu containers.
  */
-make_alias(s, >alias3, "alias 3", 0x5000, 0x1000, 0x4000);
-
+for (i = 0; i < info->num_cpus; i++) {
+make_alias(s, >alias3[i], >cpu_container[i],
+   "alias 3", 0x5000, 0x1000, 0x4000);
+}
 
 /* Security controller */
 object_property_set_bool(OBJECT(>secctl), true, "realized", );
-- 
2.20.1

1 2 3 4 >

1 - 100 of 378 matches

Mail list logo