date:20160301

Re: [Qemu-devel] [RFC PATCH v0 4/6] spapr: CPU hotplug support

2016-03-01 Thread Bharata B Rao

On Mon, Feb 29, 2016 at 10:12:10AM +0530, Bharata B Rao wrote:
> > > diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> > > index b7c5ebd..cc0369e 100644
> > > --- a/hw/ppc/spapr_rtas.c
> > > +++ b/hw/ppc/spapr_rtas.c
> > > @@ -34,6 +34,7 @@
> > >  
> > >  #include "hw/ppc/spapr.h"
> > >  #include "hw/ppc/spapr_vio.h"
> > > +#include "hw/ppc/ppc.h"
> > >  #include "qapi-event.h"
> > >  #include "hw/boards.h"
> > >  
> > > @@ -161,6 +162,27 @@ static void rtas_query_cpu_stopped_state(PowerPCCPU 
> > > *cpu_,
> > >  rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> > >  }
> > >  
> > > +/*
> > > + * Set the timebase offset of the CPU to that of first CPU.
> > > + * This helps hotplugged CPU to have the correct timebase offset.
> > > + */
> > > +static void spapr_cpu_update_tb_offset(PowerPCCPU *cpu)
> > > +{
> > > +PowerPCCPU *fcpu = POWERPC_CPU(first_cpu);
> > > +
> > > +cpu->env.tb_env->tb_offset = fcpu->env.tb_env->tb_offset;
> > > +}
> > > +
> > > +static void spapr_cpu_set_endianness(PowerPCCPU *cpu)
> > > +{
> > > +PowerPCCPU *fcpu = POWERPC_CPU(first_cpu);
> > > +PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(fcpu);
> > > +
> > > +if (!pcc->interrupts_big_endian(fcpu)) {
> > > +cpu->env.spr[SPR_LPCR] |= LPCR_ILE;
> > > +}
> > > +}
> > > +
> > 
> > Any particular reason for doing these things at rtas_start_cpu() time,
> > but other initialization at plug time?  Could you consolidate it to
> > one place or the other?
> 
> The board-specific things that need to be done have been consolidated
> into spapr_cpu_init(), which will be called from the plug handler. We have
> discussed this earlier at:
> 
> https://lists.nongnu.org/archive/html/qemu-devel/2015-02/msg04399.html
> 
> It has been a while, but there was a good reason why setting endianness
> here rather than in the plug handler is necessary. Without this, LE hotplug
> on guests wouldn't work. I will dig up and come back on what exactly
> necessitated this change.

If we set LPCR_ILE in cpu->env.spr[SPR_LPCR] at plug time
(from spapr_cpu_init()), there are at least two places later where it gets
overwritten. One is spapr_cpu_reset() and the other is when
kvm_cpu_synchronize_state() is called from rtas_start_cpu(). We could
probably issue a kvm_arch_put_registers(), but I found rtas_start_cpu()
to be a place where this change is guaranteed to get reflected.

Regards,
Bharata.

Re: [Qemu-devel] [RFC PATCH v0 2/6] spapr: CPU core device

2016-03-01 Thread Bharata B Rao

On Mon, Feb 29, 2016 at 04:15:25PM +0100, Igor Mammedov wrote:
> On Mon, 29 Feb 2016 18:25:25 +0530
> Bharata B Rao  wrote:
> 
> > On Mon, Feb 29, 2016 at 11:03:16AM +0100, Igor Mammedov wrote:
> > > On Mon, 29 Feb 2016 11:20:19 +0530
> > > Bharata B Rao  wrote:
> > >   
> > > > On Fri, Feb 26, 2016 at 12:13:39PM -0600, Michael Roth wrote:  
> > > > > Quoting Bharata B Rao (2016-02-25 10:22:38)
> > > > > > Add sPAPR specific CPU core device that is based on generic CPU 
> > > > > > core device.
> > > > > > Creating this core device will result in creation of all the CPU 
> > > > > > thread
> > > > > > devices that are part of this core.
> > > > > > 
> > > > > > Signed-off-by: Bharata B Rao 
> > > > > > ---
> > > > > >  hw/ppc/Makefile.objs|   1 +
> > > > > >  hw/ppc/spapr_cpu_core.c | 210 
> > > > > > 
> > > > > >  include/hw/ppc/spapr_cpu_core.h |  32 ++
> > > > > >  3 files changed, 243 insertions(+)
> > > > > >  create mode 100644 hw/ppc/spapr_cpu_core.c
> > > > > >  create mode 100644 include/hw/ppc/spapr_cpu_core.h
> > > > > > 
> > > > > > diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> > > > > > index c1ffc77..5cc6608 100644
> > > > > > --- a/hw/ppc/Makefile.objs
> > > > > > +++ b/hw/ppc/Makefile.objs
> > > > > > @@ -4,6 +4,7 @@ obj-y += ppc.o ppc_booke.o
> > > > > >  obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
> > > > > >  obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
> > > > > >  obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o 
> > > > > > spapr_rng.o
> > > > > > +obj-$(CONFIG_PSERIES) += spapr_cpu_core.o
> > > > > >  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
> > > > > >  obj-y += spapr_pci_vfio.o
> > > > > >  endif
> > > > > > diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
> > > > > > new file mode 100644
> > > > > > index 000..c44eb61
> > > > > > --- /dev/null
> > > > > > +++ b/hw/ppc/spapr_cpu_core.c
> > > > > > @@ -0,0 +1,210 @@
> > > > > > +/*
> > > > > > + * sPAPR CPU core device, acts as container of CPU thread devices.
> > > > > > + *
> > > > > > + * Copyright (C) 2016 Bharata B Rao 
> > > > > > + *
> > > > > > + * This work is licensed under the terms of the GNU GPL, version 2 
> > > > > > or later.
> > > > > > + * See the COPYING file in the top-level directory.
> > > > > > + */
> > > > > > +#include "hw/cpu/core.h"
> > > > > > +#include "hw/ppc/spapr_cpu_core.h"
> > > > > > +#include "hw/ppc/spapr.h"
> > > > > > +#include "hw/boards.h"
> > > > > > +#include "qemu/error-report.h"
> > > > > > +#include "qapi/visitor.h"
> > > > > > +#include 
> > > > > > +
> > > > > > +static int spapr_cpu_core_realize_child(Object *child, void 
> > > > > > *opaque)
> > > > > > +{
> > > > > > +Error **errp = opaque;
> > > > > > +
> > > > > > +object_property_set_bool(child, true, "realized", errp);
> > > > > > +if (*errp) {
> > > > > > +return 1;
> > > > > > +}
> > > > > > +return 0;
> > > > > > +}
> > > > > > +
> > > > > > +static void spapr_cpu_core_realize(DeviceState *dev, Error **errp)
> > > > > > +{
> > > > > > +sPAPRCPUCore *core = SPAPR_CPU_CORE(OBJECT(dev));
> > > > > > +sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> > > > > > +Error *local_err = NULL;
> > > > > > +
> > > > > > +if (!core->nr_threads) {
> > > > > > +error_setg(errp, "nr_threads property can't be 0");
> > > > > > +return;
> > > > > > +}
> > > > > > +
> > > > > > +if (!core->cpu_model) {
> > > > > > +error_setg(errp, "cpu_model property isn't set");
> > > > > > +return;
> > > > > > +}
> > > > > > +
> > > > > > +/*
> > > > > > + * TODO: If slot isn't specified, plug this core into
> > > > > > + * an existing empty slot.
> > > > > > + */
> > > > > > +if (!core->slot) {
> > > > > > +error_setg(errp, "slot property isn't set");
> > > > > > +return;
> > > > > > +}
> > > > > > +
> > > > > > +object_property_set_link(OBJECT(spapr), OBJECT(core), 
> > > > > > core->slot,
> > > > > > + &local_err);
> > > > > > +if (local_err) {
> > > > > > +error_propagate(errp, local_err);
> > > > > > +return;
> > > > > > +}
> > > > > > +
> > > > > > +object_child_foreach(OBJECT(dev), 
> > > > > > spapr_cpu_core_realize_child, errp);
> > > > > > +}
> > > > > > +
> > > > > > +/*
> > > > > > + * This creates the CPU threads for a given @core.
> > > > > > + *
> > > > > > + * In order to create the threads, we need two inputs - number of
> > > > > > + * threads and the cpu_model. These are set as core object's 
> > > > > > properties.
> > > > > > + * When both of them become available/set, this routine will be 
> > > > > > called from
> > > > > > + * either property's set handler to create the threads.
> > > > > > + *
> > > > > > + * TODO: Dependence of threads creation on two properties is 
> > > >

[Qemu-devel] [PULL 1/5] console: add & use qemu_console_lookup_by_device_name

2016-03-01 Thread Gerd Hoffmann

We have two places needing this, and a third one will come shortly.
So factor things out into a helper function to reduce code duplication.

Signed-off-by: Gerd Hoffmann 
Reviewed-by: Daniel P. Berrange 
Reviewed-by: Markus Armbruster 
---
 include/ui/console.h |  2 ++
 ui/console.c | 23 +++
 ui/input.c   | 15 ---
 ui/vnc.c | 15 ---
 4 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/include/ui/console.h b/include/ui/console.h
index 6631b96..f636971 100644
--- a/include/ui/console.h
+++ b/include/ui/console.h
@@ -378,6 +378,8 @@ void graphic_hw_gl_block(QemuConsole *con, bool block);
 
 QemuConsole *qemu_console_lookup_by_index(unsigned int index);
 QemuConsole *qemu_console_lookup_by_device(DeviceState *dev, uint32_t head);
+QemuConsole *qemu_console_lookup_by_device_name(const char *device_id,
+uint32_t head, Error **errp);
 bool qemu_console_is_visible(QemuConsole *con);
 bool qemu_console_is_graphic(QemuConsole *con);
 bool qemu_console_is_fixedsize(QemuConsole *con);
diff --git a/ui/console.c b/ui/console.c
index 7db0fd2..ae61382 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -1790,6 +1790,29 @@ QemuConsole *qemu_console_lookup_by_device(DeviceState 
*dev, uint32_t head)
 return NULL;
 }
 
+QemuConsole *qemu_console_lookup_by_device_name(const char *device_id,
+uint32_t head, Error **errp)
+{
+DeviceState *dev;
+QemuConsole *con;
+
+dev = qdev_find_recursive(sysbus_get_default(), device_id);
+if (dev == NULL) {
+error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+  "Device '%s' not found", device_id);
+return NULL;
+}
+
+con = qemu_console_lookup_by_device(dev, head);
+if (con == NULL) {
+error_setg(errp, "Device %s (head %d) is not bound to a QemuConsole",
+   device_id, head);
+return NULL;
+}
+
+return con;
+}
+
 bool qemu_console_is_visible(QemuConsole *con)
 {
 return (con == active_console) || (con->dcls > 0);
diff --git a/ui/input.c b/ui/input.c
index bdcb974..fce99ba 100644
--- a/ui/input.c
+++ b/ui/input.c
@@ -82,19 +82,12 @@ void qemu_input_handler_bind(QemuInputHandlerState *s,
  const char *device_id, int head,
  Error **errp)
 {
-DeviceState *dev;
 QemuConsole *con;
+Error *err = NULL;
 
-dev = qdev_find_recursive(sysbus_get_default(), device_id);
-if (dev == NULL) {
-error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
-  "Device '%s' not found", device_id);
-return;
-}
-
-con = qemu_console_lookup_by_device(dev, head);
-if (con == NULL) {
-error_setg(errp, "Device %s is not bound to a QemuConsole", device_id);
+con = qemu_console_lookup_by_device_name(device_id, head, &err);
+if (err) {
+error_propagate(errp, err);
 return;
 }
 
diff --git a/ui/vnc.c b/ui/vnc.c
index b6bbea5..f27df6d 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -3732,19 +3732,12 @@ void vnc_display_open(const char *id, Error **errp)
 
 device_id = qemu_opt_get(opts, "display");
 if (device_id) {
-DeviceState *dev;
 int head = qemu_opt_get_number(opts, "head", 0);
+Error *err = NULL;
 
-dev = qdev_find_recursive(sysbus_get_default(), device_id);
-if (dev == NULL) {
-error_setg(errp, "Device '%s' not found", device_id);
-goto fail;
-}
-
-con = qemu_console_lookup_by_device(dev, head);
-if (con == NULL) {
-error_setg(errp, "Device %s is not bound to a QemuConsole",
-   device_id);
+con = qemu_console_lookup_by_device_name(device_id, head, &err);
+if (err) {
+error_propagate(errp, err);
 goto fail;
 }
 } else {
-- 
1.8.3.1

[Qemu-devel] [PULL 5/5] qapi: promote input-send-event to stable

2016-03-01 Thread Gerd Hoffmann

With all fixups in place now, we can promote input-send-event
to a stable ABI by removing the x- prefix.

Signed-off-by: Gerd Hoffmann 
Reviewed-by: Markus Armbruster 
Reviewed-by: Eric Blake 
---
 qapi-schema.json | 12 +++-
 qmp-commands.hx  | 14 +++---
 ui/input.c   |  6 +++---
 3 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/qapi-schema.json b/qapi-schema.json
index f3e080f..42fd61b 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -3819,7 +3819,7 @@
   'abs' : 'InputMoveEvent' } }
 
 ##
-# @x-input-send-event
+# @input-send-event
 #
 # Send input event(s) to guest.
 #
@@ -3840,16 +3840,10 @@
 # specified, both input devices with and without input routing config
 # are admissible, but devices with input routing config take
 # precedence.
-
-# Since: 2.2
-#
-# Note: this command is experimental, and not a stable API.  Things that
-# might change before it becomes stable include the spelling of enum
-# values for InputButton and InputAxis, and the notion of how to designate
-# which console will receive the event.
 #
+# Since: 2.6
 ##
-{ 'command': 'x-input-send-event',
+{ 'command': 'input-send-event',
   'data': { '*device': 'str',
 '*head'  : 'int',
 'events' : [ 'InputEvent' ] } }
diff --git a/qmp-commands.hx b/qmp-commands.hx
index edfe772..b629673 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -4658,13 +4658,13 @@ Example:
 EQMP
 
 {
-.name   = "x-input-send-event",
+.name   = "input-send-event",
 .args_type  = "console:i?,events:q",
-.mhandler.cmd_new = qmp_marshal_x_input_send_event,
+.mhandler.cmd_new = qmp_marshal_input_send_event,
 },
 
 SQMP
-@x-input-send-event
+@input-send-event
 -
 
 Send input event to guest.
@@ -4685,13 +4685,13 @@ Example (1):
 
 Press left mouse button.
 
--> { "execute": "x-input-send-event",
+-> { "execute": "input-send-event",
 "arguments": { "device": "video0",
"events": [ { "type": "btn",
"data" : { "down": true, "button": "left" } } ] } }
 <- { "return": {} }
 
--> { "execute": "x-input-send-event",
+-> { "execute": "input-send-event",
 "arguments": { "device": "video0",
"events": [ { "type": "btn",
"data" : { "down": false, "button": "left" } } ] } }
@@ -4701,7 +4701,7 @@ Example (2):
 
 Press ctrl-alt-del.
 
--> { "execute": "x-input-send-event",
+-> { "execute": "input-send-event",
  "arguments": { "events": [
 { "type": "key", "data" : { "down": true,
   "key": {"type": "qcode", "data": "ctrl" } } },
@@ -4715,7 +4715,7 @@ Example (3):
 
 Move mouse pointer to absolute coordinates (2, 400).
 
--> { "execute": "x-input-send-event" ,
+-> { "execute": "input-send-event" ,
   "arguments": { "events": [
{ "type": "abs", "data" : { "axis": "x", "value" : 2 } },
{ "type": "abs", "data" : { "axis": "y", "value" : 400 } } ] } }
diff --git a/ui/input.c b/ui/input.c
index 0887bb5..6fd48ef 100644
--- a/ui/input.c
+++ b/ui/input.c
@@ -119,9 +119,9 @@ qemu_input_find_handler(uint32_t mask, QemuConsole *con)
 return NULL;
 }
 
-void qmp_x_input_send_event(bool has_device, const char *device,
-bool has_head, int64_t head,
-InputEventList *events, Error **errp)
+void qmp_input_send_event(bool has_device, const char *device,
+  bool has_head, int64_t head,
+  InputEventList *events, Error **errp)
 {
 InputEventList *e;
 QemuConsole *con;
-- 
1.8.3.1

[Qemu-devel] [PULL 2/5] qapi: switch x-input-send-event from console to device+head

2016-03-01 Thread Gerd Hoffmann

Use display device qdev id and head number instead of console index to
specify the QemuConsole.  This makes things consistent with input
devices (for input routing) and vnc server configuration, which both use
display and head too.

Signed-off-by: Gerd Hoffmann 
Reviewed-by: Daniel P. Berrange 
Reviewed-by: Markus Armbruster 
---
 qapi-schema.json | 32 +---
 qmp-commands.hx  | 17 +
 ui/input.c   | 15 ++-
 3 files changed, 36 insertions(+), 28 deletions(-)

diff --git a/qapi-schema.json b/qapi-schema.json
index 7b8f2a1..b632239 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -3829,24 +3829,24 @@
 #
 # Send input event(s) to guest.
 #
-# @console: #optional console to send event(s) to.
-#   This parameter can be used to send the input event to
-#   specific input devices in case (a) multiple input devices
-#   of the same kind are added to the virtual machine and (b)
-#   you have configured input routing (see docs/multiseat.txt)
-#   for those input devices.  If input routing is not
-#   configured this parameter has no effect.
-#   If @console is missing, only devices that aren't associated
-#   with a console are admissible.
-#   If @console is specified, it must exist, and both devices
-#   associated with that console and devices not associated with a
-#   console are admissible, but the former take precedence.
-
-#
+# @device: #optional display device to send event(s) to.
+# @head: #optional head to send event(s) to, in case the
+#display device supports multiple scanouts.
 # @events: List of InputEvent union.
 #
 # Returns: Nothing on success.
 #
+# The @device and @head parameters can be used to send the input
+# event to specific input devices in case (a) multiple input devices
+# of the same kind are added to the virtual machine and (b) you have
+# configured input routing (see docs/multiseat.txt) for those input
+# devices.  The parameters work exactly like the device and head
+# properties of input devices.  If @device is missing, only devices
+# that have no input routing config are admissible.  If @device is
+# specified, both input devices with and without input routing config
+# are admissible, but devices with input routing config take
+# precedence.
+
 # Since: 2.2
 #
 # Note: this command is experimental, and not a stable API.  Things that
@@ -3856,7 +3856,9 @@
 #
 ##
 { 'command': 'x-input-send-event',
-  'data': { '*console':'int', 'events': [ 'InputEvent' ] } }
+  'data': { '*device': 'str',
+'*head'  : 'int',
+'events' : [ 'InputEvent' ] } }
 
 ##
 # @NumaOptions
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 13f158d..b1a2b97 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -4671,8 +4671,9 @@ Send input event to guest.
 
 Arguments:
 
-- "console": console index. (json-int, optional)
-- "events": list of input events.
+- "device": display device (json-string, optional)
+- "head": display head (json-int, optional)
+- "events": list of input events
 
 The consoles are visible in the qom tree, under
 /backend/console[$index]. They have a device link and head property, so
@@ -4685,15 +4686,15 @@ Example (1):
 Press left mouse button.
 
 -> { "execute": "x-input-send-event",
-"arguments": { "console": 0,
+"arguments": { "device": "video0",
"events": [ { "type": "btn",
-"data" : { "down": true, "button": "Left" } } ] } }
+   "data" : { "down": true, "button": "Left" } } ] } }
 <- { "return": {} }
 
 -> { "execute": "x-input-send-event",
-"arguments": { "console": 0,
+"arguments": { "device": "video0",
"events": [ { "type": "btn",
-"data" : { "down": false, "button": "Left" } } ] } }
+   "data" : { "down": false, "button": "Left" } } ] } }
 <- { "return": {} }
 
 Example (2):
@@ -4701,7 +4702,7 @@ Example (2):
 Press ctrl-alt-del.
 
 -> { "execute": "x-input-send-event",
- "arguments": { "console": 0, "events": [
+ "arguments": { "events": [
 { "type": "key", "data" : { "down": true,
   "key": {"type": "qcode", "data": "ctrl" } } },
 { "type": "key", "data" : { "down": true,
@@ -4715,7 +4716,7 @@ Example (3):
 Move mouse pointer to absolute coordinates (2, 400).
 
 -> { "execute": "x-input-send-event" ,
-  "arguments": { "console": 0, "events": [
+  "arguments": { "events": [
{ "type": "abs", "data" : { "axis": "X", "value" : 2 } },
{ "type": "abs", "data" : { "axis": "Y", "value" : 400 } } ] } }
 <- { "return": {} }
diff --git a/ui/input.c b/ui/input.c
index fce99ba..0887bb5 100644
--- a/ui/input.c
+++ b/ui/input.c
@@ -119,17 +119,22 @@ qemu_input_find_handler(uint32_t mask, QemuConsole *con)
 return NULL;
 }
 
-void qmp_x_input_send_event(bool has_console, int64_t console,
+void qmp_x_input_sen

[Qemu-devel] [PULL 3/5] qapi: rename input buttons

2016-03-01 Thread Gerd Hoffmann

All lowercase, use-dash instead of CamelCase.

Signed-off-by: Gerd Hoffmann 
Reviewed-by: Daniel P. Berrange 
Reviewed-by: Markus Armbruster 
Reviewed-by: Eric Blake 
---
 hw/input/hid.c  | 4 ++--
 hw/input/ps2.c  | 4 ++--
 hw/input/virtio-input-hid.c | 4 ++--
 monitor.c   | 2 +-
 qapi-schema.json| 5 +
 qmp-commands.hx | 4 ++--
 scripts/qapi.py | 1 -
 ui/cocoa.m  | 4 ++--
 ui/gtk.c| 4 ++--
 ui/input-legacy.c   | 4 ++--
 ui/sdl.c| 4 ++--
 ui/sdl2.c   | 4 ++--
 ui/spice-input.c| 4 ++--
 ui/vnc.c| 4 ++--
 14 files changed, 24 insertions(+), 28 deletions(-)

diff --git a/hw/input/hid.c b/hw/input/hid.c
index b41efbb..81a85fb 100644
--- a/hw/input/hid.c
+++ b/hw/input/hid.c
@@ -140,9 +140,9 @@ static void hid_pointer_event(DeviceState *dev, QemuConsole 
*src,
 case INPUT_EVENT_KIND_BTN:
 if (evt->u.btn->down) {
 e->buttons_state |= bmap[evt->u.btn->button];
-if (evt->u.btn->button == INPUT_BUTTON_WHEELUP) {
+if (evt->u.btn->button == INPUT_BUTTON_WHEEL_UP) {
 e->dz--;
-} else if (evt->u.btn->button == INPUT_BUTTON_WHEELDOWN) {
+} else if (evt->u.btn->button == INPUT_BUTTON_WHEEL_DOWN) {
 e->dz++;
 }
 } else {
diff --git a/hw/input/ps2.c b/hw/input/ps2.c
index b6f0e8d..1bd0dde 100644
--- a/hw/input/ps2.c
+++ b/hw/input/ps2.c
@@ -406,9 +406,9 @@ static void ps2_mouse_event(DeviceState *dev, QemuConsole 
*src,
 case INPUT_EVENT_KIND_BTN:
 if (evt->u.btn->down) {
 s->mouse_buttons |= bmap[evt->u.btn->button];
-if (evt->u.btn->button == INPUT_BUTTON_WHEELUP) {
+if (evt->u.btn->button == INPUT_BUTTON_WHEEL_UP) {
 s->mouse_dz--;
-} else if (evt->u.btn->button == INPUT_BUTTON_WHEELDOWN) {
+} else if (evt->u.btn->button == INPUT_BUTTON_WHEEL_DOWN) {
 s->mouse_dz++;
 }
 } else {
diff --git a/hw/input/virtio-input-hid.c b/hw/input/virtio-input-hid.c
index c4af0be..9ca5395 100644
--- a/hw/input/virtio-input-hid.c
+++ b/hw/input/virtio-input-hid.c
@@ -143,8 +143,8 @@ static const unsigned int keymap_button[INPUT_BUTTON__MAX] 
= {
 [INPUT_BUTTON_LEFT]  = BTN_LEFT,
 [INPUT_BUTTON_RIGHT] = BTN_RIGHT,
 [INPUT_BUTTON_MIDDLE]= BTN_MIDDLE,
-[INPUT_BUTTON_WHEELUP]   = BTN_GEAR_UP,
-[INPUT_BUTTON_WHEELDOWN] = BTN_GEAR_DOWN,
+[INPUT_BUTTON_WHEEL_UP]  = BTN_GEAR_UP,
+[INPUT_BUTTON_WHEEL_DOWN]= BTN_GEAR_DOWN,
 };
 
 static const unsigned int axismap_rel[INPUT_AXIS__MAX] = {
diff --git a/monitor.c b/monitor.c
index 73eac17..e99ca8c 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1375,7 +1375,7 @@ static void hmp_mouse_move(Monitor *mon, const QDict 
*qdict)
 if (dz_str) {
 dz = strtol(dz_str, NULL, 0);
 if (dz != 0) {
-button = (dz > 0) ? INPUT_BUTTON_WHEELUP : INPUT_BUTTON_WHEELDOWN;
+button = (dz > 0) ? INPUT_BUTTON_WHEEL_UP : 
INPUT_BUTTON_WHEEL_DOWN;
 qemu_input_queue_btn(NULL, button, true);
 qemu_input_event_sync();
 qemu_input_queue_btn(NULL, button, false);
diff --git a/qapi-schema.json b/qapi-schema.json
index b632239..011fdb6 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -3743,12 +3743,9 @@
 # Button of a pointer input device (mouse, tablet).
 #
 # Since: 2.0
-#
-# Note that the spelling of these values may change when the
-# x-input-send-event is promoted out of experimental status.
 ##
 { 'enum'  : 'InputButton',
-  'data'  : [ 'Left', 'Middle', 'Right', 'WheelUp', 'WheelDown' ] }
+  'data'  : [ 'left', 'middle', 'right', 'wheel-up', 'wheel-down' ] }
 
 ##
 # @InputAxis
diff --git a/qmp-commands.hx b/qmp-commands.hx
index b1a2b97..cd4d142 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -4688,13 +4688,13 @@ Press left mouse button.
 -> { "execute": "x-input-send-event",
 "arguments": { "device": "video0",
"events": [ { "type": "btn",
-   "data" : { "down": true, "button": "Left" } } ] } }
+   "data" : { "down": true, "button": "left" } } ] } }
 <- { "return": {} }
 
 -> { "execute": "x-input-send-event",
 "arguments": { "device": "video0",
"events": [ { "type": "btn",
-   "data" : { "down": false, "button": "Left" } } ] } }
+   "data" : { "down": false, "button": "left" } } ] } }
 <- { "return": {} }
 
 Example (2):
diff --git a/scripts/qapi.py b/scripts/qapi.py
index 849..941d7c9 100644
--- a/scripts/qapi.py
+++ b/scripts/qapi.py
@@ -67,7 +67,6 @@ case_whitelist = [
 'CpuInfoMIPS',  # PC, visible through query-cpu
 'CpuInfoTricore',   # PC, visible through query-cpu
 'Inpu

[Qemu-devel] [PULL 0/5] qapi: fix input-send-event and promote to stable

2016-03-01 Thread Gerd Hoffmann

  Hi,

Time to get this finally merged.  Posted & reviewed back in January,
then fell off my radar due to a vacation week and backlog ...

please pull,
  Gerd

The following changes since commit 071608b519adf62bc29c914343a21c5407ab1ac9:

  Merge remote-tracking branch 'remotes/kraxel/tags/pull-usb-20160229-1' into 
staging (2016-02-29 12:24:26 +0000)

are available in the git repository at:


  git://git.kraxel.org/qemu tags/pull-input-20160301-1

for you to fetch changes up to 6575ccddf4e7c2484bc14b10d5e89f57506c3953:

  qapi: promote input-send-event to stable (2016-03-01 08:20:27 +0100)


qapi: fix input-send-event and promote to stable


Gerd Hoffmann (5):
  console: add & use qemu_console_lookup_by_device_name
  qapi: switch x-input-send-event from console to device+head
  qapi: rename input buttons
  qapi: rename InputAxis values.
  qapi: promote input-send-event to stable

 hw/input/hid.c  |  4 ++--
 hw/input/ps2.c  |  4 ++--
 hw/input/virtio-input-hid.c |  4 ++--
 include/ui/console.h|  2 ++
 monitor.c   |  2 +-
 qapi-schema.json| 52 ++---
 qmp-commands.hx | 35 +++---
 scripts/qapi.py |  2 --
 ui/cocoa.m  |  4 ++--
 ui/console.c| 23 
 ui/gtk.c|  4 ++--
 ui/input-legacy.c   |  4 ++--
 ui/input.c  | 32 +---
 ui/sdl.c|  4 ++--
 ui/sdl2.c   |  4 ++--
 ui/spice-input.c|  4 ++--
 ui/vnc.c| 19 ++---
 17 files changed, 104 insertions(+), 99 deletions(-)

[Qemu-devel] [PULL 4/5] qapi: rename InputAxis values.

2016-03-01 Thread Gerd Hoffmann

Lowercase them.

Signed-off-by: Gerd Hoffmann 
Reviewed-by: Daniel P. Berrange 
Reviewed-by: Markus Armbruster 
Reviewed-by: Eric Blake 
---
 qapi-schema.json | 5 +
 qmp-commands.hx  | 4 ++--
 scripts/qapi.py  | 1 -
 3 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/qapi-schema.json b/qapi-schema.json
index 011fdb6..f3e080f 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -3753,12 +3753,9 @@
 # Position axis of a pointer input device (mouse, tablet).
 #
 # Since: 2.0
-#
-# Note that the spelling of these values may change when the
-# x-input-send-event is promoted out of experimental status.
 ##
 { 'enum'  : 'InputAxis',
-  'data'  : [ 'X', 'Y' ] }
+  'data'  : [ 'x', 'y' ] }
 
 ##
 # @InputKeyEvent
diff --git a/qmp-commands.hx b/qmp-commands.hx
index cd4d142..edfe772 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -4717,8 +4717,8 @@ Move mouse pointer to absolute coordinates (2, 400).
 
 -> { "execute": "x-input-send-event" ,
   "arguments": { "events": [
-   { "type": "abs", "data" : { "axis": "X", "value" : 2 } },
-   { "type": "abs", "data" : { "axis": "Y", "value" : 400 } } ] } }
+   { "type": "abs", "data" : { "axis": "x", "value" : 2 } },
+   { "type": "abs", "data" : { "axis": "y", "value" : 400 } } ] } }
 <- { "return": {} }
 
 EQMP
diff --git a/scripts/qapi.py b/scripts/qapi.py
index 941d7c9..18adca7 100644
--- a/scripts/qapi.py
+++ b/scripts/qapi.py
@@ -66,7 +66,6 @@ case_whitelist = [
 'CpuInfoBase',  # CPU, visible through query-cpu
 'CpuInfoMIPS',  # PC, visible through query-cpu
 'CpuInfoTricore',   # PC, visible through query-cpu
-'InputAxis',# TODO: drop when x-input-send-event is fixed
 'QapiErrorClass',   # all members, visible through errors
 'UuidInfo', # UUID, visible through query-uuid
 'X86CPURegister32', # all members, visible indirectly through qom-get
-- 
1.8.3.1

[Qemu-devel] [PULL 1/1] seabios: update to 1.9.1 stable release

2016-03-01 Thread Gerd Hoffmann

git shortlog rel-1.9.0..rel-1.9.1
=

Cole Robinson (1):
  biostables: Support SMBIOS 2.6+ UUID format

Kevin O'Connor (7):
  xhci: Check for device disconnects during USB2 reset polling
  xhci: Wait for port enable even for USB3 devices
  sdcard: Only enable error_irq_enable for bits defined in SDHCI v1 spec
  sdcard: fix typo causing 32bit write to 16bit block_size field
  nmi: Don't try to switch onto extra stack in NMI handler
  scsi: Do not call printf() from scsi_is_ready()
  coreboot: Check for unaligned cbfs header

Marcel Apfelbaum (1):
  fw/pci: do not automatically allocate IO region for PCIe bridges

Roger Pau Monne (1):
  build: fix typo in buildversion.py

Signed-off-by: Gerd Hoffmann 
---
 pc-bios/bios-256k.bin  | Bin 262144 -> 262144 bytes
 pc-bios/bios.bin   | Bin 131072 -> 131072 bytes
 pc-bios/vgabios-cirrus.bin | Bin 38400 -> 38400 bytes
 pc-bios/vgabios-qxl.bin| Bin 38912 -> 38912 bytes
 pc-bios/vgabios-stdvga.bin | Bin 38912 -> 38912 bytes
 pc-bios/vgabios-virtio.bin | Bin 38912 -> 38912 bytes
 pc-bios/vgabios-vmware.bin | Bin 38912 -> 38912 bytes
 pc-bios/vgabios.bin| Bin 38400 -> 38400 bytes
 roms/seabios   |   2 +-
 9 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/pc-bios/bios-256k.bin b/pc-bios/bios-256k.bin
index 1c50904..e7a7e72 100644
Binary files a/pc-bios/bios-256k.bin and b/pc-bios/bios-256k.bin differ
diff --git a/pc-bios/bios.bin b/pc-bios/bios.bin
index 742a773..b0ae502 100644
Binary files a/pc-bios/bios.bin and b/pc-bios/bios.bin differ
diff --git a/pc-bios/vgabios-cirrus.bin b/pc-bios/vgabios-cirrus.bin
index 4d35b2e..3f4bb30 100644
Binary files a/pc-bios/vgabios-cirrus.bin and b/pc-bios/vgabios-cirrus.bin 
differ
diff --git a/pc-bios/vgabios-qxl.bin b/pc-bios/vgabios-qxl.bin
index aa1c725..38d31b6 100644
Binary files a/pc-bios/vgabios-qxl.bin and b/pc-bios/vgabios-qxl.bin differ
diff --git a/pc-bios/vgabios-stdvga.bin b/pc-bios/vgabios-stdvga.bin
index d6eeab1..e469c10 100644
Binary files a/pc-bios/vgabios-stdvga.bin and b/pc-bios/vgabios-stdvga.bin 
differ
diff --git a/pc-bios/vgabios-virtio.bin b/pc-bios/vgabios-virtio.bin
index f9db6a6..d42b028 100644
Binary files a/pc-bios/vgabios-virtio.bin and b/pc-bios/vgabios-virtio.bin 
differ
diff --git a/pc-bios/vgabios-vmware.bin b/pc-bios/vgabios-vmware.bin
index 33e0377..26bc0b7 100644
Binary files a/pc-bios/vgabios-vmware.bin and b/pc-bios/vgabios-vmware.bin 
differ
diff --git a/pc-bios/vgabios.bin b/pc-bios/vgabios.bin
index db0c5eb..2d1a7c6 100644
Binary files a/pc-bios/vgabios.bin and b/pc-bios/vgabios.bin differ
diff --git a/roms/seabios b/roms/seabios
index 01a84be..b3ef39f 16
--- a/roms/seabios
+++ b/roms/seabios
@@ -1 +1 @@
-Subproject commit 01a84bea2d28a19d2405c1ecac4bdef17683cc0c
+Subproject commit b3ef39f532db52bf17457ba931da758eeb38d6b4
-- 
1.8.3.1

[Qemu-devel] [PULL 0/1] seabios: update to 1.9.1 stable release

2016-03-01 Thread Gerd Hoffmann

  Hi,

This is a stable branch update for seabios (1.9.0 -> 1.9.1).
As usual the commit message has the shortlog.

please pull,
  Gerd

The following changes since commit 071608b519adf62bc29c914343a21c5407ab1ac9:

  Merge remote-tracking branch 'remotes/kraxel/tags/pull-usb-20160229-1' into 
staging (2016-02-29 12:24:26 +0000)

are available in the git repository at:


  git://git.kraxel.org/qemu tags/pull-seabios-20160301-1

for you to fetch changes up to fee5b753ff0eb8b25685227804a60dbc4a2ce6ea:

  seabios: update to 1.9.1 stable release (2016-03-01 09:37:07 +0100)


seabios: update to 1.9.1 stable release


Gerd Hoffmann (1):
  seabios: update to 1.9.1 stable release

 pc-bios/bios-256k.bin  | Bin 262144 -> 262144 bytes
 pc-bios/bios.bin   | Bin 131072 -> 131072 bytes
 pc-bios/vgabios-cirrus.bin | Bin 38400 -> 38400 bytes
 pc-bios/vgabios-qxl.bin| Bin 38912 -> 38912 bytes
 pc-bios/vgabios-stdvga.bin | Bin 38912 -> 38912 bytes
 pc-bios/vgabios-virtio.bin | Bin 38912 -> 38912 bytes
 pc-bios/vgabios-vmware.bin | Bin 38912 -> 38912 bytes
 pc-bios/vgabios.bin| Bin 38400 -> 38400 bytes
 roms/seabios   |   2 +-
 9 files changed, 1 insertion(+), 1 deletion(-)

Re: [Qemu-devel] [PATCH v3 5/8] nvdimm acpi: introduce patched dsm memory

2016-03-01 Thread Xiao Guangrong




On 02/29/2016 05:38 PM, Michael S. Tsirkin wrote:

On Sun, Feb 14, 2016 at 04:51:02PM +0800, Xiao Guangrong wrote:

The dsm memory is used to save the input parameters and store
the dsm result which is filled by QEMU.

The address of dsm memory is decided by bios and patched into
int32 object named "MEMA"

Signed-off-by: Xiao Guangrong 



This is a bit too hacky for my taste. First, I would prefer an explicit API
to add a DWORD. Second, I would like to avoid offset math hacks
and make the API return offsets.

Pls see below for a suggestion.



---
  hw/acpi/nvdimm.c | 34 +-
  1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 8568b20..bca36ae 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -29,6 +29,7 @@
  #include "qemu/osdep.h"
  #include "hw/acpi/acpi.h"
  #include "hw/acpi/aml-build.h"
+#include "hw/acpi/bios-linker-loader.h"
  #include "hw/nvram/fw_cfg.h"
  #include "hw/mem/nvdimm.h"

@@ -406,6 +407,7 @@ void nvdimm_init_acpi_state(AcpiNVDIMMState *state, 
MemoryRegion *io,
  }

  #define NVDIMM_COMMON_DSM  "NCAL"
+#define NVDIMM_ACPI_MEM_ADDR   "MEMA"

  static void nvdimm_build_common_dsm(Aml *dev)
  {
@@ -470,7 +472,9 @@ static void nvdimm_build_nvdimm_devices(GSList 
*device_list, Aml *root_dev)
  static void nvdimm_build_ssdt(GSList *device_list, GArray *table_offsets,
GArray *table_data, GArray *linker)
  {
-Aml *ssdt, *sb_scope, *dev;
+Aml *ssdt, *sb_scope, *dev, *mem_addr;
+uint32_t zero_offset = 0;
+int offset;

  acpi_add_table(table_offsets, table_data);

@@ -501,9 +505,37 @@ static void nvdimm_build_ssdt(GSList *device_list, GArray 
*table_offsets,

  aml_append(sb_scope, dev);

+/*
+ * leave it at the end of ssdt so that we can conveniently get the
+ * offset of int32 object which will be patched with the real address
+ * of the dsm memory by BIOS.
+ *
+ * 0x3200 is the magic number to let aml_int() create int32 object.
+ * It will be zeroed later to make bios_linker_loader_add_pointer()
+ * happy.
+ */
+mem_addr = aml_name_decl(NVDIMM_ACPI_MEM_ADDR, aml_int(0x3200));
+
+aml_append(sb_scope, mem_addr);
  aml_append(ssdt, sb_scope);
  /* copy AML table into ACPI tables blob and patch header there */
  g_array_append_vals(table_data, ssdt->buf->data, ssdt->buf->len);
+
+offset = table_data->len - 4;
+
+/*
+ * zero the last 4 bytes, i.e, it is the offset of
+ * NVDIMM_ACPI_MEM_ADDR object.
+ */
+g_array_remove_range(table_data, offset, 4);
+g_array_append_vals(table_data, &zero_offset, 4);
+
+bios_linker_loader_alloc(linker, NVDIMM_DSM_MEM_FILE, TARGET_PAGE_SIZE,
+ false /* high memory */);
+bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE,
+   NVDIMM_DSM_MEM_FILE, table_data,
+   table_data->data + offset,
+   sizeof(uint32_t));
  build_header(linker, table_data,
  (void *)(table_data->data + table_data->len - ssdt->buf->len),
  "SSDT", ssdt->buf->len, 1, NULL, "NVDIMM");
--
1.8.3.1




--->

acpi: add build_append_named_dword, returning an offset in buffer

This is a very limited form of support for runtime patching -
similar in functionality to what we can do with ACPI_EXTRACT
macros in python, but implemented in C.

This is to allow ACPI code direct access to data tables -
which is exactly what DataTableRegion is there for, except
no known windows release so far implements DataTableRegion.

Signed-off-by: Michael S. Tsirkin 

---

diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
index 1b632dc..f8998ea 100644
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h
@@ -286,4 +286,7 @@ void acpi_build_tables_cleanup(AcpiBuildTables *tables, 
bool mfre);
  void
  build_rsdt(GArray *table_data, GArray *linker, GArray *table_offsets);

+int
+build_append_named_dword(GArray *array, const char *name_format, ...);
+
  #endif
diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index 0d4b324..7f9fa65 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -262,6 +262,32 @@ static void build_append_int(GArray *table, uint64_t value)
  }
  }

+/* Build NAME(XXXX, 0x00000000) where 0x00000000 is encoded as a dword,
+ * and return the offset to 0x00000000 for runtime patching.
+ *
+ * Warning: runtime patching is best avoided. Only use this as
+ * a replacement for DataTableRegion (for guests that don't
+ * support it).
+ */
+int
+build_append_named_dword(GArray *array, const char *name_format, ...)
+{
+int offset;
+va_list ap;
+
+va_start(ap, name_format);
+build_append_namestringv(array, name_format, ap);
+va_end(ap);
+


The NameOP was missed here...

The idea is great and i fixed and applied it on the top this patchset, the patc

Re: [Qemu-devel] [PATCH v3 5/8] nvdimm acpi: introduce patched dsm memory

2016-03-01 Thread Xiao Guangrong




On 02/29/2016 05:38 PM, Michael S. Tsirkin wrote:


+/* Build NAME(XXXX, 0x00000000) where 0x00000000 is encoded as a dword,
+ * and return the offset to 0x00000000 for runtime patching.
+ *
+ * Warning: runtime patching is best avoided. Only use this as
+ * a replacement for DataTableRegion (for guests that don't
+ * support it).
+ */
+int
+build_append_named_dword(GArray *array, const char *name_format, ...)
+{
+int offset;
+va_list ap;
+
+va_start(ap, name_format);
+build_append_namestringv(array, name_format, ap);
+va_end(ap);


The NameOP was missed here...

The idea is great; I fixed it and applied it on top of this patchset. The patch
is attached. Does it look good to you?

>From 29a6803d244bbec807bd1df08aff4483ea776c9b Mon Sep 17 00:00:00 2001
From: Michael S. Tsirkin 
Date: Tue, 1 Mar 2016 16:33:49 +0800
Subject: [PATCH] acpi: add build_append_named_dword, returning an offset in
 buffer

This is a very limited form of support for runtime patching -
similar in functionality to what we can do with ACPI_EXTRACT
macros in python, but implemented in C.

This is to allow ACPI code direct access to data tables -
which is exactly what DataTableRegion is there for, except
no known windows release so far implements DataTableRegion.

[ Xiao: fixed missed NameOp and applied it to NVDIMM ACPI. ]

Signed-off-by: Michael S. Tsirkin 
Signed-off-by: Xiao Guangrong 
---
 hw/acpi/aml-build.c |  1 +
 hw/acpi/nvdimm.c| 33 +++--
 2 files changed, 8 insertions(+), 26 deletions(-)

diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index f40b93e..9d97ce8 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -271,6 +271,7 @@ build_append_named_dword(GArray *array, const char *name_format, ...)
 int offset;
 va_list ap;
 
+build_append_byte(array, 0x08); /* NameOp */
 va_start(ap, name_format);
 build_append_namestringv(array, name_format, ap);
 va_end(ap);
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index a6fbbee..fbdff76 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -565,10 +565,9 @@ static void nvdimm_build_nvdimm_devices(GSList *device_list, Aml *root_dev)
 static void nvdimm_build_ssdt(GSList *device_list, GArray *table_offsets,
   GArray *table_data, GArray *linker)
 {
-Aml *ssdt, *sb_scope, *dev, *field, *mem_addr;
+Aml *ssdt, *sb_scope, *dev, *field;
 Aml *min_addr, *max_addr, *mr32, *method, *crs;
-uint32_t zero_offset = 0;
-int offset;
+int offset, table_len;
 
 acpi_add_table(table_offsets, table_data);
 
@@ -682,31 +681,13 @@ static void nvdimm_build_ssdt(GSList *device_list, GArray *table_offsets,
 nvdimm_build_nvdimm_devices(device_list, dev);
 
 aml_append(sb_scope, dev);
+aml_append(ssdt, sb_scope);
 
-/*
- * leave it at the end of ssdt so that we can conveniently get the
- * offset of int32 object which will be patched with the real address
- * of the dsm memory by BIOS.
- *
- * 0x3200 is the magic number to let aml_int() create int32 object.
- * It will be zeroed later to make bios_linker_loader_add_pointer()
- * happy.
- */
-mem_addr = aml_name_decl(NVDIMM_ACPI_MEM_ADDR, aml_int(0x3200));
+table_len = table_data->len;
 
-aml_append(sb_scope, mem_addr);
-aml_append(ssdt, sb_scope);
 /* copy AML table into ACPI tables blob and patch header there */
 g_array_append_vals(table_data, ssdt->buf->data, ssdt->buf->len);
-
-offset = table_data->len - 4;
-
-/*
- * zero the last 4 bytes, i.e, it is the offset of
- * NVDIMM_ACPI_MEM_ADDR object.
- */
-g_array_remove_range(table_data, offset, 4);
-g_array_append_vals(table_data, &zero_offset, 4);
+offset = build_append_named_dword(table_data, NVDIMM_ACPI_MEM_ADDR);
 
 bios_linker_loader_alloc(linker, NVDIMM_DSM_MEM_FILE, TARGET_PAGE_SIZE,
  false /* high memory */);
@@ -715,8 +696,8 @@ static void nvdimm_build_ssdt(GSList *device_list, GArray *table_offsets,
table_data->data + offset,
sizeof(uint32_t));
 build_header(linker, table_data,
-(void *)(table_data->data + table_data->len - ssdt->buf->len),
-"SSDT", ssdt->buf->len, 1, NULL, "NVDIMM");
+(void *)(table_data->data + table_len),
+"SSDT", table_data->len - table_len, 1, NULL, "NVDIMM");
 free_aml_allocator();
 }
 
-- 
1.8.3.1

Re: [Qemu-devel] [PATCH v3 5/8] nvdimm acpi: introduce patched dsm memory

2016-03-01 Thread Michael S. Tsirkin

On Tue, Mar 01, 2016 at 04:53:23PM +0800, Xiao Guangrong wrote:
> 
> 
> On 02/29/2016 05:38 PM, Michael S. Tsirkin wrote:
> 
> >+/* Build NAME(XXXX, 0x00000000) where 0x00000000 is encoded as a dword,
> >+ * and return the offset to 0x00000000 for runtime patching.
> >+ *
> >+ * Warning: runtime patching is best avoided. Only use this as
> >+ * a replacement for DataTableRegion (for guests that don't
> >+ * support it).
> >+ */
> >+int
> >+build_append_named_dword(GArray *array, const char *name_format, ...)
> >+{
> >+int offset;
> >+va_list ap;
> >+
> >+va_start(ap, name_format);
> >+build_append_namestringv(array, name_format, ap);
> >+va_end(ap);
> 
> The NameOP was missed here...
> 
> The idea is great and i fixed and applied it on the top this patchset, the 
> patch
> is attached, would it be good to you?
> 

OK but I can't review this patch on top of patch.
Please split this in aml-build and nvdimm changes,
then squash the aml-build change with my patch and include it
as 5/8, then append yours squashed with the nvdimm.c changes.


> >From 29a6803d244bbec807bd1df08aff4483ea776c9b Mon Sep 17 00:00:00 2001
> From: Michael S. Tsirkin 
> Date: Tue, 1 Mar 2016 16:33:49 +0800
> Subject: [PATCH] acpi: add build_append_named_dword, returning an offset in
>  buffer
> 
> This is a very limited form of support for runtime patching -
> similar in functionality to what we can do with ACPI_EXTRACT
> macros in python, but implemented in C.
> 
> This is to allow ACPI code direct access to data tables -
> which is exactly what DataTableRegion is there for, except
> no known windows release so far implements DataTableRegion.
> 
> [ Xiao: fixed missed NameOp and applied it to NVDIMM ACPI. ]
> 
> Signed-off-by: Michael S. Tsirkin 
> Signed-off-by: Xiao Guangrong 
> ---
>  hw/acpi/aml-build.c |  1 +
>  hw/acpi/nvdimm.c| 33 +++--
>  2 files changed, 8 insertions(+), 26 deletions(-)
> 
> diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
> index f40b93e..9d97ce8 100644
> --- a/hw/acpi/aml-build.c
> +++ b/hw/acpi/aml-build.c
> @@ -271,6 +271,7 @@ build_append_named_dword(GArray *array, const char 
> *name_format, ...)
>  int offset;
>  va_list ap;
>  
> +build_append_byte(array, 0x08); /* NameOp */
>  va_start(ap, name_format);
>  build_append_namestringv(array, name_format, ap);
>  va_end(ap);
> diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
> index a6fbbee..fbdff76 100644
> --- a/hw/acpi/nvdimm.c
> +++ b/hw/acpi/nvdimm.c
> @@ -565,10 +565,9 @@ static void nvdimm_build_nvdimm_devices(GSList 
> *device_list, Aml *root_dev)
>  static void nvdimm_build_ssdt(GSList *device_list, GArray *table_offsets,
>GArray *table_data, GArray *linker)
>  {
> -Aml *ssdt, *sb_scope, *dev, *field, *mem_addr;
> +Aml *ssdt, *sb_scope, *dev, *field;
>  Aml *min_addr, *max_addr, *mr32, *method, *crs;
> -uint32_t zero_offset = 0;
> -int offset;
> +int offset, table_len;
>  
>  acpi_add_table(table_offsets, table_data);
>  
> @@ -682,31 +681,13 @@ static void nvdimm_build_ssdt(GSList *device_list, 
> GArray *table_offsets,
>  nvdimm_build_nvdimm_devices(device_list, dev);
>  
>  aml_append(sb_scope, dev);
> +aml_append(ssdt, sb_scope);
>  
> -/*
> - * leave it at the end of ssdt so that we can conveniently get the
> - * offset of int32 object which will be patched with the real address
> - * of the dsm memory by BIOS.
> - *
> - * 0x3200 is the magic number to let aml_int() create int32 object.
> - * It will be zeroed later to make bios_linker_loader_add_pointer()
> - * happy.
> - */
> -mem_addr = aml_name_decl(NVDIMM_ACPI_MEM_ADDR, aml_int(0x3200));
> +table_len = table_data->len;


Rename it to something that implies what it does, not its value. Offset of
what is it?

For example
nvdimm_ssdt = table_data->len;



>  
> -aml_append(sb_scope, mem_addr);
> -aml_append(ssdt, sb_scope);
>  /* copy AML table into ACPI tables blob and patch header there */
>  g_array_append_vals(table_data, ssdt->buf->data, ssdt->buf->len);
> -
> -offset = table_data->len - 4;
> -
> -/*
> - * zero the last 4 bytes, i.e, it is the offset of
> - * NVDIMM_ACPI_MEM_ADDR object.
> - */
> -g_array_remove_range(table_data, offset, 4);
> -g_array_append_vals(table_data, &zero_offset, 4);
> +offset = build_append_named_dword(table_data, NVDIMM_ACPI_MEM_ADDR);

Here too, please give it a better name
mem_addr_offset = ; ?

>  
>  bios_linker_loader_alloc(linker, NVDIMM_DSM_MEM_FILE, TARGET_PAGE_SIZE,
>   false /* high memory */);
> @@ -715,8 +696,8 @@ static void nvdimm_build_ssdt(GSList *device_list, GArray 
> *table_offsets,
> table_data->data + offset,
> sizeof(uint32_t));
>  build_header(linker, table_data,

[Qemu-devel] [PATCH qemu v13 03/16] spapr_iommu: Move table allocation to helpers

2016-03-01 Thread Alexey Kardashevskiy

At the moment presence of vfio-pci devices on a bus affects the way
the guest view table is allocated. If there is no vfio-pci on a PHB
and the host kernel supports KVM acceleration of H_PUT_TCE, a table
is allocated in KVM. However, if there is vfio-pci and we do not yet have
KVM acceleration for these, the table has to be allocated by
the userspace. At the moment the table is allocated once at boot time
but next patches will reallocate it.

This moves kvmppc_create_spapr_tce/g_malloc0 and their counterparts
to helpers.

Signed-off-by: Alexey Kardashevskiy 
Reviewed-by: David Gibson 
---
 hw/ppc/spapr_iommu.c | 58 +++-
 trace-events |  2 +-
 2 files changed, 40 insertions(+), 20 deletions(-)

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 277f289..8132f64 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -75,6 +75,37 @@ static IOMMUAccessFlags 
spapr_tce_iommu_access_flags(uint64_t tce)
 }
 }
 
+static uint64_t *spapr_tce_alloc_table(uint32_t liobn,
+   uint32_t page_shift,
+   uint32_t nb_table,
+   int *fd,
+   bool need_vfio)
+{
+uint64_t *table = NULL;
+uint64_t window_size = (uint64_t)nb_table << page_shift;
+
+if (kvm_enabled() && !(window_size >> 32)) {
+table = kvmppc_create_spapr_tce(liobn, window_size, fd, need_vfio);
+}
+
+if (!table) {
+*fd = -1;
+table = g_malloc0(nb_table * sizeof(uint64_t));
+}
+
+trace_spapr_iommu_new_table(liobn, table, *fd);
+
+return table;
+}
+
+static void spapr_tce_free_table(uint64_t *table, int fd, uint32_t nb_table)
+{
+if (!kvm_enabled() ||
+(kvmppc_remove_spapr_tce(table, fd, nb_table) != 0)) {
+g_free(table);
+}
+}
+
 /* Called from RCU critical section */
 static IOMMUTLBEntry spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr 
addr,
bool is_write)
@@ -141,21 +172,13 @@ static MemoryRegionIOMMUOps spapr_iommu_ops = {
 static int spapr_tce_table_realize(DeviceState *dev)
 {
 sPAPRTCETable *tcet = SPAPR_TCE_TABLE(dev);
-uint64_t window_size = (uint64_t)tcet->nb_table << tcet->page_shift;
 
-if (kvm_enabled() && !(window_size >> 32)) {
-tcet->table = kvmppc_create_spapr_tce(tcet->liobn,
-  window_size,
-  &tcet->fd,
-  tcet->need_vfio);
-}
-
-if (!tcet->table) {
-size_t table_size = tcet->nb_table * sizeof(uint64_t);
-tcet->table = g_malloc0(table_size);
-}
-
-trace_spapr_iommu_new_table(tcet->liobn, tcet, tcet->table, tcet->fd);
+tcet->fd = -1;
+tcet->table = spapr_tce_alloc_table(tcet->liobn,
+tcet->page_shift,
+tcet->nb_table,
+&tcet->fd,
+tcet->need_vfio);
 
 memory_region_init_iommu(&tcet->iommu, OBJECT(dev), &spapr_iommu_ops,
  "iommu-spapr",
@@ -241,11 +264,8 @@ static void spapr_tce_table_unrealize(DeviceState *dev, 
Error **errp)
 
 QLIST_REMOVE(tcet, list);
 
-if (!kvm_enabled() ||
-(kvmppc_remove_spapr_tce(tcet->table, tcet->fd,
- tcet->nb_table) != 0)) {
-g_free(tcet->table);
-}
+spapr_tce_free_table(tcet->table, tcet->fd, tcet->nb_table);
+tcet->fd = -1;
 }
 
 MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet)
diff --git a/trace-events b/trace-events
index 075ec27..4b6ea70 100644
--- a/trace-events
+++ b/trace-events
@@ -1431,7 +1431,7 @@ spapr_iommu_pci_get(uint64_t liobn, uint64_t ioba, 
uint64_t ret, uint64_t tce) "
 spapr_iommu_pci_indirect(uint64_t liobn, uint64_t ioba, uint64_t tce, uint64_t 
iobaN, uint64_t tceN, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" 
tcelist=0x%"PRIx64" iobaN=0x%"PRIx64" tceN=0x%"PRIx64" ret=%"PRId64
 spapr_iommu_pci_stuff(uint64_t liobn, uint64_t ioba, uint64_t tce_value, 
uint64_t npages, uint64_t ret) "liobn=%"PRIx64" ioba=0x%"PRIx64" 
tcevalue=0x%"PRIx64" npages=%"PRId64" ret=%"PRId64
 spapr_iommu_xlate(uint64_t liobn, uint64_t ioba, uint64_t tce, unsigned perm, 
unsigned pgsize) "liobn=%"PRIx64" 0x%"PRIx64" -> 0x%"PRIx64" perm=%u mask=%x"
-spapr_iommu_new_table(uint64_t liobn, void *tcet, void *table, int fd) 
"liobn=%"PRIx64" tcet=%p table=%p fd=%d"
+spapr_iommu_new_table(uint64_t liobn, void *table, int fd) "liobn=%"PRIx64" 
table=%p fd=%d"
 
 # hw/ppc/ppc.c
 ppc_tb_adjust(uint64_t offs1, uint64_t offs2, int64_t diff, int64_t seconds) 
"adjusted from 0x%"PRIx64" to 0x%"PRIx64", diff %"PRId64" (%"PRId64"s)"
-- 
2.5.0.rc3

[Qemu-devel] [PATCH qemu v13 00/16] spapr: vfio: Enable Dynamic DMA windows (DDW)

2016-03-01 Thread Alexey Kardashevskiy


Each Partitionable Endpoint (IOMMU group) has an address range on a PCI bus
where devices are allowed to do DMA. These ranges are called DMA windows.
By default, there is a single DMA window, 1 or 2GB big, mapped at zero
on a PCI bus.

PAPR defines a DDW RTAS API which allows pseries guests
to query the hypervisor about DDW support and capabilities (page size mask
for now). A pseries guest may request additional DMA windows (beyond
the default one) using this RTAS API.
The existing pseries Linux guests request an additional window as big as
the guest RAM and map the entire guest window which effectively creates
direct mapping of the guest memory to a PCI bus.

This patchset reworks PPC64 IOMMU code and adds necessary structures
to support big windows on pseries.

This patchset is based on git://github.com/dgibson/qemu.git ,
tag ppc-for-2.6-20160229 plus just recently posted
"Allow EEH on spapr-pci-host-bridge devices" series.

The series was completely reworked so there is no changelog.


Please comment. Thanks!


Alexey Kardashevskiy (16):
  memory: Fix IOMMU replay base address
  spapr_pci: Move DMA window enablement to a helper
  spapr_iommu: Move table allocation to helpers
  spapr_iommu: Introduce "enabled" state for TCE table
  spapr_iommu: Add root memory region
  spapr_pci: Reset DMA config on PHB reset
  vfio, memory: Notify IOMMU about starting/stopping being used by VFIO
  memory: Add reporting of supported page sizes
  vfio: Generalize IOMMU memory listener
  vfio: Use different page size for different IOMMU types
  vfio: spapr: Add SPAPR IOMMU v2 support (DMA memory preregistering)
  vmstate: Define VARRAY with VMS_ALLOC
  spapr_iommu: Remove need_vfio flag from sPAPRTCETable
  spapr_pci: Add and export DMA resetting helper
  vfio: Move iova_pgsizes from container to guest IOMMU
  spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW)

 hw/ppc/Makefile.objs  |   1 +
 hw/ppc/spapr.c|   7 +-
 hw/ppc/spapr_iommu.c  | 187 --
 hw/ppc/spapr_pci.c| 119 +---
 hw/ppc/spapr_rtas_ddw.c   | 306 ++
 hw/ppc/spapr_vio.c|   9 +-
 hw/vfio/Makefile.objs |   1 +
 hw/vfio/common.c  | 213 ++---
 hw/vfio/prereg.c  | 138 +++
 include/exec/memory.h |  13 ++
 include/hw/pci-host/spapr.h   |  15 +++
 include/hw/ppc/spapr.h|  35 +++--
 include/hw/vfio/vfio-common.h |  16 ++-
 include/migration/vmstate.h   |  10 ++
 memory.c  |   9 ++
 trace-events  |  10 +-
 16 files changed, 960 insertions(+), 129 deletions(-)
 create mode 100644 hw/ppc/spapr_rtas_ddw.c
 create mode 100644 hw/vfio/prereg.c

-- 
2.5.0.rc3

[Qemu-devel] [PATCH qemu v13 04/16] spapr_iommu: Introduce "enabled" state for TCE table

2016-03-01 Thread Alexey Kardashevskiy

Currently TCE tables are created once at start and their sizes never
change. We are going to change that by introducing a Dynamic DMA windows
support where DMA configuration may change during the guest execution.

This changes spapr_tce_new_table() to create an empty zero-size IOMMU
memory region (IOMMU MR). Only LIOBN is assigned by the time of creation.
It still will be called once at the owner object (VIO or PHB) creation.

This introduces an "enabled" state for TCE table objects with two
helper functions - spapr_tce_table_enable()/spapr_tce_table_disable().
- spapr_tce_table_enable() receives TCE table parameters, allocates
a guest view of the TCE table (in the user space or KVM) and
sets the correct size on the IOMMU MR.
- spapr_tce_table_disable() disposes the table and resets the IOMMU MR
size.

This changes the PHB reset handler to do the default DMA initialization
instead of spapr_phb_realize(). This does not make a difference now but
later with more than just one DMA window, we will have to remove them all
and create the default one on a system reset.

No visible change in behaviour is expected except the actual table
will be reallocated every reset. We might optimize this later.

The other way to implement this would be to dynamically create/remove
the TCE table QOM objects but this would make migration impossible
as the migration code expects all QOM objects to exist at the receiver
so we have to have TCE table objects created when migration begins.

spapr_tce_table_do_enable() is separated from spapr_tce_table_enable()
as later it will be called at the sPAPRTCETable post-migration stage when
it already has all the properties set after the migration.

Signed-off-by: Alexey Kardashevskiy 
---
 hw/ppc/spapr_iommu.c   | 80 +++---
 hw/ppc/spapr_pci.c | 21 +
 hw/ppc/spapr_vio.c |  9 +++---
 include/hw/ppc/spapr.h | 10 +++
 4 files changed, 80 insertions(+), 40 deletions(-)

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 8132f64..e66e128 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -174,15 +174,8 @@ static int spapr_tce_table_realize(DeviceState *dev)
 sPAPRTCETable *tcet = SPAPR_TCE_TABLE(dev);
 
 tcet->fd = -1;
-tcet->table = spapr_tce_alloc_table(tcet->liobn,
-tcet->page_shift,
-tcet->nb_table,
-&tcet->fd,
-tcet->need_vfio);
-
 memory_region_init_iommu(&tcet->iommu, OBJECT(dev), &spapr_iommu_ops,
- "iommu-spapr",
- (uint64_t)tcet->nb_table << tcet->page_shift);
+ "iommu-spapr", 0);
 
 QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
 
@@ -224,14 +217,10 @@ void spapr_tce_set_need_vfio(sPAPRTCETable *tcet, bool 
need_vfio)
 tcet->table = newtable;
 }
 
-sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn,
-   uint64_t bus_offset,
-   uint32_t page_shift,
-   uint32_t nb_table,
-   bool need_vfio)
+sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, uint32_t liobn)
 {
 sPAPRTCETable *tcet;
-char tmp[64];
+char tmp[32];
 
 if (spapr_tce_find_by_liobn(liobn)) {
 fprintf(stderr, "Attempted to create TCE table with duplicate"
@@ -239,16 +228,8 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, 
uint32_t liobn,
 return NULL;
 }
 
-if (!nb_table) {
-return NULL;
-}
-
 tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE));
 tcet->liobn = liobn;
-tcet->bus_offset = bus_offset;
-tcet->page_shift = page_shift;
-tcet->nb_table = nb_table;
-tcet->need_vfio = need_vfio;
 
 snprintf(tmp, sizeof(tmp), "tce-table-%x", liobn);
 object_property_add_child(OBJECT(owner), tmp, OBJECT(tcet), NULL);
@@ -258,14 +239,65 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, 
uint32_t liobn,
 return tcet;
 }
 
+static void spapr_tce_table_do_enable(sPAPRTCETable *tcet)
+{
+if (!tcet->nb_table) {
+return;
+}
+
+tcet->table = spapr_tce_alloc_table(tcet->liobn,
+tcet->page_shift,
+tcet->nb_table,
+&tcet->fd,
+tcet->need_vfio);
+
+memory_region_set_size(&tcet->iommu,
+   (uint64_t)tcet->nb_table << tcet->page_shift);
+
+tcet->enabled = true;
+}
+
+void spapr_tce_table_enable(sPAPRTCETable *tcet,
+uint32_t page_shift, uint64_t bus_offset,
+uint32_t nb_table, bool need_vfio)
+{
+if (tcet->enabled) {
+return;
+}
+
+tcet->bus_offset = bus_o

[Qemu-devel] [PATCH qemu v13 02/16] spapr_pci: Move DMA window enablement to a helper

2016-03-01 Thread Alexey Kardashevskiy

We are going to have multiple DMA windows soon so let's start preparing.

This adds a new helper to create a DMA window and makes use of it in
sPAPRPHBState::realize().

Signed-off-by: Alexey Kardashevskiy 
---
 hw/ppc/spapr_pci.c | 40 +++-
 1 file changed, 27 insertions(+), 13 deletions(-)

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 3d1145e..248f20a 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -803,6 +803,29 @@ static char *spapr_phb_get_loc_code(sPAPRPHBState *sphb, 
PCIDevice *pdev)
 return buf;
 }
 
+static int spapr_phb_dma_window_enable(sPAPRPHBState *sphb,
+   uint32_t liobn, uint32_t page_shift,
+   uint64_t window_addr,
+   uint64_t window_size)
+{
+sPAPRTCETable *tcet;
+uint32_t nb_table = window_size >> page_shift;
+
+if (!nb_table) {
+return -1;
+}
+
+tcet = spapr_tce_new_table(DEVICE(sphb), liobn, window_addr,
+   page_shift, nb_table, false);
+if (!tcet) {
+return -1;
+}
+
+memory_region_add_subregion(&sphb->iommu_root, tcet->bus_offset,
+spapr_tce_get_iommu(tcet));
+return 0;
+}
+
 /* Macros to operate with address in OF binding to PCI */
 #define b_x(x, p, l)(((x) & ((1<<(l))-1)) << (p))
 #define b_n(x)  b_x((x), 31, 1) /* 0 if relocatable */
@@ -1228,8 +1251,6 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 int i;
 PCIBus *bus;
 uint64_t msi_window_size = 4096;
-sPAPRTCETable *tcet;
-uint32_t nb_table;
 
 if (sphb->index != (uint32_t)-1) {
 hwaddr windows_base;
@@ -1381,18 +1402,11 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 }
 }
 
-nb_table = sphb->dma_win_size >> SPAPR_TCE_PAGE_SHIFT;
-tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
-   0, SPAPR_TCE_PAGE_SHIFT, nb_table, false);
-if (!tcet) {
-error_setg(errp, "Unable to create TCE table for %s",
-   sphb->dtbusname);
-return;
-}
-
 /* Register default 32bit DMA window */
-memory_region_add_subregion(&sphb->iommu_root, sphb->dma_win_addr,
-spapr_tce_get_iommu(tcet));
+if (spapr_phb_dma_window_enable(sphb, sphb->dma_liobn, 
SPAPR_TCE_PAGE_SHIFT,
+sphb->dma_win_addr, sphb->dma_win_size)) {
+error_setg(errp, "Unable to create TCE table for %s", sphb->dtbusname);
+}
 
 sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free);
 }
-- 
2.5.0.rc3

[Qemu-devel] [PATCH qemu v13 10/16] vfio: Use different page size for different IOMMU types

2016-03-01 Thread Alexey Kardashevskiy

The existing memory listener is called on RAM or PCI address space
which implies potentially different page size.

This uses new memory_region_iommu_get_page_sizes() for IOMMU regions
or falls back to qemu_real_host_page_size if RAM.

Signed-off-by: Alexey Kardashevskiy 
---
Changes:
* uses the smallest page size for mask as IOMMU MR can support multiple
page sizes
---
 hw/vfio/common.c | 28 
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 0e67a5a..3aaa6b5 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -318,6 +318,16 @@ static hwaddr vfio_container_granularity(VFIOContainer 
*container)
 return (hwaddr)1 << ctz64(container->iova_pgsizes);
 }
 
+static hwaddr vfio_iommu_page_mask(MemoryRegion *mr)
+{
+if (memory_region_is_iommu(mr)) {
+int smallest = ffs(memory_region_iommu_get_page_sizes(mr)) - 1;
+
+return ~((1ULL << smallest) - 1);
+}
+return qemu_real_host_page_mask;
+}
+
 static void vfio_listener_region_add(VFIOMemoryListener *vlistener,
  MemoryRegionSection *section)
 {
@@ -326,6 +336,7 @@ static void vfio_listener_region_add(VFIOMemoryListener 
*vlistener,
 Int128 llend;
 void *vaddr;
 int ret;
+hwaddr page_mask = vfio_iommu_page_mask(section->mr);
 
 if (vfio_listener_skipped_section(section)) {
 trace_vfio_listener_region_add_skip(
@@ -335,16 +346,16 @@ static void vfio_listener_region_add(VFIOMemoryListener 
*vlistener,
 return;
 }
 
-if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
- (section->offset_within_region & ~TARGET_PAGE_MASK))) {
+if (unlikely((section->offset_within_address_space & ~page_mask) !=
+ (section->offset_within_region & ~page_mask))) {
 error_report("%s received unaligned region", __func__);
 return;
 }
 
-iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
+iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1);
 llend = int128_make64(section->offset_within_address_space);
 llend = int128_add(llend, section->size);
-llend = int128_and(llend, int128_exts64(TARGET_PAGE_MASK));
+llend = int128_and(llend, int128_exts64(page_mask));
 
 if (int128_ge(int128_make64(iova), llend)) {
 return;
@@ -432,6 +443,7 @@ static void vfio_listener_region_del(VFIOMemoryListener 
*vlistener,
 hwaddr iova, end;
 int ret;
 MemoryRegion *iommu = NULL;
+hwaddr page_mask = vfio_iommu_page_mask(section->mr);
 
 if (vfio_listener_skipped_section(section)) {
 trace_vfio_listener_region_del_skip(
@@ -441,8 +453,8 @@ static void vfio_listener_region_del(VFIOMemoryListener 
*vlistener,
 return;
 }
 
-if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
- (section->offset_within_region & ~TARGET_PAGE_MASK))) {
+if (unlikely((section->offset_within_address_space & ~page_mask) !=
+ (section->offset_within_region & ~page_mask))) {
 error_report("%s received unaligned region", __func__);
 return;
 }
@@ -469,9 +481,9 @@ static void vfio_listener_region_del(VFIOMemoryListener 
*vlistener,
  */
 }
 
-iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
+iova = ROUND_UP(section->offset_within_address_space, ~page_mask + 1);
 end = (section->offset_within_address_space + int128_get64(section->size)) 
&
-  TARGET_PAGE_MASK;
+  page_mask;
 
 if (iova >= end) {
 return;
-- 
2.5.0.rc3

[Qemu-devel] [PATCH qemu v13 01/16] memory: Fix IOMMU replay base address

2016-03-01 Thread Alexey Kardashevskiy

Since a788f227 "memory: Allow replay of IOMMU mapping notifications"
when new VFIO listener is added, all existing IOMMU mappings are
replayed. However there is a problem that the base address of
an IOMMU memory region (IOMMU MR) is ignored which is not a problem
for the existing user (which is pseries) with its default 32bit DMA
window starting at 0 but it is if there is another DMA window.

This stores the IOMMU's offset_within_address_space and adjusts
the IOVA before calling vfio_dma_map/vfio_dma_unmap.

As the IOMMU notifier expects IOVA offset rather than the absolute
address, this also adjusts IOVA in sPAPR H_PUT_TCE handler before
calling notifier(s).

Signed-off-by: Alexey Kardashevskiy 
---
 hw/ppc/spapr_iommu.c  |  2 +-
 hw/vfio/common.c  | 14 --
 include/hw/vfio/vfio-common.h |  1 +
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 7dd4588..277f289 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -277,7 +277,7 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, 
target_ulong ioba,
 tcet->table[index] = tce;
 
 entry.target_as = &address_space_memory,
-entry.iova = ioba & page_mask;
+entry.iova = (ioba - tcet->bus_offset) & page_mask;
 entry.translated_addr = tce & page_mask;
 entry.addr_mask = ~page_mask;
 entry.perm = spapr_tce_iommu_access_flags(tce);
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 55e87d3..9bf4c3b 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -257,14 +257,14 @@ static void vfio_iommu_map_notify(Notifier *n, void *data)
 VFIOGuestIOMMU *giommu = container_of(n, VFIOGuestIOMMU, n);
 VFIOContainer *container = giommu->container;
 IOMMUTLBEntry *iotlb = data;
+hwaddr iova = iotlb->iova + giommu->offset_within_address_space;
 MemoryRegion *mr;
 hwaddr xlat;
 hwaddr len = iotlb->addr_mask + 1;
 void *vaddr;
 int ret;
 
-trace_vfio_iommu_map_notify(iotlb->iova,
-iotlb->iova + iotlb->addr_mask);
+trace_vfio_iommu_map_notify(iova, iova + iotlb->addr_mask);
 
 /*
  * The IOMMU TLB entry we have just covers translation through
@@ -291,21 +291,21 @@ static void vfio_iommu_map_notify(Notifier *n, void *data)
 
 if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
 vaddr = memory_region_get_ram_ptr(mr) + xlat;
-ret = vfio_dma_map(container, iotlb->iova,
+ret = vfio_dma_map(container, iova,
iotlb->addr_mask + 1, vaddr,
!(iotlb->perm & IOMMU_WO) || mr->readonly);
 if (ret) {
 error_report("vfio_dma_map(%p, 0x%"HWADDR_PRIx", "
  "0x%"HWADDR_PRIx", %p) = %d (%m)",
- container, iotlb->iova,
+ container, iova,
  iotlb->addr_mask + 1, vaddr, ret);
 }
 } else {
-ret = vfio_dma_unmap(container, iotlb->iova, iotlb->addr_mask + 1);
+ret = vfio_dma_unmap(container, iova, iotlb->addr_mask + 1);
 if (ret) {
 error_report("vfio_dma_unmap(%p, 0x%"HWADDR_PRIx", "
  "0x%"HWADDR_PRIx") = %d (%m)",
- container, iotlb->iova,
+ container, iova,
  iotlb->addr_mask + 1, ret);
 }
 }
@@ -377,6 +377,8 @@ static void vfio_listener_region_add(MemoryListener 
*listener,
  */
 giommu = g_malloc0(sizeof(*giommu));
 giommu->iommu = section->mr;
+giommu->offset_within_address_space =
+section->offset_within_address_space;
 giommu->container = container;
 giommu->n.notify = vfio_iommu_map_notify;
 QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index f037f3c..9ffa681 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -80,6 +80,7 @@ typedef struct VFIOContainer {
 typedef struct VFIOGuestIOMMU {
 VFIOContainer *container;
 MemoryRegion *iommu;
+hwaddr offset_within_address_space;
 Notifier n;
 QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
 } VFIOGuestIOMMU;
-- 
2.5.0.rc3

[Qemu-devel] [PATCH qemu v13 06/16] spapr_pci: Reset DMA config on PHB reset

2016-03-01 Thread Alexey Kardashevskiy

LoPAPR dictates that during system reset all DMA windows must be removed
and the default DMA32 window must be created; this patch does exactly that.

At the moment there is just one window supported so no change in
behaviour is expected.

Signed-off-by: Alexey Kardashevskiy 
---
 hw/ppc/spapr_iommu.c   |  2 +-
 hw/ppc/spapr_pci.c | 29 +++--
 include/hw/ppc/spapr.h |  1 +
 3 files changed, 25 insertions(+), 7 deletions(-)

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index ba9ddbb..8a88a74 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -279,7 +279,7 @@ void spapr_tce_table_enable(sPAPRTCETable *tcet,
 spapr_tce_table_do_enable(tcet);
 }
 
-static void spapr_tce_table_disable(sPAPRTCETable *tcet)
+void spapr_tce_table_disable(sPAPRTCETable *tcet)
 {
 if (!tcet->enabled) {
 return;
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 7b40687..ee0fecf 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -825,6 +825,19 @@ static int spapr_phb_dma_window_enable(sPAPRPHBState *sphb,
 return 0;
 }
 
+static int spapr_phb_dma_window_disable(sPAPRPHBState *sphb, uint32_t liobn)
+{
+sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
+
+if (!tcet) {
+return -1;
+}
+
+spapr_tce_table_disable(tcet);
+
+return 0;
+}
+
 /* Macros to operate with address in OF binding to PCI */
 #define b_x(x, p, l)(((x) & ((1<<(l))-1)) << (p))
 #define b_n(x)  b_x((x), 31, 1) /* 0 if relocatable */
@@ -1412,12 +1425,6 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 memory_region_add_subregion(&sphb->iommu_root, 0,
 spapr_tce_get_iommu(tcet));
 
-/* Register default 32bit DMA window */
-spapr_phb_dma_window_enable(sphb, sphb->dma_liobn,
-SPAPR_TCE_PAGE_SHIFT,
-sphb->dma_win_addr,
-sphb->dma_win_size);
-
 sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free);
 }
 
@@ -1434,6 +1441,16 @@ static int spapr_phb_children_reset(Object *child, void 
*opaque)
 
 static void spapr_phb_reset(DeviceState *qdev)
 {
+sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(qdev);
+
+spapr_phb_dma_window_disable(sphb, sphb->dma_liobn);
+
+/* Register default 32bit DMA window */
+spapr_phb_dma_window_enable(sphb, sphb->dma_liobn,
+SPAPR_TCE_PAGE_SHIFT,
+sphb->dma_win_addr,
+sphb->dma_win_size);
+
 /* Reset the IOMMU state */
 object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL);
 
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index bdf27ec..8aa0c45 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -571,6 +571,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, 
uint32_t liobn);
 void spapr_tce_table_enable(sPAPRTCETable *tcet,
 uint32_t page_shift, uint64_t bus_offset,
 uint32_t nb_table, bool vfio_accel);
+void spapr_tce_table_disable(sPAPRTCETable *tcet);
 void spapr_tce_set_need_vfio(sPAPRTCETable *tcet, bool need_vfio);
 
 MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet);
-- 
2.5.0.rc3

[Qemu-devel] [PATCH qemu v13 15/16] vfio: Move iova_pgsizes from container to guest IOMMU

2016-03-01 Thread Alexey Kardashevskiy

The page size is an attribute of an IOMMU, not a container as a container
may contain more than just one IOMMU.

This moves iova_pgsizes from VFIOContainer to VFIOGuestIOMMU.
The following patch will use this.

This removes iova_pgsizes from Type1 IOMMU as it is not used there anyway;
once Type1 gets a guest visible IOMMU, it will use VFIOGuestIOMMU's
iova_pgsizes.

Signed-off-by: Alexey Kardashevskiy 
---
 hw/vfio/common.c  | 16 
 include/hw/vfio/vfio-common.h |  2 +-
 2 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index f2a03e0..42ef1eb 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -313,9 +313,9 @@ out:
 rcu_read_unlock();
 }
 
-static hwaddr vfio_container_granularity(VFIOContainer *container)
+static hwaddr vfio_container_granularity(VFIOGuestIOMMU *giommu)
 {
-return (hwaddr)1 << ctz64(container->iova_pgsizes);
+return (hwaddr)1 << ctz64(giommu->iova_pgsizes);
 }
 
 static hwaddr vfio_iommu_page_mask(MemoryRegion *mr)
@@ -392,12 +392,13 @@ static void vfio_listener_region_add(VFIOMemoryListener 
*vlistener,
 section->offset_within_address_space;
 giommu->container = container;
 giommu->n.notify = vfio_iommu_map_notify;
+giommu->iova_pgsizes = 
section->mr->iommu_ops->get_page_sizes(section->mr);
 QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
 
 memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
 giommu->iommu->iommu_ops->vfio_notify(section->mr, true);
 memory_region_iommu_replay(giommu->iommu, &giommu->n,
-   vfio_container_granularity(container),
+   vfio_container_granularity(giommu),
false);
 
 return;
@@ -743,14 +744,8 @@ static int vfio_connect_container(VFIOGroup *group, 
AddressSpace *as)
 container->min_iova = 0;
 container->max_iova = (hwaddr)-1;
 
-/* Assume just 4K IOVA page size */
-container->iova_pgsizes = 0x1000;
 info.argsz = sizeof(info);
 ret = ioctl(fd, VFIO_IOMMU_GET_INFO, &info);
-/* Ignore errors */
-if ((ret == 0) && (info.flags & VFIO_IOMMU_INFO_PGSIZES)) {
-container->iova_pgsizes = info.iova_pgsizes;
-}
 } else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU) ||
ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_v2_IOMMU)) {
 struct vfio_iommu_spapr_tce_info info;
@@ -811,9 +806,6 @@ static int vfio_connect_container(VFIOGroup *group, 
AddressSpace *as)
 }
 container->min_iova = info.dma32_window_start;
 container->max_iova = container->min_iova + info.dma32_window_size - 1;
-
-/* Assume just 4K IOVA pages for now */
-container->iova_pgsizes = 0x1000;
 } else {
 error_report("vfio: No available IOMMU models");
 ret = -EINVAL;
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index bcbc5cb..48a1d7f 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -80,7 +80,6 @@ typedef struct VFIOContainer {
  * future
  */
 hwaddr min_iova, max_iova;
-uint64_t iova_pgsizes;
 QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
 QLIST_HEAD(, VFIOGroup) group_list;
 QLIST_ENTRY(VFIOContainer) next;
@@ -90,6 +89,7 @@ typedef struct VFIOGuestIOMMU {
 VFIOContainer *container;
 MemoryRegion *iommu;
 hwaddr offset_within_address_space;
+uint64_t iova_pgsizes;
 Notifier n;
 QLIST_ENTRY(VFIOGuestIOMMU) giommu_next;
 } VFIOGuestIOMMU;
-- 
2.5.0.rc3

[Qemu-devel] [PATCH qemu v13 14/16] spapr_pci: Add and export DMA resetting helper

2016-03-01 Thread Alexey Kardashevskiy

This will be later used by the "ibm,reset-pe-dma-window" RTAS handler
which resets the DMA configuration to the defaults.

Signed-off-by: Alexey Kardashevskiy 
---
 hw/ppc/spapr_pci.c  | 11 ---
 include/hw/pci-host/spapr.h |  2 ++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index b0cd148..4c6e687 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1441,10 +1441,8 @@ static int spapr_phb_children_reset(Object *child, void 
*opaque)
 return 0;
 }
 
-static void spapr_phb_reset(DeviceState *qdev)
+void spapr_phb_dma_reset(sPAPRPHBState *sphb)
 {
-sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(qdev);
-
 spapr_phb_dma_window_disable(sphb, sphb->dma_liobn);
 
 /* Register default 32bit DMA window */
@@ -1452,6 +1450,13 @@ static void spapr_phb_reset(DeviceState *qdev)
 SPAPR_TCE_PAGE_SHIFT,
 sphb->dma_win_addr,
 sphb->dma_win_size);
+}
+
+static void spapr_phb_reset(DeviceState *qdev)
+{
+sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(qdev);
+
+spapr_phb_dma_reset(sphb);
 
 /* Reset the IOMMU state */
 object_child_foreach(OBJECT(qdev), spapr_phb_children_reset, NULL);
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 03ee006..7848366 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -147,4 +147,6 @@ static inline void spapr_phb_vfio_reset(DeviceState *qdev)
 }
 #endif
 
+void spapr_phb_dma_reset(sPAPRPHBState *sphb);
+
 #endif /* __HW_SPAPR_PCI_H__ */
-- 
2.5.0.rc3

[Qemu-devel] [PATCH qemu v13 09/16] vfio: Generalize IOMMU memory listener

2016-03-01 Thread Alexey Kardashevskiy

At the moment VFIOContainer uses one memory listener which listens on
PCI address space for both Type1 and sPAPR IOMMUs. Soon we will need
another listener to listen on RAM; this will do DMA memory
pre-registration for sPAPR guests which basically pins all guest
pages in the host physical RAM.

This introduces VFIOMemoryListener which is a wrapper for MemoryListener
and stores a pointer to the container. This allows having multiple
memory listeners for the same container. This replaces the existing
@listener with @iommu_listener.

This should cause no change in behavior.

Signed-off-by: Alexey Kardashevskiy 
---
 hw/vfio/common.c  | 41 +++--
 include/hw/vfio/vfio-common.h |  9 -
 2 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index ca3fd47..0e67a5a 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -318,10 +318,10 @@ static hwaddr vfio_container_granularity(VFIOContainer 
*container)
 return (hwaddr)1 << ctz64(container->iova_pgsizes);
 }
 
-static void vfio_listener_region_add(MemoryListener *listener,
+static void vfio_listener_region_add(VFIOMemoryListener *vlistener,
  MemoryRegionSection *section)
 {
-VFIOContainer *container = container_of(listener, VFIOContainer, listener);
+VFIOContainer *container = vlistener->container;
 hwaddr iova, end;
 Int128 llend;
 void *vaddr;
@@ -425,10 +425,10 @@ fail:
 }
 }
 
-static void vfio_listener_region_del(MemoryListener *listener,
+static void vfio_listener_region_del(VFIOMemoryListener *vlistener,
  MemoryRegionSection *section)
 {
-VFIOContainer *container = container_of(listener, VFIOContainer, listener);
+VFIOContainer *container = vlistener->container;
 hwaddr iova, end;
 int ret;
 MemoryRegion *iommu = NULL;
@@ -492,14 +492,33 @@ static void vfio_listener_region_del(MemoryListener 
*listener,
 }
 }
 
-static const MemoryListener vfio_memory_listener = {
-.region_add = vfio_listener_region_add,
-.region_del = vfio_listener_region_del,
+static void vfio_iommu_listener_region_add(MemoryListener *listener,
+   MemoryRegionSection *section)
+{
+VFIOMemoryListener *vlistener = container_of(listener, VFIOMemoryListener,
+ listener);
+
+vfio_listener_region_add(vlistener, section);
+}
+
+
+static void vfio_iommu_listener_region_del(MemoryListener *listener,
+   MemoryRegionSection *section)
+{
+VFIOMemoryListener *vlistener = container_of(listener, VFIOMemoryListener,
+ listener);
+
+vfio_listener_region_del(vlistener, section);
+}
+
+static const MemoryListener vfio_iommu_listener = {
+.region_add = vfio_iommu_listener_region_add,
+.region_del = vfio_iommu_listener_region_del,
 };
 
 static void vfio_listener_release(VFIOContainer *container)
 {
-memory_listener_unregister(&container->listener);
+memory_listener_unregister(&container->iommu_listener.listener);
 }
 
 int vfio_mmap_region(Object *obj, VFIORegion *region,
@@ -768,9 +787,11 @@ static int vfio_connect_container(VFIOGroup *group, 
AddressSpace *as)
 goto free_container_exit;
 }
 
-container->listener = vfio_memory_listener;
+container->iommu_listener.container = container;
+container->iommu_listener.listener = vfio_iommu_listener;
 
-memory_listener_register(&container->listener, container->space->as);
+memory_listener_register(&container->iommu_listener.listener,
+ container->space->as);
 
 if (container->error) {
 ret = container->error;
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 9ffa681..b6b736c 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -57,12 +57,19 @@ typedef struct VFIOAddressSpace {
 QLIST_ENTRY(VFIOAddressSpace) list;
 } VFIOAddressSpace;
 
+typedef struct VFIOContainer VFIOContainer;
+
+typedef struct VFIOMemoryListener {
+struct MemoryListener listener;
+VFIOContainer *container;
+} VFIOMemoryListener;
+
 struct VFIOGroup;
 
 typedef struct VFIOContainer {
 VFIOAddressSpace *space;
 int fd; /* /dev/vfio/vfio, empowered by the attached groups */
-MemoryListener listener;
+VFIOMemoryListener iommu_listener;
 int error;
 bool initialized;
 /*
-- 
2.5.0.rc3

[Qemu-devel] [PATCH qemu v13 12/16] vmstate: Define VARRAY with VMS_ALLOC

2016-03-01 Thread Alexey Kardashevskiy

This allows dynamic allocation for migrating arrays.

Already existing VMSTATE_VARRAY_UINT32 requires an array to be
pre-allocated, however there are cases when the size is not known in
advance and there is no real need to enforce it.

This defines another variant of VMSTATE_VARRAY_UINT32 with VMS_ALLOC
flag which tells the receiving side to allocate memory for the array
before receiving the data.

The first user of it is a dynamic DMA window whose existence and size
are totally dynamic.

Signed-off-by: Alexey Kardashevskiy 
Reviewed-by: David Gibson 
Reviewed-by: Thomas Huth 
---
 include/migration/vmstate.h | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 84ee355..1622638 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -386,6 +386,16 @@ extern const VMStateInfo vmstate_info_bitmap;
 .offset = vmstate_offset_pointer(_state, _field, _type), \
 }
 
+#define VMSTATE_VARRAY_UINT32_ALLOC(_field, _state, _field_num, _version, 
_info, _type) {\
+.name   = (stringify(_field)),   \
+.version_id = (_version),\
+.num_offset = vmstate_offset_value(_state, _field_num, uint32_t),\
+.info   = &(_info),  \
+.size   = sizeof(_type), \
+.flags  = VMS_VARRAY_UINT32|VMS_POINTER|VMS_ALLOC,   \
+.offset = vmstate_offset_pointer(_state, _field, _type), \
+}
+
 #define VMSTATE_VARRAY_UINT16_UNSAFE(_field, _state, _field_num, _version, 
_info, _type) {\
 .name   = (stringify(_field)),   \
 .version_id = (_version),\
-- 
2.5.0.rc3

[Qemu-devel] [PATCH qemu v13 07/16] vfio, memory: Notify IOMMU about starting/stopping being used by VFIO

2016-03-01 Thread Alexey Kardashevskiy

This adds a vfio_notify() callback to inform an IOMMU (and then its owner)
that VFIO started using the IOMMU. This is used by the pseries machine to
enable/disable in-kernel acceleration of TCE hypercalls.

Signed-off-by: Alexey Kardashevskiy 
---
 hw/ppc/spapr_iommu.c   |  9 +
 hw/ppc/spapr_pci.c | 14 --
 hw/vfio/common.c   |  7 +++
 include/exec/memory.h  |  2 ++
 include/hw/ppc/spapr.h |  4 
 5 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 8a88a74..67a8356 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -136,6 +136,13 @@ static IOMMUTLBEntry 
spapr_tce_translate_iommu(MemoryRegion *iommu, hwaddr addr,
 return ret;
 }
 
+static int spapr_tce_vfio_notify(MemoryRegion *iommu, bool attached)
+{
+sPAPRTCETable *tcet = container_of(iommu, sPAPRTCETable, iommu);
+
+return spapr_tce_vfio_notify_owner(tcet->owner, tcet, attached);
+}
+
 static int spapr_tce_table_post_load(void *opaque, int version_id)
 {
 sPAPRTCETable *tcet = SPAPR_TCE_TABLE(opaque);
@@ -167,6 +174,7 @@ static const VMStateDescription vmstate_spapr_tce_table = {
 
 static MemoryRegionIOMMUOps spapr_iommu_ops = {
 .translate = spapr_tce_translate_iommu,
+.vfio_notify = spapr_tce_vfio_notify,
 };
 
 static int spapr_tce_table_realize(DeviceState *dev)
@@ -235,6 +243,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, 
uint32_t liobn)
 
 tcet = SPAPR_TCE_TABLE(object_new(TYPE_SPAPR_TCE_TABLE));
 tcet->liobn = liobn;
+tcet->owner = owner;
 
 snprintf(tmp, sizeof(tmp), "tce-table-%x", liobn);
 object_property_add_child(OBJECT(owner), tmp, OBJECT(tcet), NULL);
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index ee0fecf..b0cd148 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1084,6 +1084,14 @@ static int spapr_populate_pci_child_dt(PCIDevice *dev, 
void *fdt, int offset,
 return 0;
 }
 
+int spapr_tce_vfio_notify_owner(DeviceState *dev, sPAPRTCETable *tcet,
+bool attached)
+{
+spapr_tce_set_need_vfio(tcet, attached);
+
+return 0;
+}
+
 /* create OF node for pci device and required OF DT properties */
 static int spapr_create_pci_child_dt(sPAPRPHBState *phb, PCIDevice *dev,
  void *fdt, int node_offset)
@@ -1118,12 +1126,6 @@ static void spapr_phb_add_pci_device(sPAPRDRConnector 
*drc,
 void *fdt = NULL;
 int fdt_start_offset = 0, fdt_size;
 
-if (object_dynamic_cast(OBJECT(pdev), "vfio-pci")) {
-sPAPRTCETable *tcet = spapr_tce_find_by_liobn(phb->dma_liobn);
-
-spapr_tce_set_need_vfio(tcet, true);
-}
-
 if (dev->hotplugged) {
 fdt = create_device_tree(&fdt_size);
 fdt_start_offset = spapr_create_pci_child_dt(phb, pdev, fdt, 0);
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 9bf4c3b..ca3fd47 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -384,6 +384,7 @@ static void vfio_listener_region_add(MemoryListener 
*listener,
 QLIST_INSERT_HEAD(&container->giommu_list, giommu, giommu_next);
 
 memory_region_register_iommu_notifier(giommu->iommu, &giommu->n);
+giommu->iommu->iommu_ops->vfio_notify(section->mr, true);
 memory_region_iommu_replay(giommu->iommu, &giommu->n,
vfio_container_granularity(container),
false);
@@ -430,6 +431,7 @@ static void vfio_listener_region_del(MemoryListener 
*listener,
 VFIOContainer *container = container_of(listener, VFIOContainer, listener);
 hwaddr iova, end;
 int ret;
+MemoryRegion *iommu = NULL;
 
 if (vfio_listener_skipped_section(section)) {
 trace_vfio_listener_region_del_skip(
@@ -451,6 +453,7 @@ static void vfio_listener_region_del(MemoryListener 
*listener,
 QLIST_FOREACH(giommu, &container->giommu_list, giommu_next) {
 if (giommu->iommu == section->mr) {
 memory_region_unregister_iommu_notifier(&giommu->n);
+iommu = giommu->iommu;
 QLIST_REMOVE(giommu, giommu_next);
 g_free(giommu);
 break;
@@ -483,6 +486,10 @@ static void vfio_listener_region_del(MemoryListener 
*listener,
  "0x%"HWADDR_PRIx") = %d (%m)",
  container, iova, end - iova, ret);
 }
+
+if (iommu && iommu->iommu_ops && iommu->iommu_ops->vfio_notify) {
+iommu->iommu_ops->vfio_notify(section->mr, false);
+}
 }
 
 static const MemoryListener vfio_memory_listener = {
diff --git a/include/exec/memory.h b/include/exec/memory.h
index d5284c2..9f82629 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -150,6 +150,8 @@ typedef struct MemoryRegionIOMMUOps MemoryRegionIOMMUOps;
 struct MemoryRegionIOMMUOps {
 /* Return a TLB entry that contains a given address. */
 IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr,

[Qemu-devel] [PATCH qemu v13 13/16] spapr_iommu: Remove need_vfio flag from sPAPRTCETable

2016-03-01 Thread Alexey Kardashevskiy

sPAPRTCETable has a need_vfio flag which is passed to
kvmppc_create_spapr_tce() and controls whether to create a guest view
table in KVM as this depends on the host kernel ability to accelerate
H_PUT_TCE for VFIO devices. We would set this flag at the moment
when sPAPRTCETable is created in spapr_tce_new_table() and
use it when the table is allocated in spapr_tce_table_realize().

Now we explicitly enable/disable DMA windows via spapr_tce_table_enable()
and spapr_tce_table_disable() and can pass this flag directly without
caching it in sPAPRTCETable.

This removes the flag. This should cause no behavioural change.

Signed-off-by: Alexey Kardashevskiy 
---
 hw/ppc/spapr_iommu.c   | 13 +
 include/hw/ppc/spapr.h |  1 -
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 4c52cf4..8aa2238 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -210,8 +210,9 @@ void spapr_tce_set_need_vfio(sPAPRTCETable *tcet, bool 
need_vfio)
 {
 size_t table_size = tcet->nb_table * sizeof(uint64_t);
 void *newtable;
+bool tcet_can_vfio = tcet->fd < 0;
 
-if (need_vfio == tcet->need_vfio) {
+if (need_vfio == tcet_can_vfio) {
 /* Nothing to do */
 return;
 }
@@ -222,8 +223,6 @@ void spapr_tce_set_need_vfio(sPAPRTCETable *tcet, bool 
need_vfio)
 return;
 }
 
-tcet->need_vfio = true;
-
 if (tcet->fd < 0) {
 /* Table is already in userspace, nothing to be do */
 return;
@@ -261,7 +260,7 @@ sPAPRTCETable *spapr_tce_new_table(DeviceState *owner, 
uint32_t liobn)
 return tcet;
 }
 
-static void spapr_tce_table_do_enable(sPAPRTCETable *tcet)
+static void spapr_tce_table_do_enable(sPAPRTCETable *tcet, bool need_vfio)
 {
 if (!tcet->nb_table) {
 return;
@@ -271,7 +270,7 @@ static void spapr_tce_table_do_enable(sPAPRTCETable *tcet)
 tcet->page_shift,
 tcet->nb_table,
 &tcet->fd,
-tcet->need_vfio);
+need_vfio);
 
 memory_region_set_size(&tcet->iommu,
(uint64_t)tcet->nb_table << tcet->page_shift);
@@ -291,9 +290,8 @@ void spapr_tce_table_enable(sPAPRTCETable *tcet,
 tcet->bus_offset = bus_offset;
 tcet->page_shift = page_shift;
 tcet->nb_table = nb_table;
-tcet->need_vfio = need_vfio;
 
-spapr_tce_table_do_enable(tcet);
+spapr_tce_table_do_enable(tcet, need_vfio);
 }
 
 void spapr_tce_table_disable(sPAPRTCETable *tcet)
@@ -312,7 +310,6 @@ void spapr_tce_table_disable(sPAPRTCETable *tcet)
 tcet->bus_offset = 0;
 tcet->page_shift = 0;
 tcet->nb_table = 0;
-tcet->need_vfio = false;
 }
 
 static void spapr_tce_table_unrealize(DeviceState *dev, Error **errp)
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 5d2f8f4..505cb3a 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -546,7 +546,6 @@ struct sPAPRTCETable {
 uint32_t page_shift;
 uint64_t *table;
 bool bypass;
-bool need_vfio;
 int fd;
 MemoryRegion root, iommu;
 struct VIOsPAPRDevice *vdev; /* for @bypass migration compatibility only */
-- 
2.5.0.rc3

[Qemu-devel] [PATCH qemu v13 05/16] spapr_iommu: Add root memory region

2016-03-01 Thread Alexey Kardashevskiy

We are going to have multiple DMA windows at different offsets on
a PCI bus. For the sake of migration, we will have as many TCE table
objects pre-created as there are windows supported.
So we need a way to map windows dynamically onto a PCI bus
when migration of a table is completed but at this stage a TCE table
object does not have access to a PHB to ask it to map a DMA window
backed by just migrated TCE table.

This adds a "root" memory region (UINT64_MAX long) to the TCE object.
This new region is mapped on a PCI bus with enabled overlapping as
there will be one root MR per TCE table, each of them mapped at 0.
The actual IOMMU memory region is a subregion of the root region and
a TCE table enables/disables this subregion and maps it at
the specific offset inside the root MR which is 1:1 mapping of
a PCI address space.

Signed-off-by: Alexey Kardashevskiy 
Reviewed-by: David Gibson 
Reviewed-by: Thomas Huth 
---
 hw/ppc/spapr_iommu.c   | 13 ++---
 hw/ppc/spapr_pci.c |  5 +++--
 include/hw/ppc/spapr.h |  2 +-
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index e66e128..ba9ddbb 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -172,10 +172,15 @@ static MemoryRegionIOMMUOps spapr_iommu_ops = {
 static int spapr_tce_table_realize(DeviceState *dev)
 {
 sPAPRTCETable *tcet = SPAPR_TCE_TABLE(dev);
+Object *tcetobj = OBJECT(tcet);
+char tmp[32];
 
 tcet->fd = -1;
-memory_region_init_iommu(&tcet->iommu, OBJECT(dev), &spapr_iommu_ops,
- "iommu-spapr", 0);
+snprintf(tmp, sizeof(tmp), "tce-root-%x", tcet->liobn);
+memory_region_init(&tcet->root, tcetobj, tmp, UINT64_MAX);
+
+snprintf(tmp, sizeof(tmp), "tce-iommu-%x", tcet->liobn);
+memory_region_init_iommu(&tcet->iommu, tcetobj, &spapr_iommu_ops, tmp, 0);
 
 QLIST_INSERT_HEAD(&spapr_tce_tables, tcet, list);
 
@@ -253,6 +258,7 @@ static void spapr_tce_table_do_enable(sPAPRTCETable *tcet)
 
 memory_region_set_size(&tcet->iommu,
(uint64_t)tcet->nb_table << tcet->page_shift);
+memory_region_add_subregion(&tcet->root, tcet->bus_offset, &tcet->iommu);
 
 tcet->enabled = true;
 }
@@ -279,6 +285,7 @@ static void spapr_tce_table_disable(sPAPRTCETable *tcet)
 return;
 }
 
+memory_region_del_subregion(&tcet->root, &tcet->iommu);
 memory_region_set_size(&tcet->iommu, 0);
 
 spapr_tce_free_table(tcet->table, tcet->fd, tcet->nb_table);
@@ -302,7 +309,7 @@ static void spapr_tce_table_unrealize(DeviceState *dev, 
Error **errp)
 
 MemoryRegion *spapr_tce_get_iommu(sPAPRTCETable *tcet)
 {
-return &tcet->iommu;
+return &tcet->root;
 }
 
 static void spapr_tce_reset(DeviceState *dev)
diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index c34a906..7b40687 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -822,8 +822,6 @@ static int spapr_phb_dma_window_enable(sPAPRPHBState *sphb,
 
 spapr_tce_table_enable(tcet, page_shift, window_addr, nb_table, false);
 
-memory_region_add_subregion(&sphb->iommu_root, tcet->bus_offset,
-spapr_tce_get_iommu(tcet));
 return 0;
 }
 
@@ -1411,6 +1409,9 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 return;
 }
 
+memory_region_add_subregion(&sphb->iommu_root, 0,
+spapr_tce_get_iommu(tcet));
+
 /* Register default 32bit DMA window */
 spapr_phb_dma_window_enable(sphb, sphb->dma_liobn,
 SPAPR_TCE_PAGE_SHIFT,
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 3e6bb84..bdf27ec 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -548,7 +548,7 @@ struct sPAPRTCETable {
 bool bypass;
 bool need_vfio;
 int fd;
-MemoryRegion iommu;
+MemoryRegion root, iommu;
 struct VIOsPAPRDevice *vdev; /* for @bypass migration compatibility only */
 QLIST_ENTRY(sPAPRTCETable) list;
 };
-- 
2.5.0.rc3

[Qemu-devel] [PATCH qemu v13 11/16] vfio: spapr: Add SPAPR IOMMU v2 support (DMA memory preregistering)

2016-03-01 Thread Alexey Kardashevskiy

This makes use of the new "memory registering" feature. The idea is
to provide the userspace ability to notify the host kernel about pages
which are going to be used for DMA. Having this information, the host
kernel can pin them all once per user process, do locked pages
accounting (once) and not spend time on doing that in real time with
possible failures which cannot be handled nicely in some cases.

This adds a prereg memory listener which listens on address_space_memory
and notifies a VFIO container about memory which needs to be
pinned/unpinned. VFIO MMIO regions (i.e. "skip dump" regions) are skipped.

As there is no per-IOMMU-type release() callback anymore, this stores
the IOMMU type in the container so vfio_listener_release() can decide
if it needs to unregister @prereg_listener.

The feature is only enabled for SPAPR IOMMU v2. The host kernel changes
are required. Since v2 does not need/support VFIO_IOMMU_ENABLE, this does
not call it when v2 is detected and enabled.

This does not change the guest visible interface.

Signed-off-by: Alexey Kardashevskiy 
---
 hw/vfio/Makefile.objs |   1 +
 hw/vfio/common.c  |  39 +---
 hw/vfio/prereg.c  | 138 ++
 include/hw/vfio/vfio-common.h |   4 ++
 trace-events  |   2 +
 5 files changed, 175 insertions(+), 9 deletions(-)
 create mode 100644 hw/vfio/prereg.c

diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs
index ceddbb8..5800e0e 100644
--- a/hw/vfio/Makefile.objs
+++ b/hw/vfio/Makefile.objs
@@ -4,4 +4,5 @@ obj-$(CONFIG_PCI) += pci.o pci-quirks.o
 obj-$(CONFIG_SOFTMMU) += platform.o
 obj-$(CONFIG_SOFTMMU) += calxeda-xgmac.o
 obj-$(CONFIG_SOFTMMU) += amd-xgbe.o
+obj-$(CONFIG_SOFTMMU) += prereg.o
 endif
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 3aaa6b5..f2a03e0 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -531,6 +531,9 @@ static const MemoryListener vfio_iommu_listener = {
 static void vfio_listener_release(VFIOContainer *container)
 {
 memory_listener_unregister(&container->iommu_listener.listener);
+if (container->iommu_type == VFIO_SPAPR_TCE_v2_IOMMU) {
+memory_listener_unregister(&container->prereg_listener.listener);
+}
 }
 
 int vfio_mmap_region(Object *obj, VFIORegion *region,
@@ -722,8 +725,8 @@ static int vfio_connect_container(VFIOGroup *group, 
AddressSpace *as)
 goto free_container_exit;
 }
 
-ret = ioctl(fd, VFIO_SET_IOMMU,
-v2 ? VFIO_TYPE1v2_IOMMU : VFIO_TYPE1_IOMMU);
+container->iommu_type = v2 ? VFIO_TYPE1v2_IOMMU : VFIO_TYPE1_IOMMU;
+ret = ioctl(fd, VFIO_SET_IOMMU, container->iommu_type);
 if (ret) {
 error_report("vfio: failed to set iommu for container: %m");
 ret = -errno;
@@ -748,8 +751,10 @@ static int vfio_connect_container(VFIOGroup *group, 
AddressSpace *as)
 if ((ret == 0) && (info.flags & VFIO_IOMMU_INFO_PGSIZES)) {
 container->iova_pgsizes = info.iova_pgsizes;
 }
-} else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU)) {
+} else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU) ||
+   ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_v2_IOMMU)) {
 struct vfio_iommu_spapr_tce_info info;
+bool v2 = !!ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_v2_IOMMU);
 
 ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
 if (ret) {
@@ -757,7 +762,9 @@ static int vfio_connect_container(VFIOGroup *group, 
AddressSpace *as)
 ret = -errno;
 goto free_container_exit;
 }
-ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU);
+container->iommu_type =
+v2 ? VFIO_SPAPR_TCE_v2_IOMMU : VFIO_SPAPR_TCE_IOMMU;
+ret = ioctl(fd, VFIO_SET_IOMMU, container->iommu_type);
 if (ret) {
 error_report("vfio: failed to set iommu for container: %m");
 ret = -errno;
@@ -769,11 +776,25 @@ static int vfio_connect_container(VFIOGroup *group, 
AddressSpace *as)
  * when container fd is closed so we do not call it explicitly
  * in this file.
  */
-ret = ioctl(fd, VFIO_IOMMU_ENABLE);
-if (ret) {
-error_report("vfio: failed to enable container: %m");
-ret = -errno;
-goto free_container_exit;
+if (!v2) {
+ret = ioctl(fd, VFIO_IOMMU_ENABLE);
+if (ret) {
+error_report("vfio: failed to enable container: %m");
+ret = -errno;
+goto free_container_exit;
+}
+} else {
+container->prereg_listener.container = container;
+container->prereg_listener.listener = vfio_prereg_listener;
+
+memory_listener_register(&container->prereg_listener.listener,
+ &address_space_memory);
+if (container->error) {
+

[Qemu-devel] [PATCH qemu v13 16/16] spapr_pci/spapr_pci_vfio: Support Dynamic DMA Windows (DDW)

2016-03-01 Thread Alexey Kardashevskiy

This adds support for the Dynamic DMA Windows (DDW) option defined by
the SPAPR specification, which allows having additional DMA window(s).

This implements DDW for emulated and VFIO devices. As all TCE root regions
are mapped at 0 and 64bit long (and actual tables are child regions),
this replaces memory_region_add_subregion() with _overlap() to make
QEMU memory API happy.

This reserves RTAS token numbers for DDW calls.

This changes the TCE table migration descriptor to support dynamic
tables as from now on, PHB will create as many stub TCE table objects
as PHB can possibly support but not all of them might be initialized at
the time of migration because DDW might or might not be requested by
the guest.

The "ddw" property is enabled by default on a PHB but for compatibility
the pseries-2.5 machine and older disable it.

This implements DDW for VFIO. The host kernel support is required.
This adds a "levels" property to PHB to control the number of levels
in the actual TCE table allocated by the host kernel, 0 is the default
value to tell QEMU to calculate the correct value. Current hardware
supports up to 5 levels.

The existing Linux guests try creating one additional huge DMA window
with 64K or 16MB pages and map the entire guest RAM to it. If this succeeds,
the guest switches to dma_direct_ops and never calls TCE hypercalls
(H_PUT_TCE,...) again. This enables VFIO devices to use the entire RAM
and not waste time on map/unmap later. This adds a "dma64_win_addr"
property which is a bus address for the 64bit window and by default
set to 0x800... as this is what the modern POWER8 hardware
uses and this allows having emulated and VFIO devices on the same bus.

This adds 4 RTAS handlers:
* ibm,query-pe-dma-window
* ibm,create-pe-dma-window
* ibm,remove-pe-dma-window
* ibm,reset-pe-dma-window
These are registered from type_init() callback.

These RTAS handlers are implemented in a separate file to avoid polluting
spapr_iommu.c with PCI.

TODO (which I have no idea how to implement properly):
1. check the host kernel actually supports SPAPR_PCI_DMA_MAX_WINDOWS
windows and 12/16/24 page shift;
2. fix container::min_iova, max_iova - as for now, they are useless,
and I'd expect IOMMU MR boundaries to serve this purpose really;
3. vfio_listener_region_add/vfio_listener_region_del do explicitly
create/remove huge DMA window as we do not have vfio_container_ioctl()
anymore, do we want to move these to some sort of callbacks? How, where?

Signed-off-by: Alexey Kardashevskiy 

# Conflicts:
#   include/hw/pci-host/spapr.h

# Conflicts:
#   hw/vfio/common.c
---
 hw/ppc/Makefile.objs|   1 +
 hw/ppc/spapr.c  |   7 +-
 hw/ppc/spapr_iommu.c|  32 -
 hw/ppc/spapr_pci.c  |  61 +++--
 hw/ppc/spapr_rtas_ddw.c | 306 
 hw/vfio/common.c|  70 +-
 include/hw/pci-host/spapr.h |  13 ++
 include/hw/ppc/spapr.h  |  17 ++-
 trace-events|   6 +
 9 files changed, 489 insertions(+), 24 deletions(-)
 create mode 100644 hw/ppc/spapr_rtas_ddw.c

diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
index c1ffc77..986b36f 100644
--- a/hw/ppc/Makefile.objs
+++ b/hw/ppc/Makefile.objs
@@ -7,6 +7,7 @@ obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o 
spapr_rng.o
 ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
 obj-y += spapr_pci_vfio.o
 endif
+obj-$(CONFIG_PSERIES) += spapr_rtas_ddw.o
 # PowerPC 4xx boards
 obj-y += ppc405_boards.o ppc4xx_devs.o ppc405_uc.o ppc440_bamboo.o
 obj-y += ppc4xx_pci.o
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index e9d4abf..2473217 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2370,7 +2370,12 @@ DEFINE_SPAPR_MACHINE(2_6, "2.6", true);
  * pseries-2.5
  */
 #define SPAPR_COMPAT_2_5 \
-HW_COMPAT_2_5
+HW_COMPAT_2_5 \
+{\
+.driver   = TYPE_SPAPR_PCI_HOST_BRIDGE,\
+.property = "ddw",\
+.value= stringify(off),\
+},
 
 static void spapr_machine_2_5_instance_options(MachineState *machine)
 {
diff --git a/hw/ppc/spapr_iommu.c b/hw/ppc/spapr_iommu.c
index 8aa2238..e32f71b 100644
--- a/hw/ppc/spapr_iommu.c
+++ b/hw/ppc/spapr_iommu.c
@@ -150,6 +150,15 @@ static uint64_t spapr_tce_get_page_sizes(MemoryRegion 
*iommu)
 return 1ULL << tcet->page_shift;
 }
 
+static void spapr_tce_table_pre_save(void *opaque)
+{
+sPAPRTCETable *tcet = SPAPR_TCE_TABLE(opaque);
+
+tcet->migtable = tcet->table;
+}
+
+static void spapr_tce_table_do_enable(sPAPRTCETable *tcet, bool vfio_accel);
+
 static int spapr_tce_table_post_load(void *opaque, int version_id)
 {
 sPAPRTCETable *tcet = SPAPR_TCE_TABLE(opaque);
@@ -158,22 +167,39 @@ static int spapr_tce_table_post_load(void *opaque, int 
version_id)
 spapr_vio_set_bypass(tcet->vdev, tcet->bypass);
 }
 
+if (tcet->enabled) {
+if (!tcet->table) {
+tcet->enabled = false;
+/* VFIO does not migrate so

[Qemu-devel] [PATCH qemu v13 08/16] memory: Add reporting of supported page sizes

2016-03-01 Thread Alexey Kardashevskiy

Every IOMMU has some granularity which MemoryRegionIOMMUOps::translate
uses when translating, however this information is not available outside
the translate context for various checks.

This adds a get_page_sizes callback to MemoryRegionIOMMUOps and
a wrapper for it so IOMMU users (such as VFIO) can know the actual
page size(s) used by an IOMMU.

The qemu_real_host_page_mask is used as fallback.

Signed-off-by: Alexey Kardashevskiy 
---
Changes:
v4:
* s/1page_shift;
+}
+
 static int spapr_tce_table_post_load(void *opaque, int version_id)
 {
 sPAPRTCETable *tcet = SPAPR_TCE_TABLE(opaque);
@@ -175,6 +182,7 @@ static const VMStateDescription vmstate_spapr_tce_table = {
 static MemoryRegionIOMMUOps spapr_iommu_ops = {
 .translate = spapr_tce_translate_iommu,
 .vfio_notify = spapr_tce_vfio_notify,
+.get_page_sizes = spapr_tce_get_page_sizes,
 };
 
 static int spapr_tce_table_realize(DeviceState *dev)
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 9f82629..c34e67c 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -152,6 +152,8 @@ struct MemoryRegionIOMMUOps {
 IOMMUTLBEntry (*translate)(MemoryRegion *iommu, hwaddr addr, bool 
is_write);
 /* Called when VFIO starts/stops using this */
 int (*vfio_notify)(MemoryRegion *iommu, bool attached);
+/* Returns supported page sizes */
+uint64_t (*get_page_sizes)(MemoryRegion *iommu);
 };
 
 typedef struct CoalescedMemoryRange CoalescedMemoryRange;
@@ -576,6 +578,15 @@ static inline bool memory_region_is_iommu(MemoryRegion *mr)
 
 
 /**
+ * memory_region_iommu_get_page_sizes: get supported page sizes in an iommu
+ *
+ * Returns %bitmap of supported page sizes for an iommu.
+ *
+ * @mr: the memory region being queried
+ */
+uint64_t memory_region_iommu_get_page_sizes(MemoryRegion *mr);
+
+/**
  * memory_region_notify_iommu: notify a change in an IOMMU translation entry.
  *
  * @mr: the memory region that was changed
diff --git a/memory.c b/memory.c
index 0dd9695..5d8453d 100644
--- a/memory.c
+++ b/memory.c
@@ -1462,6 +1462,15 @@ void memory_region_notify_iommu(MemoryRegion *mr,
 notifier_list_notify(&mr->iommu_notify, &entry);
 }
 
+uint64_t memory_region_iommu_get_page_sizes(MemoryRegion *mr)
+{
+assert(memory_region_is_iommu(mr));
+if (mr->iommu_ops && mr->iommu_ops->get_page_sizes) {
+return mr->iommu_ops->get_page_sizes(mr);
+}
+return qemu_real_host_page_size;
+}
+
 void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client)
 {
 uint8_t mask = 1 << client;
-- 
2.5.0.rc3

Re: [Qemu-devel] [RFC PATCH v0 2/6] spapr: CPU core device

2016-03-01 Thread Igor Mammedov

On Tue, 1 Mar 2016 13:47:27 +0530
Bharata B Rao  wrote:

> On Mon, Feb 29, 2016 at 04:15:25PM +0100, Igor Mammedov wrote:
> > On Mon, 29 Feb 2016 18:25:25 +0530
> > Bharata B Rao  wrote:
> >   
> > > On Mon, Feb 29, 2016 at 11:03:16AM +0100, Igor Mammedov wrote:  
> > > > On Mon, 29 Feb 2016 11:20:19 +0530
> > > > Bharata B Rao  wrote:
> > > > 
> > > > > On Fri, Feb 26, 2016 at 12:13:39PM -0600, Michael Roth wrote:
> > > > > > Quoting Bharata B Rao (2016-02-25 10:22:38)  
> > > > > > > Add sPAPR specific CPU core device that is based on generic CPU 
> > > > > > > core device.
> > > > > > > Creating this core device will result in creation of all the CPU 
> > > > > > > thread
> > > > > > > devices that are part of this core.
> > > > > > > 
> > > > > > > Signed-off-by: Bharata B Rao 
> > > > > > > ---
> > > > > > >  hw/ppc/Makefile.objs|   1 +
> > > > > > >  hw/ppc/spapr_cpu_core.c | 210 
> > > > > > > 
> > > > > > >  include/hw/ppc/spapr_cpu_core.h |  32 ++
> > > > > > >  3 files changed, 243 insertions(+)
> > > > > > >  create mode 100644 hw/ppc/spapr_cpu_core.c
> > > > > > >  create mode 100644 include/hw/ppc/spapr_cpu_core.h
> > > > > > > 
> > > > > > > diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> > > > > > > index c1ffc77..5cc6608 100644
> > > > > > > --- a/hw/ppc/Makefile.objs
> > > > > > > +++ b/hw/ppc/Makefile.objs
> > > > > > > @@ -4,6 +4,7 @@ obj-y += ppc.o ppc_booke.o
> > > > > > >  obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
> > > > > > >  obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o spapr_rtas.o
> > > > > > >  obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o 
> > > > > > > spapr_rng.o
> > > > > > > +obj-$(CONFIG_PSERIES) += spapr_cpu_core.o
> > > > > > >  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
> > > > > > >  obj-y += spapr_pci_vfio.o
> > > > > > >  endif
> > > > > > > diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
> > > > > > > new file mode 100644
> > > > > > > index 000..c44eb61
> > > > > > > --- /dev/null
> > > > > > > +++ b/hw/ppc/spapr_cpu_core.c
> > > > > > > @@ -0,0 +1,210 @@
> > > > > > > +/*
> > > > > > > + * sPAPR CPU core device, acts as container of CPU thread 
> > > > > > > devices.
> > > > > > > + *
> > > > > > > + * Copyright (C) 2016 Bharata B Rao 
> > > > > > > + *
> > > > > > > + * This work is licensed under the terms of the GNU GPL, version 
> > > > > > > 2 or later.
> > > > > > > + * See the COPYING file in the top-level directory.
> > > > > > > + */
> > > > > > > +#include "hw/cpu/core.h"
> > > > > > > +#include "hw/ppc/spapr_cpu_core.h"
> > > > > > > +#include "hw/ppc/spapr.h"
> > > > > > > +#include "hw/boards.h"
> > > > > > > +#include "qemu/error-report.h"
> > > > > > > +#include "qapi/visitor.h"
> > > > > > > +#include 
> > > > > > > +
> > > > > > > +static int spapr_cpu_core_realize_child(Object *child, void 
> > > > > > > *opaque)
> > > > > > > +{
> > > > > > > +Error **errp = opaque;
> > > > > > > +
> > > > > > > +object_property_set_bool(child, true, "realized", errp);
> > > > > > > +if (*errp) {
> > > > > > > +return 1;
> > > > > > > +}
> > > > > > > +return 0;
> > > > > > > +}
> > > > > > > +
> > > > > > > +static void spapr_cpu_core_realize(DeviceState *dev, Error 
> > > > > > > **errp)
> > > > > > > +{
> > > > > > > +sPAPRCPUCore *core = SPAPR_CPU_CORE(OBJECT(dev));
> > > > > > > +sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> > > > > > > +Error *local_err = NULL;
> > > > > > > +
> > > > > > > +if (!core->nr_threads) {
> > > > > > > +error_setg(errp, "nr_threads property can't be 0");
> > > > > > > +return;
> > > > > > > +}
> > > > > > > +
> > > > > > > +if (!core->cpu_model) {
> > > > > > > +error_setg(errp, "cpu_model property isn't set");
> > > > > > > +return;
> > > > > > > +}
> > > > > > > +
> > > > > > > +/*
> > > > > > > + * TODO: If slot isn't specified, plug this core into
> > > > > > > + * an existing empty slot.
> > > > > > > + */
> > > > > > > +if (!core->slot) {
> > > > > > > +error_setg(errp, "slot property isn't set");
> > > > > > > +return;
> > > > > > > +}
> > > > > > > +
> > > > > > > +object_property_set_link(OBJECT(spapr), OBJECT(core), 
> > > > > > > core->slot,
> > > > > > > + &local_err);
> > > > > > > +if (local_err) {
> > > > > > > +error_propagate(errp, local_err);
> > > > > > > +return;
> > > > > > > +}
> > > > > > > +
> > > > > > > +object_child_foreach(OBJECT(dev), 
> > > > > > > spapr_cpu_core_realize_child, errp);
> > > > > > > +}
> > > > > > > +
> > > > > > > +/*
> > > > > > > + * This creates the CPU threads for a given @core.
> > > > > > > + *
> > > > > > > + * In order to create the threads, we need two inputs - number of
> > > > > > > + * threads and the cpu_mode

Re: [Qemu-devel] [PATCH v3 5/8] nvdimm acpi: introduce patched dsm memory

2016-03-01 Thread Xiao Guangrong




On 03/01/2016 05:08 PM, Michael S. Tsirkin wrote:

On Tue, Mar 01, 2016 at 04:53:23PM +0800, Xiao Guangrong wrote:



On 02/29/2016 05:38 PM, Michael S. Tsirkin wrote:


+/* Build NAME(, 0x) where 0x is encoded as a dword,
+ * and return the offset to 0x for runtime patching.
+ *
+ * Warning: runtime patching is best avoided. Only use this as
+ * a replacement for DataTableRegion (for guests that don't
+ * support it).
+ */
+int
+build_append_named_dword(GArray *array, const char *name_format, ...)
+{
+int offset;
+va_list ap;
+
+va_start(ap, name_format);
+build_append_namestringv(array, name_format, ap);
+va_end(ap);


The NameOP was missed here...

The idea is great, and I fixed and applied it on top of this patchset; the patch
is attached. Would that be good for you?



OK but I can't review this patch on top of patch.
Please split this in aml-build and nvdimm changes,
then squash the aml-build change with my patch and include it
as 5/8, then append yours squashed with the nvdimm.c changes.


Okay... will do.



Rename it to something that implies what it does, not its value. Offset of
what is it?

For example
nvdimm_ssdt = table_data->len;


Yep, good to me.







-aml_append(sb_scope, mem_addr);
-aml_append(ssdt, sb_scope);
  /* copy AML table into ACPI tables blob and patch header there */
  g_array_append_vals(table_data, ssdt->buf->data, ssdt->buf->len);
-
-offset = table_data->len - 4;
-
-/*
- * zero the last 4 bytes, i.e, it is the offset of
- * NVDIMM_ACPI_MEM_ADDR object.
- */
-g_array_remove_range(table_data, offset, 4);
-g_array_append_vals(table_data, &zero_offset, 4);
+offset = build_append_named_dword(table_data, NVDIMM_ACPI_MEM_ADDR);


Here too, please give it a better name
mem_addr_offset = ; ?


Yup, it is better.

Re: [Qemu-devel] [RFC PATCH v0 5/6] qmp, spapr: Show hot-plugged/pluggable CPU slots in the Machine

2016-03-01 Thread Bharata B Rao

On Mon, Feb 29, 2016 at 11:46:42AM +0100, Igor Mammedov wrote:
> On Thu, 25 Feb 2016 21:52:41 +0530
> Bharata B Rao  wrote:
> 
> > Implement query cpu-slots that provides information about hot-plugged
> > as well as hot-pluggable CPU slots that the machine supports.
> > 
> > TODO: As Eric suggested use enum for type instead of str.
> > TODO: @hotplug-granularity probably isn't required.
> > 
> > Signed-off-by: Bharata B Rao 
> > ---
> >  hw/core/machine.c   |  19 +
> >  hw/ppc/spapr.c  | 112 
> > 
> >  include/hw/boards.h |   4 ++
> >  qapi-schema.json|  85 +++
> >  qmp-commands.hx |  47 ++
> >  5 files changed, 267 insertions(+)
> > 
> > diff --git a/hw/core/machine.c b/hw/core/machine.c
> > index 6d1a0d8..3055ef8 100644
> > --- a/hw/core/machine.c
> > +++ b/hw/core/machine.c
> > @@ -17,6 +17,25 @@
> >  #include "hw/sysbus.h"
> >  #include "sysemu/sysemu.h"
> >  #include "qemu/error-report.h"
> > +#include "qmp-commands.h"
> > +
> > +/*
> > + * QMP: query-cpu-slots
> > + *
> > + * TODO: Ascertain if this is the right place to for this arch-neutral 
> > routine.
> > + */
> > +CPUSlotInfoList *qmp_query_cpu_slots(Error **errp)
> > +{
> > +MachineState *ms = MACHINE(qdev_get_machine());
> > +MachineClass *mc = MACHINE_GET_CLASS(ms);
> > +
> > +if (!mc->cpu_slots) {
> > +error_setg(errp, QERR_UNSUPPORTED);
> > +return NULL;
> > +}
> > +
> > +return mc->cpu_slots(ms);
> > +}
> >  
> >  static char *machine_get_accel(Object *obj, Error **errp)
> >  {
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 780cd00..b76ed85 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -2453,6 +2453,117 @@ static unsigned 
> > spapr_cpu_index_to_socket_id(unsigned cpu_index)
> >  return cpu_index / smp_threads / smp_cores;
> >  }
> >  
> > +static int spapr_cpuinfo_list(Object *obj, void *opaque)
> > +{
> > +MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine());
> > +CPUInfoList ***prev = opaque;
> > +
> > +if (object_dynamic_cast(obj, TYPE_CPU)) {
> > +CPUInfoList *elem = g_new0(CPUInfoList, 1);
> > +CPUInfo *s = g_new0(CPUInfo, 1);
> > +CPUState *cpu = CPU(obj);
> > +PowerPCCPU *pcpu = POWERPC_CPU(cpu);
> > +
> > +s->arch_id = ppc_get_vcpu_dt_id(pcpu);
> > +s->type = g_strdup(object_get_typename(obj));
> > +s->thread = cpu->cpu_index;
> > +s->has_thread = true;
> > +s->core = cpu->cpu_index / smp_threads;
> > +s->has_core = true;
> > +if (mc->cpu_index_to_socket_id) {
> > +s->socket = mc->cpu_index_to_socket_id(cpu->cpu_index);
> > +} else {
> > +s->socket = cpu->cpu_index / smp_threads / smp_cores;
> > +}
> > +s->has_socket = true;
> > +s->node = cpu->numa_node;
> > +s->has_node = true;
> > +s->qom_path = object_get_canonical_path(obj);
> > +s->has_qom_path = true;
> > +
> > +elem->value = s;
> > +elem->next = NULL;
> > +**prev = elem;
> > +*prev = &elem->next;
> > +}
> > +object_child_foreach(obj, spapr_cpuinfo_list, opaque);
> > +return 0;
> > +}
> > +
> > +static CPUSlotInfoList *spapr_cpu_slots(MachineState *machine)
> > +{
> > +CPUSlotInfoList *head = NULL;
> > +CPUSlotInfoList **prev = &head;
> > +Object *root_container;
> > +ObjectProperty *prop;
> > +ObjectPropertyIterator iter;
> > +
> > +/*
> > + * TODO: There surely must be a better/easier way to walk all
> > + * the link properties of an object ?
> > + */
> > +root_container = container_get(object_get_root(), "/machine");
> > +object_property_iter_init(&iter, root_container);
> > +
> > +while ((prop = object_property_iter_next(&iter))) {
> > +Object *obj;
> > +DeviceState *dev;
> > +CPUSlotInfoList *elem;
> > +CPUSlotInfo *s;
> > +CPUInfoList *cpu_head = NULL;
> > +CPUInfoList **cpu_prev = &cpu_head;
> > +sPAPRCPUCore *core;
> > +
> > +if (!strstart(prop->type, "link<", NULL)) {
> > +continue;
> > +}
> > +
> > +if (!strstart(prop->name, SPAPR_MACHINE_CPU_CORE_PROP, NULL)) {
> > +continue;
> > +}
> > +
> > +elem = g_new0(CPUSlotInfoList, 1);
> > +s = g_new0(CPUSlotInfo, 1);
> > +
> > +obj = object_property_get_link(root_container, prop->name, NULL);
> > +if (obj) {
> > +/* Slot populated */
> > +dev = DEVICE(obj);
> > +core = SPAPR_CPU_CORE(obj);
> > +
> > +if (dev->id) {
> > +s->has_id = true;
> > +s->id = g_strdup(dev->id);
> > +}
> > +s->realized = object_property_get_bool(obj, "realized", NULL);
> > +s->nr_cpus = core->nr_threads;
> > +s->h

Re: [Qemu-devel] [RFC PATCH v0 2/6] spapr: CPU core device

2016-03-01 Thread Igor Mammedov

On Tue, 1 Mar 2016 12:21:27 +1100
David Gibson  wrote:

> On Mon, Feb 29, 2016 at 04:15:25PM +0100, Igor Mammedov wrote:
> > On Mon, 29 Feb 2016 18:25:25 +0530
> > Bharata B Rao  wrote:  
> > > On Mon, Feb 29, 2016 at 11:03:16AM +0100, Igor Mammedov wrote:  
> > > > On Mon, 29 Feb 2016 11:20:19 +0530
> > > > Bharata B Rao  wrote:  
> [snip]
> > > > > > "slot" seems intended to be a machine-agnostic of mapping device
> > > > > > types discovered from qmp_query_cpu_slots() to an appropriate
> > > > > > "bus" location, but here's it a field specific to 
> > > > > > TYPE_SPAPR_CPU_CORE.
> > > > > > It seems like maybe TYPE_CPU_CORE is a better place, but then on
> > > > > > x86 I suppose it might be TYPE_CPU_SOCKET or something instead...   
> > > > > >
> > > > > 
> > > > > Correct.
> > > > > 
> > > > > > 
> > > > > > It almost seems like a TYPE_INTERFACE_SLOTABLE would be the
> > > > > > right approach, but I don't know how we could expose that as
> > > > > > a property. I guess it's somewhat implied that this "interface"
> > > > > > exists if qmp_query_cpu_slots() returns the type, but I wonder
> > > > > > if something a bit more formal should be modeled to make the
> > > > > > implementation requirements a bit clearer.
> > > > > > 
> > > > > > Maybe have TYPE_CPU_{CORE,SOCKET} classes have a get_slot/set_slot
> > > > > > class method, expose them via "slot" property, then have the
> > > > > > defaults generate "not implemented" errors?  
> > > > > 
> > > > > Yes makes sense. In fact David has often times said that generic
> > > > > properties/routines should be pushed to base class wherever possible.
> > > > > 
> > > > > I didn't do that in this first iteration to keep the generic changes
> > > > > as minimum as possible, but yes slot should be a property of the
> > > > > base class of core or socket.
> > > > Then what will happen to slot if there isn't any core/socket device
> > > > to query it, i.e. cpu hasn't been plugged in yet?
> > > > To me slot looks like a machine belonged feature.
> > > 
> > > Yes slot belongs to the machine and it is represented by a link that
> > > is created b/n the machine object and the core object that sits in
> > > the slot.
> > > 
> > > In the context of this thread, slot is actually the slot name that
> > > identifies the machine slot which the core occupies or will occupy after
> > > hotplug. Thus slot name which is named slot here, it is a property of the
> > > core device.
> > > 
> > > (qemu) device_add spapr-cpu-core,slot=core[2]
> > >  ^  
> > Is 'slot' a term used by SPAPR on real hardware?  
> 
> So.. PAPR is a para-virtualized interface, so it never appears on real
> hardware.
> 
> But, no, "slot" is not a term used by PAPR.
> 
> > I'd thought that it's 'core', that's why I suggested to use
> > 'core' for POWER as that matched real world concept, see
> > my other reply in "[RFC PATCH v0 4/6] spapr: CPU hotplug support" thread
> > of this series.  
> 
> I don't think it uses "core" either, I believe it uses just "cpu" but
> meaning a multi-thread core, rather than a single logical cpu thread.
then calling property 'cpu' is fine or one could go by meaning and
use 'core' property (reusing 'core' property)

Re: [Qemu-devel] [PATCH] pc-dimm: fix error handling in pc_dimm_check_memdev_is_busy()

2016-03-01 Thread Igor Mammedov

On Mon, 29 Feb 2016 19:33:15 +0100
Markus Armbruster  wrote:

> Igor Mammedov  writes:
> 
> > if host_memory_backend_get_memory() were to return error and  
> 
> Start sentences with a capital letter, please.
> 
> > NULL MemoryRegion, pc_dimm_check_memdev_is_busy() would crash
> > dereferrencing null pointer in memory_region_is_mapped()  
> 
> dereferencing
> 
> >
> > Also pc_dimm_check_memdev_is_busy():error_setg() would assert
> > if caller passes NULL errp, but assert shouldn't happen as
> > the check is typically performed during hotplug.  
> 
> Huh?
Yep, this paragraph is wrong, I'll drop it.

> 
> >
> > To avoid above issues use typical error handling pattern
> > for property setters:
> >
> > Error *local_error = NULL;
> > ...
> > error_propagate(errp, local_err);
> >
> > Reported-by: Markus Armbruster   
> 
> The latent bug I reported was actually that if
> host_memory_backend_get_memory() sets an error and we then reach
> error_setg(), we fail the "error already set" assertion in error_setv()
> unless errp is null.
> 
> > Signed-off-by: Igor Mammedov 
> > ---
> >  hw/mem/pc-dimm.c | 13 ++---
> >  1 file changed, 10 insertions(+), 3 deletions(-)
> >
> > diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
> > index 650f0f8..973bf20 100644
> > --- a/hw/mem/pc-dimm.c
> > +++ b/hw/mem/pc-dimm.c
> > @@ -364,15 +364,22 @@ static void pc_dimm_check_memdev_is_busy(Object *obj, 
> > const char *name,
> >Object *val, Error **errp)
> >  {
> >  MemoryRegion *mr;
> > +Error *local_err = NULL;
> >  
> > -mr = host_memory_backend_get_memory(MEMORY_BACKEND(val), errp);
> > +mr = host_memory_backend_get_memory(MEMORY_BACKEND(val), &local_err);
> > +if (local_err) {
> > +goto out;
> > +}
> >  if (memory_region_is_mapped(mr)) {
> >  char *path = object_get_canonical_path_component(val);
> > -error_setg(errp, "can't use already busy memdev: %s", path);
> > +error_setg(&local_err, "can't use already busy memdev: %s", path);
> >  g_free(path);
> >  } else {
> > -qdev_prop_allow_set_link_before_realize(obj, name, val, errp);
> > +qdev_prop_allow_set_link_before_realize(obj, name, val, 
> > &local_err);
> >  }
> > +
> > +out:
> > +error_propagate(errp, local_err);
> >  }
> >  
> >  static void pc_dimm_init(Object *obj)  
> 
> I'd error_propagate() + return instead of goto.  But your version isn't
> wrong, so:
> 
> Reviewed-by: Markus Armbruster 
> 
> Preferably with an improved commit message, of course :)
Thanks, I'll respin v2 with fixed commit message.

[Qemu-devel] [PATCH v2] pc-dimm: fix error handling in pc_dimm_check_memdev_is_busy()

2016-03-01 Thread Igor Mammedov

If host_memory_backend_get_memory() were to return error and
NULL MemoryRegion, pc_dimm_check_memdev_is_busy() would crash
dereferencing NULL pointer in memory_region_is_mapped().
But if error is set and non NULL MemoryRegion is returned
then error_setg() will fail with "error already set" assertion
in error_setv()

To avoid above issues use typical error handling pattern
for property setters:

Error *local_err = NULL;
...
error_propagate(errp, local_err);

Reported-by: Markus Armbruster 
Signed-off-by: Igor Mammedov 
Reviewed-by: Markus Armbruster 
---
 hw/mem/pc-dimm.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c
index 650f0f8..973bf20 100644
--- a/hw/mem/pc-dimm.c
+++ b/hw/mem/pc-dimm.c
@@ -364,15 +364,22 @@ static void pc_dimm_check_memdev_is_busy(Object *obj, 
const char *name,
   Object *val, Error **errp)
 {
 MemoryRegion *mr;
+Error *local_err = NULL;
 
-mr = host_memory_backend_get_memory(MEMORY_BACKEND(val), errp);
+mr = host_memory_backend_get_memory(MEMORY_BACKEND(val), &local_err);
+if (local_err) {
+goto out;
+}
 if (memory_region_is_mapped(mr)) {
 char *path = object_get_canonical_path_component(val);
-error_setg(errp, "can't use already busy memdev: %s", path);
+error_setg(&local_err, "can't use already busy memdev: %s", path);
 g_free(path);
 } else {
-qdev_prop_allow_set_link_before_realize(obj, name, val, errp);
+qdev_prop_allow_set_link_before_realize(obj, name, val, &local_err);
 }
+
+out:
+error_propagate(errp, local_err);
 }
 
 static void pc_dimm_init(Object *obj)
-- 
1.8.3.1

Re: [Qemu-devel] [PATCH v2 6/7] exec: Factor out section_covers_addr

2016-03-01 Thread Paolo Bonzini

On 01/03/2016 07:18, Fam Zheng wrote:
> +/* Memory topology clips a memory region to 2^64, size.hi >= 0 means the
> + * section must cover any addr. */

Small improvement:

/* Memory topology clips a memory region to [0, 2^64); size.hi > 0 means
 * the section must cover the entire address space.
 */

Paolo

Re: [Qemu-devel] [PATCH v2 0/7] memory: Clean up MemoryRegion.ram_addr and optimize address_space_translate

2016-03-01 Thread Paolo Bonzini



On 01/03/2016 07:18, Fam Zheng wrote:
> v2: In the optimization patch, factor out section_covers_addr() and use it.
> [Paolo, Peter]
> Check "ram_block == NULL" in patch 3. [Gonglei]
> Add Gonglei's rev-by in patches 1, 2, 4 and 5.
> 
> The first four patches drop ram_addr from MemoryRegion on top of Gonglei's
> optimization.
> 
> The next patch simplifies qemu_ram_free a bit by passing the RAMBlock pointer.
> 
> The last patch speeds up address_space_translate with a cache pointer inside
> the AddressSpaceDispatch.
> 
> Fam Zheng (7):
>   exec: Return RAMBlock pointer from allocating functions
>   memory: Move assignment to ram_block to memory_region_init_*
>   memory: Implement memory_region_get_ram_addr with mr->ram_block
>   memory: Drop MemoryRegion.ram_addr
>   exec: Pass RAMBlock pointer to qemu_ram_free
>   exec: Factor out section_covers_addr
>   exec: Introduce AddressSpaceDispatch.mru_section
> 
>  cputlb.c|   4 +-
>  exec.c  | 106 
> +---
>  hw/misc/ivshmem.c   |   9 ++--
>  include/exec/memory.h   |   9 +---
>  include/exec/ram_addr.h |  24 +--
>  kvm-all.c   |   3 +-
>  memory.c|  56 ++---
>  7 files changed, 111 insertions(+), 100 deletions(-)
> 

Thanks, queued!

Paolo

[Qemu-devel] [PATCH] pxb: cleanup

2016-03-01 Thread Cao jin

Signed-off-by: Cao jin 
---
BTW: this doc seems a little out of date, since pxb already supports Q35.

 docs/pci_expander_bridge.txt| 6 +++---
 hw/pci-bridge/pci_expander_bridge.c | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/pci_expander_bridge.txt b/docs/pci_expander_bridge.txt
index e7c8fe9..3675027 100644
--- a/docs/pci_expander_bridge.txt
+++ b/docs/pci_expander_bridge.txt
@@ -24,8 +24,8 @@ A detailed command line would be:
 -object memory-backend-ram,size=1024M,policy=bind,host-nodes=0,id=ram-node0 
-numa node,nodeid=0,cpus=0,memdev=ram-node0
 -object memory-backend-ram,size=1024M,policy=bind,host-nodes=1,id=ram-node1 
-numa node,nodeid=1,cpus=1,memdev=ram-node1
 -device pxb,id=bridge1,bus=pci.0,numa_node=1,bus_nr=4 -netdev user,id=nd 
-device e1000,bus=bridge1,addr=0x4,netdev=nd
--device pxb,id=bridge2,bus=pci.0,numa_node=0,bus_nr=8, -device 
e1000,bus=bridge2,addr=0x3
--device pxb,id=bridge3,bus=pci.0,bus_nr=40, -drive 
if=none,id=drive0,file=[img] -device 
virtio-blk-pci,drive=drive0,scsi=off,bus=bridge3,addr=1
+-device pxb,id=bridge2,bus=pci.0,numa_node=0,bus_nr=8 -device 
e1000,bus=bridge2,addr=0x3
+-device pxb,id=bridge3,bus=pci.0,bus_nr=40 -drive if=none,id=drive0,file=[img] 
-device virtio-blk-pci,drive=drive0,scsi=off,bus=bridge3,addr=1
 
 Here you have:
  - 2 NUMA nodes for the guest, 0 and 1. (both mapped to the same NUMA node in 
host, but you can and should put it in different host NUMA nodes)
@@ -43,7 +43,7 @@ Implementation
 ==
 The PXB is composed by:
 - HostBridge (TYPE_PXB_HOST)
-  The host bridge allows to register and query the PXB's rPCI root bus in QEMU.
+  The host bridge allows to register and query the PXB's PCI root bus in QEMU.
 - PXBDev(TYPE_PXB_DEVICE)
   It is a regular PCI Device that resides on the piix host-bridge bus and its 
bus uses the same PCI domain.
   However, the bus behind is exposed through ACPI as a primary PCI bus and 
starts a new PCI hierarchy.
diff --git a/hw/pci-bridge/pci_expander_bridge.c 
b/hw/pci-bridge/pci_expander_bridge.c
index d23b8da..5e7e546 100644
--- a/hw/pci-bridge/pci_expander_bridge.c
+++ b/hw/pci-bridge/pci_expander_bridge.c
@@ -283,7 +283,7 @@ static void pxb_dev_exitfn(PCIDevice *pci_dev)
 }
 
 static Property pxb_dev_properties[] = {
-/* Note: 0 is not a legal a PXB bus number. */
+/* Note: 0 is not a legal PXB bus number. */
 DEFINE_PROP_UINT8("bus_nr", PXBDev, bus_nr, 0),
 DEFINE_PROP_UINT16("numa_node", PXBDev, numa_node, NUMA_NODE_UNASSIGNED),
 DEFINE_PROP_END_OF_LIST(),
-- 
2.1.0

Re: [Qemu-devel] [RFC PATCH v0 2/6] spapr: CPU core device

2016-03-01 Thread Bharata B Rao

On Tue, Mar 01, 2016 at 10:16:18AM +0100, Igor Mammedov wrote:
> On Tue, 1 Mar 2016 13:47:27 +0530
> Bharata B Rao  wrote:
> 
> > On Mon, Feb 29, 2016 at 04:15:25PM +0100, Igor Mammedov wrote:
> > > On Mon, 29 Feb 2016 18:25:25 +0530
> > > Bharata B Rao  wrote:
> > >   
> > > > On Mon, Feb 29, 2016 at 11:03:16AM +0100, Igor Mammedov wrote:  
> > > > > On Mon, 29 Feb 2016 11:20:19 +0530
> > > > > Bharata B Rao  wrote:
> > > > > 
> > > > > > On Fri, Feb 26, 2016 at 12:13:39PM -0600, Michael Roth wrote:
> > > > > > > Quoting Bharata B Rao (2016-02-25 10:22:38)  
> > > > > > > > Add sPAPR specific CPU core device that is based on generic CPU 
> > > > > > > > core device.
> > > > > > > > Creating this core device will result in creation of all the 
> > > > > > > > CPU thread
> > > > > > > > devices that are part of this core.
> > > > > > > > 
> > > > > > > > Signed-off-by: Bharata B Rao 
> > > > > > > > ---
> > > > > > > >  hw/ppc/Makefile.objs|   1 +
> > > > > > > >  hw/ppc/spapr_cpu_core.c | 210 
> > > > > > > > 
> > > > > > > >  include/hw/ppc/spapr_cpu_core.h |  32 ++
> > > > > > > >  3 files changed, 243 insertions(+)
> > > > > > > >  create mode 100644 hw/ppc/spapr_cpu_core.c
> > > > > > > >  create mode 100644 include/hw/ppc/spapr_cpu_core.h
> > > > > > > > 
> > > > > > > > diff --git a/hw/ppc/Makefile.objs b/hw/ppc/Makefile.objs
> > > > > > > > index c1ffc77..5cc6608 100644
> > > > > > > > --- a/hw/ppc/Makefile.objs
> > > > > > > > +++ b/hw/ppc/Makefile.objs
> > > > > > > > @@ -4,6 +4,7 @@ obj-y += ppc.o ppc_booke.o
> > > > > > > >  obj-$(CONFIG_PSERIES) += spapr.o spapr_vio.o spapr_events.o
> > > > > > > >  obj-$(CONFIG_PSERIES) += spapr_hcall.o spapr_iommu.o 
> > > > > > > > spapr_rtas.o
> > > > > > > >  obj-$(CONFIG_PSERIES) += spapr_pci.o spapr_rtc.o spapr_drc.o 
> > > > > > > > spapr_rng.o
> > > > > > > > +obj-$(CONFIG_PSERIES) += spapr_cpu_core.o
> > > > > > > >  ifeq ($(CONFIG_PCI)$(CONFIG_PSERIES)$(CONFIG_LINUX), yyy)
> > > > > > > >  obj-y += spapr_pci_vfio.o
> > > > > > > >  endif
> > > > > > > > diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
> > > > > > > > new file mode 100644
> > > > > > > > index 000..c44eb61
> > > > > > > > --- /dev/null
> > > > > > > > +++ b/hw/ppc/spapr_cpu_core.c
> > > > > > > > @@ -0,0 +1,210 @@
> > > > > > > > +/*
> > > > > > > > + * sPAPR CPU core device, acts as container of CPU thread 
> > > > > > > > devices.
> > > > > > > > + *
> > > > > > > > + * Copyright (C) 2016 Bharata B Rao 
> > > > > > > > 
> > > > > > > > + *
> > > > > > > > + * This work is licensed under the terms of the GNU GPL, 
> > > > > > > > version 2 or later.
> > > > > > > > + * See the COPYING file in the top-level directory.
> > > > > > > > + */
> > > > > > > > +#include "hw/cpu/core.h"
> > > > > > > > +#include "hw/ppc/spapr_cpu_core.h"
> > > > > > > > +#include "hw/ppc/spapr.h"
> > > > > > > > +#include "hw/boards.h"
> > > > > > > > +#include "qemu/error-report.h"
> > > > > > > > +#include "qapi/visitor.h"
> > > > > > > > +#include 
> > > > > > > > +
> > > > > > > > +static int spapr_cpu_core_realize_child(Object *child, void 
> > > > > > > > *opaque)
> > > > > > > > +{
> > > > > > > > +Error **errp = opaque;
> > > > > > > > +
> > > > > > > > +object_property_set_bool(child, true, "realized", errp);
> > > > > > > > +if (*errp) {
> > > > > > > > +return 1;
> > > > > > > > +}
> > > > > > > > +return 0;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +static void spapr_cpu_core_realize(DeviceState *dev, Error 
> > > > > > > > **errp)
> > > > > > > > +{
> > > > > > > > +sPAPRCPUCore *core = SPAPR_CPU_CORE(OBJECT(dev));
> > > > > > > > +sPAPRMachineState *spapr = 
> > > > > > > > SPAPR_MACHINE(qdev_get_machine());
> > > > > > > > +Error *local_err = NULL;
> > > > > > > > +
> > > > > > > > +if (!core->nr_threads) {
> > > > > > > > +error_setg(errp, "nr_threads property can't be 0");
> > > > > > > > +return;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +if (!core->cpu_model) {
> > > > > > > > +error_setg(errp, "cpu_model property isn't set");
> > > > > > > > +return;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +/*
> > > > > > > > + * TODO: If slot isn't specified, plug this core into
> > > > > > > > + * an existing empty slot.
> > > > > > > > + */
> > > > > > > > +if (!core->slot) {
> > > > > > > > +error_setg(errp, "slot property isn't set");
> > > > > > > > +return;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +object_property_set_link(OBJECT(spapr), OBJECT(core), 
> > > > > > > > core->slot,
> > > > > > > > + &local_err);
> > > > > > > > +if (local_err) {
> > > > > > > > +error_propagate(errp, local_err);
> > > > > > > > +return;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > > +obje

Re: [Qemu-devel] [PATCH] pxb: cleanup

2016-03-01 Thread Marcel Apfelbaum


On 03/01/2016 11:45 AM, Cao jin wrote:

Signed-off-by: Cao jin 
---
BTW: this doc seems a little out of date, since pxb already supports Q35.


Hi,

Yes, you are right, but we use a different device, pxb-pcie.
I will send a doc update about it, thanks for reminding me,
Marcel




  docs/pci_expander_bridge.txt| 6 +++---
  hw/pci-bridge/pci_expander_bridge.c | 2 +-
  2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/pci_expander_bridge.txt b/docs/pci_expander_bridge.txt
index e7c8fe9..3675027 100644
--- a/docs/pci_expander_bridge.txt
+++ b/docs/pci_expander_bridge.txt
@@ -24,8 +24,8 @@ A detailed command line would be:
  -object memory-backend-ram,size=1024M,policy=bind,host-nodes=0,id=ram-node0 
-numa node,nodeid=0,cpus=0,memdev=ram-node0
  -object memory-backend-ram,size=1024M,policy=bind,host-nodes=1,id=ram-node1 
-numa node,nodeid=1,cpus=1,memdev=ram-node1
  -device pxb,id=bridge1,bus=pci.0,numa_node=1,bus_nr=4 -netdev user,id=nd 
-device e1000,bus=bridge1,addr=0x4,netdev=nd
--device pxb,id=bridge2,bus=pci.0,numa_node=0,bus_nr=8, -device 
e1000,bus=bridge2,addr=0x3
--device pxb,id=bridge3,bus=pci.0,bus_nr=40, -drive 
if=none,id=drive0,file=[img] -device 
virtio-blk-pci,drive=drive0,scsi=off,bus=bridge3,addr=1
+-device pxb,id=bridge2,bus=pci.0,numa_node=0,bus_nr=8 -device 
e1000,bus=bridge2,addr=0x3
+-device pxb,id=bridge3,bus=pci.0,bus_nr=40 -drive if=none,id=drive0,file=[img] 
-device virtio-blk-pci,drive=drive0,scsi=off,bus=bridge3,addr=1

  Here you have:
   - 2 NUMA nodes for the guest, 0 and 1. (both mapped to the same NUMA node in 
host, but you can and should put it in different host NUMA nodes)
@@ -43,7 +43,7 @@ Implementation
  ==
  The PXB is composed by:
  - HostBridge (TYPE_PXB_HOST)
-  The host bridge allows to register and query the PXB's rPCI root bus in QEMU.
+  The host bridge allows to register and query the PXB's PCI root bus in QEMU.
  - PXBDev(TYPE_PXB_DEVICE)
It is a regular PCI Device that resides on the piix host-bridge bus and its 
bus uses the same PCI domain.
However, the bus behind is exposed through ACPI as a primary PCI bus and 
starts a new PCI hierarchy.
diff --git a/hw/pci-bridge/pci_expander_bridge.c 
b/hw/pci-bridge/pci_expander_bridge.c
index d23b8da..5e7e546 100644
--- a/hw/pci-bridge/pci_expander_bridge.c
+++ b/hw/pci-bridge/pci_expander_bridge.c
@@ -283,7 +283,7 @@ static void pxb_dev_exitfn(PCIDevice *pci_dev)
  }

  static Property pxb_dev_properties[] = {
-/* Note: 0 is not a legal a PXB bus number. */
+/* Note: 0 is not a legal PXB bus number. */
  DEFINE_PROP_UINT8("bus_nr", PXBDev, bus_nr, 0),
  DEFINE_PROP_UINT16("numa_node", PXBDev, numa_node, NUMA_NODE_UNASSIGNED),
  DEFINE_PROP_END_OF_LIST(),




Reviewed-by: Marcel Apfelbaum

Re: [Qemu-devel] [PATCH v2 03/15] docker: Add images

2016-03-01 Thread Alex Bennée

Fam Zheng  writes:

> On Mon, 02/29 17:18, Alex Bennée wrote:
>>
>> Fam Zheng  writes:
>>
>> > Signed-off-by: Fam Zheng 
>>
>> I think we need to include the dtc libs in here unless we need to ship
>> all submodules in the tree as well.
>
> OK, will take a look.
>
>>
>> Is there a way to get a plain install shell? PAUSE=1 on the test shells
>> doesn't seem to allow me to install anything.
>
> What is the error when you install things? I think it is an unmodified shell
> and "yum" or "apt-get" should just work. It's just that when the container
> exits the changes will be discarded.

So running:

make docker-test-quick@ubuntu PAUSE=1

And I got a shell I wanted to try and get working:

root@72d09615d5b2:/var/tmp/qemu.tmp# ./configure

ERROR: DTC (libfdt) version >= 1.4.0 not present. Your options:
(1) Preferred: Install the DTC (libfdt) devel package
(2) Fetch the DTC submodule, using:
git submodule update --init dtc

root@72d09615d5b2:/var/tmp/qemu.tmp# apt-cache search libfdt
libfdt-dev - Flat Device Trees manipulation library - development
files
libfdt1 - Flat Device Trees manipulation library
root@72d09615d5b2:/var/tmp/qemu.tmp# apt-get install libfdt-dev
Reading package lists... Done
Building dependency tree
Reading state information... Done
The following extra packages will be installed:
libfdt1
The following NEW packages will be installed:
libfdt-dev libfdt1
0 upgraded, 2 newly installed, 0 to remove and 10 not upgraded.
Need to get 37.8 kB of archives.
After this operation, 216 kB of additional disk space will be used.
Do you want to continue? [Y/n]
Err http://archive.ubuntu.com/ubuntu/ trusty/main libfdt1 amd64
1.4.0+dfsg-1
Could not resolve 'archive.ubuntu.com'
Err http://archive.ubuntu.com/ubuntu/ trusty/main libfdt-dev amd64
1.4.0+dfsg-1
Could not resolve 'archive.ubuntu.com'
E: Failed to fetch
http://archive.ubuntu.com/ubuntu/pool/main/d/device-tree-compiler/libfdt1_1.4.0+dfsg-1_amd64.deb
Could not resolve 'archive.ubuntu.com'

E: Failed to fetch
http://archive.ubuntu.com/ubuntu/pool/main/d/device-tree-compiler/libfdt-dev_1.4.0+dfsg-1_amd64.deb
Could not resolve 'archive.ubuntu.com'

E: Unable to fetch some archives, maybe run apt-get update or try with
--fix-missing?
root@72d09615d5b2:/var/tmp/qemu.tmp# apt-get update
Err http://archive.ubuntu.com trusty InRelease

Err http://archive.ubuntu.com trusty-updates InRelease

Err http://archive.ubuntu.com trusty-security InRelease

Err http://archive.ubuntu.com trusty Release.gpg
Could not resolve 'archive.ubuntu.com'
Err http://archive.ubuntu.com trusty-updates Release.gpg
Could not resolve 'archive.ubuntu.com'
Err http://archive.ubuntu.com trusty-security Release.gpg
Could not resolve 'archive.ubuntu.com'
Reading package lists... Done
W: Failed to fetch
http://archive.ubuntu.com/ubuntu/dists/trusty/InRelease

W: Failed to fetch
http://archive.ubuntu.com/ubuntu/dists/trusty-updates/InRelease

W: Failed to fetch
http://archive.ubuntu.com/ubuntu/dists/trusty-security/InRelease

W: Failed to fetch
http://archive.ubuntu.com/ubuntu/dists/trusty/Release.gpg  Could not
resolve 'archive.ubuntu.com'

W: Failed to fetch
http://archive.ubuntu.com/ubuntu/dists/trusty-updates/Release.gpg
Could not resolve 'archive.ubuntu.com'

W: Failed to fetch
http://archive.ubuntu.com/ubuntu/dists/trusty-security/Release.gpg
Could not resolve 'archive.ubuntu.com'

W: Some index files failed to download. They have been ignored, or old
ones used instead.

So it looks like networking isn't working in the test images. For
debugging problems I think we need to be able to install stuff on a
running image so we can then fix the base recipes.

>
> Fam

--
Alex Bennée

Re: [Qemu-devel] [PATCH v2 02/15] Makefile: Rules for docker testing

2016-03-01 Thread Alex Bennée


Fam Zheng  writes:

> On Mon, 02/29 17:08, Alex Bennée wrote:
>>
>> Fam Zheng  writes:
>>
>> > This adds a group of make targets to run docker tests, all are available
>> > in source tree without running ./configure.
>> >
>> > The usage is shown by "make docker".
>> >
>> > Besides the fixed ones, dynamic targets for building each image and
>> > running each test in each image are generated automatically by make,
>> > scanning $(SRC_PATH)/tests/docker/ files with specific patterns.
>> >
>> > Alternative to manually list particular targets (docker-run-FOO@BAR)
>> > set, you can control which tests/images to run by filtering variables,
>> > TESTS= and IMAGES=, which are expressed in Makefile pattern syntax,
>> > "foo% %bar ...". For example:
>> >
>> > $ make docker-run IMAGES="ubuntu fedora"
>>
>> This doesn't seem to work for me:
>>
>> 16:49 alex@zen/x86_64  [qemu.git/review/docker-tests-v2] >make
>> docker-run IMAGES="ubuntu" V=1
>> /home/alex/lsrc/qemu/qemu.git/rules.mak:178: warning: overriding
>> commands for target `clean-timestamp'
>> /home/alex/lsrc/qemu/qemu.git/rules.mak:178: warning: ignoring old
>> commands for target `clean-timestamp'
>> make: *** No rule to make target `docker-run'. Stop.
>
> The commit message is stale, as the help text, the target is now "docker-test"
> which is more intuitive.
>
> Fam
>
>> `
>>
>> >
>> > Unfortunately, it's impossible to propagate "-j $JOBS" into make in
>> > containers, however since each combination is made a first class target
>> > in the top Makefile, "make -j$N docker-run" still parallels the tests
>> > coarsely.
>> >
>> > Signed-off-by: Fam Zheng 
>> > ---
>> >  Makefile  |  4 +-
>> >  tests/docker/Makefile.include | 93 
>> > +++
>> >  2 files changed, 96 insertions(+), 1 deletion(-)
>> >  create mode 100644 tests/docker/Makefile.include
>> >
>> > diff --git a/Makefile b/Makefile
>> > index f9fae3a..f104a08 100644
>> > --- a/Makefile
>> > +++ b/Makefile
>> > @@ -6,7 +6,7 @@ BUILD_DIR=$(CURDIR)
>> >  # Before including a proper config-host.mak, assume we are in the source 
>> > tree
>> >  SRC_PATH=.
>> >
>> > -UNCHECKED_GOALS := %clean TAGS cscope ctags
>> > +UNCHECKED_GOALS := %clean TAGS cscope ctags docker docker-%
>> >
>> >  # All following code might depend on configuration variables
>> >  ifneq ($(wildcard config-host.mak),)
>> > @@ -651,3 +651,5 @@ endif
>> >  # Include automatically generated dependency files
>> >  # Dependencies in Makefile.objs files come from our recursive subdir rules
>> >  -include $(wildcard *.d tests/*.d)
>> > +
>> > +include $(SRC_PATH)/tests/docker/Makefile.include
>> > diff --git a/tests/docker/Makefile.include b/tests/docker/Makefile.include
>> > new file mode 100644
>> > index 000..d8c55c3
>> > --- /dev/null
>> > +++ b/tests/docker/Makefile.include
>> > @@ -0,0 +1,93 @@
>> > +# Makefile for Docker tests
>> > +
>> > +$(if $(quiet-command),,$(eval include $(SRC_PATH)/rules.mak))
>> > +
>> > +.PHONY: docker docker-test docker-clean docker-image
>> > +
>> > +DOCKER_SUFFIX := .docker
>> > +
>> > +DOCKER_IMAGES := $(notdir $(basename \
>> > +  $(wildcard $(SRC_PATH)/tests/docker/dockerfiles/*)))
>> > +
>> > +DOCKER_TESTS := $(notdir $(shell \
>> > +  find $(SRC_PATH)/tests/docker/ -name 'test-*' -type f -executable))
>> > +
>> > +DOCKER_TOOLS := travis
>> > +
>> > +TESTS ?= %
>> > +IMAGES ?= %
>> > +
>> > +$(foreach i,$(DOCKER_IMAGES), \
>> > +  $(eval docker-image: docker-image-$i) \
>> > +  $(eval docker-image-$i: IMAGE = $i) \
>> > +  $(eval .PHONY: docker-image-$i docker-@$i) \
>> > +  $(foreach t,$(DOCKER_TESTS) $(DOCKER_TOOLS), \
>> > +  $(eval .PHONY: docker-$t@$i) \
>> > +  $(eval docker-$t@$i: docker-image-$i) \
>> > +  $(eval docker-$t@$i: docker-run-$t@$i) \
>> > +  $(eval docker-@$i: docker-$t@$i) \
>> > +  $(eval docker-test: docker-run-$t@$i) \
>> > +  ) \
>> > +)

Also I think you need rules for updating the images here. I just updated
ubuntu.docker to add libfdt-dev but on running on the command line:

09:53 alex@zen/x86_64  [qemu.git/review/docker-tests-v2] >make 
docker-image-ubuntu V=1
/home/alex/lsrc/qemu/qemu.git/rules.mak:178: warning: overriding commands for 
target `clean-timestamp'
/home/alex/lsrc/qemu/qemu.git/rules.mak:178: warning: ignoring old commands for 
target `clean-timestamp'
make: Nothing to be done for `docker-image-ubuntu'.


--
Alex Bennée

Re: [Qemu-devel] [PATCH] Use special code for sigsetjmp only in cpu-exec.c

2016-03-01 Thread Peter Maydell

On 1 March 2016 at 05:07, Stefan Weil  wrote:
> The rest of the code can use longjmp with stack unwinding.
>
> Signed-off-by: Stefan Weil 
> ---
>
> This is a bug fix needed for 64 bit Windows.
>
> QEMU for Windows currently gets the wrong definition for
> sigsetjmp. It uses stack unwinding for longjmp which results
> in a crash when it is called from generated code.
>
> Thanks to Andrew Baumann for his reminder that this patch was
> still missing. Andrew, could you please test it with your
> RPi emulation?

I don't understand this patch. Why doesn't it work to have
sigsetjmp() be implemented the same way for every use that
QEMU makes of it?

thanks
-- PMM

Re: [Qemu-devel] [PATCH v2 00/16] qapi: Allow blockdev-add for NBD

2016-03-01 Thread Daniel P. Berrange

On Tue, Mar 01, 2016 at 12:37:14AM +0100, Max Reitz wrote:
> On 01.03.2016 00:24, Eric Blake wrote:
> > On 02/29/2016 04:19 PM, Max Reitz wrote:
> >> Turns out NBD is not so simple to do if you do it right. Anyway, this
> >> series adds blockdev-add support for NBD clients.
> >>
> >> Patches 1 and 2 add one less and one more complicated QDict function,
> >> respectively, which I needed in later NBD patches: Patch 1 for handling
> >> legacy options (move "host" to "address.data.host" etc.) and patch 2
> >> because I'd like to use the input visitor for transforming the NBD
> >> options into a SocketAddress. Unfortunately, the block layer uses
> >> flattened QDicts everywhere, so we'll have to unflatten (erect?) them
> >> before we can use that visitor.
> > 
> > Dan had a patch proposal that called the operation "crumple"; I need to
> > review both proposals and see which one I like.
> > https://lists.gnu.org/archive/html/qemu-devel/2016-02/msg04618.html
> 
> Well, here I go again, not looking at patches on the list...
> 
> Looking at the design, I like his idea of having an escape sequence;
> also, his qdict_crumple() can return both lists and dicts where my
> qdict_unflatten() only returns dicts (then again, this is what
> qdict_flatten() always works on). And his patch doesn't suffer from as
> much indentation as mine does.

The escape sequence is critical to support for my use case, because
sadly some object properties have '.' in their name :-(

> What I like more about my patch, however, is that I'm reusing
> qdict_array_split() and qdict_array_entries(). That is mostly because my
> function modifies the given QDict, where Dan's does not.

The reason for that is that the use context in which I need to call
qdict_crumple() has a const QDict, so modifying the original QDict
was not an option.

Second, for error handling, if there is a problem we can't resolve
half way through the unflattening process, then if you're modifying
the original QDict you end up with a QDict that is a hybrid between
the flat & unflat forms. I think it is pretty bad practice for API
design / behaviour to leave inputs in such a state on error. ie if
the code isn't capable of rolling back to the original state it
should not be modifying the input arg.

Regards,
Daniel
-- 
|: http://berrange.com  -o-http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org  -o- http://virt-manager.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org   -o-   http://live.gnome.org/gtk-vnc :|

Re: [Qemu-devel] [RFC PATCH v0 0/6] Core based CPU hotplug for PowerPC sPAPR

2016-03-01 Thread Bharata B Rao

On Thu, Feb 25, 2016 at 09:52:36PM +0530, Bharata B Rao wrote:
> Hi,
> 
> This is an attempt to implement CPU hotplug for PowerPC sPAPR based on
> the approach suggested by Andreas. While I say that, I should also explicitly
> add that I have tried to follow Andreas' suggestions to the best of my
> understanding and hence there could be bits which are still not
> as per expectations.
> 
> I have tried to model this similarly to what Andreas did for x86 a year back 
> at
> https://lists.gnu.org/archive/html/qemu-devel/2015-03/msg04858.html

Andreas - Do you have any comments on this implementation ? Is it worth
pursuing further in your view ?

Regards,
Bharata.

Re: [Qemu-devel] [PATCH v2 00/16] qapi: Allow blockdev-add for NBD

2016-03-01 Thread Kevin Wolf

Am 01.03.2016 um 11:00 hat Daniel P. Berrange geschrieben:
> On Tue, Mar 01, 2016 at 12:37:14AM +0100, Max Reitz wrote:
> > On 01.03.2016 00:24, Eric Blake wrote:
> > > On 02/29/2016 04:19 PM, Max Reitz wrote:
> > >> Turns out NBD is not so simple to do if you do it right. Anyway, this
> > >> series adds blockdev-add support for NBD clients.
> > >>
> > >> Patches 1 and 2 add one less and one more complicated QDict function,
> > >> respectively, which I needed in later NBD patches: Patch 1 for handling
> > >> legacy options (move "host" to "address.data.host" etc.) and patch 2
> > >> because I'd like to use the input visitor for transforming the NBD
> > >> options into a SocketAddress. Unfortunately, the block layer uses
> > >> flattened QDicts everywhere, so we'll have to unflatten (erect?) them
> > >> before we can use that visitor.
> > > 
> > > Dan had a patch proposal that called the operation "crumple"; I need to
> > > review both proposals and see which one I like.
> > > https://lists.gnu.org/archive/html/qemu-devel/2016-02/msg04618.html
> > 
> > Well, here I go again, not looking at patches on the list...
> > 
> > Looking at the design, I like his idea of having an escape sequence;
> > also, his qdict_crumple() can return both lists and dicts where my
> > qdict_unflatten() only returns dicts (then again, this is what
> > qdict_flatten() always works on). And his patch doesn't suffer from as
> > much indentation as mine does.
> 
> The escape sequence is critical to support for my use case, because
> sadly some object properties have '.' in their name :-(
> 
> > What I like more about my patch, however, is that I'm reusing
> > qdict_array_split() and qdict_array_entries(). That is mostly because my
> > function modifies the given QDict, where Dan's does not.
> 
> The reason for that is that the use context in which I need to call
> qdict_crumple() has a const QDict, so modifying the original QDict
> was not an option.

You can always clone and modify if modifying an existing QDict turns out
to be nicer to implement.

> Second, for error handling, if there is a problem we can't resolve
> half way through the unflattening process, then if you're modifying
> the original QDict you end up with a QDict that is a hybrid between
> the flat & unflat forms. I think it is pretty bad practice for API
> design / behaviour to leave inputs in such a state on error. ie if
> the code isn't capable of rolling back to the original state it
> should not be modifying the input arg.

I think we generally abort the whole action in such error cases. Then
it doesn't really matter in what state the to be freed QDict is.

Kevin

Re: [Qemu-devel] [PULL 00/12] Block patches

2016-03-01 Thread Peter Maydell

On 29 February 2016 at 20:08, Jeff Cody  wrote:
> The following changes since commit 071608b519adf62bc29c914343a21c5407ab1ac9:
>
>   Merge remote-tracking branch 'remotes/kraxel/tags/pull-usb-20160229-1' into 
> staging (2016-02-29 12:24:26 +)
>
> are available in the git repository at:
>
>
>   g...@github.com:codyprime/qemu-kvm-jtc.git tags/block-pull-request
>
> for you to fetch changes up to cc199b16cf4cb9279aca73f5f5dce2cc337b9079:
>
>   iotests/124: Add cluster_size mismatch test (2016-02-29 14:55:14 -0500)
>
> 
> Block patches
> 
>

Applied, thanks.

-- PMM

Re: [Qemu-devel] [RFC PATCH v2 00/10] Add colo-proxy based on netfilter

2016-03-01 Thread Dr. David Alan Gilbert

* Li Zhijian (lizhij...@cn.fujitsu.com) wrote:
> 
> 
> On 03/01/2016 04:04 AM, Dr. David Alan Gilbert wrote:
> >* Zhang Chen (zhangchen.f...@cn.fujitsu.com) wrote:
> >>From: zhangchen 
> >>
> >>Hi,all
> >>
> >>This patch adds a colo-proxy object. COLO-Proxy is a part of COLO,
> >>based on qemu netfilter, and it's a plugin for qemu netfilter. Its function
> >>is to keep the Secondary VM connected normally to the Primary VM and to
> >>compare packets sent by the PVM with those sent by the SVM. If the packets
> >>differ, it notifies COLO to do a checkpoint and sends all the queued primary packets.
> >
> >Hi Zhangchen,
> >   How are you dealing with the IP 'identification' field?
> >It's a very very random field, and not just the initial value in the 
> >connection.
> >I can't see how the kernel colo-proxy dealt with it either; but I think its
> >comparison was after defragmentation so probably ignored the identification
> >field
> You are right, most kernel colo-proxy code is working at mangle table (after 
> defrag).
> and colo proxy only compare the contents of L4(TCP/UDP) excluding IP 
> identification.
> 
> > - wouldn't that confuse a client at failover?
> Err..., interesting question.
> 
> for example, a COLO including primary(PVM) and secondary(SVM)
> 1. primary is sending a compared P_IP packet(identification=0x12345, split to 
> IP_s1, IP_s2..IP_s100) to client
> 2. client is receiving the ip segment(but IP_s2, IP_s50, IP_s80..IP_s99 are 
> missing)
>and primary host is down.
> 3. secondary VM takeover, and send a S_IP packet(ip contents is always same 
> as at PVM)
> 
> in step 3,
> if the S_IP identification isn't 0x12345, client will drop the ip segment at 
> step 2 because of defrag timeout.

So that triggers a timeout (30 second?) - hmm OK, a bit slow but OK.

> if the S_IP identification is 0x12345, client may mix the segment from PVM 
> or SVM(just like segment come
> from different router), but that's Okay, because we have ensured the ip 
> contents is identical.

Could the S_IP identification match a later/earlier fragment?

Dave

> so IMO, it will not confuse the client at failover.
> 
> >
> >Dave
> >
> >>You can also get the series from:
> >>
> >>https://github.com/zhangckid/qemu/tree/colo-v2.2-periodic-mode-with-colo-proxyV2
> >>
> >>Usage:
> >>
> >>primary:
> >>-netdev tap,id=bn0 -device e1000,netdev=bn0
> >>-object colo-proxy,id=f0,netdev=bn0,queue=all,mode=primary,addr=host:port
> >>
> >>secondary:
> >>-netdev tap,id=bn0 -device e1000,netdev=bn0
> >>-object colo-proxy,id=f0,netdev=bn0,queue=all,mode=secondary,addr=host:port
> >>
> >>NOTE:
> >>queue must set "all". See enum NetFilterDirection for detail.
> >>colo-proxy need queue all packets
> >>colo-proxy V2 just can compare ip packet
> >>
> >>
> >>## Background
> >>
> >>COLO FT/HA (COarse-grain LOck-stepping Virtual Machines for Non-stop 
> >>Service)
> >>project is a high availability solution. Both Primary VM (PVM) and 
> >>Secondary VM
> >>(SVM) run in parallel. They receive the same request from client, and 
> >>generate
> >>responses in parallel too. If the response packets from PVM and SVM are
> >>identical, they are released immediately. Otherwise, a VM checkpoint (on
> >>demand)is conducted.
> >>
> >>Paper:
> >>http://www.socc2013.org/home/program/a3-dong.pdf?attredirects=0
> >>
> >>COLO on Xen:
> >>http://wiki.xen.org/wiki/COLO_-_Coarse_Grain_Lock_Stepping
> >>
> >>COLO on Qemu/KVM:
> >>http://wiki.qemu.org/Features/COLO
> >>
> >>By the needs of capturing response packets from PVM and SVM and finding out
> >>whether they are identical, we introduce a new module to qemu networking
> >>called colo-proxy.
> >>
> >>V2:
> >>   rebase colo-proxy with qemu-colo-v2.2-periodic-mode
> >>   fix dave's comments
> >>   fix wency's comments
> >>   fix zhanghailiang's comments
> >>
> >>v1:
> >>   initial patch.
> >>
> >>
> >>
> >>zhangchen (10):
> >>   Init colo-proxy object based on netfilter
> >>   Jhash: add linux kernel jhashtable in qemu
> >>   Colo-proxy: add colo-proxy framework
> >>   Colo-proxy: add data structure and jhash func
> >>   net/colo-proxy: Add colo interface to use proxy
> >>   net/colo-proxy: add socket used by forward func
> >>   net/colo-proxy: Add packet enqueue & handle func
> >>   net/colo-proxy: Handle packet and connection
> >>   net/colo-proxy: Compare pri pkt to sec pkt
> >>   net/colo-proxy: Colo-proxy do checkpoint and clear
> >>
> >>  include/qemu/jhash.h |  61 
> >>  net/Makefile.objs|   1 +
> >>  net/colo-proxy.c | 939 
> >> +++
> >>  net/colo-proxy.h |  24 ++
> >>  qemu-options.hx  |   6 +
> >>  trace-events |   8 +
> >>  vl.c |   3 +-
> >>  7 files changed, 1041 insertions(+), 1 deletion(-)
> >>  create mode 100644 include/qemu/jhash.h
> >>  create mode 100644 net/colo-proxy.c
> >>  create mode 100644 net/colo-proxy.h
> >>
> >>--
> >>1.9.1
> >>
> >>
> >>
> >--
> >Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK
> >
> >
> >.
> >
> 
> -- 
> Best

Re: [Qemu-devel] [RFC] QMP: add query-hotpluggable-cpus

2016-03-01 Thread Igor Mammedov

On Tue, 1 Mar 2016 12:19:21 +1100
David Gibson  wrote:

> On Mon, Feb 29, 2016 at 04:42:58PM +0100, Igor Mammedov wrote:
> > On Thu, 25 Feb 2016 14:52:06 -0300
> > Eduardo Habkost  wrote:
> >   
> > > On Wed, Feb 24, 2016 at 03:42:18PM +0100, Igor Mammedov wrote:  
> > > > On Tue, 23 Feb 2016 18:26:20 -0300
> > > > Eduardo Habkost  wrote:
> > > > 
> > > > > On Tue, Feb 23, 2016 at 10:46:45AM +0100, Igor Mammedov wrote:
> > > > > > On Mon, 22 Feb 2016 13:54:32 +1100
> > > > > > David Gibson  wrote:  
> > > > > [...]
> > > > > > > This is why Eduardo suggested - and I agreed - that it's probably
> > > > > > > better to implement the "1st layer" as an internal 
> > > > > > > structure/interface
> > > > > > > only, and implement the 2nd layer on top of that.  When/if we 
> > > > > > > need to
> > > > > > > we can revisit a user-accessible interface to the 1st layer.  
> > > > > > We are going around QOM based CPU introspecting interface for
> > > > > > years now and that's exactly what 2nd layer is, just another
> > > > > > implementation. I've just lost hope in this approach.
> > > > > > 
> > > > > > What I'm suggesting in this RFC is to forget controversial
> > > > > > QOM approach for now and use -device/device_add + QMP 
> > > > > > introspection,  
> > > > > 
> > > > > You have a point about it looking controversial, but I would like
> > > > > to understand why exactly it is controversial. Discussions seem
> > > > > to get stuck every single time we try to do something useful with
> > > > > > the QOM tree, and I don't understand why.
> > > > Maybe because we are trying to create a universal solution to fit
> > > > ALL platforms? And every time some one posts patches to show
> > > > implementation, it would break something in existing machine
> > > > or is not complete in terms of how interface would work wrt
> > > > mgmt/CLI/migration.
> > > 
> > > That's true.
> > >   
> > > > 
> > > > > 
> > > > > > i.e. completely split interface from how boards internally implement
> > > > > > CPU hotplug.  
> > > > > 
> > > > > A QOM-based interface may still split the interface from how
> > > > > boards internally implement CPU hotplug. They don't need to
> > > > > affect the device tree of the machine, we just need to create QOM
> > > > > objects or links at predictable paths, that implement certain
> > > > > interfaces.
> > > > > Besides not being able to reach consensus for a long time,
> > > > I'm fine with isolated QOM interface if it allow us to move forward.
> > > > However static QMP/QAPI interface seems to be better describing and
> > > > has better documentation vs current very flexible poorly 
> > > > self-describing QOM.
> > > 
> > > You have a good point: QMP is more stable and better documented.
> > > QOM is easier for making experiments, and I would really like to
> > > see it being used more. But if we still don't understand the
> > > requirements enough to design a QMP interface, we won't be able
> > > to implement the same functionality using QOM either.
> > > 
> > > If we figure out the requirements, I believe we should be able to
> > > design equivalent QMP and QOM interfaces.  
> > So not to stall CPU hotplug progress, I'd start with stable QMP query
> > interface for general use, leaving experimental QOM interface for later
> > as difficult to discover and poorly documented one from mgmt pov,
> > meaning mgmt would have to:
> >  - instantiate a particular machine type to find if QOM interface is 
> > supported,
> > >i.e. '-machine none' won't work with it as it's board dependent VS static 
> > compile time qapi-schema in QMP case
> >  - execute a bunch of qom-list/qom-read requests over wire to 
> > enumerate/query
> >objects starting at some fixed entry point (/machine/cpus) VS a single 
> > command that does 'atomic' enumeration in QMP case.  
> 
> That sounds reasonable to me.
> 
> However, before even that, I think we need to work out exactly what
> device_add of a multi-thread cpu module looks like.  I think that's
> less of a solved problem than everyone seems to be assuming.
S390 seems to be interested only in thread level hotplug:

   device_add thread-type,thread=1

for x86 I see 2 cases, current thread level,
which also likely applies to virt-arm board

   device_add thread-type,[node=N,]socket=X,core=Y,thread=1

and if we decide to do x86 hotplug at socket level, then an additional variant
for a new machine type would be multi-threaded:
 
   device_add socket-type,[node=N,]socket=X

For sPAPR it would be:

  device_add socket-type,core=X
   
For homogeneous CPUs we can continue to use -smp cores,threads options for
describing internal multi-threaded CPU layout. These options could be even
converted to global properties for TYPE_CPU_SOCKET.cores and 
TYPE_CPU_CORE.threads
so that they would be set automatically on all CPU objects.

Heterogeneous CPUs obviously don't fit in -smp world and would require
more/other properties to describe t

Re: [Qemu-devel] [PATCH 02/38] qemu-doc: Fix ivshmem huge page example

2016-03-01 Thread Marc-André Lureau

On Mon, Feb 29, 2016 at 7:40 PM, Markus Armbruster  wrote:
> Option parameter "share" is missing.  Without it, you get a *private*
> mmap(), which defeats ivshmem's purpose pretty thoroughly ;)
>
> While there, switch to the conventional mountpoint of hugetlbfs
> /dev/hugepages.
>
> Signed-off-by: Markus Armbruster 

Reviewed-by: Marc-André Lureau 


> ---
>  qemu-doc.texi | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/qemu-doc.texi b/qemu-doc.texi
> index bc9dd13..65f3b29 100644
> --- a/qemu-doc.texi
> +++ b/qemu-doc.texi
> @@ -1311,7 +1311,7 @@ Instead of specifying the  using POSIX shm, 
> you may specify
>  a memory backend that has hugepage support:
>
>  @example
> -qemu-system-i386 -object 
> memory-backend-file,size=1G,mem-path=/mnt/hugepages/my-shmem-file,id=mb1
> +qemu-system-i386 -object 
> memory-backend-file,size=1G,mem-path=/dev/hugepages/my-shmem-file,share,id=mb1
>   -device ivshmem,x-memdev=mb1
>  @end example
>
> --
> 2.4.3
>
>



-- 
Marc-André Lureau

[Qemu-devel] [PATCH v4 0/9] NVDIMM ACPI: introduce the framework of QEMU emulated

2016-03-01 Thread Xiao Guangrong

This patchset is against commit 2212ef27b342b98b220fe9 (fw-cfg: support
writeable blobs) on pci branch of Michael's git tree
and can be found at:
  https://github.com/xiaogr/qemu.git nvdimm-acpi-v4

Changelog in v4:
- drop the unnecessary assert() in aml_concatenate() based on Igor's
  suggestion
- introduce build_append_named_dword() and use it to simplify the code as
  Michael's suggestion

Changelog in v3:
Changes addressing Michael's comment:
- rebase the patchset against current code

Changes addressing Igor's comment:
- rename the parameters of aml_create_field() to reflect the ACPI spec
- fix the issue that the @target operand can not be optional in
  aml_concatenate() that is also cleaned up by using build_opcode_2arg_dst()

Others:
- separate the test patches into a single set, which will be posted later
  
These changes are based on Igor's comments:
- drop ssdt.rev2 support as the memory address allocated by BIOS/OVMF
  are always 32 bits
- support to test NVDIMM tables (NFIT and NVDIMM SSDT)
- add _CRS to report its operation region
- make AML APIs change be the separated patches

This is the second part of vNVDIMM implementation which implements the
BIOS patched dsm memory and introduces the framework that allows QEMU
to emulate DSM method

Thanks to Michael's idea, we do not reserve any memory for NVDIMM ACPI,
instead we let BIOS allocate the memory and patch the address to the
offset we want

The IO port is still enabled, as it serves as the way to notify QEMU and pass
the patched dsm memory address; the IO port region, 0x0a18 - 0xa20, is
therefore reserved and divided into two 32-bit ports used to pass
the low 32 bits and high 32 bits of the dsm memory address to QEMU

Thanks to Igor's idea, this patchset also extends DSDT/SSDT to revision 2
to apply 64 bit operations. In order to keep compatibility, old
versions (<= 2.5) still use revision 1. Since 64 bit operations break
old guests (such as Windows XP), we should keep the 64 bit stuff in
a private place where common ACPI operations do not touch it

Michael S. Tsirkin (1):
  acpi: add build_append_named_dword, returning an offset in buffer

Xiao Guangrong (8):
  acpi: add aml_create_field()
  acpi: add aml_concatenate()
  acpi: allow using object as offset for OperationRegion
  nvdimm acpi: initialize the resource used by NVDIMM ACPI
  nvdimm acpi: introduce patched dsm memory
  nvdimm acpi: let qemu handle _DSM method
  nvdimm acpi: emulate dsm method
  nvdimm acpi: add _CRS

 hw/acpi/Makefile.objs   |   2 +-
 hw/acpi/aml-build.c |  55 +-
 hw/acpi/nvdimm.c| 243 ++--
 hw/i386/acpi-build.c|  41 
 hw/i386/pc.c|   6 +-
 hw/i386/pc_piix.c   |   5 +
 hw/i386/pc_q35.c|   8 +-
 include/hw/acpi/aml-build.h |   9 +-
 include/hw/i386/pc.h|   4 +-
 include/hw/mem/nvdimm.h |  36 ++-
 10 files changed, 366 insertions(+), 43 deletions(-)

-- 
1.8.3.1

[Qemu-devel] [PATCH 1/9] acpi: add aml_create_field()

2016-03-01 Thread Xiao Guangrong

It will be used by nvdimm acpi

Signed-off-by: Xiao Guangrong 
---
 hw/acpi/aml-build.c | 14 ++
 include/hw/acpi/aml-build.h |  2 ++
 2 files changed, 16 insertions(+)

diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index 6675535..45b7f0a 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -997,6 +997,20 @@ Aml *create_field_common(int opcode, Aml *srcbuf, Aml 
*index, const char *name)
 return var;
 }
 
+/* ACPI 1.0b: 16.2.5.2 Named Objects Encoding: DefCreateField */
+Aml *aml_create_field(Aml *srcbuf, Aml *bit_index, Aml *num_bits,
+  const char *name)
+{
+Aml *var = aml_alloc();
+build_append_byte(var->buf, 0x5B); /* ExtOpPrefix */
+build_append_byte(var->buf, 0x13); /* CreateFieldOp */
+aml_append(var, srcbuf);
+aml_append(var, bit_index);
+aml_append(var, num_bits);
+build_append_namestring(var->buf, "%s", name);
+return var;
+}
+
 /* ACPI 1.0b: 16.2.5.2 Named Objects Encoding: DefCreateDWordField */
 Aml *aml_create_dword_field(Aml *srcbuf, Aml *index, const char *name)
 {
diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
index aa29d30..8ef10ad 100644
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h
@@ -346,6 +346,8 @@ Aml *aml_mutex(const char *name, uint8_t sync_level);
 Aml *aml_acquire(Aml *mutex, uint16_t timeout);
 Aml *aml_release(Aml *mutex);
 Aml *aml_alias(const char *source_object, const char *alias_object);
+Aml *aml_create_field(Aml *srcbuf, Aml *bit_index, Aml *num_bits,
+  const char *name);
 Aml *aml_create_dword_field(Aml *srcbuf, Aml *index, const char *name);
 Aml *aml_create_qword_field(Aml *srcbuf, Aml *index, const char *name);
 Aml *aml_varpackage(uint32_t num_elements);
-- 
1.8.3.1

[Qemu-devel] [PATCH 4/9] nvdimm acpi: initialize the resource used by NVDIMM ACPI

2016-03-01 Thread Xiao Guangrong

32 bits IO port starting from 0x0a18 in guest is reserved for NVDIMM
ACPI emulation. The table, NVDIMM_DSM_MEM_FILE, will be patched into
NVDIMM ACPI binary code

OSPM uses this port to tell QEMU the final address of the DSM memory
and notify QEMU to emulate the DSM method

Signed-off-by: Xiao Guangrong 
---
 hw/acpi/Makefile.objs   |  2 +-
 hw/acpi/nvdimm.c| 35 +++
 hw/i386/acpi-build.c| 10 +-
 hw/i386/pc.c|  6 +++---
 hw/i386/pc_piix.c   |  5 +
 hw/i386/pc_q35.c|  8 +++-
 include/hw/i386/pc.h|  4 +++-
 include/hw/mem/nvdimm.h | 28 +++-
 8 files changed, 82 insertions(+), 16 deletions(-)

diff --git a/hw/acpi/Makefile.objs b/hw/acpi/Makefile.objs
index f3ade9a..faee86c 100644
--- a/hw/acpi/Makefile.objs
+++ b/hw/acpi/Makefile.objs
@@ -2,7 +2,7 @@ common-obj-$(CONFIG_ACPI_X86) += core.o piix4.o pcihp.o
 common-obj-$(CONFIG_ACPI_X86_ICH) += ich9.o tco.o
 common-obj-$(CONFIG_ACPI_CPU_HOTPLUG) += cpu_hotplug.o cpu_hotplug_acpi_table.o
 common-obj-$(CONFIG_ACPI_MEMORY_HOTPLUG) += memory_hotplug.o 
memory_hotplug_acpi_table.o
-common-obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o
+obj-$(CONFIG_ACPI_NVDIMM) += nvdimm.o
 common-obj-$(CONFIG_ACPI) += acpi_interface.o
 common-obj-$(CONFIG_ACPI) += bios-linker-loader.o
 common-obj-$(CONFIG_ACPI) += aml-build.o
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 49ee68e..8568b20 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -29,6 +29,7 @@
 #include "qemu/osdep.h"
 #include "hw/acpi/acpi.h"
 #include "hw/acpi/aml-build.h"
+#include "hw/nvram/fw_cfg.h"
 #include "hw/mem/nvdimm.h"
 
 static int nvdimm_plugged_device_list(Object *obj, void *opaque)
@@ -370,6 +371,40 @@ static void nvdimm_build_nfit(GSList *device_list, GArray 
*table_offsets,
 g_array_free(structures, true);
 }
 
+static uint64_t
+nvdimm_dsm_read(void *opaque, hwaddr addr, unsigned size)
+{
+return 0;
+}
+
+static void
+nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
+{
+}
+
+static const MemoryRegionOps nvdimm_dsm_ops = {
+.read = nvdimm_dsm_read,
+.write = nvdimm_dsm_write,
+.endianness = DEVICE_LITTLE_ENDIAN,
+.valid = {
+.min_access_size = 4,
+.max_access_size = 4,
+},
+};
+
+void nvdimm_init_acpi_state(AcpiNVDIMMState *state, MemoryRegion *io,
+FWCfgState *fw_cfg, Object *owner)
+{
+memory_region_init_io(&state->io_mr, owner, &nvdimm_dsm_ops, state,
+  "nvdimm-acpi-io", NVDIMM_ACPI_IO_LEN);
+memory_region_add_subregion(io, NVDIMM_ACPI_IO_BASE, &state->io_mr);
+
+state->dsm_mem = g_array_new(false, true /* clear */, 1);
+acpi_data_push(state->dsm_mem, TARGET_PAGE_SIZE);
+fw_cfg_add_file(fw_cfg, NVDIMM_DSM_MEM_FILE, state->dsm_mem->data,
+state->dsm_mem->len);
+}
+
 #define NVDIMM_COMMON_DSM  "NCAL"
 
 static void nvdimm_build_common_dsm(Aml *dev)
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 6d8d23b..f8ff89d 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -38,7 +38,6 @@
 #include "hw/loader.h"
 #include "hw/isa/isa.h"
 #include "hw/acpi/memory_hotplug.h"
-#include "hw/mem/nvdimm.h"
 #include "sysemu/tpm.h"
 #include "hw/acpi/tpm.h"
 #include "sysemu/tpm_backend.h"
@@ -2582,13 +2581,6 @@ static bool acpi_has_iommu(void)
 return intel_iommu && !ambiguous;
 }
 
-static bool acpi_has_nvdimm(void)
-{
-PCMachineState *pcms = PC_MACHINE(qdev_get_machine());
-
-return pcms->nvdimm;
-}
-
 static
 void acpi_build(AcpiBuildTables *tables)
 {
@@ -2673,7 +2665,7 @@ void acpi_build(AcpiBuildTables *tables)
 build_dmar_q35(tables_blob, tables->linker);
 }
 
-if (acpi_has_nvdimm()) {
+if (pcms->acpi_nvdimm_state.is_enabled) {
 nvdimm_build_acpi(table_offsets, tables_blob, tables->linker);
 }
 
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 0aeefd2..5194acd 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1854,14 +1854,14 @@ static bool pc_machine_get_nvdimm(Object *obj, Error 
**errp)
 {
 PCMachineState *pcms = PC_MACHINE(obj);
 
-return pcms->nvdimm;
+return pcms->acpi_nvdimm_state.is_enabled;
 }
 
 static void pc_machine_set_nvdimm(Object *obj, bool value, Error **errp)
 {
 PCMachineState *pcms = PC_MACHINE(obj);
 
-pcms->nvdimm = value;
+pcms->acpi_nvdimm_state.is_enabled = value;
 }
 
 static void pc_machine_initfn(Object *obj)
@@ -1900,7 +1900,7 @@ static void pc_machine_initfn(Object *obj)
 &error_abort);
 
 /* nvdimm is disabled on default. */
-pcms->nvdimm = false;
+pcms->acpi_nvdimm_state.is_enabled = false;
 object_property_add_bool(obj, PC_MACHINE_NVDIMM, pc_machine_get_nvdimm,
  pc_machine_set_nvdimm, &error_abort);
 }
diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index 6f8c2cd..6a69b23 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -2

[Qemu-devel] [PATCH 5/9] acpi: add build_append_named_dword, returning an offset in buffer

2016-03-01 Thread Xiao Guangrong

From: "Michael S. Tsirkin" 

This is a very limited form of support for runtime patching -
similar in functionality to what we can do with ACPI_EXTRACT
macros in python, but implemented in C.

This is to allow ACPI code direct access to data tables -
which is exactly what DataTableRegion is there for, except
no known windows release so far implements DataTableRegion.

Signed-off-by: Michael S. Tsirkin 
Signed-off-by: Xiao Guangrong 
---
 hw/acpi/aml-build.c | 28 
 include/hw/acpi/aml-build.h |  3 +++
 2 files changed, 31 insertions(+)

diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index f26fa26..ab89ca6 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -258,6 +258,34 @@ static void build_append_int(GArray *table, uint64_t value)
 }
 }
 
+/*
+ * Build NAME(XXXX, 0x00000000) where 0x00000000 is encoded as a dword,
+ * and return the offset to 0x00000000 for runtime patching.
+ *
+ * Warning: runtime patching is best avoided. Only use this as
+ * a replacement for DataTableRegion (for guests that don't
+ * support it).
+ */
+int
+build_append_named_dword(GArray *array, const char *name_format, ...)
+{
+int offset;
+va_list ap;
+
+build_append_byte(array, 0x08); /* NameOp */
+va_start(ap, name_format);
+build_append_namestringv(array, name_format, ap);
+va_end(ap);
+
+build_append_byte(array, 0x0C); /* DWordPrefix */
+
+offset = array->len;
+build_append_int_noprefix(array, 0x, 4);
+assert(array->len == offset + 4);
+
+return offset;
+}
+
 static GPtrArray *alloc_list;
 
 static Aml *aml_alloc(void)
diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
index 07b2d48..7404e2a 100644
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h
@@ -370,4 +370,7 @@ void
 build_rsdt(GArray *table_data, GArray *linker, GArray *table_offsets,
const char *oem_id, const char *oem_table_id);
 
+int
+build_append_named_dword(GArray *array, const char *name_format, ...);
+
 #endif
-- 
1.8.3.1

[Qemu-devel] [PATCH 2/9] acpi: add aml_concatenate()

2016-03-01 Thread Xiao Guangrong

It will be used by nvdimm acpi

Signed-off-by: Xiao Guangrong 
---
 hw/acpi/aml-build.c | 7 +++
 include/hw/acpi/aml-build.h | 1 +
 2 files changed, 8 insertions(+)

diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index 45b7f0a..bb0cf52 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -1437,6 +1437,13 @@ Aml *aml_alias(const char *source_object, const char 
*alias_object)
 return var;
 }
 
+/* ACPI 1.0b: 16.2.5.4 Type 2 Opcodes Encoding: DefConcat */
+Aml *aml_concatenate(Aml *source1, Aml *source2, Aml *target)
+{
+return build_opcode_2arg_dst(0x73 /* ConcatOp */, source1, source2,
+ target);
+}
+
 void
 build_header(GArray *linker, GArray *table_data,
  AcpiTableHeader *h, const char *sig, int len, uint8_t rev,
diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
index 8ef10ad..735c34a 100644
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h
@@ -355,6 +355,7 @@ Aml *aml_touuid(const char *uuid);
 Aml *aml_unicode(const char *str);
 Aml *aml_derefof(Aml *arg);
 Aml *aml_sizeof(Aml *arg);
+Aml *aml_concatenate(Aml *source1, Aml *source2, Aml *target);
 
 void
 build_header(GArray *linker, GArray *table_data,
-- 
1.8.3.1

[Qemu-devel] [PATCH 9/9] nvdimm acpi: add _CRS

2016-03-01 Thread Xiao Guangrong

As Igor suggested, we can report the BIOS-patched operation region
so that OSPM can see that this particular range is in use and be able to
notice conflicts if they happen some day

Signed-off-by: Xiao Guangrong 
---
 hw/acpi/nvdimm.c | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index e0b483a..a6359cc 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -566,6 +566,7 @@ static void nvdimm_build_ssdt(GSList *device_list, GArray 
*table_offsets,
   GArray *table_data, GArray *linker)
 {
 Aml *ssdt, *sb_scope, *dev, *field;
+Aml *min_addr, *max_addr, *mr32, *method, *crs;
 int mem_addr_offset, nvdimm_ssdt;
 
 acpi_add_table(table_offsets, table_data);
@@ -590,6 +591,32 @@ static void nvdimm_build_ssdt(GSList *device_list, GArray 
*table_offsets,
  */
 aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0012")));
 
+/*
+ * report the dsm memory so that OSPM could see that particular range is
+ * in use and be able to notice conflicts if it happens some day.
+ */
+method = aml_method("_CRS", 0, AML_SERIALIZED);
+crs = aml_resource_template();
+aml_append(crs, aml_dword_memory(AML_POS_DECODE, AML_MIN_FIXED,
+ AML_MAX_FIXED, AML_CACHEABLE,
+ AML_READ_WRITE,
+ 0, 0x0, 0xFFFE, 0,
+ TARGET_PAGE_SIZE));
+aml_append(method, aml_name_decl("MR32", crs));
+mr32 = aml_name("MR32");
+aml_append(method, aml_create_dword_field(mr32, aml_int(10), "MIN"));
+aml_append(method, aml_create_dword_field(mr32, aml_int(14), "MAX"));
+
+min_addr = aml_name("MIN");
+max_addr = aml_name("MAX");
+
+aml_append(method, aml_store(aml_name(NVDIMM_ACPI_MEM_ADDR), min_addr));
+aml_append(method, aml_add(min_addr, aml_int(TARGET_PAGE_SIZE),
+   max_addr));
+aml_append(method, aml_decrement(max_addr));
+aml_append(method, aml_return(mr32));
+aml_append(dev, method);
+
 /* map DSM memory and IO into ACPI namespace. */
 aml_append(dev, aml_operation_region("NPIO", AML_SYSTEM_IO,
aml_int(NVDIMM_ACPI_IO_BASE), NVDIMM_ACPI_IO_LEN));
-- 
1.8.3.1

[Qemu-devel] [PATCH 6/9] nvdimm acpi: introduce patched dsm memory

2016-03-01 Thread Xiao Guangrong

The dsm memory is used to save the input parameters and store
the dsm result which is filled by QEMU.

The address of dsm memory is decided by bios and patched into
int32 object named "MEMA"

Signed-off-by: Xiao Guangrong 
---
 hw/acpi/nvdimm.c | 20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 8568b20..90032e5 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -29,6 +29,7 @@
 #include "qemu/osdep.h"
 #include "hw/acpi/acpi.h"
 #include "hw/acpi/aml-build.h"
+#include "hw/acpi/bios-linker-loader.h"
 #include "hw/nvram/fw_cfg.h"
 #include "hw/mem/nvdimm.h"
 
@@ -406,6 +407,7 @@ void nvdimm_init_acpi_state(AcpiNVDIMMState *state, 
MemoryRegion *io,
 }
 
 #define NVDIMM_COMMON_DSM  "NCAL"
+#define NVDIMM_ACPI_MEM_ADDR   "MEMA"
 
 static void nvdimm_build_common_dsm(Aml *dev)
 {
@@ -471,6 +473,7 @@ static void nvdimm_build_ssdt(GSList *device_list, GArray 
*table_offsets,
   GArray *table_data, GArray *linker)
 {
 Aml *ssdt, *sb_scope, *dev;
+int mem_addr_offset, nvdimm_ssdt;
 
 acpi_add_table(table_offsets, table_data);
 
@@ -500,13 +503,24 @@ static void nvdimm_build_ssdt(GSList *device_list, GArray 
*table_offsets,
 nvdimm_build_nvdimm_devices(device_list, dev);
 
 aml_append(sb_scope, dev);
-
 aml_append(ssdt, sb_scope);
+
+nvdimm_ssdt = table_data->len;
+
 /* copy AML table into ACPI tables blob and patch header there */
 g_array_append_vals(table_data, ssdt->buf->data, ssdt->buf->len);
+mem_addr_offset = build_append_named_dword(table_data,
+   NVDIMM_ACPI_MEM_ADDR);
+
+bios_linker_loader_alloc(linker, NVDIMM_DSM_MEM_FILE, TARGET_PAGE_SIZE,
+ false /* high memory */);
+bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE,
+   NVDIMM_DSM_MEM_FILE, table_data,
+   table_data->data + mem_addr_offset,
+   sizeof(uint32_t));
 build_header(linker, table_data,
-(void *)(table_data->data + table_data->len - ssdt->buf->len),
-"SSDT", ssdt->buf->len, 1, NULL, "NVDIMM");
+(void *)(table_data->data + nvdimm_ssdt),
+"SSDT", table_data->len - nvdimm_ssdt, 1, NULL, "NVDIMM");
 free_aml_allocator();
 }
 
-- 
1.8.3.1

Re: [Qemu-devel] [PATCH 1/2] hw/mips: add initial Cluster Power Controller support

2016-03-01 Thread Leon Alrae

On 26/02/16 16:49, Peter Maydell wrote:
> On 26 February 2016 at 16:19, Leon Alrae  wrote:
>> Cluster Power Controller (CPC) is responsible for power management in
>> multiprocessing system. It provides registers to control the power and the
>> clock frequency of the individual elements in the system.
>>
>> This patch implements only three registers that are used to control the
>> power state of each VP on a single core:
>> * VP Run is a write-only register used to set each VP to the run state
>> * VP Stop is a write-only register used to set each VP to the suspend state
>> * VP Running is a read-only register indicating the run state of each VP
>>
>> Signed-off-by: Leon Alrae 
>> ---
>>  default-configs/mips-softmmu.mak |   1 +
>>  default-configs/mips64-softmmu.mak   |   1 +
>>  default-configs/mips64el-softmmu.mak |   1 +
>>  default-configs/mipsel-softmmu.mak   |   1 +
> 
> A separate thing, but maybe it would be worth having a mips-softmmu-common.mak
> that all the mips*-softmmu.mak include to avoid having to repeat CONFIG 
> defines
> in four places like this.

Good idea. I'll prepare a separate patch for that.

And the comments below will be addressed in v2 (it'll also include a few
extra changes).

Thanks,
Leon

> 
>> +static void mips_cpc_init(Object *obj)
>> +{
>> +SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
>> +MIPSCPCState *s = MIPS_CPC(obj);
>> +
>> +memory_region_init_io(&s->mr, OBJECT(s), &cpc_ops, s, "mips-cpc",
>> +  CPC_ADDRSPACE_SZ);
>> +sysbus_init_mmio(sbd, &s->mr);
>> +}
>> +
>> +static const TypeInfo mips_cpc_info = {
>> +.name  = TYPE_MIPS_CPC,
>> +.parent= TYPE_SYS_BUS_DEVICE,
>> +.instance_size = sizeof(MIPSCPCState),
>> +.instance_init = mips_cpc_init,
>> +};
> 
> I suspect you need a reset method.
> 
>> +
>> +static void mips_cpc_register_types(void)
>> +{
>> +type_register_static(&mips_cpc_info);
>> +}
>> +
>> +type_init(mips_cpc_register_types)
> 
>> +typedef struct MIPSCPCState {
>> +SysBusDevice parent_obj;
>> +
>> +MemoryRegion mr;
>> +uint64_t vp_running; /* Indicates which VP's are in the run state */
> 
> This is state, so you need a VMState structure to migrate it correctly.
> 
>> +} MIPSCPCState;
>> +
>> +#endif /* MIPS_CPC_H */
>> --
>> 2.1.0
> 
> thanks
> -- PMM
>

[Qemu-devel] [PATCH 7/9] nvdimm acpi: let qemu handle _DSM method

2016-03-01 Thread Xiao Guangrong

If dsm memory is successfully patched, we let qemu fully emulate
the dsm method

This patch saves the _DSM input parameters into dsm memory, tells QEMU the
dsm memory address, then fetches the result from the dsm memory

Signed-off-by: Xiao Guangrong 
---
 hw/acpi/nvdimm.c | 117 ---
 1 file changed, 112 insertions(+), 5 deletions(-)

diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 90032e5..781f6c1 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -372,6 +372,24 @@ static void nvdimm_build_nfit(GSList *device_list, GArray 
*table_offsets,
 g_array_free(structures, true);
 }
 
+struct NvdimmDsmIn {
+uint32_t handle;
+uint32_t revision;
+uint32_t function;
+   /* the remaining size in the page is used by arg3. */
+union {
+uint8_t arg3[0];
+};
+} QEMU_PACKED;
+typedef struct NvdimmDsmIn NvdimmDsmIn;
+
+struct NvdimmDsmOut {
+/* the size of buffer filled by QEMU. */
+uint32_t len;
+uint8_t data[0];
+} QEMU_PACKED;
+typedef struct NvdimmDsmOut NvdimmDsmOut;
+
 static uint64_t
 nvdimm_dsm_read(void *opaque, hwaddr addr, unsigned size)
 {
@@ -411,11 +429,18 @@ void nvdimm_init_acpi_state(AcpiNVDIMMState *state, 
MemoryRegion *io,
 
 static void nvdimm_build_common_dsm(Aml *dev)
 {
-Aml *method, *ifctx, *function;
+Aml *method, *ifctx, *function, *dsm_mem, *unpatched, *result_size;
 uint8_t byte_list[1];
 
-method = aml_method(NVDIMM_COMMON_DSM, 4, AML_NOTSERIALIZED);
+method = aml_method(NVDIMM_COMMON_DSM, 4, AML_SERIALIZED);
 function = aml_arg(2);
+dsm_mem = aml_name(NVDIMM_ACPI_MEM_ADDR);
+
+/*
+ * do not support any method if DSM memory address has not been
+ * patched.
+ */
+unpatched = aml_if(aml_equal(dsm_mem, aml_int(0x0)));
 
 /*
  * function 0 is called to inquire what functions are supported by
@@ -424,12 +449,36 @@ static void nvdimm_build_common_dsm(Aml *dev)
 ifctx = aml_if(aml_equal(function, aml_int(0)));
 byte_list[0] = 0 /* No function Supported */;
 aml_append(ifctx, aml_return(aml_buffer(1, byte_list)));
-aml_append(method, ifctx);
+aml_append(unpatched, ifctx);
 
 /* No function is supported yet. */
 byte_list[0] = 1 /* Not Supported */;
-aml_append(method, aml_return(aml_buffer(1, byte_list)));
+aml_append(unpatched, aml_return(aml_buffer(1, byte_list)));
+aml_append(method, unpatched);
+
+/*
+ * Currently no function is supported for both root device and NVDIMM
+ * devices, let's temporarily set handle to 0x0 at this time.
+ */
+aml_append(method, aml_store(aml_int(0x0), aml_name("HDLE")));
+aml_append(method, aml_store(aml_arg(1), aml_name("REVS")));
+aml_append(method, aml_store(aml_arg(2), aml_name("FUNC")));
 
+/*
+ * tell QEMU about the real address of DSM memory, then QEMU begins
+ * to emulate the method and fills the result to DSM memory.
+ */
+aml_append(method, aml_store(dsm_mem, aml_name("NTFI")));
+
+result_size = aml_local(1);
+aml_append(method, aml_store(aml_name("RLEN"), result_size));
+aml_append(method, aml_store(aml_shiftleft(result_size, aml_int(3)),
+ result_size));
+aml_append(method, aml_create_field(aml_name("ODAT"), aml_int(0),
+result_size, "OBUF"));
+aml_append(method, aml_concatenate(aml_buffer(0, NULL), aml_name("OBUF"),
+   aml_arg(6)));
+aml_append(method, aml_return(aml_arg(6)));
 aml_append(dev, method);
 }
 
@@ -472,7 +521,7 @@ static void nvdimm_build_nvdimm_devices(GSList 
*device_list, Aml *root_dev)
 static void nvdimm_build_ssdt(GSList *device_list, GArray *table_offsets,
   GArray *table_data, GArray *linker)
 {
-Aml *ssdt, *sb_scope, *dev;
+Aml *ssdt, *sb_scope, *dev, *field;
 int mem_addr_offset, nvdimm_ssdt;
 
 acpi_add_table(table_offsets, table_data);
@@ -497,6 +546,64 @@ static void nvdimm_build_ssdt(GSList *device_list, GArray 
*table_offsets,
  */
 aml_append(dev, aml_name_decl("_HID", aml_string("ACPI0012")));
 
+/* map DSM memory and IO into ACPI namespace. */
+aml_append(dev, aml_operation_region("NPIO", AML_SYSTEM_IO,
+   aml_int(NVDIMM_ACPI_IO_BASE), NVDIMM_ACPI_IO_LEN));
+aml_append(dev, aml_operation_region("NRAM", AML_SYSTEM_MEMORY,
+   aml_name(NVDIMM_ACPI_MEM_ADDR), TARGET_PAGE_SIZE));
+
+/*
+ * DSM notifier:
+ * NTFI: write the address of DSM memory and notify QEMU to emulate
+ *   the access.
+ *
+ * It is the IO port so that accessing them will cause VM-exit, the
+ * control will be transferred to QEMU.
+ */
+field = aml_field("NPIO", AML_DWORD_ACC, AML_NOLOCK, AML_PRESERVE);
+aml_append(field, aml_named_field("NTFI",
+   sizeof(uint32_t) * BITS_PER_BYTE));
+aml_append(dev, field);
+
+/*
+ * DSM input:
+

[Qemu-devel] [PATCH 3/9] acpi: allow using object as offset for OperationRegion

2016-03-01 Thread Xiao Guangrong

Extend aml_operation_region() to use object as offset

Reviewed-by: Igor Mammedov 
Signed-off-by: Xiao Guangrong 
---
 hw/acpi/aml-build.c |  4 ++--
 hw/i386/acpi-build.c| 31 ---
 include/hw/acpi/aml-build.h |  2 +-
 3 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index bb0cf52..f26fa26 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -942,14 +942,14 @@ Aml *aml_package(uint8_t num_elements)
 
 /* ACPI 1.0b: 16.2.5.2 Named Objects Encoding: DefOpRegion */
 Aml *aml_operation_region(const char *name, AmlRegionSpace rs,
-  uint32_t offset, uint32_t len)
+  Aml *offset, uint32_t len)
 {
 Aml *var = aml_alloc();
 build_append_byte(var->buf, 0x5B); /* ExtOpPrefix */
 build_append_byte(var->buf, 0x80); /* OpRegionOp */
 build_append_namestring(var->buf, "%s", name);
 build_append_byte(var->buf, rs);
-build_append_int(var->buf, offset);
+aml_append(var, offset);
 build_append_int(var->buf, len);
 return var;
 }
diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index b654b0d..6d8d23b 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -993,7 +993,7 @@ static void build_processor_devices(Aml *sb_scope, unsigned 
acpi_cpus,
 aml_append(sb_scope, dev);
 /* declare CPU hotplug MMIO region and PRS field to access it */
 aml_append(sb_scope, aml_operation_region(
-"PRST", AML_SYSTEM_IO, pm->cpu_hp_io_base, pm->cpu_hp_io_len));
+"PRST", AML_SYSTEM_IO, aml_int(pm->cpu_hp_io_base), 
pm->cpu_hp_io_len));
 field = aml_field("PRST", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE);
 aml_append(field, aml_named_field("PRS", 256));
 aml_append(sb_scope, field);
@@ -1078,7 +1078,7 @@ static void build_memory_devices(Aml *sb_scope, int 
nr_mem,
 
 aml_append(scope, aml_operation_region(
 MEMORY_HOTPLUG_IO_REGION, AML_SYSTEM_IO,
-io_base, io_len)
+aml_int(io_base), io_len)
 );
 
 field = aml_field(MEMORY_HOTPLUG_IO_REGION, AML_DWORD_ACC,
@@ -1192,7 +1192,8 @@ static void build_hpet_aml(Aml *table)
 aml_append(dev, aml_name_decl("_UID", zero));
 
 aml_append(dev,
-aml_operation_region("HPTM", AML_SYSTEM_MEMORY, HPET_BASE, HPET_LEN));
+aml_operation_region("HPTM", AML_SYSTEM_MEMORY, aml_int(HPET_BASE),
+ HPET_LEN));
 field = aml_field("HPTM", AML_DWORD_ACC, AML_LOCK, AML_PRESERVE);
 aml_append(field, aml_named_field("VEND", 32));
 aml_append(field, aml_named_field("PRD", 32));
@@ -1430,7 +1431,7 @@ static void build_dbg_aml(Aml *table)
 Aml *idx = aml_local(2);
 
 aml_append(scope,
-   aml_operation_region("DBG", AML_SYSTEM_IO, 0x0402, 0x01));
+   aml_operation_region("DBG", AML_SYSTEM_IO, aml_int(0x0402), 0x01));
 field = aml_field("DBG", AML_BYTE_ACC, AML_NOLOCK, AML_PRESERVE);
 aml_append(field, aml_named_field("DBGB", 8));
 aml_append(scope, field);
@@ -1770,10 +1771,10 @@ static void build_q35_isa_bridge(Aml *table)
 
 /* ICH9 PCI to ISA irq remapping */
 aml_append(dev, aml_operation_region("PIRQ", AML_PCI_CONFIG,
- 0x60, 0x0C));
+ aml_int(0x60), 0x0C));
 
 aml_append(dev, aml_operation_region("LPCD", AML_PCI_CONFIG,
- 0x80, 0x02));
+ aml_int(0x80), 0x02));
 field = aml_field("LPCD", AML_ANY_ACC, AML_NOLOCK, AML_PRESERVE);
 aml_append(field, aml_named_field("COMA", 3));
 aml_append(field, aml_reserved_field(1));
@@ -1785,7 +1786,7 @@ static void build_q35_isa_bridge(Aml *table)
 aml_append(dev, field);
 
 aml_append(dev, aml_operation_region("LPCE", AML_PCI_CONFIG,
- 0x82, 0x02));
+ aml_int(0x82), 0x02));
 /* enable bits */
 field = aml_field("LPCE", AML_ANY_ACC, AML_NOLOCK, AML_PRESERVE);
 aml_append(field, aml_named_field("CAEN", 1));
@@ -1808,7 +1809,7 @@ static void build_piix4_pm(Aml *table)
 aml_append(dev, aml_name_decl("_ADR", aml_int(0x00010003)));
 
 aml_append(dev, aml_operation_region("P13C", AML_PCI_CONFIG,
- 0x00, 0xff));
+ aml_int(0x00), 0xff));
 aml_append(scope, dev);
 aml_append(table, scope);
 }
@@ -1825,7 +1826,7 @@ static void build_piix4_isa_bridge(Aml *table)
 
 /* PIIX PCI to ISA irq remapping */
 aml_append(dev, aml_operation_region("P40C", AML_PCI_CONFIG,
- 0x60, 0x04));
+ aml_int(0x60), 0x04));
 /* enable bits */
 field = aml_field("^PX13.P13C", AML_ANY_ACC, AML_NOLOCK, AML_PRESERVE);
 /* Offset(0x5f),, 7, */
@@ -1854,20 +1855,20 @@ static void build_piix4_pc

[Qemu-devel] [PATCH 8/9] nvdimm acpi: emulate dsm method

2016-03-01 Thread Xiao Guangrong

Emulate dsm method after IO VM-exit

Currently, we only introduce the framework and no function is actually
supported

Signed-off-by: Xiao Guangrong 
---
 hw/acpi/aml-build.c |  2 +-
 hw/acpi/nvdimm.c| 44 
 include/hw/acpi/aml-build.h |  1 +
 include/hw/mem/nvdimm.h |  8 
 4 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/hw/acpi/aml-build.c b/hw/acpi/aml-build.c
index ab89ca6..da11bf8 100644
--- a/hw/acpi/aml-build.c
+++ b/hw/acpi/aml-build.c
@@ -227,7 +227,7 @@ static void build_extop_package(GArray *package, uint8_t op)
 build_prepend_byte(package, 0x5B); /* ExtOpPrefix */
 }
 
-static void build_append_int_noprefix(GArray *table, uint64_t value, int size)
+void build_append_int_noprefix(GArray *table, uint64_t value, int size)
 {
 int i;
 
diff --git a/hw/acpi/nvdimm.c b/hw/acpi/nvdimm.c
index 781f6c1..e0b483a 100644
--- a/hw/acpi/nvdimm.c
+++ b/hw/acpi/nvdimm.c
@@ -393,12 +393,56 @@ typedef struct NvdimmDsmOut NvdimmDsmOut;
 static uint64_t
 nvdimm_dsm_read(void *opaque, hwaddr addr, unsigned size)
 {
+fprintf(stderr, "BUG: we never read _DSM IO Port.\n");
 return 0;
 }
 
 static void
 nvdimm_dsm_write(void *opaque, hwaddr addr, uint64_t val, unsigned size)
 {
+NvdimmDsmIn *in;
+GArray *out;
+uint32_t buf_size;
+hwaddr dsm_mem_addr = val;
+
+nvdimm_debug("dsm memory address %#lx.\n", dsm_mem_addr);
+
+/*
+ * The DSM memory is mapped to guest address space so an evil guest
+ * can change its content while we are doing DSM emulation. Avoid
+ * this by copying DSM memory to QEMU local memory.
+ */
+in = g_malloc(TARGET_PAGE_SIZE);
+cpu_physical_memory_read(dsm_mem_addr, in, TARGET_PAGE_SIZE);
+
+le32_to_cpus(&in->revision);
+le32_to_cpus(&in->function);
+le32_to_cpus(&in->handle);
+
+nvdimm_debug("Revision %#x Handler %#x Function %#x.\n", in->revision,
+ in->handle, in->function);
+
+out = g_array_new(false, true /* clear */, 1);
+
+/*
+ * function 0 is called to inquire what functions are supported by
+ * OSPM
+ */
+if (in->function == 0) {
+build_append_int_noprefix(out, 0 /* No function Supported */,
+  sizeof(uint8_t));
+} else {
+/* No function is supported yet. */
+build_append_int_noprefix(out, 1 /* Not Supported */,
+  sizeof(uint8_t));
+}
+
+buf_size = cpu_to_le32(out->len);
+cpu_physical_memory_write(dsm_mem_addr, &buf_size, sizeof(buf_size));
+cpu_physical_memory_write(dsm_mem_addr + sizeof(buf_size), out->data,
+  out->len);
+g_free(in);
+g_array_free(out, true);
 }
 
 static const MemoryRegionOps nvdimm_dsm_ops = {
diff --git a/include/hw/acpi/aml-build.h b/include/hw/acpi/aml-build.h
index 7404e2a..b0826f0 100644
--- a/include/hw/acpi/aml-build.h
+++ b/include/hw/acpi/aml-build.h
@@ -357,6 +357,7 @@ Aml *aml_derefof(Aml *arg);
 Aml *aml_sizeof(Aml *arg);
 Aml *aml_concatenate(Aml *source1, Aml *source2, Aml *target);
 
+void build_append_int_noprefix(GArray *table, uint64_t value, int size);
 void
 build_header(GArray *linker, GArray *table_data,
  AcpiTableHeader *h, const char *sig, int len, uint8_t rev,
diff --git a/include/hw/mem/nvdimm.h b/include/hw/mem/nvdimm.h
index 634c60b..aaa2608 100644
--- a/include/hw/mem/nvdimm.h
+++ b/include/hw/mem/nvdimm.h
@@ -25,6 +25,14 @@
 
 #include "hw/mem/pc-dimm.h"
 
+#define NVDIMM_DEBUG 0
+#define nvdimm_debug(fmt, ...)\
+do {  \
+if (NVDIMM_DEBUG) {   \
+fprintf(stderr, "nvdimm: " fmt, ## __VA_ARGS__);  \
+} \
+} while (0)
+
 #define TYPE_NVDIMM "nvdimm"
 
 #define NVDIMM_DSM_MEM_FILE "etc/acpi/nvdimm-mem"
-- 
1.8.3.1

Re: [Qemu-devel] [PATCH 03/38] event_notifier: Make event_notifier_init_fd() #ifdef CONFIG_EVENTFD

2016-03-01 Thread Marc-André Lureau

Hi

On Mon, Feb 29, 2016 at 7:40 PM, Markus Armbruster  wrote:
> Event notifiers are designed for eventfd(2).  They can fall back to
> pipes, but according to Paolo, event_notifier_init_fd() really
> requires the real thing, and should therefore be under #ifdef
> CONFIG_EVENTFD.  Do that.
>
> Its only user is ivshmem, which is currently CONFIG_POSIX.  Narrow it
> to CONFIG_EVENTFD.
>
> Cc: Paolo Bonzini 
> Signed-off-by: Markus Armbruster 
> ---
>  default-configs/pci.mak | 2 +-
>  util/event_notifier-posix.c | 6 ++
>  2 files changed, 7 insertions(+), 1 deletion(-)
>
> diff --git a/default-configs/pci.mak b/default-configs/pci.mak
> index 4fa9a28..9c8bc68 100644
> --- a/default-configs/pci.mak
> +++ b/default-configs/pci.mak
> @@ -36,5 +36,5 @@ CONFIG_SDHCI=y
>  CONFIG_EDU=y
>  CONFIG_VGA=y
>  CONFIG_VGA_PCI=y
> -CONFIG_IVSHMEM=$(CONFIG_POSIX)
> +CONFIG_IVSHMEM=$(CONFIG_EVENTFD)

This narrows ivshmem to OSes that provide eventfd. Eventually, after the
split, it should be easier to bring back POSIX support for ivshmem-plain,
but it's important to highlight this change.

>  CONFIG_ROCKER=y
> diff --git a/util/event_notifier-posix.c b/util/event_notifier-posix.c
> index 2e30e74..c9657a6 100644
> --- a/util/event_notifier-posix.c
> +++ b/util/event_notifier-posix.c
> @@ -20,11 +20,17 @@
>  #include 
>  #endif
>
> +#ifdef CONFIG_EVENTFD
> +/*
> + * Initialize @e with existing file descriptor @fd.
> + * @fd must be a genuine eventfd object, emulation with pipe won't do.
> + */
>  void event_notifier_init_fd(EventNotifier *e, int fd)
>  {
>  e->rfd = fd;
>  e->wfd = fd;
>  }
> +#endif
>
>  int event_notifier_init(EventNotifier *e, int active)
>  {
> --
> 2.4.3
>
>



-- 
Marc-André Lureau

Re: [Qemu-devel] [PATCH 04/38] tests/libqos/pci-pc: Fix qpci_pc_iomap() to map BARs aligned

2016-03-01 Thread Marc-André Lureau

On Mon, Feb 29, 2016 at 7:40 PM, Markus Armbruster  wrote:
> qpci_pc_iomap() maps BARs one after the other, without padding.  This
> is wrong.  PCI Local Bus Specification Revision 3.0, 6.2.5.1. Address
> Maps: "all address spaces used are a power of two in size and are
> naturally aligned".  That's because the size of a BAR is given by the
> number of address bits the device decodes, and the BAR needs to be
> mapped at a multiple of that size to ensure the address decoding
> works.
>
> Fix qpci_pc_iomap() accordingly.  This takes care of a FIXME in
> ivshmem-test.
>
> Signed-off-by: Markus Armbruster 

Reviewed-by: Marc-André Lureau 

Neat, thanks for fixing my fixme ;)

> ---
>  tests/ivshmem-test.c  | 17 -
>  tests/libqos/pci-pc.c |  8 ++--
>  2 files changed, 14 insertions(+), 11 deletions(-)
>
> diff --git a/tests/ivshmem-test.c b/tests/ivshmem-test.c
> index e184c67..e118377 100644
> --- a/tests/ivshmem-test.c
> +++ b/tests/ivshmem-test.c
> @@ -110,19 +110,18 @@ static void setup_vm_cmd(IVState *s, const char *cmd, 
> bool msix)
>  s->pcibus = qpci_init_pc();
>  s->dev = get_device(s->pcibus);
>
> -/* FIXME: other bar order fails, mappings changes */
> -s->mem_base = qpci_iomap(s->dev, 2, &barsize);
> -g_assert_nonnull(s->mem_base);
> -g_assert_cmpuint(barsize, ==, TMPSHMSIZE);
> -
> -if (msix) {
> -qpci_msix_enable(s->dev);
> -}
> -
>  s->reg_base = qpci_iomap(s->dev, 0, &barsize);
>  g_assert_nonnull(s->reg_base);
>  g_assert_cmpuint(barsize, ==, 256);
>
> +if (msix) {
> +qpci_msix_enable(s->dev);
> +}
> +
> +s->mem_base = qpci_iomap(s->dev, 2, &barsize);
> +g_assert_nonnull(s->mem_base);
> +g_assert_cmpuint(barsize, ==, TMPSHMSIZE);
> +
>  qpci_device_enable(s->dev);
>  }
>
> diff --git a/tests/libqos/pci-pc.c b/tests/libqos/pci-pc.c
> index 08167c0..77f15e5 100644
> --- a/tests/libqos/pci-pc.c
> +++ b/tests/libqos/pci-pc.c
> @@ -184,7 +184,9 @@ static void *qpci_pc_iomap(QPCIBus *bus, QPCIDevice *dev, 
> int barno, uint64_t *s
>  if (io_type == PCI_BASE_ADDRESS_SPACE_IO) {
>  uint16_t loc;
>
> -g_assert((s->pci_iohole_alloc + size) <= s->pci_iohole_size);
> +g_assert(QEMU_ALIGN_UP(s->pci_iohole_alloc, size) + size
> + <= s->pci_iohole_size);
> +s->pci_iohole_alloc = QEMU_ALIGN_UP(s->pci_iohole_alloc, size);
>  loc = s->pci_iohole_start + s->pci_iohole_alloc;
>  s->pci_iohole_alloc += size;
>
> @@ -194,7 +196,9 @@ static void *qpci_pc_iomap(QPCIBus *bus, QPCIDevice *dev, 
> int barno, uint64_t *s
>  } else {
>  uint64_t loc;
>
> -g_assert((s->pci_hole_alloc + size) <= s->pci_hole_size);
> +g_assert(QEMU_ALIGN_UP(s->pci_hole_alloc, size) + size
> + <= s->pci_hole_size);
> +s->pci_hole_alloc = QEMU_ALIGN_UP(s->pci_hole_alloc, size);
>  loc = s->pci_hole_start + s->pci_hole_alloc;
>  s->pci_hole_alloc += size;
>
> --
> 2.4.3
>
>



-- 
Marc-André Lureau

Re: [Qemu-devel] [PATCH 05/38] ivshmem-test: Improve test case /ivshmem/single

2016-03-01 Thread Marc-André Lureau

On Mon, Feb 29, 2016 at 7:40 PM, Markus Armbruster  wrote:
> Test state of registers after reset.
>
> Test reading Interrupt Status clears it.
>
> Test (invalid) read of Doorbell.
>
> Add more comments.
>
> Signed-off-by: Markus Armbruster 

Reviewed-by: Marc-André Lureau 


> ---
>  tests/ivshmem-test.c | 23 ---
>  1 file changed, 16 insertions(+), 7 deletions(-)
>
> diff --git a/tests/ivshmem-test.c b/tests/ivshmem-test.c
> index e118377..ba4d9f1 100644
> --- a/tests/ivshmem-test.c
> +++ b/tests/ivshmem-test.c
> @@ -143,32 +143,41 @@ static void test_ivshmem_single(void)
>  setup_vm(&state);
>  s = &state;
>
> -/* valid io */
> -out_reg(s, INTRMASK, 0);
> -in_reg(s, INTRSTATUS);
> -in_reg(s, IVPOSITION);
> +/* initial state of readable registers */
> +g_assert_cmpuint(in_reg(s, INTRMASK), ==, 0);
> +g_assert_cmpuint(in_reg(s, INTRSTATUS), ==, 0);
> +g_assert_cmpuint(in_reg(s, IVPOSITION), ==, 0);
>
> +/* trigger interrupt via registers */
>  out_reg(s, INTRMASK, 0x);
>  g_assert_cmpuint(in_reg(s, INTRMASK), ==, 0x);
>  out_reg(s, INTRSTATUS, 1);
> -/* XXX: intercept IRQ, not seen in resp */
> +/* check interrupt status */
>  g_assert_cmpuint(in_reg(s, INTRSTATUS), ==, 1);
> +/* reading clears */
> +g_assert_cmpuint(in_reg(s, INTRSTATUS), ==, 0);
> +/* TODO intercept actual interrupt (needs qtest work) */
>
> -/* invalid io */
> +/* invalid register access */
>  out_reg(s, IVPOSITION, 1);
> +in_reg(s, DOORBELL);
> +
> +/* ring the (non-functional) doorbell */
>  out_reg(s, DOORBELL, 8 << 16);
>
> +/* write shared memory */
>  for (i = 0; i < G_N_ELEMENTS(data); i++) {
>  data[i] = i;
>  }
>  qtest_memwrite(s->qtest, (uintptr_t)s->mem_base, data, sizeof(data));
>
> +/* verify write */
>  for (i = 0; i < G_N_ELEMENTS(data); i++) {
>  g_assert_cmpuint(((uint32_t *)tmpshmem)[i], ==, i);
>  }
>
> +/* read it back and verify read */
>  memset(data, 0, sizeof(data));
> -
>  qtest_memread(s->qtest, (uintptr_t)s->mem_base, data, sizeof(data));
>  for (i = 0; i < G_N_ELEMENTS(data); i++) {
>  g_assert_cmpuint(data[i], ==, i);
> --
> 2.4.3
>
>



-- 
Marc-André Lureau

[Qemu-devel] [PATCH v3 1/5] replay: character devices

2016-03-01 Thread Pavel Dovgalyuk

This patch implements record and replay of character devices.
It records chardev communication in record mode. The recorded information
includes the data read from the backend and a counter of the bytes written
from the frontend to the backend, to preserve the frontend's internal state.
If character device was configured through the command line in record mode,
then in replay mode it should be also added to command line. Backend of
the character device could be changed in replay mode.
Replaying of devices that perform ioctl and get_msgfd operations is not
supported.
gdbstub which also acts as a backend is not recorded to allow controlling
the replaying through gdb.

Signed-off-by: Pavel Dovgalyuk 
---
 gdbstub.c|2 -
 include/sysemu/char.h|   26 
 include/sysemu/replay.h  |   12 ++
 qemu-char.c  |   56 ---
 replay/Makefile.objs |1 
 replay/replay-char.c |   97 ++
 replay/replay-events.c   |   17 +++-
 replay/replay-internal.h |   15 +++
 replay/replay.c  |   25 +++-
 9 files changed, 240 insertions(+), 11 deletions(-)
 create mode 100755 replay/replay-char.c

diff --git a/gdbstub.c b/gdbstub.c
index 61c12b1..fdcb0ee 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -1752,7 +1752,7 @@ int gdbserver_start(const char *device)
 sigaction(SIGINT, &act, NULL);
 }
 #endif
-chr = qemu_chr_new("gdb", device, NULL);
+chr = qemu_chr_new_noreplay("gdb", device, NULL);
 if (!chr)
 return -1;
 
diff --git a/include/sysemu/char.h b/include/sysemu/char.h
index e035d1c..03fc165 100644
--- a/include/sysemu/char.h
+++ b/include/sysemu/char.h
@@ -86,6 +86,7 @@ struct CharDriverState {
 int is_mux;
 guint fd_in_tag;
 QemuOpts *opts;
+bool replay;
 QTAILQ_ENTRY(CharDriverState) next;
 };
 
@@ -129,6 +130,22 @@ CharDriverState *qemu_chr_new(const char *label, const 
char *filename,
   void (*init)(struct CharDriverState *s));
 
 /**
+ * @qemu_chr_new_noreplay:
+ *
+ * Create a new character backend from a URI.
+ * Character device communications are not written
+ * into the replay log.
+ *
+ * @label the name of the backend
+ * @filename the URI
+ * @init not sure..
+ *
+ * Returns: a new character backend
+ */
+CharDriverState *qemu_chr_new_noreplay(const char *label, const char *filename,
+   void (*init)(struct CharDriverState 
*s));
+
+/**
  * @qemu_chr_delete:
  *
  * Destroy a character backend and remove it from the list of
@@ -331,6 +348,15 @@ int qemu_chr_be_can_write(CharDriverState *s);
  */
 void qemu_chr_be_write(CharDriverState *s, uint8_t *buf, int len);
 
+/**
+ * @qemu_chr_be_write_impl:
+ *
+ * Implementation of back end writing. Used by replay module.
+ *
+ * @buf a buffer to receive data from the front end
+ * @len the number of bytes to receive from the front end
+ */
+void qemu_chr_be_write_impl(CharDriverState *s, uint8_t *buf, int len);
 
 /**
  * @qemu_chr_be_event:
diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h
index abb4688..3c4a988 100644
--- a/include/sysemu/replay.h
+++ b/include/sysemu/replay.h
@@ -117,4 +117,16 @@ void replay_input_event(QemuConsole *src, InputEvent *evt);
 /*! Adds input sync event to the queue */
 void replay_input_sync_event(void);
 
+/* Character device */
+
+/*! Registers char driver to save it's events */
+void replay_register_char_driver(struct CharDriverState *chr);
+/*! Saves write to char device event to the log */
+void replay_chr_be_write(struct CharDriverState *s, uint8_t *buf, int len);
+
+/* Other data */
+
+/*! Writes or reads integer value to/from replay log. */
+void replay_data_int(int *data);
+
 #endif
diff --git a/qemu-char.c b/qemu-char.c
index 927c47e..e285f9f 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -37,6 +37,7 @@
 #include "io/channel-socket.h"
 #include "io/channel-file.h"
 #include "io/channel-tls.h"
+#include "sysemu/replay.h"
 
 #include 
 
@@ -245,6 +246,9 @@ int qemu_chr_fe_write(CharDriverState *s, const uint8_t 
*buf, int len)
 qemu_chr_fe_write_log(s, buf, ret);
 }
 
+if (s->replay) {
+replay_data_int(&ret);
+}
 qemu_mutex_unlock(&s->chr_write_lock);
 return ret;
 }
@@ -318,9 +322,19 @@ int qemu_chr_fe_read_all(CharDriverState *s, uint8_t *buf, 
int len)
 
 int qemu_chr_fe_ioctl(CharDriverState *s, int cmd, void *arg)
 {
-if (!s->chr_ioctl)
-return -ENOTSUP;
-return s->chr_ioctl(s, cmd, arg);
+int res;
+if (!s->chr_ioctl) {
+res = -ENOTSUP;
+} else {
+res = s->chr_ioctl(s, cmd, arg);
+if (s->replay) {
+fprintf(stderr,
+"Replay: ioctl is not supported for serial devices yet\n");
+exit(1);
+}
+}
+
+return res;
 }
 
 int qemu_chr_be_can_write(CharDriverState *s)
@@ -330,17 +344,35 @@ int qemu_chr_be_can_write(CharDriverState *s)
 r

[Qemu-devel] [PATCH v3 5/5] replay: introduce block devices record/replay

2016-03-01 Thread Pavel Dovgalyuk

This patch introduces a block driver that implements recording
and replaying of block devices' operations.
All block completion operations are added to the queue.
Queue is flushed at checkpoints and information about processed requests
is recorded to the log. In replay phase the queue is matched with
events read from the log. Therefore block devices requests are processed
deterministically.

Signed-off-by: Pavel Dovgalyuk 
---
 block/Makefile.objs  |2 -
 block/blkreplay.c|  156 ++
 docs/replay.txt  |   20 ++
 include/sysemu/replay.h  |2 +
 replay/replay-events.c   |   24 ++-
 replay/replay-internal.h |1 
 stubs/replay.c   |4 +
 7 files changed, 206 insertions(+), 3 deletions(-)
 create mode 100755 block/blkreplay.c

diff --git a/block/Makefile.objs b/block/Makefile.objs
index 58ef2ef..38fea16 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -4,7 +4,7 @@ block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o 
qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
 block-obj-y += quorum.o
-block-obj-y += parallels.o blkdebug.o blkverify.o
+block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o
 block-obj-y += block-backend.o snapshot.o qapi.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
diff --git a/block/blkreplay.c b/block/blkreplay.c
new file mode 100755
index 000..96c189a
--- /dev/null
+++ b/block/blkreplay.c
@@ -0,0 +1,156 @@
+/*
+ * Block protocol for record/replay
+ *
+ * Copyright (c) 2010-2016 Institute for System Programming
+ * of the Russian Academy of Sciences.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "sysemu/replay.h"
+
+typedef struct Request {
+Coroutine *co;
+QEMUBH *bh;
+} Request;
+
+/* Next request id.
+   This counter is global, because requests from different
+   block devices should not get overlapping ids. */
+static uint64_t request_id;
+
+static int blkreplay_open(BlockDriverState *bs, QDict *options, int flags,
+  Error **errp)
+{
+Error *local_err = NULL;
+int ret;
+
+/* Open the image file */
+bs->file = bdrv_open_child(NULL, options, "image",
+   bs, &child_file, false, &local_err);
+if (local_err) {
+ret = -EINVAL;
+error_propagate(errp, local_err);
+goto fail;
+}
+
+ret = 0;
+fail:
+if (ret < 0) {
+bdrv_unref_child(bs, bs->file);
+}
+return ret;
+}
+
+static void blkreplay_close(BlockDriverState *bs)
+{
+}
+
+static int64_t blkreplay_getlength(BlockDriverState *bs)
+{
+return bdrv_getlength(bs->file->bs);
+}
+
+static void blkreplay_bh_cb(void *opaque)
+{
+Request *req = opaque;
+qemu_coroutine_enter(req->co, NULL);
+qemu_bh_delete(req->bh);
+g_free(req);
+}
+
+static void block_request_create(uint64_t reqid, BlockDriverState *bs,
+ Coroutine *co)
+{
+Request *req = g_malloc0(sizeof(Request));
+req->co = co;
+req->bh = aio_bh_new(bdrv_get_aio_context(bs), blkreplay_bh_cb, req);
+replay_block_event(req->bh, reqid);
+}
+
+static int coroutine_fn blkreplay_co_readv(BlockDriverState *bs,
+int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+uint64_t reqid = request_id++;
+
+bdrv_co_readv(bs->file->bs, sector_num, nb_sectors, qiov);
+block_request_create(reqid, bs, qemu_coroutine_self());
+qemu_coroutine_yield();
+
+return 0;
+}
+
+static int coroutine_fn blkreplay_co_writev(BlockDriverState *bs,
+int64_t sector_num, int nb_sectors, QEMUIOVector *qiov)
+{
+uint64_t reqid = request_id++;
+
+bdrv_co_writev(bs->file->bs, sector_num, nb_sectors, qiov);
+block_request_create(reqid, bs, qemu_coroutine_self());
+qemu_coroutine_yield();
+
+return 0;
+}
+
+static int coroutine_fn blkreplay_co_write_zeroes(BlockDriverState *bs,
+int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
+{
+uint64_t reqid = request_id++;
+
+bdrv_co_write_zeroes(bs->file->bs, sector_num, nb_sectors, flags);
+block_request_create(reqid, bs, qemu_coroutine_self());
+qemu_coroutine_yield();
+
+return 0;
+}
+
+static int coroutine_fn blkreplay_co_discard(BlockDriverState *bs,
+int64_t sector_num, int nb_sectors)
+{
+uint64_t reqid = request_id++;
+
+bdrv_co_discard(bs->file->bs, sector_num, nb_sectors);
+block_request_create(reqid, bs, qemu_coroutine_self());
+qemu_coroutine_yield();
+
+return 0;
+}
+
+static int coroutine_fn blkreplay_co_flush(BlockDriverState *bs)
+{
+uint64_t reqid = request_id++;
+
+bdrv_co_flush(bs->file->bs);
+block_request_create(reqid, bs, qemu_coroutine_self());
+qe

[Qemu-devel] [PATCH v3 0/5] Deterministic replay extensions

2016-03-01 Thread Pavel Dovgalyuk

This set of patches is related to the reverse execution and deterministic 
replay of qemu execution. It includes recording and replaying of serial devices
and block devices operations.

With these patches one can record and deterministically replay behavior
of the system with connected disk drives and serial communication ports
(e.g., telnet terminal).

Patches for deterministic replay of the block devices intercept calls of
bdrv coroutine functions at the top of block drivers stack.
To record and replay block operations the drive must be configured
as follows:
 -drive file=disk.qcow,if=none,id=img-direct
 -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay
 -device ide-hd,drive=img-blkreplay

blkreplay driver should be inserted between disk image and virtual driver
controller. Therefore all disk requests may be recorded and replayed.

v3 changes:
 - introduced bdrv_flush callback for block drivers
 - introduced block driver for recording block operations (as suggested by 
Kevin Wolf)
 - added documentation for block record/replay

v2 changes:
 - removed obsolete call of qemu_clock_warp
 - fixed record/replay of aio_cancel
 - simplified call sequence for blk_aio_ functions in non-replay mode (as 
suggested by Kevin Wolf)

---

Pavel Dovgalyuk (5):
  replay: character devices
  icount: remove obsolete warp call
  replay: introduce new checkpoint for icount warp
  block: add flush callback
  replay: introduce block devices record/replay


 block/Makefile.objs   |2 -
 block/blkreplay.c |  156 +
 block/io.c|6 ++
 cpus.c|   10 +--
 docs/replay.txt   |   20 ++
 gdbstub.c |2 -
 include/block/block_int.h |7 ++
 include/qemu/timer.h  |3 +
 include/sysemu/char.h |   26 
 include/sysemu/replay.h   |   15 
 main-loop.c   |2 -
 qemu-char.c   |   56 ++--
 qemu-timer.c  |2 -
 replay/Makefile.objs  |1 
 replay/replay-char.c  |   97 
 replay/replay-events.c|   41 ++--
 replay/replay-internal.h  |   16 +
 replay/replay.c   |   25 +++
 stubs/clock-warp.c|2 -
 stubs/replay.c|4 +
 20 files changed, 469 insertions(+), 24 deletions(-)
 create mode 100755 block/blkreplay.c
 create mode 100755 replay/replay-char.c

-- 
Pavel Dovgalyuk

[Qemu-devel] [PATCH v3 2/5] icount: remove obsolete warp call

2016-03-01 Thread Pavel Dovgalyuk

qemu_clock_warp call in qemu_tcg_wait_io_event function is not needed
anymore, because it is called in every iteration of main_loop_wait.

Signed-off-by: Pavel Dovgalyuk 
---
 cpus.c |3 ---
 1 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/cpus.c b/cpus.c
index 898426c..01c9809 100644
--- a/cpus.c
+++ b/cpus.c
@@ -995,9 +995,6 @@ static void qemu_wait_io_event_common(CPUState *cpu)
 static void qemu_tcg_wait_io_event(CPUState *cpu)
 {
 while (all_cpu_threads_idle()) {
-   /* Start accounting real time to the virtual clock if the CPUs
-  are idle.  */
-qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
 qemu_cond_wait(cpu->halt_cond, &qemu_global_mutex);
 }

[Qemu-devel] [PATCH v3 3/5] replay: introduce new checkpoint for icount warp

2016-03-01 Thread Pavel Dovgalyuk

qemu_clock_warp function is called to update virtual clock when CPU
is sleeping. This function includes replay checkpoint to make execution
deterministic in icount mode.
Record/replay module flushes async event queue at checkpoints.
Some of the events (e.g., block devices operations) include interaction
with hardware. E.g., APIC polled by block devices sets one of IRQ flags.
Flag to be set depends on currently executed thread (CPU or iothread).
Therefore in replay mode we have to process the checkpoints in the same thread
as they were recorded.
qemu_clock_warp function (and its checkpoint) may be called from different
threads. This patch introduces a new checkpoint which distinguishes warp
checkpoint calls made from different threads.

Signed-off-by: Pavel Dovgalyuk 
---
 cpus.c  |7 ---
 include/qemu/timer.h|3 ++-
 include/sysemu/replay.h |1 +
 main-loop.c |2 +-
 qemu-timer.c|2 +-
 stubs/clock-warp.c  |2 +-
 6 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/cpus.c b/cpus.c
index 01c9809..c2d9cfe 100644
--- a/cpus.c
+++ b/cpus.c
@@ -396,7 +396,7 @@ void qtest_clock_warp(int64_t dest)
 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
 }
 
-void qemu_clock_warp(QEMUClockType type)
+void qemu_clock_warp(QEMUClockType type, bool in_tcg)
 {
 int64_t clock;
 int64_t deadline;
@@ -418,7 +418,8 @@ void qemu_clock_warp(QEMUClockType type)
 }
 
 /* warp clock deterministically in record/replay mode */
-if (!replay_checkpoint(CHECKPOINT_CLOCK_WARP)) {
+if (!replay_checkpoint(in_tcg ? CHECKPOINT_CLOCK_WARP_TCG
+  : CHECKPOINT_CLOCK_WARP)) {
 return;
 }
 
@@ -1496,7 +1497,7 @@ static void tcg_exec_all(void)
 int r;
 
 /* Account partial waits to QEMU_CLOCK_VIRTUAL.  */
-qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
+qemu_clock_warp(QEMU_CLOCK_VIRTUAL, true);
 
 if (next_cpu == NULL) {
 next_cpu = first_cpu;
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index d0946cb..c58192c 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -212,10 +212,11 @@ void qemu_clock_enable(QEMUClockType type, bool enabled);
 /**
  * qemu_clock_warp:
  * @type: the clock type
+ * @in_tcg: true if function is called from TCG CPU thread
  *
  * Warp a clock to a new value
  */
-void qemu_clock_warp(QEMUClockType type);
+void qemu_clock_warp(QEMUClockType type, bool in_tcg);
 
 /**
  * qemu_clock_register_reset_notifier:
diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h
index 3c4a988..c879231 100644
--- a/include/sysemu/replay.h
+++ b/include/sysemu/replay.h
@@ -31,6 +31,7 @@ typedef enum ReplayClockKind ReplayClockKind;
 /* IDs of the checkpoints */
 enum ReplayCheckpoint {
 CHECKPOINT_CLOCK_WARP,
+CHECKPOINT_CLOCK_WARP_TCG,
 CHECKPOINT_RESET_REQUESTED,
 CHECKPOINT_SUSPEND_REQUESTED,
 CHECKPOINT_CLOCK_VIRTUAL,
diff --git a/main-loop.c b/main-loop.c
index 19beae7..cd8415f 100644
--- a/main-loop.c
+++ b/main-loop.c
@@ -509,7 +509,7 @@ int main_loop_wait(int nonblocking)
 
 /* CPU thread can infinitely wait for event after
missing the warp */
-qemu_clock_warp(QEMU_CLOCK_VIRTUAL);
+qemu_clock_warp(QEMU_CLOCK_VIRTUAL, false);
 qemu_clock_run_all_timers();
 
 return ret;
diff --git a/qemu-timer.c b/qemu-timer.c
index e98ecc9..980fe7e 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -394,7 +394,7 @@ static bool timer_mod_ns_locked(QEMUTimerList *timer_list,
 static void timerlist_rearm(QEMUTimerList *timer_list)
 {
 /* Interrupt execution to force deadline recalculation.  */
-qemu_clock_warp(timer_list->clock->type);
+qemu_clock_warp(timer_list->clock->type, false);
 timerlist_notify(timer_list);
 }
 
diff --git a/stubs/clock-warp.c b/stubs/clock-warp.c
index 5ae32b9..24ae0f8 100644
--- a/stubs/clock-warp.c
+++ b/stubs/clock-warp.c
@@ -2,7 +2,7 @@
 #include "qemu-common.h"
 #include "qemu/timer.h"
 
-void qemu_clock_warp(QEMUClockType type)
+void qemu_clock_warp(QEMUClockType type, bool in_tcg)
 {
 }

Re: [Qemu-devel] [PATCH v9 0/7] trace: Show vCPU info in guest code events

2016-03-01 Thread Lluís Vilanova

Lluís Vilanova writes:

> NOTE: This series should complete the framework for guest code tracing. From
>   here on, other series can concurrently add actual events and improve the
>   guest code tracing features and performance (e.g., control tracing
>   independently on each vCPU).

> This series introduces the "vcpu" property for tracing events. This property
> identifies events that are tied to a particular virtual CPU (e.g., executing 
> an
> instruction).

> Events with this property have an implicit vcpu argument, which is shown in 
> the
> trace. In the case of events executed at TCG translation time, two implicit
> arguments are added:

> * The vCPU performing the code translation (shown in the translation-time 
> trace)
> * The vCPU executing the translated code (shown in the execution-time trace)

> Note that the "vcpu" and "tcg" properties are not merged into a single one,
> since events can be defined that relate to a vCPU but are never raised from 
> TCG
> code (e.g., interrupts).


> Changes in v9
> =

> * Rebase on 774ae42.
> * Fix CPUState typedef, and refactor into a separate patch [Stefan Hajnoczi].
> * Fix TCGv_cpu -> TCGv_env rename in sparc [Stefan Hajnoczi].
[...]

Hi Stefan,

I know it's soft-freeze day, but did you have a chance to take a look at this
series and the other two I sent on top? ("trace: Add events for vCPU memory
accesses" and "trace: Per-vCPU tracing states").


Thanks a lot,
  Lluis

Re: [Qemu-devel] [PATCH 06/38] ivshmem-test: Clean up wait for devices to become operational

2016-03-01 Thread Marc-André Lureau

On Mon, Feb 29, 2016 at 7:40 PM, Markus Armbruster  wrote:
> test_ivshmem_server() waits until the first byte in BAR 2 contains the
> 0x42 we put into shared memory.  Works because the byte reads zero
> until the device maps the shared memory gotten from the server.
>
> Check the IVPosition register instead: it's initially -1, and becomes
> non-negative right when the device maps the shared memory, so no
> change, just cleaner, because it's what guest software is supposed to
> do.
>
> Signed-off-by: Markus Armbruster 

Reviewed-by: Marc-André Lureau 

> ---
>  tests/ivshmem-test.c | 10 --
>  1 file changed, 4 insertions(+), 6 deletions(-)
>
> diff --git a/tests/ivshmem-test.c b/tests/ivshmem-test.c
> index ba4d9f1..f40c3497 100644
> --- a/tests/ivshmem-test.c
> +++ b/tests/ivshmem-test.c
> @@ -301,7 +301,6 @@ static void test_ivshmem_server(bool msi)
>  int nvectors = 2;
>  guint64 end_time = g_get_monotonic_time() + 5 * G_TIME_SPAN_SECOND;
>
> -memset(tmpshmem, 0x42, TMPSHMSIZE);
>  ret = ivshmem_server_init(&server, tmpserver, tmpshm,
>TMPSHMSIZE, nvectors,
>g_test_verbose());
> @@ -315,9 +314,9 @@ static void test_ivshmem_server(bool msi)
>  setup_vm_with_server(&state2, nvectors, msi);
>  s2 = &state2;
>
> +/* check state before server sends stuff */
>  g_assert_cmpuint(in_reg(s1, IVPOSITION), ==, 0xffffffff);
>  g_assert_cmpuint(in_reg(s2, IVPOSITION), ==, 0xffffffff);
> -
>  g_assert_cmpuint(qtest_readb(s1->qtest, (uintptr_t)s1->mem_base), ==, 
> 0x00);
>
>  thread.server = &server;
> @@ -326,12 +325,11 @@ static void test_ivshmem_server(bool msi)
>  thread.thread = g_thread_new("ivshmem-server", server_thread, &thread);
>  g_assert(thread.thread != NULL);
>
> -/* waiting until mapping is done */
> +/* waiting for devices to become operational */
>  while (g_get_monotonic_time() < end_time) {
>  g_usleep(1000);
> -
> -if (qtest_readb(s1->qtest, (uintptr_t)s1->mem_base) == 0x42 &&
> -qtest_readb(s2->qtest, (uintptr_t)s2->mem_base) == 0x42) {
> +if ((int)in_reg(s1, IVPOSITION) >= 0 &&
> +(int)in_reg(s2, IVPOSITION) >= 0) {
>  break;
>  }
>  }
> --
> 2.4.3
>
>



-- 
Marc-André Lureau

[Qemu-devel] [PATCH v3 4/5] block: add flush callback

2016-03-01 Thread Pavel Dovgalyuk

This patch adds a callback for flush requests. This callback is responsible
for flushing the whole stack of block devices. The bdrv_flush function does
not proceed to the underlying devices; that should be done by this callback
function, if needed.

Signed-off-by: Pavel Dovgalyuk 
---
 block/io.c|6 ++
 include/block/block_int.h |7 +++
 2 files changed, 13 insertions(+), 0 deletions(-)

diff --git a/block/io.c b/block/io.c
index a69bfc4..9e05dfe 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2369,6 +2369,12 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
 }
 
 tracked_request_begin(&req, bs, 0, 0, BDRV_TRACKED_FLUSH);
+/* Write back all layers by calling one driver function */
+if (bs->drv->bdrv_co_flush) {
+ret = bs->drv->bdrv_co_flush(bs);
+goto out;
+}
+
 /* Write back cached data to the OS even with cache=unsafe */
 BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS);
 if (bs->drv->bdrv_co_flush_to_os) {
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 9ef823a..9cc2c58 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -176,6 +176,13 @@ struct BlockDriver {
 int (*bdrv_inactivate)(BlockDriverState *bs);
 
 /*
+ * Flushes all data for all layers by calling bdrv_co_flush for underlying
+ * layers, if needed. This function is needed for deterministic
+ * synchronization of the flush finishing callback.
+ */
+int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs);
+
+/*
  * Flushes all data that was already written to the OS all the way down to
  * the disk (for example raw-posix calls fsync()).
  */

Re: [Qemu-devel] [PATCH 07/38] ivshmem-test: Improve test cases /ivshmem/server-*

2016-03-01 Thread Marc-André Lureau

On Mon, Feb 29, 2016 at 7:40 PM, Markus Armbruster  wrote:
> Document missing test: behavior with MSI-X present but not enabled.
>
> For MSI-X, we test and clear the interrupt pending bit before testing
> the interrupt.  For INTx, we only clear.  Change to test and clear for
> consistency.
>
> Test MSI-X vector 1 in addition to vector 0.
>
> Improve comments.
>
> Signed-off-by: Markus Armbruster 

Reviewed-by: Marc-André Lureau 


> ---
>  tests/ivshmem-test.c | 17 ++---
>  1 file changed, 10 insertions(+), 7 deletions(-)
>
> diff --git a/tests/ivshmem-test.c b/tests/ivshmem-test.c
> index f40c3497..c1dd7bb 100644
> --- a/tests/ivshmem-test.c
> +++ b/tests/ivshmem-test.c
> @@ -339,18 +339,21 @@ static void test_ivshmem_server(bool msi)
>  vm2 = in_reg(s2, IVPOSITION);
>  g_assert_cmpuint(vm1, !=, vm2);
>
> +/* check number of MSI-X vectors */
>  global_qtest = s1->qtest;
>  if (msi) {
>  ret = qpci_msix_table_size(s1->dev);
>  g_assert_cmpuint(ret, ==, nvectors);
>  }
>
> -/* ping vm2 -> vm1 */
> +/* TODO test behavior before MSI-X is enabled */
> +
> +/* ping vm2 -> vm1 on vector 0 */
>  if (msi) {
>  ret = qpci_msix_pending(s1->dev, 0);
>  g_assert_cmpuint(ret, ==, 0);
>  } else {
> -out_reg(s1, INTRSTATUS, 0);
> +g_assert_cmpuint(in_reg(s1, INTRSTATUS), ==, 0);
>  }
>  out_reg(s2, DOORBELL, vm1 << 16);
>  do {
> @@ -359,18 +362,18 @@ static void test_ivshmem_server(bool msi)
>  } while (ret == 0 && g_get_monotonic_time() < end_time);
>  g_assert_cmpuint(ret, !=, 0);
>
> -/* ping vm1 -> vm2 */
> +/* ping vm1 -> vm2 on vector 1 */
>  global_qtest = s2->qtest;
>  if (msi) {
> -ret = qpci_msix_pending(s2->dev, 0);
> +ret = qpci_msix_pending(s2->dev, 1);
>  g_assert_cmpuint(ret, ==, 0);
>  } else {
> -out_reg(s2, INTRSTATUS, 0);
> +g_assert_cmpuint(in_reg(s2, INTRSTATUS), ==, 0);
>  }
> -out_reg(s1, DOORBELL, vm2 << 16);
> +out_reg(s1, DOORBELL, vm2 << 16 | 1);
>  do {
>  g_usleep(1);
> -ret = msi ? qpci_msix_pending(s2->dev, 0) : in_reg(s2, INTRSTATUS);
> +ret = msi ? qpci_msix_pending(s2->dev, 1) : in_reg(s2, INTRSTATUS);
>  } while (ret == 0 && g_get_monotonic_time() < end_time);
>  g_assert_cmpuint(ret, !=, 0);
>
> --
> 2.4.3
>
>



-- 
Marc-André Lureau

[Qemu-devel] [PATCH] balloon: fix segfault and harden the stats queue

2016-03-01 Thread Ladi Prosek

The segfault here is triggered by the driver notifying the stats queue
twice after adding a buffer to it. This effectively resets stats_vq_elem
back to NULL and QEMU crashes on the next stats timer tick in
balloon_stats_poll_cb.

This is a regression introduced in 51b19ebe4320f3dc, although admittedly
the device assumed too much about the stats queue protocol even before
that commit. This commit adds a few more checks and ensures that the one
stats buffer gets deallocated on device reset.

Signed-off-by: Ladi Prosek 
---
 hw/virtio/virtio-balloon.c | 24 ++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c
index e9c30e9..e97d403 100644
--- a/hw/virtio/virtio-balloon.c
+++ b/hw/virtio/virtio-balloon.c
@@ -101,7 +101,7 @@ static void balloon_stats_poll_cb(void *opaque)
 VirtIOBalloon *s = opaque;
 VirtIODevice *vdev = VIRTIO_DEVICE(s);
 
-if (!balloon_stats_supported(s)) {
+if (s->stats_vq_elem == NULL || !balloon_stats_supported(s)) {
 /* re-schedule */
 balloon_stats_change_timer(s, s->stats_poll_interval);
 return;
@@ -258,11 +258,20 @@ static void virtio_balloon_receive_stats(VirtIODevice 
*vdev, VirtQueue *vq)
 size_t offset = 0;
 qemu_timeval tv;
 
-s->stats_vq_elem = elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
+elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 if (!elem) {
 goto out;
 }
 
+if (s->stats_vq_elem != NULL) {
+/* This should never happen if the driver follows the spec. */
+virtqueue_push(vq, s->stats_vq_elem, 0);
+virtio_notify(vdev, vq);
+g_free(s->stats_vq_elem);
+}
+
+s->stats_vq_elem = elem;
+
 /* Initialize the stats to get rid of any stale values.  This is only
  * needed to handle the case where a guest supports fewer stats than it
  * used to (ie. it has booted into an old kernel).
@@ -458,6 +467,16 @@ static void virtio_balloon_device_unrealize(DeviceState 
*dev, Error **errp)
 virtio_cleanup(vdev);
 }
 
+static void virtio_balloon_device_reset(VirtIODevice *vdev)
+{
+VirtIOBalloon *s = VIRTIO_BALLOON(vdev);
+
+if (s->stats_vq_elem != NULL) {
+g_free(s->stats_vq_elem);
+s->stats_vq_elem = NULL;
+}
+}
+
 static void virtio_balloon_instance_init(Object *obj)
 {
 VirtIOBalloon *s = VIRTIO_BALLOON(obj);
@@ -486,6 +505,7 @@ static void virtio_balloon_class_init(ObjectClass *klass, 
void *data)
 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 vdc->realize = virtio_balloon_device_realize;
 vdc->unrealize = virtio_balloon_device_unrealize;
+vdc->reset = virtio_balloon_device_reset;
 vdc->get_config = virtio_balloon_get_config;
 vdc->set_config = virtio_balloon_set_config;
 vdc->get_features = virtio_balloon_get_features;
-- 
2.5.0

Re: [Qemu-devel] [PULL 0/2] vga: minor cirrus/qxl bugfixes.

2016-03-01 Thread Peter Maydell

On 1 March 2016 at 07:17, Gerd Hoffmann  wrote:
>   Hi,
>
> Yet another small bugfix pull request, this time for vga.
>
> please pull,
>   Gerd
>
> The following changes since commit 071608b519adf62bc29c914343a21c5407ab1ac9:
>
>   Merge remote-tracking branch 'remotes/kraxel/tags/pull-usb-20160229-1' into 
> staging (2016-02-29 12:24:26 +0000)
>
> are available in the git repository at:
>
>
>   git://git.kraxel.org/qemu tags/pull-vga-20160301-1
>
> for you to fetch changes up to 05fa1c742fd6f66978b989ded0dd981ef11c4a0c:
>
>   qxl: lock current_async update in qxl_soft_reset (2016-03-01 07:51:32 +0100)
>
> 
> vga: minor cirrus/qxl bugfixes.

Applied, thanks.

-- PMM

Re: [Qemu-devel] [PATCH] log: Redirect stderr to logfile if deamonized

2016-03-01 Thread Gerd Hoffmann

On Do, 2016-02-18 at 13:38 +0200, Dimitris Aragiorgis wrote:
> In case of daemonize, use the logfile passed with the -D option in
> order to redirect stderr to it instead of /dev/null.
> 
> Also remove some unused code in log.h.

Patch breaks interaction with libvirt.  libvirt hangs on startup, while
probing qemu capabilities.  qemu runs in probing mode (command line is
"/home/kraxel/projects/qemu/build-default/x86_64-softmmu/qemu-system-x86_64 -S 
-no-user-config -nodefaults -nographic -M none -qmp 
unix:/var/lib/libvirt/qemu/capabilities.monitor.sock,server,nowait -pidfile 
/var/lib/libvirt/qemu/capabilities.pidfile -daemonize" according to "systemctl 
status libvirtd -l"), apparently both qemu and libvirt wait for each other.

cheers,
  Gerd

Re: [Qemu-devel] [PATCH 08/38] ivshmem: Rewrite specification document

2016-03-01 Thread Marc-André Lureau

On Mon, Feb 29, 2016 at 7:40 PM, Markus Armbruster  wrote:
> This started as an attempt to update ivshmem_device_spec.txt for
> clarity, accuracy and completeness while working on its code, and
> quickly became a full rewrite.  Since the diff would be useless
> anyway, I'm using the opportunity to rename the file to
> ivshmem-spec.txt.
>
> I tried hard to ensure the new text contradicts neither the old text
> nor the code.  If the new text contradicts the old text but not the
> code, it's probably a bug in the old text.  If the new text
> contradicts both, it's probably a bug in the new text.
>
> Signed-off-by: Markus Armbruster 

Reviewed-by: Marc-André Lureau 


> ---
>  docs/specs/ivshmem-spec.txt| 244 
> +
>  docs/specs/ivshmem_device_spec.txt | 161 
>  2 files changed, 244 insertions(+), 161 deletions(-)
>  create mode 100644 docs/specs/ivshmem-spec.txt
>  delete mode 100644 docs/specs/ivshmem_device_spec.txt
>
> diff --git a/docs/specs/ivshmem-spec.txt b/docs/specs/ivshmem-spec.txt
> new file mode 100644
> index 0000000..0835ba1
> --- /dev/null
> +++ b/docs/specs/ivshmem-spec.txt
> @@ -0,0 +1,244 @@
> += Device Specification for Inter-VM shared memory device =
> +
> +The Inter-VM shared memory device (ivshmem) is designed to share a
> +memory region between multiple QEMU processes running different guests
> +and the host.  In order for all guests to be able to pick up the
> +shared memory area, it is modeled by QEMU as a PCI device exposing
> +said memory to the guest as a PCI BAR.
> +
> +The device can use a shared memory object on the host directly, or it
> +can obtain one from an ivshmem server.
> +
> +In the latter case, the device can additionally interrupt its peers, and
> +get interrupted by its peers.
> +
> +
> +== Configuring the ivshmem PCI device ==
> +
> +There are two basic configurations:
> +
> +- Just shared memory: -device ivshmem,shm=NAME,...
> +
> +  This uses shared memory object NAME.
> +
> +- Shared memory plus interrupts: -device ivshmem,chardev=CHR,vectors=N,...
> +
> +  An ivshmem server must already be running on the host.  The device
> +  connects to the server's UNIX domain socket via character device
> +  CHR.
> +
> +  Each peer gets assigned a unique ID by the server.  IDs must be
> +  between 0 and 65535.
> +
> +  Interrupts are message-signaled by default (MSI-X).  With msi=off
> +  the device has no MSI-X capability, and uses legacy INTx instead.
> +  vectors=N configures the number of vectors to use.
> +
> +For more details on ivshmem device properties, see The QEMU Emulator
> +User Documentation (qemu-doc.*).
> +
> +
> +== The ivshmem PCI device's guest interface ==
> +
> +The device has vendor ID 1af4, device ID 1110, revision 0.
> +
> +=== PCI BARs ===
> +
> +The ivshmem PCI device has two or three BARs:
> +
> +- BAR0 holds device registers (256 Byte MMIO)
> +- BAR1 holds MSI-X table and PBA (only when using MSI-X)
> +- BAR2 maps the shared memory object
> +
> +There are two ways to use this device:
> +
> +- If you only need the shared memory part, BAR2 suffices.  This way,
> +  you have access to the shared memory in the guest and can use it as
> +  you see fit.  Memnic, for example, uses ivshmem this way from guest
> +  user space (see http://dpdk.org/browse/memnic).
> +
> +- If you additionally need the capability for peers to interrupt each
> +  other, you need BAR0 and, if using MSI-X, BAR1.  You will most
> +  likely want to write a kernel driver to handle interrupts.  Requires
> +  the device to be configured for interrupts, obviously.
> +
> +If the device is configured for interrupts, BAR2 is initially invalid.
> +It becomes safely accessible only after the ivshmem server provided
> +the shared memory.  Guest software should wait for the IVPosition
> +register (described below) to become non-negative before accessing
> +BAR2.
> +
> +The device is not capable of telling guest software whether it is
> +configured for interrupts.
> +
> +=== PCI device registers ===
> +
> +BAR 0 contains the following registers:
> +
> +Offset  Size  Access  On reset  Function
> +0 4   read/write0   Interrupt Mask
> +bit 0: peer interrupt
> +bit 1..31: reserved
> +4 4   read/write0   Interrupt Status
> +bit 0: peer interrupt
> +bit 1..31: reserved
> +8 4   read-only   0 or -1   IVPosition
> +   12 4   write-only  N/A   Doorbell
> +bit 0..15: vector
> +bit 16..31: peer ID
> +   16   240   noneN/A   reserved
> +
> +Software should only access the registers as specified in column
> +"Access".  Reserved bits should be ignored on read, and preserved on
> +write.
> +
> +Interrupt Status and Mask Regi

Re: [Qemu-devel] [PATCH 03/38] event_notifier: Make event_notifier_init_fd() #ifdef CONFIG_EVENTFD

2016-03-01 Thread Paolo Bonzini



On 29/02/2016 19:40, Markus Armbruster wrote:
> Event notifiers are designed for eventfd(2).  They can fall back to
> pipes, but according to Paolo, event_notifier_init_fd() really
> requires the real thing, and should therefore be under #ifdef
> CONFIG_EVENTFD.  Do that.
> 
> Its only user is ivshmem, which is currently CONFIG_POSIX.  Narrow it
> to CONFIG_EVENTFD.
> 
> Cc: Paolo Bonzini 
> Signed-off-by: Markus Armbruster 
> ---
>  default-configs/pci.mak | 2 +-
>  util/event_notifier-posix.c | 6 ++
>  2 files changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/default-configs/pci.mak b/default-configs/pci.mak
> index 4fa9a28..9c8bc68 100644
> --- a/default-configs/pci.mak
> +++ b/default-configs/pci.mak
> @@ -36,5 +36,5 @@ CONFIG_SDHCI=y
>  CONFIG_EDU=y
>  CONFIG_VGA=y
>  CONFIG_VGA_PCI=y
> -CONFIG_IVSHMEM=$(CONFIG_POSIX)
> +CONFIG_IVSHMEM=$(CONFIG_EVENTFD)
>  CONFIG_ROCKER=y
> diff --git a/util/event_notifier-posix.c b/util/event_notifier-posix.c
> index 2e30e74..c9657a6 100644
> --- a/util/event_notifier-posix.c
> +++ b/util/event_notifier-posix.c
> @@ -20,11 +20,17 @@
>  #include 
>  #endif
>  
> +#ifdef CONFIG_EVENTFD
> +/*
> + * Initialize @e with existing file descriptor @fd.
> + * @fd must be a genuine eventfd object, emulation with pipe won't do.
> + */
>  void event_notifier_init_fd(EventNotifier *e, int fd)
>  {
>  e->rfd = fd;
>  e->wfd = fd;
>  }
> +#endif
>  
>  int event_notifier_init(EventNotifier *e, int active)
>  {
> 

Reviewed-by: Paolo Bonzini

Re: [Qemu-devel] [PATCH 01/38] exec: Fix memory allocation when memory path names new file

2016-03-01 Thread Paolo Bonzini



On 29/02/2016 19:40, Markus Armbruster wrote:
> -if (!stat(path, &st) && S_ISDIR(st.st_mode)) {
> +ret = stat(path, &st);
> +if (!ret && S_ISDIR(st.st_mode)) {
> +/* path names a directory -> create a temporary file there */
>  /* Make name safe to use with mkstemp by replacing '/' with '_'. */
>  sanitized_name = g_strdup(memory_region_name(block->mr));
>  for (c = sanitized_name; *c != '\0'; c++) {
> @@ -1282,13 +1271,32 @@ static void *file_ram_alloc(RAMBlock *block,
>  unlink(filename);
>  }
>  g_free(filename);
> +} else if (!ret) {
> +/* path names an existing file -> use it */
> +fd = open(path, O_RDWR);
>  } else {
> +/* create a new file */
>  fd = open(path, O_RDWR | O_CREAT, 0644);
> +unlink_on_error = true;
>  }

While at it, let's avoid TOCTTOU conditions:

for (;;) {
fd = open(path, O_RDWR);
if (fd != -1) {
break;
}
if (errno == ENOENT) {
fd = open(path, O_RDWR | O_CREAT | O_EXCL, 0644);
if (fd != -1) {
unlink_on_error = true;
break;
}
} else if (errno == EISDIR) {
... mkstemp ...
if (fd != -1) {
unlink_on_error = true;
break;
}
}
if (errno != EEXIST && errno != EINTR) {
goto error;
}
}

and use fstatfs in gethugepagesize.

Paolo

Re: [Qemu-devel] [PATCH 02/38] qemu-doc: Fix ivshmem huge page example

2016-03-01 Thread Paolo Bonzini



On 29/02/2016 19:40, Markus Armbruster wrote:
> Option parameter "share" is missing.  Without it, you get a *private*
> mmap(), which defeats ivshmem's purpose pretty thoroughly ;)
> 
> While there, switch to the conventional mountpoint of hugetlbfs
> /dev/hugepages.
> 
> Signed-off-by: Markus Armbruster 
> ---
>  qemu-doc.texi | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/qemu-doc.texi b/qemu-doc.texi
> index bc9dd13..65f3b29 100644
> --- a/qemu-doc.texi
> +++ b/qemu-doc.texi
> @@ -1311,7 +1311,7 @@ Instead of specifying the <shm size> using POSIX shm, 
> you may specify
>  a memory backend that has hugepage support:
>  
>  @example
> -qemu-system-i386 -object 
> memory-backend-file,size=1G,mem-path=/mnt/hugepages/my-shmem-file,id=mb1
> +qemu-system-i386 -object 
> memory-backend-file,size=1G,mem-path=/dev/hugepages/my-shmem-file,share,id=mb1
>   -device ivshmem,x-memdev=mb1
>  @end example
>  
> 

Reviewed-by: Paolo Bonzini

Re: [Qemu-devel] [Qemu-discuss] golang on linux-user

2016-03-01 Thread Peter Maydell

On 1 March 2016 at 09:38, Hunter Laux  wrote:
> I was having trouble running golang on linux-user with an aarch64 target.
>
> It turns out that snappy is written in Go. When I tried the xenial aarch64
> preinstall image in qemu, Snappy was broken.
>
> For some reason, it calls sigaction on all the signals.
>
> I noticed do_sigaction in linux-user/signal.c calls the host sigaction.
> Unfortunately, glibc blocks signal 33 and for "SIGSETXID", which I guess is
> just a user signal that pthread names, but that's as far as I got into it.
>
> See here:
> https://sourceware.org/git/?p=glibc.git;a=blob;f=nptl/sigaction.c
>
> Maybe there's some simple cleaner solutions, but here's what I did.
>
> I made a quick fix by calling the __libc_sigaction instead of the
> __sigaction to bypass the check for SIGSETXID. It seems to work, but I'm not
> sure if that's safe. My "one liner" is definitely a hack.

This has been a bug for ages and we probably should try to
fix it. The problem is that the go runtime expects to be able
to register a handler for every signal, but since QEMU is written
in C and uses libc we can't register a handler for some signals
that libc needs to use. SIGSETXID is signal 33, which glibc uses
as part of its setuid/setgid handling.

We can't call __libc_sigaction because then the libc threading code
would no longer be able to correctly handle setuid/setgid -- I suspect
that if your guest Go program makes a setuid syscall with your patch
applied and multiple threads present this will result in it hanging.

This was traditionally a problem only for guests which try to
register SIGRTMAX (64), because QEMU has a hack where it swaps
the guest's SIGRTMIN and SIGRTMAX. This allows guests using libc
to think they have a working SIGRTMIN -- SIGRTMIN is the other
libc-internal signal (used for thread cancellation) -- without
interfering with the host libc use of it. This did cause go to
complain, but now the go runtime has a workaround for it whereby
it ignores signal 64 failures:
https://go-review.googlesource.com/#/c/16853/3/src/runtime/os1_linux.go

SIGSETXID is I think newer, but similar issues apply. In fact
the only reason we haven't noticed problems already with the glibc
runtime is that it ignores failure return when setting up the
signal handler(!):
https://sourceware.org/git/?p=glibc.git;a=blob;f=nptl/nptl-init.c;h=bdbdfedcef956bca51b9473674381d36eac2c751;hb=HEAD#l411
[I think, but have not tested, that this will mean that attempts
to use setuid etc in a program with multiple threads will hang
under QEMU.]

So we probably ought to:
 (1) redirect SIGSETXID from 33 up to SIGRTMAX-1, for the same
reasons we redirect signal 32 up to SIGRTMAX
 (2a) consider returning success for attempts to register SIGRTMAX
and SIGRTMAX-1 handlers in the guest, rather than failure
 (2b) alternatively, ask the go runtime maintainers to extend
their 'ignore signal 64 registration failure' hack to cover 63

For 2a vs 2b, I checked what Valgrind does -- it also returns
failure-EINVAL for its internal-use signal. (Valgrind only needs
64 for internal use, because it doesn't use glibc.)

So I think we should go with (2b).

> After I did that, I kept getting an EXCP_YIELD. I'm not sure how to handle
> this, but ignoring it seems to work. Again, I'm not sure that's safe.

This is pretty much the right thing. EXCP_YIELD is raised by the
'yield' instruction in order to cause execution of guest code to
return to QEMU's main loop for system emulation so we can schedule
a different guest CPU in an SMP config. In userspace emulation it's
not so useful, but since the yield insn is runnable in usermode it
can happen, and just ignoring it and continuing to run code is the
right thing to do. I'll write up a proper patch that includes a
comment about what's going on.

thanks
-- PMM

Re: [Qemu-devel] [PATCH 29/38] ivshmem: Implement shm=... with a memory backend

2016-03-01 Thread Paolo Bonzini



On 29/02/2016 19:40, Markus Armbruster wrote:
> ivshmem has its very own code to create and map shared memory.
> Replace that with an implicitly created memory backend.  Reduces the
> number of ways we create BAR 2 from three to two.
> 
> Signed-off-by: Markus Armbruster 

Much appreciated, but do not use user_creatable_add_opts.  Instead,
create the object with object_initialize, object_property_set_* and
user_creatable_complete.  After the object_initialize, add it with
object_property_add_child *under the ivshmem device itself*, giving it a
name like "internal-shm-backend".

This matches what virtio-blk dataplane used to do for x-dataplane (now
removed).

Thanks,

Paolo

> +static HostMemoryBackend *desugar_shm(const char *shm, size_t size)
> +{
> +/* TODO avoid the detour through QemuOpts */
> +static int counter;
> +QemuOpts *opts = qemu_opts_create(qemu_find_opts("object"),
> +  NULL, 0, &error_abort);
> +char *path;
> +Object *obj;
> +
> +qemu_opt_set(opts, "qom-type", "memory-backend-file",
> +&error_abort);
> +/* FIXME need a better way to make up an ID */
> +qemu_opts_set_id(opts, g_strdup_printf("ivshmem-backend-%d", counter++));
> +path = g_strdup_printf("/dev/shm/%s", shm);
> +qemu_opt_set(opts, "mem-path", path, &error_abort);
> +qemu_opt_set_number(opts, "size", size, &error_abort);
> +qemu_opt_set_bool(opts, "share", true, &error_abort);
> +g_free(path);
> +
> +obj = user_creatable_add_opts(opts, &error_abort);
> +qemu_opts_del(opts);
> +
> +user_creatable_complete(obj, &error_abort);
> +
> +return MEMORY_BACKEND(obj);
> +}
> +
>  static void pci_ivshmem_realize(PCIDevice *dev, Error **errp)
>  {
>  IVShmemState *s = IVSHMEM(dev);
> @@ -911,6 +914,10 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error 
> **errp)
>  attr |= PCI_BASE_ADDRESS_MEM_TYPE_64;
>  }
>  
> +if (s->shmobj) {
> +s->hostmem = desugar_shm(s->shmobj, s->ivshmem_size);
> +}
> +
>  if (s->hostmem != NULL) {
>  MemoryRegion *mr;
>  
> @@ -921,7 +928,7 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error 
> **errp)
>  vmstate_register_ram(mr, DEVICE(s));
>  memory_region_add_subregion(&s->bar, 0, mr);
>  pci_register_bar(PCI_DEVICE(s), 2, attr, &s->bar);
> -} else if (s->server_chr != NULL) {
> +} else {
>  IVSHMEM_DPRINTF("using shared memory server (socket = %s)\n",
>  s->server_chr->filename);
>  
> @@ -948,36 +955,6 @@ static void pci_ivshmem_realize(PCIDevice *dev, Error 
> **errp)
>  error_setg(errp, "failed to initialize interrupts");
>  return;
>  }
> -} else {
> -/* just map the file immediately, we're not using a server */
> -int fd;
> -
> -IVSHMEM_DPRINTF("using shm_open (shm object = %s)\n", s->shmobj);
> -
> -/* try opening with O_EXCL and if it succeeds zero the memory
> - * by truncating to 0 */
> -if ((fd = shm_open(s->shmobj, O_CREAT|O_RDWR|O_EXCL,
> -S_IRWXU|S_IRWXG|S_IRWXO)) > 0) {
> -   /* truncate file to length PCI device's memory */
> -if (ftruncate(fd, s->ivshmem_size) != 0) {
> -error_report("could not truncate shared file");
> -}
> -
> -} else if ((fd = shm_open(s->shmobj, O_CREAT|O_RDWR,
> -S_IRWXU|S_IRWXG|S_IRWXO)) < 0) {
> -error_setg(errp, "could not open shared file");
> -return;
> -}
> -
> -if (check_shm_size(s, fd, errp) == -1) {
> -return;
> -}
> -
> -create_shared_memory_BAR(s, fd, attr, &err);
> -if (err) {
> -error_propagate(errp, err);
> -return;
> -}
>  }
>  
>  if (s->role_val == IVSHMEM_PEER) {
>

Re: [Qemu-devel] [PATCH 30/38] ivshmem: Simplify memory regions for BAR 2 (shared memory)

2016-03-01 Thread Paolo Bonzini



On 29/02/2016 19:40, Markus Armbruster wrote:
> ivshmem_realize() puts the shared memory region in a container region.
> Used to be necessary to permit delayed mapping of the shared memory.
> Now we don't do that anymore, the container is redundant.  Drop it.

Can you explain why we don't do that anymore to someone who hasn't read
patches 4 to 28? :-)  Is it patch 23?

Paolo

Re: [Qemu-devel] [PATCH 30/38] ivshmem: Simplify memory regions for BAR 2 (shared memory)

2016-03-01 Thread Paolo Bonzini



On 29/02/2016 19:40, Markus Armbruster wrote:
> -memory_region_init_ram_ptr(&s->ivshmem, OBJECT(s),
> +s->ivshmem_bar2 = g_new(MemoryRegion, 1);
> +memory_region_init_ram_ptr(s->ivshmem_bar2, OBJECT(s),
> "ivshmem.bar2", s->ivshmem_size, ptr);
> -qemu_set_ram_fd(s->ivshmem.ram_addr, fd);
> -vmstate_register_ram(&s->ivshmem, DEVICE(s));
> -memory_region_add_subregion(&s->bar, 0, &s->ivshmem);
> +qemu_set_ram_fd(s->ivshmem_bar2->ram_addr, fd);

This is missing an instance_finalize callback to do

if (s->ivshmem_bar2) {
object_unparent(s->ivshmem_bar2);
g_free(s->ivshmem_bar2);
}

or, alternatively just use a flag (e.g. s->bar2_mapped) and allocate it
directly in the IVShmemState struct.

Paolo

Re: [Qemu-devel] [PATCH] log: Redirect stderr to logfile if deamonized

2016-03-01 Thread Daniel P. Berrange

On Tue, Mar 01, 2016 at 12:15:21PM +0100, Gerd Hoffmann wrote:
> On Do, 2016-02-18 at 13:38 +0200, Dimitris Aragiorgis wrote:
> > In case of daemonize, use the logfile passed with the -D option in
> > order to redirect stderr to it instead of /dev/null.
> > 
> > Also remove some unused code in log.h.
> 
> Patch breaks interaction with libvirt.  libvirt hangs on startup, while
> probing qemu capabilities.  qemu runs in probing mode (command line is
> "/home/kraxel/projects/qemu/build-default/x86_64-softmmu/qemu-system-x86_64 
> -S -no-user-config -nodefaults -nographic -M none -qmp 
> unix:/var/lib/libvirt/qemu/capabilities.monitor.sock,server,nowait -pidfile 
> /var/lib/libvirt/qemu/capabilities.pidfile -daemonize" according to 
> "systemctl status libvirtd -l"), apparently both qemu and libvirt wait for 
> each other.

When libvirt is probing capabilities it passes a pipe file descriptor
for stderr. It reads from this pipe to detect any errors printed by
QEMU before daemonizing. When QEMU daemonizes, it closes this FD and
sets stderr to /dev/null. Libvirt knows qemu has successfully started
up at this point.

With this patch though, this code:

@@ -275,7 +276,10 @@ void os_setup_post(void)

 dup2(fd, 0);
 dup2(fd, 1);
-dup2(fd, 2);
+/* In case -D is given do not redirect stderr to /dev/null */
+if (!qemu_logfile) {
+dup2(fd, 2);
+}

 close(fd);

means that QEMU will never close stderr anymore, so libvirt thinks
QEMU is still starting up forever.

Given current libvirt behaviour / expectations, I think the only
option is to revert this change.

IMHO if applications want qemu logs to go to stderr, they should
explicitly ask for that to happen via a CLI arg.

Regards,
Daniel
-- 
|: http://berrange.com  -o-http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org  -o- http://virt-manager.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org   -o-   http://live.gnome.org/gtk-vnc :|

[Qemu-devel] [PATCH] log: use strtok_r

2016-03-01 Thread Paolo Bonzini

Signed-off-by: Paolo Bonzini 
---
 include/sysemu/os-win32.h |  1 +
 util/log.c| 30 --
 util/oslib-win32.c| 35 +++
 3 files changed, 44 insertions(+), 22 deletions(-)

diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h
index fbed346..042633f 100644
--- a/include/sysemu/os-win32.h
+++ b/include/sysemu/os-win32.h
@@ -80,6 +80,7 @@ struct tm *gmtime_r(const time_t *timep, struct tm *result);
 struct tm *localtime_r(const time_t *timep, struct tm *result);
 #endif /* CONFIG_LOCALTIME_R */
 
+char *strtok_r(char *str, const char *delim, char **saveptr);
 
 static inline void os_setup_signal_handling(void) {}
 static inline void os_daemonize(void) {}
diff --git a/util/log.c b/util/log.c
index 8b921de..e4f2679 100644
--- a/util/log.c
+++ b/util/log.c
@@ -140,40 +140,29 @@ const QEMULogItem qemu_log_items[] = {
 { 0, NULL, NULL },
 };
 
-static int cmp1(const char *s1, int n, const char *s2)
-{
-if (strlen(s2) != n) {
-return 0;
-}
-return memcmp(s1, s2, n) == 0;
-}
-
 /* takes a comma separated list of log masks. Return 0 if error. */
 int qemu_str_to_log_mask(const char *str)
 {
 const QEMULogItem *item;
 int mask;
-const char *p, *p1;
+char *copy, *p, *p1;
 
-p = str;
+copy = strdup(str);
 mask = 0;
-for (;;) {
-p1 = strchr(p, ',');
-if (!p1) {
-p1 = p + strlen(p);
-}
-if (cmp1(p,p1-p,"all")) {
+for (p = strtok_r(copy, ",", &p1); p;
+ p = strtok_r(NULL, ",", &p1)) {
+if (!strcmp(p,"all")) {
 for (item = qemu_log_items; item->mask != 0; item++) {
 mask |= item->mask;
 }
 #ifdef CONFIG_TRACE_LOG
-} else if (strncmp(p, "trace:", 6) == 0 && p + 6 != p1) {
+} else if (strncmp(p, "trace:", 6) == 0 && p[6] != 0) {
 trace_enable_events(p + 6);
 mask |= LOG_TRACE;
 #endif
 } else {
 for (item = qemu_log_items; item->mask != 0; item++) {
-if (cmp1(p, p1 - p, item->name)) {
+if (!strcmp(p, item->name)) {
 goto found;
 }
 }
@@ -181,11 +170,8 @@ int qemu_str_to_log_mask(const char *str)
 found:
 mask |= item->mask;
 }
-if (*p1 != ',') {
-break;
-}
-p = p1 + 1;
 }
+g_free(copy);
 return mask;
 }
 
diff --git a/util/oslib-win32.c b/util/oslib-win32.c
index 438cfa4..9f94871 100644
--- a/util/oslib-win32.c
+++ b/util/oslib-win32.c
@@ -504,3 +504,38 @@ pid_t qemu_fork(Error **errp)
  "cannot fork child process");
 return -1;
 }
+
+/*
+ * public domain strtok_r() by Charlie Gordon
+ *
+ *   from comp.lang.c  9/14/2007
+ *
+ *  http://groups.google.com/group/comp.lang.c/msg/2ab1ecbb86646684
+ *
+ * (Declaration that it's public domain):
+ *  http://groups.google.com/group/comp.lang.c/msg/7c7b39328fefab9c
+ */
+char *strtok_r(char *str, const char *delim, char **saveptr)
+{
+char *ret;
+
+if (!str) {
+str = *saveptr;
+}
+
+/* Ignore delimiters at beginning of string.  */
+str += strspn(str, delim);
+if (!*str) {
+/* Ignore delimiters at end of string too.  */
+return NULL;
+}
+
+ret = str;
+str += strcspn(str, delim);
+if (*str) {
+*str++ = '\0';
+}
+
+*saveptr = str;
+return ret;
+}
-- 
2.5.0

Re: [Qemu-devel] [PATCH] log: Redirect stderr to logfile if deamonized

2016-03-01 Thread Paolo Bonzini



On 01/03/2016 12:15, Gerd Hoffmann wrote:
> On Do, 2016-02-18 at 13:38 +0200, Dimitris Aragiorgis wrote:
>> In case of daemonize, use the logfile passed with the -D option in
>> order to redirect stderr to it instead of /dev/null.
>>
>> Also remove some unused code in log.h.
> 
> Patch breaks interaction with libvirt.  libvirt hangs on startup, while
> probing qemu capabilities.  qemu runs in probing mode (command line is
> "/home/kraxel/projects/qemu/build-default/x86_64-softmmu/qemu-system-x86_64 
> -S -no-user-config -nodefaults -nographic -M none -qmp 
> unix:/var/lib/libvirt/qemu/capabilities.monitor.sock,server,nowait -pidfile 
> /var/lib/libvirt/qemu/capabilities.pidfile -daemonize" according to 
> "systemctl status libvirtd -l"), apparently both qemu and libvirt wait for 
> each other.

Patch sent, as a workaround use "./configure --enable-trace-backend=nop".

Paolo

[Qemu-devel] [PATCH] log: do not log if QEMU is daemonized but without -D

2016-03-01 Thread Paolo Bonzini

Commit 96c33a4 ("log: Redirect stderr to logfile if deamonized",
2016-02-22) wanted to move stderr of a daemonized QEMU to the file
specified with -D.

However, if -D was not passed, the patch had the side effect of not
redirecting stderr to /dev/null.  This happened because qemu_logfile
was set to stderr rather than the expected value of NULL.  The fix
is simply in the "if" condition of do_qemu_set_log; the "if" for
closing the file is also changed to match.

Reported-by: Jan Tomko 
Signed-off-by: Paolo Bonzini 
---
 util/log.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/util/log.c b/util/log.c
index a7ddc7e..8b921de 100644
--- a/util/log.c
+++ b/util/log.c
@@ -56,7 +56,8 @@ void do_qemu_set_log(int log_flags, bool use_own_buffers)
 #ifdef CONFIG_TRACE_LOG
 qemu_loglevel |= LOG_TRACE;
 #endif
-if ((qemu_loglevel || is_daemonized()) && !qemu_logfile) {
+if (!qemu_logfile &&
+(is_daemonized() ? logfilename != NULL : qemu_loglevel)) {
 if (logfilename) {
 qemu_logfile = fopen(logfilename, log_append ? "a" : "w");
 if (!qemu_logfile) {
@@ -72,6 +73,7 @@ void do_qemu_set_log(int log_flags, bool use_own_buffers)
 }
 } else {
 /* Default to stderr if no log file specified */
+assert(!is_daemonized());
 qemu_logfile = stderr;
 }
 /* must avoid mmap() usage of glibc by setting a buffer "by hand" */
@@ -89,7 +91,8 @@ void do_qemu_set_log(int log_flags, bool use_own_buffers)
 log_append = 1;
 }
 }
-if (!qemu_loglevel && !is_daemonized() && qemu_logfile) {
+if (qemu_logfile &&
+(is_daemonized() ? logfilename == NULL : !qemu_loglevel)) {
 qemu_log_close();
 }
 }
-- 
2.5.0

Re: [Qemu-devel] [PATCH] log: Redirect stderr to logfile if deamonized

2016-03-01 Thread Paolo Bonzini



On 01/03/2016 12:47, Daniel P. Berrange wrote:
> means that QEMU will never close stderr anymore, so libvirt thinks
> QEMU is still starting up forever.
> 
> Given current libvirt behaviour / expectations, I think the only
> option is to revert this change.

Why not fix it instead? :)

Paolo

Re: [Qemu-devel] [PATCH] log: use strtok_r

2016-03-01 Thread Paolo Bonzini

Sent by mistake, sorry.

Paolo

On 01/03/2016 12:48, Paolo Bonzini wrote:
> Signed-off-by: Paolo Bonzini 
> ---
>  include/sysemu/os-win32.h |  1 +
>  util/log.c| 30 --
>  util/oslib-win32.c| 35 +++
>  3 files changed, 44 insertions(+), 22 deletions(-)
> 
> diff --git a/include/sysemu/os-win32.h b/include/sysemu/os-win32.h
> index fbed346..042633f 100644
> --- a/include/sysemu/os-win32.h
> +++ b/include/sysemu/os-win32.h
> @@ -80,6 +80,7 @@ struct tm *gmtime_r(const time_t *timep, struct tm *result);
>  struct tm *localtime_r(const time_t *timep, struct tm *result);
>  #endif /* CONFIG_LOCALTIME_R */
>  
> +char *strtok_r(char *str, const char *delim, char **saveptr);
>  
>  static inline void os_setup_signal_handling(void) {}
>  static inline void os_daemonize(void) {}
> diff --git a/util/log.c b/util/log.c
> index 8b921de..e4f2679 100644
> --- a/util/log.c
> +++ b/util/log.c
> @@ -140,40 +140,29 @@ const QEMULogItem qemu_log_items[] = {
>  { 0, NULL, NULL },
>  };
>  
> -static int cmp1(const char *s1, int n, const char *s2)
> -{
> -if (strlen(s2) != n) {
> -return 0;
> -}
> -return memcmp(s1, s2, n) == 0;
> -}
> -
>  /* takes a comma separated list of log masks. Return 0 if error. */
>  int qemu_str_to_log_mask(const char *str)
>  {
>  const QEMULogItem *item;
>  int mask;
> -const char *p, *p1;
> +char *copy, *p, *p1;
>  
> -p = str;
> +copy = strdup(str);
>  mask = 0;
> -for (;;) {
> -p1 = strchr(p, ',');
> -if (!p1) {
> -p1 = p + strlen(p);
> -}
> -if (cmp1(p,p1-p,"all")) {
> +for (p = strtok_r(copy, ",", &p1); p;
> + p = strtok_r(NULL, ",", &p1)) {
> +if (!strcmp(p,"all")) {
>  for (item = qemu_log_items; item->mask != 0; item++) {
>  mask |= item->mask;
>  }
>  #ifdef CONFIG_TRACE_LOG
> -} else if (strncmp(p, "trace:", 6) == 0 && p + 6 != p1) {
> +} else if (strncmp(p, "trace:", 6) == 0 && p[6] != 0) {
>  trace_enable_events(p + 6);
>  mask |= LOG_TRACE;
>  #endif
>  } else {
>  for (item = qemu_log_items; item->mask != 0; item++) {
> -if (cmp1(p, p1 - p, item->name)) {
> +if (!strcmp(p, item->name)) {
>  goto found;
>  }
>  }
> @@ -181,11 +170,8 @@ int qemu_str_to_log_mask(const char *str)
>  found:
>  mask |= item->mask;
>  }
> -if (*p1 != ',') {
> -break;
> -}
> -p = p1 + 1;
>  }
> +g_free(copy);
>  return mask;
>  }
>  
> diff --git a/util/oslib-win32.c b/util/oslib-win32.c
> index 438cfa4..9f94871 100644
> --- a/util/oslib-win32.c
> +++ b/util/oslib-win32.c
> @@ -504,3 +504,38 @@ pid_t qemu_fork(Error **errp)
>   "cannot fork child process");
>  return -1;
>  }
> +
> +/*
> + * public domain strtok_r() by Charlie Gordon
> + *
> + *   from comp.lang.c  9/14/2007
> + *
> + *  http://groups.google.com/group/comp.lang.c/msg/2ab1ecbb86646684
> + *
> + * (Declaration that it's public domain):
> + *  http://groups.google.com/group/comp.lang.c/msg/7c7b39328fefab9c
> + */
> +char *strtok_r(char *str, const char *delim, char **saveptr)
> +{
> +char *ret;
> +
> +if (!str) {
> +str = *saveptr;
> +}
> +
> +/* Ignore delimiters at beginning of string.  */
> +str += strspn(str, delim);
> +if (!*str) {
> +/* Ignore delimiters at end of string too.  */
> +return NULL;
> +}
> +
> +ret = str;
> +str += strcspn(str, delim);
> +if (*str) {
> +*str++ = '\0';
> +}
> +
> +*saveptr = str;
> +return ret;
> +}
>

[Qemu-devel] [PATCH] linux-user: arm: Handle (ignore) EXCP_YIELD in ARM cpu_loop()

2016-03-01 Thread Peter Maydell

The new-in-ARMv8 YIELD instruction has been implemented to throw
an EXCP_YIELD back up to the QEMU main loop. In system emulation
we use this to decide to schedule a different guest CPU in SMP
configurations. In usermode emulation there is nothing to do,
so just ignore it and resume the guest.

This prevents an abort with "unhandled CPU exception 0x10004"
if the guest process uses the YIELD instruction.

Reported-by: Hunter Laux 
Signed-off-by: Peter Maydell 
---
 linux-user/main.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/linux-user/main.c b/linux-user/main.c
index 700724e..64b0058 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -871,6 +871,9 @@ void cpu_loop(CPUARMState *env)
 if (do_kernel_trap(env))
   goto error;
 break;
+case EXCP_YIELD:
+/* nothing to do here for user-mode, just resume guest code */
+break;
 default:
 error:
 EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", 
trapnr);
@@ -1061,6 +1064,9 @@ void cpu_loop(CPUARMState *env)
 case EXCP_SEMIHOST:
 env->xregs[0] = do_arm_semihosting(env);
 break;
+case EXCP_YIELD:
+/* nothing to do here for user-mode, just resume guest code */
+break;
 default:
 EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", 
trapnr);
 abort();
-- 
1.9.1

[Qemu-devel] [PULL 03/14] s390x: fix debug statement in trigger_page_fault()

2016-03-01 Thread Cornelia Huck

From: David Hildenbrand 

When mmu_translate debugging output is enabled, code won't compile.
Let's just use the same statement as in trigger_prot_fault().

Acked-by: Cornelia Huck 
Signed-off-by: David Hildenbrand 
Signed-off-by: Cornelia Huck 
---
 target-s390x/mmu_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target-s390x/mmu_helper.c b/target-s390x/mmu_helper.c
index 5323c53..b11a027 100644
--- a/target-s390x/mmu_helper.c
+++ b/target-s390x/mmu_helper.c
@@ -90,7 +90,7 @@ static void trigger_page_fault(CPUS390XState *env, 
target_ulong vaddr,
 
 tec = vaddr | (rw == MMU_DATA_STORE ? FS_WRITE : FS_READ) | asc >> 46;
 
-DPRINTF("%s: vaddr=%016" PRIx64 " bits=%d\n", __func__, vaddr, bits);
+DPRINTF("%s: trans_exc_code=%016" PRIx64 "\n", __func__, tec);
 
 if (!exc) {
 return;
-- 
2.7.2

[Qemu-devel] [PULL 04/14] s390x: remove {kvm_}s390_virtio_irq()

2016-03-01 Thread Cornelia Huck

This interface was only used by the old virtio machine and therefore
is not needed anymore.

Reviewed-by: David Hildenbrand 
Reviewed-by: Halil Pasic 
Signed-off-by: Cornelia Huck 
---
 target-s390x/cpu.h   |  5 -
 target-s390x/interrupt.c | 11 ---
 target-s390x/kvm.c   | 11 ---
 3 files changed, 27 deletions(-)

diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
index 6ae5699..49c8415 100644
--- a/target-s390x/cpu.h
+++ b/target-s390x/cpu.h
@@ -470,10 +470,8 @@ void s390x_tod_timer(void *opaque);
 void s390x_cpu_timer(void *opaque);
 
 int s390_virtio_hypercall(CPUS390XState *env);
-void s390_virtio_irq(int config_change, uint64_t token);
 
 #ifdef CONFIG_KVM
-void kvm_s390_virtio_irq(int config_change, uint64_t token);
 void kvm_s390_service_interrupt(uint32_t parm);
 void kvm_s390_vcpu_interrupt(S390CPU *cpu, struct kvm_s390_irq *irq);
 void kvm_s390_floating_interrupt(struct kvm_s390_irq *irq);
@@ -484,9 +482,6 @@ int kvm_s390_mem_op(S390CPU *cpu, vaddr addr, uint8_t ar, 
void *hostbuf,
 int kvm_s390_get_clock(uint8_t *tod_high, uint64_t *tod_clock);
 int kvm_s390_set_clock(uint8_t *tod_high, uint64_t *tod_clock);
 #else
-static inline void kvm_s390_virtio_irq(int config_change, uint64_t token)
-{
-}
 static inline void kvm_s390_service_interrupt(uint32_t parm)
 {
 }
diff --git a/target-s390x/interrupt.c b/target-s390x/interrupt.c
index 07d210a..bad60a7 100644
--- a/target-s390x/interrupt.c
+++ b/target-s390x/interrupt.c
@@ -28,17 +28,6 @@ void s390_sclp_extint(uint32_t parm)
 }
 }
 
-void s390_virtio_irq(int config_change, uint64_t token)
-{
-if (kvm_enabled()) {
-kvm_s390_virtio_irq(config_change, token);
-} else {
-S390CPU *dummy_cpu = s390_cpu_addr2state(0);
-
-cpu_inject_ext(dummy_cpu, EXT_VIRTIO, config_change, token);
-}
-}
-
 void s390_io_interrupt(uint16_t subchannel_id, uint16_t subchannel_nr,
uint32_t io_int_parm, uint32_t io_int_word)
 {
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index 6763656..e1859ca 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -936,17 +936,6 @@ void kvm_s390_floating_interrupt(struct kvm_s390_irq *irq)
 __kvm_s390_floating_interrupt(irq);
 }
 
-void kvm_s390_virtio_irq(int config_change, uint64_t token)
-{
-struct kvm_s390_irq irq = {
-.type = KVM_S390_INT_VIRTIO,
-.u.ext.ext_params = config_change,
-.u.ext.ext_params2 = token,
-};
-
-kvm_s390_floating_interrupt(&irq);
-}
-
 void kvm_s390_service_interrupt(uint32_t parm)
 {
 struct kvm_s390_irq irq = {
-- 
2.7.2

[Qemu-devel] [PULL 02/14] s390x/kvm: sync fprs via kvm_run

2016-03-01 Thread Cornelia Huck

From: David Hildenbrand 

We can now also sync the fprs via kvm_run, avoiding one ioctl.

Reviewed-by: Christian Borntraeger 
Signed-off-by: David Hildenbrand 
Signed-off-by: Cornelia Huck 
---
 target-s390x/kvm.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index 9b21b96..6763656 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -342,6 +342,12 @@ int kvm_arch_put_registers(CPUState *cs, int level)
 }
 cs->kvm_run->s.regs.fpc = env->fpc;
 cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_VRS;
+} else if (can_sync_regs(cs, KVM_SYNC_FPRS)) {
+for (i = 0; i < 16; i++) {
+cs->kvm_run->s.regs.fprs[i] = get_freg(env, i)->ll;
+}
+cs->kvm_run->s.regs.fpc = env->fpc;
+cs->kvm_run->kvm_dirty_regs |= KVM_SYNC_FPRS;
 } else {
 /* Floating point */
 for (i = 0; i < 16; i++) {
@@ -482,6 +488,11 @@ int kvm_arch_get_registers(CPUState *cs)
 env->vregs[i][1].ll = cs->kvm_run->s.regs.vrs[i][1];
 }
 env->fpc = cs->kvm_run->s.regs.fpc;
+} else if (can_sync_regs(cs, KVM_SYNC_FPRS)) {
+for (i = 0; i < 16; i++) {
+get_freg(env, i)->ll = cs->kvm_run->s.regs.fprs[i];
+}
+env->fpc = cs->kvm_run->s.regs.fpc;
 } else {
 r = kvm_vcpu_ioctl(cs, KVM_GET_FPU, &fpu);
 if (r < 0) {
-- 
2.7.2

[Qemu-devel] [PULL 11/14] s390x/pci: use PCI_MSIX_FLAGS on retrieving the MSIX entries

2016-03-01 Thread Cornelia Huck

From: Wei Yang 

Even though PCI_CAP_FLAGS has the same value as PCI_MSIX_FLAGS, the latter
is the more appropriate one for retrieving MSIX entries.

This patch uses PCI_MSIX_FLAGS to retrieve the MSIX entries.

Signed-off-by: Wei Yang 
CC: Cornelia Huck 
CC: Christian Borntraeger 
Message-Id: <1455895091-7589-3-git-send-email-richard.weiy...@gmail.com>
Signed-off-by: Cornelia Huck 
---
 hw/s390x/s390-pci-bus.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index 5d6cebb..dba0202 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -524,7 +524,7 @@ static int s390_pcihost_setup_msix(S390PCIBusDevice *pbdev)
 return 0;
 }
 
-ctrl = pci_host_config_read_common(pbdev->pdev, pos + PCI_CAP_FLAGS,
+ctrl = pci_host_config_read_common(pbdev->pdev, pos + PCI_MSIX_FLAGS,
  pci_config_size(pbdev->pdev), sizeof(ctrl));
 table = pci_host_config_read_common(pbdev->pdev, pos + PCI_MSIX_TABLE,
  pci_config_size(pbdev->pdev), sizeof(table));
-- 
2.7.2

[Qemu-devel] [PULL 00/14] s390x patches

2016-03-01 Thread Cornelia Huck

The following changes since commit 9c74a853048f14fd9a3e2efa1e3a6935d00e7495:

  Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into 
staging (2016-03-01 09:54:53 +0000)

are available in the git repository at:

  git://github.com/cohuck/qemu tags/s390x-20160301

for you to fetch changes up to ce350f32e4bb9638085f585329fb5d751676d2d2:

  s390x/css: only suspend when enabled by orb (2016-03-01 12:15:29 +0100)


Assorted fixes, cleanups and enhancements.



Cornelia Huck (4):
  linux-headers: update against kvm/next
  s390x: remove {kvm_}s390_virtio_irq()
  s390x/virtio: old machine leftovers
  s390x/css: only suspend when enabled by orb

David Hildenbrand (2):
  s390x/kvm: sync fprs via kvm_run
  s390x: fix debug statement in trigger_page_fault()

Eduardo Habkost (2):
  s390x/css: Allocate channel_subsys statically
  s390x/css: Use static initialization for channel_subsys fields

Sascha Silbe (1):
  watchdog/diag288: avoid race condition on expired watchdog

Thomas Huth (2):
  MAINTAINERS: Remove the old s390-virtio machine
  MAINTAINERS: Remove entry for hw/s390x/s390-virtio-bus.[ch]

Wei Yang (1):
  s390x/pci: use PCI_MSIX_FLAGS on retrieving the MSIX entries

Yi Min Zhao (2):
  s390x/css: introduce indicator refcounting interfaces
  s390x/pci: fix reg/dereg irq functions

 MAINTAINERS|   7 -
 hw/s390x/css.c | 260 +++--
 hw/s390x/css.h |  14 ++
 hw/s390x/s390-pci-bus.c|   2 +-
 hw/s390x/s390-pci-bus.h|   2 +
 hw/s390x/s390-pci-inst.c   |  23 ++--
 hw/s390x/s390-virtio.c |   2 -
 hw/s390x/virtio-ccw.c  |  63 -
 hw/s390x/virtio-ccw.h  |  11 +-
 hw/watchdog/wdt_diag288.c  |  12 +-
 linux-headers/asm-arm/unistd.h |   2 +
 linux-headers/asm-powerpc/unistd.h |  13 +-
 linux-headers/asm-s390/kvm.h   |  11 +-
 linux-headers/asm-s390/unistd.h|   3 +-
 linux-headers/asm-x86/unistd_32.h  |   1 +
 linux-headers/asm-x86/unistd_64.h  |   1 +
 linux-headers/asm-x86/unistd_x32.h |   1 +
 linux-headers/linux/kvm.h  |   9 +-
 linux-headers/linux/vfio.h |   9 ++
 target-s390x/cpu.h |   5 -
 target-s390x/interrupt.c   |  11 --
 target-s390x/kvm.c |  22 ++--
 target-s390x/mmu_helper.c  |   2 +-
 23 files changed, 246 insertions(+), 240 deletions(-)

-- 
2.7.2

[Qemu-devel] [PULL 05/14] watchdog/diag288: avoid race condition on expired watchdog

2016-03-01 Thread Cornelia Huck

From: Sascha Silbe 

When configured to inject an NMI, watchdog_perform_action() may cause
the BQL to be temporarily relinquished (inject_nmi() → ... →
s390_nmi() → s390_cpu_restart() → run_on_cpu()). When the guest issues
diag 288 again in response to the NMI, the diag 288 operation will
race against wdt_diag288_reset(). Depending on scheduler behaviour,
wdt_diag288_reset() may be run after the guest issued a diag 288
Init. As a result, we will cancel the timer the guest just set up. The
effect observed by the guest is that a second expiry does not trigger
the watchdog action and diag 288 Change operations fail.

Fix this by resetting the timer _before_ invoking the action.

Signed-off-by: Sascha Silbe 
Acked-by: David Hildenbrand 
Signed-off-by: Cornelia Huck 
---
 hw/watchdog/wdt_diag288.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/hw/watchdog/wdt_diag288.c b/hw/watchdog/wdt_diag288.c
index 5eb5b94..1c3658e 100644
--- a/hw/watchdog/wdt_diag288.c
+++ b/hw/watchdog/wdt_diag288.c
@@ -51,15 +51,19 @@ static void diag288_reset(void *opaque)
 static void diag288_timer_expired(void *dev)
 {
 qemu_log_mask(CPU_LOG_RESET, "Watchdog timer expired.\n");
-watchdog_perform_action();
-/* Reset the watchdog only if the guest was notified about expiry. */
+/* Reset the watchdog only if the guest gets notified about
+ * expiry. watchdog_perform_action() may temporarily relinquish
+ * the BQL; reset before triggering the action to avoid races with
+ * diag288 instructions. */
 switch (get_watchdog_action()) {
 case WDT_DEBUG:
 case WDT_NONE:
 case WDT_PAUSE:
- return;
+break;
+default:
+wdt_diag288_reset(dev);
 }
-wdt_diag288_reset(dev);
+watchdog_perform_action();
 }
 
 static int wdt_diag288_handle_timer(DIAG288State *diag288,
-- 
2.7.2

[Qemu-devel] [PULL 13/14] MAINTAINERS: Remove entry for hw/s390x/s390-virtio-bus.[ch]

2016-03-01 Thread Cornelia Huck

From: Thomas Huth 

The files have been deleted recently, no need to keep these entries
anymore.

Signed-off-by: Thomas Huth 
Message-Id: <1456397100-22746-1-git-send-email-th...@redhat.com>
Signed-off-by: Cornelia Huck 
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 2774f31..13d1b4d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -663,7 +663,6 @@ M: Alexander Graf 
 S: Supported
 F: hw/char/sclp*.[hc]
 F: hw/s390x/
-X: hw/s390x/s390-virtio-bus.[ch]
 F: include/hw/s390x/
 F: pc-bios/s390-ccw/
 F: hw/watchdog/wdt_diag288.c
-- 
2.7.2

[Qemu-devel] [PULL 08/14] s390x/pci: fix reg/dereg irq functions

2016-03-01 Thread Cornelia Huck

From: Yi Min Zhao 

Indicator refcounting interfaces have been introduced. Use them to avoid
creating unneeded indicator mappings and to fix the failure to release
AISB mappings on deregistration.

Signed-off-by: Yi Min Zhao 
Reviewed-by: Cornelia Huck 
Signed-off-by: Cornelia Huck 
---
 hw/s390x/s390-pci-bus.h  |  2 ++
 hw/s390x/s390-pci-inst.c | 23 +++
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/hw/s390x/s390-pci-bus.h b/hw/s390x/s390-pci-bus.h
index d8ddb77..59fd5c9 100644
--- a/hw/s390x/s390-pci-bus.h
+++ b/hw/s390x/s390-pci-bus.h
@@ -233,6 +233,8 @@ typedef struct S390PCIBusDevice {
 AddressSpace as;
 MemoryRegion mr;
 MemoryRegion iommu_mr;
+IndAddr *summary_ind;
+IndAddr *indicator;
 } S390PCIBusDevice;
 
 typedef struct S390pciState {
diff --git a/hw/s390x/s390-pci-inst.c b/hw/s390x/s390-pci-inst.c
index fe73ca8..506147d 100644
--- a/hw/s390x/s390-pci-inst.c
+++ b/hw/s390x/s390-pci-inst.c
@@ -621,19 +621,19 @@ int pcistb_service_call(S390CPU *cpu, uint8_t r1, uint8_t 
r3, uint64_t gaddr,
 
 static int reg_irqs(CPUS390XState *env, S390PCIBusDevice *pbdev, ZpciFib fib)
 {
-int ret;
-S390FLICState *fs = s390_get_flic();
-S390FLICStateClass *fsc = S390_FLIC_COMMON_GET_CLASS(fs);
+int ret, len;
 
 ret = css_register_io_adapter(S390_PCIPT_ADAPTER,
   FIB_DATA_ISC(ldl_p(&fib.data)), true, false,
   &pbdev->routes.adapter.adapter_id);
 assert(ret == 0);
 
-fsc->io_adapter_map(fs, pbdev->routes.adapter.adapter_id,
-ldq_p(&fib.aisb), true);
-fsc->io_adapter_map(fs, pbdev->routes.adapter.adapter_id,
-ldq_p(&fib.aibv), true);
+pbdev->summary_ind = get_indicator(ldq_p(&fib.aisb), sizeof(uint64_t));
+len = BITS_TO_LONGS(FIB_DATA_NOI(ldl_p(&fib.data))) * sizeof(unsigned 
long);
+pbdev->indicator = get_indicator(ldq_p(&fib.aibv), len);
+
+map_indicator(&pbdev->routes.adapter, pbdev->summary_ind);
+map_indicator(&pbdev->routes.adapter, pbdev->indicator);
 
 pbdev->routes.adapter.summary_addr = ldq_p(&fib.aisb);
 pbdev->routes.adapter.summary_offset = FIB_DATA_AISBO(ldl_p(&fib.data));
@@ -649,12 +649,11 @@ static int reg_irqs(CPUS390XState *env, S390PCIBusDevice 
*pbdev, ZpciFib fib)
 
 static int dereg_irqs(S390PCIBusDevice *pbdev)
 {
-S390FLICState *fs = s390_get_flic();
-S390FLICStateClass *fsc = S390_FLIC_COMMON_GET_CLASS(fs);
-
-fsc->io_adapter_map(fs, pbdev->routes.adapter.adapter_id,
-pbdev->routes.adapter.ind_addr, false);
+release_indicator(&pbdev->routes.adapter, pbdev->summary_ind);
+release_indicator(&pbdev->routes.adapter, pbdev->indicator);
 
+pbdev->summary_ind = NULL;
+pbdev->indicator = NULL;
 pbdev->routes.adapter.summary_addr = 0;
 pbdev->routes.adapter.summary_offset = 0;
 pbdev->routes.adapter.ind_addr = 0;
-- 
2.7.2

1 2 3 4 >

1 - 100 of 394 matches

Mail list logo