date:20160315

Re: [Qemu-devel] [RFC v3 2/2] spapr: implement query-hotpluggable-cpus QMP command

2016-03-15 Thread David Gibson

On Wed, 16 Mar 2016 10:49:41 +0530
Bharata B Rao  wrote:

> On Tue, Mar 15, 2016 at 02:24:08PM +0100, Igor Mammedov wrote:
> > it returns a list of present/possible to hotplug CPU
> > objects with a list of properties to use with
> > device_add.
> > 
> > in spapr case returned list would look like:  
> > -> { "execute": "query-hotpluggable-cpus" }  
> > <- {"return": [
> >  { "props": { "core": 1 }, "type": "spapr-cpu-core",
> >"vcpus-count": 2 },
> >  { "props": { "core": 0 }, "type": "spapr-cpu-core",
> >"vcpus-count": 2,
> >"qom-path": "/machine/unattached/device[0]"}
> >]}'
> > 
> > TODO:
> >   add 'node' property for core <-> numa node mapping
> > 
> > Signed-off-by: Igor Mammedov 
> > ---
> > it's only compile tested
> > ---
> >  hw/ppc/spapr.c | 32 
> >  1 file changed, 32 insertions(+)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index b1e9ba2..e1ce983 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -65,6 +65,7 @@
> >  #include "hw/compat.h"
> >  #include "qemu-common.h"
> >  #include "hw/ppc/spapr_cpu_core.h"
> > +#include "qmp-commands.h"
> > 
> >  #include <libfdt.h>
> > 
> > @@ -2399,6 +2400,37 @@ static unsigned 
> > spapr_cpu_index_to_socket_id(unsigned cpu_index)
> >  return cpu_index / smp_threads / smp_cores;
> >  }
> > 
> > +HotpluggableCPUList *qmp_query_hotpluggable_cpus(Error **errp)
> > +{
> > +int i;
> > +HotpluggableCPUList *head = NULL;
> > +sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> > +int spapr_max_cores = max_cpus / smp_threads;
> > +
> > +for (i = 0; i < spapr_max_cores; i++) {
> > +HotpluggableCPUList *list_item = g_new0(typeof(*list_item), 1);
> > +HotpluggableCPU *cpu_item = g_new0(typeof(*cpu_item), 1);
> > +CpuInstanceProperties *cpu_props = g_new0(typeof(*cpu_props), 1);
> > +
> > +cpu_item->type = g_strdup(TYPE_SPAPR_CPU_CORE);
> > +cpu_item->vcpus_count = smp_threads;  
> 
> Shouldn't this be fetched from "threads" property of the core device
> instead of directly using smp_threads ? But again, what that would mean
> for not-yet-plugged in cores and how to get that for them is a question.

Yeah, I think Igor's patch is correct here.  The information flow goes
the other direction: the machine type code advertises smp_threads here,
which management then passes back to the device_add spapr-core in the
threads property.

-- 
David Gibson 
Senior Software Engineer, Virtualization, Red Hat


pgpeHd_KwzseK.pgp
Description: OpenPGP digital signature

Re: [Qemu-devel] [RFC PATCH v2 9/9] spapr: CPU hot unplug support

2016-03-15 Thread Bharata B Rao

On Wed, Mar 16, 2016 at 04:27:04PM +1100, David Gibson wrote:
> On Fri, Mar 11, 2016 at 10:24:38AM +0530, Bharata B Rao wrote:
> > Remove the CPU core device by removing the underlying CPU thread devices.
> > Hot removal of CPU for sPAPR guests is achieved by sending the hot unplug
> > notification to the guest. Release the vCPU object after CPU hot unplug so
> > that vCPU fd can be parked and reused.
> > 
> > Signed-off-by: Bharata B Rao 
> > ---
> >  hw/ppc/spapr.c  | 21 ++
> >  hw/ppc/spapr_cpu_core.c | 86 
> > +
> >  include/hw/ppc/spapr.h  |  1 +
> >  include/hw/ppc/spapr_cpu_core.h | 12 ++
> >  4 files changed, 120 insertions(+)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 822c87d..b1e9ba2 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -2345,7 +2345,12 @@ static void spapr_machine_device_plug(HotplugHandler 
> > *hotplug_dev,
> >  
> >  spapr_memory_plug(hotplug_dev, dev, node, errp);
> >  } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
> > +/*
> > + * TODO: Move this check to pre_plug handler at which point
> > + * spapr_core_release() won't be necessary.
> > + */
> >  if (!smc->dr_cpu_enabled && dev->hotplugged) {
> > +spapr_core_release(dev);
> >  error_setg(errp, "CPU hotplug not supported for this machine");
> >  return;
> >  }
> 
> This hunk doesn't look like it's related to unplug.  Did it belong in
> another patch?

Yes, it actually belongs to hot-plug, but the whole infrastructure to release
the core and associated threads gets introduced in this patch, hence I put
this hunk here.

However, if pre_plug is the way to go forward, we wouldn't need this
altogether, as roll-back is much easier from there than from here.

Regards,
Bharata.

Re: [Qemu-devel] [RFC PATCH v2 9/9] spapr: CPU hot unplug support

2016-03-15 Thread David Gibson

On Fri, Mar 11, 2016 at 10:24:38AM +0530, Bharata B Rao wrote:
> Remove the CPU core device by removing the underlying CPU thread devices.
> Hot removal of CPU for sPAPR guests is achieved by sending the hot unplug
> notification to the guest. Release the vCPU object after CPU hot unplug so
> that vCPU fd can be parked and reused.
> 
> Signed-off-by: Bharata B Rao 
> ---
>  hw/ppc/spapr.c  | 21 ++
>  hw/ppc/spapr_cpu_core.c | 86 
> +
>  include/hw/ppc/spapr.h  |  1 +
>  include/hw/ppc/spapr_cpu_core.h | 12 ++
>  4 files changed, 120 insertions(+)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 822c87d..b1e9ba2 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2345,7 +2345,12 @@ static void spapr_machine_device_plug(HotplugHandler 
> *hotplug_dev,
>  
>  spapr_memory_plug(hotplug_dev, dev, node, errp);
>  } else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
> +/*
> + * TODO: Move this check to pre_plug handler at which point
> + * spapr_core_release() won't be necessary.
> + */
>  if (!smc->dr_cpu_enabled && dev->hotplugged) {
> +spapr_core_release(dev);
>  error_setg(errp, "CPU hotplug not supported for this machine");
>  return;
>  }

This hunk doesn't look like it's related to unplug.  Did it belong in
another patch?

> @@ -2353,11 +2358,27 @@ static void spapr_machine_device_plug(HotplugHandler 
> *hotplug_dev,
>  }
>  }
>  
> +void spapr_cpu_destroy(PowerPCCPU *cpu)
> +{
> +sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> +
> +xics_cpu_destroy(spapr->icp, cpu);
> +qemu_unregister_reset(spapr_cpu_reset, cpu);
> +}
> +
>  static void spapr_machine_device_unplug(HotplugHandler *hotplug_dev,
>DeviceState *dev, Error **errp)
>  {
> +sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
> +
>  if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
>  error_setg(errp, "Memory hot unplug not supported by sPAPR");
> +} else if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) {
> +if (!smc->dr_cpu_enabled) {
> +error_setg(errp, "CPU hot unplug not supported on this machine");
> +return;
> +}
> +spapr_core_unplug(hotplug_dev, dev, errp);
>  }
>  }
>  
> diff --git a/hw/ppc/spapr_cpu_core.c b/hw/ppc/spapr_cpu_core.c
> index db8de32..dd391bd 100644
> --- a/hw/ppc/spapr_cpu_core.c
> +++ b/hw/ppc/spapr_cpu_core.c
> @@ -84,6 +84,92 @@ void spapr_core_plug(HotplugHandler *hotplug_dev, 
> DeviceState *dev,
>  }
>  }
>  
> +static void spapr_cpu_core_cleanup(struct sPAPRCPUUnplugList *unplug_list)
> +{
> +sPAPRCPUUnplug *unplug, *next;
> +Object *cpu;
> +
> +QLIST_FOREACH_SAFE(unplug, unplug_list, node, next) {
> +cpu = unplug->cpu;
> +object_unparent(cpu);
> +QLIST_REMOVE(unplug, node);
> +g_free(unplug);
> +}
> +}
> +
> +static void spapr_add_cpu_to_unplug_list(Object *cpu,
> + struct sPAPRCPUUnplugList 
> *unplug_list)
> +{
> +sPAPRCPUUnplug *unplug = g_malloc(sizeof(*unplug));
> +
> +unplug->cpu = cpu;
> +QLIST_INSERT_HEAD(unplug_list, unplug, node);
> +}
> +
> +static int spapr_cpu_release(Object *obj, void *opaque)
> +{
> +DeviceState *dev = DEVICE(obj);
> +CPUState *cs = CPU(dev);
> +PowerPCCPU *cpu = POWERPC_CPU(cs);
> +struct sPAPRCPUUnplugList *unplug_list = opaque;
> +
> +spapr_cpu_destroy(cpu);
> +cpu_remove_sync(cs);
> +
> +/*
> + * We are still walking the core object's children list, and
> + * hence can't cleanup this CPU thread object just yet. Put
> + * it on a list for later removal.
> + */
> +spapr_add_cpu_to_unplug_list(obj, unplug_list);
> +return 0;
> +}
> +
> +void spapr_core_release(DeviceState *dev)
> +{
> +struct sPAPRCPUUnplugList unplug_list;
> +sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> +sPAPRCPUCore *core = SPAPR_CPU_CORE(OBJECT(dev));
> +int core_dt_id = object_property_get_int(OBJECT(dev), "core", NULL);
> +int smt = kvmppc_smt_threads();
> +
> +QLIST_INIT(&unplug_list);
> +object_child_foreach(OBJECT(dev), spapr_cpu_release, &unplug_list);
> +spapr_cpu_core_cleanup(&unplug_list);
> +spapr->cores[core_dt_id / smt] = NULL;
> +
> +g_free(core->threads);
> +}
> +
> +static void spapr_core_release_unparent(DeviceState *dev, void *opaque)
> +{
> +spapr_core_release(dev);
> +object_unparent(OBJECT(dev));
> +}
> +
> +void spapr_core_unplug(HotplugHandler *hotplug_dev, DeviceState *dev,
> +   Error **errp)
> +{
> +sPAPRCPUCore *core = SPAPR_CPU_CORE(OBJECT(dev));
> +PowerPCCPU *cpu = &core->threads[0];
> +int id = ppc_get_vcpu_dt_id(cpu);
> +sPAPRDRConnector *drc =
> +

Re: [Qemu-devel] [RFC PATCH v2 7/9] spapr: CPU hotplug support

2016-03-15 Thread David Gibson

On Fri, Mar 11, 2016 at 10:24:36AM +0530, Bharata B Rao wrote:
> Set up device tree entries for the hotplugged CPU core and use the
> existing RTAS event logging infrastructure to send CPU hotplug notification
> to the guest.
> 
> Signed-off-by: Bharata B Rao 
> ---
>  hw/ppc/spapr.c  | 64 ++
>  hw/ppc/spapr_cpu_core.c | 69 
> +
>  hw/ppc/spapr_events.c   |  3 ++
>  hw/ppc/spapr_rtas.c | 24 ++
>  include/hw/ppc/spapr.h  |  2 ++
>  include/hw/ppc/spapr_cpu_core.h |  2 ++
>  6 files changed, 164 insertions(+)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index cffe8c8..822c87d 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -603,6 +603,18 @@ static void spapr_populate_cpu_dt(CPUState *cs, void 
> *fdt, int offset,
>  size_t page_sizes_prop_size;
>  uint32_t vcpus_per_socket = smp_threads * smp_cores;
>  uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
> +sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
> +sPAPRDRConnector *drc;
> +sPAPRDRConnectorClass *drck;
> +int drc_index;
> +
> +if (smc->dr_cpu_enabled) {
> +drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index);
> +g_assert(drc);

Small nit: rather than asserting here it might be simpler to just
check for drc != NULL instead of checking smc->dr_cpu_enabled.  That
should have the same effect for now, and will be correct if we ever
have some pluggable and some non-pluggable CPUs...

> +drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> +drc_index = drck->get_index(drc);
> +_FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)));
> +}
>  
>  /* Note: we keep CI large pages off for now because a 64K capable guest
>   * provisioned with large pages might otherwise try to map a qemu
> @@ -987,6 +999,16 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr,
>  _FDT(spapr_drc_populate_dt(fdt, 0, NULL, 
> SPAPR_DR_CONNECTOR_TYPE_LMB));
>  }
>  
> +if (smc->dr_cpu_enabled) {
> +int offset = fdt_path_offset(fdt, "/cpus");
> +ret = spapr_drc_populate_dt(fdt, offset, NULL,
> +SPAPR_DR_CONNECTOR_TYPE_CPU);
> +if (ret < 0) {
> +error_report("Couldn't set up CPU DR device tree properties");
> +exit(1);
> +}
> +}
> +
>  _FDT((fdt_pack(fdt)));
>  
>  if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
> @@ -1622,6 +1644,8 @@ static void spapr_boot_set(void *opaque, const char 
> *boot_device,
>  void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu, Error **errp)
>  {
>  CPUPPCState *env = &cpu->env;
> +CPUState *cs = CPU(cpu);
> +int i;
>  
>  /* Set time-base frequency to 512 MHz */
>  cpu_ppc_tb_init(env, TIMEBASE_FREQ);
> @@ -1646,6 +1670,14 @@ void spapr_cpu_init(sPAPRMachineState *spapr, 
> PowerPCCPU *cpu, Error **errp)
>  }
>  }
>  
> +/* Set NUMA node for the added CPUs  */
> +for (i = 0; i < nb_numa_nodes; i++) {
> +if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) {
> +cs->numa_node = i;
> +break;
> +}
> +}
> +
>  xics_cpu_setup(spapr->icp, cpu);
>  
>  qemu_register_reset(spapr_cpu_reset, cpu);
> @@ -1824,6 +1856,11 @@ static void ppc_spapr_init(MachineState *machine)
>  
>  for (i = 0; i < spapr_max_cores; i++) {
>  int core_dt_id = i * smt;
> +sPAPRDRConnector *drc =
> +spapr_dr_connector_new(OBJECT(spapr),
> +   SPAPR_DR_CONNECTOR_TYPE_CPU, 
> core_dt_id);
> +
> +qemu_register_reset(spapr_drc_reset, drc);

...at least it will be if you make construction of the DRC object
conditional on dr_cpu_enabled.

>  if (i < spapr_cores) {
>  Object *core  = object_new(TYPE_SPAPR_CPU_CORE);
> @@ -2246,6 +2283,27 @@ out:
>  error_propagate(errp, local_err);
>  }
>  
> +void *spapr_populate_hotplug_cpu_dt(DeviceState *dev, CPUState *cs,
> +int *fdt_offset, sPAPRMachineState 
> *spapr)
> +{
> +PowerPCCPU *cpu = POWERPC_CPU(cs);
> +DeviceClass *dc = DEVICE_GET_CLASS(cs);
> +int id = ppc_get_vcpu_dt_id(cpu);
> +void *fdt;
> +int offset, fdt_size;
> +char *nodename;
> +
> +fdt = create_device_tree(&fdt_size);
> +nodename = g_strdup_printf("%s@%x", dc->fw_name, id);
> +offset = fdt_add_subnode(fdt, 0, nodename);
> +
> +spapr_populate_cpu_dt(cs, fdt, offset, spapr);
> +g_free(nodename);
> +
> +*fdt_offset = offset;
> +return fdt;
> +}
> +
>  static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
>DeviceState *dev, Error **errp)
>  {
> @@ -2286,6 +2344,12 @@ static void

Re: [Qemu-devel] [PATCH 5/8] virtio-blk: fix "disabled data plane" mode

2016-03-15 Thread tu bo




On 03/15/2016 08:45 PM, Fam Zheng wrote:

On Fri, 03/11 11:28, Paolo Bonzini wrote:



On 10/03/2016 10:40, Christian Borntraeger wrote:

On 03/10/2016 10:03 AM, Christian Borntraeger wrote:

On 03/10/2016 02:51 AM, Fam Zheng wrote:
[...]

The aio_poll() inside "blk_set_aio_context(s->conf->conf.blk, s->ctx)" looks
suspicious:

main thread  iothread

 virtio_blk_handle_output()
  virtio_blk_data_plane_start()
   vblk->dataplane_started = true;
   blk_set_aio_context()
bdrv_set_aio_context()
 bdrv_drain()
  aio_poll()
   
virtio_blk_handle_output()
 /* s->dataplane_started is true */
!!!   ->virtio_blk_handle_request()
  event_notifier_set(ioeventfd)
 aio_poll()
  
virtio_blk_handle_request()

Christian, could you try the following patch? The aio_poll above is replaced
with a "limited aio_poll" that doesn't dispatch ioeventfd.

(Note: perhaps moving "vblk->dataplane_started = true;" after
blk_set_aio_context() also *works around* this.)

---

diff --git a/block.c b/block.c
index ba24b8e..e37e8f7 100644
--- a/block.c
+++ b/block.c
@@ -4093,7 +4093,9 @@ void bdrv_attach_aio_context(BlockDriverState *bs,

  void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
  {
-bdrv_drain(bs); /* ensure there are no in-flight requests */
+/* ensure there are no in-flight requests */
+bdrv_drained_begin(bs);
+bdrv_drained_end(bs);

  bdrv_detach_aio_context(bs);



That seems to do the trick.


Or not. Crashed again :-(


I would put bdrv_drained_end just before aio_context_release.


This won't work. bdrv_drained_end must be called with the same ctx as
bdrv_drained_begin, which is only true before bdrv_detach_aio_context().



But secondarily, I'm thinking of making the logic simpler to understand
in two ways:

1) adding a mutex around virtio_blk_data_plane_start/stop.

2) moving

 event_notifier_set(virtio_queue_get_host_notifier(s->vq));
 virtio_queue_aio_set_host_notifier_handler(s->vq, s->ctx, true, true);

to a bottom half (created with aio_bh_new in s->ctx).  The bottom half
takes the mutex, checks again "if (vblk->dataplane_started)" and if it's
true starts the processing.


Like this? If it captures your idea, could Bo or Christian help test?




With this patch, I still can get qemu crash as before,
(gdb) bt
#0  bdrv_co_do_rw (opaque=0x0) at block/io.c:2172
#1  0x02aa17f5a4a6 in coroutine_trampoline (i0=, 
i1=-1677707808) at util/coroutine-ucontext.c:79

#2  0x03ffac25150a in __makecontext_ret () from /lib64/libc.so.6


Good news is that the frequency of qemu crashes is much lower than before.


---


 From b5b8886693828d498ee184fc7d4e13d8c06cdf39 Mon Sep 17 00:00:00 2001
From: Fam Zheng 
Date: Thu, 10 Mar 2016 10:26:36 +0800
Subject: [PATCH] virtio-blk dataplane start crash fix

Suggested-by: Paolo Bonzini 
Signed-off-by: Fam Zheng 
---
  block.c |  4 +++-
  hw/block/dataplane/virtio-blk.c | 39 ---
  2 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/block.c b/block.c
index ba24b8e..e37e8f7 100644
--- a/block.c
+++ b/block.c
@@ -4093,7 +4093,9 @@ void bdrv_attach_aio_context(BlockDriverState *bs,

  void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
  {
-bdrv_drain(bs); /* ensure there are no in-flight requests */
+/* ensure there are no in-flight requests */
+bdrv_drained_begin(bs);
+bdrv_drained_end(bs);

  bdrv_detach_aio_context(bs);

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index 36f3d2b..6db5c22 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -49,6 +49,8 @@ struct VirtIOBlockDataPlane {

  /* Operation blocker on BDS */
  Error *blocker;
+
+QemuMutex start_lock;
  };

  /* Raise an interrupt to signal guest, if necessary */
@@ -150,6 +152,7 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, 
VirtIOBlkConf *conf,
  s = g_new0(VirtIOBlockDataPlane, 1);
  s->vdev = vdev;
  s->conf = conf;
+qemu_mutex_init(&s->start_lock);

  if (conf->iothread) {
  s->iothread = conf->iothread;
@@ -184,15 +187,38 @@ void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane 
*s)
  g_free(s);
  }

+typedef struct {
+VirtIOBlockDataPlane *s;
+QEMUBH *bh;
+} VirtIOBlockStartData;
+
+static void virtio_blk_data_plane_start_bh_cb(void *opaque)
+{
+VirtIOBlockStartData *data = opaque;
+VirtIOBlockDataPlane *s = data->s;
+
+/* Kick right away to begin processing requests already in vring */
+event_notifier_set(virtio_queue_get_host_notifier(s->vq));
+
+/* Get this

Re: [Qemu-devel] [RFC v3 2/2] spapr: implement query-hotpluggable-cpus QMP command

2016-03-15 Thread Bharata B Rao

On Tue, Mar 15, 2016 at 02:24:08PM +0100, Igor Mammedov wrote:
> it returns a list of present/possible to hotplug CPU
> objects with a list of properties to use with
> device_add.
> 
> in spapr case returned list would look like:
> -> { "execute": "query-hotpluggable-cpus" }
> <- {"return": [
>  { "props": { "core": 1 }, "type": "spapr-cpu-core",
>"vcpus-count": 2 },
>  { "props": { "core": 0 }, "type": "spapr-cpu-core",
>"vcpus-count": 2,
>"qom-path": "/machine/unattached/device[0]"}
>]}'
> 
> TODO:
>   add 'node' property for core <-> numa node mapping
> 
> Signed-off-by: Igor Mammedov 
> ---
> it's only compile tested
> ---
>  hw/ppc/spapr.c | 32 
>  1 file changed, 32 insertions(+)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index b1e9ba2..e1ce983 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -65,6 +65,7 @@
>  #include "hw/compat.h"
>  #include "qemu-common.h"
>  #include "hw/ppc/spapr_cpu_core.h"
> +#include "qmp-commands.h"
> 
>  #include <libfdt.h>
> 
> @@ -2399,6 +2400,37 @@ static unsigned spapr_cpu_index_to_socket_id(unsigned 
> cpu_index)
>  return cpu_index / smp_threads / smp_cores;
>  }
> 
> +HotpluggableCPUList *qmp_query_hotpluggable_cpus(Error **errp)
> +{
> +int i;
> +HotpluggableCPUList *head = NULL;
> +sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> +int spapr_max_cores = max_cpus / smp_threads;
> +
> +for (i = 0; i < spapr_max_cores; i++) {
> +HotpluggableCPUList *list_item = g_new0(typeof(*list_item), 1);
> +HotpluggableCPU *cpu_item = g_new0(typeof(*cpu_item), 1);
> +CpuInstanceProperties *cpu_props = g_new0(typeof(*cpu_props), 1);
> +
> +cpu_item->type = g_strdup(TYPE_SPAPR_CPU_CORE);
> +cpu_item->vcpus_count = smp_threads;

Shouldn't this be fetched from "threads" property of the core device
instead of directly using smp_threads ? But again, what that would mean
for not-yet-plugged in cores and how to get that for them is a question.

Regards,
Bharata.

[Qemu-devel] [PULL 12/16] spapr_pci: Eliminate class callbacks

2016-03-15 Thread David Gibson

The EEH operations in the spapr-vfio-pci-host-bridge no longer rely on the
special groupid field in sPAPRPHBVFIOState.  So we can simplify, removing
the class specific callbacks with direct calls based on a simple
spapr_phb_eeh_enabled() helper.  For now we implement that in terms of
a boolean in the class, but we'll continue to clean that up later.

On its own this is a rather strange way of doing things, but it's a useful
intermediate step to further cleanups.

Signed-off-by: David Gibson 
Reviewed-by: Alexey Kardashevskiy 
---
 hw/ppc/spapr_pci.c  | 44 ++--
 hw/ppc/spapr_pci_vfio.c | 18 +++---
 include/hw/pci-host/spapr.h | 37 +
 3 files changed, 62 insertions(+), 37 deletions(-)

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 9f40db2..c4c5e7e 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -92,6 +92,13 @@ PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, 
uint64_t buid,
 return pci_find_device(phb->bus, bus_num, devfn);
 }
 
+static bool spapr_phb_eeh_available(sPAPRPHBState *sphb)
+{
+sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
+
+return spc->eeh_available;
+}
+
 static uint32_t rtas_pci_cfgaddr(uint32_t arg)
 {
 /* This handles the encoding of extended config space addresses */
@@ -440,7 +447,6 @@ static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
 target_ulong rets)
 {
 sPAPRPHBState *sphb;
-sPAPRPHBClass *spc;
 uint32_t addr, option;
 uint64_t buid;
 int ret;
@@ -458,12 +464,11 @@ static void rtas_ibm_set_eeh_option(PowerPCCPU *cpu,
 goto param_error_exit;
 }
 
-spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
-if (!spc->eeh_set_option) {
+if (!spapr_phb_eeh_available(sphb)) {
 goto param_error_exit;
 }
 
-ret = spc->eeh_set_option(sphb, addr, option);
+ret = spapr_phb_vfio_eeh_set_option(sphb, addr, option);
 rtas_st(rets, 0, ret);
 return;
 
@@ -478,7 +483,6 @@ static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu,
target_ulong rets)
 {
 sPAPRPHBState *sphb;
-sPAPRPHBClass *spc;
 PCIDevice *pdev;
 uint32_t addr, option;
 uint64_t buid;
@@ -493,8 +497,7 @@ static void rtas_ibm_get_config_addr_info2(PowerPCCPU *cpu,
 goto param_error_exit;
 }
 
-spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
-if (!spc->eeh_set_option) {
+if (!spapr_phb_eeh_available(sphb)) {
 goto param_error_exit;
 }
 
@@ -534,7 +537,6 @@ static void rtas_ibm_read_slot_reset_state2(PowerPCCPU *cpu,
 target_ulong rets)
 {
 sPAPRPHBState *sphb;
-sPAPRPHBClass *spc;
 uint64_t buid;
 int state, ret;
 
@@ -548,12 +550,11 @@ static void rtas_ibm_read_slot_reset_state2(PowerPCCPU 
*cpu,
 goto param_error_exit;
 }
 
-spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
-if (!spc->eeh_get_state) {
+if (!spapr_phb_eeh_available(sphb)) {
 goto param_error_exit;
 }
 
-ret = spc->eeh_get_state(sphb, &state);
+ret = spapr_phb_vfio_eeh_get_state(sphb, &state);
 rtas_st(rets, 0, ret);
 if (ret != RTAS_OUT_SUCCESS) {
 return;
@@ -578,7 +579,6 @@ static void rtas_ibm_set_slot_reset(PowerPCCPU *cpu,
 target_ulong rets)
 {
 sPAPRPHBState *sphb;
-sPAPRPHBClass *spc;
 uint32_t option;
 uint64_t buid;
 int ret;
@@ -594,12 +594,11 @@ static void rtas_ibm_set_slot_reset(PowerPCCPU *cpu,
 goto param_error_exit;
 }
 
-spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
-if (!spc->eeh_reset) {
+if (!spapr_phb_eeh_available(sphb)) {
 goto param_error_exit;
 }
 
-ret = spc->eeh_reset(sphb, option);
+ret = spapr_phb_vfio_eeh_reset(sphb, option);
 rtas_st(rets, 0, ret);
 return;
 
@@ -614,7 +613,6 @@ static void rtas_ibm_configure_pe(PowerPCCPU *cpu,
   target_ulong rets)
 {
 sPAPRPHBState *sphb;
-sPAPRPHBClass *spc;
 uint64_t buid;
 int ret;
 
@@ -628,12 +626,11 @@ static void rtas_ibm_configure_pe(PowerPCCPU *cpu,
 goto param_error_exit;
 }
 
-spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
-if (!spc->eeh_configure) {
+if (!spapr_phb_eeh_available(sphb)) {
 goto param_error_exit;
 }
 
-ret = spc->eeh_configure(sphb);
+ret = spapr_phb_vfio_eeh_configure(sphb);
 rtas_st(rets, 0, ret);
 return;
 
@@ -649,7 +646,6 @@ static void rtas_ibm_slot_error_detail(PowerPCCPU *cpu,
target_ulong rets)
 {
 sPAPRPHBState *sphb;
-sPAPRPHBClass *spc;
 int option;
 uint64_t buid;
 
@@ -663,8 +659,7 @@ static void rtas_ibm_slot_error_detail(PowerPCCPU *cpu,
 goto param_error_exit;
 }
 
-spc =

[Qemu-devel] [PULL 06/16] target-ppc: Split out SREGS get/put functions

2016-03-15 Thread David Gibson

Currently the getting and setting of Power MMU registers (sregs) take up
large inline chunks of the kvm_arch_get_registers() and
kvm_arch_put_registers() functions.  Especially since there are two
variants (for Book-E and Book-S CPUs), only one of which will be used in
practice, this is pretty hard to read.

This patch splits these out into helper functions for clarity.  No
functional change is expected.

Signed-off-by: David Gibson 
Reviewed-by: Thomas Huth 
Reviewed-by: Alexey Kardashevskiy 
Reviewed-by: Greg Kurz 
---
 target-ppc/kvm.c | 421 ++-
 1 file changed, 228 insertions(+), 193 deletions(-)

diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index d67c169..4161f64 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -867,6 +867,44 @@ static int kvm_put_vpa(CPUState *cs)
 }
 #endif /* TARGET_PPC64 */
 
+static int kvmppc_put_books_sregs(PowerPCCPU *cpu)
+{
+CPUPPCState *env = &cpu->env;
+struct kvm_sregs sregs;
+int i;
+
+sregs.pvr = env->spr[SPR_PVR];
+
+sregs.u.s.sdr1 = env->spr[SPR_SDR1];
+
+/* Sync SLB */
+#ifdef TARGET_PPC64
+for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
+sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
+if (env->slb[i].esid & SLB_ESID_V) {
+sregs.u.s.ppc64.slb[i].slbe |= i;
+}
+sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
+}
+#endif
+
+/* Sync SRs */
+for (i = 0; i < 16; i++) {
+sregs.u.s.ppc32.sr[i] = env->sr[i];
+}
+
+/* Sync BATs */
+for (i = 0; i < 8; i++) {
+/* Beware. We have to swap upper and lower bits here */
+sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
+| env->DBAT[1][i];
+sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
+| env->IBAT[1][i];
+}
+
+return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
+}
+
 int kvm_arch_put_registers(CPUState *cs, int level)
 {
 PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -920,39 +958,8 @@ int kvm_arch_put_registers(CPUState *cs, int level)
 }
 
 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
-struct kvm_sregs sregs;
-
-sregs.pvr = env->spr[SPR_PVR];
-
-sregs.u.s.sdr1 = env->spr[SPR_SDR1];
-
-/* Sync SLB */
-#ifdef TARGET_PPC64
-for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
-sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
-if (env->slb[i].esid & SLB_ESID_V) {
-sregs.u.s.ppc64.slb[i].slbe |= i;
-}
-sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
-}
-#endif
-
-/* Sync SRs */
-for (i = 0; i < 16; i++) {
-sregs.u.s.ppc32.sr[i] = env->sr[i];
-}
-
-/* Sync BATs */
-for (i = 0; i < 8; i++) {
-/* Beware. We have to swap upper and lower bits here */
-sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
-| env->DBAT[1][i];
-sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
-| env->IBAT[1][i];
-}
-
-ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
-if (ret) {
+ret = kvmppc_put_books_sregs(cpu);
+if (ret < 0) {
 return ret;
 }
 }
@@ -1014,12 +1021,197 @@ static void kvm_sync_excp(CPUPPCState *env, int 
vector, int ivor)
  env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
 }
 
+static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
+{
+CPUPPCState *env = &cpu->env;
+struct kvm_sregs sregs;
+int ret;
+
+ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
+if (ret < 0) {
+return ret;
+}
+
+if (sregs.u.e.features & KVM_SREGS_E_BASE) {
+env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
+env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
+env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
+env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
+env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
+env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
+env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
+env->spr[SPR_DECR] = sregs.u.e.dec;
+env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
+env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
+env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
+}
+
+if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
+env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
+env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
+env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
+env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
+env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
+}
+
+if (sregs.u.e.features & KVM_SREGS_E_64) {
+env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
+}
+
+if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
+env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
+}
+
+if (sregs.u.e.features &

[Qemu-devel] [PULL 13/16] spapr_pci: Allow EEH on spapr-pci-host-bridge

2016-03-15 Thread David Gibson

Now that the EEH code is independent of the special
spapr-vfio-pci-host-bridge device, we can allow it on all spapr PCI
host bridges instead.  We do this by changing spapr_phb_eeh_available()
to be based on the vfio_eeh_as_ok() call instead of the host bridge class.

Because the value of vfio_eeh_as_ok() can change with devices being
hotplugged or unplugged, this can potentially lead to some strange edge
cases where the guest starts using EEH, then it starts failing because
of a change in status.

However, it's not really any worse than the current situation.  Cases that
would have worked previously will still work (i.e. VFIO devices from at
most one VFIO IOMMU group per vPHB), it's just that it's no longer
necessary to use spapr-vfio-pci-host-bridge with the groupid pre-specified.

Signed-off-by: David Gibson 
Reviewed-by: Alexey Kardashevskiy 
---
 hw/ppc/spapr_pci.c  | 10 ++
 hw/ppc/spapr_pci_vfio.c |  6 +-
 include/hw/pci-host/spapr.h |  6 +-
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index c4c5e7e..3ec1823 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -42,6 +42,8 @@
 #include "hw/ppc/spapr_drc.h"
 #include "sysemu/device_tree.h"
 
+#include "hw/vfio/vfio.h"
+
 /* Copied from the kernel arch/powerpc/platforms/pseries/msi.c */
 #define RTAS_QUERY_FN   0
 #define RTAS_CHANGE_FN  1
@@ -92,13 +94,6 @@ PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, 
uint64_t buid,
 return pci_find_device(phb->bus, bus_num, devfn);
 }
 
-static bool spapr_phb_eeh_available(sPAPRPHBState *sphb)
-{
-sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(sphb);
-
-return spc->eeh_available;
-}
-
 static uint32_t rtas_pci_cfgaddr(uint32_t arg)
 {
 /* This handles the encoding of extended config space addresses */
@@ -1642,7 +1637,6 @@ static void spapr_phb_class_init(ObjectClass *klass, void 
*data)
 set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
 dc->cannot_instantiate_with_device_add_yet = false;
 spc->finish_realize = spapr_phb_finish_realize;
-spc->eeh_available = false;
 hp->plug = spapr_phb_hot_plug_child;
 hp->unplug = spapr_phb_hot_unplug_child;
 }
diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
index 10fa88a..16a4a8f 100644
--- a/hw/ppc/spapr_pci_vfio.c
+++ b/hw/ppc/spapr_pci_vfio.c
@@ -73,6 +73,11 @@ static void spapr_phb_vfio_finish_realize(sPAPRPHBState 
*sphb, Error **errp)
 spapr_tce_get_iommu(tcet));
 }
 
+bool spapr_phb_eeh_available(sPAPRPHBState *sphb)
+{
+return vfio_eeh_as_ok(&sphb->iommu_as);
+}
+
 static void spapr_phb_vfio_eeh_reenable(sPAPRPHBState *sphb)
 {
 vfio_eeh_as_op(&sphb->iommu_as, VFIO_EEH_PE_ENABLE);
@@ -240,7 +245,6 @@ static void spapr_phb_vfio_class_init(ObjectClass *klass, 
void *data)
 
 dc->props = spapr_phb_vfio_properties;
 spc->finish_realize = spapr_phb_vfio_finish_realize;
-spc->eeh_available = true;
 }
 
 static const TypeInfo spapr_phb_vfio_info = {
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 0b936c6..19a95e0 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -49,7 +49,6 @@ struct sPAPRPHBClass {
 PCIHostBridgeClass parent_class;
 
 void (*finish_realize)(sPAPRPHBState *sphb, Error **errp);
-bool eeh_available;
 };
 
 typedef struct spapr_pci_msi {
@@ -136,6 +135,7 @@ PCIDevice *spapr_pci_find_dev(sPAPRMachineState *spapr, 
uint64_t buid,
 
 /* VFIO EEH hooks */
 #ifdef CONFIG_LINUX
+bool spapr_phb_eeh_available(sPAPRPHBState *sphb);
 int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb,
   unsigned int addr, int option);
 int spapr_phb_vfio_eeh_get_state(sPAPRPHBState *sphb, int *state);
@@ -143,6 +143,10 @@ int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int 
option);
 int spapr_phb_vfio_eeh_configure(sPAPRPHBState *sphb);
 void spapr_phb_vfio_reset(DeviceState *qdev);
 #else
+static inline bool spapr_phb_eeh_available(sPAPRPHBState *sphb)
+{
+return false;
+}
 static inline int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb,
 unsigned int addr, int option)
 {
-- 
2.5.0

[Qemu-devel] [PULL 01/16] ppc: Define the PSPB register on POWER8

2016-03-15 Thread David Gibson

From: Thomas Huth 

POWER8 / PowerISA 2.07 has a new special purpose register called PSPB
("Problem State Priority Boost Register"). The contents of this register
are currently lost during migration. To be able to migrate this register,
too, we've got to define this SPR along with the other SPRs of POWER8.

Signed-off-by: Thomas Huth 
Signed-off-by: David Gibson 
---
 target-ppc/cpu.h| 1 +
 target-ppc/translate_init.c | 9 +
 2 files changed, 10 insertions(+)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 2b10597..8fc0fb4 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1380,6 +1380,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool 
ifetch)
 #define SPR_UAMOR (0x09D)
 #define SPR_MPC_ICTRL (0x09E)
 #define SPR_MPC_BAR   (0x09F)
+#define SPR_PSPB  (0x09F)
 #define SPR_VRSAVE(0x100)
 #define SPR_USPRG0(0x100)
 #define SPR_USPRG1(0x101)
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index bd0cffc..f72148c 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -7842,6 +7842,14 @@ static void gen_spr_power8_fscr(CPUPPCState *env)
  KVM_REG_PPC_FSCR, initval);
 }
 
+static void gen_spr_power8_pspb(CPUPPCState *env)
+{
+spr_register_kvm(env, SPR_PSPB, "PSPB",
+ SPR_NOACCESS, SPR_NOACCESS,
+ &spr_read_generic, &spr_write_generic32,
+ KVM_REG_PPC_PSPB, 0);
+}
+
 static void init_proc_book3s_64(CPUPPCState *env, int version)
 {
 gen_spr_ne_601(env);
@@ -7892,6 +7900,7 @@ static void init_proc_book3s_64(CPUPPCState *env, int 
version)
 gen_spr_power8_pmu_sup(env);
 gen_spr_power8_pmu_user(env);
 gen_spr_power8_tm(env);
+gen_spr_power8_pspb(env);
 gen_spr_vtb(env);
 }
 if (version < BOOK3S_CPU_POWER8) {
-- 
2.5.0

[Qemu-devel] [PULL 05/16] spapr_pci: fix multifunction hotplug

2016-03-15 Thread David Gibson

From: Michael Roth 

Since 3f1e147, QEMU has adopted a convention of supporting function
hotplug by deferring hotplug events until func 0 is hotplugged.
This is likely how management tools like libvirt would expose
such support going forward.

Since sPAPR guests rely on per-func events rather than
slot-based ones, our protocol has been to hotplug func 0 *first*,
so that devices never appear within guests without func 0
present, thereby avoiding undefined behavior.

To remain compatible with the new convention, defer hotplug in a
similar manner, but then generate events in 0-first order as we
did in the past. Once func 0 is present, fail any attempts to plug
additional functions (as we do with PCIe).

For unplug, defer unplug operations in a similar manner, but
generate unplug events such that function 0 is removed last in guest.

Signed-off-by: Michael Roth 
Signed-off-by: David Gibson 
---
 hw/ppc/spapr_pci.c | 93 ++
 1 file changed, 86 insertions(+), 7 deletions(-)

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 3fc7895..9f40db2 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1142,14 +1142,21 @@ static void 
spapr_phb_remove_pci_device(sPAPRDRConnector *drc,
 drck->detach(drc, DEVICE(pdev), spapr_phb_remove_pci_device_cb, phb, errp);
 }
 
-static sPAPRDRConnector *spapr_phb_get_pci_drc(sPAPRPHBState *phb,
-   PCIDevice *pdev)
+static sPAPRDRConnector *spapr_phb_get_pci_func_drc(sPAPRPHBState *phb,
+uint32_t busnr,
+int32_t devfn)
 {
-uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))));
 return spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_PCI,
 (phb->index << 16) |
 (busnr << 8) |
-pdev->devfn);
+devfn);
+}
+
+static sPAPRDRConnector *spapr_phb_get_pci_drc(sPAPRPHBState *phb,
+   PCIDevice *pdev)
+{
+uint32_t busnr = pci_bus_num(PCI_BUS(qdev_get_parent_bus(DEVICE(pdev))));
+return spapr_phb_get_pci_func_drc(phb, busnr, pdev->devfn);
 }
 
 static uint32_t spapr_phb_get_pci_drc_index(sPAPRPHBState *phb,
@@ -1173,6 +1180,8 @@ static void spapr_phb_hot_plug_child(HotplugHandler 
*plug_handler,
 PCIDevice *pdev = PCI_DEVICE(plugged_dev);
 sPAPRDRConnector *drc = spapr_phb_get_pci_drc(phb, pdev);
 Error *local_err = NULL;
+PCIBus *bus = PCI_BUS(qdev_get_parent_bus(DEVICE(pdev)));
+uint32_t slotnr = PCI_SLOT(pdev->devfn);
 
 /* if DR is disabled we don't need to do anything in the case of
  * hotplug or coldplug callbacks
@@ -1190,13 +1199,44 @@ static void spapr_phb_hot_plug_child(HotplugHandler 
*plug_handler,
 
 g_assert(drc);
 
+/* Following the QEMU convention used for PCIe multifunction
+ * hotplug, we do not allow functions to be hotplugged to a
+ * slot that already has function 0 present
+ */
+if (plugged_dev->hotplugged && bus->devices[PCI_DEVFN(slotnr, 0)] &&
+PCI_FUNC(pdev->devfn) != 0) {
+error_setg(errp, "PCI: slot %d function 0 already ocuppied by %s,"
+   " additional functions can no longer be exposed to guest.",
+   slotnr, bus->devices[PCI_DEVFN(slotnr, 0)]->name);
+return;
+}
+
 spapr_phb_add_pci_device(drc, phb, pdev, &local_err);
 if (local_err) {
 error_propagate(errp, local_err);
 return;
 }
-if (plugged_dev->hotplugged) {
-spapr_hotplug_req_add_by_index(drc);
+
+/* If this is function 0, signal hotplug for all the device functions.
+ * Otherwise defer sending the hotplug event.
+ */
+if (plugged_dev->hotplugged && PCI_FUNC(pdev->devfn) == 0) {
+int i;
+
+for (i = 0; i < 8; i++) {
+sPAPRDRConnector *func_drc;
+sPAPRDRConnectorClass *func_drck;
+sPAPRDREntitySense state;
+
+func_drc = spapr_phb_get_pci_func_drc(phb, pci_bus_num(bus),
+  PCI_DEVFN(slotnr, i));
+func_drck = SPAPR_DR_CONNECTOR_GET_CLASS(func_drc);
+func_drck->entity_sense(func_drc, &state);
+
+if (state == SPAPR_DR_ENTITY_SENSE_PRESENT) {
+spapr_hotplug_req_add_by_index(func_drc);
+}
+}
 }
 }
 
@@ -1219,12 +1259,51 @@ static void spapr_phb_hot_unplug_child(HotplugHandler 
*plug_handler,
 
 drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
 if (!drck->release_pending(drc)) {
+PCIBus *bus = PCI_BUS(qdev_get_parent_bus(DEVICE(pdev)));
+uint32_t slotnr = PCI_SLOT(pdev->devfn);
+sPAPRDRConnector *func_drc;
+sPAPRDRConnectorClass *func_drck;
+

[Qemu-devel] [PULL 14/16] spapr_pci: (Mostly) remove spapr-pci-vfio-host-bridge

2016-03-15 Thread David Gibson

Now that the regular spapr-pci-host-bridge can handle EEH, there are only
two things that spapr-pci-vfio-host-bridge does differently:
1. automatically sizes its DMA window to match the host IOMMU
2. checks if the attached VFIO container is backed by the
   VFIO_SPAPR_TCE_IOMMU type on the host

(1) is not particularly useful, since the default window used by the
regular host bridge will work with the host IOMMU configuration on all
current systems anyway.

Plus, automatically changing guest visible configuration (such as the DMA
window) based on host settings is generally a bad idea.  It's not
definitively broken, since spapr-pci-vfio-host-bridge is only supposed to
support VFIO devices which can't be migrated anyway, but still.

(2) is not really useful, because if a guest tries to configure EEH on a
different host IOMMU, the first call will fail and that will be that.

It's possible there are scripts or tools out there which expect
spapr-pci-vfio-host-bridge, so we don't remove it entirely.  This patch
reduces it to just a stub for backwards compatibility.

Signed-off-by: David Gibson 
Reviewed-by: Alexey Kardashevskiy 
---
 hw/ppc/spapr_pci_vfio.c | 61 +
 include/hw/pci-host/spapr.h | 11 
 2 files changed, 17 insertions(+), 55 deletions(-)

diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
index 16a4a8f..9e15924 100644
--- a/hw/ppc/spapr_pci_vfio.c
+++ b/hw/ppc/spapr_pci_vfio.c
@@ -23,54 +23,29 @@
 #include "hw/pci/msix.h"
 #include "linux/vfio.h"
 #include "hw/vfio/vfio.h"
+#include "qemu/error-report.h"
 
-static Property spapr_phb_vfio_properties[] = {
-DEFINE_PROP_INT32("iommu", sPAPRPHBVFIOState, iommugroupid, -1),
-DEFINE_PROP_END_OF_LIST(),
-};
+#define TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE "spapr-pci-vfio-host-bridge"
 
-static void spapr_phb_vfio_finish_realize(sPAPRPHBState *sphb, Error **errp)
-{
-sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
-struct vfio_iommu_spapr_tce_info info = { .argsz = sizeof(info) };
-int ret;
-sPAPRTCETable *tcet;
-uint32_t liobn = svphb->phb.dma_liobn;
+#define SPAPR_PCI_VFIO_HOST_BRIDGE(obj) \
+OBJECT_CHECK(sPAPRPHBVFIOState, (obj), TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE)
 
-if (svphb->iommugroupid == -1) {
-error_setg(errp, "Wrong IOMMU group ID %d", svphb->iommugroupid);
-return;
-}
+typedef struct sPAPRPHBVFIOState sPAPRPHBVFIOState;
 
-ret = vfio_container_ioctl(>phb.iommu_as, svphb->iommugroupid,
-   VFIO_CHECK_EXTENSION,
-   (void *) VFIO_SPAPR_TCE_IOMMU);
-if (ret != 1) {
-error_setg_errno(errp, -ret,
- "spapr-vfio: SPAPR extension is not supported");
-return;
-}
+struct sPAPRPHBVFIOState {
+sPAPRPHBState phb;
 
-ret = vfio_container_ioctl(>phb.iommu_as, svphb->iommugroupid,
-   VFIO_IOMMU_SPAPR_TCE_GET_INFO, );
-if (ret) {
-error_setg_errno(errp, -ret,
- "spapr-vfio: get info from container failed");
-return;
-}
+int32_t iommugroupid;
+};
 
-tcet = spapr_tce_new_table(DEVICE(sphb), liobn, info.dma32_window_start,
-   SPAPR_TCE_PAGE_SHIFT,
-   info.dma32_window_size >> SPAPR_TCE_PAGE_SHIFT,
-   true);
-if (!tcet) {
-error_setg(errp, "spapr-vfio: failed to create VFIO TCE table");
-return;
-}
+static Property spapr_phb_vfio_properties[] = {
+DEFINE_PROP_INT32("iommu", sPAPRPHBVFIOState, iommugroupid, -1),
+DEFINE_PROP_END_OF_LIST(),
+};
 
-/* Register default 32bit DMA window */
-memory_region_add_subregion(>iommu_root, tcet->bus_offset,
-spapr_tce_get_iommu(tcet));
+static void spapr_phb_vfio_instance_init(Object *obj)
+{
+error_report("spapr-pci-vfio-host-bridge is deprecated");
 }
 
 bool spapr_phb_eeh_available(sPAPRPHBState *sphb)
@@ -241,18 +216,16 @@ int spapr_phb_vfio_eeh_configure(sPAPRPHBState *sphb)
 static void spapr_phb_vfio_class_init(ObjectClass *klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
-sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass);
 
 dc->props = spapr_phb_vfio_properties;
-spc->finish_realize = spapr_phb_vfio_finish_realize;
 }
 
 static const TypeInfo spapr_phb_vfio_info = {
 .name  = TYPE_SPAPR_PCI_VFIO_HOST_BRIDGE,
 .parent= TYPE_SPAPR_PCI_HOST_BRIDGE,
 .instance_size = sizeof(sPAPRPHBVFIOState),
+.instance_init = spapr_phb_vfio_instance_init,
 .class_init= spapr_phb_vfio_class_init,
-.class_size= sizeof(sPAPRPHBClass),
 };
 
 static void spapr_pci_vfio_register_types(void)
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index 19a95e0..a08235e 100644
--- a/include/hw/pci-host/spapr.h

Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support

2016-03-15 Thread David Gibson

On Wed, Mar 16, 2016 at 10:11:54AM +0530, Bharata B Rao wrote:
> On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote:
> > On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:
> > > Add support to hot remove pc-dimm memory devices.
> > > 
> > > Signed-off-by: Bharata B Rao 
> > 
> > Reviewed-by: David Gibson 
> > 
> > Looks correct, but again, needs to wait on the PAPR change.
> > 
> > Have you thought any further on the idea of sending an index message,
> > then a count message as an interim approach to fixing this without
> > requiring a PAPR change?
> 
> Removal by index and removal by count are valid messages by themselves
> and drmgr would go ahead and start the removal in response to those
> calls. IIUC, you are suggesting that we remove one LMB by index in
> response to 1st message and remove (count -1) LMBs from where the last
> removal was done in the previous message.

That's right.

> Since the same code base of powerpc-utils works on PowerVM too, I am not
> sure if such an approach would impact PowerVM in any undesirable manner.
> Maybe Nathan can clarify?

Ah..  My first guess would be that it's ok; since IIUC PowerVM doesn't
care where the LMBs are removed from, removing them starting from the
last place we removed something should be as good as anywhere.

But it's possible there's some issue I haven't considered.

> I see that this can be done, but the changes in drmgr code, especially the
> code related to LMB list handling/removal, can be non-trivial. So I am not sure
> if the temporary approach is really worthwhile here, and hence I feel it is better
> to wait and do it the count-indexed way.

Ok.  It seems like it ought to be fairly straightforward, but I don't
know the drmgr code, so..

It would certainly be useful if Nathan could chime in on this.

> While we are here, I would also like to get some opinion on the real
> need for memory unplug. Is there anything that memory unplug gives us
> which memory ballooning (shrinking mem via ballooning) can't give ?

That's.. a good question.  I guess it means avoiding another interface
and a pseudo-device at least.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

[Qemu-devel] [PULL 08/16] target-ppc: Eliminate kvmppc_kern_htab global

2016-03-15 Thread David Gibson

fa48b43 "target-ppc: Remove hack for ppc_hash64_load_hpte*() with HV KVM"
purports to remove a hack in the handling of hash page tables (HPTs)
managed by KVM instead of qemu.  However, it actually went in the wrong
direction.

That patch requires anything looking for an external HPT (that is one not
managed by the guest itself) to check both env->external_htab (for a qemu
managed HPT) and kvmppc_kern_htab (for a KVM managed HPT).  That's a
problem because kvmppc_kern_htab is local to mmu-hash64.c, but some places
which need to check for an external HPT are outside that, such as
kvm_arch_get_registers().  The latter was subtly broken by the earlier
patch such that gdbstub can no longer access memory.

Basically a KVM managed HPT is much more like a qemu managed HPT than it is
like a guest managed HPT, so the original "hack" was actually on the right
track.

This partially reverts fa48b43, so we again mark a KVM managed external HPT
by putting a special but non-NULL value in env->external_htab.  It then
goes further, using that marker to eliminate the kvmppc_kern_htab global
entirely.  The ppc_hash64_set_external_hpt() helper function is extended
to set that marker if passed a NULL value (if you're setting an external
HPT, but don't have an actual HPT to set, the assumption is that it must
be a KVM managed HPT).

This also has some flow-on changes to the HPT access helpers, required by
the above changes.

Reported-by: Greg Kurz 
Signed-off-by: David Gibson 
Reviewed-by: Thomas Huth 
Reviewed-by: Greg Kurz 
Tested-by: Greg Kurz 
---
 hw/ppc/spapr.c  |  3 +--
 hw/ppc/spapr_hcall.c| 10 +-
 target-ppc/mmu-hash64.c | 40 ++--
 target-ppc/mmu-hash64.h |  9 +++--
 4 files changed, 27 insertions(+), 35 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index eb66a87..79a70a9 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1091,7 +1091,7 @@ static void spapr_reallocate_hpt(sPAPRMachineState 
*spapr, int shift,
 }
 
 spapr->htab_shift = shift;
-kvmppc_kern_htab = true;
+spapr->htab = NULL;
 } else {
 /* kernel-side HPT not needed, allocate in userspace instead */
 size_t size = 1ULL << shift;
@@ -1106,7 +1106,6 @@ static void spapr_reallocate_hpt(sPAPRMachineState 
*spapr, int shift,
 
 memset(spapr->htab, 0, size);
 spapr->htab_shift = shift;
-kvmppc_kern_htab = false;
 
 for (i = 0; i < size / HASH_PTE_SIZE_64; i++) {
 DIRTY_HPTE(HPTE(spapr->htab, i));
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 1733482..b2b1b93 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -122,17 +122,17 @@ static target_ulong h_enter(PowerPCCPU *cpu, 
sPAPRMachineState *spapr,
 break;
 }
 }
-ppc_hash64_stop_access(token);
+ppc_hash64_stop_access(cpu, token);
 if (index == 8) {
 return H_PTEG_FULL;
 }
 } else {
 token = ppc_hash64_start_access(cpu, pte_index);
 if (ppc_hash64_load_hpte0(cpu, token, 0) & HPTE64_V_VALID) {
-ppc_hash64_stop_access(token);
+ppc_hash64_stop_access(cpu, token);
 return H_PTEG_FULL;
 }
-ppc_hash64_stop_access(token);
+ppc_hash64_stop_access(cpu, token);
 }
 
 ppc_hash64_store_hpte(cpu, pte_index + index,
@@ -165,7 +165,7 @@ static RemoveResult remove_hpte(PowerPCCPU *cpu, 
target_ulong ptex,
 token = ppc_hash64_start_access(cpu, ptex);
 v = ppc_hash64_load_hpte0(cpu, token, 0);
 r = ppc_hash64_load_hpte1(cpu, token, 0);
-ppc_hash64_stop_access(token);
+ppc_hash64_stop_access(cpu, token);
 
 if ((v & HPTE64_V_VALID) == 0 ||
 ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) ||
@@ -288,7 +288,7 @@ static target_ulong h_protect(PowerPCCPU *cpu, 
sPAPRMachineState *spapr,
 token = ppc_hash64_start_access(cpu, pte_index);
 v = ppc_hash64_load_hpte0(cpu, token, 0);
 r = ppc_hash64_load_hpte1(cpu, token, 0);
-ppc_hash64_stop_access(token);
+ppc_hash64_stop_access(cpu, token);
 
 if ((v & HPTE64_V_VALID) == 0 ||
 ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) {
diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
index 7b5200b..d175fda 100644
--- a/target-ppc/mmu-hash64.c
+++ b/target-ppc/mmu-hash64.c
@@ -36,10 +36,11 @@
 #endif
 
 /*
- * Used to indicate whether we have allocated htab in the
- * host kernel
+ * Used to indicate that a CPU has its hash page table (HPT) managed
+ * within the host kernel
  */
-bool kvmppc_kern_htab;
+#define MMU_HASH64_KVM_MANAGED_HPT  ((void *)-1)
+
 /*
  * SLB handling
  */
@@ -283,7 +284,11 @@ void ppc_hash64_set_external_hpt(PowerPCCPU *cpu, void 
*hpt, int shift,
 
 cpu_synchronize_state(CPU(cpu));
 
-env->external_htab =

[Qemu-devel] [PULL 04/16] target-ppc: Add PVR for POWER8NVL processor

2016-03-15 Thread David Gibson

From: Alexey Kardashevskiy 

This adds a PVR for the new POWER8+NVLink CPU, whose core is identical to
POWER8 but which has a different PVR. The only available machine now has
PVR 004c 0100, so this defines the "POWER8NVL" alias as v1.0.

The corresponding kernel commit is
https://github.com/torvalds/linux/commit/ddee09c099c3
"powerpc: Add PVR for POWER8NVL processor"

Signed-off-by: Alexey Kardashevskiy 
Signed-off-by: David Gibson 
---
 target-ppc/cpu-models.c | 3 +++
 target-ppc/cpu-models.h | 2 ++
 target-ppc/translate_init.c | 3 +++
 3 files changed, 8 insertions(+)

diff --git a/target-ppc/cpu-models.c b/target-ppc/cpu-models.c
index ed005d7..5209e63 100644
--- a/target-ppc/cpu-models.c
+++ b/target-ppc/cpu-models.c
@@ -1143,6 +1143,8 @@
 "POWER8E v2.1")
 POWERPC_DEF("POWER8_v2.0",   CPU_POWERPC_POWER8_v20, POWER8,
 "POWER8 v2.0")
+POWERPC_DEF("POWER8NVL_v1.0",CPU_POWERPC_POWER8NVL_v10,  POWER8,
+"POWER8NVL v1.0")
 POWERPC_DEF("970_v2.2",  CPU_POWERPC_970_v22,970,
 "PowerPC 970 v2.2")
 POWERPC_DEF("970fx_v1.0",CPU_POWERPC_970FX_v10,  970,
@@ -1392,6 +1394,7 @@ PowerPCCPUAlias ppc_cpu_aliases[] = {
 { "POWER7+", "POWER7+_v2.1" },
 { "POWER8E", "POWER8E_v2.1" },
 { "POWER8", "POWER8_v2.0" },
+{ "POWER8NVL", "POWER8NVL_v1.0" },
 { "970", "970_v2.2" },
 { "970fx", "970fx_v3.1" },
 { "970mp", "970mp_v1.1" },
diff --git a/target-ppc/cpu-models.h b/target-ppc/cpu-models.h
index 2992427..f21a44c 100644
--- a/target-ppc/cpu-models.h
+++ b/target-ppc/cpu-models.h
@@ -560,6 +560,8 @@ enum {
 CPU_POWERPC_POWER8E_v21= 0x004B0201,
 CPU_POWERPC_POWER8_BASE= 0x004D,
 CPU_POWERPC_POWER8_v20 = 0x004D0200,
+CPU_POWERPC_POWER8NVL_BASE = 0x004C,
+CPU_POWERPC_POWER8NVL_v10  = 0x004C0100,
 CPU_POWERPC_970_v22= 0x00390202,
 CPU_POWERPC_970FX_v10  = 0x00391100,
 CPU_POWERPC_970FX_v20  = 0x003C0200,
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 06b008de..fb206af 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8256,6 +8256,9 @@ static void init_proc_POWER8(CPUPPCState *env)
 
 static bool ppc_pvr_match_power8(PowerPCCPUClass *pcc, uint32_t pvr)
 {
+if ((pvr & CPU_POWERPC_POWER_SERVER_MASK) == CPU_POWERPC_POWER8NVL_BASE) {
+return true;
+}
 if ((pvr & CPU_POWERPC_POWER_SERVER_MASK) == CPU_POWERPC_POWER8E_BASE) {
 return true;
 }
-- 
2.5.0

[Qemu-devel] [PULL 00/16] ppc-for-2.6 queue 20160316

2016-03-15 Thread David Gibson

The following changes since commit a6cdb77f816961f929d7934643febd2852230135:

  Merge remote-tracking branch 'remotes/thibault/tags/samuel-thibault' into 
staging (2016-03-15 17:09:52 +0000)

are available in the git repository at:

  git://github.com/dgibson/qemu.git tags/ppc-for-2.6-20160316

for you to fetch changes up to 3356128cd13d7ec7689b7cddd3efbfbc5339a262:

  vfio: Eliminate vfio_container_ioctl() (2016-03-16 09:55:11 +1100)


ppc patch queue for 2016-03-16

Accumulated patches for target-ppc, pseries machine type and related
devices.  As we are now in soft freeze, these are mostly fixes.
   * Fix KVM migration for several SPRs that qemu didn't handle
   * Clean up handling of SDR1, which allows a fix to the gdbstub
   * Fix a race in spapr_rng
   * Fix a bug with multifunction hotplug

The exception is the 7 patches to allow EEH on spapr-pci-host-bridge
devices (rather than the special and poorly designed
spapr-pci-vfio-host-bridge device).  I believe these carry a low risk of
breaking non-EEH cases, and EEH cases were little used in practice
previously (since libvirt did not support the special device amongst
other things).  The series did have a draft posted before the soft freeze,
removes a very ugly VFIO interface, and removes a device we'd like to
deprecate sooner rather than later.  So, I'm hoping we can squeeze
these in during the soft freeze.

This includes two patches to the VFIO code, which Alex Williamson has
indicated he's ok with coming through my tree.


Alexey Kardashevskiy (1):
  target-ppc: Add PVR for POWER8NVL processor

Benjamin Herrenschmidt (1):
  ppc: Add a few more P8 PMU SPRs

David Gibson (10):
  target-ppc: Split out SREGS get/put functions
  target-ppc: Add helpers for updating a CPU's SDR1 and external HPT
  target-ppc: Eliminate kvmppc_kern_htab global
  vfio: Start improving VFIO/EEH interface
  spapr_pci: Switch to vfio_eeh_as_op() interface
  spapr_pci: Eliminate class callbacks
  spapr_pci: Allow EEH on spapr-pci-host-bridge
  spapr_pci: (Mostly) remove spapr-pci-vfio-host-bridge
  spapr_pci: Remove finish_realize hook
  vfio: Eliminate vfio_container_ioctl()

Greg Kurz (1):
  spapr_rng: fix race with main loop

Michael Roth (1):
  spapr_pci: fix multifunction hotplug

Thomas Huth (2):
  ppc: Define the PSPB register on POWER8
  ppc: Fix migration of the TAR SPR

 hw/ppc/spapr.c  |  16 +-
 hw/ppc/spapr_hcall.c|  10 +-
 hw/ppc/spapr_pci.c  | 156 ++--
 hw/ppc/spapr_pci_vfio.c | 131 +-
 hw/ppc/spapr_rng.c  |   4 +-
 hw/vfio/common.c| 112 
 include/hw/pci-host/spapr.h |  64 ---
 include/hw/vfio/vfio.h  |   4 +-
 target-ppc/cpu-models.c |   3 +
 target-ppc/cpu-models.h |   2 +
 target-ppc/cpu.h|   8 +
 target-ppc/kvm.c| 421 
 target-ppc/kvm_ppc.h|   6 +
 target-ppc/mmu-hash64.c |  81 ++---
 target-ppc/mmu-hash64.h |  11 +-
 target-ppc/mmu_helper.c |  13 +-
 target-ppc/translate_init.c |  48 -
 17 files changed, 644 insertions(+), 446 deletions(-)

[Qemu-devel] [PULL 03/16] ppc: Add a few more P8 PMU SPRs

2016-03-15 Thread David Gibson

From: Benjamin Herrenschmidt 

Signed-off-by: Benjamin Herrenschmidt 
Signed-off-by: David Gibson 
---
 target-ppc/cpu.h|  7 +++
 target-ppc/translate_init.c | 28 
 2 files changed, 35 insertions(+)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 8fc0fb4..8d90d86 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1564,6 +1564,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool 
ifetch)
 #define SPR_PERF0 (0x300)
 #define SPR_RCPU_MI_RBA0  (0x300)
 #define SPR_MPC_MI_CTR(0x300)
+#define SPR_POWER_USIER   (0x300)
 #define SPR_PERF1 (0x301)
 #define SPR_RCPU_MI_RBA1  (0x301)
 #define SPR_POWER_UMMCR2  (0x301)
@@ -1613,6 +1614,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool 
ifetch)
 #define SPR_PERFF (0x30F)
 #define SPR_MPC_MD_TW (0x30F)
 #define SPR_UPERF0(0x310)
+#define SPR_POWER_SIER(0x310)
 #define SPR_UPERF1(0x311)
 #define SPR_POWER_MMCR2   (0x311)
 #define SPR_UPERF2(0x312)
@@ -1674,7 +1676,12 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool 
ifetch)
 #define SPR_440_ITV2  (0x376)
 #define SPR_440_ITV3  (0x377)
 #define SPR_440_CCR1  (0x378)
+#define SPR_TACR  (0x378)
+#define SPR_TCSCR (0x379)
+#define SPR_CSIGR (0x37a)
 #define SPR_DCRIPR(0x37B)
+#define SPR_POWER_SPMC1   (0x37C)
+#define SPR_POWER_SPMC2   (0x37D)
 #define SPR_POWER_MMCRS   (0x37E)
 #define SPR_PPR   (0x380)
 #define SPR_750_GQR0  (0x390)
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 48a1635..06b008de 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -7603,6 +7603,30 @@ static void gen_spr_power8_pmu_sup(CPUPPCState *env)
  SPR_NOACCESS, SPR_NOACCESS,
  _read_generic, _write_generic,
  KVM_REG_PPC_MMCRS, 0x);
+spr_register_kvm(env, SPR_POWER_SIER, "SIER",
+ SPR_NOACCESS, SPR_NOACCESS,
+ _read_generic, _write_generic,
+ KVM_REG_PPC_SIER, 0x);
+spr_register_kvm(env, SPR_POWER_SPMC1, "SPMC1",
+ SPR_NOACCESS, SPR_NOACCESS,
+ _read_generic, _write_generic,
+ KVM_REG_PPC_SPMC1, 0x);
+spr_register_kvm(env, SPR_POWER_SPMC2, "SPMC2",
+ SPR_NOACCESS, SPR_NOACCESS,
+ _read_generic, _write_generic,
+ KVM_REG_PPC_SPMC2, 0x);
+spr_register_kvm(env, SPR_TACR, "TACR",
+ SPR_NOACCESS, SPR_NOACCESS,
+ _read_generic, _write_generic,
+ KVM_REG_PPC_TACR, 0x);
+spr_register_kvm(env, SPR_TCSCR, "TCSCR",
+ SPR_NOACCESS, SPR_NOACCESS,
+ _read_generic, _write_generic,
+ KVM_REG_PPC_TCSCR, 0x);
+spr_register_kvm(env, SPR_CSIGR, "CSIGR",
+ SPR_NOACCESS, SPR_NOACCESS,
+ _read_generic, _write_generic,
+ KVM_REG_PPC_CSIGR, 0x);
 }
 
 static void gen_spr_power8_pmu_user(CPUPPCState *env)
@@ -7611,6 +7635,10 @@ static void gen_spr_power8_pmu_user(CPUPPCState *env)
  _read_ureg, SPR_NOACCESS,
  _read_ureg, _write_ureg,
  0x);
+spr_register(env, SPR_POWER_USIER, "USIER",
+ _read_generic, SPR_NOACCESS,
+ _read_generic, _write_generic,
+ 0x);
 }
 
 static void gen_spr_power5p_ear(CPUPPCState *env)
-- 
2.5.0

[Qemu-devel] [PULL 07/16] target-ppc: Add helpers for updating a CPU's SDR1 and external HPT

2016-03-15 Thread David Gibson

When a Power cpu with 64-bit hash MMU has its hash page table (HPT)
pointer updated by a write to the SDR1 register we need to update some
derived variables.  Likewise, when the cpu is configured for an external
HPT (one not in the guest memory space) some derived variables need to be
updated.

Currently the logic for this is (partially) duplicated in ppc_store_sdr1()
and in spapr_cpu_reset().  In future we're going to need it in some other
places, so make some common helpers for this update.

In addition the new ppc_hash64_set_external_hpt() helper also updates
SDR1 in KVM - it's not updated by the normal runtime KVM <-> qemu CPU
synchronization.  In a sense this belongs logically in the
ppc_hash64_set_sdr1() helper, but that is called from
kvm_arch_get_registers() so can't itself call cpu_synchronize_state()
without infinite recursion.  In practice this doesn't matter because
the only other caller is TCG specific.

Currently there aren't situations where updating SDR1 at runtime in KVM
matters, but there are going to be in future.

Signed-off-by: David Gibson 
Reviewed-by: Greg Kurz 
Reviewed-by: Thomas Huth 
---
 hw/ppc/spapr.c  | 13 ++---
 target-ppc/kvm.c|  2 +-
 target-ppc/kvm_ppc.h|  6 ++
 target-ppc/mmu-hash64.c | 43 +++
 target-ppc/mmu-hash64.h |  6 ++
 target-ppc/mmu_helper.c | 13 ++---
 6 files changed, 64 insertions(+), 19 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 298171a..eb66a87 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1196,17 +1196,8 @@ static void spapr_cpu_reset(void *opaque)
 
 env->spr[SPR_HIOR] = 0;
 
-env->external_htab = (uint8_t *)spapr->htab;
-env->htab_base = -1;
-/*
- * htab_mask is the mask used to normalize hash value to PTEG index.
- * htab_shift is log2 of hash table size.
- * We have 8 hpte per group, and each hpte is 16 bytes.
- * ie have 128 bytes per hpte entry.
- */
-env->htab_mask = (1ULL << (spapr->htab_shift - 7)) - 1;
-env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab |
-(spapr->htab_shift - 18);
+ppc_hash64_set_external_hpt(cpu, spapr->htab, spapr->htab_shift,
+&error_fatal);
 }
 
 static void spapr_create_nvram(sPAPRMachineState *spapr)
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 4161f64..776336b 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -867,7 +867,7 @@ static int kvm_put_vpa(CPUState *cs)
 }
 #endif /* TARGET_PPC64 */
 
-static int kvmppc_put_books_sregs(PowerPCCPU *cpu)
+int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 {
 CPUPPCState *env = &cpu->env;
 struct kvm_sregs sregs;
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index fd64c44..fc79312 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -55,6 +55,7 @@ void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong 
pte_index,
  target_ulong pte0, target_ulong pte1);
 bool kvmppc_has_cap_fixup_hcalls(void);
 int kvmppc_enable_hwrng(void);
+int kvmppc_put_books_sregs(PowerPCCPU *cpu);
 
 #else
 
@@ -246,6 +247,11 @@ static inline int kvmppc_enable_hwrng(void)
 {
 return -1;
 }
+
+static inline int kvmppc_put_books_sregs(PowerPCCPU *cpu)
+{
+abort();
+}
 #endif
 
 #ifndef CONFIG_KVM
diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
index 9c58fbf..7b5200b 100644
--- a/target-ppc/mmu-hash64.c
+++ b/target-ppc/mmu-hash64.c
@@ -258,6 +258,49 @@ target_ulong helper_load_slb_vsid(CPUPPCState *env, 
target_ulong rb)
 /*
  * 64-bit hash table MMU handling
  */
+void ppc_hash64_set_sdr1(PowerPCCPU *cpu, target_ulong value,
+ Error **errp)
+{
+CPUPPCState *env = &cpu->env;
+target_ulong htabsize = value & SDR_64_HTABSIZE;
+
+env->spr[SPR_SDR1] = value;
+if (htabsize > 28) {
+error_setg(errp,
+   "Invalid HTABSIZE 0x" TARGET_FMT_lx" stored in SDR1",
+   htabsize);
+htabsize = 28;
+}
+env->htab_mask = (1ULL << (htabsize + 18 - 7)) - 1;
+env->htab_base = value & SDR_64_HTABORG;
+}
+
+void ppc_hash64_set_external_hpt(PowerPCCPU *cpu, void *hpt, int shift,
+ Error **errp)
+{
+CPUPPCState *env = &cpu->env;
+Error *local_err = NULL;
+
+cpu_synchronize_state(CPU(cpu));
+
+env->external_htab = hpt;
+ppc_hash64_set_sdr1(cpu, (target_ulong)(uintptr_t)hpt | (shift - 18),
+&local_err);
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
+
+/* Not strictly necessary, but makes it clearer that an external
+ * htab is in use when debugging */
+env->htab_base = -1;
+
+if (kvm_enabled()) {
+if (kvmppc_put_books_sregs(cpu) < 0) {
+error_setg(errp, "Unable to update SDR1 in KVM");
+}
+}
+}
 
 static int

[Qemu-devel] [PULL 10/16] vfio: Start improving VFIO/EEH interface

2016-03-15 Thread David Gibson

At present the code handling IBM's Enhanced Error Handling (EEH) interface
on VFIO devices operates by bypassing the usual VFIO logic with
vfio_container_ioctl().  That's a poorly designed interface with unclear
semantics about exactly what can be operated on.

In particular it operates on a single vfio container internally (hence the
name), but takes an address space and group id, from which it deduces the
container in a rather roundabout way.  groupids are something that code
outside vfio shouldn't even be aware of.

This patch creates new interfaces for EEH operations.  Internally we
have vfio_eeh_container_op() which takes a VFIOContainer object
directly.  For external use we have vfio_eeh_as_ok() which determines
if an AddressSpace is usable for EEH (at present this means it has a
single container with exactly one group attached), and vfio_eeh_as_op()
which will perform an operation on an AddressSpace in the unambiguous case,
and otherwise returns an error.

This interface still isn't great, but it's enough of an improvement to
allow a number of cleanups in other places.

Signed-off-by: David Gibson 
Reviewed-by: Alexey Kardashevskiy 
Acked-by: Alex Williamson 
---
 hw/vfio/common.c   | 95 ++
 include/hw/vfio/vfio.h |  2 ++
 2 files changed, 97 insertions(+)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 96ccb79..0636bb1 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1137,3 +1137,98 @@ int vfio_container_ioctl(AddressSpace *as, int32_t 
groupid,
 
 return vfio_container_do_ioctl(as, groupid, req, param);
 }
+
+/*
+ * Interfaces for IBM EEH (Enhanced Error Handling)
+ */
+static bool vfio_eeh_container_ok(VFIOContainer *container)
+{
+/*
+ * As of 2016-03-04 (linux-4.5) the host kernel EEH/VFIO
+ * implementation is broken if there are multiple groups in a
+ * container.  The hardware works in units of Partitionable
+ * Endpoints (== IOMMU groups) and the EEH operations naively
+ * iterate across all groups in the container, without any logic
+ * to make sure the groups have their state synchronized.  For
+ * certain operations (ENABLE) that might be ok, until an error
+ * occurs, but for others (GET_STATE) it's clearly broken.
+ */
+
+/*
+ * XXX Once fixed kernels exist, test for them here
+ */
+
+if (QLIST_EMPTY(&container->group_list)) {
+return false;
+}
+
+if (QLIST_NEXT(QLIST_FIRST(&container->group_list), container_next)) {
+return false;
+}
+
+return true;
+}
+
+static int vfio_eeh_container_op(VFIOContainer *container, uint32_t op)
+{
+struct vfio_eeh_pe_op pe_op = {
+.argsz = sizeof(pe_op),
+.op = op,
+};
+int ret;
+
+if (!vfio_eeh_container_ok(container)) {
+error_report("vfio/eeh: EEH_PE_OP 0x%x: "
+ "kernel requires a container with exactly one group", op);
+return -EPERM;
+}
+
+ret = ioctl(container->fd, VFIO_EEH_PE_OP, &pe_op);
+if (ret < 0) {
+error_report("vfio/eeh: EEH_PE_OP 0x%x failed: %m", op);
+return -errno;
+}
+
+return 0;
+}
+
+static VFIOContainer *vfio_eeh_as_container(AddressSpace *as)
+{
+VFIOAddressSpace *space = vfio_get_address_space(as);
+VFIOContainer *container = NULL;
+
+if (QLIST_EMPTY(&space->containers)) {
+/* No containers to act on */
+goto out;
+}
+
+container = QLIST_FIRST(&space->containers);
+
+if (QLIST_NEXT(container, next)) {
+/* We don't yet have logic to synchronize EEH state across
+ * multiple containers */
+container = NULL;
+goto out;
+}
+
+out:
+vfio_put_address_space(space);
+return container;
+}
+
+bool vfio_eeh_as_ok(AddressSpace *as)
+{
+VFIOContainer *container = vfio_eeh_as_container(as);
+
+return (container != NULL) && vfio_eeh_container_ok(container);
+}
+
+int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
+{
+VFIOContainer *container = vfio_eeh_as_container(as);
+
+if (!container) {
+return -ENODEV;
+}
+return vfio_eeh_container_op(container, op);
+}
diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h
index 0b26cd8..fd3933b 100644
--- a/include/hw/vfio/vfio.h
+++ b/include/hw/vfio/vfio.h
@@ -5,5 +5,7 @@
 
 extern int vfio_container_ioctl(AddressSpace *as, int32_t groupid,
 int req, void *param);
+bool vfio_eeh_as_ok(AddressSpace *as);
+int vfio_eeh_as_op(AddressSpace *as, uint32_t op);
 
 #endif
-- 
2.5.0

[Qemu-devel] [PULL 16/16] vfio: Eliminate vfio_container_ioctl()

2016-03-15 Thread David Gibson

vfio_container_ioctl() was a bad interface that bypassed abstraction
boundaries, had semantics that sat uneasily with its name, and was unsafe
in many realistic circumstances.  Now that spapr-pci-vfio-host-bridge has
been folded into spapr-pci-host-bridge, there are no more users, so remove
it.

Signed-off-by: David Gibson 
Reviewed-by: Alexey Kardashevskiy 
Acked-by: Alex Williamson 
---
 hw/vfio/common.c   | 45 -
 include/hw/vfio/vfio.h |  2 --
 2 files changed, 47 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 0636bb1..fb588d8 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1093,51 +1093,6 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index,
 return 0;
 }
 
-static int vfio_container_do_ioctl(AddressSpace *as, int32_t groupid,
-   int req, void *param)
-{
-VFIOGroup *group;
-VFIOContainer *container;
-int ret = -1;
-
-group = vfio_get_group(groupid, as);
-if (!group) {
-error_report("vfio: group %d not registered", groupid);
-return ret;
-}
-
-container = group->container;
-if (group->container) {
-ret = ioctl(container->fd, req, param);
-if (ret < 0) {
-error_report("vfio: failed to ioctl %d to container: ret=%d, %s",
- _IOC_NR(req) - VFIO_BASE, ret, strerror(errno));
-}
-}
-
-vfio_put_group(group);
-
-return ret;
-}
-
-int vfio_container_ioctl(AddressSpace *as, int32_t groupid,
- int req, void *param)
-{
-/* We allow only certain ioctls to the container */
-switch (req) {
-case VFIO_CHECK_EXTENSION:
-case VFIO_IOMMU_SPAPR_TCE_GET_INFO:
-case VFIO_EEH_PE_OP:
-break;
-default:
-/* Return an error on unknown requests */
-error_report("vfio: unsupported ioctl %X", req);
-return -1;
-}
-
-return vfio_container_do_ioctl(as, groupid, req, param);
-}
-
 /*
  * Interfaces for IBM EEH (Enhanced Error Handling)
  */
diff --git a/include/hw/vfio/vfio.h b/include/hw/vfio/vfio.h
index fd3933b..7153604 100644
--- a/include/hw/vfio/vfio.h
+++ b/include/hw/vfio/vfio.h
@@ -3,8 +3,6 @@
 
 #include "qemu/typedefs.h"
 
-extern int vfio_container_ioctl(AddressSpace *as, int32_t groupid,
-int req, void *param);
 bool vfio_eeh_as_ok(AddressSpace *as);
 int vfio_eeh_as_op(AddressSpace *as, uint32_t op);
 
-- 
2.5.0

[Qemu-devel] [PULL 09/16] spapr_rng: fix race with main loop

2016-03-15 Thread David Gibson

From: Greg Kurz 

Since commit "60253ed1e6ec rng: add request queue support to rng-random",
the use of a spapr_rng device may hang vCPU threads.

The following path is taken without holding the lock to the main loop mutex:

h_random()
  rng_backend_request_entropy()
rng_random_request_entropy()
  qemu_set_fd_handler()

The consequence is that entropy_available() may be called before the vCPU
thread could even queue the request: depending on the scheduling, it may
happen that entropy_available() does not call random_recv()->qemu_sem_post().
The vCPU thread will then sleep forever in h_random()->qemu_sem_wait().

This could not happen before 60253ed1e6ec because entropy_available() used
to call random_recv() unconditionally.

This patch ensures the lock is held to avoid the race.

Signed-off-by: Greg Kurz 
Reviewed-by: Cédric Le Goater 
Reviewed-by: Thomas Huth 
Signed-off-by: David Gibson 
---
 hw/ppc/spapr_rng.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/spapr_rng.c b/hw/ppc/spapr_rng.c
index a39d472..02d6be4 100644
--- a/hw/ppc/spapr_rng.c
+++ b/hw/ppc/spapr_rng.c
@@ -77,13 +77,13 @@ static target_ulong h_random(PowerPCCPU *cpu, 
sPAPRMachineState *spapr,
 hrdata.val.v64 = 0;
 hrdata.received = 0;
 
-qemu_mutex_unlock_iothread();
 while (hrdata.received < 8) {
 rng_backend_request_entropy(rngstate->backend, 8 - hrdata.received,
 random_recv, );
+qemu_mutex_unlock_iothread();
 qemu_sem_wait();
+qemu_mutex_lock_iothread();
 }
-qemu_mutex_lock_iothread();
 
 qemu_sem_destroy();
 args[0] = hrdata.val.v64;
-- 
2.5.0

[Qemu-devel] [PULL 15/16] spapr_pci: Remove finish_realize hook

2016-03-15 Thread David Gibson

Now that spapr-pci-vfio-host-bridge is reduced to just a stub, there is
only one implementation of the finish_realize hook in sPAPRPHBClass.  So,
we can fold that implementation into its (single) caller, and remove the
hook.  That's the last thing left in sPAPRPHBClass, so that can go away as
well.

Signed-off-by: David Gibson 
Reviewed-by: Alexey Kardashevskiy 
---
 hw/ppc/spapr_pci.c  | 25 +
 include/hw/pci-host/spapr.h | 12 
 2 files changed, 5 insertions(+), 32 deletions(-)

diff --git a/hw/ppc/spapr_pci.c b/hw/ppc/spapr_pci.c
index 3ec1823..79baa7b 100644
--- a/hw/ppc/spapr_pci.c
+++ b/hw/ppc/spapr_pci.c
@@ -1303,11 +1303,12 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 SysBusDevice *s = SYS_BUS_DEVICE(dev);
 sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
 PCIHostState *phb = PCI_HOST_BRIDGE(s);
-sPAPRPHBClass *info = SPAPR_PCI_HOST_BRIDGE_GET_CLASS(s);
 char *namebuf;
 int i;
 PCIBus *bus;
 uint64_t msi_window_size = 4096;
+sPAPRTCETable *tcet;
+uint32_t nb_table;
 
 if (sphb->index != (uint32_t)-1) {
 hwaddr windows_base;
@@ -1459,33 +1460,20 @@ static void spapr_phb_realize(DeviceState *dev, Error 
**errp)
 }
 }
 
-if (!info->finish_realize) {
-error_setg(errp, "finish_realize not defined");
-return;
-}
-
-info->finish_realize(sphb, errp);
-
-sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free);
-}
-
-static void spapr_phb_finish_realize(sPAPRPHBState *sphb, Error **errp)
-{
-sPAPRTCETable *tcet;
-uint32_t nb_table;
-
 nb_table = sphb->dma_win_size >> SPAPR_TCE_PAGE_SHIFT;
 tcet = spapr_tce_new_table(DEVICE(sphb), sphb->dma_liobn,
0, SPAPR_TCE_PAGE_SHIFT, nb_table, false);
 if (!tcet) {
 error_setg(errp, "Unable to create TCE table for %s",
sphb->dtbusname);
-return ;
+return;
 }
 
 /* Register default 32bit DMA window */
 memory_region_add_subregion(>iommu_root, sphb->dma_win_addr,
 spapr_tce_get_iommu(tcet));
+
+sphb->msi = g_hash_table_new_full(g_int_hash, g_int_equal, g_free, g_free);
 }
 
 static int spapr_phb_children_reset(Object *child, void *opaque)
@@ -1626,7 +1614,6 @@ static void spapr_phb_class_init(ObjectClass *klass, void 
*data)
 {
 PCIHostBridgeClass *hc = PCI_HOST_BRIDGE_CLASS(klass);
 DeviceClass *dc = DEVICE_CLASS(klass);
-sPAPRPHBClass *spc = SPAPR_PCI_HOST_BRIDGE_CLASS(klass);
 HotplugHandlerClass *hp = HOTPLUG_HANDLER_CLASS(klass);
 
 hc->root_bus_path = spapr_phb_root_bus_path;
@@ -1636,7 +1623,6 @@ static void spapr_phb_class_init(ObjectClass *klass, void 
*data)
 dc->vmsd = _spapr_pci;
 set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
 dc->cannot_instantiate_with_device_add_yet = false;
-spc->finish_realize = spapr_phb_finish_realize;
 hp->plug = spapr_phb_hot_plug_child;
 hp->unplug = spapr_phb_hot_unplug_child;
 }
@@ -1646,7 +1632,6 @@ static const TypeInfo spapr_phb_info = {
 .parent= TYPE_PCI_HOST_BRIDGE,
 .instance_size = sizeof(sPAPRPHBState),
 .class_init= spapr_phb_class_init,
-.class_size= sizeof(sPAPRPHBClass),
 .interfaces= (InterfaceInfo[]) {
 { TYPE_HOTPLUG_HANDLER },
 { }
diff --git a/include/hw/pci-host/spapr.h b/include/hw/pci-host/spapr.h
index a08235e..03ee006 100644
--- a/include/hw/pci-host/spapr.h
+++ b/include/hw/pci-host/spapr.h
@@ -32,20 +32,8 @@
 #define SPAPR_PCI_HOST_BRIDGE(obj) \
 OBJECT_CHECK(sPAPRPHBState, (obj), TYPE_SPAPR_PCI_HOST_BRIDGE)
 
-#define SPAPR_PCI_HOST_BRIDGE_CLASS(klass) \
- OBJECT_CLASS_CHECK(sPAPRPHBClass, (klass), TYPE_SPAPR_PCI_HOST_BRIDGE)
-#define SPAPR_PCI_HOST_BRIDGE_GET_CLASS(obj) \
- OBJECT_GET_CLASS(sPAPRPHBClass, (obj), TYPE_SPAPR_PCI_HOST_BRIDGE)
-
-typedef struct sPAPRPHBClass sPAPRPHBClass;
 typedef struct sPAPRPHBState sPAPRPHBState;
 
-struct sPAPRPHBClass {
-PCIHostBridgeClass parent_class;
-
-void (*finish_realize)(sPAPRPHBState *sphb, Error **errp);
-};
-
 typedef struct spapr_pci_msi {
 uint32_t first_irq;
 uint32_t num;
-- 
2.5.0

[Qemu-devel] [PULL 02/16] ppc: Fix migration of the TAR SPR

2016-03-15 Thread David Gibson

From: Thomas Huth 

The TAR special purpose register currently does not get migrated
under KVM because it does not get synchronized with the kernel.
Use spr_register_kvm() instead of spr_register() to fix this issue.

Signed-off-by: Thomas Huth 
Signed-off-by: David Gibson 
---
 target-ppc/translate_init.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index f72148c..48a1635 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -7714,10 +7714,10 @@ static void spr_write_tar(DisasContext *ctx, int sprn, 
int gprn)
 
 static void gen_spr_power8_tce_address_control(CPUPPCState *env)
 {
-spr_register(env, SPR_TAR, "TAR",
- _read_tar, _write_tar,
- _read_generic, _write_generic,
- 0x);
+spr_register_kvm(env, SPR_TAR, "TAR",
+ _read_tar, _write_tar,
+ _read_generic, _write_generic,
+ KVM_REG_PPC_TAR, 0x);
 }
 
 static void spr_read_tm(DisasContext *ctx, int gprn, int sprn)
-- 
2.5.0

[Qemu-devel] [PULL 11/16] spapr_pci: Switch to vfio_eeh_as_op() interface

2016-03-15 Thread David Gibson

This switches all EEH on VFIO operations in spapr_pci_vfio.c from the
broken vfio_container_ioctl() interface to the new vfio_eeh_as_op()
interface.

Signed-off-by: David Gibson 
Reviewed-by: Alexey Kardashevskiy 
---
 hw/ppc/spapr_pci_vfio.c | 50 -
 1 file changed, 16 insertions(+), 34 deletions(-)

diff --git a/hw/ppc/spapr_pci_vfio.c b/hw/ppc/spapr_pci_vfio.c
index 2f3752e..b1e8e8e 100644
--- a/hw/ppc/spapr_pci_vfio.c
+++ b/hw/ppc/spapr_pci_vfio.c
@@ -73,15 +73,9 @@ static void spapr_phb_vfio_finish_realize(sPAPRPHBState 
*sphb, Error **errp)
 spapr_tce_get_iommu(tcet));
 }
 
-static void spapr_phb_vfio_eeh_reenable(sPAPRPHBVFIOState *svphb)
+static void spapr_phb_vfio_eeh_reenable(sPAPRPHBState *sphb)
 {
-struct vfio_eeh_pe_op op = {
-.argsz = sizeof(op),
-.op= VFIO_EEH_PE_ENABLE
-};
-
-vfio_container_ioctl(>phb.iommu_as,
- svphb->iommugroupid, VFIO_EEH_PE_OP, );
+vfio_eeh_as_op(>iommu_as, VFIO_EEH_PE_ENABLE);
 }
 
 static void spapr_phb_vfio_reset(DeviceState *qdev)
@@ -92,19 +86,18 @@ static void spapr_phb_vfio_reset(DeviceState *qdev)
  * ensures that the contained PCI devices will work properly
  * after reboot.
  */
-spapr_phb_vfio_eeh_reenable(SPAPR_PCI_VFIO_HOST_BRIDGE(qdev));
+spapr_phb_vfio_eeh_reenable(SPAPR_PCI_HOST_BRIDGE(qdev));
 }
 
 static int spapr_phb_vfio_eeh_set_option(sPAPRPHBState *sphb,
  unsigned int addr, int option)
 {
-sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
-struct vfio_eeh_pe_op op = { .argsz = sizeof(op) };
+uint32_t op;
 int ret;
 
 switch (option) {
 case RTAS_EEH_DISABLE:
-op.op = VFIO_EEH_PE_DISABLE;
+op = VFIO_EEH_PE_DISABLE;
 break;
 case RTAS_EEH_ENABLE: {
 PCIHostState *phb;
@@ -122,21 +115,20 @@ static int spapr_phb_vfio_eeh_set_option(sPAPRPHBState 
*sphb,
 return RTAS_OUT_PARAM_ERROR;
 }
 
-op.op = VFIO_EEH_PE_ENABLE;
+op = VFIO_EEH_PE_ENABLE;
 break;
 }
 case RTAS_EEH_THAW_IO:
-op.op = VFIO_EEH_PE_UNFREEZE_IO;
+op = VFIO_EEH_PE_UNFREEZE_IO;
 break;
 case RTAS_EEH_THAW_DMA:
-op.op = VFIO_EEH_PE_UNFREEZE_DMA;
+op = VFIO_EEH_PE_UNFREEZE_DMA;
 break;
 default:
 return RTAS_OUT_PARAM_ERROR;
 }
 
-ret = vfio_container_ioctl(>phb.iommu_as, svphb->iommugroupid,
-   VFIO_EEH_PE_OP, );
+ret = vfio_eeh_as_op(>iommu_as, op);
 if (ret < 0) {
 return RTAS_OUT_HW_ERROR;
 }
@@ -146,13 +138,9 @@ static int spapr_phb_vfio_eeh_set_option(sPAPRPHBState 
*sphb,
 
 static int spapr_phb_vfio_eeh_get_state(sPAPRPHBState *sphb, int *state)
 {
-sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
-struct vfio_eeh_pe_op op = { .argsz = sizeof(op) };
 int ret;
 
-op.op = VFIO_EEH_PE_GET_STATE;
-ret = vfio_container_ioctl(>phb.iommu_as, svphb->iommugroupid,
-   VFIO_EEH_PE_OP, );
+ret = vfio_eeh_as_op(>iommu_as, VFIO_EEH_PE_GET_STATE);
 if (ret < 0) {
 return RTAS_OUT_PARAM_ERROR;
 }
@@ -206,28 +194,26 @@ static void spapr_phb_vfio_eeh_pre_reset(sPAPRPHBState 
*sphb)
 
 static int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, int option)
 {
-sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
-struct vfio_eeh_pe_op op = { .argsz = sizeof(op) };
+uint32_t op;
 int ret;
 
 switch (option) {
 case RTAS_SLOT_RESET_DEACTIVATE:
-op.op = VFIO_EEH_PE_RESET_DEACTIVATE;
+op = VFIO_EEH_PE_RESET_DEACTIVATE;
 break;
 case RTAS_SLOT_RESET_HOT:
 spapr_phb_vfio_eeh_pre_reset(sphb);
-op.op = VFIO_EEH_PE_RESET_HOT;
+op = VFIO_EEH_PE_RESET_HOT;
 break;
 case RTAS_SLOT_RESET_FUNDAMENTAL:
 spapr_phb_vfio_eeh_pre_reset(sphb);
-op.op = VFIO_EEH_PE_RESET_FUNDAMENTAL;
+op = VFIO_EEH_PE_RESET_FUNDAMENTAL;
 break;
 default:
 return RTAS_OUT_PARAM_ERROR;
 }
 
-ret = vfio_container_ioctl(>phb.iommu_as, svphb->iommugroupid,
-   VFIO_EEH_PE_OP, );
+ret = vfio_eeh_as_op(>iommu_as, op);
 if (ret < 0) {
 return RTAS_OUT_HW_ERROR;
 }
@@ -237,13 +223,9 @@ static int spapr_phb_vfio_eeh_reset(sPAPRPHBState *sphb, 
int option)
 
 static int spapr_phb_vfio_eeh_configure(sPAPRPHBState *sphb)
 {
-sPAPRPHBVFIOState *svphb = SPAPR_PCI_VFIO_HOST_BRIDGE(sphb);
-struct vfio_eeh_pe_op op = { .argsz = sizeof(op) };
 int ret;
 
-op.op = VFIO_EEH_PE_CONFIGURE;
-ret = vfio_container_ioctl(>phb.iommu_as, svphb->iommugroupid,
-   VFIO_EEH_PE_OP, );
+ret = vfio_eeh_as_op(>iommu_as, VFIO_EEH_PE_CONFIGURE);
 if (ret < 0) {

Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support

2016-03-15 Thread Bharata B Rao

On Wed, Mar 16, 2016 at 12:36:05PM +1100, David Gibson wrote:
> On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:
> > Add support to hot remove pc-dimm memory devices.
> > 
> > Signed-off-by: Bharata B Rao 
> 
> Reviewed-by: David Gibson 
> 
> Looks correct, but again, needs to wait on the PAPR change.
> 
> Have you thought any further on the idea of sending an index message,
> then a count message as an interim approach to fixing this without
> requiring a PAPR change?

Removal by index and removal by count are valid messages by themselves
and drmgr would go ahead and start the removal in response to those
calls. IIUC, you are suggesting that we remove one LMB by index in
response to the 1st message and then remove (count - 1) LMBs, starting from
where the previous removal was done, in response to the 2nd message.

Since the same code base of powerpc-utils works on PowerVM too, I am not
sure if such an approach would impact PowerVM in any undesirable manner.
Maybe Nathan can clarify?

I see that this can be done, but the changes in the drmgr code, especially the
code related to LMB list handling/removal, can be non-trivial. So I am not sure
if the temporary approach is really worth it here, and hence I feel it is better
to wait and do it the count-indexed way.

While we are here, I would also like to get some opinions on the real
need for memory unplug. Is there anything that memory unplug gives us
which memory ballooning (shrinking mem via ballooning) can't give?

Regards,
Bharata.

Re: [Qemu-devel] [PATCH V2] net/filter-mirror: Change filter_mirror_send interface

2016-03-15 Thread Jason Wang



On 03/15/2016 06:02 PM, Zhang Chen wrote:
> Change filter_mirror_send interface to make it easier
> to be used by other filters
>
> Signed-off-by: Zhang Chen 
> Signed-off-by: Wen Congyang 
> Signed-off-by: Li Zhijian 
> ---
>  net/filter-mirror.c | 10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/net/filter-mirror.c b/net/filter-mirror.c
> index 2e2ed27..1b1ec16 100644
> --- a/net/filter-mirror.c
> +++ b/net/filter-mirror.c
> @@ -34,11 +34,10 @@ typedef struct MirrorState {
>  CharDriverState *chr_out;
>  } MirrorState;
>  
> -static int filter_mirror_send(NetFilterState *nf,
> +static int filter_mirror_send(CharDriverState *chr_out,
>const struct iovec *iov,
>int iovcnt)
>  {
> -MirrorState *s = FILTER_MIRROR(nf);
>  int ret = 0;
>  ssize_t size = 0;
>  uint32_t len =  0;
> @@ -50,14 +49,14 @@ static int filter_mirror_send(NetFilterState *nf,
>  }
>  
>  len = htonl(size);
> -ret = qemu_chr_fe_write_all(s->chr_out, (uint8_t *), sizeof(len));
> +ret = qemu_chr_fe_write_all(chr_out, (uint8_t *), sizeof(len));
>  if (ret != sizeof(len)) {
>  goto err;
>  }
>  
>  buf = g_malloc(size);
>  iov_to_buf(iov, iovcnt, 0, buf, size);
> -ret = qemu_chr_fe_write_all(s->chr_out, (uint8_t *)buf, size);
> +ret = qemu_chr_fe_write_all(chr_out, (uint8_t *)buf, size);
>  g_free(buf);
>  if (ret != size) {
>  goto err;
> @@ -76,9 +75,10 @@ static ssize_t filter_mirror_receive_iov(NetFilterState 
> *nf,
>   int iovcnt,
>   NetPacketSent *sent_cb)
>  {
> +MirrorState *s = FILTER_MIRROR(nf);
>  int ret;
>  
> -ret = filter_mirror_send(nf, iov, iovcnt);
> +ret = filter_mirror_send(s->chr_out, iov, iovcnt);
>  if (ret) {
>  error_report("filter_mirror_send failed(%s)", strerror(-ret));
>  }

Applied to -net.

Thanks

Re: [Qemu-devel] [PATCH V10 0/2] net/filter-mirror:add filter-mirror and unit test

2016-03-15 Thread Jason Wang



On 03/15/2016 03:41 PM, Zhang Chen wrote:
> Filter-mirror is a netfilter plugin.
> It gives qemu the ability to mirror
> packets to a chardev.
>
> v10:
>  - add include "qemu/osdep.h"
>
> v9:
>  - add qmp("{ 'execute' : 'query-status'}")
>before iov_send() and change pipe
>to socket in test-filter-mirror.c
>
> v8:
>  - The outdev of filter-mirror test changed
>from -chardev socket to -chardev pipe
>
> v7:
>  - fix mktemp() to mkstemp()
>
> v6:
>  - Address Jason's comments.
>
> v5:
>  - Address Jason's comments.
>
> v4:
>  - Address Jason's comments.
>
> v3:
>  - Add filter-mirror unit test according
>to Jason's comments
>  - Address zhanghailiang's comments.
>  - Address Jason's comments.
>
> v2:
>  - Address zhanghailiang's comments.
>  - Address Eric Blake's comments.
>  - Address Yang Hongyang's comments.
>  - Address Dave's comments.
>
> v1:
>  initial patch.
>
>
> Zhang Chen (2):
>   net/filter-mirror:Add filter-mirror
>   tests/test-filter-mirror:add filter-mirror unit test
>
>  net/Makefile.objs  |   1 +
>  net/filter-mirror.c| 182 
> +
>  qemu-options.hx|   5 ++
>  tests/.gitignore   |   1 +
>  tests/Makefile |   2 +
>  tests/test-filter-mirror.c |  93 +++
>  vl.c   |   3 +-
>  7 files changed, 286 insertions(+), 1 deletion(-)
>  create mode 100644 net/filter-mirror.c
>  create mode 100644 tests/test-filter-mirror.c
>

Applied to -net.

Thanks

Re: [Qemu-devel] [PATCH v3] block/gluster: add support for SEEK_DATA/SEEK_HOLE

2016-03-15 Thread Niels de Vos

On Tue, Mar 15, 2016 at 03:52:02PM -0400, Jeff Cody wrote:
> On Tue, Mar 15, 2016 at 03:50:17PM -0400, Jeff Cody wrote:
> > On Thu, Mar 10, 2016 at 07:38:00PM +0100, Niels de Vos wrote:
> > > GlusterFS 3.8 contains support for SEEK_DATA and SEEK_HOLE. This makes
> > > it possible to detect sparse areas in files.
> > > 
> > > Signed-off-by: Niels de Vos 
> > > 
> > > ---
> > > Tested by compiling and running "qemu-img map gluster://..." with a
> > > build of the current master branch of glusterfs. Using a Fedora cloud
> > > image (in raw format) shows many SEEK procedure calls going back and
> > > forth over the network. The output of "qemu map" matches the output when
> > > run against the image on the local filesystem.
> > > 
> > > v2 based on feedback from Jeff Cody:
> > > - Replace compile time detection by runtime detection
> > > - Update return pointer (new argument) for .bdrv_co_get_block_status
> > > ---
> > >  block/gluster.c | 182 
> > > 
> > >  1 file changed, 182 insertions(+)
> > > 
> > > diff --git a/block/gluster.c b/block/gluster.c
> > > index 65077a0..a4f0628 100644
> > > --- a/block/gluster.c
> > > +++ b/block/gluster.c
> > > @@ -23,6 +23,7 @@ typedef struct GlusterAIOCB {
> > >  typedef struct BDRVGlusterState {
> > >  struct glfs *glfs;
> > >  struct glfs_fd *fd;
> > > +bool supports_seek_data;
> > >  } BDRVGlusterState;
> > >  
> > >  typedef struct GlusterConf {
> > > @@ -286,6 +287,28 @@ static void qemu_gluster_parse_flags(int bdrv_flags, 
> > > int *open_flags)
> > >  }
> > >  }
> > >  
> > > +/*
> > > + * Do SEEK_DATA/HOLE to detect if it is functional. Older broken 
> > > versions of
> > > + * gfapi incorrectly return the current offset when SEEK_DATA/HOLE is 
> > > used.
> > > + * - Corrected versions return -1 and set errno to EINVAL.
> > > + * - Versions that support SEEK_DATA/HOLE correctly, will return -1 and 
> > > set
> > > + *   errno to ENXIO when SEEK_DATA is called with a position of EOF.
> > > + */
> > > +static bool qemu_gluster_test_seek(struct glfs_fd *fd)
> > > +{
> > > +off_t ret, eof;
> > > +
> > > +eof = glfs_lseek(fd, 0, SEEK_END);
> > > +if (eof < 0) {
> > > +/* this should never occur */
> > > +return false;
> > > +}
> > > +
> > > +/* this should always fail with ENXIO if SEEK_DATA is supported */
> > > +ret = glfs_lseek(fd, eof, SEEK_DATA);
> > > +return (ret < 0) && (errno == ENXIO);
> > > +}
> > > +
> > >  static int qemu_gluster_open(BlockDriverState *bs,  QDict *options,
> > >   int bdrv_flags, Error **errp)
> > >  {
> > > @@ -320,6 +343,8 @@ static int qemu_gluster_open(BlockDriverState *bs,  
> > > QDict *options,
> > >  ret = -errno;
> > >  }
> > >  
> > > +s->supports_seek_data = qemu_gluster_test_seek(s->fd);
> > > +
> > >  out:
> > >  qemu_opts_del(opts);
> > >  qemu_gluster_gconf_free(gconf);
> > > @@ -677,6 +702,159 @@ static int 
> > > qemu_gluster_has_zero_init(BlockDriverState *bs)
> > >  return 0;
> > >  }
> > >  
> > > +/*
> > > + * Find allocation range in @bs around offset @start.
> > > + * May change underlying file descriptor's file offset.
> > > + * If @start is not in a hole, store @start in @data, and the
> > > + * beginning of the next hole in @hole, and return 0.
> > > + * If @start is in a non-trailing hole, store @start in @hole and the
> > > + * beginning of the next non-hole in @data, and return 0.
> > > + * If @start is in a trailing hole or beyond EOF, return -ENXIO.
> > > + * If we can't find out, return a negative errno other than -ENXIO.
> > > + *
> > > + * (Shamefully copied from raw-posix.c, only miniscule adaptions.)
> > > + */
> > > +static int find_allocation(BlockDriverState *bs, off_t start,
> > > +   off_t *data, off_t *hole)
> > > +{
> > > +BDRVGlusterState *s = bs->opaque;
> > > +off_t offs;
> > > +
> > > +if (!s->supports_seek_data) {
> > > +return -ENOTSUP;
> > > +}
> > > +
> > > +/*
> > > + * SEEK_DATA cases:
> > > + * D1. offs == start: start is in data
> > > + * D2. offs > start: start is in a hole, next data at offs
> > > + * D3. offs < 0, errno = ENXIO: either start is in a trailing hole
> > > + *  or start is beyond EOF
> > > + * If the latter happens, the file has been truncated behind
> > > + * our back since we opened it.  All bets are off then.
> > > + * Treating like a trailing hole is simplest.
> > > + * D4. offs < 0, errno != ENXIO: we learned nothing
> > > + */
> > > +offs = glfs_lseek(s->fd, start, SEEK_DATA);
> > > +if (offs < 0) {
> > > +return -errno;  /* D3 or D4 */
> > > +}
> > > +assert(offs >= start);
> > > +
> > > +if (offs > start) {
> > > +/* D2: in hole, next data at offs */
> > > +*hole = start;
> > > +*data

Re: [Qemu-devel] [PATCH v3 00/13] tests: Introducing docker tests

2016-03-15 Thread Fam Zheng

On Fri, 03/11 16:16, Alex Bennée wrote:
> 
> Fam Zheng  writes:
> 
> > v3 changes:
> 
> I think we are almost there. There are just a few tweaks to be made to
> help text and prompts. Can you ensure that all examples in commit
> messages and help text actually do run as expected?

OK, I'm fixing these now and will send v4 very soon.

> 
> Is it proposed this goes through Daniel's tree?

I'm fine with that. So should I add Daniel in the MAINTAINERS patch? I can also
send a pull req myself if that's okay - I will then sign my gpg key with Jason
Wang.

Fam

Re: [Qemu-devel] [PATCH v3 10/13] docker: Add travis tool

2016-03-15 Thread Fam Zheng

On Fri, 03/11 16:14, Alex Bennée wrote:
> 
> Fam Zheng  writes:
> 
> > The script is not named test-travis.sh so it won't run with "make
> > docker-run", because it can take too long.
> >
> > Run it with "make docker-run-travis.sh@ubuntu".
> 
> 16:08 alex@zen/x86_64  [qemu.git/review/docker-v3] >make 
> docker-run-travis.sh@ubuntu
> ARCHIVE qemu.tgz
> COPY RUNNER
> RUN travis.sh in ubuntu
> ./run: line 49: /tmp/qemu-test/src/tests/docker/travis.sh: No such file or 
> directory

Will update the commit message.

Fam

Re: [Qemu-devel] [RFC PATCH v2 0/9] Core based CPU hotplug for PowerPC sPAPR

2016-03-15 Thread Bharata B Rao

On Mon, Mar 14, 2016 at 10:47:28AM +0100, Igor Mammedov wrote:
> On Fri, 11 Mar 2016 10:24:29 +0530
> Bharata B Rao  wrote:
> 
> > Hi,
> > 
> > This is the next version of "Core based CPU hotplug for PowerPC sPAPR" that
> > was posted at
> > https://lists.gnu.org/archive/html/qemu-ppc/2016-03/msg00081.html
> > 
> > device_add semantics
> > 
> > For -smp 16,sockets=1,cores=2,threads=8,maxcpus=32
> > (qemu) device_add spapr-cpu-core,id=core2,core=16,cpu_model=host[,threads=8]
> do you plan to allow user to hotplug different cpu_models?
> If not it would be better to hide cpu_model from user
> and set it from machine pre_plug handler.

In my earlier implementations I derived cpu model from -cpu and threads from
-smp,threads= commandline options and never exposed them to device_add
command.

Though we don't support heterogeneous systems (different cpu models and/or
threads) now, it was felt that it should be easy enough to support such
systems if required in future; that's how cpu_model and threads became
options for device_add.

One of the things that David felt was missing from my earlier QMP query
command (and which is true in your QMP query implementation also) is that
we aren't exporting cpu_model at all, at least not for not-yet-plugged cores.
So should we include that, or let management figure that out since it
would already know about the CPU model?

Regards,
Bharata.

Re: [Qemu-devel] [PATCH v3 07/13] docker: Add full test

2016-03-15 Thread Fam Zheng

On Fri, 03/11 16:10, Alex Bennée wrote:
> 
> Fam Zheng  writes:
> 
> > This builds all available targets.
> >
> > Signed-off-by: Fam Zheng 
> > ---
> >  tests/docker/test-full | 17 +
> >  1 file changed, 17 insertions(+)
> >  create mode 100755 tests/docker/test-full
> >
> > diff --git a/tests/docker/test-full b/tests/docker/test-full
> > new file mode 100755
> > index 000..fd9b798
> > --- /dev/null
> > +++ b/tests/docker/test-full
> > @@ -0,0 +1,17 @@
> > +#!/bin/bash -e
> > +#
> > +# Compile all the targets.
> > +#
> > +# Copyright (c) 2016 Red Hat Inc.
> > +#
> > +# Authors:
> > +#  Fam Zheng 
> > +#
> > +# This work is licensed under the terms of the GNU GPL, version 2
> > +# or (at your option) any later version. See the COPYING file in
> > +# the top-level directory.
> > +
> > +. common.rc
> > +
> > +build_qemu
> 
> If this is the full featured test how can we pass additional configure
> flags to the build?

Let's add an EXTRA_CONFIGURE_OPTS and use it in build_qemu. Works for you?

> 
> > +make check $MAKEFLAGS
> 
> 
> --
> Alex Bennée

Re: [Qemu-devel] [PATCH v3 05/13] docker: Add common.rc

2016-03-15 Thread Fam Zheng

On Fri, 03/11 16:06, Alex Bennée wrote:
> 
> Fam Zheng  writes:
> 
> > "requires" checks the "FEATURE" environment for specified prerequisites,
> > and skips the execution of the test if they are not found.
> 
> You also add a build_qemu function which you should mention.

Will mention it.

Thanks,

Fam
> 
> Otherwise have a:
> 
> Reviewed-by: Alex Bennée 
>

Re: [Qemu-devel] [PATCH v3 02/13] Makefile: Rules for docker testing

2016-03-15 Thread Fam Zheng

On Fri, 03/11 15:11, Alex Bennée wrote:
> 
> Fam Zheng  writes:
> 
> > This adds a group of make targets to run docker tests, all are available
> > in source tree without running ./configure.
> >
> > The usage is shown by "make docker".
> >
> > Besides the fixed ones, dynamic targets for building each image and
> > running each test in each image are generated automatically by make,
> > scanning $(SRC_PATH)/tests/docker/ files with specific patterns.
> >
> > As an alternative to manually listing a particular set of targets
> > (docker-run-FOO@BAR), you can control which tests/images to run with the
> > filtering variables TESTS= and IMAGES=, which are expressed in Makefile pattern syntax,
> > "foo% %bar ...". For example:
> >
> > $ make docker-run IMAGES="ubuntu fedora"
> 
> I thought I mentioned this last time, it needs fixing in the commit
> message:
> 
> 15:05 alex@zen/x86_64  [qemu.git/review/docker-v3] >make docker-run
> IMAGES="ubuntu fedora"
> make: *** No rule to make target `docker-run'. Stop.

My bad!

> 
> >
> > Unfortunately, it's impossible to propagate "-j $JOBS" into make in
> > containers; however, since each combination is made a first class target
> > in the top Makefile, "make -j$N docker-run" still parallelizes the tests
> > coarsely.
> >
> > Instead of providing a live version of the source tree to the docker
> > container we snapshot it with git-archive. This ensures the tree is in a
> > pristine state for whatever operations the container is going to run on
> > them.
> >
> > Uncommitted changes to files known by the git index will be
> > included in the snapshot if there are any.
> >
> > Signed-off-by: Fam Zheng 
> > Signed-off-by: Alex Bennée 
> 
> fix the commit message and have a:
> 
> Reviewed-by: Alex Bennée 

Thanks!

Fam

Re: [Qemu-devel] [PATCH v3 01/13] tests: Add utilities for docker testing

2016-03-15 Thread Fam Zheng

On Fri, 03/11 15:04, Alex Bennée wrote:
> 
> Fam Zheng  writes:
> 
> > docker_run: A wrapper for "docker run" (or "sudo -n docker run" if
> > necessary), which takes care of killing and removing the running
> > container at SIGINT.
> >
> > docker_clean: A tool to tear down all the containers including inactive
> > ones that are started by docker_run.
> >
> > docker_build: A tool to compare an image from given dockerfile and
> > rebuild it if they're different.
> 
> This commit text needs updating with the actual calling conventions.

Will do.

> 
> >
> > Signed-off-by: Fam Zheng 
> > ---
> >  tests/docker/docker.py | 180 
> > +
> >  1 file changed, 180 insertions(+)
> >  create mode 100755 tests/docker/docker.py
> >
> > diff --git a/tests/docker/docker.py b/tests/docker/docker.py
> > new file mode 100755
> > index 000..22f537c
> > --- /dev/null
> > +++ b/tests/docker/docker.py
> > @@ -0,0 +1,180 @@
> > +#!/usr/bin/env python2
> > +#
> > +# Docker controlling module
> > +#
> > +# Copyright (c) 2016 Red Hat Inc.
> > +#
> > +# Authors:
> > +#  Fam Zheng 
> > +#
> > +# This work is licensed under the terms of the GNU GPL, version 2
> > +# or (at your option) any later version. See the COPYING file in
> > +# the top-level directory.
> 
> It's worth running pylint over this file. There are a number of
> missing newlines/spaces/long lines that aren't PEP friendly.

I'll run this through pylint.

> 
> > +
> > +import os
> > +import sys
> > +import subprocess
> > +import json
> > +import hashlib
> > +import atexit
> > +import uuid
> > +import argparse
> > +
> > +class Docker(object):
> > +""" Running Docker commands """
> > +def __init__(self):
> > +self._command = self._guess_command()
> > +self._instances = []
> > +atexit.register(self._kill_instances)
> > +
> > +def _do(self, cmd, quiet=True, **kwargs):
> > +if quiet:
> > +kwargs["stdout"] = subprocess.PIPE
> > +return subprocess.call(self._command + cmd, **kwargs)
> > +
> > +def _do_kill_instances(self, only_known, only_active=True):
> > +cmd = ["ps", "-q"]
> > +if not only_active:
> > +cmd.append("-a")
> > +for i in self._output(cmd).split():
> > +resp = self._output(["inspect", i])
> > +labels = json.loads(resp)[0]["Config"]["Labels"]
> > +active = json.loads(resp)[0]["State"]["Running"]
> > +if not labels:
> > +continue
> > +instance_uuid = labels.get("com.qemu.instance.uuid", None)
> > +if not instance_uuid:
> > +continue
> > +if only_known and instance_uuid not in self._instances:
> > +continue
> > +print "Terminating", i
> > +if active:
> > +self._do(["kill", i])
> > +self._do(["rm", i])
> > +
> > +def clean(self):
> > +self._do_kill_instances(False, False)
> > +return 0
> > +
> > +def _kill_instances(self):
> > +return self._do_kill_instances(True)
> > +
> > +def _output(self, cmd, **kwargs):
> > +return subprocess.check_output(self._command + cmd,
> > +   stderr=subprocess.STDOUT,
> > +   **kwargs)
> > +
> > +def _guess_command(self):
> > +commands = [["docker"], ["sudo", "-n", "docker"]]
> > +for cmd in commands:
> > +if subprocess.call(cmd + ["images"],
> > +   stdout=subprocess.PIPE,
> > +   stderr=subprocess.PIPE) == 0:
> > +return cmd
> > +commands_txt = "\n".join(["  " + " ".join(x) for x in commands])
> > +raise Exception("Cannot find working docker command. Tried:\n%s" % 
> > commands_txt)
> > +
> > +def get_image_dockerfile_checksum(self, tag):
> > +resp = self._output(["inspect", tag])
> > +labels = json.loads(resp)[0]["Config"].get("Labels", {})
> > +return labels.get("com.qemu.dockerfile-checksum", "")
> > +
> > +def checksum(self, text):
> > +return hashlib.sha1(text).hexdigest()
> > +
> > +def build_image(self, tag, dockerfile, df, quiet=True, argv=[]):
> > +tmp = dockerfile + "\n" + \
> > +  "LABEL com.qemu.dockerfile-checksum=%s" % 
> > self.checksum(dockerfile)
> > +tmp_df = df + ".tmp"
> > +tmp_file = open(tmp_df, "wb")
> > +tmp_file.write(tmp)
> > +tmp_file.close()
> > +self._do(["build", "-t", tag, "-f", tmp_df] + argv + 
> > [os.path.dirname(df)],
> > + quiet=quiet)
> > +os.unlink(tmp_df)
> 
> Use python's tempfile to do this. It handles all the lifetime issues for
> you automatically - the file gets removed when the object goes out of scope.

Okay, will do.

> 
> > +
> > +def

Re: [Qemu-devel] [ Patch 1/2] virtio-net rsc: support coalescing ipv4 tcp traffic

2016-03-15 Thread Wei Xu



- Original Message -
From: "Michael S. Tsirkin" 
To: w...@redhat.com
Cc: vict...@redhat.com, jasow...@redhat.com, yvuge...@redhat.com, 
qemu-devel@nongnu.org, mar...@redhat.com, dfley...@redhat.com
Sent: Tuesday, March 15, 2016 6:00:03 PM
Subject: Re: [Qemu-devel] [ Patch 1/2] virtio-net rsc: support coalescing ipv4 
tcp traffic

On Tue, Mar 15, 2016 at 05:17:03PM +0800, w...@redhat.com wrote:
> From: Wei Xu 
> 
> All the data packets in a tcp connection will be cached in a big buffer
> during every receive interval, and will be sent out via a timer. The
> 'virtio_net_rsc_timeout' controls the interval; the value strongly
> influences the performance and responsiveness of the tcp connection.
> 5 (50us) is an empirical value that gains a performance improvement; since
> the whql test sends packets every 100us, '30' (300us) can pass the test case,
> and this is also the default value. It is going to be tunable.
> The timer will only be triggered if the packet pool is not empty,
> and it will drain off all the cached packets.
> 
> 'NetRscChain' is used to save the segments of different protocols in a
> VirtIONet device.
> 
> The main handler of TCP includes TCP window update, duplicated ACK check
> and the real data coalescing if the new segment passed sanity check
> and is identified as an 'wanted' one.
> 
> An 'wanted' segment means:
> 1. Segment is within current window and the sequence is the expected one.
> 2. ACK of the segment is in the valid window.
> 3. If the ACK in the segment is a duplicated one, then it must less than 2,
>this is to notify upper layer TCP starting retransmission due to the spec.
> 
> Sanity check includes:
> 1. Incorrect version in IP header
> 2. IP options & IP fragment
> 3. Not a TCP packets
> 4. Sanity size check to prevent buffer overflow attack.
> 
> There maybe more cases should be considered such as ip identification other
> flags, while it broke the test because windows set it to the same even it's
> not a fragment.
> 
> Normally it includes 2 typical ways to handle a TCP control flag, 'bypass'
> and 'finalize', 'bypass' means should be sent out directly, and 'finalize'
> means the packets should also be bypassed, and this should be done
> after searching for the same connection packets in the pool and sending
> all of them out, this is to avoid out of data.
> 
> All the 'SYN' packets will be bypassed since they always begin a new
> connection. Other flags such as 'FIN/RST' will trigger a finalization, because
> this normally happens when a connection is going to be closed. An 'URG' packet
> also finalizes the current coalescing unit, though there may be protocol
> differences between different OSes.
> 
> Statistics can be used to monitor the basic coalescing status, the 'out of 
> order'
> and 'out of window' means how many retransmitting packets, thus describe the
> performance intuitively.
> 
> Signed-off-by: Wei Xu 
> ---
>  hw/net/virtio-net.c| 486 
> -
>  include/hw/virtio/virtio-net.h |   1 +
>  include/hw/virtio/virtio.h |  72 ++
>  3 files changed, 558 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
> index 5798f87..c23b45f 100644
> --- a/hw/net/virtio-net.c
> +++ b/hw/net/virtio-net.c
> @@ -15,10 +15,12 @@
>  #include "qemu/iov.h"
>  #include "hw/virtio/virtio.h"
>  #include "net/net.h"
> +#include "net/eth.h"
>  #include "net/checksum.h"
>  #include "net/tap.h"
>  #include "qemu/error-report.h"
>  #include "qemu/timer.h"
> +#include "qemu/sockets.h"
>  #include "hw/virtio/virtio-net.h"
>  #include "net/vhost_net.h"
>  #include "hw/virtio/virtio-bus.h"
> @@ -38,6 +40,35 @@
>  #define endof(container, field) \
>  (offsetof(container, field) + sizeof(((container *)0)->field))
>  
> +#define ETH_HDR_SZ (sizeof(struct eth_header))
> +#define IP4_HDR_SZ (sizeof(struct ip_header))
> +#define TCP_HDR_SZ (sizeof(struct tcp_header))
> +#define ETH_IP4_HDR_SZ (ETH_HDR_SZ + IP4_HDR_SZ)

It's better to open-code these imho.

okay.

> +
> +#define IP4_ADDR_SIZE   8   /* ipv4 saddr + daddr */
> +#define TCP_PORT_SIZE   4   /* sport + dport */
> +
> +/* IPv4 max payload, 16 bits in the header */
> +#define MAX_IP4_PAYLOAD (65535 - IP4_HDR_SZ)
> +#define MAX_TCP_PAYLOAD 65535
> +
> +/* max payload with virtio header */
> +#define MAX_VIRTIO_PAYLOAD  (sizeof(struct virtio_net_hdr_mrg_rxbuf) \
> ++ ETH_HDR_SZ + MAX_TCP_PAYLOAD)
> +
> +#define IP4_HEADER_LEN 5 /* header length value in ip header without option 
> */
> +
> +/* Purge coalesced packets timer interval */
> +#define RSC_TIMER_INTERVAL  30

Pls prefix local macros with VIRTIO_NET_

sure.


> +
> +/* Switcher to enable/disable rsc */
> +static bool virtio_net_rsc_bypass = 1;
> +
> +/* This value affects the performance a lot, and should be tuned carefully,
> +   '30'(300us) is the recommended value to pass the

Re: [Qemu-devel] [ Patch 0/2] Support Receive-Segment-Offload(RSC) for WHQL test of Window guest

2016-03-15 Thread Wei Xu



- Original Message -
From: "Michael S. Tsirkin" 
To: w...@redhat.com
Cc: vict...@redhat.com, jasow...@redhat.com, yvuge...@redhat.com, 
qemu-devel@nongnu.org, mar...@redhat.com, dfley...@redhat.com
Sent: Tuesday, March 15, 2016 6:01:12 PM
Subject: Re: [Qemu-devel] [ Patch 0/2] Support Receive-Segment-Offload(RSC) for 
WHQL test of Window guest

On Tue, Mar 15, 2016 at 05:17:02PM +0800, w...@redhat.com wrote:
> From: Wei Xu 
> 
> Fixed issues based on rfc patch v2:
> 1. Removed big param list, replace it with 'NetRscUnit' 
> 2. Different virtio header size
> 3. Modify callback function to direct call.
> 4. Needn't check the failure of g_malloc()
> 5. Other code format adjustment, macro naming, etc 
> 
> This patch is to support the WHQL test for Windows guests; the feature also
> benefits other guests, working as a kernel 'gro'-like feature with a userspace
> implementation.
> Feature information:
>   http://msdn.microsoft.com/en-us/library/windows/hardware/jj853324
> 
> Both IPv4 and IPv6 are supported. Though performance with userspace virtio
> is slower than vhost-net, there is about a 1x to 3x performance improvement for
> userspace virtio; this is achieved by turning this feature on and disabling
> 'tso/gso/gro' on the corresponding tap interface and guest interface, while
> there is less improvement with all these features on.
> 
> Test steps:
> Although this feature is mainly used for window guest, i used linux guest to 
> help test
> the feature, to make things simple, i used 3 steps to test the patch as i 
> moved on.
> 1. With a tcp socket client/server pair running on 2 linux guest, thus i can 
> control
> the traffic and debugging the code as i want.
> 2. Netperf on linux guest test the throughput.
> 3. WHQL test with 2 Windows guests.
> 
> Current status:
> IPv4 pass all the above tests.
> IPv6 just passed test step 1 and 2 as described ahead, the virtio nic cannot
> receive any packet in WHQL test, looks like the test traffic is not sent from
> on the support machine, test device can access both host and another linux
> guest, tried a lot of ways to work it out but failed, maybe debug from windows
> guest driver side can help figuring it out.
> 
> Note:
> A 'MessageDevice' nic chose as 'Realtek' will panic the system sometimes 
> during setup,
> this can be figured out by replacing it with an 'e1000' nic.
> 
> Todo:
> More sanity check and tcp 'ecn' and 'window' scale test.

So at this point this is still an RFC, pls label as such
in the subject.
Also, commit log of each patch should also include info on
how to activate a feature.

OK, thanks mst.

thanks!

> Wei Xu (2):
>   virtio-net rsc: support coalescing ipv4 tcp traffic
>   virtio-net rsc: support coalescing ipv6 tcp traffic
> 
>  hw/net/virtio-net.c| 602 
> -
>  include/hw/virtio/virtio-net.h |   1 +
>  include/hw/virtio/virtio.h |  75 +
>  3 files changed, 677 insertions(+), 1 deletion(-)
> 
> -- 
> 2.5.0

Re: [Qemu-devel] [RFC PATCH v2 1/2] spapr: Add DRC count indexed hotplug identifier type

2016-03-15 Thread David Gibson

On Tue, Mar 15, 2016 at 10:08:55AM +0530, Bharata B Rao wrote:
> Add support for DRC count indexed hotplug ID type which is primarily
> needed for memory hot unplug. This type allows for specifying the
> number of DRs that should be plugged/unplugged starting from a given
> DRC index.
> 
> NOTE: This new hotplug identifier type is not yet part of PAPR.
> 
> Signed-off-by: Bharata B Rao 

Reviewed-by: David Gibson 

Looks correct, but obviously I won't apply until the change reaches
PAPR.


> ---
>  hw/ppc/spapr_events.c  | 57 
> +-
>  include/hw/ppc/spapr.h |  2 ++
>  2 files changed, 45 insertions(+), 14 deletions(-)
> 
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index 39f4682..5d1d13d 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -171,6 +171,16 @@ struct epow_log_full {
>  struct rtas_event_log_v6_epow epow;
>  } QEMU_PACKED;
>  
> +union drc_id {
> +uint32_t index;
> +uint32_t count;
> +struct count_index {
> +uint32_t index;
> +uint32_t count;
> +} count_index;
> +char name[1];
> +} QEMU_PACKED;
> +
>  struct rtas_event_log_v6_hp {
>  #define RTAS_LOG_V6_SECTION_ID_HOTPLUG  0x4850 /* HP */
>  struct rtas_event_log_v6_section_header hdr;
> @@ -187,12 +197,9 @@ struct rtas_event_log_v6_hp {
>  #define RTAS_LOG_V6_HP_ID_DRC_NAME   1
>  #define RTAS_LOG_V6_HP_ID_DRC_INDEX  2
>  #define RTAS_LOG_V6_HP_ID_DRC_COUNT  3
> +#define RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED  4
>  uint8_t reserved;
> -union {
> -uint32_t index;
> -uint32_t count;
> -char name[1];
> -} drc;
> +union drc_id drc_id;
>  } QEMU_PACKED;
>  
>  struct hp_log_full {
> @@ -389,7 +396,7 @@ static void spapr_powerdown_req(Notifier *n, void *opaque)
>  
>  static void spapr_hotplug_req_event(uint8_t hp_id, uint8_t hp_action,
>  sPAPRDRConnectorType drc_type,
> -uint32_t drc)
> +union drc_id *drc_id)
>  {
>  sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
>  struct hp_log_full *new_hp;
> @@ -446,9 +453,12 @@ static void spapr_hotplug_req_event(uint8_t hp_id, 
> uint8_t hp_action,
>  }
>  
>  if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT) {
> -hp->drc.count = cpu_to_be32(drc);
> +hp->drc_id.count = cpu_to_be32(drc_id->count);
>  } else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_INDEX) {
> -hp->drc.index = cpu_to_be32(drc);
> +hp->drc_id.index = cpu_to_be32(drc_id->index);
> +} else if (hp_id == RTAS_LOG_V6_HP_ID_DRC_COUNT_INDEXED) {
> +hp->drc_id.count_index.count = 
> cpu_to_be32(drc_id->count_index.count);
> +hp->drc_id.count_index.index = 
> cpu_to_be32(drc_id->count_index.index);
>  }
>  
>  rtas_event_log_queue(RTAS_LOG_TYPE_HOTPLUG, new_hp, true);
> @@ -460,34 +470,53 @@ void spapr_hotplug_req_add_by_index(sPAPRDRConnector 
> *drc)
>  {
>  sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
>  sPAPRDRConnectorType drc_type = drck->get_type(drc);
> -uint32_t index = drck->get_index(drc);
> +union drc_id drc_id;
> +drc_id.index = drck->get_index(drc);
>  
>  spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX,
> -RTAS_LOG_V6_HP_ACTION_ADD, drc_type, index);
> > +RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id);
>  }
>  
>  void spapr_hotplug_req_remove_by_index(sPAPRDRConnector *drc)
>  {
>  sPAPRDRConnectorClass *drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
>  sPAPRDRConnectorType drc_type = drck->get_type(drc);
> -uint32_t index = drck->get_index(drc);
> +union drc_id drc_id;
> +drc_id.index = drck->get_index(drc);
>  
>  spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_INDEX,
> -RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, index);
> > +RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
>  }
>  
>  void spapr_hotplug_req_add_by_count(sPAPRDRConnectorType drc_type,
> uint32_t count)
>  {
> +union drc_id drc_id;
> +drc_id.count = count;
> +
>  spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT,
> -RTAS_LOG_V6_HP_ACTION_ADD, drc_type, count);
> > +RTAS_LOG_V6_HP_ACTION_ADD, drc_type, &drc_id);
>  }
>  
>  void spapr_hotplug_req_remove_by_count(sPAPRDRConnectorType drc_type,
>uint32_t count)
>  {
> +union drc_id drc_id;
> +drc_id.count = count;
> +
>  spapr_hotplug_req_event(RTAS_LOG_V6_HP_ID_DRC_COUNT,
> -RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, count);
> +RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type,

Re: [Qemu-devel] [RFC PATCH v2 2/2] spapr: Memory hot-unplug support

2016-03-15 Thread David Gibson

On Tue, Mar 15, 2016 at 10:08:56AM +0530, Bharata B Rao wrote:
> Add support to hot remove pc-dimm memory devices.
> 
> Signed-off-by: Bharata B Rao 

Reviewed-by: David Gibson 

Looks correct, but again, needs to wait on the PAPR change.

Have you thought any further on the idea of sending an index message,
then a count message as an interim approach to fixing this without
requiring a PAPR change?

> ---
>  hw/ppc/spapr.c | 93 
> +-
>  hw/ppc/spapr_drc.c | 18 +++
>  2 files changed, 110 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 43708a2..cdf268a 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2197,6 +2197,88 @@ out:
>  error_propagate(errp, local_err);
>  }
>  
> +typedef struct sPAPRDIMMState {
> +uint32_t nr_lmbs;
> +} sPAPRDIMMState;
> +
> +static void spapr_lmb_release(DeviceState *dev, void *opaque)
> +{
> +sPAPRDIMMState *ds = (sPAPRDIMMState *)opaque;
> +HotplugHandler *hotplug_ctrl = NULL;
> +
> +if (--ds->nr_lmbs) {
> +return;
> +}
> +
> +g_free(ds);
> +
> +/*
> + * Now that all the LMBs have been removed by the guest, call the
> + * pc-dimm unplug handler to cleanup up the pc-dimm device.
> + */
> +hotplug_ctrl = qdev_get_hotplug_handler(dev);
> > +hotplug_handler_unplug(hotplug_ctrl, dev, &error_abort);
> +}
> +
> +static void spapr_del_lmbs(DeviceState *dev, uint64_t addr, uint64_t size,
> +   Error **errp)
> +{
> +sPAPRDRConnector *drc;
> +sPAPRDRConnectorClass *drck;
> +uint32_t nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE;
> +int i;
> +sPAPRDIMMState *ds = g_malloc0(sizeof(sPAPRDIMMState));
> +uint32_t start_index;
> +
> +ds->nr_lmbs = nr_lmbs;
> +for (i = 0; i < nr_lmbs; i++) {
> +drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_LMB,
> +addr / SPAPR_MEMORY_BLOCK_SIZE);
> +g_assert(drc);
> +
> +drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> +drck->detach(drc, dev, spapr_lmb_release, ds, errp);
> +if (!i) {
> +start_index = drck->get_index(drc);
> +}
> +addr += SPAPR_MEMORY_BLOCK_SIZE;
> +}
> +spapr_hotplug_req_remove_by_count_indexed(SPAPR_DR_CONNECTOR_TYPE_LMB,
> +  nr_lmbs, start_index);
> +}
> +
> +static void spapr_memory_unplug(HotplugHandler *hotplug_dev, DeviceState 
> *dev,
> +Error **errp)
> +{
> +sPAPRMachineState *ms = SPAPR_MACHINE(hotplug_dev);
> +PCDIMMDevice *dimm = PC_DIMM(dev);
> +PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
> +MemoryRegion *mr = ddc->get_memory_region(dimm);
> +
> > +pc_dimm_memory_unplug(dev, &ms->hotplug_memory, mr);
> +object_unparent(OBJECT(dev));
> +}
> +
> +static void spapr_memory_unplug_request(HotplugHandler *hotplug_dev,
> +DeviceState *dev, Error **errp)
> +{
> +Error *local_err = NULL;
> +PCDIMMDevice *dimm = PC_DIMM(dev);
> +PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
> +MemoryRegion *mr = ddc->get_memory_region(dimm);
> +uint64_t size = memory_region_size(mr);
> +uint64_t addr;
> +
> > +addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, 
> > &local_err);
> +if (local_err) {
> +goto out;
> +}
> +
> > +spapr_del_lmbs(dev, addr, size, &error_abort);
> +out:
> +error_propagate(errp, local_err);
> +}
> +
>  static void spapr_machine_device_plug(HotplugHandler *hotplug_dev,
>DeviceState *dev, Error **errp)
>  {
> @@ -2244,7 +2326,15 @@ static void spapr_machine_device_unplug(HotplugHandler 
> *hotplug_dev,
>DeviceState *dev, Error **errp)
>  {
>  if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
> -error_setg(errp, "Memory hot unplug not supported by sPAPR");
> +spapr_memory_unplug(hotplug_dev, dev, errp);
> +}
> +}
> +
> +static void spapr_machine_device_unplug_request(HotplugHandler *hotplug_dev,
> +DeviceState *dev, Error 
> **errp)
> +{
> +if (object_dynamic_cast(OBJECT(dev), TYPE_PC_DIMM)) {
> +spapr_memory_unplug_request(hotplug_dev, dev, errp);
>  }
>  }
>  
> @@ -2293,6 +2383,7 @@ static void spapr_machine_class_init(ObjectClass *oc, 
> void *data)
>  hc->plug = spapr_machine_device_plug;
>  hc->unplug = spapr_machine_device_unplug;
>  mc->cpu_index_to_socket_id = spapr_cpu_index_to_socket_id;
> +hc->unplug_request = spapr_machine_device_unplug_request;
>  
>  smc->dr_lmb_enabled = true;
>  fwc->get_dev_path = spapr_get_fw_dev_path;
> diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
> index ef063c0..740b9d4 100644
> --- a/hw/ppc/spapr_drc.c
> +++ b/hw/ppc/spapr_drc.c
> @@ -12,6 +12,7 @@

[Qemu-devel] [PATCH v5 1/4] hw/timer: Add ASPEED timer device model

2016-03-15 Thread Andrew Jeffery

Implement basic ASPEED timer functionality for the AST2400 SoC[1]: Up to
8 timers can independently be configured, enabled, reset and disabled.
Some hardware features are not implemented, namely clock value matching
and pulse generation, but the implementation is enough to boot the Linux
kernel configured with aspeed_defconfig.

[1] http://www.aspeedtech.com/products.php?fPath=20&rPath=376

Signed-off-by: Andrew Jeffery 
---
Since v4:
  * Fix to compile when using GCC 4.x
  * Drop unnecessary asserts

Since v3:
  * Drop unnecessary mention of VMStateDescription in timer_to_ctrl description
  * Mention hw/timer/a9gtimer.c with respect to clock value matching
  * Add missing VMSTATE_END_OF_LIST() to vmstate_aspeed_timer_state

Since v2:
  * Improve handling of timer configuration with respect to enabled state
  * Remove redundant enabled member from AspeedTimer
  * Implement VMStateDescriptions
  * Fix interrupt behaviour (edge triggered, both edges)
  * Fix various issues with trace-event declarations
  * Include qemu/osdep.h

Since v1:
  * Refactor initialisation of and respect requested clock rates (APB/External)
  * Simplify some index calculations
  * Use tracing infrastructure instead of internal DPRINTF
  * Enforce access size constraints and alignment in MemoryRegionOps

 default-configs/arm-softmmu.mak |   1 +
 hw/timer/Makefile.objs  |   1 +
 hw/timer/aspeed_timer.c | 449 
 include/hw/timer/aspeed_timer.h |  59 ++
 trace-events|   9 +
 5 files changed, 519 insertions(+)
 create mode 100644 hw/timer/aspeed_timer.c
 create mode 100644 include/hw/timer/aspeed_timer.h

diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
index a9f82a1..2bcd236 100644
--- a/default-configs/arm-softmmu.mak
+++ b/default-configs/arm-softmmu.mak
@@ -110,3 +110,4 @@ CONFIG_IOH3420=y
 CONFIG_I82801B11=y
 CONFIG_ACPI=y
 CONFIG_SMBIOS=y
+CONFIG_ASPEED_SOC=y
diff --git a/hw/timer/Makefile.objs b/hw/timer/Makefile.objs
index 5cfea6e..003c14f 100644
--- a/hw/timer/Makefile.objs
+++ b/hw/timer/Makefile.objs
@@ -32,3 +32,4 @@ obj-$(CONFIG_MC146818RTC) += mc146818rtc.o
 obj-$(CONFIG_ALLWINNER_A10_PIT) += allwinner-a10-pit.o
 
 common-obj-$(CONFIG_STM32F2XX_TIMER) += stm32f2xx_timer.o
+common-obj-$(CONFIG_ASPEED_SOC) += aspeed_timer.o
diff --git a/hw/timer/aspeed_timer.c b/hw/timer/aspeed_timer.c
new file mode 100644
index 000..51e8303
--- /dev/null
+++ b/hw/timer/aspeed_timer.c
@@ -0,0 +1,449 @@
+/*
+ * ASPEED AST2400 Timer
+ *
+ * Andrew Jeffery 
+ *
+ * Copyright (C) 2016 IBM Corp.
+ *
+ * This code is licensed under the GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "hw/ptimer.h"
+#include "hw/sysbus.h"
+#include "hw/timer/aspeed_timer.h"
+#include "qemu-common.h"
+#include "qemu/bitops.h"
+#include "qemu/main-loop.h"
+#include "qemu/timer.h"
+#include "trace.h"
+
+#define TIMER_NR_REGS 4
+
+#define TIMER_CTRL_BITS 4
+#define TIMER_CTRL_MASK ((1 << TIMER_CTRL_BITS) - 1)
+
+#define TIMER_CLOCK_USE_EXT true
+#define TIMER_CLOCK_EXT_HZ 100
+#define TIMER_CLOCK_USE_APB false
+#define TIMER_CLOCK_APB_HZ 2400
+
+#define TIMER_REG_STATUS 0
+#define TIMER_REG_RELOAD 1
+#define TIMER_REG_MATCH_FIRST 2
+#define TIMER_REG_MATCH_SECOND 3
+
+#define TIMER_FIRST_CAP_PULSE 4
+
+enum timer_ctrl_op {
+op_enable = 0,
+op_external_clock,
+op_overflow_interrupt,
+op_pulse_enable
+};
+
+/**
+ * Avoid mutual references between AspeedTimerCtrlState and AspeedTimer
+ * structs, as it's a waste of memory. The ptimer BH callback needs to know
+ * whether a specific AspeedTimer is enabled, but this information is held in
+ * AspeedTimerCtrlState. So, provide a helper to hoist ourselves from an
+ * arbitrary AspeedTimer to AspeedTimerCtrlState.
+ */
+static inline AspeedTimerCtrlState *timer_to_ctrl(AspeedTimer *t)
+{
+const AspeedTimer (*timers)[] = (void *)t - (t->id * sizeof(*t));
+return container_of(timers, AspeedTimerCtrlState, timers);
+}
+
+static inline bool timer_ctrl_status(AspeedTimer *t, enum timer_ctrl_op op)
+{
+return !!(timer_to_ctrl(t)->ctrl & BIT(t->id * TIMER_CTRL_BITS + op));
+}
+
+static inline bool timer_enabled(AspeedTimer *t)
+{
+return timer_ctrl_status(t, op_enable);
+}
+
+static inline bool timer_overflow_interrupt(AspeedTimer *t)
+{
+return timer_ctrl_status(t, op_overflow_interrupt);
+}
+
+static inline bool timer_can_pulse(AspeedTimer *t)
+{
+return t->id >= TIMER_FIRST_CAP_PULSE;
+}
+
+static void aspeed_timer_expire(void *opaque)
+{
+AspeedTimer *t = opaque;
+
+/* Only support interrupts on match values of zero for the moment - this is
+ * sufficient to boot an aspeed_defconfig Linux kernel.
+ *
+ * TODO: matching on arbitrary values (see e.g. hw/timer/a9gtimer.c)
+ */
+bool match = !(t->match[0] && t->match[1]);
+bool interrupt =

[Qemu-devel] [PATCH v5 4/4] hw/arm: Add palmetto-bmc machine

2016-03-15 Thread Andrew Jeffery

The new machine is a thin layer over the AST2400 ARM926-based SoC[1].
Between the minimal machine and the current SoC implementation there is
enough functionality to boot an aspeed_defconfig Linux kernel to
userspace. Nothing yet is specific to the Palmetto's BMC (other than
using an AST2400 SoC), but creating specific machine types is preferable
to a generic machine that doesn't match any particular hardware.

[1] http://www.aspeedtech.com/products.php?fPath=20&rPath=376

Signed-off-by: Andrew Jeffery 
---
Since v4:
  * Rename to hw/arm/palmetto-bmc.c, update functions, structs and strings

Since v3:
  * Split the machine from the SoC implementation

Since v2:
  * Implement a SOC model to move code out from the machine definition
  * Rework the machine to better use QOM
  * Include qemu/osdep.h
  * Revert back to qemu_log_mask(LOG_UNIMP, ...) in IO handlers

 hw/arm/Makefile.objs  |  2 +-
 hw/arm/palmetto-bmc.c | 65 +++
 2 files changed, 66 insertions(+), 1 deletion(-)
 create mode 100644 hw/arm/palmetto-bmc.c

diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs
index f333b7f..954c9fe 100644
--- a/hw/arm/Makefile.objs
+++ b/hw/arm/Makefile.objs
@@ -16,4 +16,4 @@ obj-$(CONFIG_STM32F205_SOC) += stm32f205_soc.o
 obj-$(CONFIG_XLNX_ZYNQMP) += xlnx-zynqmp.o xlnx-ep108.o
 obj-$(CONFIG_FSL_IMX25) += fsl-imx25.o imx25_pdk.o
 obj-$(CONFIG_FSL_IMX31) += fsl-imx31.o kzm.o
-obj-$(CONFIG_ASPEED_SOC) += ast2400.o
+obj-$(CONFIG_ASPEED_SOC) += ast2400.o palmetto-bmc.o
diff --git a/hw/arm/palmetto-bmc.c b/hw/arm/palmetto-bmc.c
new file mode 100644
index 000..55d7419
--- /dev/null
+++ b/hw/arm/palmetto-bmc.c
@@ -0,0 +1,65 @@
+/*
+ * OpenPOWER Palmetto BMC
+ *
+ * Andrew Jeffery 
+ *
+ * Copyright 2016 IBM Corp.
+ *
+ * This code is licensed under the GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "exec/address-spaces.h"
+#include "hw/arm/arm.h"
+#include "hw/arm/ast2400.h"
+#include "hw/boards.h"
+
+static struct arm_boot_info palmetto_bmc_binfo = {
+.loader_start = AST2400_SDRAM_BASE,
+.board_id = 0,
+.nb_cpus = 1,
+};
+
+typedef struct PalmettoBMCState {
+AST2400State soc;
+MemoryRegion ram;
+} PalmettoBMCState;
+
+static void palmetto_bmc_init(MachineState *machine)
+{
+PalmettoBMCState *bmc;
+
+bmc = g_new0(PalmettoBMCState, 1);
+object_initialize(&bmc->soc, (sizeof(bmc->soc)), TYPE_AST2400);
+object_property_add_child(OBJECT(machine), "soc", OBJECT(&bmc->soc),
+  &error_abort);
+
+memory_region_allocate_system_memory(&bmc->ram, NULL, "ram", ram_size);
+memory_region_add_subregion(get_system_memory(), AST2400_SDRAM_BASE,
+&bmc->ram);
+object_property_add_const_link(OBJECT(&bmc->soc), "ram", OBJECT(&bmc->ram),
+   &error_abort);
+object_property_set_bool(OBJECT(&bmc->soc), true, "realized",
+ &error_abort);
+
+palmetto_bmc_binfo.kernel_filename = machine->kernel_filename;
+palmetto_bmc_binfo.initrd_filename = machine->initrd_filename;
+palmetto_bmc_binfo.kernel_cmdline = machine->kernel_cmdline;
+palmetto_bmc_binfo.ram_size = ram_size;
+arm_load_kernel(ARM_CPU(first_cpu), &palmetto_bmc_binfo);
+}
+
+static void palmetto_bmc_machine_init(MachineClass *mc)
+{
+mc->desc = "OpenPOWER Palmetto BMC";
+mc->init = palmetto_bmc_init;
+mc->max_cpus = 1;
+mc->no_sdcard = 1;
+mc->no_floppy = 1;
+mc->no_cdrom = 1;
+mc->no_sdcard = 1;
+mc->no_parallel = 1;
+}
+
+DEFINE_MACHINE("palmetto-bmc", palmetto_bmc_machine_init);
-- 
2.5.0

[Qemu-devel] [PATCH v5 0/4] Add ASPEED AST2400 SoC and Palmetto BMC machine

2016-03-15 Thread Andrew Jeffery

This patch series models enough of the ASPEED AST2400 ARM9 SoC[0] to boot an
aspeed_defconfig Linux kernel[1][2]. Specifically, the series implements the
ASPEED timer and VIC devices, integrates them into an AST2400 SoC and exposes
it all through a new palmetto-bmc machine. The device model patches only
partially implement the hardware features of the timer and VIC, again mostly
just enough to boot Linux.

Unfortunately the datasheet describing the devices is not generally available,
but I'll try to add comments to any unclear areas.

The addition of the AST2400 to QEMU is motivated by use of the SoC as a BMC in
OpenPOWER[3][4] machines and the ongoing development of OpenBMC[5]. The
presence of a machine model utilising the AST2400 will help with development
and testing of the OpenBMC stack.

Cheers,

Andrew

[0] http://www.aspeedtech.com/products.php?fPath=20&rPath=376
[1] https://github.com/openbmc/linux/tree/dev-4.3
[2] git fetch git@github.com:openbmc/linux.git dev-4.3
[3] http://openpowerfoundation.org/
[4] https://github.com/open-power/
[5] https://github.com/openbmc/openbmc

Changes since v4:

  * Switch approach to modelling a specific machine type (palmetto-bmc)
rather than something generic that may have no corresponding hardware
implementation
  * Address review comments from Jeremy Kerr, Cédric Le Goater, Dmitry Osipenko

Changes since v3:

  Address comments from Peter Maydell, splitting the machine model from
  hw/arm/ast2400.c into hw/arm/opbmc2400.c amongst other minor cleanups to the
  timer and VIC.

Changes since v2:

  This re-roll is a reasonable rework of the patches in the series, which may
  make it difficult to compare v1 to v2.

  Addressed reviews/comments from:
  * Peter Maydell
  * Alexey Kardashevskiy
  * Joel Stanley

Changes since v1:

  Addressed reviews/comments from:
  * Cédric Le Goater

Andrew Jeffery (4):
  hw/timer: Add ASPEED timer device model
  hw/intc: Add (new) ASPEED VIC device model
  hw/arm: Add ASPEED AST2400 SoC model
  hw/arm: Add palmetto-bmc machine

 default-configs/arm-softmmu.mak |   1 +
 hw/arm/Makefile.objs|   1 +
 hw/arm/ast2400.c| 137 
 hw/arm/palmetto-bmc.c   |  65 ++
 hw/intc/Makefile.objs   |   1 +
 hw/intc/aspeed_vic.c| 339 ++
 hw/timer/Makefile.objs  |   1 +
 hw/timer/aspeed_timer.c | 449 
 include/hw/arm/ast2400.h|  35 
 include/hw/intc/aspeed_vic.h|  48 +
 include/hw/timer/aspeed_timer.h |  59 ++
 trace-events|  16 ++
 12 files changed, 1152 insertions(+)
 create mode 100644 hw/arm/ast2400.c
 create mode 100644 hw/arm/palmetto-bmc.c
 create mode 100644 hw/intc/aspeed_vic.c
 create mode 100644 hw/timer/aspeed_timer.c
 create mode 100644 include/hw/arm/ast2400.h
 create mode 100644 include/hw/intc/aspeed_vic.h
 create mode 100644 include/hw/timer/aspeed_timer.h

-- 
2.5.0

[Qemu-devel] [PATCH v5 3/4] hw/arm: Add ASPEED AST2400 SoC model

2016-03-15 Thread Andrew Jeffery

While the ASPEED AST2400 SoC[1] has a broad range of capabilities this
implementation is minimal, comprising an ARM926 processor, ASPEED VIC
and timer devices, and a 8250 UART.

[1] http://www.aspeedtech.com/products.php?fPath=20&rPath=376

Signed-off-by: Andrew Jeffery 
---
Since v3:

  * Split the SoC from the machine implementation

Since v2:
  * Implement a SOC model to move code out from the machine definition
  * Rework the machine to better use QOM
  * Include qemu/osdep.h
  * Revert back to qemu_log_mask(LOG_UNIMP, ...) in IO handlers

 hw/arm/Makefile.objs |   1 +
 hw/arm/ast2400.c | 137 +++
 include/hw/arm/ast2400.h |  35 
 3 files changed, 173 insertions(+)
 create mode 100644 hw/arm/ast2400.c
 create mode 100644 include/hw/arm/ast2400.h

diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs
index a711e4d..f333b7f 100644
--- a/hw/arm/Makefile.objs
+++ b/hw/arm/Makefile.objs
@@ -16,3 +16,4 @@ obj-$(CONFIG_STM32F205_SOC) += stm32f205_soc.o
 obj-$(CONFIG_XLNX_ZYNQMP) += xlnx-zynqmp.o xlnx-ep108.o
 obj-$(CONFIG_FSL_IMX25) += fsl-imx25.o imx25_pdk.o
 obj-$(CONFIG_FSL_IMX31) += fsl-imx31.o kzm.o
+obj-$(CONFIG_ASPEED_SOC) += ast2400.o
diff --git a/hw/arm/ast2400.c b/hw/arm/ast2400.c
new file mode 100644
index 000..daa5518
--- /dev/null
+++ b/hw/arm/ast2400.c
@@ -0,0 +1,137 @@
+/*
+ * AST2400 SoC
+ *
+ * Andrew Jeffery 
+ * Jeremy Kerr 
+ *
+ * Copyright 2016 IBM Corp.
+ *
+ * This code is licensed under the GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "exec/address-spaces.h"
+#include "hw/arm/ast2400.h"
+#include "hw/char/serial.h"
+
+#define AST2400_UART_5_BASE  0x00184000
+#define AST2400_IOMEM_SIZE   0x00200000
+#define AST2400_IOMEM_BASE   0x1E600000
+#define AST2400_VIC_BASE 0x1E6C0000
+#define AST2400_TIMER_BASE   0x1E782000
+
+static const int uart_irqs[] = { 9, 32, 33, 34, 10 };
+static const int timer_irqs[] = { 16, 17, 18, 35, 36, 37, 38, 39, };
+
+/*
+ * IO handlers: simply catch any reads/writes to IO addresses that aren't
+ * handled by a device mapping.
+ */
+
+static uint64_t ast2400_io_read(void *p, hwaddr offset, unsigned size)
+{
+qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " [%u]\n",
+  __func__, offset, size);
+return 0;
+}
+
+static void ast2400_io_write(void *opaque, hwaddr offset, uint64_t value,
+unsigned size)
+{
+qemu_log_mask(LOG_UNIMP, "%s: 0x%" HWADDR_PRIx " <- 0x%" PRIx64 " [%u]\n",
+  __func__, offset, value, size);
+}
+
+static const MemoryRegionOps ast2400_io_ops = {
+.read = ast2400_io_read,
+.write = ast2400_io_write,
+.endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static void ast2400_init(Object *obj)
+{
+AST2400State *s = AST2400(obj);
+
+s->cpu = cpu_arm_init("arm926");
+
+object_initialize(&s->vic, sizeof(s->vic), TYPE_ASPEED_VIC);
+object_property_add_child(obj, "vic", OBJECT(&s->vic), NULL);
+qdev_set_parent_bus(DEVICE(&s->vic), sysbus_get_default());
+
+object_initialize(&s->timerctrl, sizeof(s->timerctrl), TYPE_ASPEED_TIMER);
+object_property_add_child(obj, "timerctrl", OBJECT(&s->timerctrl), NULL);
+qdev_set_parent_bus(DEVICE(&s->timerctrl), sysbus_get_default());
+}
+
+static void ast2400_realize(DeviceState *dev, Error **errp)
+{
+int i;
+AST2400State *s = AST2400(dev);
+Error *err = NULL;
+
+/* IO space */
+memory_region_init_io(&s->iomem, NULL, &ast2400_io_ops, NULL,
+"ast2400.io", AST2400_IOMEM_SIZE);
+memory_region_add_subregion_overlap(get_system_memory(), AST2400_IOMEM_BASE,
+&s->iomem, -1);
+
+/* VIC */
+object_property_set_bool(OBJECT(&s->vic), true, "realized", &err);
+if (err) {
+error_propagate(errp, err);
+return;
+}
+sysbus_mmio_map(SYS_BUS_DEVICE(&s->vic), 0, AST2400_VIC_BASE);
+sysbus_connect_irq(SYS_BUS_DEVICE(&s->vic), 0,
+   qdev_get_gpio_in(DEVICE(s->cpu), ARM_CPU_IRQ));
+sysbus_connect_irq(SYS_BUS_DEVICE(&s->vic), 1,
+   qdev_get_gpio_in(DEVICE(s->cpu), ARM_CPU_FIQ));
+
+/* Timer */
+object_property_set_bool(OBJECT(&s->timerctrl), true, "realized", &err);
+if (err) {
+error_propagate(errp, err);
+return;
+}
+sysbus_mmio_map(SYS_BUS_DEVICE(&s->timerctrl), 0, AST2400_TIMER_BASE);
+for (i = 0; i < ARRAY_SIZE(timer_irqs); i++) {
+qemu_irq irq = qdev_get_gpio_in(DEVICE(&s->vic), timer_irqs[i]);
+sysbus_connect_irq(SYS_BUS_DEVICE(&s->timerctrl), i, irq);
+}
+
+/* UART - attach an 8250 to the IO space as our UART5 */
+if (serial_hds[0]) {
+qemu_irq uart5 = qdev_get_gpio_in(DEVICE(&s->vic), uart_irqs[4]);
+serial_mm_init(&s->iomem, AST2400_UART_5_BASE, 2,
+   uart5, 38400, serial_hds[0], DEVICE_LITTLE_ENDIAN);
+}
+}
+
+static void ast2400_class_init(ObjectClass

[Qemu-devel] [PATCH v5 2/4] hw/intc: Add (new) ASPEED VIC device model

2016-03-15 Thread Andrew Jeffery

Implement a basic ASPEED VIC device model for the AST2400 SoC[1], with
enough functionality to boot an aspeed_defconfig Linux kernel. The model
implements the 'new' (revised) register set: While the hardware exposes
both the new and legacy register sets, accesses to the model's legacy
register set will not be serviced (however the access will be logged).

[1] http://www.aspeedtech.com/products.php?fPath=20&rPath=376

Signed-off-by: Andrew Jeffery 
---
Since v3:
  * Switch from g_assert() to qemu_log_mask(LOG_GUEST_ERROR, ...) in guest path

Since v2:
  * Implement all supported interrupt types and configurations
  * Implement a VMStateDescription
  * Log accesses to legacy IO space
  * Add documentation on some implementation and hardware details
  * Switch to extract64/deposit64 where possible
  * Drop int_ prefix from some struct member names
  * Fix various issues with trace-event declarations
  * Include qemu/osdep.h

 hw/intc/Makefile.objs|   1 +
 hw/intc/aspeed_vic.c | 339 +++
 include/hw/intc/aspeed_vic.h |  48 ++
 trace-events |   7 +
 4 files changed, 395 insertions(+)
 create mode 100644 hw/intc/aspeed_vic.c
 create mode 100644 include/hw/intc/aspeed_vic.h

diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
index 6a13a39..0e47f0f 100644
--- a/hw/intc/Makefile.objs
+++ b/hw/intc/Makefile.objs
@@ -31,3 +31,4 @@ obj-$(CONFIG_XICS_KVM) += xics_kvm.o
 obj-$(CONFIG_ALLWINNER_A10_PIC) += allwinner-a10-pic.o
 obj-$(CONFIG_S390_FLIC) += s390_flic.o
 obj-$(CONFIG_S390_FLIC_KVM) += s390_flic_kvm.o
+obj-$(CONFIG_ASPEED_SOC) += aspeed_vic.o
diff --git a/hw/intc/aspeed_vic.c b/hw/intc/aspeed_vic.c
new file mode 100644
index 000..19a0ff7
--- /dev/null
+++ b/hw/intc/aspeed_vic.c
@@ -0,0 +1,339 @@
+/*
+ * ASPEED Interrupt Controller (New)
+ *
+ * Andrew Jeffery 
+ *
+ * Copyright 2015, 2016 IBM Corp.
+ *
+ * This code is licensed under the GPL version 2 or later.  See
+ * the COPYING file in the top-level directory.
+ */
+
+/* The hardware exposes two register sets, a legacy set and a 'new' set. The
+ * model implements the 'new' register set, and logs warnings on accesses to
+ * the legacy IO space.
+ *
+ * The hardware uses 32bit registers to manage 51 IRQs, with low and high
+ * registers for each conceptual register. The device model's implementation
+ * uses 64bit data types to store both low and high register values (in the one
+ * member), but must cope with access offset values in multiples of 4 passed to
+ * the callbacks. As such the read() and write() implementations process the
+ * provided offset to understand whether the access is requesting the lower or
+ * upper 32 bits of the 64bit member.
+ *
+ * Additionally, the "Interrupt Enable", "Edge Status" and "Software Interrupt"
+ * fields have separate "enable"/"status" and "clear" registers, where set bits
+ * are written to one or the other to change state (avoiding a
+ * read-modify-write sequence).
+ */
+
+#include "qemu/osdep.h"
+#include 
+#include "hw/intc/aspeed_vic.h"
+#include "qemu/bitops.h"
+#include "trace.h"
+
+#define AVIC_NEW_BASE_OFFSET 0x80
+
+#define AVIC_L_MASK 0xFFFFFFFFU
+#define AVIC_H_MASK 0x0007FFFFU
+#define AVIC_EVENT_W_MASK (0x78000ULL << 32)
+
+static void aspeed_vic_update(AspeedVICState *s)
+{
+uint64_t new = (s->raw & s->enable);
+uint64_t flags;
+
+flags = new & s->select;
+trace_aspeed_vic_update_fiq(!!flags);
+qemu_set_irq(s->fiq, !!flags);
+
+flags = new & ~s->select;
+trace_aspeed_vic_update_irq(!!flags);
+qemu_set_irq(s->irq, !!flags);
+}
+
+static void aspeed_vic_set_irq(void *opaque, int irq, int level)
+{
+uint64_t irq_mask;
+bool raise;
+AspeedVICState *s = (AspeedVICState *)opaque;
+
+if (irq > ASPEED_VIC_NR_IRQS) {
+qemu_log_mask(LOG_GUEST_ERROR, "%s: Invalid interrupt number: %d\n",
+  __func__, irq);
+return;
+}
+
+trace_aspeed_vic_set_irq(irq, level);
+
+irq_mask = BIT(irq);
+if (s->sense & irq_mask) {
+/* level-triggered */
+if (s->event & irq_mask) {
+/* high-sensitive */
+raise = level;
+} else {
+/* low-sensitive */
+raise = !level;
+}
+s->raw = deposit64(s->raw, irq, 1, raise);
+} else {
+uint64_t old_level = s->level & irq_mask;
+
+/* edge-triggered */
+if (s->dual_edge & irq_mask) {
+raise = (!!old_level) != (!!level);
+} else {
+if (s->event & irq_mask) {
+/* rising-sensitive */
+raise = !old_level && level;
+} else {
+/* falling-sensitive */
+raise = old_level && !level;
+}
+}
+if (raise) {
+s->raw = deposit64(s->raw, irq, 1, raise);
+}
+}
+s->level = deposit64(s->level, irq, 1, level);
+

Re: [Qemu-devel] [PATCH qemu] spapr/target-ppc/kvm: Only add hcall-instructions if KVM supports it

2016-03-15 Thread Alexey Kardashevskiy


On 03/15/2016 10:32 PM, Thomas Huth wrote:

On 15.03.2016 10:42, Alexey Kardashevskiy wrote:

On 03/15/2016 07:18 PM, Thomas Huth wrote:


   Hi Alexey,

On 15.03.2016 06:51, Alexey Kardashevskiy wrote:

ePAPR defines "hcall-instructions" device-tree property which contains
code to call hypercalls in ePAPR paravirtualized guests. However this
property is also present for pseries guests where it does not make
sense,
even though it contains dummy code which simply fails.

Instead of maintaining the property (which used to be BE only; then was
fixed to be endian-agnostic) and confusing the guest (which might think
there is ePAPR host while there is none), this simply does not add
the property to the device tree if the host kernel does not implement
it.

In order to tell the machine code if the host kernel supports
KVM_CAP_PPC_GET_PVINFO, this changes kvmppc_get_hypercall() to return 1
if the host kernel does not implement it (which is HV KVM case).

Signed-off-by: Alexey Kardashevskiy 
---


Alexander,

We just got a bug report that LE guests would not boot under quite
old QEMU
and we (powerkvm) wonder if it makes sense to backport endian-agnostic
hypercall code to older QEMU or it is simpler/more correct
not to have epapr-hypercall property in the tree.


---
   hw/ppc/spapr.c   | 9 +
   target-ppc/kvm.c | 2 +-
   2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 43708a2..8130eb4 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -497,10 +497,11 @@ static void *spapr_create_fdt_skel(hwaddr
initrd_base,
* Older KVM versions with older guest kernels were
broken with the
* magic page, don't allow the guest to map it.
*/
-kvmppc_get_hypercall(first_cpu->env_ptr, hypercall,
- sizeof(hypercall));
-_FDT((fdt_property(fdt, "hcall-instructions", hypercall,
-  sizeof(hypercall;
+if (!kvmppc_get_hypercall(first_cpu->env_ptr, hypercall,
+  sizeof(hypercall))) {
+_FDT((fdt_property(fdt, "hcall-instructions",
hypercall,
+   sizeof(hypercall;
+}
   }
   _FDT((fdt_end_node(fdt)));
   }
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 776336b..e5183db 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -2001,7 +2001,7 @@ int kvmppc_get_hypercall(CPUPPCState *env,
uint8_t *buf, int buf_len)
   hc[2] = cpu_to_be32(0x4808);
   hc[3] = cpu_to_be32(bswap32(0x3860));

-return 0;
+return 1;
   }

   static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)


Sorry, I have a hard time to understand what this is really good for. Is
it a patch for current QEMU or for older ones? If it is for older ones,
then why did you not CC: to qemu-stable?
If it is for current QEMU, then I've got some more questions about
things I do not understand:

1) In your patch description, you talk about ePAPR and that the property
does not make sense for pseries. But why is this code then available at
all in spapr.c? ... there must be a reason for this, I think (like using
a different h-call on nested KVM-PR for example?)



No, this is from old times when there was only PR KVM fully emulating
powermac (not pseries) which needed to interact with the hypervisor and
epapr_hypercall was chosen for this.



2) The code in spapr.c is already protected with a
if (kvmppc_has_cap_fixup_hcalls()) ...
and that CAP should only be there if the PVINFO CAP is available, too.
So I don't see how you could run into that problem anyway where PVINFO
is _not_ available but the FIXUP_HCALL CAP _is_ available?



HV KVM guest calls (on pseries machine as well):

kvm_guest_init
kvm_para_has_feature
kvm_arch_para_features
kvm_para_available - this returns "1"
epapr_hypercall0_1(KVM_HC_FEATURES)

This epapr_hypercall0_1() calls a binary blob from "hcall-instructions".
And fails if the guest is LE and the blob is from BE-only times.


What about that "if (kvmppc_has_cap_fixup_hcalls())" ? Could you please
check why this succeeds on your system , but the KVM_CAP_PPC_GET_PVINFO
call does not?


KVM_CAP_PPC_FIXUP_HCALL is always enabled for CONFIG_PPC_BOOK3S_64, 
KVM_CAP_PPC_GET_PVINFO is only enabled for "!hv_enabled".




--
Alexey

Re: [Qemu-devel] [PATCH 0/9] some QOM'ify work under hw/arm

2016-03-15 Thread zxq_yx_007


At 2016-03-07 15:05:41, "xiaoqiang zhao"  wrote:
>This patch set tries to QOM'ify code under the hw/arm directory.
>As previous patches to hw/timer/*, we use instance_init instead of
>the SysBus's init function. 
>
>
>xiaoqiang zhao (9):
>  hw/arm: QOM'ify armv7m.c
>  hw/arm: QOM'ify highbank.c
>  hw/arm: QOM'ify integratorcp.c
>  hw/arm: QOM'ify pxa2xx.c
>  hw/arm: QOM'ify pxa2xx_pic.c
>  hw/arm: QOM'ify spitz.c
>  hw/arm: QOM'ify stellaris.c
>  hw/arm: QOM'ify strongarm.c
>  hw/arm: QOM'ify versatilepb.c
>
> hw/arm/armv7m.c   | 11 -
> hw/arm/highbank.c | 12 --
> hw/arm/integratorcp.c | 32 ++---
> hw/arm/pxa2xx.c   | 26 +---
> hw/arm/pxa2xx_pic.c   |  7 --
> hw/arm/spitz.c| 23 +++---
> hw/arm/stellaris.c| 48 ++---
> hw/arm/strongarm.c| 66 ++-
> hw/arm/versatilepb.c  | 13 +-
> 9 files changed, 100 insertions(+), 138 deletions(-)
>
>-- 
>2.1.4

>
ping !

Re: [Qemu-devel] [PATCH v5 0/8] QOM'ify hw/timer/*

2016-03-15 Thread hitmoon




在 2016年03月03日 14:31, hitmoon 写道:

On 25 February 2016 at 10:30, xiaoqiang zhao  wrote:

This patch series QOM'ify timer code under hw/timer directory.
Main idea is to split the initfn's work, some to TypeInfo.instance_init
and some is placed in DeviceClass::realize.
Drop the use of SysBusDeviceClass::init if possible.

Patch 3,4 (m48t59) has been tested in a sparc vm with debian linux guest
and savevm/loadvm looks fine.
Comments from the relevant maintainers are needed!

ping ...
http://lists.nongnu.org/archive/html/qemu-devel/2016-02/msg05859.html

ping! Any comment guys ?

Re: [Qemu-devel] [PATCH] sd: Fix "info qtree" on boards with SD cards

2016-03-15 Thread hitmoon




在 2016年03月16日 00:56, Peter Maydell 写道:

The SD card object is not a SysBusDevice, so don't create it with
qdev_create() if we're not assigning it to a specific bus; use
object_new() instead.

This was causing 'info qtree' to segfault on boards with SD cards,
because qdev_create(NULL, TYPE_FOO) puts the created object on the
system bus, and then we may try to run functions like sysbus_dev_print()
on it, which fail when casting the object to SysBusDevice.

(This is the same mistake that we made with the NAND device
and fixed in commit 6749695eaaf346c1.)

Reported-by: hitmoon 
Signed-off-by: Peter Maydell 
---
I assume that using qdev_create() for non-SysBus devices is
OK if we are passing in a specific bus pointer, because we do
this already for various things including PCI devices. The
various "properly QOMified" uses of TYPE_SD_CARD do that; only
this sd_init() function for the legacy uses doesn't.
---
  hw/sd/sd.c | 6 --
  1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/hw/sd/sd.c b/hw/sd/sd.c
index 00c320d..1568057 100644
--- a/hw/sd/sd.c
+++ b/hw/sd/sd.c
@@ -563,17 +563,19 @@ static const VMStateDescription sd_vmstate = {
  /* Legacy initialization function for use by non-qdevified callers */
  SDState *sd_init(BlockBackend *blk, bool is_spi)
  {
+Object *obj;
  DeviceState *dev;
  Error *err = NULL;
  
-dev = qdev_create(NULL, TYPE_SD_CARD);

+obj = object_new(TYPE_SD_CARD);
+dev = DEVICE(obj);
  qdev_prop_set_drive(dev, "drive", blk, );
  if (err) {
  error_report("sd_init failed: %s", error_get_pretty(err));
  return NULL;
  }
  qdev_prop_set_bit(dev, "spi", is_spi);
-object_property_set_bool(OBJECT(dev), true, "realized", );
+object_property_set_bool(obj, true, "realized", );
  if (err) {
  error_report("sd_init failed: %s", error_get_pretty(err));
  return NULL;


Nice patch !

Reviewed-by: xiaoqiang.zhao

Re: [Qemu-devel] [Qemu-arm] [PATCH] sd: Fix "info qtree" on boards with SD cards

2016-03-15 Thread Thomas Hanson

The patch looks good.

Would it also be good to update bus_add_child() so that it NULL-checks
its "bus" parameter before dereferencing it?

-Tom

On 15 March 2016 at 10:56, Peter Maydell  wrote:
> The SD card object is not a SysBusDevice, so don't create it with
> qdev_create() if we're not assigning it to a specific bus; use
> object_new() instead.
>
> This was causing 'info qtree' to segfault on boards with SD cards,
> because qdev_create(NULL, TYPE_FOO) puts the created object on the
> system bus, and then we may try to run functions like sysbus_dev_print()
> on it, which fail when casting the object to SysBusDevice.
>
> (This is the same mistake that we made with the NAND device
> and fixed in commit 6749695eaaf346c1.)
>
> Reported-by: hitmoon 
> Signed-off-by: Peter Maydell 
> ---
> I assume that using qdev_create() for non-SysBus devices is
> OK if we are passing in a specific bus pointer, because we do
> this already for various things including PCI devices. The
> various "properly QOMified" uses of TYPE_SD_CARD do that; only
> this sd_init() function for the legacy uses doesn't.
> ---
>  hw/sd/sd.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/hw/sd/sd.c b/hw/sd/sd.c
> index 00c320d..1568057 100644
> --- a/hw/sd/sd.c
> +++ b/hw/sd/sd.c
> @@ -563,17 +563,19 @@ static const VMStateDescription sd_vmstate = {
>  /* Legacy initialization function for use by non-qdevified callers */
>  SDState *sd_init(BlockBackend *blk, bool is_spi)
>  {
> +Object *obj;
>  DeviceState *dev;
>  Error *err = NULL;
>
> -dev = qdev_create(NULL, TYPE_SD_CARD);
> +obj = object_new(TYPE_SD_CARD);
> +dev = DEVICE(obj);
>  qdev_prop_set_drive(dev, "drive", blk, );
>  if (err) {
>  error_report("sd_init failed: %s", error_get_pretty(err));
>  return NULL;
>  }
>  qdev_prop_set_bit(dev, "spi", is_spi);
> -object_property_set_bool(OBJECT(dev), true, "realized", );
> +object_property_set_bool(obj, true, "realized", );
>  if (err) {
>  error_report("sd_init failed: %s", error_get_pretty(err));
>  return NULL;
> --
> 1.9.1
>
>

Re: [Qemu-devel] [PATCH v12 2/3] quorum: implement bdrv_add_child() and bdrv_del_child()

2016-03-15 Thread Wen Congyang

On 03/11/2016 08:21 PM, Alberto Garcia wrote:
> On Thu 10 Mar 2016 03:49:40 AM CET, Changlong Xie wrote:
>> @@ -81,6 +82,8 @@ typedef struct BDRVQuorumState {
>>  bool rewrite_corrupted;/* true if the driver must rewrite-on-read 
>> corrupted
>>  * block if Quorum is reached.
>>  */
>> +unsigned long *index_bitmap;
>> +int bsize;
>   [...]
>> +static int get_new_child_index(BDRVQuorumState *s)
>   [...]
>> +static void remove_child_index(BDRVQuorumState *s, int index)
>   [...]
> 
> Sorry if I missed a previous discussion, but why is this necessary?

Hi, Alberto Garcia

Do you have any comments about this patch or give a R-B?

Thanks
Wen Congyang

> 
> Berto
> 
> 
> .
>

Re: [Qemu-devel] [PATCH 15/17] ppc: Add dummy POWER8 MPPR register

2016-03-15 Thread David Gibson

On Mon, Mar 14, 2016 at 05:56:38PM +0100, Cédric Le Goater wrote:
> From: Benjamin Herrenschmidt 
> 
> Controls the micropartition prefetch, this is pretty much meaningless
> in full emulation (used for priming the caches on real HW).
> 
> Signed-off-by: Benjamin Herrenschmidt 

So, this is readable with HV=0, so technically a fix even for non-HV
machines.  I'm guessing it's not actually read in practice outside the
HV, though.  Not sure if this should go in 2.6 or 2.7.

> ---
>  target-ppc/cpu.h|  1 +
>  target-ppc/translate_init.c | 13 +
>  2 files changed, 14 insertions(+)
> 
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index 81a3e6b5ed29..5203cc6a3bfb 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -1398,6 +1398,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool 
> ifetch)
>  #define SPR_DHDES (0x0B1)
>  #define SPR_DPDES (0x0B0)
>  #define SPR_DAWR  (0x0B4)
> +#define SPR_MPPR  (0x0B8)
>  #define SPR_RPR   (0x0BA)
>  #define SPR_DAWRX (0x0BC)
>  #define SPR_HFSCR (0x0BE)
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index 28a9c2e73156..cfb1bc088950 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -8161,6 +8161,18 @@ static void gen_spr_power8_ic(CPUPPCState *env)
>  #endif
>  }
>  
> +static void gen_spr_power8_book4(CPUPPCState *env)
> +{
> +/* Add a number of P8 book4 registers */
> +#if !defined(CONFIG_USER_ONLY)
> +spr_register_hv(env, SPR_MPPR, "MPPR",
> +SPR_NOACCESS, SPR_NOACCESS,
> +&spr_read_generic, SPR_NOACCESS,
> +&spr_read_generic, &spr_write_generic,
> +0);
> +#endif
> +}
> +
>  static void init_proc_book3s_64(CPUPPCState *env, int version)
>  {
>  gen_spr_ne_601(env);
> @@ -8216,6 +8228,7 @@ static void init_proc_book3s_64(CPUPPCState *env, int 
> version)
>  gen_spr_power8_rpr(env);
>  gen_spr_power8_dbell(env);
>  gen_spr_power8_ic(env);
> +gen_spr_power8_book4(env);
>  }
>  if (version < BOOK3S_CPU_POWER8) {
>  gen_spr_book3s_dbg(env);

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [RFC qemu 0/4] A PV solution for live migration optimization

2016-03-15 Thread Li, Liang Z

> > > > > >   I'm just catching back up on this thread; so without
> > > > > > reference to any particular previous mail in the thread.
> > > > > >
> > > > > >   1) How many of the free pages do we tell the host about?
> > > > > >  Your main change is telling the host about all the
> > > > > >  free pages.
> > > > >
> > > > > Yes, all the guest's free pages.
> > > > >
> > > > > >  If we tell the host about all the free pages, then we might
> > > > > >  end up needing to allocate more pages and update the host
> > > > > >  with pages we now want to use; that would have to wait for the
> > > > > >  host to acknowledge that use of these pages, since if we don't
> > > > > >  wait for it then it might have skipped migrating a page we
> > > > > >  just started using (I don't understand how your series solves 
> > > > > > that).
> > > > > >  So the guest probably needs to keep some free pages - how
> many?
> > > > >
> > > > > Actually, there is no need to care about whether the free pages
> > > > > will be
> > > used by the host.
> > > > > We only care about some of the free pages we get reused by the
> > > > > guest,
> > > right?
> > > > >
> > > > > The dirty page logging can be used to solve this, starting the
> > > > > > dirty page logging before getting the free pages information from guest.
> > > > > Even some of the free pages are modified by the guest during the
> > > > > process of getting the free pages information, these modified
> > > > > pages will
> > > be traced by the dirty page logging mechanism. So in the following
> > > migration_bitmap_sync() function.
> > > > > > The pages in the free pages bitmap that were later modified
> > > > > will be reset to dirty. We won't omit any dirtied pages.
> > > > >
> > > > > So, guest doesn't need to keep any free pages.
> > > >
> > > > OK, yes, that works; so we do:
> > > >   * enable dirty logging
> > > >   * ask guest for free pages
> > > >   * initialise the migration bitmap as everything-free
> > > >   * then later we do the normal sync-dirty bitmap stuff and it all just
> works.
> > > >
> > > > That's nice and simple.
> > >
> > > This works once, sure. But there's an issue is that you have to
> > > defer migration until you get the free page list, and this only
> > > works once. So you end up with heuristics about how long to wait.
> > >
> > > Instead I propose:
> > >
> > > - mark all pages dirty as we do now.
> > >
> > > - at start of migration, start tracking dirty
> > >   pages in kvm, and tell guest to start tracking free pages
> > >
> > > we can now introduce any kind of delay, for example wait for ack
> > > from guest, or do whatever else, or even just start migrating pages
> > >
> > > - repeatedly:
> > >   - get list of free pages from guest
> > >   - clear them in migration bitmap
> > >   - get dirty list from kvm
> > >
> > > - at end of migration, stop tracking writes in kvm,
> > >   and tell guest to stop tracking free pages
> >
> > I had thought of filtering out the free pages in each migration bitmap
> synchronization.
> > The advantage is we can skip process as many free pages as possible. Not
> just once.
> > The disadvantage is that we should change the current memory
> > management code to track the free pages, instead of traversing the free
> page list to construct the free pages bitmap, to reduce the overhead to get
> the free pages bitmap.
> > I am not sure if the kernel people would like it.
> >
> > If keeping the traversing mechanism, because of the overhead, maybe it's
> not worth to filter out the free pages repeatedly.
> 
> Well, Michael's idea of not waiting for the dirty bitmap to be filled does 
> make
> that idea of constantly using the free-bitmap better.
> 

No wait is a good idea.
Actually, we could shorten the waiting time by pre-allocating the free pages
bitmap and updating it when the guest allocates/frees pages. This requires
modifying the mm-related code. I don't know whether the kernel people would
like this.

> In that case, is it easier if something (guest/host?) allocates some memory in
> the guests physical RAM space and just points the host to it, rather than
> having an explicit 'send'.
> 

Good idea too.

Liang
> Dave

Re: [Qemu-devel] [PATCH 03/17] ppc: Add a bunch of hypervisor SPRs to Book3s

2016-03-15 Thread David Gibson

On Tue, Mar 15, 2016 at 11:49:31AM +0100, Thomas Huth wrote:
> On 15.03.2016 10:43, David Gibson wrote:
> > 
> > On Mon, Mar 14, 2016 at 08:14:59PM +0100, Thomas Huth wrote:
> >> On 14.03.2016 17:56, Cédric Le Goater wrote:
> >>> From: Benjamin Herrenschmidt 
> >>>
> >>> We don't give them a KVM reg number to most of the registers yet as no
> >>> current KVM version supports HV mode. For DAWR and DAWRX, the KVM reg
> >>> number is needed since this register can be set by the guest via the
> >>> H_SET_MODE hypercall.
> >>>
> >>> Signed-off-by: Benjamin Herrenschmidt 
> >>> [clg: squashed in patch 'ppc: Add KVM numbers to some P8 SPRs' and
> >>>   changed the commit log with a proposal of Thomas Huth ]
> >>> Signed-off-by: Cédric Le Goater 
> >>> ---
> >>>  target-ppc/translate_init.c | 140 
> >>> +++-
> >>>  1 file changed, 137 insertions(+), 3 deletions(-)
> >>>
> >>> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> >>> index 6a11b41206e5..43c6e524a6bc 100644
> >>> --- a/target-ppc/translate_init.c
> >>> +++ b/target-ppc/translate_init.c
> >>> @@ -1105,6 +1105,11 @@ static void gen_spr_amr (CPUPPCState *env)
> >>>   SPR_NOACCESS, SPR_NOACCESS,
> >>>   _read_generic, _write_generic,
> >>>   KVM_REG_PPC_UAMOR, 0);
> >>> +spr_register_hv(env, SPR_AMOR, "AMOR",
> >>> +SPR_NOACCESS, SPR_NOACCESS,
> >>> +SPR_NOACCESS, SPR_NOACCESS,
> >>> +_read_generic, _write_generic,
> >>> +0);
> >>>  #endif /* !CONFIG_USER_ONLY */
> >>>  }
> >>>  #endif /* TARGET_PPC64 */
> >>> @@ -7491,6 +7496,20 @@ static void gen_spr_book3s_dbg(CPUPPCState *env)
> >>>   KVM_REG_PPC_DABRX, 0x);
> >>>  }
> >>>  
> >>> +static void gen_spr_book3s_207_dbg(CPUPPCState *env)
> >>> +{
> >>> +spr_register_kvm_hv(env, SPR_DAWR, "DAWR",
> >>> +SPR_NOACCESS, SPR_NOACCESS,
> >>> +SPR_NOACCESS, SPR_NOACCESS,
> >>> +_read_generic, _write_generic,
> >>> +KVM_REG_PPC_DAWR, 0x);
> >>> +spr_register_kvm_hv(env, SPR_DAWRX, "DAWRX",
> >>> +SPR_NOACCESS, SPR_NOACCESS,
> >>> +SPR_NOACCESS, SPR_NOACCESS,
> >>> +_read_generic, _write_generic,
> >>> +KVM_REG_PPC_DAWRX, 0x);
> >>> +}
> >>> +
> >>>  static void gen_spr_970_dbg(CPUPPCState *env)
> >>>  {
> >>>  /* Breakpoints */
> >>> @@ -7683,15 +7702,116 @@ static void gen_spr_power5p_lpar(CPUPPCState 
> >>> *env)
> >>>  spr_register_kvm(env, SPR_LPCR, "LPCR",
> >>>   SPR_NOACCESS, SPR_NOACCESS,
> >>>   _read_generic, _write_generic,
> >>> - KVM_REG_PPC_LPCR, 0x);
> >>> + KVM_REG_PPC_LPCR, LPCR_LPES0 | LPCR_LPES1);
> >>
> >> Could we please postpone that hunk to a later, separate patch (after
> >> QEMU 2.6 has been released)? It looks like it could maybe cause some
> >> trouble with some emulated boards (e.g. there is some code in
> >> target-ppc/excp_helper.c for example - which is currently disabled, but
> >> I'm not sure whether there are other spots like this somewhere else).
> > 
> > I think this whole patch needs to wait until after 2.6, I'm not seeing
> > a good rationale for squeezing it into 2.6 at this stage.
> 
> Well, this patch registers DAWR and DAWRX registers with KVM - so
> without this patch, the hardware breakpoints will be lost during
> migration. I haven't tested it, but I think that when somebody uses
> hardware breakpoints in gdb in a KVM guest, and migrates it, then the
> breakpoints won't be triggered anymore after migration without this patch.

Ah.. good point.  So the question becomes, which is lower risk:
adjusting the patches to just add DAWR without the HV SPR stuff, or
just incorporating the HV SPR stuff as is.

> Cédric, maybe you could send a patch that adds at least the DAWR and
> DAWRX registers if David does not want to have the full patch for 2.6?
> 
>  Thomas
> 
> 



-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH 16/17] ppc: Add dummy CIABR SPR

2016-03-15 Thread David Gibson

On Mon, Mar 14, 2016 at 05:56:39PM +0100, Cédric Le Goater wrote:
> From: Benjamin Herrenschmidt 
> 
> We should implement HW breakpoint/watchpoint, qemu supports them...
> 
> Signed-off-by: Benjamin Herrenschmidt 

Reviewed-by: David Gibson 

But I'm assuming 2.7, not 2.6.

> ---
>  target-ppc/cpu.h| 1 +
>  target-ppc/translate_init.c | 5 +
>  2 files changed, 6 insertions(+)
> 
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index 5203cc6a3bfb..9e1ef10b7dc6 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -1400,6 +1400,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool 
> ifetch)
>  #define SPR_DAWR  (0x0B4)
>  #define SPR_MPPR  (0x0B8)
>  #define SPR_RPR   (0x0BA)
> +#define SPR_CIABR (0x0BB)
>  #define SPR_DAWRX (0x0BC)
>  #define SPR_HFSCR (0x0BE)
>  #define SPR_VRSAVE(0x100)
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index cfb1bc088950..f88bdf7b3cd1 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -7603,6 +7603,11 @@ static void gen_spr_book3s_207_dbg(CPUPPCState *env)
>  SPR_NOACCESS, SPR_NOACCESS,
>  _read_generic, _write_generic,
>  KVM_REG_PPC_DAWRX, 0x);
> +spr_register_kvm_hv(env, SPR_CIABR, "CIABR",
> +SPR_NOACCESS, SPR_NOACCESS,
> +SPR_NOACCESS, SPR_NOACCESS,
> +_read_generic, _write_generic,
> +KVM_REG_PPC_CIABR, 0x);
>  }
>  
>  static void gen_spr_970_dbg(CPUPPCState *env)

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH 11/17] ppc: Initialize AMOR in PAPR mode

2016-03-15 Thread David Gibson

On Mon, Mar 14, 2016 at 05:56:34PM +0100, Cédric Le Goater wrote:
> From: Benjamin Herrenschmidt 
> 
> Make sure we give the guest full authorization
> 
> Signed-off-by: Benjamin Herrenschmidt 

Reviewed-by: David Gibson 

I'm guessing this one is a fix that belongs in 2.6, but I'm not
entirely certain.

> ---
>  target-ppc/translate_init.c | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index 68abd847a251..c921d9f53984 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -8542,6 +8542,7 @@ void cpu_ppc_set_papr(PowerPCCPU *cpu)
>  {
>  CPUPPCState *env = &cpu->env;
>  ppc_spr_t *lpcr = &env->spr_cb[SPR_LPCR];
> +ppc_spr_t *amor = &env->spr_cb[SPR_AMOR];
>  
>  /* PAPR always has exception vectors in RAM not ROM. To ensure this,
>   * MSR[IP] should never be set.
> @@ -8563,6 +8564,9 @@ void cpu_ppc_set_papr(PowerPCCPU *cpu)
>   */
>  env->spr[SPR_LPCR] = lpcr->default_value;
>  
> +/* Set a full AMOR so guest can use the AMR as it sees fit */
> +env->spr[SPR_AMOR] = amor->default_value = 0xffffffffffffffffull;
> +
>  /* Tell KVM that we're in PAPR mode */
>  if (kvm_enabled()) {
>  kvmppc_set_papr(cpu);

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH 14/17] ppc: Add dummy write to VTB

2016-03-15 Thread David Gibson

On Mon, Mar 14, 2016 at 05:56:37PM +0100, Cédric Le Goater wrote:
> From: Benjamin Herrenschmidt 
> 
> The Hypervisor can write it. We don't handle that properly yet but
> at least let's not blow up when it is written.
> 
> Signed-off-by: Benjamin Herrenschmidt 

AFAICT this should be for 2.7, not 2.6?

> ---
>  target-ppc/translate_init.c | 15 +++
>  1 file changed, 11 insertions(+), 4 deletions(-)
> 
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index 2fac6ea58698..28a9c2e73156 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -300,6 +300,12 @@ static void spr_write_purr(DisasContext *ctx, int gprn, 
> int sprn)
>  /* Temporary placeholder */
>  }
>  
> +__attribute__ ((unused))
> +static void spr_write_vtb(DisasContext *ctx, int gprn, int sprn)
> +{
> +/* Temporary placeholder */
> +}
> +
>  #endif
>  #endif
>  
> @@ -8089,10 +8095,11 @@ static void gen_spr_power8_ebb(CPUPPCState *env)
>  /* Virtual Time Base */
>  static void gen_spr_vtb(CPUPPCState *env)
>  {
> -spr_register(env, SPR_VTB, "VTB",
> - SPR_NOACCESS, SPR_NOACCESS,
> - &spr_read_tbl, SPR_NOACCESS,
> - 0x00000000);
> +spr_register_hv(env, SPR_VTB, "VTB",
> +   SPR_NOACCESS, SPR_NOACCESS,
> +   &spr_read_tbl, SPR_NOACCESS,
> +   &spr_read_tbl, spr_write_vtb,
> +   0x00000000);
>  }
>  
>  static void gen_spr_power8_fscr(CPUPPCState *env)

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH 07/17] ppc: Better figure out if processor has HV mode

2016-03-15 Thread David Gibson

On Mon, Mar 14, 2016 at 05:56:30PM +0100, Cédric Le Goater wrote:
> From: Benjamin Herrenschmidt 
> 
> We use an env. flag which is set to the initial value of MSR_HVB in
> the msr_mask. We also adjust the POWER8 mask to set SHV.
> 
> Also use this to adjust ctx.hv so that it is *set* when the processor
> doesn't have an HV mode (970 with Apple mode for example), thus enabling
> hypervisor instructions/SPRs.
> 
> Signed-off-by: Benjamin Herrenschmidt 
> Reviewed-by: David Gibson 

Since this seems to apply purely to improving HV mode support, I'm not
yet seeing the case for including this in 2.6.

> ---
>  target-ppc/cpu.h|  4 
>  target-ppc/translate.c  |  4 +++-
>  target-ppc/translate_init.c | 19 +++
>  3 files changed, 22 insertions(+), 5 deletions(-)
> 
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index a7da0d3e95a9..02aed6427ade 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -1153,6 +1153,10 @@ struct CPUPPCState {
>  hwaddr mpic_iack;
>  /* true when the external proxy facility mode is enabled */
>  bool mpic_proxy;
> +/* set when the processor has an HV mode, thus HV priv
> + * instructions and SPRs are disallowed if MSR:HV is 0
> + */
> +bool has_hv_mode;
>  #endif
>  
>  /* Those resources are used only during code translation */
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 327f3259b4be..11801ded62d2 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -11456,8 +11456,10 @@ void gen_intermediate_code(CPUPPCState *env, struct 
> TranslationBlock *tb)
>  ctx.exception = POWERPC_EXCP_NONE;
>  ctx.spr_cb = env->spr_cb;
>  ctx.pr = msr_pr;
> -ctx.hv = !msr_pr && msr_hv;
>  ctx.mem_idx = env->mmu_idx;
> +#if !defined(CONFIG_USER_ONLY)
> +ctx.hv = msr_hv || !env->has_hv_mode;
> +#endif
>  ctx.insns_flags = env->insns_flags;
>  ctx.insns_flags2 = env->insns_flags2;
>  ctx.access_type = -1;
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index 093ef036320d..59a68de0bce8 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -8463,6 +8463,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
>  PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 |
>  PPC2_TM;
>  pcc->msr_mask = (1ull << MSR_SF) |
> +(1ull << MSR_SHV) |
>  (1ull << MSR_TM) |
>  (1ull << MSR_VR) |
>  (1ull << MSR_VSX) |
> @@ -9876,10 +9877,7 @@ static void ppc_cpu_reset(CPUState *s)
>  pcc->parent_reset(s);
>  
>  msr = (target_ulong)0;
> -if (0) {
> -/* XXX: find a suitable condition to enable the hypervisor mode */
> -msr |= (target_ulong)MSR_HVB;
> -}
> +msr |= (target_ulong)MSR_HVB;
>  msr |= (target_ulong)0 << MSR_AP; /* TO BE CHECKED */
>  msr |= (target_ulong)0 << MSR_SA; /* TO BE CHECKED */
>  msr |= (target_ulong)1 << MSR_EP;
> @@ -9980,6 +9978,19 @@ static void ppc_cpu_initfn(Object *obj)
>  env->bfd_mach = pcc->bfd_mach;
>  env->check_pow = pcc->check_pow;
>  
> +/* Mark HV mode as supported if the CPU has an MSR_HV bit
> + * in the msr_mask. The mask can later be cleared by PAPR
> + * mode but the hv mode support will remain, thus enforcing
> + * that we cannot use priv. instructions in guest in PAPR
> + * mode. For 970 we currently simply don't set HV in msr_mask
> + * thus simulating an "Apple mode" 970. If we ever want to
> + * support 970 HV mode, we'll have to add a processor attribute
> + * of some sort.
> + */
> +#if !defined(CONFIG_USER_ONLY)
> +env->has_hv_mode = !!(env->msr_mask & MSR_HVB);
> +#endif
> +
>  #if defined(TARGET_PPC64)
>  if (pcc->sps) {
>  env->sps = *pcc->sps;

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH 17/17] ppc: A couple more dummy POWER8 Book4 regs

2016-03-15 Thread David Gibson

On Mon, Mar 14, 2016 at 05:56:40PM +0100, Cédric Le Goater wrote:
> From: Benjamin Herrenschmidt 
> 
> Signed-off-by: Benjamin Herrenschmidt 
> [clg: squashed in patch 'ppc: Add dummy ACOP SPR' ]
> Signed-off-by: Cédric Le Goater 

Reviewed-by: David Gibson 

And this looks like a fix for 2.6 to me.

> ---
>  target-ppc/cpu.h|  3 +++
>  target-ppc/translate_init.c | 12 
>  2 files changed, 15 insertions(+)
> 
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index 9e1ef10b7dc6..9ed406cf111b 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -1359,7 +1359,9 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool 
> ifetch)
>  #define SPR_SRR1  (0x01B)
>  #define SPR_CFAR  (0x01C)
>  #define SPR_AMR   (0x01D)
> +#define SPR_ACOP  (0x01F)
>  #define SPR_BOOKE_PID (0x030)
> +#define SPR_BOOKS_PID (0x030)
>  #define SPR_BOOKE_DECAR   (0x036)
>  #define SPR_BOOKE_CSRR0   (0x03A)
>  #define SPR_BOOKE_CSRR1   (0x03B)
> @@ -1713,6 +1715,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool 
> ifetch)
>  #define SPR_POWER_SPMC1   (0x37C)
>  #define SPR_POWER_SPMC2   (0x37D)
>  #define SPR_POWER_MMCRS   (0x37E)
> +#define SPR_WORT  (0x37F)
>  #define SPR_PPR   (0x380)
>  #define SPR_750_GQR0  (0x390)
>  #define SPR_440_DNV0  (0x390)
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index f88bdf7b3cd1..22afeef2731a 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -8175,6 +8175,18 @@ static void gen_spr_power8_book4(CPUPPCState *env)
>  &spr_read_generic, SPR_NOACCESS,
>  &spr_read_generic, &spr_write_generic,
>  0);
> +spr_register_kvm(env, SPR_ACOP, "ACOP",
> + SPR_NOACCESS, SPR_NOACCESS,
> + &spr_read_generic, &spr_write_generic,
> + KVM_REG_PPC_ACOP, 0);
> +spr_register_kvm(env, SPR_BOOKS_PID, "PID",
> + SPR_NOACCESS, SPR_NOACCESS,
> + &spr_read_generic, &spr_write_generic,
> + KVM_REG_PPC_PID, 0);
> +spr_register_kvm(env, SPR_WORT, "WORT",
> + SPR_NOACCESS, SPR_NOACCESS,
> + &spr_read_generic, &spr_write_generic,
> + KVM_REG_PPC_WORT, 0);
>  #endif
>  }
>  

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH 10/17] ppc: Add dummy SPR_IC for POWER8

2016-03-15 Thread David Gibson

On Mon, Mar 14, 2016 at 05:56:33PM +0100, Cédric Le Goater wrote:
> From: Benjamin Herrenschmidt 
> 
> It's supposed to be an instruction counter. For now make us not
> crash when accessing it.
> 
> Signed-off-by: Benjamin Herrenschmidt 

Reviewed-by: David Gibson 

But expecting this is for 2.7, not 2.6.

> ---
>  target-ppc/cpu.h|  1 +
>  target-ppc/translate_init.c | 12 
>  2 files changed, 13 insertions(+)
> 
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index 779cb57bd700..6952d789e518 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -1691,6 +1691,7 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool 
> ifetch)
>  #define SPR_MPC_MD_DBRAM1 (0x32A)
>  #define SPR_RCPU_L2U_RA3  (0x32B)
>  #define SPR_TAR   (0x32F)
> +#define SPR_IC(0x350)
>  #define SPR_VTB   (0x351)
>  #define SPR_MMCRC (0x353)
>  #define SPR_440_INV0  (0x370)
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index 10f67136b609..68abd847a251 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -8061,6 +8061,17 @@ static void gen_spr_power8_dbell(CPUPPCState *env)
>  #endif
>  }
>  
> +static void gen_spr_power8_ic(CPUPPCState *env)
> +{
> +#if !defined(CONFIG_USER_ONLY)
> +spr_register_hv(env, SPR_IC, "IC",
> +SPR_NOACCESS, SPR_NOACCESS,
> +&spr_read_generic, SPR_NOACCESS,
> +&spr_read_generic, &spr_write_generic,
> +0);
> +#endif
> +}
> +
>  static void init_proc_book3s_64(CPUPPCState *env, int version)
>  {
>  gen_spr_ne_601(env);
> @@ -8115,6 +8126,7 @@ static void init_proc_book3s_64(CPUPPCState *env, int 
> version)
>  gen_spr_vtb(env);
>  gen_spr_power8_rpr(env);
>  gen_spr_power8_dbell(env);
> +gen_spr_power8_ic(env);
>  }
>  if (version < BOOK3S_CPU_POWER8) {
>  gen_spr_book3s_dbg(env);

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH 00/17] ppc: preparing pnv landing

2016-03-15 Thread David Gibson

On Tue, Mar 15, 2016 at 09:11:31AM +0100, Cédric Le Goater wrote:
> On 03/15/2016 01:39 AM, David Gibson wrote:
> > On Mon, Mar 14, 2016 at 05:56:23PM +0100, Cédric Le Goater wrote:
> >> Hello,
> >>
> >> This is a first mini-series of patches adding support for new ppc SPRs.
> >> They were taken from Ben's larger patchset adding the ppc powernv
> >> platform and they should already be useful for the pseries guest
> >> migration.
> >>
> >> Initial patches come from :
> >>
> >>https://github.com/ozbenh/qemu/commits/powernv
> >>
> >> The changes are mostly due to the rebase on Dave's 2.6 branch:
> >>
> >>https://github.com/dgibson/qemu/commits/ppc-for-2.6
> >>
> >> A couple more are bisect and checkpatch fixes and finally some patches
> >> were merged to reduce the noise.
> >>
> >>   
> >>
> >> The patchset is also available here: 
> >>
> >>https://github.com/legoater/qemu/commits/for-2.6
> >>
> >> It was quickly tested with a pseries guest using KVM and TCG.
> > 
> > Hmm.. do these all fix bugs with migration, or only some of them?
> 
> Probably only some. 
> 
> Initially, Thomas gave a shorter list which I expanded to a larger one 
> because of dependencies between patches and I didn't want to change too
> much what Ben had sent. You had also reviewed a few.
> 
> > The relevance is that things to fix migration should go into 2.6, but
> > preparation work for powernv that doesn't fix bugs shouldn't really be
> > going in now, after the soft freeze and will need to wait for 2.7.
> 
> OK. I will rework and keep the rest for 2.7. 

So, I'm ok with including (low risk) patches that aren't directly
relevant to 2.6 if they're prereqs for patches that are relevant to
2.6.  After all, reworking the patches isn't risk free either.  Please
mention why these patches are being included in the commit messages
though.

> Thomas, thanks for the review. I have identified a few things I need 
> to work on but maybe the patchset is still too large for 2.6?

It's not really a question of being too large, it's that I'm nervous
about applying patches which touch the core translation code
(e.g. fixes to HV mode tests) during soft freeze if they're not
addressing a bug that's relevant to 2.6.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH 09/17] ppc: SPURR & PURR are HV writeable and privileged

2016-03-15 Thread David Gibson

On Mon, Mar 14, 2016 at 08:37:59PM +0100, Thomas Huth wrote:
> On 14.03.2016 17:56, Cédric Le Goater wrote:
> > From: Benjamin Herrenschmidt 
> > 
> > Those are HV writeable, so we provide a dummy write. We eventually need
> > to provide a better emulation but for now this will get us going.
> > 
> > We also make them non-user readable as per the architecture.
> > 
> > Signed-off-by: Benjamin Herrenschmidt 
> > ---
> >  target-ppc/translate_init.c | 25 +
> >  1 file changed, 17 insertions(+), 8 deletions(-)
> > 
> > diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> > index 7a399b97bc6f..10f67136b609 100644
> > --- a/target-ppc/translate_init.c
> > +++ b/target-ppc/translate_init.c
> > @@ -293,6 +293,13 @@ static void spr_read_purr (DisasContext *ctx, int 
> > gprn, int sprn)
> >  {
> >  gen_helper_load_purr(cpu_gpr[gprn], cpu_env);
> >  }
> > +
> > +__attribute__ ((unused))
> > +static void spr_write_purr(DisasContext *ctx, int gprn, int sprn)
> > +{
> > +/* Temporary placeholder */
> > +}
> 
> What's the "__attribute__ ((unused))" needed here for? The function is
> referenced below, so it should be "used"?
> Or is this simply about handling the CONFIG_USER_ONLY case? Then I think
> it would be nicer to change the #ifdef in front of it to include
> "!defined(CONFIG_USER_ONLY)", too.

Ah, yes.  I second Thomas' comment.

> 
>  Thomas
> 
> >  #endif
> >  #endif
> >  
> > @@ -7828,14 +7835,16 @@ static void gen_spr_book3s_purr(CPUPPCState *env)
> >  {
> >  #if !defined(CONFIG_USER_ONLY)
> >  /* PURR & SPURR: Hack - treat these as aliases for the TB for now */
> > -spr_register_kvm(env, SPR_PURR,   "PURR",
> > - _read_purr, SPR_NOACCESS,
> > - _read_purr, SPR_NOACCESS,
> > - KVM_REG_PPC_PURR, 0x);
> > -spr_register_kvm(env, SPR_SPURR,   "SPURR",
> > - _read_purr, SPR_NOACCESS,
> > - _read_purr, SPR_NOACCESS,
> > - KVM_REG_PPC_SPURR, 0x);
> > +spr_register_kvm_hv(env, SPR_PURR,   "PURR",
> > +SPR_NOACCESS, SPR_NOACCESS,
> > +_read_purr, SPR_NOACCESS,
> > +_read_purr, _write_purr,
> > +KVM_REG_PPC_PURR, 0x);
> > +spr_register_kvm_hv(env, SPR_SPURR,   "SPURR",
> > +SPR_NOACCESS, SPR_NOACCESS,
> > +_read_purr, SPR_NOACCESS,
> > +_read_purr, _write_purr,
> > +KVM_REG_PPC_SPURR, 0x);
> >  #endif
> >  }
> >  
> > 
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH 08/17] ppc: Add placeholder SPRs for DPDES and DHDES on P8

2016-03-15 Thread David Gibson

On Mon, Mar 14, 2016 at 05:56:31PM +0100, Cédric Le Goater wrote:
> From: Benjamin Herrenschmidt 
> 
> We still need to eventually implement doorbells but at least this
> makes us not crash when the SPRs are accessed.
> 
> Signed-off-by: Benjamin Herrenschmidt 

Reviewed-by: David Gibson 

But expecting this will be for 2.7, not 2.6.

> ---
>  target-ppc/cpu.h|  2 ++
>  target-ppc/translate_init.c | 17 +
>  2 files changed, 19 insertions(+)
> 
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index 02aed6427ade..779cb57bd700 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -1394,6 +1394,8 @@ static inline int cpu_mmu_index (CPUPPCState *env, bool 
> ifetch)
>  #define SPR_MPC_ICTRL (0x09E)
>  #define SPR_MPC_BAR   (0x09F)
>  #define SPR_PSPB  (0x09F)
> +#define SPR_DHDES (0x0B1)
> +#define SPR_DPDES (0x0B0)
>  #define SPR_DAWR  (0x0B4)
>  #define SPR_RPR   (0x0BA)
>  #define SPR_DAWRX (0x0BC)
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index 59a68de0bce8..7a399b97bc6f 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -8036,6 +8036,22 @@ static void gen_spr_power8_rpr(CPUPPCState *env)
>  #endif
>  }
>  
> +static void gen_spr_power8_dbell(CPUPPCState *env)
> +{
> +#if !defined(CONFIG_USER_ONLY)
> +spr_register_hv(env, SPR_DPDES, "DPDES",
> +SPR_NOACCESS, SPR_NOACCESS,
> +&spr_read_generic, SPR_NOACCESS,
> +&spr_read_generic, &spr_write_generic,
> +0);
> +spr_register_hv(env, SPR_DHDES, "DHDES",
> +SPR_NOACCESS, SPR_NOACCESS,
> +SPR_NOACCESS, SPR_NOACCESS,
> +&spr_read_generic, &spr_write_generic,
> +0);
> +#endif
> +}
> +
>  static void init_proc_book3s_64(CPUPPCState *env, int version)
>  {
>  gen_spr_ne_601(env);
> @@ -8089,6 +8105,7 @@ static void init_proc_book3s_64(CPUPPCState *env, int 
> version)
>  gen_spr_power8_pspb(env);
>  gen_spr_vtb(env);
>  gen_spr_power8_rpr(env);
> +gen_spr_power8_dbell(env);
>  }
>  if (version < BOOK3S_CPU_POWER8) {
>  gen_spr_book3s_dbg(env);

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH 09/17] ppc: SPURR & PURR are HV writeable and privileged

2016-03-15 Thread David Gibson

On Mon, Mar 14, 2016 at 05:56:32PM +0100, Cédric Le Goater wrote:
> From: Benjamin Herrenschmidt 
> 
> Those are HV writeable, so we provide a dummy write. We eventually need
> to provide a better emulation but for now this will get us going.
> 
> We also make them non-user readable as per the architecture.
> 
> Signed-off-by: Benjamin Herrenschmidt 

Reviewed-by: David Gibson 

But expecting it for 2.7, not 2.6.

> ---
>  target-ppc/translate_init.c | 25 +
>  1 file changed, 17 insertions(+), 8 deletions(-)
> 
> diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
> index 7a399b97bc6f..10f67136b609 100644
> --- a/target-ppc/translate_init.c
> +++ b/target-ppc/translate_init.c
> @@ -293,6 +293,13 @@ static void spr_read_purr (DisasContext *ctx, int gprn, 
> int sprn)
>  {
>  gen_helper_load_purr(cpu_gpr[gprn], cpu_env);
>  }
> +
> +__attribute__ ((unused))
> +static void spr_write_purr(DisasContext *ctx, int gprn, int sprn)
> +{
> +/* Temporary placeholder */
> +}
> +
>  #endif
>  #endif
>  
> @@ -7828,14 +7835,16 @@ static void gen_spr_book3s_purr(CPUPPCState *env)
>  {
>  #if !defined(CONFIG_USER_ONLY)
>  /* PURR & SPURR: Hack - treat these as aliases for the TB for now */
> -spr_register_kvm(env, SPR_PURR,   "PURR",
> - _read_purr, SPR_NOACCESS,
> - _read_purr, SPR_NOACCESS,
> - KVM_REG_PPC_PURR, 0x);
> -spr_register_kvm(env, SPR_SPURR,   "SPURR",
> - _read_purr, SPR_NOACCESS,
> - _read_purr, SPR_NOACCESS,
> - KVM_REG_PPC_SPURR, 0x);
> +spr_register_kvm_hv(env, SPR_PURR,   "PURR",
> +SPR_NOACCESS, SPR_NOACCESS,
> +_read_purr, SPR_NOACCESS,
> +_read_purr, _write_purr,
> +KVM_REG_PPC_PURR, 0x);
> +spr_register_kvm_hv(env, SPR_SPURR,   "SPURR",
> +SPR_NOACCESS, SPR_NOACCESS,
> +_read_purr, SPR_NOACCESS,
> +_read_purr, _write_purr,
> +KVM_REG_PPC_SPURR, 0x);
>  #endif
>  }
>  

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH 04/17] ppc: Add number of threads per core to the processor definition

2016-03-15 Thread David Gibson

On Wed, Mar 16, 2016 at 08:11:48AM +1100, Benjamin Herrenschmidt wrote:
> On Tue, 2016-03-15 at 20:45 +1100, David Gibson wrote:
> > On Mon, Mar 14, 2016 at 05:56:27PM +0100, Cédric Le Goater wrote:
> > > 
> > > From: Benjamin Herrenschmidt 
> > > 
> > > Also use it to clamp the max SMT mode and ensure that the cpu_dt_id
> > > are offset by that value in order to preserve consistency with the
> > > HW implementations.
> 
> > I think this can change CPU ids, and therefore break migration
> > on some existing setups.  So it will need some rework to apply at
> > all, and will certainly want to wait until after 2.6
> 
> Our migration is so bloody damn fragile ... grrr.

Well, yes, but that can't really be blamed for this one: you're
changing a guest visible detail.

> We will need it for powernv though, there are many things especially in
> OPAL that rely on the consistent numbering.

Right.  Really it doesn't make sense to allocate the dt_id here - that
should be done in the machine type code which actually controls the
DT.  That way we can change to fixed numbering for powernv (and
possibly future spapr) machine types, while leaving it the same for
existing machine types for compatibility.

> In fact, it will have to go further and number the cores based on their
> equivalent HW numbers at some point for SCOMs to work, which means a
> slightly discontiguous numbering scheme (no core 0 for example). At
> least if we want to model some of the EX XSCOMs.

Right, another argument that the machine setup code needs to be in
charge of the guest visible CPU ids.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH qemu] vmstate: Define VARRAY with VMS_ALLOC

2016-03-15 Thread David Gibson

On Tue, Mar 15, 2016 at 12:01:04PM +, Dr. David Alan Gilbert wrote:
> * Alexey Kardashevskiy (a...@ozlabs.ru) wrote:
> > This allows dynamic allocation for migrating arrays.
> > 
> > Already existing VMSTATE_VARRAY_UINT32 requires an array to be
> > pre-allocated, however there are cases when the size is not known in
> > advance and there is no real need to enforce it.
> > 
> > This defines another variant of VMSTATE_VARRAY_UINT32 with VMS_ALLOC
> > flag which tells the receiving side to allocate memory for the array
> > before receiving the data.
> > 
> > The first user of it is the "pseries" machine (POWER8) with
> > dynamic DMA windows whose existence and size are totally dynamic.
> 
> You say totally dynamic, how big do they get out of interest?

They're basically used to map all guest RAM.  Typically we'd be
looking at one 64-bit TCE per 64K guest page, so we'd be looking at
1/8192th of RAM size.

Since we can in theory have guests in the 1T+ range, that might start
getting pretty big, so we probably should look at incremental transfer
of the TCE tables at some point.

> 
> Dave
> 
> > 
> > Signed-off-by: Alexey Kardashevskiy 
> > Reviewed-by: David Gibson 
> > Reviewed-by: Thomas Huth 
> > ---
> > 
> > 
> > As David suggested, I am posting it alone (separately from the DDW 
> > patchset).
> > 
> > It would be nice either to get "ack" to get this pulled via the powerpc
> > tree or get it pulled to the upstream via some other tree. Thanks!
> > 
> > 
> > 
> > ---
> >  include/migration/vmstate.h | 10 ++
> >  1 file changed, 10 insertions(+)
> > 
> > diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
> > index 84ee355..1622638 100644
> > --- a/include/migration/vmstate.h
> > +++ b/include/migration/vmstate.h
> > @@ -386,6 +386,16 @@ extern const VMStateInfo vmstate_info_bitmap;
> >  .offset = vmstate_offset_pointer(_state, _field, _type), \
> >  }
> >  
> > +#define VMSTATE_VARRAY_UINT32_ALLOC(_field, _state, _field_num, _version, 
> > _info, _type) {\
> > +.name   = (stringify(_field)),   \
> > +.version_id = (_version),\
> > +.num_offset = vmstate_offset_value(_state, _field_num, uint32_t),\
> > +.info   = &(_info),  \
> > +.size   = sizeof(_type), \
> > +.flags  = VMS_VARRAY_UINT32|VMS_POINTER|VMS_ALLOC,   \
> > +.offset = vmstate_offset_pointer(_state, _field, _type), \
> > +}
> > +
> >  #define VMSTATE_VARRAY_UINT16_UNSAFE(_field, _state, _field_num, _version, 
> > _info, _type) {\
> >  .name   = (stringify(_field)),   \
> >  .version_id = (_version),\

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH 0/3] target-ppc improvements

2016-03-15 Thread David Gibson

On Tue, Feb 23, 2016 at 05:18:33PM -0800, Richard Henderson wrote:
> The ISEL patch was posted last year; I don't believe I ever
> got around to posting these other two.

I'm not entirely clear if these are just cleanups / optimizations, or
if they also fix actual bugs.

If the former, I'm inclined to leave these until the 2.7 cycle.  So,
for the time being I've applied these to a new ppc-for-2.7 branch.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson

signature.asc
Description: PGP signature

[Qemu-devel] [PATCH] mac_newworld.c: add USB keyboard and mouse as default

2016-03-15 Thread Programmingkid

Add the USB keyboard and mouse by default to the mac99 target. This more closely
represents what a real PowerMac3,1 would be using.

Signed-off-by: John Arbuckle 

---
The USB keyboard and mouse work better than their ADB counterparts.
I can right click on a Mac OS X guest with a USB mouse.
The Key Caps program on Mac OS 9 actually works well with the USB keyboard.
Apple System Profiler on Mac OS 9 actually displays a keyboard name under the 
keyboard heading.

 hw/ppc/mac_newworld.c | 20 +++-
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/hw/ppc/mac_newworld.c b/hw/ppc/mac_newworld.c
index f0a36b3..ab38b77 100644
--- a/hw/ppc/mac_newworld.c
+++ b/hw/ppc/mac_newworld.c
@@ -165,7 +165,6 @@ static void ppc_core99_init(MachineState *machine)
 PCIBus *pci_bus;
 PCIDevice *macio;
 MACIOIDEState *macio_ide;
-BusState *adb_bus;
 MacIONVRAMState *nvr;
 int bios_size;
 MemoryRegion *pic_mem, *escc_mem;
@@ -415,24 +414,11 @@ static void ppc_core99_init(MachineState *machine)
 "ide[1]"));
 macio_ide_init_drives(macio_ide, [MAX_IDE_DEVS]);
 
-dev = DEVICE(object_resolve_path_component(OBJECT(macio), "cuda"));
-adb_bus = qdev_get_child_bus(dev, "adb.0");
-dev = qdev_create(adb_bus, TYPE_ADB_KEYBOARD);
-qdev_init_nofail(dev);
-dev = qdev_create(adb_bus, TYPE_ADB_MOUSE);
-qdev_init_nofail(dev);
-
 if (machine->usb) {
 pci_create_simple(pci_bus, -1, "pci-ohci");
-
-/* U3 needs to use USB for input because Linux doesn't support via-cuda
-on PPC64 */
-if (machine_arch == ARCH_MAC99_U3) {
-USBBus *usb_bus = usb_bus_find(-1);
-
-usb_create_simple(usb_bus, "usb-kbd");
-usb_create_simple(usb_bus, "usb-mouse");
-}
+USBBus *usb_bus = usb_bus_find(-1);
+usb_create_simple(usb_bus, "usb-kbd");
+usb_create_simple(usb_bus, "usb-mouse");
 }
 
 pci_vga_init(pci_bus);
-- 
2.7.2

Re: [Qemu-devel] [RFC PATCH v2 6/9] spapr: CPU core device

2016-03-15 Thread David Gibson

On Tue, Mar 15, 2016 at 02:46:37PM +0100, Igor Mammedov wrote:
> On Tue, 15 Mar 2016 20:34:28 +1100
> David Gibson  wrote:
> > On Tue, Mar 15, 2016 at 02:44:01PM +0530, Bharata B Rao wrote:
> > > On Mon, Mar 14, 2016 at 11:25:23AM +0100, Igor Mammedov wrote:  
> > > > On Fri, 11 Mar 2016 10:24:35 +0530
> > > > Bharata B Rao  wrote:
[snip]
> > > > > +if (!core->oc) {
> > > > > +error_setg(_err, "cpu_model property isn't set");
> > > > > +goto out;
> > > > > +}
> > > > > +
> > > > > +core_dt_id = object_property_get_int(OBJECT(dev), "core", 
> > > > > _err);
> > > > > +if (local_err) {
> > > > > +goto out;
> > > > > +}
> > > > > +
> > > > > +if (core_dt_id % smt) {
> > > > > +error_setg(_err, "invalid core id %d\n", core_dt_id);
> > > > > +goto out;
> > > > > +}
> > > > > +
> > > > > +core_id = core_dt_id / smt;
> > > > > +if (core_id < 0 || core_id >= spapr_max_cores) {
> > > > > +error_setg(_err, "core id %d out of range", 
> > > > > core_dt_id);
> > > > > +goto out;
> > > > > +}  
> > > > > maybe due to naming it's a bit confusing,
> > > > what's difference between core_id and core_dt_id?  
> > > 
> > > core_dt_id is the device tree IDs that we use with PowerPC cores. This is
> > > what we use with "core" property of CPU_CORE. Since core_dt_id doesn't
> > > grow contiguously (Eg. it will be 0, 8, 16 etc for SMT8 guest on a POWER8 
> > > host),
> > > I am translating that to contiguous integer core_id so that I can
> > > store the pointer of the realized core in the appropriate slot of
> > > spapr->cpu_cores[] array.  
> > 
> > So, I see why the distinction is there, but it is kinda confusing.
> > I'm wondering if we could do away with the spapr->cores array entirely
> > and instead just access the core objects via the QOM tree - QOM
> > "arrays" (i.e. properties named like foo[NNN]) can be sparse, so
> > there's no need to allocate dense ids.
> Wouldn't lookups for duplicates in the QOM tree take O(N^2)
> when hot-plugging N cpus?

With the present QOM implementation, yes, although I know Paolo has
made noises about changing that to a hash table.

> It should be less with sorted array at least.

It would, but I doubt the O(N^2) will actually be a problem with
realistic numbers of cpus.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature

Re: [Qemu-devel] [PATCH v2 4/5] spapr: check if cpu core is already present

2016-03-15 Thread David Gibson

On Tue, Mar 15, 2016 at 12:05:06PM +0100, Igor Mammedov wrote:
> On Tue, 15 Mar 2016 17:10:27 +1100
> David Gibson  wrote:
> 
> > On Thu, Mar 10, 2016 at 11:39:46AM +0100, Igor Mammedov wrote:
> > > On Thu, 10 Mar 2016 11:32:44 +0530
> > > Bharata B Rao  wrote:
> > >   
> > > > On Thu, Mar 10, 2016 at 04:22:43PM +1100, David Gibson wrote:  
> > > > > On Wed, Mar 09, 2016 at 11:07:40AM +0100, Igor Mammedov wrote:
> > > > > > On Tue, 8 Mar 2016 20:04:12 +0530
> > > > > > Bharata B Rao  wrote:
> > > > > > 
> > > > > > > On Tue, Mar 08, 2016 at 02:18:14PM +0100, Igor Mammedov wrote:
> > > > > > > > Signed-off-by: Igor Mammedov 
> > > > > > > > ---
> > > > > > > > replaced link set check removed in previous patch
> > > > > > > > ---
> > > > > > > >  hw/ppc/spapr.c | 26 ++
> > > > > > > >  1 file changed, 22 insertions(+), 4 deletions(-)
> > > > > > > > 
> > > > > > > > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > > > > > > > index 6890a44..db33c29 100644
> > > > > > > > --- a/hw/ppc/spapr.c
> > > > > > > > +++ b/hw/ppc/spapr.c
> > > > > > > > @@ -2297,6 +2297,27 @@ void 
> > > > > > > > *spapr_populate_hotplug_cpu_dt(DeviceState *dev, CPUState *cs,
> > > > > > > >  return fdt;
> > > > > > > >  }
> > > > > > > > 
> > > > > > > > +static void spapr_machine_device_pre_plug(HotplugHandler 
> > > > > > > > *hotplug_dev,
> > > > > > > > +  DeviceState *dev, 
> > > > > > > > Error **errp)
> > > > > > > > +{
> > > > > > > > +sPAPRMachineClass *smc = 
> > > > > > > > SPAPR_MACHINE_GET_CLASS(hotplug_dev);
> > > > > > > > +sPAPRMachineState *spapr = SPAPR_MACHINE(hotplug_dev);
> > > > > > > > +
> > > > > > > > +if (object_dynamic_cast(OBJECT(dev), TYPE_SPAPR_CPU_CORE)) 
> > > > > > > > {
> > > > > > > > +int core = object_property_get_int(OBJECT(dev), 
> > > > > > > > CPU_CORE_ID_PROP,
> > > > > > > > +   _abort);
> > > > > > > > +
> > > > > > > > +if (!smc->dr_cpu_enabled && dev->hotplugged) {
> > > > > > > > +error_setg(errp, "CPU hotplug not supported for 
> > > > > > > > this machine");
> > > > > > > > +return;
> > > > > > > > +}
> > > > > > > > +if (spapr->cores[core]) {
> > > > > > > > +error_setg(errp, "core %d is already present", 
> > > > > > > > core);
> > > > > > > > +return;
> > > > > > > > +}  
> > > > > > > 
> > > > > > > Wondering why can't we do the above check from core's realizefn 
> > > > > > > and fail
> > > > > > > the core hotplug from realizefn ?
> > > > > > that's rather simple, in ideal QOM world child shouldn't
> > > > > > poke into parents internal if it could be helped.
> > > > > > So hook provides responsibility separation where
> > > > > > board/or something else(HotplugHandler) can do a necessary
> > > > > > wiring of a component which is being hotplugged, without
> > > > > > forcing hotplugged device being aware about it.
> > > > > 
> > > > > Oh.. yes.  Sorry, somehow I got confused and thought you were
> > > > > suggesting a 'pre_realize()' method on the *object* rather than a
> > > > > pre_plug hotplughandler hook.
> > > > > 
> > > > > > That's what HotplugHandler->plug callback is doing for
> > > > > > post realize and HotplugHandler->pre_plug will do similar
> > > > > > thing but allowing board to execute preliminary tasks
> > > > > > (like check/set properties, amend its internal state)
> > > > > > before object is realized.
> > > > > 
> > > > > > That will make realize() cleaner as it won't have to hack
> > > > > > into data it shouldn't and would prevent us calling unrealize()
> > > > > > if we were to check it later at HotplugHandler->plug time.
> > > > > > (i.e. realize() won't even have a chance to introduce side
> > > > > > > effects that should be undone with unrealize())
> > > > > 
> > > > > Hmm.. how big a deal is it to roll back from the existing plug()
> > > > > handler?  
> > > realize shouldn't complete without error if object properties are
> > > wrong /for ex: i.e. you create kvm vcpu thread, configure it
> > > > as already existing vcpu and have a lot of fun afterwards/.  
> (*1 ^^^)
> 
> > 
> > It seems to me there are two sorts of checks.  (1) properties that are
> > wrong simply with reference to the CPU core itself (e.g. unsupported
> > CPU model, impossible number of threads).  (2) properties that are
> > wrong only in the context of other CPUs or devices (e.g. core id
> > already populated, too many cores, impossible core id).
> > 
> > Is it really a problem for realize() to complete if (1) is checked,
> > but not (2)?
> skipping 2 would do *1, (it's hard to tell what complications would
> be if CPU object with incorrect properties are created)

Hm, ok.

> > If it's so essential, I'm surprised we haven't hit this

Re: [Qemu-devel] [PATCH 5/8] virtio-blk: fix "disabled data plane" mode

2016-03-15 Thread Fam Zheng

On Tue, 03/15 15:08, Paolo Bonzini wrote:
> 
> 
> On 15/03/2016 14:18, Cornelia Huck wrote:
> > On Tue, 15 Mar 2016 20:45:30 +0800
> > Fam Zheng  wrote:
> > 
> >> On Fri, 03/11 11:28, Paolo Bonzini wrote:
> > 
> >>> But secondarily, I'm thinking of making the logic simpler to understand 
> >>> in two ways:
> >>>
> >>> 1) adding a mutex around virtio_blk_data_plane_start/stop.
> >>>
> >>> 2) moving
> >>>
> >>> event_notifier_set(virtio_queue_get_host_notifier(s->vq));
> >>> virtio_queue_aio_set_host_notifier_handler(s->vq, s->ctx, true, true);
> >>>
> >>> to a bottom half (created with aio_bh_new in s->ctx).  The bottom half
> >>> takes the mutex, checks again "if (vblk->dataplane_started)" and if it's
> >>> true starts the processing.
> >>
> >> Like this? If it captures your idea, could Bo or Christian help test?
> >>
> >> ---
> >>
> >> From b5b8886693828d498ee184fc7d4e13d8c06cdf39 Mon Sep 17 00:00:00 2001
> >> From: Fam Zheng 
> >> Date: Thu, 10 Mar 2016 10:26:36 +0800
> >> Subject: [PATCH] virtio-blk dataplane start crash fix
> >>
> >> Suggested-by: Paolo Bonzini 
> >> Signed-off-by: Fam Zheng 
> >> ---
> >>  block.c |  4 +++-
> >>  hw/block/dataplane/virtio-blk.c | 39 
> >> ---
> >>  2 files changed, 35 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/block.c b/block.c
> >> index ba24b8e..e37e8f7 100644
> >> --- a/block.c
> >> +++ b/block.c
> >> @@ -4093,7 +4093,9 @@ void bdrv_attach_aio_context(BlockDriverState *bs,
> >>
> >>  void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context)
> >>  {
> >> -bdrv_drain(bs); /* ensure there are no in-flight requests */
> >> +/* ensure there are no in-flight requests */
> >> +bdrv_drained_begin(bs);
> >> +bdrv_drained_end(bs);
> 
> I'm not sure that this is necessary.  An empty section should be the
> same as plain old bdrv_drain.

Slightly different. This wraps aio_poll of bdrv_drain with
aio_disable_external/aio_enable_external, which avoids a nested
virtio_blk_handle_output as explained in my earlier message.

> 
> >>  bdrv_detach_aio_context(bs);
> >>
> >> diff --git a/hw/block/dataplane/virtio-blk.c 
> >> b/hw/block/dataplane/virtio-blk.c
> >> index 36f3d2b..6db5c22 100644
> >> --- a/hw/block/dataplane/virtio-blk.c
> >> +++ b/hw/block/dataplane/virtio-blk.c
> >> @@ -49,6 +49,8 @@ struct VirtIOBlockDataPlane {
> >>
> >>  /* Operation blocker on BDS */
> >>  Error *blocker;
> >> +
> >> +QemuMutex start_lock;
> >>  };
> >>
> >>  /* Raise an interrupt to signal guest, if necessary */
> >> @@ -150,6 +152,7 @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, 
> >> VirtIOBlkConf *conf,
> >>  s = g_new0(VirtIOBlockDataPlane, 1);
> >>  s->vdev = vdev;
> >>  s->conf = conf;
> >> +qemu_mutex_init(>start_lock);
> >>
> >>  if (conf->iothread) {
> >>  s->iothread = conf->iothread;
> >> @@ -184,15 +187,38 @@ void 
> >> virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s)
> >>  g_free(s);
> >>  }
> >>
> >> +typedef struct {
> >> +VirtIOBlockDataPlane *s;
> >> +QEMUBH *bh;
> >> +} VirtIOBlockStartData;
> >> +
> >> +static void virtio_blk_data_plane_start_bh_cb(void *opaque)
> >> +{
> >> +VirtIOBlockStartData *data = opaque;
> >> +VirtIOBlockDataPlane *s = data->s;
> > 
> > Won't you need to check here whether ->started is still set?
> 
> Yes.
> 
> >> +
> >> +/* Kick right away to begin processing requests already in vring */
> >> +event_notifier_set(virtio_queue_get_host_notifier(s->vq));
> >> +
> >> +/* Get this show started by hooking up our callbacks */
> >> +virtio_queue_aio_set_host_notifier_handler(s->vq, s->ctx, true, true);
> >> +
> >> +qemu_bh_delete(data->bh);
> >> +g_free(data);
> >> +}
> >> +
> >>  /* Context: QEMU global mutex held */
> >>  void virtio_blk_data_plane_start(VirtIOBlockDataPlane *s)
> >>  {
> >>  BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(s->vdev)));
> >>  VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
> >>  VirtIOBlock *vblk = VIRTIO_BLK(s->vdev);
> >> +VirtIOBlockStartData *data;
> >>  int r;
> >>
> >> +qemu_mutex_lock(>start_lock);
> >>  if (vblk->dataplane_started || s->starting) {
> > 
> > Do we still need ->starting with the new mutex?
> 
> No, but really we shouldn't have needed it before either. :)  So a task
> for another day.
> 
> >> +qemu_mutex_unlock(>start_lock);
> >>  return;
> >>  }
> >>
> >>  /* Context: QEMU global mutex held */
> > 
> > Do you also need to do something in _stop()?
> 
> _stop definitely needs to take the mutex too.

Will fix this and above and send as a top level email.

Fam

Re: [Qemu-devel] [PATCH v4 0/4] Add ASPEED AST2400 SoC and OpenPower BMC machine

2016-03-15 Thread Andrew Jeffery

On Tue, 2016-03-15 at 11:25 +0100, Cédric Le Goater wrote:
> On 03/15/2016 06:01 AM, Andrew Jeffery wrote:
> > On Tue, 2016-03-15 at 12:34 +0800, Jeremy Kerr wrote:
> > > Hi Andrew,
> > > 
> > > > This patch series models enough of the ASPEED AST2400 ARM9 SoC[0] to
> > > > boot an aspeed_defconfig Linux kernel[1][2]. Specifically, the series
> > > > implements the ASPEED timer and VIC devices, integrates them into an
> > > > AST2400 SoC and exposes it all through a new opbmc2400 machine. The
> > > > device model patches only partially implement the hardware features of
> > > > the timer and VIC, again mostly just enough to boot Linux.
> > > 
> > > Awesome! Nice to have these patches escaping the lab :)
> > > 
> > > In terms of naming suggestions: I think this depends on what we're
> > > looking to emulate here. I see two options:
> > > 
> > > The qemu platform becomes a "reference" for OpenPOWER bmc hardware, but
> > > doesn't necessarily align with an actual machine. In that case,
> > > something generic like opbmc- would make sense.
> > > 
> > > On the other hand, if we'd like to create qemu platforms that represent
> > > actual machines (eg, the OpenPOWER "palmetto" machine), then
> > > -bmc would seem more appropriate. In this case, the machine
> > > name would be palmetto-bmc. No need to include the SoC name in that, as
> > > it's defined by the hardware implementation.
> > > 
> > > I think the latter option may be more generally useful.
> > 
> > Okay, agreed, I'll rework the change to use palmetto-bmc for the
> > machine name. Thanks for the feedback.
> 
> Yes. palmetto-bmc is a good choice. Palmetto is a reference machine
> for OpenPOWER. 
> 
> Maybe also change:
> 
> +mc->desc = "OpenPOWER AST2400 BMC (ARM926EJ-S)";
> 
> to reflect that choice.

Will do!

Thanks,

Andrew

signature.asc
Description: This is a digitally signed message part

Re: [Qemu-devel] [PATCH v4 1/4] hw/timer: Add ASPEED timer device model

2016-03-15 Thread Andrew Jeffery

On Tue, 2016-03-15 at 14:14 +0100, Cédric Le Goater wrote:
> On 03/14/2016 05:13 AM, Andrew Jeffery wrote:
> > Implement basic ASPEED timer functionality for the AST2400 SoC[1]: Up to
> > 8 timers can independently be configured, enabled, reset and disabled.
> > Some hardware features are not implemented, namely clock value matching
> > and pulse generation, but the implementation is enough to boot the Linux
> > kernel configured with aspeed_defconfig.
> > 
> > [1] http://www.aspeedtech.com/products.php?fPath=20=376
> > 
> > Signed-off-by: Andrew Jeffery 
> 
> Looks good. One stylistic comment and a possible compile break in 
> timer_to_ctrl(). 
> 
> > 
> > ---
> > Since v3:
> >   * Drop unnecessary mention of VMStateDescription in timer_to_ctrl 
> > description
> >   * Mention hw/timer/a9gtimer.c with respect to clock value matching
> >   * Add missing VMSTATE_END_OF_LIST() to vmstate_aspeed_timer_state
> > 
> > Since v2:
> >   * Improve handling of timer configuration with respect to enabled state
> >   * Remove redundant enabled member from AspeedTimer
> >   * Implement VMStateDescriptions
> >   * Fix interrupt behaviour (edge triggered, both edges)
> >   * Fix various issues with trace-event declarations
> >   * Include qemu/osdep.h
> > 
> > Since v1:
> >   * Refactor initialisation of and respect requested clock rates 
> > (APB/External)
> >   * Simplify some index calculations
> >   * Use tracing infrastructure instead of internal DPRINTF
> >   * Enforce access size constraints and alignment in MemoryRegionOps
> > 
> >  default-configs/arm-softmmu.mak |   1 +
> >  hw/timer/Makefile.objs  |   1 +
> >  hw/timer/aspeed_timer.c | 451 
> > 
> >  include/hw/timer/aspeed_timer.h |  59 ++
> >  trace-events|   9 +
> >  5 files changed, 521 insertions(+)
> >  create mode 100644 hw/timer/aspeed_timer.c
> >  create mode 100644 include/hw/timer/aspeed_timer.h
> > 
> > diff --git a/default-configs/arm-softmmu.mak 
> > b/default-configs/arm-softmmu.mak
> > index a9f82a1..2bcd236 100644
> > --- a/default-configs/arm-softmmu.mak
> > +++ b/default-configs/arm-softmmu.mak
> > @@ -110,3 +110,4 @@ CONFIG_IOH3420=y
> >  CONFIG_I82801B11=y
> >  CONFIG_ACPI=y
> >  CONFIG_SMBIOS=y
> > +CONFIG_ASPEED_SOC=y
> > diff --git a/hw/timer/Makefile.objs b/hw/timer/Makefile.objs
> > index 5cfea6e..003c14f 100644
> > --- a/hw/timer/Makefile.objs
> > +++ b/hw/timer/Makefile.objs
> > @@ -32,3 +32,4 @@ obj-$(CONFIG_MC146818RTC) += mc146818rtc.o
> >  obj-$(CONFIG_ALLWINNER_A10_PIT) += allwinner-a10-pit.o
> >  
> >  common-obj-$(CONFIG_STM32F2XX_TIMER) += stm32f2xx_timer.o
> > +common-obj-$(CONFIG_ASPEED_SOC) += aspeed_timer.o
> > diff --git a/hw/timer/aspeed_timer.c b/hw/timer/aspeed_timer.c
> > new file mode 100644
> > index 000..0e82178
> > --- /dev/null
> > +++ b/hw/timer/aspeed_timer.c
> > @@ -0,0 +1,452 @@
> > +/*
> > + * ASPEED AST2400 Timer
> > + *
> > + * Andrew Jeffery 
> > + *
> > + * Copyright (C) 2016 IBM Corp.
> > + *
> > + * This code is licensed under the GPL version 2 or later.  See
> > + * the COPYING file in the top-level directory.
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +#include "hw/ptimer.h"
> > +#include "hw/sysbus.h"
> > +#include "hw/timer/aspeed_timer.h"
> > +#include "qemu-common.h"
> > +#include "qemu/bitops.h"
> > +#include "qemu/main-loop.h"
> > +#include "qemu/timer.h"
> > +#include "trace.h"
> > +
> > +#define TIMER_NR_REGS 4
> > +
> > +#define TIMER_CTRL_BITS 4
> > +#define TIMER_CTRL_MASK ((1 << TIMER_CTRL_BITS) - 1)
> > +
> > +#define TIMER_CLOCK_USE_EXT true
> > +#define TIMER_CLOCK_EXT_HZ 100
> > +#define TIMER_CLOCK_USE_APB false
> > +#define TIMER_CLOCK_APB_HZ 2400
> > +
> > +#define TIMER_REG_STATUS 0
> > +#define TIMER_REG_RELOAD 1
> > +#define TIMER_REG_MATCH_FIRST 2
> > +#define TIMER_REG_MATCH_SECOND 3
> > +
> > +#define TIMER_FIRST_CAP_PULSE 4
> > +
> > +enum timer_ctrl_op {
> > +op_enable = 0,
> > +op_external_clock,
> > +op_overflow_interrupt,
> > +op_pulse_enable
> > +};
> > +
> > +/**
> > + * Avoid mutual references between AspeedTimerCtrlState and AspeedTimer
> > + * structs, as it's a waste of memory. The ptimer BH callback needs to know
> > + * whether a specific AspeedTimer is enabled, but this information is held 
> > in
> > + * AspeedTimerCtrlState. So, provide a helper to hoist ourselves from an
> > + * arbitrary AspeedTimer to AspeedTimerCtrlState.
> > + */
> > +static inline struct AspeedTimerCtrlState *timer_to_ctrl(AspeedTimer *t)
> 
> 
> you can remove the 'struct' above.

Good catch, will do.

> 
> > +{
> > +AspeedTimer (*timers)[] = (void *)t - (t->id * sizeof(*t));
> 
> This will not compile on gcc < 5.0. You need to add a 'const' :
> 
> const AspeedTimer (*timers)[] = (void *)t - (t->id * sizeof(*t));
> 
> That should work on all versions.

Thanks, I've confirmed the failure and fix with gcc-4.7. I'll make sure
to test

Re: [Qemu-devel] [PATCH v4 1/4] hw/timer: Add ASPEED timer device model

2016-03-15 Thread Andrew Jeffery

Hi Dmitry,

On Tue, 2016-03-15 at 21:14 +0300, Dmitry Osipenko wrote:
> Hello Andrew,
> 
> 14.03.2016 07:13, Andrew Jeffery пишет:
> > Implement basic ASPEED timer functionality for the AST2400 SoC[1]: Up to
> > 8 timers can independently be configured, enabled, reset and disabled.
> > Some hardware features are not implemented, namely clock value matching
> > and pulse generation, but the implementation is enough to boot the Linux
> > kernel configured with aspeed_defconfig.
> > 
> 
> [snip]
> 
> > +static void aspeed_timer_set_value(AspeedTimerCtrlState *s, int timer, int 
> > reg,
> > +   uint32_t value)
> > +{
> > +AspeedTimer *t;
> > +
> > +g_assert(timer >= 0 && timer < ASPEED_TIMER_NR_TIMERS);
> 
> This would never fail, wouldn't it?

You're right, it shouldn't: I put it in as a sanity check and some
"active" documentation. I'm happy to remove it if you think it just adds
noise.

> 
> [snip]
> 
> > +static void aspeed_timer_write(void *opaque, hwaddr offset, uint64_t value,
> > +   unsigned size)
> > +{
> > +const uint32_t tv = (uint32_t)(value & 0x);
> > +const int reg = (offset & 0xf) / 4;
> > +AspeedTimerCtrlState *s = opaque;
> > +
> > +switch (offset) {
> > +/* Control Registers */
> > +case 0x30:
> > +aspeed_timer_set_ctrl(s, tv);
> > +break;
> > +case 0x34:
> > +aspeed_timer_set_ctrl2(s, tv);
> > +break;
> > +/* Timer Registers */
> > +case 0x00 ... 0x2c:
> > +aspeed_timer_set_value(s, (offset >> TIMER_NR_REGS), reg, tv);
> > +break;
> > +case 0x40 ... 0x8c:
> > +aspeed_timer_set_value(s, (offset >> TIMER_NR_REGS) - 1, reg, tv);
> > +break;
> 
> 
> [snip]
> 
> > +static void aspeed_init_one_timer(AspeedTimerCtrlState *s, uint8_t id)
> > +{
> > +QEMUBH *bh;
> > +AspeedTimer *t = >timers[id];
> > +
> > +t->id = id;
> > +bh = qemu_bh_new(aspeed_timer_expire, t);
> > +assert(bh);
> > +t->timer = ptimer_init(bh);
> > +assert(t->timer);
> > +}
> 
> I'm wondering why do you need those asserts, it's very unlikely that this 
> code 
> would fail. Have you had any weird issues with it?

No, no weird issues - thanks for pointing them out as I'll remove them:
I put them in when I started developing the series, before
understanding that either call should already have aborted if the
allocations failed.

Thanks for taking a look at the patch!

Andrew

signature.asc
Description: This is a digitally signed message part

Re: [Qemu-devel] [PATCH v3 12/12] i.MX: Add sabrelite i.MX6 emulation.

2016-03-15 Thread Jean-Christophe DUBOIS


Le 11/03/2016 00:57, Peter Maydell a écrit :

On 11 March 2016 at 02:24, Jean-Christophe DUBOIS  wrote:

Le 10/03/2016 11:38, Peter Maydell a écrit :

On 2 March 2016 at 05:27, Jean-Christophe Dubois 
wrote:

The sabrelite supports one SPI FLASH memory on SPI1

Signed-off-by: Jean-Christophe Dubois 
---

+
+{
+/* Add the sst25vf016b NOR FLASH memory to first SPI */
+Object *spi_dev;
+
+spi_dev = object_resolve_path_component(OBJECT(>soc),
"spi1");
+if (spi_dev) {
+SSIBus *spi_bus;
+
+spi_bus = (SSIBus *)qdev_get_child_bus(DEVICE(spi_dev),
"spi");

This looks odd. You should just be able to do
   spi_bus = (SSIBus *)qdev_get_child_bus(DEVICE(>soc), "spi1");
without using object_resolve_path_component() to try to find an
SPI device object, because your SoC device should have alias properties
which provide access to its SPI subcomponents' SPI buses.
See hw/arm/xlnx-ep108.c for an example of the board code for this and
hw/arm/xlnx-zynqmp.c for the SoC code which calls
object_property_add_alias() to set up the aliases.


I certainly could do as you proposed.

The problem is that I also need the spi_dev device for the
sysbus_connect_irq() call below.

My spi_dev is referenced as "spi1" in the i.MX6 doc and I added a "spi1"
property for it in the i.MX6 soc.

Once I have the spi_dev device it is trivial to retrieve the spi_bus
attached to it.

So, yes this is not in line with what is done in xlnx-zynqmp.c but the need
is a bit different.

I think the SoC should probably have an externally-facing IRQ line
which it wires up internally to the SPI's IRQ line.
(This corresponds basically to what happens in h/w -- the SoC's
interfaces are defined by it even though many of them may be
directly wired up to some internal component it has, but from
outside the SoC you don't get access to the whole of the internal
component.)


Well, each SPI controller has 4 possible CS lines to external devices 
and I have 5 SPI controllers.


This makes 20 externally facing IRQ lines (these are output lines) to 
add to the i.MX6 soc object as properties (with a meaningful naming 
convention).


And anyway, what I need to pass to sysbus_connect_irq() is a 
SYS_BUS_DEVICE and not an IRQ line ...


So do you mean I should define the 20 external lines (properties) and 
wire all of them up with sysbus_connect_irq() in the i.MX6 SOC 
implementation (fsl-imx6.c)?


Then how do you "connect" this externally facing line to the selected 
device CS line (here a FLASH memory) in sabrelite?


JC




thanks
-- PMM

[Qemu-devel] [PULL] acpi-test: update UID for GSI links

2016-03-15 Thread Michael S. Tsirkin

Update acpi test data to match
commit 6a991e07bb8eeb7d7799a949c0528dffb84b2a98
("hw/acpi: fix GSI links UID").

Signed-off-by: Michael S. Tsirkin 
---
 tests/acpi-test-data/q35/DSDT| Bin 8349 -> 8357 bytes
 tests/acpi-test-data/q35/DSDT.bridge | Bin 8366 -> 8374 bytes
 2 files changed, 0 insertions(+), 0 deletions(-)

diff --git a/tests/acpi-test-data/q35/DSDT b/tests/acpi-test-data/q35/DSDT
index 
cb720f4fb6bdd02a14b089a0fa4da342ec918fee..1c089c34b06c9f2ea9fe67abb45498021319303c
 100644
GIT binary patch
delta 236
zcmbR1xYUu$CDRRhy6`w&;NjVLYEVqTp@13=sS6%xCj%M(BxwRRhy6`w&;Njs0g-aTkg9@7Ib%qjgh{zzNJ#1dmln`NLn4At~F95T*NkA=f$I!wm36=6d
Lm)aa68ORO*#sxJG

diff --git a/tests/acpi-test-data/q35/DSDT.bridge 
b/tests/acpi-test-data/q35/DSDT.bridge
index 
dd4c28525e7d04bc3025eb62a1fd791bb4a6af64..b29fcda0bb1717ff708668c6e98f3ded3f34a96c
 100644
GIT binary patch
delta 236
zcmZ4IxXqEvCDkH436pZckdlzFVDy

[Qemu-devel] [PULL] acpi: minor fix

2016-03-15 Thread Michael S. Tsirkin

The following changes since commit 6a991e07bb8eeb7d7799a949c0528dffb84b2a98:

  hw/acpi: fix GSI links UID (2016-03-15 16:16:57 +0200)

are available in the git repository at:

  git://git.kernel.org/pub/scm/virt/kvm/mst/qemu.git tags/for_upstream

for you to fetch changes up to 3ba6a710e6ca1fb52cfdb38f39268e5c6e380ef9:

  acpi-test: update UID for GSI links (2016-03-15 23:25:52 +0200)


acpi: minor fix

Since previous pull acpi test triggers warnings,
fix it up.

Signed-off-by: Michael S. Tsirkin 


Michael S. Tsirkin (1):
  acpi-test: update UID for GSI links

 tests/acpi-test-data/q35/DSDT| Bin 8349 -> 8357 bytes
 tests/acpi-test-data/q35/DSDT.bridge | Bin 8366 -> 8374 bytes
 2 files changed, 0 insertions(+), 0 deletions(-)

Re: [Qemu-devel] [PATCH 04/17] ppc: Add number of threads per core to the processor definition

2016-03-15 Thread Benjamin Herrenschmidt

On Tue, 2016-03-15 at 20:45 +1100, David Gibson wrote:
> On Mon, Mar 14, 2016 at 05:56:27PM +0100, Cédric Le Goater wrote:
> > 
> > From: Benjamin Herrenschmidt 
> > 
> > Also use it to clamp the max SMT mode and ensure that the cpu_dt_id
> > are offset by that value in order to preserve consistency with the
> > HW implementations.

> I think this can change CPU ids, and therefore break migration
> on some existing setups.  So it will need some rework to apply at
> all, and will certainly want to wait until after 2.6

Our migration is so bloody damn fragile ... grrr.

We will need it for powernv though, there are many things especially in
OPAL that rely on the consistent numbering.

In fact, it will have to go further and number the cores based on their
equivalent HW numbers at some point for SCOMs to work, which means a
slightly discontiguous numbering scheme (no core 0 for example). At
least if we want to model some of the EX XSCOMs.

Cheers,
Ben.

> > 
> > 
> > Signed-off-by: Benjamin Herrenschmidt 
> > ---
> >  target-ppc/cpu-qom.h|  1 +
> >  target-ppc/translate_init.c | 11 ++-
> >  2 files changed, 11 insertions(+), 1 deletion(-)
> > 
> > diff --git a/target-ppc/cpu-qom.h b/target-ppc/cpu-qom.h
> > index 7d5e2b36a997..735981309c5b 100644
> > --- a/target-ppc/cpu-qom.h
> > +++ b/target-ppc/cpu-qom.h
> > @@ -68,6 +68,7 @@ typedef struct PowerPCCPUClass {
> >  uint32_t flags;
> >  int bfd_mach;
> >  uint32_t l1_dcache_size, l1_icache_size;
> > +uint32_t threads_per_core;
> >  #if defined(TARGET_PPC64)
> >  const struct ppc_segment_page_sizes *sps;
> >  #endif
> > diff --git a/target-ppc/translate_init.c b/target-
> > ppc/translate_init.c
> > index 43c6e524a6bc..46dabe58783a 100644
> > --- a/target-ppc/translate_init.c
> > +++ b/target-ppc/translate_init.c
> > @@ -8231,6 +8231,7 @@ POWERPC_FAMILY(POWER5P)(ObjectClass *oc, void
> > *data)
> >   POWERPC_FLAG_BUS_CLK;
> >  pcc->l1_dcache_size = 0x8000;
> >  pcc->l1_icache_size = 0x1;
> > +pcc->threads_per_core = 2;
> >  }
> >  
> >  static void powerpc_get_compat(Object *obj, Visitor *v, const char
> > *name,
> > @@ -8408,6 +8409,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void
> > *data)
> >  pcc->l1_dcache_size = 0x8000;
> >  pcc->l1_icache_size = 0x8000;
> >  pcc->interrupts_big_endian =
> > ppc_cpu_interrupts_big_endian_lpcr;
> > +pcc->threads_per_core = 4;
> >  }
> >  
> >  static void init_proc_POWER8(CPUPPCState *env)
> > @@ -8492,6 +8494,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void
> > *data)
> >  pcc->l1_dcache_size = 0x8000;
> >  pcc->l1_icache_size = 0x8000;
> >  pcc->interrupts_big_endian =
> > ppc_cpu_interrupts_big_endian_lpcr;
> > +pcc->threads_per_core = 8;
> >  }
> >  #endif /* defined (TARGET_PPC64) */
> >  
> > @@ -9195,6 +9198,12 @@ static void ppc_cpu_realizefn(DeviceState
> > *dev, Error **errp)
> >  #endif
> >  
> >  #if !defined(CONFIG_USER_ONLY)
> > +if (pcc->threads_per_core == 0) {
> > +pcc->threads_per_core = 1;
> > +}
> > +if (max_smt > pcc->threads_per_core) {
> > +max_smt = pcc->threads_per_core;
> > +}
> >  if (smp_threads > max_smt) {
> >  error_setg(errp, "Cannot support more than %d threads on
> > PPC with %s",
> > max_smt, kvm_enabled() ? "KVM" : "TCG");
> > @@ -9215,7 +9224,7 @@ static void ppc_cpu_realizefn(DeviceState
> > *dev, Error **errp)
> >  }
> >  
> >  #if !defined(CONFIG_USER_ONLY)
> > -cpu->cpu_dt_id = (cs->cpu_index / smp_threads) * max_smt
> > +cpu->cpu_dt_id = (cs->cpu_index / smp_threads) * pcc-
> > >threads_per_core
> >  + (cs->cpu_index % smp_threads);
> >  #endif
> >

Re: [Qemu-devel] [PULL 00/11] slirp: Adding IPv6 support to Qemu -net user mode

2016-03-15 Thread Jan Kiszka

On 2016-03-15 18:55, Peter Maydell wrote:
> On 15 March 2016 at 16:11, Samuel Thibault  
> wrote:
>> The following changes since commit 618a5a8bc52ba0f2ecbb3dffd01e657f4d841f75:
>>
>>   Merge remote-tracking branch 'remotes/stefanha/tags/tracing-pull-request' 
>> into staging (2016-03-14 16:22:17 +)
>>
>> are available in the git repository at:
>>
>>   http://people.debian.org/~sthibault/qemu.git tags/samuel-thibault
>>
>> for you to fetch changes up to fad7fb9ccd8013ea03c8c7a8f491c395e786dae6:
>>
>>   slirp: Add IPv6 support to the TFTP code (2016-03-15 17:05:34 +0100)
>>
>> 
>> slirp: Adding IPv6 support to Qemu -net user mode
>>
> 
> Applied, thanks.

Many thanks for your persistent work to get this in, Samuel!

Jan

-- 
Siemens AG, Corporate Technology, CT RDA ITP SES-DE
Corporate Competence Center Embedded Linux

Re: [Qemu-devel] [Qemu-arm] [PATCH] sd: Fix "info qtree" on boards with SD cards

2016-03-15 Thread Peter Maydell

On 15 March 2016 at 20:33, Peter Maydell  wrote:
> On 15 March 2016 at 20:28, Thomas Hanson  wrote:
>> The patch looks good.
>>
>> Would it also be good to update bus_add_child() so that it NULL-checks
>> its "bus" parameter before dereferencing it?
>
> No, I think it's just a programming error to call qdev_set_parent_bus()
> with a NULL bus parameter, so crashing is fine.

...but it might be helpful to assert in qdev_try_create() that
if we're using the default bus then the object is a sysbus
device object. At least then the problem will be immediately
clear rather than only showing up if you run a monitor command
later.

thanks
-- PMM

Re: [Qemu-devel] [patch v3 7/9] vfio: vote the function 0 to do host bus reset when aer occurred

2016-03-15 Thread Alex Williamson

On Tue, 15 Mar 2016 09:35:47 +0800
Cao jin  wrote:

> From: Chen Fan 
> 
> Signed-off-by: Chen Fan 
> ---
>  hw/pci/pci.c |  2 ++
>  hw/vfio/pci.c| 14 ++
>  hw/vfio/pci.h|  1 +
>  include/hw/pci/pci_bus.h |  2 ++
>  4 files changed, 19 insertions(+)
> 
> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
> index e67664d..953745d 100644
> --- a/hw/pci/pci.c
> +++ b/hw/pci/pci.c
> @@ -276,6 +276,8 @@ static void pcibus_reset(BusState *qbus)
>  for (i = 0; i < bus->nirq; i++) {
>  assert(bus->irq_count[i] == 0);
>  }
> +
> +bus->is_bus_rst = false;
>  }
>  
>  static void pci_host_bus_register(DeviceState *host)
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 223c0ee..b944d0b 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -1901,6 +1901,8 @@ static void vfio_check_hot_bus_reset(VFIOPCIDevice 
> *vdev, Error **errp)
>  /* List all affected devices by bus reset */
>  devices = >devices[0];
>  
> +vdev->single_depend_dev = (info->count == 1);
> +
>  /* Verify that we have all the groups required */
>  for (i = 0; i < info->count; i++) {
>  PCIHostDeviceAddress host;
> @@ -2593,6 +2595,10 @@ static void vfio_err_notifier_handler(void *opaque)
>  return;
>  }
>  
> +if ((vdev->features & VFIO_FEATURE_ENABLE_AER)) {
> +vdev->pdev.bus->is_bus_rst = true;
> +}
> +

So we're *assuming* that the next reset will be a bus reset because we
took an AER fault... what if that particular error got handled in
another way, maybe a device specific handler that doesn't do a bus
reset?  The asymmetry of setting a value here and clearing it in PCI
code is pretty undesirable as well.  Can we detect that the bus is in
reset on our own given the current set of configuration restrictions?
Seems pretty easy to test for an AER device with the parent bridge
having PCI_BRIDGE_CTL_BUS_RESET set and wait for function #0 to do a
hot reset.  Thanks,

Alex

>  /*
>   * TBD. Retrieve the error details and decide what action
>   * needs to be taken. One of the actions could be to pass
> @@ -3060,6 +3066,14 @@ static void vfio_pci_reset(DeviceState *dev)
>  
>  trace_vfio_pci_reset(vdev->vbasedev.name);
>  
> +if (pdev->bus->is_bus_rst) {
> +/* simply voting the function 0 to do hot bus reset */
> +if (pci_get_function_0(pdev) == pdev) {
> +vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
> +}
> +return;
> +}
> +
>  vfio_pci_pre_reset(vdev);
>  
>  if (vdev->resetfn && !vdev->resetfn(vdev)) {
> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
> index aff46c2..32bd31f 100644
> --- a/hw/vfio/pci.h
> +++ b/hw/vfio/pci.h
> @@ -143,6 +143,7 @@ typedef struct VFIOPCIDevice {
>  bool no_kvm_intx;
>  bool no_kvm_msi;
>  bool no_kvm_msix;
> +bool single_depend_dev;
>  } VFIOPCIDevice;
>  
>  uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
> diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
> index 403fec6..6bcd334 100644
> --- a/include/hw/pci/pci_bus.h
> +++ b/include/hw/pci/pci_bus.h
> @@ -39,6 +39,8 @@ struct PCIBus {
> Keep a count of the number of devices with raised IRQs.  */
>  int nirq;
>  int *irq_count;
> +
> +bool is_bus_rst;
>  };
>  
>  typedef struct PCIBridgeWindows PCIBridgeWindows;

Re: [Qemu-devel] [Qemu-arm] [PATCH] sd: Fix "info qtree" on boards with SD cards

2016-03-15 Thread Peter Maydell

On 15 March 2016 at 20:28, Thomas Hanson  wrote:
> The patch looks good.
>
> Would it also be good to update bus_add_child() so that it NULL-checks
> its "bus" parameter before dereferencing it?

No, I think it's just a programming error to call qdev_set_parent_bus()
with a NULL bus parameter, so crashing is fine.

(The problem fixed by this patch doesn't involve calling bus_add_child()
with a NULL pointer, in any case -- qdev_try_create() will handle
a NULL bus pointer as "use the default system bus", so by the time
it gets to bus_add_child() the bus pointer is never NULL. It's
using the default bus at all that causes things to go wrong much
later on down the line.)

thanks
-- PMM

Re: [Qemu-devel] [PATCH 01/22] block: Add two dirty bitmap getters

2016-03-15 Thread Vladimir Sementsov-Ogievskiy


On 15.03.2016 23:04, Vladimir Sementsov-Ogievskiy wrote:

From: Fam Zheng 

For dirty bitmap users to get the size and the name of a
BdrvDirtyBitmap.

Signed-off-by: Fam Zheng 
Reviewed-by: John Snow 
Signed-off-by: Vladimir Sementsov-Ogievskiy 


it's an accidental s.o.b., actually there are no changes by me.


---
  block/dirty-bitmap.c | 10 ++
  include/block/dirty-bitmap.h |  2 ++
  2 files changed, 12 insertions(+)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 556e1d1..45cfa3b 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -97,6 +97,16 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState 
*bs,
  return bitmap;
  }
  
+int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap)

+{
+return bitmap->size;
+}
+
+const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap)
+{
+return bitmap->name;
+}
+
  bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
  {
  return bitmap->successor;
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 80afe60..4dc8750 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -29,6 +29,8 @@ uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState 
*bs);
  uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap);
  bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap);
  bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap);
+const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap);
+int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap);
  DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap);
  int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
 int64_t sector);



--
Best regards,
Vladimir

[Qemu-devel] [PATCH 22/22] qcow2-dirty-bitmap: add EXTRA_DATA_COMPATIBLE flag

2016-03-15 Thread Vladimir Sementsov-Ogievskiy

If this flag is unset and extra data is present, the bitmap should be
read-only. For now, just return an error in this case.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/qcow2-dirty-bitmap.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/block/qcow2-dirty-bitmap.c b/block/qcow2-dirty-bitmap.c
index 159e935..95c166c 100644
--- a/block/qcow2-dirty-bitmap.c
+++ b/block/qcow2-dirty-bitmap.c
@@ -45,6 +45,7 @@
 #define BME_RESERVED_FLAGS 0xfffc
 #define BME_FLAG_IN_USE 1
 #define BME_FLAG_AUTO   (1U << 1)
+#define BME_FLAG_EXTRA_DATA_COMPATIBLE   (1U << 1)
 
 /* bits [1, 8] U [56, 63] are reserved */
 #define BME_TABLE_ENTRY_RESERVED_MASK 0xff0001fe
@@ -333,6 +334,13 @@ static BdrvDirtyBitmap *load_bitmap(BlockDriverState *bs, 
QCow2Bitmap *bm,
 return NULL;
 }
 
+if (!(bmh->flags & BME_FLAG_EXTRA_DATA_COMPATIBLE) &&
+bmh->extra_data_size != 0) {
+error_setg(errp, "Uncompatible extra data found for bitmap '%s'",
+   bm->name);
+return NULL;
+}
+
 bitmap_table = g_try_malloc(bmh->bitmap_table_size * sizeof(uint64_t));
 if (bitmap_table == NULL) {
 error_setg_errno(errp, -ENOMEM,
-- 
1.8.3.1

[Qemu-devel] [PATCH 19/22] iotests: add VM.test_launcn()

2016-03-15 Thread Vladimir Sementsov-Ogievskiy

Test that the VM can launch, and print the qemu output if the launch
fails. This function is needed for testing erroneous cases.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 tests/qemu-iotests/iotests.py | 20 
 1 file changed, 20 insertions(+)

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 6807b07..187b434 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -226,6 +226,26 @@ class VM(object):
 os.remove(self._monitor_path)
 raise
 
+def test_launch(self):
+'''Launch the VM, an error is expected'''
+try:
+self.launch()
+except:
+self._popen.wait()
+regex = re.compile(r"qemu-system-\w+")
+print "Test launch failed: %d" % self._popen.returncode
+print "--- qemu output ---"
+for line in open(self._qemu_log_path):
+#filter qtest comments
+if not "] OPENED" in line:
+print regex.sub("qemu-system-*", line)
+print "--- end qemu output ---"
+return False
+
+print "Tast launch successed!"
+self.shutdown()
+return True
+
 def shutdown(self):
 '''Terminate the VM and clean up'''
 if not self._popen is None:
-- 
1.8.3.1

[Qemu-devel] [PATCH 21/22] qcow2-dirty-bitmap: add AUTO flag

2016-03-15 Thread Vladimir Sementsov-Ogievskiy

The bitmap should be auto-loaded if auto flag is set.
For now, actually, there are no methods to set it.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/qcow2-dirty-bitmap.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/block/qcow2-dirty-bitmap.c b/block/qcow2-dirty-bitmap.c
index 70c6e36..159e935 100644
--- a/block/qcow2-dirty-bitmap.c
+++ b/block/qcow2-dirty-bitmap.c
@@ -42,8 +42,9 @@
 #define BME_MAX_NAME_SIZE 1023
 
 /* Bitmap directory entry flags */
-#define BME_RESERVED_FLAGS 0xfffe
+#define BME_RESERVED_FLAGS 0xfffc
 #define BME_FLAG_IN_USE 1
+#define BME_FLAG_AUTO   (1U << 1)
 
 /* bits [1, 8] U [56, 63] are reserved */
 #define BME_TABLE_ENTRY_RESERVED_MASK 0xff0001fe
@@ -52,6 +53,9 @@ typedef enum BitmapType {
 BT_DIRTY_TRACKING_BITMAP = 1
 } BitmapType;
 
+static BdrvDirtyBitmap *load_bitmap(BlockDriverState *bs, QCow2Bitmap *bm,
+Error **errp);
+
 void qcow2_free_bitmaps(BlockDriverState *bs)
 {
 BDRVQcow2State *s = bs->opaque;
@@ -215,6 +219,13 @@ static int directory_read(BlockDriverState *bs, Error 
**errp)
 bm->offset = offset;
 bm->name = g_strndup((char *)(h + 1), h->name_size);
 
+if (h->flags & BME_FLAG_AUTO) {
+load_bitmap(bs, bm, errp);
+if (*errp != NULL) {
+goto fail;
+}
+}
+
 offset += dir_entry_size(h);
 }
 return 0;
-- 
1.8.3.1

[Qemu-devel] [PATCH 16/22] qemu: command line option for dirty bitmaps

2016-03-15 Thread Vladimir Sementsov-Ogievskiy

The patch adds the following command line option:

-dirty-bitmap [option1=val1][,option2=val2]...

Available options are:

name
The name of the bitmap.
Should be unique per 'file'/'node' and per 'for_node'.

node
The node to load and bind the bitmap.
It should be specified as 'id' suboption of one of '-node' options.

granularity
Granularity (in bytes) for created dirty bitmap.
If the bitmap already exists in the specified 'file'/'file_id'/device,
its granularity will not be changed but only checked (an error will be
generated if this check fails).

enabled
on|off
Enabled flag for the bitmap.
By default the bitmap will be enabled.

create
on|off
By default is off.
If on, a new bitmap will be created in the image; if a bitmap with the
same name already exists, an error will be generated.
If off, the bitmap will be loaded from the image; if there is no such
bitmap, an error will be generated.
If create=off and granularity is specified, the granularity of the loaded
bitmap will be checked and an error will be generated if it does not match.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 blockdev.c| 36 +
 include/sysemu/blockdev.h |  1 +
 include/sysemu/sysemu.h   |  1 +
 qemu-options.hx   | 35 +
 vl.c  | 79 +++
 5 files changed, 152 insertions(+)

diff --git a/blockdev.c b/blockdev.c
index 322ca03..3a3d71c 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -185,6 +185,12 @@ QemuOpts *drive_def(const char *optstr)
 return qemu_opts_parse_noisily(qemu_find_opts("drive"), optstr, false);
 }
 
+QemuOpts *dirty_bitmap_def(const char *optstr)
+{
+return qemu_opts_parse_noisily(qemu_find_opts("dirty-bitmap"), optstr,
+   false);
+}
+
 QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file,
 const char *optstr)
 {
@@ -4084,6 +4090,36 @@ BlockJobInfoList *qmp_query_block_jobs(Error **errp)
 return head;
 }
 
+QemuOptsList qemu_dirty_bitmap_opts = {
+.name = "dirty-bitmap",
+.head = QTAILQ_HEAD_INITIALIZER(qemu_dirty_bitmap_opts.head),
+.desc = {
+{
+.name = "name",
+.type = QEMU_OPT_STRING,
+.help = "Name of the dirty bitmap",
+},{
+.name = "node",
+.type = QEMU_OPT_STRING,
+.help = "node name to bind the bitmap to (and load it from it)",
+},{
+.name = "granularity",
+.type = QEMU_OPT_NUMBER,
+.help = "granularity",
+},{
+.name = "enabled",
+.type = QEMU_OPT_BOOL,
+.help = "enabled flag (default is 'on')",
+},{
+.name = "create",
+.type = QEMU_OPT_BOOL,
+.help = "create flag (default is 'off'), "
+"if on, new dirty bitmap will be created, "
+"else the existing one will be loaded"
+}
+}
+};
+
 QemuOptsList qemu_common_drive_opts = {
 .name = "drive",
 .head = QTAILQ_HEAD_INITIALIZER(qemu_common_drive_opts.head),
diff --git a/include/sysemu/blockdev.h b/include/sysemu/blockdev.h
index 16432f3..105d11b 100644
--- a/include/sysemu/blockdev.h
+++ b/include/sysemu/blockdev.h
@@ -56,6 +56,7 @@ int drive_get_max_devs(BlockInterfaceType type);
 DriveInfo *drive_get_next(BlockInterfaceType type);
 
 QemuOpts *drive_def(const char *optstr);
+QemuOpts *dirty_bitmap_def(const char *optstr);
 QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file,
 const char *optstr);
 DriveInfo *drive_new(QemuOpts *arg, BlockInterfaceType block_default_type);
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 3bb8897..7dc3980 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -248,6 +248,7 @@ bool usb_enabled(void);
 
 extern QemuOptsList qemu_legacy_drive_opts;
 extern QemuOptsList qemu_common_drive_opts;
+extern QemuOptsList qemu_dirty_bitmap_opts;
 extern QemuOptsList qemu_drive_opts;
 extern QemuOptsList qemu_chardev_opts;
 extern QemuOptsList qemu_device_opts;
diff --git a/qemu-options.hx b/qemu-options.hx
index 0cf7bb9..e750ebc 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -680,6 +680,41 @@ qemu-system-i386 -hda a -hdb b
 @end example
 ETEXI
 
+DEF("dirty-bitmap", HAS_ARG, QEMU_OPTION_dirty_bitmap,
+"-dirty-bitmap name=name,node=@var{id}\n"
+"  
[,granularity=granularity][,enabled=on|off][,create=on|off]\n",
+QEMU_ARCH_ALL)
+STEXI
+@item -dirty-bitmap @var{option}[,@var{option}[,@var{option}[,...]]]
+@findex -dirty-bitmap
+
+Define a dirty-bitmap. Valid options are:
+
+@table @option
+@item name=@var{name}
+The name of the bitmap. Should be unique per @var{file}/@var{node} and per
+@var{for_node}.
+@item node=@var{node}
+The node to load and bind the bitmap. It should be specified as @var{id} 
suboption
+of one of @option{-node}

[Qemu-devel] [PATCH 17/22] qcow2-dirty-bitmap: add IN_USE flag

2016-03-15 Thread Vladimir Sementsov-Ogievskiy

This flag is set on bitmap load and unset on store. If it is already
set when loading the bitmap, the bitmap should not be loaded: it is either
in use by another drive or inconsistent (it was not successfully saved).

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/qcow2-dirty-bitmap.c | 45 -
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/block/qcow2-dirty-bitmap.c b/block/qcow2-dirty-bitmap.c
index 384ccea..f210fee 100644
--- a/block/qcow2-dirty-bitmap.c
+++ b/block/qcow2-dirty-bitmap.c
@@ -42,7 +42,8 @@
 #define BME_MAX_NAME_SIZE 1023
 
 /* Bitmap directory entry flags */
-#define BME_RESERVED_FLAGS 0x
+#define BME_RESERVED_FLAGS 0xfffe
+#define BME_FLAG_IN_USE 1
 
 /* bits [1, 8] U [56, 63] are reserved */
 #define BME_TABLE_ENTRY_RESERVED_MASK 0xff0001fe
@@ -134,6 +135,29 @@ static QCow2BitmapHeader *bitmap_header(BDRVQcow2State *s,
(s->bitmap_directory + bitmap->offset);
 }
 
+static int update_bitmap_header_sync(BlockDriverState *bs, QCow2Bitmap *bitmap)
+{
+int ret;
+BDRVQcow2State *s = bs->opaque;
+QCow2BitmapHeader *h = bitmap_header(s, bitmap);
+
+bitmap_header_to_be(h);
+ret = bdrv_pwrite(bs->file->bs,
+  s->bitmap_directory_offset + bitmap->offset,
+  h, dir_entry_size(h));
+bitmap_header_to_cpu(h);
+if (ret < 0) {
+return ret;
+}
+
+ret = bdrv_flush(bs);
+if (ret < 0) {
+return ret;
+}
+
+return 0;
+}
+
 static int directory_read(BlockDriverState *bs, Error **errp)
 {
 int ret;
@@ -293,6 +317,11 @@ static BdrvDirtyBitmap *load_bitmap(BlockDriverState *bs, 
QCow2Bitmap *bm,
 
 bmh = bitmap_header(s, bm);
 
+if (bmh->flags & BME_FLAG_IN_USE) {
+error_setg(errp, "Bitmap '%s' is in use", bm->name);
+return NULL;
+}
+
 bitmap_table = g_try_malloc(bmh->bitmap_table_size * sizeof(uint64_t));
 if (bitmap_table == NULL) {
 error_setg_errno(errp, -ENOMEM,
@@ -321,6 +350,13 @@ static BdrvDirtyBitmap *load_bitmap(BlockDriverState *bs, 
QCow2Bitmap *bm,
 goto fail;
 }
 
+bmh->flags |= BME_FLAG_IN_USE;
+ret = update_bitmap_header_sync(bs, bm);
+if (ret < 0) {
+error_setg_errno(errp, -ret, "Could not set in_use in bitmap header");
+goto fail;
+}
+
 g_free(bitmap_table);
 return bitmap;
 
@@ -769,6 +805,13 @@ void qcow2_bitmap_store(BlockDriverState *bs,
 goto finish;
 }
 
+bmh->flags &= ~BME_FLAG_IN_USE;
+ret = update_bitmap_header_sync(bs, bm);
+if (ret < 0) {
+error_setg_errno(errp, ret, "Can't update bitmap header.");
+goto finish;
+}
+
 finish:
 g_free(bitmap_table);
 }
-- 
1.8.3.1

[Qemu-devel] [PATCH 18/22] qcow2-dirty-bitmaps: disallow stroing bitmap to other bs

2016-03-15 Thread Vladimir Sementsov-Ogievskiy

Check that the bitmap is stored to the owning bs.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/dirty-bitmap.c | 12 
 block/qcow2-dirty-bitmap.c   |  5 +
 include/block/dirty-bitmap.h |  2 ++
 3 files changed, 19 insertions(+)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 87ee4d7..9625f4a 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -485,3 +485,15 @@ BdrvDirtyBitmap *bdrv_load_dirty_bitmap(BlockDriverState 
*bs, const char *name,
 }
 return NULL;
 }
+
+bool bdrv_has_dirty_bitmap(BlockDriverState *bs, const BdrvDirtyBitmap *bitmap)
+{
+BdrvDirtyBitmap *bm, *next;
+QLIST_FOREACH_SAFE(bm, >dirty_bitmaps, list, next) {
+if (bm == bitmap) {
+return true;
+}
+}
+
+return false;
+}
diff --git a/block/qcow2-dirty-bitmap.c b/block/qcow2-dirty-bitmap.c
index f210fee..70c6e36 100644
--- a/block/qcow2-dirty-bitmap.c
+++ b/block/qcow2-dirty-bitmap.c
@@ -757,6 +757,11 @@ void qcow2_bitmap_store(BlockDriverState *bs,
 uint64_t size = bdrv_dirty_bitmap_size(bitmap);
 int granularity = bdrv_dirty_bitmap_granularity(bitmap);
 
+if (!bdrv_has_dirty_bitmap(bs, bitmap)) {
+error_setg(errp, "Can't store bitmap to the other node.");
+return;
+}
+
 /* find/create dirty bitmap */
 bm = find_bitmap_by_name(bs, name);
 if (bm == NULL) {
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 66ba3f8..af76ac1 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -62,4 +62,6 @@ void bdrv_finalize_persistent_dirty_bitmaps(BlockDriverState 
*bs);
 BdrvDirtyBitmap *bdrv_load_dirty_bitmap(BlockDriverState *bs, const char *name,
 Error **errp);
 
+bool bdrv_has_dirty_bitmap(BlockDriverState *bs, const BdrvDirtyBitmap 
*bitmap);
+
 #endif
-- 
1.8.3.1

[Qemu-devel] [PATCH 12/22] qcow2-dirty-bitmap: add qcow2_bitmap_load_check()

2016-03-15 Thread Vladimir Sementsov-Ogievskiy

The function checks for the existence of the bitmap without loading it.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/dirty-bitmap.c | 15 +++
 block/qcow2-dirty-bitmap.c   |  5 +
 block/qcow2.c|  1 +
 block/qcow2.h|  1 +
 include/block/block_int.h|  2 ++
 include/block/dirty-bitmap.h |  1 +
 6 files changed, 25 insertions(+)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 816c6ee..7a44722 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -419,3 +419,18 @@ int bdrv_dirty_bitmap_store(const BdrvDirtyBitmap *bitmap, 
BlockDriverState *bs,
 {
 return hbitmap_store(bitmap->bitmap, bs, table, table_size, cluster_size);
 }
+
+bool bdrv_load_check_dirty_bitmap(BlockDriverState *file, const char *name)
+{
+BlockDriver *drv = file->drv;
+if (!drv) {
+return false;
+}
+if (drv->bdrv_dirty_bitmap_load_check) {
+return drv->bdrv_dirty_bitmap_load_check(file, name);
+}
+if (file->file)  {
+return bdrv_load_check_dirty_bitmap(file->file->bs, name);
+}
+return false;
+}
diff --git a/block/qcow2-dirty-bitmap.c b/block/qcow2-dirty-bitmap.c
index 28ed309..24415df 100644
--- a/block/qcow2-dirty-bitmap.c
+++ b/block/qcow2-dirty-bitmap.c
@@ -276,6 +276,11 @@ static int load_bitmap_data(BlockDriverState *bs, const 
uint64_t *bitmap_table,
 return ret;
 }
 
+bool qcow2_bitmap_load_check(BlockDriverState *file, const char *name)
+{
+return find_bitmap_by_name(file, name) != NULL;
+}
+
 static BdrvDirtyBitmap *load_bitmap(BlockDriverState *bs, QCow2Bitmap *bm,
 Error **errp)
 {
diff --git a/block/qcow2.c b/block/qcow2.c
index bda3026..7a342c2 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3442,6 +3442,7 @@ BlockDriver bdrv_qcow2 = {
 .bdrv_get_specific_info = qcow2_get_specific_info,
 
 .bdrv_dirty_bitmap_load = qcow2_bitmap_load,
+.bdrv_dirty_bitmap_load_check = qcow2_bitmap_load_check,
 .bdrv_dirty_bitmap_store = qcow2_bitmap_store,
 
 .bdrv_save_vmstate= qcow2_save_vmstate,
diff --git a/block/qcow2.h b/block/qcow2.h
index e4a517c..423c279 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -620,6 +620,7 @@ int qcow2_read_snapshots(BlockDriverState *bs);
 void qcow2_free_bitmaps(BlockDriverState *bs);
 int qcow2_read_bitmaps(BlockDriverState *bs, Error **errp);
 
+bool qcow2_bitmap_load_check(BlockDriverState *file, const char *name);
 BdrvDirtyBitmap *qcow2_bitmap_load(BlockDriverState *bs, const char *name,
Error **errp);
 void qcow2_bitmap_store(BlockDriverState *bs, const BdrvDirtyBitmap *bitmap,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 7cd05e1..66a388a 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -215,6 +215,8 @@ struct BlockDriver {
 int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
 ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs);
 
+bool (*bdrv_dirty_bitmap_load_check)(BlockDriverState *file,
+ const char *name);
 BdrvDirtyBitmap *(*bdrv_dirty_bitmap_load)(BlockDriverState *bs,
const char *name,
Error **errp);
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 20cb540..f3cedaa 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -53,5 +53,6 @@ int bdrv_dirty_bitmap_prepare_store(const BdrvDirtyBitmap 
*bitmap,
 int bdrv_dirty_bitmap_store(const BdrvDirtyBitmap *bitmap, BlockDriverState 
*bs,
 const uint64_t *table, uint32_t table_size,
 uint32_t cluster_size);
+bool bdrv_load_check_dirty_bitmap(BlockDriverState *file, const char *name);
 
 #endif
-- 
1.8.3.1

[Qemu-devel] [PATCH 10/22] qcow2-dirty-bitmap: add qcow2_bitmap_store()

2016-03-15 Thread Vladimir Sementsov-Ogievskiy

This function stores a block dirty bitmap to qcow2. If a bitmap with
the same name, size and granularity already exists, it will be
rewritten; if a bitmap with the same name exists but its granularity or
size does not match, an error will be generated.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/qcow2-dirty-bitmap.c | 443 +
 block/qcow2.c  |   1 +
 block/qcow2.h  |   2 +
 include/block/block_int.h  |   3 +
 4 files changed, 449 insertions(+)

diff --git a/block/qcow2-dirty-bitmap.c b/block/qcow2-dirty-bitmap.c
index c9f7ef1..28ed309 100644
--- a/block/qcow2-dirty-bitmap.c
+++ b/block/qcow2-dirty-bitmap.c
@@ -76,6 +76,15 @@ static void bitmap_header_to_cpu(QCow2BitmapHeader *h)
 be32_to_cpus(>extra_data_size);
 }
 
+static void bitmap_header_to_be(QCow2BitmapHeader *h)
+{
+cpu_to_be64s(>bitmap_table_offset);
+cpu_to_be32s(>bitmap_table_size);
+cpu_to_be32s(>flags);
+cpu_to_be16s(>name_size);
+cpu_to_be32s(>extra_data_size);
+}
+
 static int calc_dir_entry_size(size_t name_size)
 {
 return align_offset(sizeof(QCow2BitmapHeader) + name_size, 8);
@@ -86,6 +95,17 @@ static int dir_entry_size(QCow2BitmapHeader *h)
 return calc_dir_entry_size(h->name_size);
 }
 
+static void directory_to_be(uint8_t *dir, size_t size)
+{
+uint8_t *end = dir + size;
+while (dir < end) {
+QCow2BitmapHeader *h = (QCow2BitmapHeader *)dir;
+dir += dir_entry_size(h);
+
+bitmap_header_to_be(h);
+}
+}
+
 static int check_constraints(QCow2BitmapHeader *h, int cluster_size,
  uint64_t disk_size)
 {
@@ -320,3 +340,426 @@ BdrvDirtyBitmap *qcow2_bitmap_load(BlockDriverState *bs, 
const char *name,
 
 return load_bitmap(bs, bm, errp);
 }
+
+static int update_header_sync(BlockDriverState *bs)
+{
+int ret;
+
+ret = qcow2_update_header(bs);
+if (ret < 0) {
+return ret;
+}
+
+ret = bdrv_flush(bs);
+if (ret < 0) {
+return ret;
+}
+
+return 0;
+}
+
+/* write bitmap directory from the state to new allocated clusters */
+static int64_t directory_write(BlockDriverState *bs, const uint8_t *dir,
+   size_t size)
+{
+int ret = 0;
+uint8_t *dir_be = NULL;
+int64_t dir_offset = 0;
+
+dir_be = g_try_malloc(size);
+if (dir_be == NULL) {
+return -ENOMEM;
+}
+memcpy(dir_be, dir, size);
+directory_to_be(dir_be, size);
+
+/* Allocate space for the new bitmap directory */
+dir_offset = qcow2_alloc_clusters(bs, size);
+if (dir_offset < 0) {
+ret = dir_offset;
+goto out;
+}
+
+/* The bitmap directory position has not yet been updated, so these
+ * clusters must indeed be completely free */
+ret = qcow2_pre_write_overlap_check(bs, 0, dir_offset, size);
+if (ret < 0) {
+goto out;
+}
+
+ret = bdrv_pwrite(bs->file->bs, dir_offset, dir_be, size);
+if (ret < 0) {
+goto out;
+}
+
+out:
+g_free(dir_be);
+
+if (ret < 0) {
+if (dir_offset > 0) {
+qcow2_free_clusters(bs, dir_offset, size, QCOW2_DISCARD_ALWAYS);
+}
+
+return ret;
+}
+
+return dir_offset;
+}
+
+static int directory_push_entry(BlockDriverState *bs, QCow2BitmapHeader 
*header)
+{
+BDRVQcow2State *s = bs->opaque;
+int ret;
+int entry_size = dir_entry_size(header);
+int64_t new_offset = 0, old_offset = 0;
+uint64_t new_size = s->bitmap_directory_size + entry_size, old_size = 0;
+void *p;
+int64_t nb_sectors = bdrv_nb_sectors(bs);
+
+if (nb_sectors < 0) {
+return nb_sectors;
+}
+
+if (new_size > QCOW_MAX_DIRTY_BITMAP_DIRECTORY_SIZE) {
+return -EINVAL;
+}
+
+ret = check_constraints(header, s->cluster_size,
+nb_sectors << BDRV_SECTOR_BITS);
+if (ret < 0) {
+return -EINVAL;
+}
+
+old_offset = s->bitmap_directory_offset;
+old_size = s->bitmap_directory_size;
+
+uint8_t *new_dir = g_try_malloc(new_size);
+if (new_dir == NULL) {
+return -ENOMEM;
+}
+memcpy(new_dir, s->bitmap_directory, s->bitmap_directory_size);
+memcpy(new_dir + s->bitmap_directory_size, header, entry_size);
+
+new_offset = directory_write(bs, new_dir, new_size);
+if (new_offset < 0) {
+ret = new_offset;
+goto fail;
+}
+
+ret = bdrv_flush(bs);
+if (ret < 0) {
+goto fail;
+}
+
+s->bitmap_directory_offset = new_offset;
+s->bitmap_directory_size = new_size;
+
+ret = update_header_sync(bs);
+if (ret < 0) {
+goto fail;
+}
+
+if (old_size) {
+qcow2_free_clusters(bs, old_offset, old_size, QCOW2_DISCARD_ALWAYS);
+}
+
+g_free(s->bitmap_directory);
+s->bitmap_directory = new_dir;
+
+return 0;
+
+fail:
+g_free(new_dir);
+if (new_offset > 0) {
+

[Qemu-devel] [PATCH 20/22] iotests: test internal persistent dirty bitmap

2016-03-15 Thread Vladimir Sementsov-Ogievskiy

Add simple test cases for testing persistent dirty bitmaps.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 tests/qemu-iotests/160| 112 ++
 tests/qemu-iotests/160.out|  21 
 tests/qemu-iotests/group  |   1 +
 tests/qemu-iotests/iotests.py |   6 +++
 4 files changed, 140 insertions(+)
 create mode 100755 tests/qemu-iotests/160
 create mode 100644 tests/qemu-iotests/160.out

diff --git a/tests/qemu-iotests/160 b/tests/qemu-iotests/160
new file mode 100755
index 000..f9843da
--- /dev/null
+++ b/tests/qemu-iotests/160
@@ -0,0 +1,112 @@
+#!/usr/bin/env python
+#
+# Tests for persistent dirty bitmaps.
+#
+# Copyright: Vladimir Sementsov-Ogievskiy 2015
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+import os
+import iotests
+import time
+from iotests import qemu_img
+
+disk = os.path.join(iotests.test_dir, 'disk')
+
+size   = 0x4000 # 1G
+sector_size = 512
+granularity = 0x1
+regions1 = [
+{ 'start': 0,  'count': 0x10 },
+{ 'start': 0x20,   'count': 0x10 }
+]
+regions2 = [
+{ 'start': 0x1000, 'count': 0x2  },
+{ 'start': 0x3999, 'count': 0x1  }
+]
+
+class TestPersistentDirtyBitmap(iotests.QMPTestCase):
+
+def setUp(self):
+qemu_img('create', '-f', iotests.imgfmt, disk, str(size))
+
+def mkVm(self, create_bitmap):
+vm = iotests.VM().add_drive(disk)
+vm.add_dirty_bitmap('1230---1230-0123', 'drive0', 
create_bitmap)
+return vm
+
+def tearDown(self):
+os.remove(disk)
+
+def getMd5(self):
+result = self.vm.qmp('query-block');
+return result['return'][0]['dirty-bitmaps'][0]['md5']
+
+def checkBitmap(self, md5):
+result = self.vm.qmp('query-block');
+self.assert_qmp(result, 'return[0]/dirty-bitmaps[0]/md5', md5);
+
+def writeRegions(self, regions):
+for r in regions:
+  self.vm.hmp_qemu_io('drive0',
+'write %d %d' % (r['start'], r['count']))
+
+def test_persistent(self):
+self.vm = self.mkVm(True)
+self.vm.launch()
+
+self.writeRegions(regions1)
+md5 = self.getMd5()
+
+self.vm.shutdown()
+self.vm = self.mkVm(False)
+self.vm.launch()
+
+self.checkBitmap(md5)
+self.writeRegions(regions2)
+md5 = self.getMd5()
+
+self.vm.shutdown()
+self.vm.launch()
+
+self.checkBitmap(md5)
+
+self.vm.shutdown()
+
+def test_not_exist(self):
+vm = self.mkVm(False)
+vm.test_launch()
+
+def test_already_exists(self):
+vm = self.mkVm(True)
+vm.test_launch()
+vm.test_launch()
+
+def test_in_use(self):
+vm = self.mkVm(True)
+vm.launch()
+vm.shutdown()
+
+vm1 = self.mkVm(False)
+vm1.launch()
+
+vm2 = self.mkVm(False)
+vm2.test_launch()
+
+vm1.shutdown()
+
+
+if __name__ == '__main__':
+iotests.main()
diff --git a/tests/qemu-iotests/160.out b/tests/qemu-iotests/160.out
new file mode 100644
index 000..653d21b
--- /dev/null
+++ b/tests/qemu-iotests/160.out
@@ -0,0 +1,21 @@
+
+--
+Ran 4 tests
+
+OK
+Tast launch successed!
+Test launch failed: 1
+--- qemu output ---
+qemu-system-*: -dirty-bitmap 
name=1230---1230-0123,node=drive0,create=on: bitmap 
'1230---1230-0123' already exists
+
+--- end qemu output ---
+Test launch failed: 1
+--- qemu output ---
+qemu-system-*: -dirty-bitmap 
name=1230---1230-0123,node=drive0,create=off: Bitmap 
'1230---1230-0123' is in use
+
+--- end qemu output ---
+Test launch failed: 1
+--- qemu output ---
+qemu-system-*: -dirty-bitmap 
name=1230---1230-0123,node=drive0,create=off: Could not 
find bitmap '1230---1230-0123' in the node 'drive0'
+
+--- end qemu output ---
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index faf0f21..641106a 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -150,3 +150,4 @@
 145 auto quick
 146 auto quick
 148 rw auto quick
+160 rw auto quick
diff --git a/tests/qemu-iotests/iotests.py

[Qemu-devel] [PATCH 14/22] block: add bdrv_load_dirty_bitmap()

2016-03-15 Thread Vladimir Sementsov-Ogievskiy

The function loads a dirty bitmap from a file, using the underlying driver
function.

Note: the function doesn't change BdrvDirtyBitmap.file field. This field
is only used by bdrv_store_dirty_bitmap() function and is ONLY written
by bdrv_dirty_bitmap_set_file() function.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/dirty-bitmap.c | 16 
 include/block/dirty-bitmap.h |  2 ++
 2 files changed, 18 insertions(+)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index c9e999f..87ee4d7 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -469,3 +469,19 @@ void 
bdrv_finalize_persistent_dirty_bitmaps(BlockDriverState *bs)
 }
 }
 }
+
+BdrvDirtyBitmap *bdrv_load_dirty_bitmap(BlockDriverState *bs, const char *name,
+Error **errp)
+{
+BlockDriver *drv = bs->drv;
+if (!drv) {
+return NULL;
+}
+if (drv->bdrv_dirty_bitmap_load) {
+return drv->bdrv_dirty_bitmap_load(bs, name, errp);
+}
+if (bs->file)  {
+return bdrv_load_dirty_bitmap(bs, name, errp);
+}
+return NULL;
+}
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 37b5f23..66ba3f8 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -59,5 +59,7 @@ void 
bdrv_dirty_bitmap_set_internal_persistance(BdrvDirtyBitmap *bitmap,
 void bdrv_store_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
  Error **errp);
 void bdrv_finalize_persistent_dirty_bitmaps(BlockDriverState *bs);
+BdrvDirtyBitmap *bdrv_load_dirty_bitmap(BlockDriverState *bs, const char *name,
+Error **errp);
 
 #endif
-- 
1.8.3.1

[Qemu-devel] [PATCH 11/22] qcow2: add dirty bitmaps extension

2016-03-15 Thread Vladimir Sementsov-Ogievskiy

Add dirty bitmap extension as specified in docs/specs/qcow2.txt.

Load bitmap headers on open. Handle close and update_header.

Handle resize: for now, just block resize if there are dirty bitmaps.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/qcow2.c | 89 +++
 1 file changed, 89 insertions(+)

diff --git a/block/qcow2.c b/block/qcow2.c
index 20d095b..bda3026 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -62,6 +62,7 @@ typedef struct {
 #define  QCOW2_EXT_MAGIC_END 0
 #define  QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
 #define  QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
+#define  QCOW2_EXT_MAGIC_DIRTY_BITMAPS 0x23852875
 
 static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
 {
@@ -91,6 +92,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, 
uint64_t start_offset,
 QCowExtension ext;
 uint64_t offset;
 int ret;
+Qcow2BitmapHeaderExt bitmaps_ext;
 
 #ifdef DEBUG_EXT
 printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, 
end_offset);
@@ -161,6 +163,67 @@ static int qcow2_read_extensions(BlockDriverState *bs, 
uint64_t start_offset,
 }
 break;
 
+case QCOW2_EXT_MAGIC_DIRTY_BITMAPS:
+ret = bdrv_pread(bs->file->bs, offset, _ext, ext.len);
+if (ret < 0) {
+error_setg_errno(errp, -ret, "ERROR: bitmaps_ext: "
+ "Could not read ext header");
+return ret;
+}
+
+if (bitmaps_ext.reserved32 != 0) {
+error_setg_errno(errp, -ret, "ERROR: bitmaps_ext: "
+ "Reserved field is not zero.");
+return -EINVAL;
+}
+
+be32_to_cpus(_ext.nb_bitmaps);
+be64_to_cpus(_ext.bitmap_directory_size);
+be64_to_cpus(_ext.bitmap_directory_offset);
+
+if (bitmaps_ext.nb_bitmaps > QCOW_MAX_DIRTY_BITMAPS) {
+error_setg(errp, "ERROR: bitmaps_ext: "
+ "too many dirty bitmaps");
+return -EINVAL;
+}
+
+if (bitmaps_ext.nb_bitmaps == 0) {
+error_setg(errp, "ERROR: bitmaps_ext: "
+ "found bitmaps extension with zero bitmaps");
+return -EINVAL;
+}
+
+if (bitmaps_ext.bitmap_directory_offset & (s->cluster_size - 1)) {
+error_setg(errp, "ERROR: bitmaps_ext: "
+ "wrong dirty bitmap directory offset");
+return -EINVAL;
+}
+
+if (bitmaps_ext.bitmap_directory_size >
+QCOW_MAX_DIRTY_BITMAP_DIRECTORY_SIZE) {
+error_setg(errp, "ERROR: bitmaps_ext: "
+ "too large dirty bitmap directory");
+return -EINVAL;
+}
+
+s->nb_bitmaps = bitmaps_ext.nb_bitmaps;
+s->bitmap_directory_offset =
+bitmaps_ext.bitmap_directory_offset;
+s->bitmap_directory_size =
+bitmaps_ext.bitmap_directory_size;
+
+ret = qcow2_read_bitmaps(bs, errp);
+if (ret < 0) {
+return ret;
+}
+
+#ifdef DEBUG_EXT
+printf("Qcow2: Got dirty bitmaps extension:"
+   " offset=%" PRIu64 " nb_bitmaps=%" PRIu32 "\n",
+   s->bitmaps_offset, s->nb_bitmaps);
+#endif
+break;
+
 default:
 /* unknown magic - save it in case we need to rewrite the header */
 {
@@ -1178,6 +1241,7 @@ static int qcow2_open(BlockDriverState *bs, QDict 
*options, int flags,
 g_free(s->unknown_header_fields);
 cleanup_unknown_header_ext(bs);
 qcow2_free_snapshots(bs);
+qcow2_free_bitmaps(bs);
 qcow2_refcount_close(bs);
 qemu_vfree(s->l1_table);
 /* else pre-write overlap checks in cache_destroy may crash */
@@ -1742,6 +1806,7 @@ static void qcow2_close(BlockDriverState *bs)
 qemu_vfree(s->cluster_data);
 qcow2_refcount_close(bs);
 qcow2_free_snapshots(bs);
+qcow2_free_bitmaps(bs);
 }
 
 static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
@@ -1939,6 +2004,24 @@ int qcow2_update_header(BlockDriverState *bs)
 buflen -= ret;
 }
 
+if (s->nb_bitmaps > 0) {
+Qcow2BitmapHeaderExt bitmaps_header = {
+.nb_bitmaps = cpu_to_be32(s->nb_bitmaps),
+.bitmap_directory_size =
+cpu_to_be64(s->bitmap_directory_size),
+.bitmap_directory_offset =
+cpu_to_be64(s->bitmap_directory_offset)
+};
+ret = header_ext_add(buf, QCOW2_EXT_MAGIC_DIRTY_BITMAPS,
+ &bitmaps_header, sizeof(bitmaps_header),
+ buflen);
+if (ret < 0) {
+goto fail;
+}

[Qemu-devel] [PATCH 15/22] qcow2-dirty-bitmap: add autoclear bit

2016-03-15 Thread Vladimir Sementsov-Ogievskiy

Add an autoclear bit to handle the image being rewritten by an old qemu
version.

If the autoclear bit is not set but the bitmaps extension is found, the
extension is not loaded and a warning is generated.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/qcow2-dirty-bitmap.c |  4 
 block/qcow2.c  | 12 ++--
 block/qcow2.h  |  9 +
 3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/block/qcow2-dirty-bitmap.c b/block/qcow2-dirty-bitmap.c
index 24415df..384ccea 100644
--- a/block/qcow2-dirty-bitmap.c
+++ b/block/qcow2-dirty-bitmap.c
@@ -419,6 +419,7 @@ static int directory_push_entry(BlockDriverState *bs, 
QCow2BitmapHeader *header)
 int64_t new_offset = 0, old_offset = 0;
 uint64_t new_size = s->bitmap_directory_size + entry_size, old_size = 0;
 void *p;
+uint64_t old_autocl;
 int64_t nb_sectors = bdrv_nb_sectors(bs);
 
 if (nb_sectors < 0) {
@@ -437,6 +438,7 @@ static int directory_push_entry(BlockDriverState *bs, 
QCow2BitmapHeader *header)
 
 old_offset = s->bitmap_directory_offset;
 old_size = s->bitmap_directory_size;
+old_autocl = s->autoclear_features;
 
 uint8_t *new_dir = g_try_malloc(new_size);
 if (new_dir == NULL) {
@@ -458,6 +460,7 @@ static int directory_push_entry(BlockDriverState *bs, 
QCow2BitmapHeader *header)
 
 s->bitmap_directory_offset = new_offset;
 s->bitmap_directory_size = new_size;
+s->autoclear_features |= QCOW2_AUTOCLEAR_DIRTY_BITMAPS;
 
 ret = update_header_sync(bs);
 if (ret < 0) {
@@ -479,6 +482,7 @@ fail:
 qcow2_free_clusters(bs, new_offset, new_size, QCOW2_DISCARD_ALWAYS);
 s->bitmap_directory_offset = old_offset;
 s->bitmap_directory_size = old_size;
+s->autoclear_features = old_autocl;
 }
 
 p = g_try_realloc(s->bitmap_directory, s->bitmap_directory_size);
diff --git a/block/qcow2.c b/block/qcow2.c
index 7a342c2..f269bab 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -164,6 +164,13 @@ static int qcow2_read_extensions(BlockDriverState *bs, 
uint64_t start_offset,
 break;
 
 case QCOW2_EXT_MAGIC_DIRTY_BITMAPS:
+if (!(s->autoclear_features & QCOW2_AUTOCLEAR_DIRTY_BITMAPS)) {
+fprintf(stderr,
+"WARNING: bitmaps_ext: autoclear flag is not "
+"set, all bitmaps will be considered as inconsistent");
+break;
+}
+
 ret = bdrv_pread(bs->file->bs, offset, &bitmaps_ext, ext.len);
 if (ret < 0) {
 error_setg_errno(errp, -ret, "ERROR: bitmaps_ext: "
@@ -1205,8 +1212,9 @@ static int qcow2_open(BlockDriverState *bs, QDict 
*options, int flags,
 }
 
 /* Clear unknown autoclear feature bits */
-if (!bs->read_only && !(flags & BDRV_O_INACTIVE) && s->autoclear_features) 
{
-s->autoclear_features = 0;
+if (!bs->read_only && !(flags & BDRV_O_INACTIVE) &&
+(s->autoclear_features & ~QCOW2_AUTOCLEAR_MASK)) {
+s->autoclear_features &= QCOW2_AUTOCLEAR_MASK;
 ret = qcow2_update_header(bs);
 if (ret < 0) {
 error_setg_errno(errp, -ret, "Could not update qcow2 header");
diff --git a/block/qcow2.h b/block/qcow2.h
index 423c279..63ea543 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -220,6 +220,15 @@ enum {
 QCOW2_COMPAT_FEAT_MASK= QCOW2_COMPAT_LAZY_REFCOUNTS,
 };
 
+/* Autoclear feature bits */
+enum {
+QCOW2_AUTOCLEAR_DIRTY_BITMAPS_BITNR = 0,
+QCOW2_AUTOCLEAR_DIRTY_BITMAPS   =
+1 << QCOW2_AUTOCLEAR_DIRTY_BITMAPS_BITNR,
+
+QCOW2_AUTOCLEAR_MASK= QCOW2_AUTOCLEAR_DIRTY_BITMAPS,
+};
+
 enum qcow2_discard_type {
 QCOW2_DISCARD_NEVER = 0,
 QCOW2_DISCARD_ALWAYS,
-- 
1.8.3.1

[Qemu-devel] [PATCH 04/22] iotests: add default node-name

2016-03-15 Thread Vladimir Sementsov-Ogievskiy

When testing migration, the node-names auto-generated by qemu differ
between the source and destination qemu, and migration fails. After this
patch, the node-names generated by iotests are the same on both sides.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 tests/qemu-iotests/iotests.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 2445cf2..6807b07 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -156,6 +156,7 @@ class VM(object):
 options.append('file=%s' % path)
 options.append('format=%s' % imgfmt)
 options.append('cache=%s' % cachemode)
+options.append('node-name=drivenode%d' % self._num_drives)
 
 if opts:
 options.append(opts)
-- 
1.8.3.1

[Qemu-devel] [PATCH 09/22] qcow2-dirty-bitmap: add qcow2_bitmap_load()

2016-03-15 Thread Vladimir Sementsov-Ogievskiy

This function loads a block dirty bitmap from a qcow2 image.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/qcow2-dirty-bitmap.c | 110 +
 block/qcow2.c  |   2 +
 block/qcow2.h  |   3 ++
 include/block/block_int.h  |   4 ++
 4 files changed, 119 insertions(+)

diff --git a/block/qcow2-dirty-bitmap.c b/block/qcow2-dirty-bitmap.c
index e57058c..c9f7ef1 100644
--- a/block/qcow2-dirty-bitmap.c
+++ b/block/qcow2-dirty-bitmap.c
@@ -107,6 +107,13 @@ static int check_constraints(QCow2BitmapHeader *h, int 
cluster_size,
 return fail ? -EINVAL : 0;
 }
 
+static QCow2BitmapHeader *bitmap_header(BDRVQcow2State *s,
+QCow2Bitmap *bitmap)
+{
+return (QCow2BitmapHeader *)
+   (s->bitmap_directory + bitmap->offset);
+}
+
 static int directory_read(BlockDriverState *bs, Error **errp)
 {
 int ret;
@@ -210,3 +217,106 @@ fail:
 
 return ret;
 }
+
+static QCow2Bitmap *find_bitmap_by_name(BlockDriverState *bs, const char *name)
+{
+BDRVQcow2State *s = bs->opaque;
+QCow2Bitmap *bm, *end = s->bitmaps + s->nb_bitmaps;
+
+for (bm = s->bitmaps; bm < end; ++bm) {
+if (strcmp(bm->name, name) == 0) {
+return bm;
+}
+}
+
+return NULL;
+}
+
+/* load_bitmap_data()
+ * load dirty bitmap from bitmap table
+ * Bitmap table entries are assumed to be in big endian format */
+static int load_bitmap_data(BlockDriverState *bs, const uint64_t *bitmap_table,
+uint32_t bitmap_table_size, BdrvDirtyBitmap 
*bitmap)
+{
+int ret = 0;
+BDRVQcow2State *s = bs->opaque;
+uint32_t i;
+uint64_t *tab = g_memdup(bitmap_table,
+ bitmap_table_size * sizeof(uint64_t));
+
+for (i = 0; i < bitmap_table_size; ++i) {
+be64_to_cpus(tab + i);
+}
+
+ret = bdrv_dirty_bitmap_load(bitmap, bs->file->bs, tab, bitmap_table_size,
+ s->cluster_size);
+
+g_free(tab);
+
+return ret;
+}
+
+static BdrvDirtyBitmap *load_bitmap(BlockDriverState *bs, QCow2Bitmap *bm,
+Error **errp)
+{
+BDRVQcow2State *s = bs->opaque;
+int ret;
+QCow2BitmapHeader *bmh;
+uint64_t *bitmap_table = NULL;
+uint32_t granularity;
+BdrvDirtyBitmap *bitmap = NULL;
+
+bmh = bitmap_header(s, bm);
+
+bitmap_table = g_try_malloc(bmh->bitmap_table_size * sizeof(uint64_t));
+if (bitmap_table == NULL) {
+error_setg_errno(errp, -ENOMEM,
+ "Could not allocate bitmap table");
+return NULL;
+}
+
+ret = bdrv_pread(bs->file->bs, bmh->bitmap_table_offset,
+ bitmap_table,
+ bmh->bitmap_table_size * sizeof(uint64_t));
+if (ret < 0) {
+error_setg_errno(errp, -ret,
+ "Could not read bitmap_table table from image");
+goto fail;
+}
+
+granularity = 1U << bmh->granularity_bits;
+bitmap = bdrv_create_dirty_bitmap(bs, granularity, bm->name, errp);
+if (bitmap == NULL) {
+goto fail;
+}
+
+ret = load_bitmap_data(bs, bitmap_table, bmh->bitmap_table_size, bitmap);
+if (ret < 0) {
+error_setg_errno(errp, -ret, "Could not read bitmap from image");
+goto fail;
+}
+
+g_free(bitmap_table);
+return bitmap;
+
+fail:
+g_free(bitmap_table);
+bdrv_release_dirty_bitmap(bs, bitmap);
+
+return NULL;
+}
+
+BdrvDirtyBitmap *qcow2_bitmap_load(BlockDriverState *bs, const char *name,
+   Error **errp)
+{
+QCow2Bitmap *bm;
+
+bm = find_bitmap_by_name(bs, name);
+if (bm == NULL) {
+error_setg(errp, "Could not find bitmap '%s' in the node '%s'", name,
+   bdrv_get_device_or_node_name(bs));
+return NULL;
+}
+
+return load_bitmap(bs, bm, errp);
+}
diff --git a/block/qcow2.c b/block/qcow2.c
index 1ce6264..5f54528 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -3352,6 +3352,8 @@ BlockDriver bdrv_qcow2 = {
 .bdrv_get_info  = qcow2_get_info,
 .bdrv_get_specific_info = qcow2_get_specific_info,
 
+.bdrv_dirty_bitmap_load = qcow2_bitmap_load,
+
 .bdrv_save_vmstate= qcow2_save_vmstate,
 .bdrv_load_vmstate= qcow2_load_vmstate,
 
diff --git a/block/qcow2.h b/block/qcow2.h
index 48fb2a5..cc4c776 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -620,6 +620,9 @@ int qcow2_read_snapshots(BlockDriverState *bs);
 void qcow2_free_bitmaps(BlockDriverState *bs);
 int qcow2_read_bitmaps(BlockDriverState *bs, Error **errp);
 
+BdrvDirtyBitmap *qcow2_bitmap_load(BlockDriverState *bs, const char *name,
+   Error **errp);
+
 /* qcow2-cache.c functions */
 Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables);
 int qcow2_cache_destroy(BlockDriverState* bs, Qcow2Cache *c);
diff --git a/include/block/block_int.h

[Qemu-devel] [PATCH 07/22] qcow2: Bitmaps extension: structs and consts

2016-03-15 Thread Vladimir Sementsov-Ogievskiy

Add data structures and constraints according to docs/specs/qcow2.txt

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/Makefile.objs|  2 +-
 block/qcow2-dirty-bitmap.c | 47 ++
 block/qcow2.h  | 34 +
 3 files changed, 82 insertions(+), 1 deletion(-)
 create mode 100644 block/qcow2-dirty-bitmap.c

diff --git a/block/Makefile.objs b/block/Makefile.objs
index cdd8655..c4dcf7c 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -1,5 +1,5 @@
 block-obj-y += raw_bsd.o qcow.o vdi.o vmdk.o cloop.o bochs.o vpc.o vvfat.o
-block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
qcow2-cache.o
+block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
qcow2-cache.o qcow2-dirty-bitmap.o
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
diff --git a/block/qcow2-dirty-bitmap.c b/block/qcow2-dirty-bitmap.c
new file mode 100644
index 000..2c749ab
--- /dev/null
+++ b/block/qcow2-dirty-bitmap.c
@@ -0,0 +1,47 @@
+/*
+ * Bitmaps for the QCOW version 2 format
+ *
+ * Copyright (c) 2014-2015 Vladimir Sementsov-Ogievskiy
+ *
+ * This file is derived from qcow2-snapshot.c, original copyright:
+ * Copyright (c) 2004-2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* NOTICE: BME here means Bitmaps Extension and used as a namespace for
+ * _internal_ constants. Please do not use this _internal_ abbreviation for
+ * other needs and/or outside of this file. */
+
+/* Bitmap directory entry constraints */
+#define BME_MAX_TABLE_SIZE 0x800
+#define BME_MAX_PHYS_SIZE 0x20000000 /* 512 mb */
+#define BME_MAX_GRANULARITY_BITS 31
+#define BME_MIN_GRANULARITY_BITS 9
+#define BME_MAX_NAME_SIZE 1023
+
+/* Bitmap directory entry flags */
+#define BME_RESERVED_FLAGS 0xffffffff
+
+/* bits [1, 8] U [56, 63] are reserved */
+#define BME_TABLE_ENTRY_RESERVED_MASK 0xff000000000001fe
+
+typedef enum BitmapType {
+BT_DIRTY_TRACKING_BITMAP = 1
+} BitmapType;
diff --git a/block/qcow2.h b/block/qcow2.h
index a063a3c..3f7429e 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -52,6 +52,10 @@
  * space for snapshot names and IDs */
 #define QCOW_MAX_SNAPSHOTS_SIZE (1024 * QCOW_MAX_SNAPSHOTS)
 
+/* Bitmap header extension constraints */
+#define QCOW_MAX_DIRTY_BITMAPS 65535
+#define QCOW_MAX_DIRTY_BITMAP_DIRECTORY_SIZE (1024 * QCOW_MAX_DIRTY_BITMAPS)
+
 /* indicate that the refcount of the referenced cluster is exactly one. */
 #define QCOW_OFLAG_COPIED (1ULL << 63)
 /* indicate that the cluster is compressed (they never have the copied flag) */
@@ -142,6 +146,22 @@ typedef struct QEMU_PACKED QCowSnapshotHeader {
 /* name follows  */
 } QCowSnapshotHeader;
 
+/* QCow2BitmapHeader is actually a bitmap directory entry */
+typedef struct QEMU_PACKED QCow2BitmapHeader {
+/* header is 8 byte aligned */
+uint64_t bitmap_table_offset;
+
+uint32_t bitmap_table_size;
+uint32_t flags;
+
+uint8_t type;
+uint8_t granularity_bits;
+uint16_t name_size;
+uint32_t extra_data_size;
+/* extra data follows  */
+/* name follows  */
+} QCow2BitmapHeader;
+
 typedef struct QEMU_PACKED QCowSnapshotExtraData {
 uint64_t vm_state_size_large;
 uint64_t disk_size;
@@ -160,6 +180,11 @@ typedef struct QCowSnapshot {
 uint64_t vm_clock_nsec;
 } QCowSnapshot;
 
+typedef struct QCow2Bitmap {
+uint64_t offset;
+char *name;
+} QCow2Bitmap;
+
 struct Qcow2Cache;
 typedef struct Qcow2Cache Qcow2Cache;
 
@@ -222,6 +247,15 @@ typedef uint64_t Qcow2GetRefcountFunc(const void 
*refcount_array,
 typedef void Qcow2SetRefcountFunc(void *refcount_array,
   uint64_t index, uint64_t value);
 
+/* Be careful, Qcow2BitmapHeaderExt is not an extension of

[Qemu-devel] [PATCH 13/22] block: store persistent dirty bitmaps

2016-03-15 Thread Vladimir Sementsov-Ogievskiy

Persistent dirty bitmaps are the bitmaps, for which the new field
BdrvDirtyBitmap.file is not NULL. We save all persistent dirty bitmaps
owned by BlockDriverState in corresponding bdrv_close().
BdrvDirtyBitmap.file is a BlockDriverState, where we want to save the
bitmap. It may be set in bdrv_dirty_bitmap_set_file() only once.
bdrv_ref/bdrv_unref are used for BdrvDirtyBitmap.file to be sure that
files will be closed and resources will be freed.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block.c  |  2 ++
 block/dirty-bitmap.c | 35 +++
 include/block/dirty-bitmap.h |  5 +
 3 files changed, 42 insertions(+)

diff --git a/block.c b/block.c
index 59a18a3..b54875e 100644
--- a/block.c
+++ b/block.c
@@ -2144,6 +2144,8 @@ static void bdrv_close(BlockDriverState *bs)
 bdrv_flush(bs);
 bdrv_drain(bs); /* in case flush left pending I/O */
 
+/* save and release persistent dirty bitmaps */
+bdrv_finalize_persistent_dirty_bitmaps(bs);
 bdrv_release_named_dirty_bitmaps(bs);
 assert(QLIST_EMPTY(&bs->dirty_bitmaps));
 
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 7a44722..c9e999f 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -42,6 +42,7 @@ struct BdrvDirtyBitmap {
 char *name; /* Optional non-empty unique ID */
 int64_t size;   /* Size of the bitmap (Number of sectors) */
 bool disabled;  /* Bitmap is read-only */
+bool internal_persistent;   /* bitmap must be saved to owner disk image */
 QLIST_ENTRY(BdrvDirtyBitmap) list;
 };
 
@@ -434,3 +435,37 @@ bool bdrv_load_check_dirty_bitmap(BlockDriverState *file, 
const char *name)
 }
 return false;
 }
+
+void bdrv_store_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+ Error **errp)
+{
+if (bs == NULL || bs->drv == NULL ||
+bs->drv->bdrv_dirty_bitmap_store == NULL) {
+error_setg(errp, "Storing bitmap is unsupported for the format.");
+return;
+}
+
+bs->drv->bdrv_dirty_bitmap_store(bs, bitmap, errp);
+}
+
+void bdrv_dirty_bitmap_set_internal_persistance(BdrvDirtyBitmap *bitmap,
+bool persistent)
+{
+bitmap->internal_persistent = persistent;
+}
+
+void bdrv_finalize_persistent_dirty_bitmaps(BlockDriverState *bs)
+{
+BdrvDirtyBitmap *bm, *bm_next;
+
+QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, bm_next) {
+if (bm->internal_persistent) {
+Error *local_err = NULL;
+bdrv_store_dirty_bitmap(bs, bm, &local_err);
+if (local_err) {
+error_report_err(local_err);
+}
+bdrv_release_dirty_bitmap(bs, bm);
+}
+}
+}
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index f3cedaa..37b5f23 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -54,5 +54,10 @@ int bdrv_dirty_bitmap_store(const BdrvDirtyBitmap *bitmap, 
BlockDriverState *bs,
 const uint64_t *table, uint32_t table_size,
 uint32_t cluster_size);
 bool bdrv_load_check_dirty_bitmap(BlockDriverState *file, const char *name);
+void bdrv_dirty_bitmap_set_internal_persistance(BdrvDirtyBitmap *bitmap,
+bool persistent);
+void bdrv_store_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+ Error **errp);
+void bdrv_finalize_persistent_dirty_bitmaps(BlockDriverState *bs);
 
 #endif
-- 
1.8.3.1

[Qemu-devel] [PATCH 01/22] block: Add two dirty bitmap getters

2016-03-15 Thread Vladimir Sementsov-Ogievskiy

From: Fam Zheng 

For dirty bitmap users to get the size and the name of a
BdrvDirtyBitmap.

Signed-off-by: Fam Zheng 
Reviewed-by: John Snow 
Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/dirty-bitmap.c | 10 ++
 include/block/dirty-bitmap.h |  2 ++
 2 files changed, 12 insertions(+)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 556e1d1..45cfa3b 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -97,6 +97,16 @@ BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState 
*bs,
 return bitmap;
 }
 
+int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap)
+{
+return bitmap->size;
+}
+
+const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap)
+{
+return bitmap->name;
+}
+
 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
 {
 return bitmap->successor;
diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 80afe60..4dc8750 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -29,6 +29,8 @@ uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState 
*bs);
 uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap);
 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap);
 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap);
+const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap);
+int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap);
 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap);
 int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
int64_t sector);
-- 
1.8.3.1

[Qemu-devel] [PATCH 08/22] qcow2-dirty-bitmap: read dirty bitmap directory

2016-03-15 Thread Vladimir Sementsov-Ogievskiy

Adds qcow2_read_bitmaps, reading bitmap directory as
specified in docs/specs/qcow2.txt

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/qcow2-dirty-bitmap.c | 165 +
 block/qcow2.h  |  10 +++
 2 files changed, 175 insertions(+)

diff --git a/block/qcow2-dirty-bitmap.c b/block/qcow2-dirty-bitmap.c
index 2c749ab..e57058c 100644
--- a/block/qcow2-dirty-bitmap.c
+++ b/block/qcow2-dirty-bitmap.c
@@ -25,6 +25,11 @@
  * THE SOFTWARE.
  */
 
+#include "qemu/osdep.h"
+
+#include "block/block_int.h"
+#include "block/qcow2.h"
+
 /* NOTICE: BME here means Bitmaps Extension and used as a namespace for
  * _internal_ constants. Please do not use this _internal_ abbreviation for
  * other needs and/or outside of this file. */
@@ -45,3 +50,163 @@
 typedef enum BitmapType {
 BT_DIRTY_TRACKING_BITMAP = 1
 } BitmapType;
+
+void qcow2_free_bitmaps(BlockDriverState *bs)
+{
+BDRVQcow2State *s = bs->opaque;
+int i;
+
+for (i = 0; i < s->nb_bitmaps; i++) {
+g_free(s->bitmaps[i].name);
+}
+g_free(s->bitmaps);
+s->bitmaps = NULL;
+s->nb_bitmaps = 0;
+
+g_free(s->bitmap_directory);
+s->bitmap_directory = NULL;
+}
+
+static void bitmap_header_to_cpu(QCow2BitmapHeader *h)
+{
+be64_to_cpus(&h->bitmap_table_offset);
+be32_to_cpus(&h->bitmap_table_size);
+be32_to_cpus(&h->flags);
+be16_to_cpus(&h->name_size);
+be32_to_cpus(&h->extra_data_size);
+}
+
+static int calc_dir_entry_size(size_t name_size)
+{
+return align_offset(sizeof(QCow2BitmapHeader) + name_size, 8);
+}
+
+static int dir_entry_size(QCow2BitmapHeader *h)
+{
+return calc_dir_entry_size(h->name_size);
+}
+
+static int check_constraints(QCow2BitmapHeader *h, int cluster_size,
+ uint64_t disk_size)
+{
+uint64_t phys_bitmap_bytes =
+(uint64_t)h->bitmap_table_size * cluster_size;
+uint64_t max_virtual_bits = (phys_bitmap_bytes * 8) << h->granularity_bits;
+
+int fail =
+(h->bitmap_table_offset % cluster_size) ||
+(h->bitmap_table_size > BME_MAX_TABLE_SIZE) ||
+(phys_bitmap_bytes > BME_MAX_PHYS_SIZE) ||
+(disk_size > max_virtual_bits) ||
+(h->granularity_bits > BME_MAX_GRANULARITY_BITS) ||
+(h->granularity_bits < BME_MIN_GRANULARITY_BITS) ||
+(h->flags & BME_RESERVED_FLAGS) ||
+(h->name_size > BME_MAX_NAME_SIZE) ||
+(h->type != BT_DIRTY_TRACKING_BITMAP);
+
+return fail ? -EINVAL : 0;
+}
+
+static int directory_read(BlockDriverState *bs, Error **errp)
+{
+int ret;
+BDRVQcow2State *s = bs->opaque;
+QCow2Bitmap *bm, *end;
+int64_t nb_sectors = bdrv_nb_sectors(bs);
+size_t offset;
+
+if (nb_sectors < 0) {
+error_setg(errp, "Can't calculate number of disk sectors.");
+return nb_sectors;
+}
+
+if (s->bitmap_directory != NULL) {
+/* already read */
+error_setg(errp, "Try read bitmaps, when they are already read.");
+return -EEXIST;
+}
+
+s->bitmap_directory = g_try_malloc0(s->bitmap_directory_size);
+if (s->bitmap_directory == NULL) {
+error_setg(errp, "Can't allocate space for bitmap directory.");
+return -ENOMEM;
+}
+
+ret = bdrv_pread(bs->file->bs,
+ s->bitmap_directory_offset,
+ s->bitmap_directory,
+ s->bitmap_directory_size);
+if (ret < 0) {
+error_setg(errp, "Can't read bitmap directory.");
+goto fail;
+}
+
+offset = 0;
+end = s->bitmaps + s->nb_bitmaps;
+for (bm = s->bitmaps; bm < end; ++bm) {
+QCow2BitmapHeader *h =
+(QCow2BitmapHeader *)(s->bitmap_directory + offset);
+
+if (offset >= s->bitmap_directory_size) {
+error_setg(errp, "Broken bitmap directory.");
+goto fail;
+}
+
+bitmap_header_to_cpu(h);
+
+ret = check_constraints(h, s->cluster_size,
+nb_sectors << BDRV_SECTOR_BITS);
+if (ret < 0) {
+error_setg(errp, "Bitmap doesn't satisfy the constraints.");
+goto fail;
+}
+
+bm->offset = offset;
+bm->name = g_strndup((char *)(h + 1), h->name_size);
+
+offset += dir_entry_size(h);
+}
+return 0;
+
+fail:
+g_free(s->bitmap_directory);
+s->bitmap_directory = NULL;
+
+return ret;
+}
+
+int qcow2_read_bitmaps(BlockDriverState *bs, Error **errp)
+{
+int ret;
+BDRVQcow2State *s = bs->opaque;
+
+if (s->bitmap_directory != NULL || s->bitmaps != NULL) {
+/* already read */
+error_setg(errp, "Try read bitmaps, when they are already read.");
+return -EEXIST;
+}
+
+if (s->nb_bitmaps == 0) {
+/* No bitmaps - nothing to do */
+return 0;
+}
+
+s->bitmaps = g_try_new0(QCow2Bitmap, s->nb_bitmaps);
+if (s->bitmaps == NULL) {
+

1 2 3 4 5 >

1 - 100 of 481 matches

Mail list logo