[Qemu-devel] [PULL 0/3] input patch queue

2017-05-03 Thread Gerd Hoffmann
  Hi,

Input patch queue, with a new tracepoint and two bugfixes (one of them
cve).

please pull,
  Gerd

The following changes since commit e619b14746e5d8c0e53061661fd0e1da01fd4d60:

  Merge remote-tracking branch 'sthibault/tags/samuel-thibault' into staging 
(2017-05-02 15:16:29 +0100)

are available in the git repository at:

  git://git.kraxel.org/qemu tags/pull-input-20170504-1

for you to fetch changes up to e0a633070f7f3eafcc9d0e95e7f1a4e6fe36:

  input: Add trace event for empty keyboard queue (2017-05-03 14:20:12 +0200)


input: limit kbd queue depth
input: don't queue delay if paused
input: Add trace event for empty keyboard queue


Alexander Graf (1):
  input: Add trace event for empty keyboard queue

Gerd Hoffmann (1):
  input: limit kbd queue depth

Marc-André Lureau (1):
  input: don't queue delay if paused

 hw/input/hid.c|  4 
 ui/input.c| 18 +++---
 hw/input/trace-events |  1 +
 3 files changed, 20 insertions(+), 3 deletions(-)



[Qemu-devel] [PULL 3/3] input: Add trace event for empty keyboard queue

2017-05-03 Thread Gerd Hoffmann
From: Alexander Graf 

When driving QEMU from the outside, we have basically no chance to
determine how quickly the guest OS picks up key events, so we usually
have to limit ourselves to very slow keyboard presses to make sure
the guest always has enough chance to pick them up.

This patch adds a trace events when the keyboarde queue is drained.
An external driver can use that as hint that new keys can be pressed.

Signed-off-by: Alexander Graf 
Message-id: 1490883775-94658-1-git-send-email-ag...@suse.de
Signed-off-by: Gerd Hoffmann 
---
 hw/input/hid.c| 4 
 hw/input/trace-events | 1 +
 2 files changed, 5 insertions(+)

diff --git a/hw/input/hid.c b/hw/input/hid.c
index fa9cc4c616..93887ecc43 100644
--- a/hw/input/hid.c
+++ b/hw/input/hid.c
@@ -256,6 +256,10 @@ static void hid_keyboard_process_keycode(HIDState *hs)
 slot = hs->head & QUEUE_MASK; QUEUE_INCR(hs->head); hs->n--;
 keycode = hs->kbd.keycodes[slot];
 
+if (!hs->n) {
+trace_hid_kbd_queue_empty();
+}
+
 key = keycode & 0x7f;
 index = key | ((hs->kbd.modifiers & (1 << 8)) >> 1);
 hid_code = hid_usage_keys[index];
diff --git a/hw/input/trace-events b/hw/input/trace-events
index f3bfbede5c..5a87818b49 100644
--- a/hw/input/trace-events
+++ b/hw/input/trace-events
@@ -24,6 +24,7 @@ milkymist_softusb_pulse_irq(void) "Pulse IRQ"
 
 # hw/input/hid.c
 hid_kbd_queue_full(void) "queue full"
+hid_kbd_queue_empty(void) "queue empty"
 
 # hw/input/virtio
 virtio_input_queue_full(void) "queue full"
-- 
2.9.3




[Qemu-devel] [PULL 2/3] input: don't queue delay if paused

2017-05-03 Thread Gerd Hoffmann
From: Marc-André Lureau 

qemu_input_event_send() discards key event when the guest is paused,
but not the delay.

The delay ends up in the input queue, and qemu_input_event_send_key()
will further fill the queue with upcoming events.

VNC uses qemu_input_event_send_key_delay(), not SPICE, which results
in a different input behaviour on pause: VNC will queue the events
(except the first that is discarded), SPICE will discard all events.

Don't queue delay if paused, and provide same behaviour on SPICE and
VNC clients on resume (and potentially avoid over-allocating the
buffer queue)

Fixes:
https://bugzilla.redhat.com/show_bug.cgi?id=1444326

Signed-off-by: Marc-André Lureau 
Message-id: 20170425130520.31819-1-marcandre.lur...@redhat.com
Signed-off-by: Gerd Hoffmann 
---
 ui/input.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/ui/input.c b/ui/input.c
index fb1f404095..830f912f99 100644
--- a/ui/input.c
+++ b/ui/input.c
@@ -411,6 +411,10 @@ void qemu_input_event_send_key_qcode(QemuConsole *src, 
QKeyCode q, bool down)
 
 void qemu_input_event_send_key_delay(uint32_t delay_ms)
 {
+if (!runstate_is_running() && !runstate_check(RUN_STATE_SUSPENDED)) {
+return;
+}
+
 if (!kbd_timer) {
 kbd_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, qemu_input_queue_process,
  _queue);
-- 
2.9.3




[Qemu-devel] [PULL 1/3] input: limit kbd queue depth

2017-05-03 Thread Gerd Hoffmann
Apply a limit to the number of items we accept into the keyboard queue.

Impact: Without this limit vnc clients can exhaust host memory by
sending keyboard events faster than qemu feeds them to the guest.

Fixes: CVE-2017-8379
Cc: P J P 
Cc: Huawei PSIRT 
Reported-by: jiangx...@huawei.com
Signed-off-by: Gerd Hoffmann 
Message-id: 20170428084237.23960-1-kra...@redhat.com
---
 ui/input.c | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/ui/input.c b/ui/input.c
index ed88cda6d6..fb1f404095 100644
--- a/ui/input.c
+++ b/ui/input.c
@@ -41,6 +41,8 @@ static QTAILQ_HEAD(QemuInputEventQueueHead, 
QemuInputEventQueue) kbd_queue =
 QTAILQ_HEAD_INITIALIZER(kbd_queue);
 static QEMUTimer *kbd_timer;
 static uint32_t kbd_default_delay_ms = 10;
+static uint32_t queue_count;
+static uint32_t queue_limit = 1024;
 
 QemuInputHandlerState *qemu_input_handler_register(DeviceState *dev,
QemuInputHandler *handler)
@@ -268,6 +270,7 @@ static void qemu_input_queue_process(void *opaque)
 break;
 }
 QTAILQ_REMOVE(queue, item, node);
+queue_count--;
 g_free(item);
 }
 }
@@ -282,6 +285,7 @@ static void qemu_input_queue_delay(struct 
QemuInputEventQueueHead *queue,
 item->delay_ms = delay_ms;
 item->timer = timer;
 QTAILQ_INSERT_TAIL(queue, item, node);
+queue_count++;
 
 if (start_timer) {
 timer_mod(item->timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL)
@@ -298,6 +302,7 @@ static void qemu_input_queue_event(struct 
QemuInputEventQueueHead *queue,
 item->src = src;
 item->evt = evt;
 QTAILQ_INSERT_TAIL(queue, item, node);
+queue_count++;
 }
 
 static void qemu_input_queue_sync(struct QemuInputEventQueueHead *queue)
@@ -306,6 +311,7 @@ static void qemu_input_queue_sync(struct 
QemuInputEventQueueHead *queue)
 
 item->type = QEMU_INPUT_QUEUE_SYNC;
 QTAILQ_INSERT_TAIL(queue, item, node);
+queue_count++;
 }
 
 void qemu_input_event_send_impl(QemuConsole *src, InputEvent *evt)
@@ -381,7 +387,7 @@ void qemu_input_event_send_key(QemuConsole *src, KeyValue 
*key, bool down)
 qemu_input_event_send(src, evt);
 qemu_input_event_sync();
 qapi_free_InputEvent(evt);
-} else {
+} else if (queue_count < queue_limit) {
 qemu_input_queue_event(_queue, src, evt);
 qemu_input_queue_sync(_queue);
 }
@@ -409,8 +415,10 @@ void qemu_input_event_send_key_delay(uint32_t delay_ms)
 kbd_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL, qemu_input_queue_process,
  _queue);
 }
-qemu_input_queue_delay(_queue, kbd_timer,
-   delay_ms ? delay_ms : kbd_default_delay_ms);
+if (queue_count < queue_limit) {
+qemu_input_queue_delay(_queue, kbd_timer,
+   delay_ms ? delay_ms : kbd_default_delay_ms);
+}
 }
 
 InputEvent *qemu_input_event_new_btn(InputButton btn, bool down)
-- 
2.9.3




Re: [Qemu-devel] [PATCH 1/5] hw/ppc: setting spapr_drc_detach_cb in spapr_dr_connector_new

2017-05-03 Thread David Gibson
On Sun, Apr 30, 2017 at 02:25:43PM -0300, Daniel Henrique Barboza wrote:
> The idea of moving the detach callback functions to the constructor
> of the dr_connector is to set them statically at init time, avoiding
> any post-load hooks to restore it (after a migration, for example).
> 
> Summary of changes:
> 
> - hw/ppc/spapr_drc.c and include/hw/ppc/spapr_drc.h:
> *  spapr_dr_connector_new() now has an additional parameter,
> spapr_drc_detach_cb *detach_cb
> *  'spapr_drc_detach_cb *detach_cb' parameter was removed of
> the detach function pointer in sPAPRDRConnectorClass
> 
> - hw/ppc/spapr_pci.c:
> * the callback 'spapr_phb_remove_pci_device_cb' is now passed
> as a parameter in 'spapr_dr_connector_new' instead of 'drck->detach()'
> 
> - hw/ppc/spapr.c:
> * 'spapr_create_lmb_dr_connectors' now passes the callback
> 'spapr_lmb_release' to 'spapr_dr_connector_new' instead of 'drck-detach()'
> * 'spapr_init_cpus' now passes the callback 'spapr_core_release'
> to 'spapr_dr_connector_new' instead of 'drck-detach()'
> * moved the callback functions up in the code so they can be referenced
> by 'spapr_create_lmb_dr_connectors' and 'spapr_init_cpus'
> 
> Signed-off-by: Daniel Henrique Barboza 

So, the patch looks correct, but the approach bothers me a bit.  The
DRC type and the detach callback are essentially redundant information
- the callback still gets stored in the instance, which doesn't make a
whole lot of sense.

Ideally we'd use QOM subtypes of the base DRC type and put the
callback in the drck, but I suspect that will raise some other
complications, so I'm ok with postponing that.

In the meantime, I think we'd be better of doing an explicit switch on
the DRC type when we want to call the detach function, rather than
storing a function pointer at all.  It's kind of ugly, but we already
have a bunch of nasty switches on the type, so I don't think it's
really any uglier than what we have.


> ---
>  hw/ppc/spapr.c | 71 
> +++---
>  hw/ppc/spapr_drc.c | 17 ++-
>  hw/ppc/spapr_pci.c |  5 ++--
>  include/hw/ppc/spapr_drc.h |  4 +--
>  4 files changed, 49 insertions(+), 48 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 80d12d0..bc11757 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1887,6 +1887,29 @@ static void spapr_drc_reset(void *opaque)
>  }
>  }
>  
> +typedef struct sPAPRDIMMState {
> +uint32_t nr_lmbs;
> +} sPAPRDIMMState;
> +
> +static void spapr_lmb_release(DeviceState *dev, void *opaque)
> +{
> +sPAPRDIMMState *ds = (sPAPRDIMMState *)opaque;
> +HotplugHandler *hotplug_ctrl;
> +
> +if (--ds->nr_lmbs) {
> +return;
> +}
> +
> +g_free(ds);
> +
> +/*
> + * Now that all the LMBs have been removed by the guest, call the
> + * pc-dimm unplug handler to cleanup up the pc-dimm device.
> + */
> +hotplug_ctrl = qdev_get_hotplug_handler(dev);
> +hotplug_handler_unplug(hotplug_ctrl, dev, _abort);
> +}
> +
>  static void spapr_create_lmb_dr_connectors(sPAPRMachineState *spapr)
>  {
>  MachineState *machine = MACHINE(spapr);
> @@ -1900,7 +1923,7 @@ static void 
> spapr_create_lmb_dr_connectors(sPAPRMachineState *spapr)
>  
>  addr = i * lmb_size + spapr->hotplug_memory.base;
>  drc = spapr_dr_connector_new(OBJECT(spapr), 
> SPAPR_DR_CONNECTOR_TYPE_LMB,
> - addr/lmb_size);
> + (addr / lmb_size), spapr_lmb_release);
>  qemu_register_reset(spapr_drc_reset, drc);
>  }
>  }
> @@ -1956,6 +1979,14 @@ static CPUArchId *spapr_find_cpu_slot(MachineState 
> *ms, uint32_t id, int *idx)
>  return >possible_cpus->cpus[index];
>  }
>  
> +static void spapr_core_release(DeviceState *dev, void *opaque)
> +{
> +HotplugHandler *hotplug_ctrl;
> +
> +hotplug_ctrl = qdev_get_hotplug_handler(dev);
> +hotplug_handler_unplug(hotplug_ctrl, dev, _abort);
> +}
> +
>  static void spapr_init_cpus(sPAPRMachineState *spapr)
>  {
>  MachineState *machine = MACHINE(spapr);
> @@ -1998,7 +2029,8 @@ static void spapr_init_cpus(sPAPRMachineState *spapr)
>  sPAPRDRConnector *drc =
>  spapr_dr_connector_new(OBJECT(spapr),
> SPAPR_DR_CONNECTOR_TYPE_CPU,
> -   (core_id / smp_threads) * smt);
> +   (core_id / smp_threads) * smt,
> +   spapr_core_release);
>  
>  qemu_register_reset(spapr_drc_reset, drc);
>  }
> @@ -2596,29 +2628,6 @@ out:
>  error_propagate(errp, local_err);
>  }
>  
> -typedef struct sPAPRDIMMState {
> -uint32_t nr_lmbs;
> -} sPAPRDIMMState;
> -
> -static void spapr_lmb_release(DeviceState *dev, void *opaque)
> -{
> -sPAPRDIMMState *ds = (sPAPRDIMMState *)opaque;
> -HotplugHandler 

[Qemu-devel] [PATCH v2] Fix the -accel parameter and the documentation for 'hax'

2017-05-03 Thread Thomas Huth
Since 'hax' is a possible accelerator nowadays, too, the '-accel'
option should support it and we should mention this accelerator
in the documentation, too.

Signed-off-by: Thomas Huth 
---
 v2:
 - Use qemu_opt_set() instead of qemu_opts_parse_noisily()
 - Improve the documentation of the -accel option

 qemu-options.hx | 18 +-
 vl.c| 23 +--
 2 files changed, 18 insertions(+), 23 deletions(-)

diff --git a/qemu-options.hx b/qemu-options.hx
index 787b9c3..21f8ff2 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -31,7 +31,7 @@ DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
 "-machine [type=]name[,prop[=value][,...]]\n"
 "selects emulated machine ('-machine help' for list)\n"
 "property accel=accel1[:accel2[:...]] selects 
accelerator\n"
-"supported accelerators are kvm, xen, tcg (default: tcg)\n"
+"supported accelerators are kvm, xen, hax or tcg (default: 
tcg)\n"
 "kernel_irqchip=on|off|split controls accelerated irqchip 
support (default=off)\n"
 "vmport=on|off|auto controls emulation of vmport (default: 
auto)\n"
 "kvm_shadow_mem=size of KVM shadow MMU in bytes\n"
@@ -52,9 +52,9 @@ available machines. Supported machine properties are:
 @table @option
 @item accel=@var{accels1}[:@var{accels2}[:...]]
 This is used to enable an accelerator. Depending on the target architecture,
-kvm, xen, or tcg can be available. By default, tcg is used. If there is more
-than one accelerator specified, the next one is used if the previous one fails
-to initialize.
+kvm, xen, hax or tcg can be available. By default, tcg is used. If there is
+more than one accelerator specified, the next one is used if the previous one
+fails to initialize.
 @item kernel_irqchip=on|off
 Controls in-kernel irqchip support for the chosen accelerator when available.
 @item gfx_passthru=on|off
@@ -97,15 +97,15 @@ ETEXI
 
 DEF("accel", HAS_ARG, QEMU_OPTION_accel,
 "-accel [accel=]accelerator[,thread=single|multi]\n"
-"   select accelerator ('-accel help for list')\n"
-"   thread=single|multi (enable multi-threaded TCG)", 
QEMU_ARCH_ALL)
+"select accelerator (kvm, xen, hax or tcg; use 'help' for 
a list)\n"
+"thread=single|multi (enable multi-threaded TCG)", 
QEMU_ARCH_ALL)
 STEXI
 @item -accel @var{name}[,prop=@var{value}[,...]]
 @findex -accel
 This is used to enable an accelerator. Depending on the target architecture,
-kvm, xen, or tcg can be available. By default, tcg is used. If there is more
-than one accelerator specified, the next one is used if the previous one fails
-to initialize.
+kvm, xen, hax or tcg can be available. By default, tcg is used. If there is
+more than one accelerator specified, the next one is used if the previous one
+fails to initialize.
 @table @option
 @item thread=single|multi
 Controls number of TCG threads. When the TCG is multi-threaded there will be 
one
diff --git a/vl.c b/vl.c
index f46e070..0a1b931 100644
--- a/vl.c
+++ b/vl.c
@@ -3725,26 +3725,21 @@ int main(int argc, char **argv, char **envp)
 qdev_prop_register_global(_pit_lost_tick_policy);
 break;
 }
-case QEMU_OPTION_accel:
+case QEMU_OPTION_accel: {
+QemuOpts *accel_opts;
+
 accel_opts = qemu_opts_parse_noisily(qemu_find_opts("accel"),
  optarg, true);
 optarg = qemu_opt_get(accel_opts, "accel");
-
-olist = qemu_find_opts("machine");
-if (strcmp("kvm", optarg) == 0) {
-qemu_opts_parse_noisily(olist, "accel=kvm", false);
-} else if (strcmp("xen", optarg) == 0) {
-qemu_opts_parse_noisily(olist, "accel=xen", false);
-} else if (strcmp("tcg", optarg) == 0) {
-qemu_opts_parse_noisily(olist, "accel=tcg", false);
-} else {
-if (!is_help_option(optarg)) {
-error_printf("Unknown accelerator: %s", optarg);
-}
-error_printf("Supported accelerators: kvm, xen, tcg\n");
+if (!optarg || is_help_option(optarg)) {
+error_printf("Possible accelerators: kvm, xen, hax, 
tcg\n");
 exit(1);
 }
+accel_opts = qemu_opts_create(qemu_find_opts("machine"), NULL,
+  false, _abort);
+qemu_opt_set(accel_opts, "accel", optarg, _abort);
 break;
+}
 case QEMU_OPTION_usb:
 olist = qemu_find_opts("machine");
 qemu_opts_parse_noisily(olist, "usb=on", false);
-- 
1.8.3.1




Re: [Qemu-devel] [PATCH 0/7] KVM: MMU: fast write protect

2017-05-03 Thread Xiao Guangrong



On 05/03/2017 10:57 PM, Paolo Bonzini wrote:



On 03/05/2017 16:50, Xiao Guangrong wrote:

Furthermore, userspace has no knowledge about if PML is enable (it
can be required from sysfs, but it is a good way in QEMU), so it is
difficult for the usespace to know when to use write-protect-all.
Maybe we can make KVM_CAP_X86_WRITE_PROTECT_ALL_MEM return false if
PML is enabled?


Yes, that's a good idea.  Though it's a pity that, with PML, setting the
dirty bit will still do the massive walk of the rmap.  At least with
reset_dirty_pages it's done a little bit at a time.


Also, I wonder how the alternative write protection mechanism would
affect performance of the dirty page ring buffer patches.  You would do
the write protection of all memory at the end of
kvm_vm_ioctl_reset_dirty_pages.  You wouldn't even need a separate
ioctl, which is nice.  On the other hand, checkpoints would be more
frequent and most pages would be write-protected, so it would be more
expensive to rebuild the shadow page tables...


Yup, write-protect-all can improve reset_dirty_pages indeed, i will
apply your idea after reset_dirty_pages is merged.

However, we still prefer to have a separate ioctl for write-protect-all
which cooperates with KVM_GET_DIRTY_LOG to improve live migration that
should not always depend on checkpoint.


Ok, I plan to merge the dirty ring pages early in 4.13 development.


Great.

As there is no conflict between these two patchsets except dirty
ring pages takes benefit from write-protect-all, i think they
can be developed and iterated independently, right?

Or you prefer to merge dirty ring pages first then review the
new version of this patchset later?

Thanks!




[Qemu-devel] 答复: [PATCH v2 05/18] COLO: Handle shutdown command for VMin COLO state

2017-05-03 Thread wang.guang55
hi

The patch may cause qemu_system_reset_request call  qemu_notify_event() twice.

old:



void qemu_system_reset_request(void)

{

if (no_reboot) {

shutdown_requested = 1

} else {

reset_requested = 1

}

cpu_stop_current()

qemu_notify_event()

}


new:

qemu_system_reset_request ->qemu_system_shutdown_request->qemu_notify_event()


And qemu_system_reset_request  may call   replay_shutdown_request() ,old 
version dose not call replay_shutdown_request here.





















原始邮件



发件人: <zhang.zhanghaili...@huawei.com>
收件人: <qemu-devel@nongnu.org> <gilb...@redhat.com>
抄送人: <lizhij...@cn.fujitsu.com> <xiecl.f...@cn.fujitsu.com> 
<zhang.zhanghaili...@huawei.com> <zhangchen.f...@cn.fujitsu.com> 
<quint...@redhat.com> <pbonz...@redhat.com>
日 期 :2017年04月22日 16:55
主 题 :[Qemu-devel] [PATCH v2 05/18] COLO: Handle shutdown command for VMin COLO 
state





If VM is in COLO FT state, we need to do some extra works before
starting normal shutdown process.

Secondary VM will ignore the shutdown command if users issue it directly
to Secondary VM. COLO will capture shutdown command and after
shutdown request from user.

Cc: Paolo Bonzini <pbonz...@redhat.com>
Signed-off-by: zhanghailiang <zhang.zhanghaili...@huawei.com>
Signed-off-by: Li Zhijian <lizhij...@cn.fujitsu.com>
Reviewed-by: Dr. David Alan Gilbert <dgilb...@redhat.com>
---
 include/migration/colo.h |  1 +
 include/sysemu/sysemu.h  |  3 +++
 migration/colo.c | 46 +-
 qapi-schema.json |  4 +++-
 vl.c | 19 ---
 5 files changed, 68 insertions(+), 5 deletions(-)

diff --git a/include/migration/colo.h b/include/migration/colo.h
index 2bbff9e..aadd040 100644
--- a/include/migration/colo.h
+++ b/include/migration/colo.h
@@ -37,4 +37,5 @@ COLOMode get_colo_mode(void)
 void colo_do_failover(MigrationState *s)
 
 void colo_checkpoint_notify(void *opaque)
+bool colo_handle_shutdown(void)
 #endif
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index 16175f7..8054f53 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -49,6 +49,8 @@ typedef enum WakeupReason {
 QEMU_WAKEUP_REASON_OTHER,
 } WakeupReason
 
+extern int colo_shutdown_requested
+
 void qemu_system_reset_request(void)
 void qemu_system_suspend_request(void)
 void qemu_register_suspend_notifier(Notifier *notifier)
@@ -56,6 +58,7 @@ void qemu_system_wakeup_request(WakeupReason reason)
 void qemu_system_wakeup_enable(WakeupReason reason, bool enabled)
 void qemu_register_wakeup_notifier(Notifier *notifier)
 void qemu_system_shutdown_request(void)
+void qemu_system_shutdown_request_core(void)
 void qemu_system_powerdown_request(void)
 void qemu_register_powerdown_notifier(Notifier *notifier)
 void qemu_system_debug_request(void)
diff --git a/migration/colo.c b/migration/colo.c
index a3344ce..c4fc865 100644
--- a/migration/colo.c
+++ b/migration/colo.c
@@ -384,6 +384,21 @@ static int colo_do_checkpoint_transaction(MigrationState 
*s,
 goto out
 }
 
+if (colo_shutdown_requested) {
+colo_send_message(s->to_dst_file, COLO_MESSAGE_GUEST_SHUTDOWN,
+  _err)
+if (local_err) {
+error_free(local_err)
+/* Go on the shutdown process and throw the error message */
+error_report("Failed to send shutdown message to SVM")
+}
+qemu_fflush(s->to_dst_file)
+colo_shutdown_requested = 0
+qemu_system_shutdown_request_core()
+/* Fix me: Just let the colo thread exit ? */
+qemu_thread_exit(0)
+}
+
 ret = 0
 
 qemu_mutex_lock_iothread()
@@ -449,7 +464,9 @@ static void colo_process_checkpoint(MigrationState *s)
 goto out
 }
 
-qemu_sem_wait(>colo_checkpoint_sem)
+if (!colo_shutdown_requested) {
+qemu_sem_wait(>colo_checkpoint_sem)
+}
 
 ret = colo_do_checkpoint_transaction(s, bioc, fb)
 if (ret < 0) {
@@ -534,6 +551,16 @@ static void colo_wait_handle_message(QEMUFile *f, int 
*checkpoint_request,
 case COLO_MESSAGE_CHECKPOINT_REQUEST:
 *checkpoint_request = 1
 break
+case COLO_MESSAGE_GUEST_SHUTDOWN:
+qemu_mutex_lock_iothread()
+vm_stop_force_state(RUN_STATE_COLO)
+qemu_system_shutdown_request_core()
+qemu_mutex_unlock_iothread()
+/*
+ * The main thread will be exit and terminate the whole
+ * process, do need some cleanup ?
+ */
+qemu_thread_exit(0)
 default:
 *checkpoint_request = 0
 error_setg(errp, "Got unknown COLO message: %d", msg)
@@ -696,3 +723,20 @@ out:
 
 return NULL
 }
+
+bool colo_handle_shutdown(void)
+{
+/*
+ * If VM is in COLO-FT mode, we need do some significant work before
+ * respond to the shutdown request. Besides, Secondary VM will ignore
+ * the shutdown request from users.
+ */
+if 

Re: [Qemu-devel] [PATCH 1/5] mc146818rtc: update periodic timer only if it is needed

2017-05-03 Thread Xiao Guangrong



On 05/03/2017 11:42 PM, Paolo Bonzini wrote:



On 12/04/2017 11:51, guangrong.x...@gmail.com wrote:

From: Xiao Guangrong 

Currently, the timer is updated whenever RegA or RegB is written
even if the periodic timer related configuration is not changed

This patch optimizes it slightly to make the update happen only
if its period or enable-status is changed, also later patches are
depend on this optimization

Signed-off-by: Xiao Guangrong 
---
  hw/timer/mc146818rtc.c | 31 +--
  1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c
index 4165450..749e206 100644
--- a/hw/timer/mc146818rtc.c
+++ b/hw/timer/mc146818rtc.c
@@ -387,10 +387,25 @@ static void rtc_update_timer(void *opaque)
  check_update_timer(s);
  }
  
+static bool rtc_periodic_timer_updated_by_regA(RTCState *s, uint64_t data)

+{
+uint8_t orig = s->cmos_data[RTC_REG_A];
+
+return (orig & 0x0f) != (data & 0x0f);
+}
+
+static bool rtc_periodic_timer_updated_by_regB(RTCState *s, uint64_t data)
+{
+uint8_t orig = s->cmos_data[RTC_REG_B];
+
+return (orig & REG_B_PIE) != (data & REG_B_PIE);
+}
+
  static void cmos_ioport_write(void *opaque, hwaddr addr,
uint64_t data, unsigned size)
  {
  RTCState *s = opaque;
+bool update_periodic_timer;
  
  if ((addr & 1) == 0) {

  s->cmos_index = data & 0x7f;
@@ -423,6 +438,8 @@ static void cmos_ioport_write(void *opaque, hwaddr addr,
  }
  break;
  case RTC_REG_A:
+update_periodic_timer = rtc_periodic_timer_updated_by_regA(s, 
data);


I think you can change it to a...


  if ((data & 0x60) == 0x60) {
  if (rtc_running(s)) {
  rtc_update_time(s);
@@ -445,10 +462,16 @@ static void cmos_ioport_write(void *opaque, hwaddr addr,
  /* UIP bit is read only */


... inlined update_periodic_timer assignment here:

 update_periodic_timer = (s->cmos_data[RTC_REG_A] ^ data) & REG_A_PERIOD;


  s->cmos_data[RTC_REG_A] = (data & ~REG_A_UIP) |
  (s->cmos_data[RTC_REG_A] & REG_A_UIP);
-periodic_timer_update(s, qemu_clock_get_ns(rtc_clock));
+
+if (update_periodic_timer) {
+periodic_timer_update(s, qemu_clock_get_ns(rtc_clock));
+}
+
  check_update_timer(s);
  break;
  case RTC_REG_B:
+update_periodic_timer = rtc_periodic_timer_updated_by_regB(s, 
data);
+
  if (data & REG_B_SET) {
  /* update cmos to when the rtc was stopping */
  if (rtc_running(s)) {
@@ -475,7 +498,11 @@ static void cmos_ioport_write(void *opaque, hwaddr addr,
  qemu_irq_lower(s->irq);
  }


Same here:

 update_periodic_timer = (s->cmos_data[RTC_REG_A] ^ data) & REG_B_PIE;



Okay, will do, thanks!




Re: [Qemu-devel] [PATCH 4/5] mc146818rtc: move x86 specific code out of periodic_timer_update

2017-05-03 Thread Xiao Guangrong



On 05/03/2017 11:39 PM, Paolo Bonzini wrote:



On 12/04/2017 11:51, guangrong.x...@gmail.com wrote:

From: Xiao Guangrong 

Move the x86 specific code in periodic_timer_update() to a common place,
the actual logic is not changed

Signed-off-by: Xiao Guangrong 
---
  hw/timer/mc146818rtc.c | 112 +
  1 file changed, 66 insertions(+), 46 deletions(-)

diff --git a/hw/timer/mc146818rtc.c b/hw/timer/mc146818rtc.c
index 3bf559d..d7b7c56 100644
--- a/hw/timer/mc146818rtc.c
+++ b/hw/timer/mc146818rtc.c
@@ -144,6 +144,63 @@ static void rtc_coalesced_timer(void *opaque)
  
  rtc_coalesced_timer_update(s);

  }
+
+static int64_t
+arch_periodic_timer_update(RTCState *s, int period, int64_t lost_clock)
+{
+if (period != s->period) {
+int64_t scale_lost_clock;
+int current_irq_coalesced = s->irq_coalesced;
+
+s->irq_coalesced = (current_irq_coalesced * s->period) / period;
+
+/*
+ * calculate the lost clock after it is scaled which should be
+ * compensated in the next interrupt.
+ */
+scale_lost_clock = current_irq_coalesced * s->period -
+s->irq_coalesced * period;
+DPRINTF_C("cmos: coalesced irqs scaled from %d to %d, %ld clocks "
+  "are compensated.\n", current_irq_coalesced,
+  s->irq_coalesced, scale_lost_clock);
+lost_clock += scale_lost_clock;
+s->period = period;


This should be moved up to the caller.


It should not. As you pointed out below, all these code are only needed
for LOST_TICK_POLICY_SLEW that is x86 specific.

Or use "if (s->lost_tick_policy != LOST_TICK_POLICY_SLEW)" without
"#ifdef TARGET_I386" is acceptable as only x86 can make it SLEW,
Unnecessary branch checks will little slow down other architectures,
but i think it is acceptable, right? :)



Also, if s->lost_tick_policy is not SLEW, s->irq_coalesced on input is
zero.  So I *think* what you get is equivalent to

if (s->lost_tick_policy != LOST_TICK_POLICY_SLEW) {
return;
}

/* ... comment ... */
lost_interrupt = (s->irq_coalesced * s->period) / period;
lost_clock += (s->irq_coalesced * s->period) % period;
lost_interrupt += lost_clock / period;
lost_clock %= period;

s->irq_coalesced = load_interrupt;
rtc_coalesced_timer_update(s);

or equivalently:

lost_clock += s->irq_coalesced * s->period;

s->irq_coalesced = lost_clock / period;
lost_clock %= period;
rtc_coalesced_timer_update(s);



Exactly right, it is much better, will apply it.


I think you should probably merge these three patches and document the
resulting logic, because it's simpler than building it a patch at a time.


Okay, i will consider it carefully in the next version.

Thank you, Paolo!




[Qemu-devel] [PATCH v12 08/10] qcow2: Optimize write zero of unaligned tail cluster

2017-05-03 Thread Eric Blake
We've already improved discards to operate efficiently on the tail
of an unaligned qcow2 image; it's time to make a similar improvement
to write zeroes.  The special case is only valid at the tail
cluster of a file, where we must recognize that any sectors beyond
the image end would implicitly read as zero, and therefore should
not penalize our logic for widening a partial cluster into writing
the whole cluster as zero.

However, note that for now, the special case of end-of-file is only
recognized if there is no backing file, or if the backing file has
the same length; that's because when the backing file is shorter
than the active layer, we don't have code in place to recognize
that reads of a sector unallocated at the top and beyond the backing
end-of-file are implicitly zero.  It's not much of a real loss,
because most people don't use images that aren't cluster-aligned,
or where the active layer is a different size than the backing
layer (especially where the difference falls within a single cluster).

Update test 154 to cover the new scenarios, using two images of
intentionally differing length.

While at it, fix the test to gracefully skip when run as
./check -qcow2 -o compat=0.10 154
since the older format lacks zero clusters already required earlier
in the test.

Signed-off-by: Eric Blake 

---
v12: fix testsuite problems, document shortcoming of differing
v11: reserved for blkdebug half of v10
size of backing file

v10: rebase to better reporting of preallocated zero clusters
v9: new patch
---
 block/qcow2.c  |   7 ++
 tests/qemu-iotests/154 | 160 -
 tests/qemu-iotests/154.out | 129 
 3 files changed, 294 insertions(+), 2 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index dded5a0..3478bb6 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2451,6 +2451,10 @@ static bool is_zero_sectors(BlockDriverState *bs, 
int64_t start,
 BlockDriverState *file;
 int64_t res;

+if (start + count > bs->total_sectors) {
+count = bs->total_sectors - start;
+}
+
 if (!count) {
 return true;
 }
@@ -2469,6 +2473,9 @@ static coroutine_fn int 
qcow2_co_pwrite_zeroes(BlockDriverState *bs,
 uint32_t tail = (offset + count) % s->cluster_size;

 trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, count);
+if (offset + count == bs->total_sectors * BDRV_SECTOR_SIZE) {
+tail = 0;
+}

 if (head || tail) {
 int64_t cl_start = (offset - head) >> BDRV_SECTOR_BITS;
diff --git a/tests/qemu-iotests/154 b/tests/qemu-iotests/154
index 7ca7219..687b8f3 100755
--- a/tests/qemu-iotests/154
+++ b/tests/qemu-iotests/154
@@ -2,7 +2,7 @@
 #
 # qcow2 specific bdrv_pwrite_zeroes tests with backing files (complements 034)
 #
-# Copyright (C) 2016 Red Hat, Inc.
+# Copyright (C) 2016-2017 Red Hat, Inc.
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -42,7 +42,10 @@ _supported_proto file
 _supported_os Linux

 CLUSTER_SIZE=4k
-size=128M
+size=$((128 * 1024 * 1024))
+
+# This test requires zero clusters, added in v3 images
+_unsupported_imgopts compat=0.10

 echo
 echo == backing file contains zeros ==
@@ -299,6 +302,159 @@ $QEMU_IO -c "read -P 0 75k 1k" "$TEST_IMG" | 
_filter_qemu_io

 $QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map

+echo
+echo == unaligned image tail cluster, no allocation needed ==
+
+# With no backing file, write to all or part of unallocated partial cluster
+# will mark the cluster as zero, but does not allocate.
+# Re-create the image each time to get back to unallocated clusters.
+
+# Write at the front: sector-wise, the request is: 128m... | 00 -- -- --
+_make_test_img $((size + 2048))
+$QEMU_IO -c "write -z $size 512" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+
+# Write at the back: sector-wise, the request is: 128m... | -- -- -- 00
+_make_test_img $((size + 2048))
+$QEMU_IO -c "write -z $((size + 1536)) 512" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+
+# Write at middle: sector-wise, the request is: 128m... | -- 00 00 --
+_make_test_img $((size + 2048))
+$QEMU_IO -c "write -z $((size + 512)) 1024" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+
+# Write entire cluster: sector-wise, the request is: 128m... | 00 00 00 00
+_make_test_img $((size + 2048))
+$QEMU_IO -c "write -z $size 2048" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IO -c "alloc $size 2048" "$TEST_IMG" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG" | _filter_qemu_img_map
+
+# Repeat with 

[Qemu-devel] [PATCH v12 10/10] qcow2: Discard/zero clusters by byte count

2017-05-03 Thread Eric Blake
Passing a byte offset, but sector count, when we ultimately
want to operate on cluster granularity, is madness.  Clean up
the external interfaces to take both offset and count as bytes,
while still keeping the assertion added previously that the
caller must align the values to a cluster.  Then rename things
to make sure backports don't get confused by changed units:
instead of qcow2_discard_clusters() and qcow2_zero_clusters(),
we now have qcow2_cluster_discard() and qcow2_cluster_zeroize().

The internal functions still operate on clusters at a time, and
return an int for number of cleared clusters; but on an image
with 2M clusters, a single L2 table holds 256k entries that each
represent a 2M cluster, totalling well over INT_MAX bytes if we
ever had a request for that many bytes at once.  All our callers
currently limit themselves to 32-bit bytes (and therefore fewer
clusters), but by making this function 64-bit clean, we have one
less place to clean up if we later improve the block layer to
support 64-bit bytes through all operations (with the block layer
auto-fragmenting on behalf of more-limited drivers), rather than
the current state where some interfaces are artificially limited
to INT_MAX at a time.

Signed-off-by: Eric Blake 
Reviewed-by: Max Reitz 

---
v12: minor tweaks suggested by Max
v11: reserved for blkdebug half of v10
v10: squash in fixup accounting for unaligned file end
v9: rebase to earlier changes, drop R-b
v7, v8: only earlier half of series submitted for 2.9
v6: rebase due to context
v5: s/count/byte/ to make the units obvious, and rework the math
to ensure no 32-bit integer overflow on large clusters
v4: improve function names, split assertion additions into earlier patch
[no v3 or v2]
v1: https://lists.gnu.org/archive/html/qemu-devel/2016-12/msg00339.html
---
 block/qcow2.h  |  9 +
 block/qcow2-cluster.c  | 42 ++
 block/qcow2-snapshot.c |  7 +++
 block/qcow2.c  | 22 +-
 4 files changed, 39 insertions(+), 41 deletions(-)

diff --git a/block/qcow2.h b/block/qcow2.h
index 142f81b..e150e2a 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -551,10 +551,11 @@ uint64_t 
qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
  int compressed_size);

 int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m);
-int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
-int nb_sectors, enum qcow2_discard_type type, bool full_discard);
-int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors,
-int flags);
+int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset,
+  uint64_t bytes, enum qcow2_discard_type type,
+  bool full_discard);
+int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset,
+  uint64_t bytes, int flags);

 int qcow2_expand_zero_clusters(BlockDriverState *bs,
BlockDriverAmendStatusCB *status_cb,
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 9aab6dd..a47ac9f 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1547,34 +1547,36 @@ static int discard_single_l2(BlockDriverState *bs, 
uint64_t offset,
 return nb_clusters;
 }

-int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
-int nb_sectors, enum qcow2_discard_type type, bool full_discard)
+int qcow2_cluster_discard(BlockDriverState *bs, uint64_t offset,
+  uint64_t bytes, enum qcow2_discard_type type,
+  bool full_discard)
 {
 BDRVQcow2State *s = bs->opaque;
-uint64_t end_offset;
+uint64_t end_offset = offset + bytes;
 uint64_t nb_clusters;
+int64_t cleared;
 int ret;

-end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);
-
 /* Caller must pass aligned values, except at image end */
 assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
 assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) ||
end_offset == bs->total_sectors << BDRV_SECTOR_BITS);

-nb_clusters = size_to_clusters(s, end_offset - offset);
+nb_clusters = size_to_clusters(s, bytes);

 s->cache_discards = true;

 /* Each L2 table is handled by its own loop iteration */
 while (nb_clusters > 0) {
-ret = discard_single_l2(bs, offset, nb_clusters, type, full_discard);
-if (ret < 0) {
+cleared = discard_single_l2(bs, offset, nb_clusters, type,
+full_discard);
+if (cleared < 0) {
+ret = cleared;
 goto fail;
 }

-nb_clusters -= ret;
-offset += (ret * s->cluster_size);
+nb_clusters -= cleared;
+offset += (cleared * s->cluster_size);
 }

 ret = 0;
@@ -1639,16 +1641,15 @@ static int zero_single_l2(BlockDriverState 

[Qemu-devel] [PATCH v12 07/10] iotests: Add test 179 to cover write zeroes with unmap

2017-05-03 Thread Eric Blake
No tests were covering write zeroes with unmap.  Additionally,
I needed to prove that my previous patches for correct status
reporting and write zeroes optimizations actually had an impact.

The test works for cluster_size between 8k and 2M (for smaller
sizes, it fails because our allocation patterns are not contiguous
with small clusters - in part, the largest consecutive allocation
we tend to get is often bounded by the size covered by one L2
table).

Note that testing for zero clusters is tricky: 'qemu-io map'
reports whether data comes from the current layer of the image
(useful for sniffing out which regions of the file have
QCOW_OFLAG_ZERO) - but doesn't show which clusters have mappings;
while 'qemu-img map' sees "zero":true for both unallocated and
zero clusters for any qcow2 with no backing layer (so less useful
at detecting true zero clusters), but reliably shows mappings.
So we have to rely on both queries side-by-side at each point of
the test.

Signed-off-by: Eric Blake 

---
v12: probe the map in more places, to make test easier to follow
v11: reserved for blkdebug half of v10
v10: drop any changes to v2 files, rewrite test to work with updates
earlier in the series, add a blkdebug probe
v9: new patch
---
 tests/qemu-iotests/179 | 132 +
 tests/qemu-iotests/179.out | 160 +
 tests/qemu-iotests/group   |   1 +
 3 files changed, 293 insertions(+)
 create mode 100755 tests/qemu-iotests/179
 create mode 100644 tests/qemu-iotests/179.out

diff --git a/tests/qemu-iotests/179 b/tests/qemu-iotests/179
new file mode 100755
index 000..124666f
--- /dev/null
+++ b/tests/qemu-iotests/179
@@ -0,0 +1,132 @@
+#!/bin/bash
+#
+# Test case for write zeroes with unmap
+#
+# Copyright (C) 2017 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see .
+#
+
+# creator
+owner=ebl...@redhat.com
+
+seq="$(basename $0)"
+echo "QA output created by $seq"
+
+here="$PWD"
+status=1   # failure is the default!
+
+_cleanup()
+{
+   _cleanup_test_img
+}
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common.rc
+. ./common.filter
+
+_supported_fmt qcow2
+_supported_proto file
+_supported_os Linux
+
+# v2 images can't mark clusters as zero
+_unsupported_imgopts compat=0.10
+
+echo
+echo '=== Testing write zeroes with unmap ==='
+echo
+
+TEST_IMG="$TEST_IMG.base" _make_test_img 64M
+_make_test_img -b "$TEST_IMG.base"
+
+# Offsets chosen at or near 2M boundaries so test works at any cluster size
+
+# Aligned writes to unallocated cluster should not allocate mapping, but must
+# mark cluster as zero, whether or not unmap was requested
+$QEMU_IO -c "write -z -u 2M 2M" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write -z 6M 2M" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "map" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG.base" | _filter_qemu_img_map
+
+# Unaligned writes need not allocate mapping if the cluster already reads
+# as zero, but must mark cluster as zero, whether or not unmap was requested
+$QEMU_IO -c "write -z -u 10485761 2097150" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write -z 14680065 2097150" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "map" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG.base" | _filter_qemu_img_map
+
+# Requesting unmap of normal data must deallocate; omitting unmap should
+# preserve the mapping
+$QEMU_IO -c "write 18M 14M" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write -z -u 20M 2M" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write -z 24M 6M" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "map" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG.base" | _filter_qemu_img_map
+
+# Likewise when writing on already-mapped zero data
+$QEMU_IO -c "write -z -u 26M 2M" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write -z 28M 2M" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "map" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IMG map --output=json "$TEST_IMG.base" | _filter_qemu_img_map
+
+# Writing on unmapped zeroes does not allocate
+$QEMU_IO -c "write -z 32M 8M" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write -z -u 34M 2M" "$TEST_IMG.base" | _filter_qemu_io
+$QEMU_IO -c "write -z 36M 2M" "$TEST_IMG.base" | _filter_qemu_io

[Qemu-devel] [PATCH v12 09/10] qcow2: Assert that cluster operations are aligned

2017-05-03 Thread Eric Blake
We already audited (in commit 0c1bd469) that qcow2_discard_clusters()
is only passed cluster-aligned start values; but we can further
tighten the assertion that the only unaligned end value is at EOF.

Recent commits have taken advantage of an unaligned tail cluster,
for both discard and write zeroes.

Signed-off-by: Eric Blake 
Reviewed-by: Max Reitz 

---
v12: no change
v11: reserved for blkdebug half of v10
v10: rebase to context
v9: rebase to master, by asserting that only tail cluster is unaligned
v7, v8: only earlier half of series submitted for 2.9
v6: avoid assertion on non-cluster-aligned image, use s->cluster_sectors
to avoid a shift, drop R-b
v5: no change
v4: new patch
---
 block/qcow2-cluster.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 78fbe34..9aab6dd 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1557,11 +1557,10 @@ int qcow2_discard_clusters(BlockDriverState *bs, 
uint64_t offset,

 end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);

-/* The caller must cluster-align start; round end down except at EOF */
+/* Caller must pass aligned values, except at image end */
 assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
-if (end_offset != bs->total_sectors * BDRV_SECTOR_SIZE) {
-end_offset = start_of_cluster(s, end_offset);
-}
+assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) ||
+   end_offset == bs->total_sectors << BDRV_SECTOR_BITS);

 nb_clusters = size_to_clusters(s, end_offset - offset);

@@ -1644,9 +1643,17 @@ int qcow2_zero_clusters(BlockDriverState *bs, uint64_t 
offset, int nb_sectors,
 int flags)
 {
 BDRVQcow2State *s = bs->opaque;
+uint64_t end_offset;
 uint64_t nb_clusters;
 int ret;

+end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);
+
+/* Caller must pass aligned values, except at image end */
+assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
+assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) ||
+   end_offset == bs->total_sectors << BDRV_SECTOR_BITS);
+
 /* The zero flag is only supported by version 3 and newer */
 if (s->qcow_version < 3) {
 return -ENOTSUP;
-- 
2.9.3




[Qemu-devel] [PATCH v12 05/10] qcow2: Optimize zero_single_l2() to minimize L2 churn

2017-05-03 Thread Eric Blake
Similar to discard_single_l2(), we should try to avoid dirtying
the L2 cache when the cluster we are changing already has the
right characteristics.

Note that by the time we get to zero_single_l2(), BDRV_REQ_MAY_UNMAP
is a requirement to unallocate a cluster (this is because the block
layer clears that flag if discard.* flags during open requested that
we never punch holes - see the conversation around commit 170f4b2e,
https://lists.gnu.org/archive/html/qemu-devel/2016-09/msg07306.html).
Therefore, this patch can only reuse a zero cluster as-is if either
unmapping is not requested, or if the zero cluster was not associated
with an allocation.

Technically, there are some cases where an unallocated cluster
already reads as all zeroes (namely, when there is no backing file
[easy: check bs->backing], or when the backing file also reads as
zeroes [harder: we can't check bdrv_get_block_status since we are
already holding the lock]), where the guest would not immediately see
a difference if we left that cluster unallocated.  But if the user
did not request unmapping, leaving an unallocated cluster is wrong;
and even if the user DID request unmapping, keeping a cluster
unallocated risks a subtle semantic change of guest-visible contents
if a backing file is later added, and it is not worth auditing
whether all internal uses such as mirror properly avoid an unmap
request.  Thus, this patch is intentionally limited to just clusters
that are already marked as zero.

Signed-off-by: Eric Blake 

---
v12: store cluster type in temporary
v11: reserved for blkdebug half of v10
v10: new patch, replacing earlier attempt to use unallocated clusters,
and ditching any optimization of v2 files
---
 block/qcow2-cluster.c | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 14e2086..78fbe34 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1599,6 +1599,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t 
offset,
 int l2_index;
 int ret;
 int i;
+bool unmap = !!(flags & BDRV_REQ_MAY_UNMAP);

 ret = get_cluster_table(bs, offset, _table, _index);
 if (ret < 0) {
@@ -1611,12 +1612,22 @@ static int zero_single_l2(BlockDriverState *bs, 
uint64_t offset,

 for (i = 0; i < nb_clusters; i++) {
 uint64_t old_offset;
+int cluster_type;

 old_offset = be64_to_cpu(l2_table[l2_index + i]);

-/* Update L2 entries */
+/*
+ * Minimize L2 changes if the cluster already reads back as
+ * zeroes with correct allocation.
+ */
+cluster_type = qcow2_get_cluster_type(old_offset);
+if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN ||
+(cluster_type == QCOW2_CLUSTER_ZERO_ALLOC && !unmap)) {
+continue;
+}
+
 qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
-if (old_offset & QCOW_OFLAG_COMPRESSED || flags & BDRV_REQ_MAY_UNMAP) {
+if (cluster_type == QCOW2_CLUSTER_COMPRESSED || unmap) {
 l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
 qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
 } else {
-- 
2.9.3




[Qemu-devel] [PATCH v12 01/10] qcow2: Use consistent switch indentation

2017-05-03 Thread Eric Blake
Fix a couple of inconsistent indentations, before an upcoming
patch further tweaks the switch statements.  While at it, make
some tweaks for shorter lines to keep checkpatch happy (best
viewed with 'git diff -b').

Signed-off-by: Eric Blake 

---
v12: new patch
---
 block/qcow2-cluster.c  | 32 -
 block/qcow2-refcount.c | 96 ++
 2 files changed, 65 insertions(+), 63 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 31077d8..335a505 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1504,25 +1504,25 @@ static int discard_single_l2(BlockDriverState *bs, 
uint64_t offset,
  * but rather fall through to the backing file.
  */
 switch (qcow2_get_cluster_type(old_l2_entry)) {
-case QCOW2_CLUSTER_UNALLOCATED:
-if (full_discard || !bs->backing) {
-continue;
-}
-break;
+case QCOW2_CLUSTER_UNALLOCATED:
+if (full_discard || !bs->backing) {
+continue;
+}
+break;

-case QCOW2_CLUSTER_ZERO:
-/* Preallocated zero clusters should be discarded in any case 
*/
-if (!full_discard && (old_l2_entry & L2E_OFFSET_MASK) == 0) {
-continue;
-}
-break;
+case QCOW2_CLUSTER_ZERO:
+/* Preallocated zero clusters should be discarded in any case */
+if (!full_discard && (old_l2_entry & L2E_OFFSET_MASK) == 0) {
+continue;
+}
+break;

-case QCOW2_CLUSTER_NORMAL:
-case QCOW2_CLUSTER_COMPRESSED:
-break;
+case QCOW2_CLUSTER_NORMAL:
+case QCOW2_CLUSTER_COMPRESSED:
+break;

-default:
-abort();
+default:
+abort();
 }

 /* First remove L2 entries */
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 4efca7e..a5a0076 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -1117,70 +1117,72 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
 goto fail;
 }

-for(j = 0; j < s->l2_size; j++) {
+for (j = 0; j < s->l2_size; j++) {
 uint64_t cluster_index;
+uint64_t offset_masked;

 offset = be64_to_cpu(l2_table[j]);
 old_offset = offset;
+offset_masked = offset & L2E_OFFSET_MASK;
 offset &= ~QCOW_OFLAG_COPIED;

 switch (qcow2_get_cluster_type(offset)) {
-case QCOW2_CLUSTER_COMPRESSED:
-nb_csectors = ((offset >> s->csize_shift) &
-   s->csize_mask) + 1;
-if (addend != 0) {
-ret = update_refcount(bs,
-(offset & s->cluster_offset_mask) & ~511,
-nb_csectors * 512, abs(addend), addend < 0,
-QCOW2_DISCARD_SNAPSHOT);
-if (ret < 0) {
-goto fail;
-}
-}
-/* compressed clusters are never modified */
-refcount = 2;
-break;
-
-case QCOW2_CLUSTER_NORMAL:
-case QCOW2_CLUSTER_ZERO:
-if (offset_into_cluster(s, offset & L2E_OFFSET_MASK)) {
-qcow2_signal_corruption(bs, true, -1, -1, "Data "
-"cluster offset %#llx "
-"unaligned (L2 offset: %#"
-PRIx64 ", L2 index: %#x)",
-offset & L2E_OFFSET_MASK,
-l2_offset, j);
-ret = -EIO;
+case QCOW2_CLUSTER_COMPRESSED:
+nb_csectors = ((offset >> s->csize_shift) &
+   s->csize_mask) + 1;
+if (addend != 0) {
+ret = update_refcount(bs,
+(offset & s->cluster_offset_mask) & ~511,
+nb_csectors * 512, abs(addend), addend < 0,
+QCOW2_DISCARD_SNAPSHOT);
+if (ret < 0) {
 goto fail;
 }
+}
+/* compressed clusters are never modified */
+refcount = 2;
+break;

-cluster_index = (offset & L2E_OFFSET_MASK) >> 
s->cluster_bits;
-  

[Qemu-devel] [PATCH v12 06/10] iotests: Improve _filter_qemu_img_map

2017-05-03 Thread Eric Blake
Although _filter_qemu_img_map documents that it scrubs offsets, it
was only doing so for human mode.  Of the existing tests using the
filter (97, 122, 150, 154, 176), two of them are affected, but it
does not hurt the validity of the tests to not require particular
mappings (another test, 66, uses offsets but intentionally does not
pass through _filter_qemu_img_map, because it checks that offsets
are unchanged before and after an operation).

Another justification for this patch is that it will allow a future
patch to utilize 'qemu-img map --output=json' to check the status of
preallocated zero clusters without regards to the mapping (since
the qcow2 mapping can be very sensitive to the chosen cluster size,
when preallocation is not in use).

Signed-off-by: Eric Blake 

---
v12: new patch
---
 tests/qemu-iotests/common.filter |  4 +++-
 tests/qemu-iotests/122.out   | 16 
 tests/qemu-iotests/154.out   | 30 +++---
 3 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/tests/qemu-iotests/common.filter b/tests/qemu-iotests/common.filter
index f58548d..2c14f15 100644
--- a/tests/qemu-iotests/common.filter
+++ b/tests/qemu-iotests/common.filter
@@ -152,10 +152,12 @@ _filter_img_info()
 -e "/log_size: [0-9]\\+/d"
 }

-# filter out offsets and file names from qemu-img map
+# filter out offsets and file names from qemu-img map; good for both
+# human and json output
 _filter_qemu_img_map()
 {
 sed -e 's/\([0-9a-fx]* *[0-9a-fx]* *\)[0-9a-fx]* */\1/g' \
+   -e 's/"offset": [0-9]\+/"offset": OFFSET/g' \
 -e 's/Mapped to *//' | _filter_testdir | _filter_imgfmt
 }

diff --git a/tests/qemu-iotests/122.out b/tests/qemu-iotests/122.out
index 9317d80..47d8656 100644
--- a/tests/qemu-iotests/122.out
+++ b/tests/qemu-iotests/122.out
@@ -112,7 +112,7 @@ read 3145728/3145728 bytes at offset 0
 3 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 63963136/63963136 bytes at offset 3145728
 61 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, 
"offset": 327680}]
+[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, 
"offset": OFFSET}]

 convert -c -S 0:
 read 3145728/3145728 bytes at offset 0
@@ -134,7 +134,7 @@ read 30408704/30408704 bytes at offset 3145728
 29 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 33554432/33554432 bytes at offset 33554432
 32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, 
"offset": 327680}]
+[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, 
"offset": OFFSET}]

 convert -c -S 0 with source backing file:
 read 3145728/3145728 bytes at offset 0
@@ -152,7 +152,7 @@ read 30408704/30408704 bytes at offset 3145728
 29 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 read 33554432/33554432 bytes at offset 33554432
 32 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
-[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, 
"offset": 327680}]
+[{ "start": 0, "length": 67108864, "depth": 0, "zero": false, "data": true, 
"offset": OFFSET}]

 convert -c -S 0 -B ...
 read 3145728/3145728 bytes at offset 0
@@ -176,11 +176,11 @@ wrote 1024/1024 bytes at offset 17408
 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)

 convert -S 4k
-[{ "start": 0, "length": 1024, "depth": 0, "zero": false, "data": true, 
"offset": 8192},
+[{ "start": 0, "length": 1024, "depth": 0, "zero": false, "data": true, 
"offset": OFFSET},
 { "start": 1024, "length": 7168, "depth": 0, "zero": true, "data": false},
-{ "start": 8192, "length": 1024, "depth": 0, "zero": false, "data": true, 
"offset": 9216},
+{ "start": 8192, "length": 1024, "depth": 0, "zero": false, "data": true, 
"offset": OFFSET},
 { "start": 9216, "length": 8192, "depth": 0, "zero": true, "data": false},
-{ "start": 17408, "length": 1024, "depth": 0, "zero": false, "data": true, 
"offset": 10240},
+{ "start": 17408, "length": 1024, "depth": 0, "zero": false, "data": true, 
"offset": OFFSET},
 { "start": 18432, "length": 67090432, "depth": 0, "zero": true, "data": false}]

 convert -c -S 4k
@@ -192,9 +192,9 @@ convert -c -S 4k
 { "start": 18432, "length": 67090432, "depth": 0, "zero": true, "data": false}]

 convert -S 8k
-[{ "start": 0, "length": 9216, "depth": 0, "zero": false, "data": true, 
"offset": 8192},
+[{ "start": 0, "length": 9216, "depth": 0, "zero": false, "data": true, 
"offset": OFFSET},
 { "start": 9216, "length": 8192, "depth": 0, "zero": true, "data": false},
-{ "start": 17408, "length": 1024, "depth": 0, "zero": false, "data": true, 
"offset": 17408},
+{ "start": 17408, "length": 1024, "depth": 0, "zero": false, "data": true, 
"offset": OFFSET},
 { "start": 18432, "length": 67090432, "depth": 0, "zero": true, "data": false}]

 convert -c -S 8k
diff --git a/tests/qemu-iotests/154.out 

[Qemu-devel] [PATCH v12 02/10] block: Update comments on BDRV_BLOCK_* meanings

2017-05-03 Thread Eric Blake
We had some conflicting documentation: a nice 8-way table that
described all possible combinations of DATA, ZERO, and
OFFSET_VALID, contrasted with text that implied that OFFSET_VALID
always meant raw data could be read directly.  Furthermore, the
text refers a lot to bs->file, even though the interface was
updated back in 67a0fd2a to let the driver pass back which BDS (not
necessarily bs->file).  As the 8-way table is the intended
semantics, simplify the rest of the text to get rid of the
confusion.

ALLOCATED is always set by the block layer for convenience (drivers
do not have to worry about it). RAW is used only internally, but
by more than the raw driver.  Document these additional items on
the driver callback.

Suggested-by: Max Reitz 
Signed-off-by: Eric Blake 

---
v12: even more wording tweaks
v11: reserved for blkdebug half of v10
v10: new patch
---
 include/block/block.h | 35 +++
 include/block/block_int.h |  7 +++
 2 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index 862eb56..c8bec7d 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -120,29 +120,32 @@ typedef struct HDGeometry {
 #define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS)

 /*
- * Allocation status flags
- * BDRV_BLOCK_DATA: data is read from a file returned by bdrv_get_block_status.
- * BDRV_BLOCK_ZERO: sectors read as zero
- * BDRV_BLOCK_OFFSET_VALID: sector stored as raw data in a file returned by
- *  bdrv_get_block_status.
+ * Allocation status flags for bdrv_get_block_status() and friends.
+ *
+ * Public flags:
+ * BDRV_BLOCK_DATA: allocation for data at offset is tied to this layer
+ * BDRV_BLOCK_ZERO: offset reads as zero
+ * BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data
  * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this
- *   layer (as opposed to the backing file)
- * BDRV_BLOCK_RAW: used internally to indicate that the request
- * was answered by the raw driver and that one
- * should look in bs->file directly.
+ *   layer (short for DATA || ZERO), set by block layer
  *
- * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 represent the offset in
- * bs->file where sector data can be read from as raw data.
+ * Internal flag:
+ * BDRV_BLOCK_RAW: used internally to indicate that the request was
+ * answered by a passthrough driver such as raw and that the
+ * block layer should recompute the answer from bs->file.
  *
- * DATA == 0 && ZERO == 0 means that data is read from backing_hd if present.
+ * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 (BDRV_BLOCK_OFFSET_MASK)
+ * represent the offset in the returned BDS that is allocated for the
+ * corresponding raw data; however, whether that offset actually contains
+ * data also depends on BDRV_BLOCK_DATA and BDRV_BLOCK_ZERO, as follows:
  *
  * DATA ZERO OFFSET_VALID
- *  ttt   sectors read as zero, bs->file is zero at offset
- *  tft   sectors read as valid from bs->file at offset
- *  ftt   sectors preallocated, read as zero, bs->file not
+ *  ttt   sectors read as zero, returned file is zero at offset
+ *  tft   sectors read as valid from file at offset
+ *  ftt   sectors preallocated, read as zero, returned file not
  *necessarily zero at offset
  *  fft   sectors preallocated but read from backing_hd,
- *bs->file contains garbage at offset
+ *returned file contains garbage at offset
  *  ttf   sectors preallocated, read as zero, unknown offset
  *  tff   sectors read from unknown file or offset
  *  ftf   not allocated or unknown offset, read as zero
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 8773940..1fdfff7 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -165,6 +165,13 @@ struct BlockDriver {
 int64_t offset, int count, BdrvRequestFlags flags);
 int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs,
 int64_t offset, int count);
+
+/*
+ * Building block for bdrv_block_status[_above]. The driver should
+ * answer only according to the current layer, and should not
+ * set BDRV_BLOCK_ALLOCATED, but may set BDRV_BLOCK_RAW.  See block.h
+ * for the meaning of _DATA, _ZERO, and _OFFSET_VALID.
+ */
 int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs,
 int64_t sector_num, int nb_sectors, int *pnum,
 BlockDriverState **file);
-- 
2.9.3




[Qemu-devel] [PATCH v12 00/10] qcow2 zero-cluster tweaks [was add blkdebug tests]

2017-05-03 Thread Eric Blake
I've collected several improvements for qcow2 zero-cluster handling.

Available as a tag at:
git fetch git://repo.or.cz/qemu/ericb.git nbd-blkdebug-v12

Marked as v12 for "hysterical raisins", since it it the half of
v10 [1] that was not resubmitted as v11 [2].

Depends on Max's block tree:
https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg00641.html
and on Max's qcow2 cleanups:
https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg00689.html

[1] https://lists.gnu.org/archive/html/qemu-devel/2017-04/msg05227.html
[2] https://lists.gnu.org/archive/html/qemu-devel/2017-04/msg05896.html

Changes since last posting:
- lots of tweaks to resolve Max's review findings, including
rewriting my additions to test 154
- a new patch splitting QCOW2_CLUSTER_ZERO that fell out from
my review of Max's work
- defer any optimizations of a backing file with different length
until later (I will still post an RFC patch to explore what
optimizations a BDRV_BLOCK_EOF would allow, but didn't want to
hold up this series any further)

001/10:[down] 'qcow2: Use consistent switch indentation'
002/10:[0043] [FC] 'block: Update comments on BDRV_BLOCK_* meanings'
003/10:[0023] [FC] 'qcow2: Correctly report status of preallocated zero 
clusters'
004/10:[down] 'qcow2: Make distinction between zero cluster types obvious'
005/10:[0008] [FC] 'qcow2: Optimize zero_single_l2() to minimize L2 churn'
006/10:[down] 'iotests: Improve _filter_qemu_img_map'
007/10:[0107] [FC] 'iotests: Add test 179 to cover write zeroes with unmap'
008/10:[0260] [FC] 'qcow2: Optimize write zero of unaligned tail cluster'
009/10:[] [--] 'qcow2: Assert that cluster operations are aligned'
010/10:[0005] [FC] 'qcow2: Discard/zero clusters by byte count'

Eric Blake (10):
  qcow2: Use consistent switch indentation
  block: Update comments on BDRV_BLOCK_* meanings
  qcow2: Correctly report status of preallocated zero clusters
  qcow2: Make distinction between zero cluster types obvious
  qcow2: Optimize zero_single_l2() to minimize L2 churn
  iotests: Improve _filter_qemu_img_map
  iotests: Add test 179 to cover write zeroes with unmap
  qcow2: Optimize write zero of unaligned tail cluster
  qcow2: Assert that cluster operations are aligned
  qcow2: Discard/zero clusters by byte count

 block/qcow2.h|  17 +++--
 include/block/block.h|  35 +
 include/block/block_int.h|   7 ++
 block/qcow2-cluster.c| 156 ++
 block/qcow2-refcount.c   | 124 +++---
 block/qcow2-snapshot.c   |   7 +-
 block/qcow2.c|  38 ++
 tests/qemu-iotests/common.filter |   4 +-
 tests/qemu-iotests/122.out   |  16 ++--
 tests/qemu-iotests/154   | 160 ++-
 tests/qemu-iotests/154.out   | 159 ++
 tests/qemu-iotests/179   | 132 
 tests/qemu-iotests/179.out   | 160 +++
 tests/qemu-iotests/group |   1 +
 14 files changed, 818 insertions(+), 198 deletions(-)
 create mode 100755 tests/qemu-iotests/179
 create mode 100644 tests/qemu-iotests/179.out

-- 
2.9.3




[Qemu-devel] [PATCH v12 03/10] qcow2: Correctly report status of preallocated zero clusters

2017-05-03 Thread Eric Blake
We were throwing away the preallocation information associated with
zero clusters.  But we should be matching the well-defined semantics
in bdrv_get_block_status(), where (BDRV_BLOCK_ZERO |
BDRV_BLOCK_OFFSET_VALID) informs the user which offset is reserved,
while still reminding the user that reading from that offset is
likely to read garbage.

count_contiguous_clusters_by_type() is now used only for unallocated
cluster runs, hence it gets renamed and tightened.

Making this change lets us see which portions of an image are zero
but preallocated, when using qemu-img map --output=json.  The
--output=human side intentionally ignores all zero clusters, whether
or not they are preallocated.

The fact that there is no change to qemu-iotests './check -qcow2'
merely means that we aren't yet testing this aspect of qemu-img;
a later patch will add a test.

Signed-off-by: Eric Blake 

---
v12: rename helper function
v11: reserved for blkdebug half of v10
v10: new patch
---
 block/qcow2-cluster.c | 45 +++--
 1 file changed, 35 insertions(+), 10 deletions(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 335a505..f3bfce6 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -334,16 +334,23 @@ static int count_contiguous_clusters(int nb_clusters, int 
cluster_size,
return i;
 }

-static int count_contiguous_clusters_by_type(int nb_clusters,
- uint64_t *l2_table,
- int wanted_type)
+/*
+ * Checks how many consecutive unallocated clusters in a given L2
+ * table have the same cluster type.
+ */
+static int count_contiguous_clusters_unallocated(int nb_clusters,
+ uint64_t *l2_table,
+ int wanted_type)
 {
 int i;

+assert(wanted_type == QCOW2_CLUSTER_ZERO ||
+   wanted_type == QCOW2_CLUSTER_UNALLOCATED);
 for (i = 0; i < nb_clusters; i++) {
-int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i]));
+uint64_t entry = be64_to_cpu(l2_table[i]);
+int type = qcow2_get_cluster_type(entry);

-if (type != wanted_type) {
+if (type != wanted_type || entry & L2E_OFFSET_MASK) {
 break;
 }
 }
@@ -565,14 +572,32 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, 
uint64_t offset,
 ret = -EIO;
 goto fail;
 }
-c = count_contiguous_clusters_by_type(nb_clusters, _table[l2_index],
-  QCOW2_CLUSTER_ZERO);
-*cluster_offset = 0;
+/* Distinguish between pure zero clusters and pre-allocated ones */
+if (*cluster_offset & L2E_OFFSET_MASK) {
+c = count_contiguous_clusters(nb_clusters, s->cluster_size,
+  _table[l2_index], 
QCOW_OFLAG_ZERO);
+*cluster_offset &= L2E_OFFSET_MASK;
+if (offset_into_cluster(s, *cluster_offset)) {
+qcow2_signal_corruption(bs, true, -1, -1,
+"Preallocated zero cluster offset %#"
+PRIx64 " unaligned (L2 offset: %#"
+PRIx64 ", L2 index: %#x)",
+*cluster_offset, l2_offset, l2_index);
+ret = -EIO;
+goto fail;
+}
+} else {
+c = count_contiguous_clusters_unallocated(nb_clusters,
+  _table[l2_index],
+  QCOW2_CLUSTER_ZERO);
+*cluster_offset = 0;
+}
 break;
 case QCOW2_CLUSTER_UNALLOCATED:
 /* how many empty clusters ? */
-c = count_contiguous_clusters_by_type(nb_clusters, _table[l2_index],
-  QCOW2_CLUSTER_UNALLOCATED);
+c = count_contiguous_clusters_unallocated(nb_clusters,
+  _table[l2_index],
+  QCOW2_CLUSTER_UNALLOCATED);
 *cluster_offset = 0;
 break;
 case QCOW2_CLUSTER_NORMAL:
-- 
2.9.3




[Qemu-devel] [PATCH v12 04/10] qcow2: Make distinction between zero cluster types obvious

2017-05-03 Thread Eric Blake
Treat plain zero clusters differently from allocated ones, so that
we can simplify the logic of checking whether an offset is present.
Do this by splitting QCOW2_CLUSTER_ZERO into two new enums,
QCOW2_CLUSTER_ZERO_PLAIN and QCOW2_CLUSTER_ZERO_ALLOC.

I tried to arrange the enum so that we could use
'ret <= QCOW2_CLUSTER_ZERO_PLAIN' for all unallocated types, and
'ret >= QCOW2_CLUSTER_ZERO_ALLOC' for allocated types, although
I didn't actually end up taking advantage of the layout.

In many cases, this leads to simpler code, by properly combining
cases (sometimes, both zero types pair together, other times,
plain zero is more like unallocated while allocated zero is more
like normal).

Signed-off-by: Eric Blake 

---
v12: new patch
---
 block/qcow2.h  |  8 +--
 block/qcow2-cluster.c  | 65 ++
 block/qcow2-refcount.c | 40 +--
 block/qcow2.c  |  9 ---
 4 files changed, 51 insertions(+), 71 deletions(-)

diff --git a/block/qcow2.h b/block/qcow2.h
index 8731f24..142f81b 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -351,9 +351,10 @@ typedef struct QCowL2Meta

 enum {
 QCOW2_CLUSTER_UNALLOCATED,
+QCOW2_CLUSTER_ZERO_PLAIN,
+QCOW2_CLUSTER_ZERO_ALLOC,
 QCOW2_CLUSTER_NORMAL,
 QCOW2_CLUSTER_COMPRESSED,
-QCOW2_CLUSTER_ZERO
 };

 typedef enum QCow2MetadataOverlap {
@@ -448,7 +449,10 @@ static inline int qcow2_get_cluster_type(uint64_t l2_entry)
 if (l2_entry & QCOW_OFLAG_COMPRESSED) {
 return QCOW2_CLUSTER_COMPRESSED;
 } else if (l2_entry & QCOW_OFLAG_ZERO) {
-return QCOW2_CLUSTER_ZERO;
+if (l2_entry & L2E_OFFSET_MASK) {
+return QCOW2_CLUSTER_ZERO_ALLOC;
+}
+return QCOW2_CLUSTER_ZERO_PLAIN;
 } else if (!(l2_entry & L2E_OFFSET_MASK)) {
 return QCOW2_CLUSTER_UNALLOCATED;
 } else {
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index f3bfce6..14e2086 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -321,8 +321,7 @@ static int count_contiguous_clusters(int nb_clusters, int 
cluster_size,
 /* must be allocated */
 first_cluster_type = qcow2_get_cluster_type(first_entry);
 assert(first_cluster_type == QCOW2_CLUSTER_NORMAL ||
-   (first_cluster_type == QCOW2_CLUSTER_ZERO &&
-(first_entry & L2E_OFFSET_MASK) != 0));
+   first_cluster_type == QCOW2_CLUSTER_ZERO_ALLOC);

 for (i = 0; i < nb_clusters; i++) {
 uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask;
@@ -344,7 +343,7 @@ static int count_contiguous_clusters_unallocated(int 
nb_clusters,
 {
 int i;

-assert(wanted_type == QCOW2_CLUSTER_ZERO ||
+assert(wanted_type == QCOW2_CLUSTER_ZERO_PLAIN ||
wanted_type == QCOW2_CLUSTER_UNALLOCATED);
 for (i = 0; i < nb_clusters; i++) {
 uint64_t entry = be64_to_cpu(l2_table[i]);
@@ -558,52 +557,32 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, 
uint64_t offset,
 assert(nb_clusters <= INT_MAX);

 ret = qcow2_get_cluster_type(*cluster_offset);
+if (s->qcow_version < 3 && (ret == QCOW2_CLUSTER_ZERO_PLAIN ||
+ret == QCOW2_CLUSTER_ZERO_ALLOC)) {
+qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found"
+" in pre-v3 image (L2 offset: %#" PRIx64
+", L2 index: %#x)", l2_offset, l2_index);
+ret = -EIO;
+goto fail;
+}
 switch (ret) {
 case QCOW2_CLUSTER_COMPRESSED:
 /* Compressed clusters can only be processed one by one */
 c = 1;
 *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
 break;
-case QCOW2_CLUSTER_ZERO:
-if (s->qcow_version < 3) {
-qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry 
found"
-" in pre-v3 image (L2 offset: %#" PRIx64
-", L2 index: %#x)", l2_offset, l2_index);
-ret = -EIO;
-goto fail;
-}
-/* Distinguish between pure zero clusters and pre-allocated ones */
-if (*cluster_offset & L2E_OFFSET_MASK) {
-c = count_contiguous_clusters(nb_clusters, s->cluster_size,
-  _table[l2_index], 
QCOW_OFLAG_ZERO);
-*cluster_offset &= L2E_OFFSET_MASK;
-if (offset_into_cluster(s, *cluster_offset)) {
-qcow2_signal_corruption(bs, true, -1, -1,
-"Preallocated zero cluster offset %#"
-PRIx64 " unaligned (L2 offset: %#"
-PRIx64 ", L2 index: %#x)",
-*cluster_offset, l2_offset, l2_index);
-ret = -EIO;
-goto fail;
-}
-} else {
-c = 

Re: [Qemu-devel] [PATCH] tests: acpi: extend cphp and memhp testcase with numa distance check

2017-05-03 Thread He Chen
On Wed, May 03, 2017 at 04:33:04PM -0300, Eduardo Habkost wrote:
> On Wed, May 03, 2017 at 03:07:05PM +0200, Igor Mammedov wrote:
> > On Wed,  3 May 2017 17:17:16 +0800
> > He Chen  wrote:
> > 
> > > Signed-off-by: He Chen 
> > Reviewed-by: Igor Mammedov 
> 
> Thanks!
> 
> > 
> > Eduardo,
> > 
> > could you take it through your tree as well and
> > add as part of this patch SLIT tables blobs for
> > piix4/q35 that tests/acpi-test-data/rebuild-expected-aml.sh
> > will generate.
> 
> Queued to numa-next, updated SRAT and SLIT blobs. diffstat of commit fixup:
> 
>  tests/acpi-test-data/pc/SLIT.cphp   | Bin 0 -> 48 bytes
>  tests/acpi-test-data/pc/SLIT.memhp  | Bin 0 -> 48 bytes
>  tests/acpi-test-data/pc/SRAT.memhp  | Bin 224 -> 264 bytes
>  tests/acpi-test-data/q35/SLIT.cphp  | Bin 0 -> 48 bytes
>  tests/acpi-test-data/q35/SLIT.memhp | Bin 0 -> 48 bytes
>  tests/acpi-test-data/q35/SRAT.memhp | Bin 224 -> 264 bytes
>  6 files changed, 0 insertions(+), 0 deletions(-)
> 
> Are you able to explain why SRAT.memhp changed, but SRAT.cphp didn't?
> 

ACPI spec says SRAT table is an optional table that provides information
that allows OSPM to associate processors and memory ranges, including
ranges of memory provided by hot-added memory devices, with system
localities proximity domains and clock domains.

I think the reason why SRAT.cphp does not change is that in CPU hotplug
test, the NUMA topology is not changed although the NUMA distance
information is added (distance information is contained in SLIT table
rather than SRAT table).

While for memory hot plug test, we change the NUMA topology (NUMA node
from 1 to 2 nodes.), thus the processors and memory ranges is changed,
which results in SRAT table gets changed.
> > 
> > > ---
> > >  tests/bios-tables-test.c | 16 
> > >  1 file changed, 12 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c
> > > index 88dbf97..c593165 100644
> > > --- a/tests/bios-tables-test.c
> > > +++ b/tests/bios-tables-test.c
> > > @@ -710,7 +710,8 @@ static void test_acpi_piix4_tcg_cphp(void)
> > >  data.machine = MACHINE_PC;
> > >  data.variant = ".cphp";
> > >  test_acpi_one("-smp 2,cores=3,sockets=2,maxcpus=6"
> > > -  " -numa node -numa node",
> > > +  " -numa node -numa node"
> > > +  " -numa dist,src=0,dst=1,val=21",
> > >);
> > >  free_test_data();
> > >  }
> > > @@ -723,7 +724,8 @@ static void test_acpi_q35_tcg_cphp(void)
> > >  data.machine = MACHINE_Q35;
> > >  data.variant = ".cphp";
> > >  test_acpi_one(" -smp 2,cores=3,sockets=2,maxcpus=6"
> > > -  " -numa node -numa node",
> > > +  " -numa node -numa node"
> > > +  " -numa dist,src=0,dst=1,val=21",
> > >);
> > >  free_test_data();
> > >  }
> > > @@ -772,7 +774,10 @@ static void test_acpi_q35_tcg_memhp(void)
> > >  memset(, 0, sizeof(data));
> > >  data.machine = MACHINE_Q35;
> > >  data.variant = ".memhp";
> > > -test_acpi_one(" -m 128,slots=3,maxmem=1G -numa node", );
> > > +test_acpi_one(" -m 128,slots=3,maxmem=1G"
> > > +  " -numa node -numa node"
> > > +  " -numa dist,src=0,dst=1,val=21",
> > > +  );
> > >  free_test_data();
> > >  }
> > >  
> > > @@ -783,7 +788,10 @@ static void test_acpi_piix4_tcg_memhp(void)
> > >  memset(, 0, sizeof(data));
> > >  data.machine = MACHINE_PC;
> > >  data.variant = ".memhp";
> > > -test_acpi_one(" -m 128,slots=3,maxmem=1G -numa node", );
> > > +test_acpi_one(" -m 128,slots=3,maxmem=1G"
> > > +  " -numa node -numa node"
> > > +  " -numa dist,src=0,dst=1,val=21",
> > > +  );
> > >  free_test_data();
> > >  }
> > >  
> > 
> 
> -- 
> Eduardo
> 



Re: [Qemu-devel] [PATCH 3/5] mc146818rtc: properly count the time for the next interrupt

2017-05-03 Thread Xiao Guangrong



On 05/03/2017 11:32 PM, Paolo Bonzini wrote:

On 12/04/2017 11:51, guangrong.x...@gmail.com wrote:

+#ifdef TARGET_I386
+/*
+ * if more than period clocks were passed, i.e, the timer interrupt
+ * has been lost, we should catch up the time.
+ */
+if (s->lost_tick_policy == LOST_TICK_POLICY_SLEW &&
+(lost_clo / period)) {
+int lost_interrupt = lost_clock / period;
+
+s->irq_coalesced += lost_interrupt;
+lost_clock -= lost_interrupt * period;
+if (lost_interrupt) {
+DPRINTF_C("cmos: compensate %d interrupts, coalesced irqs "
+  "increased to %d\n", lost_interrupt,
+  s->irq_coalesced);
+rtc_coalesced_timer_update(s);
+}


I think you should merge these two patches, since both of them
essentially update the number of coalesced ticks and then split it
between s->irq_coalesced and lost_clock.


I thought these two patches fix two different issues, one for clock
lost for coalesced-irq and another for period reconfiguration. Your
suggestion sounds reasonable indeed, will merged them in the next
version. :)




Re: [Qemu-devel] [PATCH 1/3] colo-compare: serialize compare thread's initialization with main thread

2017-05-03 Thread Hailiang Zhang

Hi Jason,

On 2017/4/25 19:33, Jason Wang wrote:


On 2017年04月25日 17:59, Hailiang Zhang wrote:

On 2017/4/25 16:41, Jason Wang wrote:

On 2017年04月24日 14:03, Hailiang Zhang wrote:

On 2017/4/24 12:10, Jason Wang wrote:

On 2017年04月20日 15:46, zhanghailiang wrote:

We call qemu_chr_fe_set_handlers() in colo-compare thread, it is used
to detach watched fd from default main context, so it has chance to
handle the same watched fd with main thread concurrently, which will
trigger an error report:
"qemu-char.c:918: io_watch_poll_finalize: Assertion `iwp->src ==
((void *)0)' failed."

Anyway to prevent fd from being handled by main thread before creating
colo thread? Using semaphore seems not elegant.

So how about calling qemu_mutex_lock_iothread() before
qemu_chr_fe_set_handlers() ?

Looks better, but I needs more information e.g how main thread can
touch it?

Hmm, this happened quite occasionally, and we didn't catch the first
place (backtrace)
of removing fd from been watched, but  from the codes logic, we found
there should
be such possible cases:
tcp_chr_write (Or tcp_chr_read/tcp_chr_sync_read/chr_disconnect)
  ->tcp_chr_disconnect (Or char_socket_finalize)
 ->tcp_chr_free_connection
   -> remove_fd_in_watch(chr);

Anyway, it needs the protection from been freed twice.

Thanks,
Hailiang

Still a little bit confused. The question is how could main thread still
call tcp_chr_write or other in the above case?


Finally, we reproduced this bug (We use qemu 2.6), and got the follow backtrace 
of this problem:

(gdb) thread apply all bt

Thread 7 (Thread 0x7f407a1ff700 (LWP 23144)):
#0  0x7f41037e0db5 in _int_malloc () from /usr/lib64/libc.so.6
#1  0x7f41037e3b96 in calloc () from /usr/lib64/libc.so.6
#2  0x7f41041ad4d7 in g_malloc0 () from /usr/lib64/libglib-2.0.so.0
#3  0x7f41041a5437 in g_source_new () from /usr/lib64/libglib-2.0.so.0
#4  0x7f410a2cec9c in qio_channel_create_fd_watch 
(ioc=ioc@entry=0x7f410d6238c0, fd=20, condition=condition@entry=
(G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL)) at io/channel-watch.c:259
#5  0x7f410a2ced01 in qio_channel_create_socket_watch 
(ioc=ioc@entry=0x7f410d6238c0, socket=,
condition=condition@entry=(G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL)) at 
io/channel-watch.c:311
#6  0x7f410a2cbea7 in qio_channel_socket_create_watch (ioc=0x7f410d6238c0, 
condition=(G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL))
at io/channel-socket.c:732
#7  0x7f410a2c94d2 in qio_channel_create_watch (ioc=0x7f410d6238c0, 
condition=condition@entry=
(G_IO_IN | G_IO_ERR | G_IO_HUP | G_IO_NVAL)) at io/channel.c:132
#8  0x7f410a003cd6 in io_watch_poll_prepare (source=0x7f407d00, 
timeout_=) at qemu-char.c:883
#9  0x7f41041a72ed in g_main_context_prepare () from 
/usr/lib64/libglib-2.0.so.0
#10 0x7f41041a7b7b in g_main_context_iterate.isra.24 () from 
/usr/lib64/libglib-2.0.so.0
#11 0x7f41041a7fba in g_main_loop_run () from /usr/lib64/libglib-2.0.so.0
#12 0x7f410a1e528f in colo_compare_thread (opaque=0x7f410d7d6800) at 
net/colo-compare.c:651
#13 0x7f4103b2bdc5 in start_thread () from /usr/lib64/libpthread.so.0
#14 0x7f410385971d in clone () from /usr/lib64/libc.so.6

Thread 6 (Thread 0x7f40799fe700 (LWP 19368)):
#0  0x7f4103b2f6d5 in pthread_cond_wait@@GLIBC_2.3.2 () from 
/usr/lib64/libpthread.so.0
#1  0x7f410a3138d1 in qemu_cond_wait (cond=cond@entry=0x7f410cce44c0, 
mutex=mutex@entry=0x7f410cce44f0)
at util/qemu-thread-posix.c:132
---Type  to continue, or q  to quit---
#2  0x7f410a22b1a3 in vnc_worker_thread_loop 
(queue=queue@entry=0x7f410cce44c0) at ui/vnc-jobs.c:228
#3  0x7f410a22b810 in vnc_worker_thread (arg=0x7f410cce44c0) at 
ui/vnc-jobs.c:335
#4  0x7f4103b2bdc5 in start_thread () from /usr/lib64/libpthread.so.0
#5  0x7f410385971d in clone () from /usr/lib64/libc.so.6

Thread 5 (Thread 0x7f407abff700 (LWP 19366)):
#0  0x7f4103b2f6d5 in pthread_cond_wait@@GLIBC_2.3.2 () from 
/usr/lib64/libpthread.so.0
#1  0x7f410a3138d1 in qemu_cond_wait (cond=cond@entry=0x7f410a9fc368 
,
mutex=mutex@entry=0x7f410a9fc340 ) at 
util/qemu-thread-posix.c:132
#2  0x7f4109e99060 in mlock_wait () at 
/work/zhanghailiang/qemu-kvm/exec.c:392
#3  mlock_thread (opaque=) at 
/work/zhanghailiang/qemu-kvm/exec.c:407
#4  0x7f4103b2bdc5 in start_thread () from /usr/lib64/libpthread.so.0
#5  0x7f410385971d in clone () from /usr/lib64/libc.so.6

Thread 4 (Thread 0x7f40fcd83700 (LWP 19364)):
#0  0x7f4103b2f6d5 in pthread_cond_wait@@GLIBC_2.3.2 () from 
/usr/lib64/libpthread.so.0
#1  0x7f410a3138d1 in qemu_cond_wait (cond=, 
mutex=mutex@entry=0x7f410aa66ca0 )
at util/qemu-thread-posix.c:132
#2  0x7f4109ed5b3b in qemu_kvm_wait_io_event (cpu=0x7f410c2bda30) at 
/work/zhanghailiang/qemu-kvm/cpus.c:1087
#3  qemu_kvm_cpu_thread_fn (arg=0x7f410c2bda30) at 
/work/zhanghailiang/qemu-kvm/cpus.c:1126
#4  0x7f4103b2bdc5 in start_thread () from 

Re: [Qemu-devel] [PATCH 2/5] mc146818rtc: fix clock lost after scaling coalesced irq

2017-05-03 Thread Xiao Guangrong



On 05/03/2017 11:15 PM, Paolo Bonzini wrote:



On 12/04/2017 11:51, guangrong.x...@gmail.com wrote:

+int current_irq_coalesced = s->irq_coalesced;
+
+s->irq_coalesced = (current_irq_coalesced * s->period) / period;
+
+/*
+ * calculate the lost clock after it is scaled which should be
+ * compensated in the next interrupt.
+ */
+lost_clock += current_irq_coalesced * s->period -
+s->irq_coalesced * period;


This is:

lost_clock = current_irq_coalesced * s->period -
(current_irq_coalesced * s->period) / period * period;

i.e.

/* When switching from a shorter to a longer period, scale down the
 * missing ticks since we expect the OS handler to treat the delayed
 * ticks as longer.  Any leftovers are put back into next_irq_clock.
 *
 * When switching to a shorter period, scale up the missing ticks
 * since we expect the OS handler to treat the delayed ticks as
 * shorter.
 */
lost_clock = (s->irq_coalesced * s->period) % period;
s->irq_coalesced = (s->irq_coalesced * s->period) / period;

Is this correct?



Yes, it is correct, it looks smarter, will apply it in the next version.

Thanks!



Re: [Qemu-devel] [PATCH v10 09/17] qcow2: Optimize write zero of unaligned tail cluster

2017-05-03 Thread Eric Blake
On 04/28/2017 04:24 PM, Eric Blake wrote:

>>> +echo
>>> +echo == unaligned image tail cluster, no allocation needed ==
>>> +
>>> +CLUSTER_SIZE=1024 TEST_IMG="$TEST_IMG.base" _make_test_img $((size + 1024))
>>
>> Any reason for the CLUSTER_SIZE? It passes with 64 kB as well, and I
>> actually think that would be a better test because as it is, the image's
>> "unaligned" tail is exactly one cluster (so it isn't really unaligned).
> 
> Uhhh - copy-and-paste?  You're right, that 1024 is too small for what
> I'm actually doing with it.  :(

Actually, the whole test defaults to 4k clusters except when overridden,
but I did learn while hammering at things that we don't have a nice way
to tell that a backing file slightly shorter than the active file
behaves as though we read all zeros for the difference. I'm thinking of
proposing an RFC patch introducing BDRV_BLOCK_EOF, which gets set any
time bdrv_get_block_status() clamps the returns *pnum because it hit
EOF, to see what optimizations fall out elsewhere in the tree as a
result.  (It may not be worth it in the end, but we'll see).

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH 3/4] qcow2: Discard preallocated zero clusters

2017-05-03 Thread Eric Blake
On 05/03/2017 06:11 PM, Max Reitz wrote:
> In discard_single_l2(), we completely discard normal clusters instead of
> simply turning them into preallocated zero clusters. That means we
> should probably do the same with such preallocated zero clusters:
> Discard them instead of keeping them allocated.
> 
> Reported-by: Eric Blake 
> Signed-off-by: Max Reitz 
> ---
>  block/qcow2-cluster.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)

Reviewed-by: Eric Blake 

> 
> diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
> index fb91fd8979..31077d8102 100644
> --- a/block/qcow2-cluster.c
> +++ b/block/qcow2-cluster.c
> @@ -1511,7 +1511,8 @@ static int discard_single_l2(BlockDriverState *bs, 
> uint64_t offset,
>  break;
>  
>  case QCOW2_CLUSTER_ZERO:
> -if (!full_discard) {
> +/* Preallocated zero clusters should be discarded in any 
> case */
> +if (!full_discard && (old_l2_entry & L2E_OFFSET_MASK) == 0) {

Again, if we had a separate QCOW2_CLUSTER_* for plain vs. pre-allocated
zero, this might be easier to write.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH 4/4] iotests: Extend test 066

2017-05-03 Thread Eric Blake
On 05/03/2017 06:11 PM, Max Reitz wrote:
> 066 was supposed to be a test "for discarding preallocated zero
> clusters", but it did so incompletely: While it did check the image
> file's integrity after the operation, it did not confirm that the
> clusters are indeed freed. This patch adds this test.
> 
> In addition, new cases for writing to preallocated zero clusters are
> added.
> 
> Signed-off-by: Max Reitz 
> ---
>  tests/qemu-iotests/066 | 128 
> -
>  tests/qemu-iotests/066.out |  46 
>  2 files changed, 173 insertions(+), 1 deletion(-)
> 
> @@ -55,8 +55,134 @@ _make_test_img $IMG_SIZE
>  $QEMU_IO -c "write 0 256k" -c "write -z 0 256k" -c "write 64M 512" \
>-c "discard 0 $IMG_SIZE" -c "read -P 0 0 $IMG_SIZE" "$TEST_IMG" \
>   | _filter_qemu_io
> +
>  # Check the image (there shouldn't be any leaks)
>  _check_test_img
> +# Map the image (we want all clusters to be gone)
> +$QEMU_IMG map "$TEST_IMG"

The human-readable qemu-img map ignores allocated but reads-as-zeros
clusters.  Maybe it shouldn't, but only --output=json does the right
thing (actually, even then it doesn't, without my pending patch that I
am rebasing on top of your series [1]).

But I think this is sufficient for test 66, since I'm adding more tests
in my pending series.

[1] https://lists.gnu.org/archive/html/qemu-devel/2017-04/msg05224.html


> +# Create three normal clusters
> +$QEMU_IO -c 'write -P 42 0 192k' "$TEST_IMG" | _filter_qemu_io
> +orig_map=$($QEMU_IMG map --output=json "$TEST_IMG")
> +
> +# Make the middle cluster a preallocated zero cluster
> +$QEMU_IO -c 'write -z 64k 64k' "$TEST_IMG" | _filter_qemu_io
> +
> +# Try to overwrite everything: This should reuse the whole range. To test 
> that
> +# this only issues a single continuous write request, use blkdebug.
> +$QEMU_IO -c 'write -P 42 0 192k' \
> +"json:{
> +'driver': '$IMGFMT',
> +'file': {
> +'driver': 'blkdebug',
> +'image.filename': '$TEST_IMG',
> +'set-state': [{
> +'event': 'write_aio',
> +'new_state': 2
> +}],
> +'inject-error': [{
> +'event': 'write_aio',
> +'state': 2
> +}]
> +}
> +}" \
> +| _filter_qemu_io

blkdebug is cool!

Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH v5 1/5] test-keyval: fix leaks

2017-05-03 Thread Eric Blake
On 05/03/2017 05:38 PM, Marc-André Lureau wrote:
> Spotted by ASAN.
> 
> Signed-off-by: Marc-André Lureau 
> ---
>  tests/test-keyval.c | 4 
>  1 file changed, 4 insertions(+)

Reviewed-by: Eric blake 

> 
> diff --git a/tests/test-keyval.c b/tests/test-keyval.c
> index ba19560a22..c556b1b117 100644
> --- a/tests/test-keyval.c
> +++ b/tests/test-keyval.c
> @@ -628,6 +628,7 @@ static void test_keyval_visit_alternate(void)
>  visit_type_AltNumStr(v, "a", , _abort);
>  g_assert_cmpint(ans->type, ==, QTYPE_QSTRING);
>  g_assert_cmpstr(ans->u.s, ==, "1");
> +qapi_free_AltNumStr(ans);
>  visit_type_AltNumInt(v, "a", , );
>  error_free_or_abort();
>  visit_end_struct(v, NULL);
> @@ -651,9 +652,12 @@ static void test_keyval_visit_any(void)
>  g_assert(qlist);
>  qstr = qobject_to_qstring(qlist_pop(qlist));
>  g_assert_cmpstr(qstring_get_str(qstr), ==, "null");
> +QDECREF(qstr);
>  qstr = qobject_to_qstring(qlist_pop(qlist));
>  g_assert_cmpstr(qstring_get_str(qstr), ==, "1");
>  g_assert(qlist_empty(qlist));
> +QDECREF(qstr);
> +qobject_decref(any);
>  visit_check_struct(v, _abort);
>  visit_end_struct(v, NULL);
>  visit_free(v);
> 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH 2/4] qcow2: Reuse preallocated zero clusters

2017-05-03 Thread Eric Blake
On 05/03/2017 06:11 PM, Max Reitz wrote:
> Instead of just freeing preallocated zero clusters and completely
> allocating them from scratch, reuse them.
> 
> We cannot do this in handle_copied(), however, since this is a COW
> operation. Therefore, we have to add the new logic to handle_alloc() and
> simply return the existing offset if it exists. The only catch is that
> we have to convince qcow2_alloc_cluster_link_l2() not to free the old
> clusters (because we have reused them).
> 
> Reported-by: Eric Blake 
> Signed-off-by: Max Reitz 
> ---
>  block/qcow2.h |  3 ++
>  block/qcow2-cluster.c | 80 
> +++
>  2 files changed, 59 insertions(+), 24 deletions(-)
> 

>  
> -assert(qcow2_get_cluster_type(first_entry) == QCOW2_CLUSTER_NORMAL);
> +/* must be allocated */
> +first_cluster_type = qcow2_get_cluster_type(first_entry);
> +assert(first_cluster_type == QCOW2_CLUSTER_NORMAL ||
> +   (first_cluster_type == QCOW2_CLUSTER_ZERO &&
> +(first_entry & L2E_OFFSET_MASK) != 0));

I wonder if we should have separate QCOW2_CLUSTER_ZERO_PLAIN and
QCOW2_CLUSTER_ZERO_ALLOCATED, so we don't have to keep checking
first_entry & L2E_OFFSET_MASK.  But I think that can be a separate
followup if we want it.


Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH 1/4] qcow2: Fix preallocation size formula

2017-05-03 Thread Eric Blake
On 05/03/2017 06:11 PM, Max Reitz wrote:
> When calculating the number of reftable entries, we should actually use
> the number of refblocks and not (wrongly[1]) re-calculate it.
> 
> [1] "Wrongly" means: Dividing the number of clusters by the number of
> entries per refblock and rounding down instead of up.
> 
> Reported-by: Eric Blake 
> Signed-off-by: Max Reitz 
> ---
>  block/qcow2.c | 9 +
>  1 file changed, 5 insertions(+), 4 deletions(-)
> 
Reviewed-by: Eric Blake 

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature


[Qemu-devel] [PULL v2 07/11] target/openrisc: implement shadow registers

2017-05-03 Thread Stafford Horne
Shadow registers are part of the openrisc spec along with sr[cid], as
part of the fast context switching feature.  When exceptions occur,
instead of having to save registers to the stack if enabled the CID will
increment and a new set of registers will be available.

This patch only implements shadow registers which can be used as extra
scratch registers via the mfspr and mtspr if required.  This is
implemented in a way where it would be easy to add on the fast context
switching, currently cid is hardcoded to 0.

This is need for openrisc linux smp kernels to boot correctly.

Signed-off-by: Stafford Horne 
---
 linux-user/elfload.c|  2 +-
 linux-user/main.c   | 18 +-
 linux-user/openrisc/target_cpu.h|  6 +++---
 linux-user/openrisc/target_signal.h |  2 +-
 linux-user/signal.c | 17 +
 target/openrisc/cpu.c   |  4 +++-
 target/openrisc/cpu.h   | 15 +--
 target/openrisc/gdbstub.c   |  4 ++--
 target/openrisc/machine.c   |  6 +++---
 target/openrisc/sys_helper.c|  9 +
 target/openrisc/translate.c |  5 +++--
 11 files changed, 56 insertions(+), 32 deletions(-)

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index f520d77..ce77317 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -1052,7 +1052,7 @@ static void elf_core_copy_regs(target_elf_gregset_t *regs,
 int i;
 
 for (i = 0; i < 32; i++) {
-(*regs)[i] = tswapreg(env->gpr[i]);
+(*regs)[i] = tswapreg(cpu_get_gpr(env, i));
 }
 (*regs)[32] = tswapreg(env->pc);
 (*regs)[33] = tswapreg(cpu_get_sr(env));
diff --git a/linux-user/main.c b/linux-user/main.c
index 10a3bb3..79d621b 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -2590,17 +2590,17 @@ void cpu_loop(CPUOpenRISCState *env)
 case EXCP_SYSCALL:
 env->pc += 4;   /* 0xc00; */
 ret = do_syscall(env,
- env->gpr[11], /* return value   */
- env->gpr[3],  /* r3 - r7 are params */
- env->gpr[4],
- env->gpr[5],
- env->gpr[6],
- env->gpr[7],
- env->gpr[8], 0, 0);
+ cpu_get_gpr(env, 11), /* return value   */
+ cpu_get_gpr(env, 3),  /* r3 - r7 are params */
+ cpu_get_gpr(env, 4),
+ cpu_get_gpr(env, 5),
+ cpu_get_gpr(env, 6),
+ cpu_get_gpr(env, 7),
+ cpu_get_gpr(env, 8), 0, 0);
 if (ret == -TARGET_ERESTARTSYS) {
 env->pc -= 4;
 } else if (ret != -TARGET_QEMU_ESIGRETURN) {
-env->gpr[11] = ret;
+cpu_set_gpr(env, 11, ret);
 }
 break;
 case EXCP_DPF:
@@ -4765,7 +4765,7 @@ int main(int argc, char **argv, char **envp)
 int i;
 
 for (i = 0; i < 32; i++) {
-env->gpr[i] = regs->gpr[i];
+cpu_set_gpr(env, i, regs->gpr[i]);
 }
 env->pc = regs->pc;
 cpu_set_sr(env, regs->sr);
diff --git a/linux-user/openrisc/target_cpu.h b/linux-user/openrisc/target_cpu.h
index f283d96..606ad6f 100644
--- a/linux-user/openrisc/target_cpu.h
+++ b/linux-user/openrisc/target_cpu.h
@@ -23,14 +23,14 @@
 static inline void cpu_clone_regs(CPUOpenRISCState *env, target_ulong newsp)
 {
 if (newsp) {
-env->gpr[1] = newsp;
+cpu_set_gpr(env, 1, newsp);
 }
-env->gpr[11] = 0;
+cpu_set_gpr(env, 11, 0);
 }
 
 static inline void cpu_set_tls(CPUOpenRISCState *env, target_ulong newtls)
 {
-env->gpr[10] = newtls;
+cpu_set_gpr(env, 10, newtls);
 }
 
 #endif
diff --git a/linux-user/openrisc/target_signal.h 
b/linux-user/openrisc/target_signal.h
index 9f2c493..95a733e 100644
--- a/linux-user/openrisc/target_signal.h
+++ b/linux-user/openrisc/target_signal.h
@@ -20,7 +20,7 @@ typedef struct target_sigaltstack {
 
 static inline abi_ulong get_sp_from_cpustate(CPUOpenRISCState *state)
 {
-return state->gpr[1];
+return cpu_get_gpr(state, 1);
 }
 
 
diff --git a/linux-user/signal.c b/linux-user/signal.c
index a67db04..3d18d1b 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -4411,7 +4411,7 @@ static void setup_sigcontext(struct target_sigcontext *sc,
  CPUOpenRISCState *regs,
  unsigned long mask)
 {
-unsigned long usp = regs->gpr[1];
+unsigned long usp = cpu_get_gpr(regs, 1);
 
 /* copy the regs. they are first in sc so we can use sc directly */
 
@@ -4436,7 +4436,7 @@ static inline abi_ulong get_sigframe(struct 
target_sigaction *ka,
  CPUOpenRISCState *regs,
   

[Qemu-devel] [PULL v2 11/11] target/openrisc: Support non-busy idle state using PMR SPR

2017-05-03 Thread Stafford Horne
The OpenRISC architecture has the Power Management Register (PMR)
special purpose register to manage cpu power states.  The interesting
modes are:

 * Doze Mode (DME) - Stop cpu except timer & pic - wake on interrupt
 * Sleep Mode (SME) - Stop cpu and all units - wake on interrupt
 * Suspend Model (SUME) - Stop cpu and all units - wake on reset

The linux kernel will set DME when idle.

This patch implements the PMR SPR and halts the qemu cpu when there is a
change to DME or SME.  This means that openrisc qemu in no longer peggs
a host cpu at 100%.

In order for this to work we need to kick the CPU when timers are
expired.  Update the cpu timer to kick the cpu upon each timer event.

Reviewed-by: Richard Henderson 
Signed-off-by: Stafford Horne 
---
 hw/openrisc/cputimer.c   |  1 +
 target/openrisc/cpu.c|  3 ++-
 target/openrisc/cpu.h| 10 ++
 target/openrisc/interrupt.c  |  2 ++
 target/openrisc/machine.c|  1 +
 target/openrisc/sys_helper.c | 13 +
 6 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/hw/openrisc/cputimer.c b/hw/openrisc/cputimer.c
index a98c799..febc469 100644
--- a/hw/openrisc/cputimer.c
+++ b/hw/openrisc/cputimer.c
@@ -61,6 +61,7 @@ void cpu_openrisc_timer_update(OpenRISCCPU *cpu)
 }
 next = now + (uint64_t)wait * TIMER_PERIOD;
 timer_mod(cpu->env.timer, next);
+qemu_cpu_kick(CPU(cpu));
 }
 
 void cpu_openrisc_count_start(OpenRISCCPU *cpu)
diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
index c9b3f22..1d6330c 100644
--- a/target/openrisc/cpu.c
+++ b/target/openrisc/cpu.c
@@ -51,7 +51,8 @@ static void openrisc_cpu_reset(CPUState *s)
 cpu->env.lock_addr = -1;
 s->exception_index = -1;
 
-cpu->env.upr = UPR_UP | UPR_DMP | UPR_IMP | UPR_PICP | UPR_TTP;
+cpu->env.upr = UPR_UP | UPR_DMP | UPR_IMP | UPR_PICP | UPR_TTP |
+   UPR_PMP;
 cpu->env.dmmucfgr = (DMMUCFGR_NTW & (0 << 2)) | (DMMUCFGR_NTS & (6 << 2));
 cpu->env.immucfgr = (IMMUCFGR_NTW & (0 << 2)) | (IMMUCFGR_NTS & (6 << 2));
 
diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h
index 938ccc3..2721432 100644
--- a/target/openrisc/cpu.h
+++ b/target/openrisc/cpu.h
@@ -140,6 +140,15 @@ enum {
 IMMUCFGR_HTR = (1 << 11),
 };
 
+/* Power management register */
+enum {
+PMR_SDF = (15 << 0),
+PMR_DME = (1 << 4),
+PMR_SME = (1 << 5),
+PMR_DCGE = (1 << 6),
+PMR_SUME = (1 << 7),
+};
+
 /* Float point control status register */
 enum {
 FPCSR_FPEE = 1,
@@ -284,6 +293,7 @@ typedef struct CPUOpenRISCState {
 uint32_t immucfgr;/* IMMU configure register */
 uint32_t esr; /* Exception supervisor register */
 uint32_t evbar;   /* Exception vector base address register */
+uint32_t pmr; /* Power Management Register */
 uint32_t fpcsr;   /* Float register */
 float_status fp_status;
 
diff --git a/target/openrisc/interrupt.c b/target/openrisc/interrupt.c
index 2c91fab..3959671 100644
--- a/target/openrisc/interrupt.c
+++ b/target/openrisc/interrupt.c
@@ -60,6 +60,8 @@ void openrisc_cpu_do_interrupt(CPUState *cs)
 env->sr |= SR_SM;
 env->sr &= ~SR_IEE;
 env->sr &= ~SR_TEE;
+env->pmr &= ~PMR_DME;
+env->pmr &= ~PMR_SME;
 env->tlb->cpu_openrisc_map_address_data = _openrisc_get_phys_nommu;
 env->tlb->cpu_openrisc_map_address_code = _openrisc_get_phys_nommu;
 env->lock_addr = -1;
diff --git a/target/openrisc/machine.c b/target/openrisc/machine.c
index a82be62..a20cce7 100644
--- a/target/openrisc/machine.c
+++ b/target/openrisc/machine.c
@@ -138,6 +138,7 @@ static const VMStateDescription vmstate_env = {
 VMSTATE_UINT32(dmmucfgr, CPUOpenRISCState),
 VMSTATE_UINT32(immucfgr, CPUOpenRISCState),
 VMSTATE_UINT32(evbar, CPUOpenRISCState),
+VMSTATE_UINT32(pmr, CPUOpenRISCState),
 VMSTATE_UINT32(esr, CPUOpenRISCState),
 VMSTATE_UINT32(fpcsr, CPUOpenRISCState),
 VMSTATE_UINT64(mac, CPUOpenRISCState),
diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c
index fa3d6a4..abdef5d 100644
--- a/target/openrisc/sys_helper.c
+++ b/target/openrisc/sys_helper.c
@@ -22,6 +22,7 @@
 #include "cpu.h"
 #include "exec/exec-all.h"
 #include "exec/helper-proto.h"
+#include "exception.h"
 
 #define TO_SPR(group, number) (((group) << 11) + (number))
 
@@ -141,6 +142,15 @@ void HELPER(mtspr)(CPUOpenRISCState *env,
 case TO_SPR(5, 2):  /* MACHI */
 env->mac = deposit64(env->mac, 32, 32, rb);
 break;
+case TO_SPR(8, 0):  /* PMR */
+env->pmr = rb;
+if (env->pmr & PMR_DME || env->pmr & PMR_SME) {
+cpu_restore_state(cs, GETPC());
+env->pc += 4;
+cs->halted = 1;
+raise_exception(cpu, EXCP_HALTED);
+}
+break;
 case TO_SPR(9, 0):  /* PICMR */
 env->picmr |= rb;
 break;
@@ -287,6 +297,9 @@ target_ulong 

[Qemu-devel] [PULL v2 09/11] target/openrisc: Implement full vmstate serialization

2017-05-03 Thread Stafford Horne
Previously serialization did not persist the tlb, timer, pic and other
key state items.  This meant snapshotting and restoring a running os
would crash. After adding these I am able to take snapshots of a
running linux os and restore at a later time.

I am currently not trying to maintain capatibility with older versions
as I do not believe this really worked before or anyone used it.

Signed-off-by: Stafford Horne 
---
 target/openrisc/machine.c | 73 +--
 1 file changed, 71 insertions(+), 2 deletions(-)

diff --git a/target/openrisc/machine.c b/target/openrisc/machine.c
index 2bf71c3..a82be62 100644
--- a/target/openrisc/machine.c
+++ b/target/openrisc/machine.c
@@ -24,6 +24,63 @@
 #include "hw/boards.h"
 #include "migration/cpu.h"
 
+static int env_post_load(void *opaque, int version_id)
+{
+CPUOpenRISCState *env = opaque;
+
+/* Restore MMU handlers */
+if (env->sr & SR_DME) {
+env->tlb->cpu_openrisc_map_address_data =
+_openrisc_get_phys_data;
+} else {
+env->tlb->cpu_openrisc_map_address_data =
+_openrisc_get_phys_nommu;
+}
+
+if (env->sr & SR_IME) {
+env->tlb->cpu_openrisc_map_address_code =
+_openrisc_get_phys_code;
+} else {
+env->tlb->cpu_openrisc_map_address_code =
+_openrisc_get_phys_nommu;
+}
+
+
+return 0;
+}
+
+static const VMStateDescription vmstate_tlb_entry = {
+.name = "tlb_entry",
+.version_id = 1,
+.minimum_version_id = 1,
+.minimum_version_id_old = 1,
+.fields = (VMStateField[]) {
+VMSTATE_UINTTL(mr, OpenRISCTLBEntry),
+VMSTATE_UINTTL(tr, OpenRISCTLBEntry),
+VMSTATE_END_OF_LIST()
+}
+};
+
+static const VMStateDescription vmstate_cpu_tlb = {
+.name = "cpu_tlb",
+.version_id = 1,
+.minimum_version_id = 1,
+.minimum_version_id_old = 1,
+.fields = (VMStateField[]) {
+VMSTATE_STRUCT_2DARRAY(itlb, CPUOpenRISCTLBContext,
+ ITLB_WAYS, ITLB_SIZE, 0,
+ vmstate_tlb_entry, OpenRISCTLBEntry),
+VMSTATE_STRUCT_2DARRAY(dtlb, CPUOpenRISCTLBContext,
+ DTLB_WAYS, DTLB_SIZE, 0,
+ vmstate_tlb_entry, OpenRISCTLBEntry),
+VMSTATE_END_OF_LIST()
+}
+};
+
+#define VMSTATE_CPU_TLB(_f, _s) \
+VMSTATE_STRUCT_POINTER(_f, _s, vmstate_cpu_tlb, CPUOpenRISCTLBContext)
+
+
 static int get_sr(QEMUFile *f, void *opaque, size_t size, VMStateField *field)
 {
 CPUOpenRISCState *env = opaque;
@@ -47,8 +104,9 @@ static const VMStateInfo vmstate_sr = {
 
 static const VMStateDescription vmstate_env = {
 .name = "env",
-.version_id = 5,
-.minimum_version_id = 5,
+.version_id = 6,
+.minimum_version_id = 6,
+.post_load = env_post_load,
 .fields = (VMStateField[]) {
 VMSTATE_UINTTL_2DARRAY(shadow_gpr, CPUOpenRISCState, 16, 32),
 VMSTATE_UINTTL(pc, CPUOpenRISCState),
@@ -79,9 +137,20 @@ static const VMStateDescription vmstate_env = {
 VMSTATE_UINT32(cpucfgr, CPUOpenRISCState),
 VMSTATE_UINT32(dmmucfgr, CPUOpenRISCState),
 VMSTATE_UINT32(immucfgr, CPUOpenRISCState),
+VMSTATE_UINT32(evbar, CPUOpenRISCState),
 VMSTATE_UINT32(esr, CPUOpenRISCState),
 VMSTATE_UINT32(fpcsr, CPUOpenRISCState),
 VMSTATE_UINT64(mac, CPUOpenRISCState),
+
+VMSTATE_CPU_TLB(tlb, CPUOpenRISCState),
+
+VMSTATE_TIMER_PTR(timer, CPUOpenRISCState),
+VMSTATE_UINT32(ttmr, CPUOpenRISCState),
+VMSTATE_UINT32(ttcr, CPUOpenRISCState),
+
+VMSTATE_UINT32(picmr, CPUOpenRISCState),
+VMSTATE_UINT32(picsr, CPUOpenRISCState),
+
 VMSTATE_END_OF_LIST()
 }
 };
-- 
2.9.3




[Qemu-devel] [PULL v2 02/11] target/openrisc: Implement EVBAR register

2017-05-03 Thread Stafford Horne
From: Tim 'mithro' Ansell 

Exception Vector Base Address Register (EVBAR) - This optional register
can be used to apply an offset to the exception vector addresses.

The significant bits (31-12) of the vector offset address for each
exception depend on the setting of the Supervision Register (SR)'s EPH
bit and the Exception Vector Base Address Register (EVBAR).

Its presence is indicated by the EVBARP bit in the CPU Configuration
Register (CPUCFGR).

Signed-off-by: Tim 'mithro' Ansell 
Signed-off-by: Stafford Horne 
---
 target/openrisc/cpu.c| 2 ++
 target/openrisc/cpu.h| 7 +++
 target/openrisc/interrupt.c  | 6 +-
 target/openrisc/sys_helper.c | 7 +++
 4 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
index 7fd2b9a..1524ed9 100644
--- a/target/openrisc/cpu.c
+++ b/target/openrisc/cpu.c
@@ -134,6 +134,7 @@ static void or1200_initfn(Object *obj)
 
 set_feature(cpu, OPENRISC_FEATURE_OB32S);
 set_feature(cpu, OPENRISC_FEATURE_OF32S);
+set_feature(cpu, OPENRISC_FEATURE_EVBAR);
 }
 
 static void openrisc_any_initfn(Object *obj)
@@ -141,6 +142,7 @@ static void openrisc_any_initfn(Object *obj)
 OpenRISCCPU *cpu = OPENRISC_CPU(obj);
 
 set_feature(cpu, OPENRISC_FEATURE_OB32S);
+set_feature(cpu, OPENRISC_FEATURE_EVBAR);
 }
 
 typedef struct OpenRISCCPUInfo {
diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h
index 418a0e6..1958b72 100644
--- a/target/openrisc/cpu.h
+++ b/target/openrisc/cpu.h
@@ -111,6 +111,11 @@ enum {
 CPUCFGR_OF32S = (1 << 7),
 CPUCFGR_OF64S = (1 << 8),
 CPUCFGR_OV64S = (1 << 9),
+/* CPUCFGR_ND = (1 << 10), */
+/* CPUCFGR_AVRP = (1 << 11), */
+CPUCFGR_EVBARP = (1 << 12),
+/* CPUCFGR_ISRP = (1 << 13), */
+/* CPUCFGR_AECSRP = (1 << 14), */
 };
 
 /* DMMU configure register */
@@ -200,6 +205,7 @@ enum {
 OPENRISC_FEATURE_OF32S = (1 << 7),
 OPENRISC_FEATURE_OF64S = (1 << 8),
 OPENRISC_FEATURE_OV64S = (1 << 9),
+OPENRISC_FEATURE_EVBAR = (1 << 12),
 };
 
 /* Tick Timer Mode Register */
@@ -289,6 +295,7 @@ typedef struct CPUOpenRISCState {
 uint32_t dmmucfgr;/* DMMU configure register */
 uint32_t immucfgr;/* IMMU configure register */
 uint32_t esr; /* Exception supervisor register */
+uint32_t evbar;   /* Exception vector base address register */
 uint32_t fpcsr;   /* Float register */
 float_status fp_status;
 
diff --git a/target/openrisc/interrupt.c b/target/openrisc/interrupt.c
index a2eec6f..78f0ba9 100644
--- a/target/openrisc/interrupt.c
+++ b/target/openrisc/interrupt.c
@@ -65,7 +65,11 @@ void openrisc_cpu_do_interrupt(CPUState *cs)
 env->lock_addr = -1;
 
 if (cs->exception_index > 0 && cs->exception_index < EXCP_NR) {
-env->pc = (cs->exception_index << 8);
+hwaddr vect_pc = cs->exception_index << 8;
+if (env->cpucfgr & CPUCFGR_EVBARP) {
+vect_pc |= env->evbar;
+}
+env->pc = vect_pc;
 } else {
 cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index);
 }
diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c
index 60c3193..6ba8162 100644
--- a/target/openrisc/sys_helper.c
+++ b/target/openrisc/sys_helper.c
@@ -39,6 +39,10 @@ void HELPER(mtspr)(CPUOpenRISCState *env,
 env->vr = rb;
 break;
 
+case TO_SPR(0, 11): /* EVBAR */
+env->evbar = rb;
+break;
+
 case TO_SPR(0, 16): /* NPC */
 cpu_restore_state(cs, GETPC());
 /* ??? Mirror or1ksim in not trashing delayed branch state
@@ -206,6 +210,9 @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env,
 case TO_SPR(0, 4): /* IMMUCFGR */
 return env->immucfgr;
 
+case TO_SPR(0, 11): /* EVBAR */
+return env->evbar;
+
 case TO_SPR(0, 16): /* NPC (equals PC) */
 cpu_restore_state(cs, GETPC());
 return env->pc;
-- 
2.9.3




[Qemu-devel] [PULL v2 08/11] migration: Add VMSTATE_STRUCT_2DARRAY()

2017-05-03 Thread Stafford Horne
For openrisc we implement tlb state as a 2d array of tlb entry structs.
This is added to allow easy storing of state of 2d arrays.

Signed-off-by: Stafford Horne 
---
 include/migration/vmstate.h | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index f2dbf84..4834e55 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -499,6 +499,19 @@ extern const VMStateInfo vmstate_info_qtailq;
 .offset   = vmstate_offset_array(_state, _field, _type, _num),\
 }
 
+#define VMSTATE_STRUCT_2DARRAY_TEST(_field, _state, _n1, _n2, _test, \
+_version, _vmsd, _type) {\
+.name = (stringify(_field)), \
+.num  = (_n1) * (_n2),   \
+.field_exists = (_test), \
+.version_id   = (_version),  \
+.vmsd = &(_vmsd),\
+.size = sizeof(_type),   \
+.flags= VMS_STRUCT | VMS_ARRAY,  \
+.offset   = vmstate_offset_2darray(_state, _field, _type,\
+   _n1, _n2),\
+}
+
 #define VMSTATE_STRUCT_VARRAY_UINT8(_field, _state, _field_num, _version, 
_vmsd, _type) { \
 .name   = (stringify(_field)),   \
 .num_offset = vmstate_offset_value(_state, _field_num, uint8_t), \
@@ -746,6 +759,11 @@ extern const VMStateInfo vmstate_info_qtailq;
 VMSTATE_STRUCT_ARRAY_TEST(_field, _state, _num, NULL, _version,   \
 _vmsd, _type)
 
+#define VMSTATE_STRUCT_2DARRAY(_field, _state, _n1, _n2, _version,\
+_vmsd, _type) \
+VMSTATE_STRUCT_2DARRAY_TEST(_field, _state, _n1, _n2, NULL,   \
+_version, _vmsd, _type)
+
 #define VMSTATE_BUFFER_UNSAFE_INFO(_field, _state, _version, _info, _size) \
 VMSTATE_BUFFER_UNSAFE_INFO_TEST(_field, _state, NULL, _version, _info, \
 _size)
-- 
2.9.3




[Qemu-devel] [PULL v2 10/11] target/openrisc: Remove duplicate features property

2017-05-03 Thread Stafford Horne
The features property has stored the exact same thing as the cpucfgr
spr. Remove the feature enum and property as it is not needed.

In order to preserve the behavior or keeping features accross reset this
patch moves cpucfgr into the non reset region of the state struct.  Since
the cpucfgr is read only this means we only need to sset cpucfgr once
during class init.

Signed-off-by: Stafford Horne 
---
 target/openrisc/cpu.c | 17 +++--
 target/openrisc/cpu.h | 16 ++--
 2 files changed, 5 insertions(+), 28 deletions(-)

diff --git a/target/openrisc/cpu.c b/target/openrisc/cpu.c
index 6c1ed07..c9b3f22 100644
--- a/target/openrisc/cpu.c
+++ b/target/openrisc/cpu.c
@@ -52,7 +52,6 @@ static void openrisc_cpu_reset(CPUState *s)
 s->exception_index = -1;
 
 cpu->env.upr = UPR_UP | UPR_DMP | UPR_IMP | UPR_PICP | UPR_TTP;
-cpu->env.cpucfgr = CPUCFGR_OB32S | CPUCFGR_OF32S | CPUCFGR_NSGF;
 cpu->env.dmmucfgr = (DMMUCFGR_NTW & (0 << 2)) | (DMMUCFGR_NTS & (6 << 2));
 cpu->env.immucfgr = (IMMUCFGR_NTW & (0 << 2)) | (IMMUCFGR_NTS & (6 << 2));
 
@@ -65,12 +64,6 @@ static void openrisc_cpu_reset(CPUState *s)
 #endif
 }
 
-static inline void set_feature(OpenRISCCPU *cpu, int feature)
-{
-cpu->feature |= feature;
-cpu->env.cpucfgr = cpu->feature;
-}
-
 static void openrisc_cpu_realizefn(DeviceState *dev, Error **errp)
 {
 CPUState *cs = CPU(dev);
@@ -132,19 +125,15 @@ static void or1200_initfn(Object *obj)
 {
 OpenRISCCPU *cpu = OPENRISC_CPU(obj);
 
-set_feature(cpu, OPENRISC_FEATURE_NSGF);
-set_feature(cpu, OPENRISC_FEATURE_OB32S);
-set_feature(cpu, OPENRISC_FEATURE_OF32S);
-set_feature(cpu, OPENRISC_FEATURE_EVBAR);
+cpu->env.cpucfgr = CPUCFGR_NSGF | CPUCFGR_OB32S | CPUCFGR_OF32S |
+   CPUCFGR_EVBARP;
 }
 
 static void openrisc_any_initfn(Object *obj)
 {
 OpenRISCCPU *cpu = OPENRISC_CPU(obj);
 
-set_feature(cpu, OPENRISC_FEATURE_NSGF);
-set_feature(cpu, OPENRISC_FEATURE_OB32S);
-set_feature(cpu, OPENRISC_FEATURE_EVBAR);
+cpu->env.cpucfgr = CPUCFGR_NSGF | CPUCFGR_OB32S | CPUCFGR_EVBARP;
 }
 
 typedef struct OpenRISCCPUInfo {
diff --git a/target/openrisc/cpu.h b/target/openrisc/cpu.h
index e159b22..938ccc3 100644
--- a/target/openrisc/cpu.h
+++ b/target/openrisc/cpu.h
@@ -196,18 +196,6 @@ enum {
 SR_SCE = (1 << 17),
 };
 
-/* OpenRISC Hardware Capabilities */
-enum {
-OPENRISC_FEATURE_NSGF = (15 << 0),
-OPENRISC_FEATURE_CGF = (1 << 4),
-OPENRISC_FEATURE_OB32S = (1 << 5),
-OPENRISC_FEATURE_OB64S = (1 << 6),
-OPENRISC_FEATURE_OF32S = (1 << 7),
-OPENRISC_FEATURE_OF64S = (1 << 8),
-OPENRISC_FEATURE_OV64S = (1 << 9),
-OPENRISC_FEATURE_EVBAR = (1 << 12),
-};
-
 /* Tick Timer Mode Register */
 enum {
 TTMR_TP = (0xfff),
@@ -292,7 +280,6 @@ typedef struct CPUOpenRISCState {
 uint32_t sr;  /* Supervisor register, without SR_{F,CY,OV} */
 uint32_t vr;  /* Version register */
 uint32_t upr; /* Unit presence register */
-uint32_t cpucfgr; /* CPU configure register */
 uint32_t dmmucfgr;/* DMMU configure register */
 uint32_t immucfgr;/* IMMU configure register */
 uint32_t esr; /* Exception supervisor register */
@@ -311,6 +298,8 @@ typedef struct CPUOpenRISCState {
 CPU_COMMON
 
 /* Fields from here on are preserved across CPU reset. */
+uint32_t cpucfgr; /* CPU configure register */
+
 #ifndef CONFIG_USER_ONLY
 CPUOpenRISCTLBContext * tlb;
 
@@ -337,7 +326,6 @@ typedef struct OpenRISCCPU {
 
 CPUOpenRISCState env;
 
-uint32_t feature;   /* CPU Capabilities */
 } OpenRISCCPU;
 
 static inline OpenRISCCPU *openrisc_env_get_cpu(CPUOpenRISCState *env)
-- 
2.9.3




[Qemu-devel] [PULL v2 06/11] migration: Add VMSTATE_UINTTL_2DARRAY()

2017-05-03 Thread Stafford Horne
In openRISC we are implementing the shadow registers as a 2d array.
Using this target long method rather than direct 32-bit alternatives is
consistent with the rest of our vm state serialization logic.

Signed-off-by: Stafford Horne 
---
 include/migration/cpu.h | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/include/migration/cpu.h b/include/migration/cpu.h
index f3d5dfc..a40bd35 100644
--- a/include/migration/cpu.h
+++ b/include/migration/cpu.h
@@ -18,6 +18,8 @@
 VMSTATE_UINT64_EQUAL_V(_f, _s, _v)
 #define VMSTATE_UINTTL_ARRAY_V(_f, _s, _n, _v)\
 VMSTATE_UINT64_ARRAY_V(_f, _s, _n, _v)
+#define VMSTATE_UINTTL_2DARRAY_V(_f, _s, _n1, _n2, _v)\
+VMSTATE_UINT64_2DARRAY_V(_f, _s, _n1, _n2, _v)
 #define VMSTATE_UINTTL_TEST(_f, _s, _t)   \
 VMSTATE_UINT64_TEST(_f, _s, _t)
 #define vmstate_info_uinttl vmstate_info_uint64
@@ -37,6 +39,8 @@
 VMSTATE_UINT32_EQUAL_V(_f, _s, _v)
 #define VMSTATE_UINTTL_ARRAY_V(_f, _s, _n, _v)\
 VMSTATE_UINT32_ARRAY_V(_f, _s, _n, _v)
+#define VMSTATE_UINTTL_2DARRAY_V(_f, _s, _n1, _n2, _v)\
+VMSTATE_UINT32_2DARRAY_V(_f, _s, _n1, _n2, _v)
 #define VMSTATE_UINTTL_TEST(_f, _s, _t)   \
 VMSTATE_UINT32_TEST(_f, _s, _t)
 #define vmstate_info_uinttl vmstate_info_uint32
@@ -48,5 +52,8 @@
 VMSTATE_UINTTL_EQUAL_V(_f, _s, 0)
 #define VMSTATE_UINTTL_ARRAY(_f, _s, _n)  \
 VMSTATE_UINTTL_ARRAY_V(_f, _s, _n, 0)
+#define VMSTATE_UINTTL_2DARRAY(_f, _s, _n1, _n2)  \
+VMSTATE_UINTTL_2DARRAY_V(_f, _s, _n1, _n2, 0)
+
 
 #endif
-- 
2.9.3




[Qemu-devel] [PULL v2 00/11] Fixes and features for OpenRISC

2017-05-03 Thread Stafford Horne
Hello,

This are the openrisc patches I have been circulating on the mailing list
of the last few months.  We have had help from a few new people and added
the following:
 * Fixes for gdb memory debugging
 * Added support for Shadow Registers, EVBAR, EPH
 * Added support for idle state, no more 100% pegged cpu's
 * Fixed VM state persisting


Changes Since v1:
  o Fixed all checkpatch warnings and errors


The following changes since commit 359c41abe32638adad503e386969fa428cecff52:

  Update version for v2.9.0 release (2017-04-20 15:31:34 +0100)

are available in the git repository at:

  git://github.com/stffrdhrn/qemu.git tags/pull-or-20170504

for you to fetch changes up to f4d1414a9385e3375d9107b29eeb75d27daf2147:

  target/openrisc: Support non-busy idle state using PMR SPR (2017-05-04 
09:39:14 +0900)


Openrisc Features and Fixes for qemu 2.10


Stafford Horne (9):
  MAINTAINERS: Add myself as openrisc maintainer
  target/openrisc: Fixes for memory debugging
  target/openrisc: add numcores and coreid support
  migration: Add VMSTATE_UINTTL_2DARRAY()
  target/openrisc: implement shadow registers
  migration: Add VMSTATE_STRUCT_2DARRAY()
  target/openrisc: Implement full vmstate serialization
  target/openrisc: Remove duplicate features property
  target/openrisc: Support non-busy idle state using PMR SPR

Tim 'mithro' Ansell (2):
  target/openrisc: Implement EVBAR register
  target/openrisc: Implement EPH bit

 MAINTAINERS |  4 +-
 hw/openrisc/cputimer.c  |  1 +
 include/migration/cpu.h |  7 
 include/migration/vmstate.h | 18 +
 linux-user/elfload.c|  2 +-
 linux-user/main.c   | 18 -
 linux-user/openrisc/target_cpu.h|  6 +--
 linux-user/openrisc/target_signal.h |  2 +-
 linux-user/signal.c | 17 +
 target/openrisc/cpu.c   | 16 +++-
 target/openrisc/cpu.h   | 46 ++
 target/openrisc/gdbstub.c   |  4 +-
 target/openrisc/interrupt.c | 11 +-
 target/openrisc/machine.c   | 76 +++--
 target/openrisc/mmu.c   | 24 ++--
 target/openrisc/sys_helper.c| 35 +
 target/openrisc/translate.c |  5 ++-
 17 files changed, 230 insertions(+), 62 deletions(-)

-- 
2.9.3




[Qemu-devel] [PULL v2 04/11] target/openrisc: Fixes for memory debugging

2017-05-03 Thread Stafford Horne
When debugging in gdb you might want to inspect instructions in mapped
pages or in exception vectors like 0x800 etc.  This was previously not
possible in qemu since the *get_phys_page_debug() routine only looked
into the data tlb.

Change to fall back to look into instruction tlb and plain physical
pages.

Reviewed-by: Richard Henderson 
Signed-off-by: Stafford Horne 
---
 target/openrisc/mmu.c | 24 
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/target/openrisc/mmu.c b/target/openrisc/mmu.c
index 56b11d3..ce2a29d 100644
--- a/target/openrisc/mmu.c
+++ b/target/openrisc/mmu.c
@@ -124,7 +124,7 @@ static int cpu_openrisc_get_phys_addr(OpenRISCCPU *cpu,
 {
 int ret = TLBRET_MATCH;
 
-if (rw == 2) {/* ITLB */
+if (rw == MMU_INST_FETCH) {/* ITLB */
*physical = 0;
 ret = cpu->env.tlb->cpu_openrisc_map_address_code(cpu, physical,
   prot, address, rw);
@@ -221,12 +221,28 @@ hwaddr openrisc_cpu_get_phys_page_debug(CPUState *cs, 
vaddr addr)
 OpenRISCCPU *cpu = OPENRISC_CPU(cs);
 hwaddr phys_addr;
 int prot;
+int miss;
 
-if (cpu_openrisc_get_phys_addr(cpu, _addr, , addr, 0)) {
-return -1;
+/* Check memory for any kind of address, since during debug the
+   gdb can ask for anything, check data tlb for address */
+miss = cpu_openrisc_get_phys_addr(cpu, _addr, , addr, 0);
+
+/* Check instruction tlb */
+if (miss) {
+miss = cpu_openrisc_get_phys_addr(cpu, _addr, , addr,
+  MMU_INST_FETCH);
+}
+
+/* Last, fall back to a plain address */
+if (miss) {
+miss = cpu_openrisc_get_phys_nommu(cpu, _addr, , addr, 0);
 }
 
-return phys_addr;
+if (miss) {
+return -1;
+} else {
+return phys_addr;
+}
 }
 
 void cpu_openrisc_mmu_init(OpenRISCCPU *cpu)
-- 
2.9.3




[Qemu-devel] [PULL v2 05/11] target/openrisc: add numcores and coreid support

2017-05-03 Thread Stafford Horne
These are used to identify the processor in SMP system.  Their
definition has been defined in verilog cores but it not yet part of the
spec but it will be soon.

The proposal for this is available:
  https://openrisc.io/proposals/core-identifier-and-number-of-cores

Reviewed-by: Richard Henderson 
Signed-off-by: Stafford Horne 
---
 target/openrisc/sys_helper.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c
index 6ba8162..e13666b 100644
--- a/target/openrisc/sys_helper.c
+++ b/target/openrisc/sys_helper.c
@@ -233,6 +233,12 @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env,
 case TO_SPR(0, 64): /* ESR */
 return env->esr;
 
+case TO_SPR(0, 128): /* COREID */
+return 0;
+
+case TO_SPR(0, 129): /* NUMCORES */
+return 1;
+
 case TO_SPR(1, 512) ... TO_SPR(1, 512+DTLB_SIZE-1): /* DTLBW0MR 0-127 */
 idx = spr - TO_SPR(1, 512);
 return env->tlb->dtlb[0][idx].mr;
-- 
2.9.3




[Qemu-devel] [PULL v2 03/11] target/openrisc: Implement EPH bit

2017-05-03 Thread Stafford Horne
From: Tim 'mithro' Ansell 

Exception Prefix High (EPH) control bit of the Supervision Register
(SR).

The significant bits (31-12) of the vector offset address for each
exception depend on the setting of the Supervision Register (SR)'s EPH
bit and the Exception Vector Base Address Register (EVBAR).

If SR[EPH] is set, the vector offset is logically ORed with the offset
0xF000.

This means if EPH is;
 * 0 - Exceptions vectors start at EVBAR
 * 1 - Exception vectors start at EVBAR | 0xF000

Signed-off-by: Tim 'mithro' Ansell 
Signed-off-by: Stafford Horne 
---
 target/openrisc/interrupt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/target/openrisc/interrupt.c b/target/openrisc/interrupt.c
index 78f0ba9..2c91fab 100644
--- a/target/openrisc/interrupt.c
+++ b/target/openrisc/interrupt.c
@@ -69,6 +69,9 @@ void openrisc_cpu_do_interrupt(CPUState *cs)
 if (env->cpucfgr & CPUCFGR_EVBARP) {
 vect_pc |= env->evbar;
 }
+if (env->sr & SR_EPH) {
+vect_pc |= 0xf000;
+}
 env->pc = vect_pc;
 } else {
 cpu_abort(cs, "Unhandled exception 0x%x\n", cs->exception_index);
-- 
2.9.3




[Qemu-devel] [PULL v2 01/11] MAINTAINERS: Add myself as openrisc maintainer

2017-05-03 Thread Stafford Horne
Jia has claimed he is no longer able to maintain.  I have fixing bugs here
and there and getting familiar with the code base. Orignal thread from Jia:

 https://lists.librecores.org/pipermail/openrisc/2017-January/000321.html

Signed-off-by: Stafford Horne 
Reviewed-by: Alex Bennée 
---
 MAINTAINERS | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index c60235e..21803ca 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -196,8 +196,8 @@ F: hw/nios2/
 F: disas/nios2.c
 
 OpenRISC
-M: Jia Liu 
-S: Maintained
+M: Stafford Horne 
+S: Odd Fixes
 F: target/openrisc/
 F: hw/openrisc/
 F: tests/tcg/openrisc/
-- 
2.9.3




Re: [Qemu-devel] migration: add incremental drive-mirror and blockdev-mirror with dirtymap

2017-05-03 Thread John Snow


On 05/03/2017 03:56 AM, Daniel Kučera wrote:
> Hi all,
> 
> this patch adds possibility to start mirroring since specific dirtyblock
> bitmap.
> The use-case is, for live migrations with ZFS volume used as block device:
> 1. make dirtyblock bitmap in qemu

A "block dirty bitmap," I assume you mean. Through which interface?
"block dirty bitmap add" via QMP?

> 2. make ZFS volume snapshot

ZFS Volume Snapshot is going to be a filesystem-level operation, isn't
it? That is, creating this snapshot will necessarily create new dirty
sectors, yes?

> 3. zfs send/receive the snapshot to target machine

Why? Is this an attempt to make the process faster?

> 4. start mirroring to destination with block map from step 1

This makes me a little nervous: what guarantees do you have that the
bitmap and the ZFS snapshot were synchronous?

> 5. live migrate VM state to destination
> 
> The point is, that I'm not able to live stream zfs changed data to
> destination
> to ensure same volume state in the moment of switchover of migrated VM
> to the new hypervisor.

I'm a little concerned about the mixing of filesystem and block level
snapshots...

> 
> 
> From 7317d731d51c5d743d7a4081b368f0a6862856d7 Mon Sep 17 00:00:00 2001

What happened to your timestamp?

> From: Daniel Kucera 
> Date: Tue, 2 May 2017 15:00:39 +
> Subject: [PATCH] migration: add incremental drive-mirror and blockdev-mirror

Your actual email subject here, however, is missing the [PATCH] tag,
which is useful for it getting picked up by the patchew build bot.

>  with dirtymap added parameter bitmap which will be used instead of newly
>  created dirtymap in mirror_start_job
> 
> Signed-off-by: Daniel Kucera 
> ---
>  block/mirror.c| 41 -
>  blockdev.c|  6 +-
>  include/block/block_int.h |  4 +++-
>  qapi/block-core.json  | 12 ++--
>  4 files changed, 42 insertions(+), 21 deletions(-)
> 
> diff --git a/block/mirror.c b/block/mirror.c
> index 9f5eb69..02b2f69 100644
> --- a/block/mirror.c
> +++ b/block/mirror.c
> @@ -49,7 +49,7 @@ typedef struct MirrorBlockJob {
>  BlockDriverState *to_replace;
>  /* Used to block operations on the drive-mirror-replace target */
>  Error *replace_blocker;
> -bool is_none_mode;
> +MirrorSyncMode sync_mode;
>  BlockMirrorBackingMode backing_mode;
>  BlockdevOnError on_source_error, on_target_error;
>  bool synced;
> @@ -523,7 +523,9 @@ static void mirror_exit(BlockJob *job, void *opaque)
>  bdrv_child_try_set_perm(mirror_top_bs->backing, 0, BLK_PERM_ALL,
>  _abort);
>  if (s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
> -BlockDriverState *backing = s->is_none_mode ? src : s->base;
> +BlockDriverState *backing =
> +(s->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) ||
> +(s->sync_mode == MIRROR_SYNC_MODE_NONE) ? src : s->base;
>  if (backing_bs(target_bs) != backing) {
>  bdrv_set_backing_hd(target_bs, backing, _err);
>  if (local_err) {
> @@ -771,7 +773,8 @@ static void coroutine_fn mirror_run(void *opaque)
>  mirror_free_init(s);
> 
>  s->last_pause_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
> -if (!s->is_none_mode) {
> +if ((s->sync_mode != MIRROR_SYNC_MODE_INCREMENTAL) &&
> +  (s->sync_mode != MIRROR_SYNC_MODE_NONE)) {
>  ret = mirror_dirty_init(s);
>  if (ret < 0 || block_job_is_cancelled(>common)) {
>  goto immediate_exit;
> @@ -1114,7 +1117,8 @@ static void mirror_start_job(const char *job_id,
> BlockDriverState *bs,

Something appears to have corrupted your patch. Did you copy/paste this
into gmail? I am unable to apply it.

Please use "git send-email" as detailed in the wiki contributors guide.

>   BlockCompletionFunc *cb,
>   void *opaque,
>   const BlockJobDriver *driver,
> - bool is_none_mode, BlockDriverState *base,
> + MirrorSyncMode sync_mode, const char *bitmap,
> + BlockDriverState *base,
>   bool auto_complete, const char
> *filter_node_name,
>   Error **errp)
>  {
> @@ -1203,7 +1207,7 @@ static void mirror_start_job(const char *job_id,
> BlockDriverState *bs,
>  s->replaces = g_strdup(replaces);
>  s->on_source_error = on_source_error;
>  s->on_target_error = on_target_error;
> -s->is_none_mode = is_none_mode;
> +s->sync_mode = sync_mode;
>  s->backing_mode = backing_mode;
>  s->base = base;
>  s->granularity = granularity;
> @@ -1213,9 +1217,16 @@ static void mirror_start_job(const char *job_id,
> BlockDriverState *bs,
>  s->should_complete = true;
>  }
> 
> -s->dirty_bitmap = bdrv_create_dirty_bitmap(bs, granularity, NULL,
> 

Re: [Qemu-devel] [Qemu-devel RFC v3 3/5] msf2: Add Smartfusion2 SPI controller

2017-05-03 Thread Alistair Francis
(_On Fri, Apr 28, 2017 at 9:51 AM, Subbaraya Sundeep
 wrote:
> Modelled Microsemi's Smartfusion2 SPI controller.
>
> Signed-off-by: Subbaraya Sundeep 
> ---
> Hi Peter and Alistair,
>
> I created two SPI controllers as per SoC spec
> in hw/arm/msf2_soc.c. I am assuming there has to be two
> busses spi0 and spi1 one for each controller. In board file
> (hw/arm/msf2_som.c) attached SPI flash to SPI0 controller.
> I am not able to understand(from hw/ssi/xilinx_spips.c)
> how to create two busses in hw/ssi/msf2_spi.c.
> Please help me here. Below is the output of info qtree:
>
> (qemu) info qtree
> bus: main-system-bus
>   type System
>   dev: msf2-soc, id ""
> cpu-model = "cortex-m3"
>   dev: msf2-spi, id ""
> gpio-out "sysbus-irq" 2
> mmio 40011000/0040
> bus: spi0
>   type SSI
>   dev: msf2-spi, id ""
> gpio-out "sysbus-irq" 2
> mmio 40001000/0040
> bus: spi0
>   type SSI
>   dev: s25sl12801, id ""
> gpio-in "ssi-gpio-cs" 1
> nonvolatile-cfg = 36863 (0x8fff)
> spansion-cr1nv = 0 (0x0)
> spansion-cr2nv = 1 (0x1)
> spansion-cr3nv = 2 (0x2)
> spansion-cr4nv = 16 (0x10)
> drive = "mtd0"

Hey Sundeep,

This looks like you have created two SPI devices with one bus each.
What you want (I think) is one SPI device with two busses.

>
> Thanks,
> Sundeep
>
>  hw/ssi/Makefile.objs  |   1 +
>  hw/ssi/msf2_spi.c | 373 
> ++
>  include/hw/ssi/msf2_spi.h | 102 +
>  3 files changed, 476 insertions(+)
>  create mode 100644 hw/ssi/msf2_spi.c
>  create mode 100644 include/hw/ssi/msf2_spi.h
>
> diff --git a/hw/ssi/Makefile.objs b/hw/ssi/Makefile.objs
> index 487add2..86445d7 100644
> --- a/hw/ssi/Makefile.objs
> +++ b/hw/ssi/Makefile.objs
> @@ -4,6 +4,7 @@ common-obj-$(CONFIG_XILINX_SPI) += xilinx_spi.o
>  common-obj-$(CONFIG_XILINX_SPIPS) += xilinx_spips.o
>  common-obj-$(CONFIG_ASPEED_SOC) += aspeed_smc.o
>  common-obj-$(CONFIG_STM32F2XX_SPI) += stm32f2xx_spi.o
> +common-obj-$(CONFIG_MSF2) += msf2_spi.o
>
>  obj-$(CONFIG_OMAP) += omap_spi.o
>  obj-$(CONFIG_IMX) += imx_spi.o
> diff --git a/hw/ssi/msf2_spi.c b/hw/ssi/msf2_spi.c
> new file mode 100644
> index 000..e7ffa21
> --- /dev/null
> +++ b/hw/ssi/msf2_spi.c
> @@ -0,0 +1,373 @@
> +/*
> + * SPI controller model of Microsemi SmartFusion2.
> + *
> + * Copyright (C) 2017 Subbaraya Sundeep 
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a 
> copy
> + * of this software and associated documentation files (the "Software"), to 
> deal
> + * in the Software without restriction, including without limitation the 
> rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
> FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +#include "qemu/osdep.h"
> +#include "hw/ssi/msf2_spi.h"
> +#include "sysemu/sysemu.h"
> +#include "qemu/log.h"
> +
> +#ifndef MSF2_SPI_ERR_DEBUG
> +#define MSF2_SPI_ERR_DEBUG   0
> +#endif
> +
> +#define DB_PRINT_L(lvl, fmt, args...) do { \
> +if (MSF2_SPI_ERR_DEBUG >= lvl) { \
> +qemu_log("%s: " fmt, __func__, ## args); \
> +} \
> +} while (0);
> +
> +#define DB_PRINT(fmt, args...) DB_PRINT_L(1, fmt, ## args)
> +
> +static void txfifo_reset(MSF2SpiState *s)
> +{
> +fifo32_reset(>tx_fifo);
> +
> +s->regs[R_SPI_STATUS] &= ~S_TXFIFOFUL;
> +s->regs[R_SPI_STATUS] |= S_TXFIFOEMP;
> +}
> +
> +static void rxfifo_reset(MSF2SpiState *s)
> +{
> +fifo32_reset(>rx_fifo);
> +
> +s->regs[R_SPI_STATUS] &= ~S_RXFIFOFUL;
> +s->regs[R_SPI_STATUS] |= S_RXFIFOEMP;
> +}
> +
> +static void set_fifodepth(MSF2SpiState *s)
> +{
> +int size = s->regs[R_SPI_DFSIZE] & FRAMESZ_MASK;
> +
> +if (0 <= size && size <= 8) {
> +s->fifo_depth = 32;
> +}
> +if (9 <= size && size <= 16) {
> +s->fifo_depth = 16;
> +}
> +if (17 <= size && size <= 32) {
> +s->fifo_depth = 8;
> +}
> +}
> +
> +static void msf2_spi_do_reset(MSF2SpiState *s)
> +{
> +memset(s->regs, 0, sizeof s->regs);
> +s->regs[R_SPI_CONTROL] = 0x8102;
> +   

Re: [Qemu-devel] [Qemu-devel RFC v3 1/5] msf2: Add Smartfusion2 System timer

2017-05-03 Thread Alistair Francis
On Tue, May 2, 2017 at 10:11 PM, sundeep subbaraya
 wrote:
> Hi Alistair,
>
> On Wed, May 3, 2017 at 3:25 AM, Alistair Francis  wrote:
>> On Fri, Apr 28, 2017 at 9:51 AM, Subbaraya Sundeep
>>  wrote:
>>> Modelled System Timer in Microsemi's Smartfusion2 Soc.
>>> Timer has two 32bit down counters and two interrupts.
>>>
>>> Signed-off-by: Subbaraya Sundeep 
>>> ---
>>>  hw/timer/Makefile.objs|   1 +
>>>  hw/timer/msf2_timer.c | 250 
>>> ++
>>>  include/hw/timer/msf2_timer.h |  82 ++
>>>  3 files changed, 333 insertions(+)
>>>  create mode 100644 hw/timer/msf2_timer.c
>>>  create mode 100644 include/hw/timer/msf2_timer.h
>>>
>>> diff --git a/hw/timer/Makefile.objs b/hw/timer/Makefile.objs
>>> index dd6f27e..0bdf1e1 100644
>>> --- a/hw/timer/Makefile.objs
>>> +++ b/hw/timer/Makefile.objs
>>> @@ -41,3 +41,4 @@ common-obj-$(CONFIG_STM32F2XX_TIMER) += stm32f2xx_timer.o
>>>  common-obj-$(CONFIG_ASPEED_SOC) += aspeed_timer.o
>>>
>>>  common-obj-$(CONFIG_SUN4V_RTC) += sun4v-rtc.o
>>> +common-obj-$(CONFIG_MSF2) += msf2_timer.o
>>> diff --git a/hw/timer/msf2_timer.c b/hw/timer/msf2_timer.c
>>> new file mode 100644
>>> index 000..d1dbde5
>>> --- /dev/null
>>> +++ b/hw/timer/msf2_timer.c
>>
>> Hey Sundeep,
>>
>> File names should use '-' instead of '_'.
>
> Ok will change.
>>
>>> @@ -0,0 +1,250 @@
>>> +/*
>>> + * Timer block model of Microsemi SmartFusion2.
>>> + *
>>> + * Copyright (c) 2017 Subbaraya Sundeep .
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person obtaining a 
>>> copy
>>> + * of this software and associated documentation files (the "Software"), 
>>> to deal
>>> + * in the Software without restriction, including without limitation the 
>>> rights
>>> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or 
>>> sell
>>> + * copies of the Software, and to permit persons to whom the Software is
>>> + * furnished to do so, subject to the following conditions:
>>> + *
>>> + * The above copyright notice and this permission notice shall be included 
>>> in
>>> + * all copies or substantial portions of the Software.
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 
>>> OR
>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
>>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
>>> OTHER
>>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
>>> FROM,
>>> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 
>>> IN
>>> + * THE SOFTWARE.
>>> + */
>>> +
>>> +#include "qemu/osdep.h"
>>> +#include "hw/timer/msf2_timer.h"
>>> +#include "hw/sysbus.h"
>>> +#include "qemu/log.h"
>>> +#include "qemu/main-loop.h"
>>
>> Do you need to include the main-loop.h? This doesn't look right.
>>
> Ok I will remove it.
>
>>> +
>>> +#ifndef MSF2_TIMER_ERR_DEBUG
>>> +#define MSF2_TIMER_ERR_DEBUG  0
>>> +#endif
>>> +
>>> +#define DB_PRINT_L(lvl, fmt, args...) do { \
>>> +if (MSF2_TIMER_ERR_DEBUG >= lvl) { \
>>> +qemu_log("%s: " fmt, __func__, ## args); \
>>> +} \
>>> +} while (0);
>>> +
>>> +#define DB_PRINT(fmt, args...) DB_PRINT_L(1, fmt, ## args)
>>> +
>>> +static void timer_update_irq(struct Msf2Timer *st)
>>> +{
>>> +bool isr, ier;
>>> +
>>> +isr = !!(st->regs[R_TIM_RIS] & TIMER_RIS_ACK);
>>> +ier = !!(st->regs[R_TIM_CTRL] & TIMER_CTRL_INTR);
>>> +
>>> +qemu_set_irq(st->irq, (ier && isr));
>>> +}
>>> +
>>> +static void timer_update(struct Msf2Timer *st)
>>> +{
>>> +uint64_t count;
>>> +
>>> +DB_PRINT("timer=%d\n", st->nr);
>>
>> Maybe make this a little more explict. Something like "Updating timer: %d"?
>>
> Yeah will change that.
>>> +
>>> +if (!(st->regs[R_TIM_CTRL] & TIMER_CTRL_ENBL)) {
>>> +ptimer_stop(st->ptimer);
>>> +return;
>>> +}
>>> +
>>> +count = st->regs[R_TIM_LOADVAL];
>>> +ptimer_set_limit(st->ptimer, count, 1);
>>> +ptimer_run(st->ptimer, 1);
>>> +}
>>> +
>>> +static uint64_t
>>> +timer_read(void *opaque, hwaddr addr, unsigned int size)
>>> +{
>>> +MSF2TimerState *t = opaque;
>>> +struct Msf2Timer *st;
>>> +uint32_t r = 0;
>>
>> Call the ret instead of r and it should be a uint64_t.
>>
> I will change to ret. I guess uint32_t is fine because same is used
> for returning values in 32 bit registers.
>
>>> +unsigned int timer = 0;
>>
>> Isn't the timer normally an int? Try to use the same type throughout the 
>> code.
>>
> Ok will use int.
>
>>> +int isr;
>>> +int ier;
>>> +
>>> +addr >>= 2;
>>> +/*
>>> + * Two independent timers has same base address.
>>> + * Based on addr passed figure out which timer is being used.
>>> + */
>>> +if (addr >= R_TIM1_MAX) 

Re: [Qemu-devel] [PATCH v5 3/5] audio: fix WAVState leak

2017-05-03 Thread Philippe Mathieu-Daudé

On 05/03/2017 07:38 PM, Marc-André Lureau wrote:

Spotted by ASAN.

Signed-off-by: Marc-André Lureau 


Reviewed-by: Philippe Mathieu-Daudé 


---
 audio/wavcapture.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/audio/wavcapture.c b/audio/wavcapture.c
index 8bfb9e7654..5863803584 100644
--- a/audio/wavcapture.c
+++ b/audio/wavcapture.c
@@ -88,6 +88,7 @@ static void wav_capture_destroy (void *opaque)
 WAVState *wav = opaque;

 AUD_del_capture (wav->cap, wav);
+g_free (wav);
 }

 static void wav_capture_info (void *opaque)





Re: [Qemu-devel] [PATCH v5 2/5] audio: fix capture buffer leaks

2017-05-03 Thread Philippe Mathieu-Daudé

Gerd already sent this one few days ago ;)

http://lists.nongnu.org/archive/html/qemu-devel/2017-04/msg05587.html

On 05/03/2017 07:38 PM, Marc-André Lureau wrote:

Spotted by ASAN.

Signed-off-by: Marc-André Lureau 
---
 audio/audio.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/audio/audio.c b/audio/audio.c
index c8898d8422..beafed209b 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -2028,6 +2028,8 @@ void AUD_del_capture (CaptureVoiceOut *cap, void 
*cb_opaque)
 sw = sw1;
 }
 QLIST_REMOVE (cap, entries);
+g_free (cap->hw.mix_buf);
+g_free (cap->buf);
 g_free (cap);
 }
 return;





[Qemu-devel] [PATCH 4/4] iotests: Extend test 066

2017-05-03 Thread Max Reitz
066 was supposed to be a test "for discarding preallocated zero
clusters", but it did so incompletely: While it did check the image
file's integrity after the operation, it did not confirm that the
clusters are indeed freed. This patch adds this test.

In addition, new cases for writing to preallocated zero clusters are
added.

Signed-off-by: Max Reitz 
---
 tests/qemu-iotests/066 | 128 -
 tests/qemu-iotests/066.out |  46 
 2 files changed, 173 insertions(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/066 b/tests/qemu-iotests/066
index c2116a3088..8638217736 100755
--- a/tests/qemu-iotests/066
+++ b/tests/qemu-iotests/066
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Test case for discarding preallocated zero clusters in qcow2
+# Test case for preallocated zero clusters in qcow2
 #
 # Copyright (C) 2013 Red Hat, Inc.
 #
@@ -55,8 +55,134 @@ _make_test_img $IMG_SIZE
 $QEMU_IO -c "write 0 256k" -c "write -z 0 256k" -c "write 64M 512" \
 -c "discard 0 $IMG_SIZE" -c "read -P 0 0 $IMG_SIZE" "$TEST_IMG" \
  | _filter_qemu_io
+
 # Check the image (there shouldn't be any leaks)
 _check_test_img
+# Map the image (we want all clusters to be gone)
+$QEMU_IMG map "$TEST_IMG"
+
+_cleanup_test_img
+
+
+echo
+echo '=== Writing to preallocated zero clusters ==='
+echo
+
+_make_test_img $IMG_SIZE
+
+# Create data clusters (not aligned to an L2 table)
+$QEMU_IO -c 'write -P 42 1M 256k' "$TEST_IMG" | _filter_qemu_io
+orig_map=$($QEMU_IMG map --output=json "$TEST_IMG")
+
+# Convert the data clusters to preallocated zero clusters
+$QEMU_IO -c 'write -z 1M 256k' "$TEST_IMG" | _filter_qemu_io
+
+# Now write to them (with a COW needed for the head and tail)
+$QEMU_IO -c "write -P 23 $(((1024 + 32) * 1024)) 192k" "$TEST_IMG" \
+| _filter_qemu_io
+
+# Check metadata correctness
+_check_test_img
+
+# Check data correctness
+$QEMU_IO -c "read -P  0 $(( 1024 * 1024)) 32k" \
+ -c "read -P 23 $(((1024 + 32)   * 1024)) 192k" \
+ -c "read -P  0 $(((1024 + 32 + 192) * 1024)) 32k" \
+ "$TEST_IMG" \
+ | _filter_qemu_io
+
+# Check that we have actually reused the original area
+new_map=$($QEMU_IMG map --output=json "$TEST_IMG")
+if [ "$new_map" = "$orig_map" ]; then
+echo 'Successfully reused original clusters.'
+else
+echo 'Failed to reuse original clusters.'
+echo 'Original map:'
+echo "$orig_map"
+echo 'New map:'
+echo "$new_map"
+fi
+
+_cleanup_test_img
+
+
+echo
+echo '=== Writing to a snapshotted preallocated zero cluster ==='
+echo
+
+_make_test_img 64k
+
+# Create a preallocated zero cluster
+$QEMU_IO -c 'write -P 42 0 64k' -c 'write -z 0 64k' "$TEST_IMG" \
+| _filter_qemu_io
+
+# Snapshot it
+$QEMU_IMG snapshot -c foo "$TEST_IMG"
+
+# Write to the cluster
+$QEMU_IO -c 'write -P 23 0 64k' "$TEST_IMG" | _filter_qemu_io
+
+# Check metadata correctness
+_check_test_img
+
+# Check data correctness
+$QEMU_IO -c 'read -P 23 0 64k' "$TEST_IMG" | _filter_qemu_io
+$QEMU_IMG snapshot -a foo "$TEST_IMG"
+$QEMU_IO -c 'read -P 0 0 64k' "$TEST_IMG" | _filter_qemu_io
+
+_cleanup_test_img
+
+
+echo
+echo '=== Consecutive write to a preallocated zero cluster ==='
+echo
+
+_make_test_img 192k
+
+# Create three normal clusters
+$QEMU_IO -c 'write -P 42 0 192k' "$TEST_IMG" | _filter_qemu_io
+orig_map=$($QEMU_IMG map --output=json "$TEST_IMG")
+
+# Make the middle cluster a preallocated zero cluster
+$QEMU_IO -c 'write -z 64k 64k' "$TEST_IMG" | _filter_qemu_io
+
+# Try to overwrite everything: This should reuse the whole range. To test that
+# this only issues a single continuous write request, use blkdebug.
+$QEMU_IO -c 'write -P 42 0 192k' \
+"json:{
+'driver': '$IMGFMT',
+'file': {
+'driver': 'blkdebug',
+'image.filename': '$TEST_IMG',
+'set-state': [{
+'event': 'write_aio',
+'new_state': 2
+}],
+'inject-error': [{
+'event': 'write_aio',
+'state': 2
+}]
+}
+}" \
+| _filter_qemu_io
+
+# Check metadata correctness
+_check_test_img
+
+# Check that we have actually reused the original area
+new_map=$($QEMU_IMG map --output=json "$TEST_IMG")
+if [ "$new_map" = "$orig_map" ]; then
+echo 'Successfully reused original clusters.'
+else
+echo 'Failed to reuse original clusters.'
+echo 'Original map:'
+echo "$orig_map"
+echo 'New map:'
+echo "$new_map"
+fi
+
+_cleanup_test_img
+
 
 # success, all done
 echo "*** done"
diff --git a/tests/qemu-iotests/066.out b/tests/qemu-iotests/066.out
index 7c1f31a1b1..3d9da9bd0b 100644
--- a/tests/qemu-iotests/066.out
+++ b/tests/qemu-iotests/066.out
@@ -14,4 +14,50 @@ discard 67109376/67109376 bytes at offset 0
 read 67109376/67109376 bytes at offset 0
 64 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
 No errors were found on the image.
+Offset  

[Qemu-devel] [PATCH 3/4] qcow2: Discard preallocated zero clusters

2017-05-03 Thread Max Reitz
In discard_single_l2(), we completely discard normal clusters instead of
simply turning them into preallocated zero clusters. That means we
should probably do the same with such preallocated zero clusters:
Discard them instead of keeping them allocated.

Reported-by: Eric Blake 
Signed-off-by: Max Reitz 
---
 block/qcow2-cluster.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index fb91fd8979..31077d8102 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1511,7 +1511,8 @@ static int discard_single_l2(BlockDriverState *bs, 
uint64_t offset,
 break;
 
 case QCOW2_CLUSTER_ZERO:
-if (!full_discard) {
+/* Preallocated zero clusters should be discarded in any case 
*/
+if (!full_discard && (old_l2_entry & L2E_OFFSET_MASK) == 0) {
 continue;
 }
 break;
-- 
2.12.2




[Qemu-devel] [PATCH 2/4] qcow2: Reuse preallocated zero clusters

2017-05-03 Thread Max Reitz
Instead of just freeing preallocated zero clusters and completely
allocating them from scratch, reuse them.

We cannot do this in handle_copied(), however, since this is a COW
operation. Therefore, we have to add the new logic to handle_alloc() and
simply return the existing offset if it exists. The only catch is that
we have to convince qcow2_alloc_cluster_link_l2() not to free the old
clusters (because we have reused them).

Reported-by: Eric Blake 
Signed-off-by: Max Reitz 
---
 block/qcow2.h |  3 ++
 block/qcow2-cluster.c | 80 +++
 2 files changed, 59 insertions(+), 24 deletions(-)

diff --git a/block/qcow2.h b/block/qcow2.h
index f8aeb08794..8731f24b82 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -322,6 +322,9 @@ typedef struct QCowL2Meta
 /** Number of newly allocated clusters */
 int nb_clusters;
 
+/** Do not free the old clusters */
+bool keep_old_clusters;
+
 /**
  * Requests that overlap with this allocation and wait to be restarted
  * when the allocating request has completed.
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 100398c565..fb91fd8979 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -309,14 +309,20 @@ static int count_contiguous_clusters(int nb_clusters, int 
cluster_size,
 uint64_t *l2_table, uint64_t stop_flags)
 {
 int i;
+int first_cluster_type;
 uint64_t mask = stop_flags | L2E_OFFSET_MASK | QCOW_OFLAG_COMPRESSED;
 uint64_t first_entry = be64_to_cpu(l2_table[0]);
 uint64_t offset = first_entry & mask;
 
-if (!offset)
+if (!offset) {
 return 0;
+}
 
-assert(qcow2_get_cluster_type(first_entry) == QCOW2_CLUSTER_NORMAL);
+/* must be allocated */
+first_cluster_type = qcow2_get_cluster_type(first_entry);
+assert(first_cluster_type == QCOW2_CLUSTER_NORMAL ||
+   (first_cluster_type == QCOW2_CLUSTER_ZERO &&
+(first_entry & L2E_OFFSET_MASK) != 0));
 
 for (i = 0; i < nb_clusters; i++) {
 uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask;
@@ -835,7 +841,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, 
QCowL2Meta *m)
  * Don't discard clusters that reach a refcount of 0 (e.g. compressed
  * clusters), the next write will reuse them anyway.
  */
-if (j != 0) {
+if (!m->keep_old_clusters && j != 0) {
 for (i = 0; i < j; i++) {
 qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1,
 QCOW2_DISCARD_NEVER);
@@ -1132,8 +1138,9 @@ static int handle_alloc(BlockDriverState *bs, uint64_t 
guest_offset,
 uint64_t entry;
 uint64_t nb_clusters;
 int ret;
+bool keep_old_clusters = false;
 
-uint64_t alloc_cluster_offset;
+uint64_t alloc_cluster_offset = 0;
 
 trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset,
  *bytes);
@@ -1170,31 +1177,54 @@ static int handle_alloc(BlockDriverState *bs, uint64_t 
guest_offset,
  * wrong with our code. */
 assert(nb_clusters > 0);
 
-qcow2_cache_put(bs, s->l2_table_cache, (void **) _table);
+if (qcow2_get_cluster_type(entry) == QCOW2_CLUSTER_ZERO &&
+(entry & L2E_OFFSET_MASK) != 0 && (entry & QCOW_OFLAG_COPIED) &&
+(!*host_offset ||
+ start_of_cluster(s, *host_offset) == (entry & L2E_OFFSET_MASK)))
+{
+/* Try to reuse preallocated zero clusters; contiguous normal clusters
+ * would be fine, too, but count_cow_clusters() above has limited
+ * nb_clusters already to a range of COW clusters */
+int preallocated_nb_clusters =
+count_contiguous_clusters(nb_clusters, s->cluster_size,
+  _table[l2_index], QCOW_OFLAG_COPIED);
+assert(preallocated_nb_clusters > 0);
 
-/* Allocate, if necessary at a given offset in the image file */
-alloc_cluster_offset = start_of_cluster(s, *host_offset);
-ret = do_alloc_cluster_offset(bs, guest_offset, _cluster_offset,
-  _clusters);
-if (ret < 0) {
-goto fail;
-}
+nb_clusters = preallocated_nb_clusters;
+alloc_cluster_offset = entry & L2E_OFFSET_MASK;
 
-/* Can't extend contiguous allocation */
-if (nb_clusters == 0) {
-*bytes = 0;
-return 0;
+/* We want to reuse these clusters, so qcow2_alloc_cluster_link_l2()
+ * should not free them. */
+keep_old_clusters = true;
 }
 
-/* !*host_offset would overwrite the image header and is reserved for "no
- * host offset preferred". If 0 was a valid host offset, it'd trigger the
- * following overlap check; do that now to avoid having an invalid value in
- * *host_offset. */
+qcow2_cache_put(bs, s->l2_table_cache, (void **) _table);
+
 if (!alloc_cluster_offset) {
-ret = 

[Qemu-devel] [PATCH 1/4] qcow2: Fix preallocation size formula

2017-05-03 Thread Max Reitz
When calculating the number of reftable entries, we should actually use
the number of refblocks and not (wrongly[1]) re-calculate it.

[1] "Wrongly" means: Dividing the number of clusters by the number of
entries per refblock and rounding down instead of up.

Reported-by: Eric Blake 
Signed-off-by: Max Reitz 
---
 block/qcow2.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/block/qcow2.c b/block/qcow2.c
index 5c1573c999..f5a72a4d2d 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -2139,7 +2139,7 @@ static int qcow2_create2(const char *filename, int64_t 
total_size,
  * too, as long as the bulk is allocated here). Therefore, using
  * floating point arithmetic is fine. */
 int64_t meta_size = 0;
-uint64_t nreftablee, nrefblocke, nl1e, nl2e;
+uint64_t nreftablee, nrefblocke, nl1e, nl2e, refblock_count;
 int64_t aligned_total_size = align_offset(total_size, cluster_size);
 int refblock_bits, refblock_size;
 /* refcount entry size in bytes */
@@ -2182,11 +2182,12 @@ static int qcow2_create2(const char *filename, int64_t 
total_size,
 nrefblocke = (aligned_total_size + meta_size + cluster_size)
/ (cluster_size - rces - rces * sizeof(uint64_t)
  / cluster_size);
-meta_size += DIV_ROUND_UP(nrefblocke, refblock_size) * cluster_size;
+refblock_count = DIV_ROUND_UP(nrefblocke, refblock_size);
+meta_size += refblock_count * cluster_size;
 
 /* total size of refcount tables */
-nreftablee = nrefblocke / refblock_size;
-nreftablee = align_offset(nreftablee, cluster_size / sizeof(uint64_t));
+nreftablee = align_offset(refblock_count,
+  cluster_size / sizeof(uint64_t));
 meta_size += nreftablee * sizeof(uint64_t);
 
 qemu_opt_set_number(opts, BLOCK_OPT_SIZE,
-- 
2.12.2




[Qemu-devel] [PATCH 0/4] qcow2: Some fixes

2017-05-03 Thread Max Reitz
This series contains some fixes for issues reported by Eric.


Max Reitz (4):
  qcow2: Fix preallocation size formula
  qcow2: Reuse preallocated zero clusters
  qcow2: Discard preallocated zero clusters
  iotests: Extend test 066

 block/qcow2.h  |   3 ++
 block/qcow2-cluster.c  |  83 -
 block/qcow2.c  |   9 ++--
 tests/qemu-iotests/066 | 128 -
 tests/qemu-iotests/066.out |  46 
 5 files changed, 239 insertions(+), 30 deletions(-)

-- 
2.12.2




Re: [Qemu-devel] [PATCH v5 0/5] Memory leak fixes

2017-05-03 Thread no-reply
Hi,

This series seems to have some coding style problems. See output below for
more information:

Subject: [Qemu-devel] [PATCH v5 0/5] Memory leak fixes
Message-id: 20170503223846.6559-1-marcandre.lur...@redhat.com
Type: series

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

git config --local diff.renamelimit 0
git config --local diff.renames True

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
84762e2 dump: fix memory_mapping_filter leak
345a5d8 slirp: fix leak
3f1ae28 audio: fix WAVState leak
565e071 audio: fix capture buffer leaks
2ce5984 test-keyval: fix leaks

=== OUTPUT BEGIN ===
Checking PATCH 1/5: test-keyval: fix leaks...
Checking PATCH 2/5: audio: fix capture buffer leaks...
ERROR: space prohibited between function name and open parenthesis '('
#22: FILE: audio/audio.c:2031:
+g_free (cap->hw.mix_buf);

ERROR: space prohibited between function name and open parenthesis '('
#23: FILE: audio/audio.c:2032:
+g_free (cap->buf);

total: 2 errors, 0 warnings, 8 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Checking PATCH 3/5: audio: fix WAVState leak...
ERROR: space prohibited between function name and open parenthesis '('
#22: FILE: audio/wavcapture.c:91:
+g_free (wav);

total: 1 errors, 0 warnings, 7 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Checking PATCH 4/5: slirp: fix leak...
ERROR: suspect code indent for conditional statements (2, 6)
#37: FILE: slirp/socket.c:103:
+  if (so->so_tcpcb) {
+  free(so->so_tcpcb);

total: 1 errors, 0 warnings, 9 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Checking PATCH 5/5: dump: fix memory_mapping_filter leak...
=== OUTPUT END ===

Test command exited with code: 1


---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-de...@freelists.org

[Qemu-devel] [PATCH v5 4/5] slirp: fix leak

2017-05-03 Thread Marc-André Lureau
Spotted by ASAN:

/x86_64/hmp/pc-0.12:
=
==22538==ERROR: LeakSanitizer: detected memory leaks

Direct leak of 224 byte(s) in 1 object(s) allocated from:
#0 0x7f0f63cdee60 in malloc (/lib64/libasan.so.3+0xc6e60)
#1 0x556f11ff32d7 in tcp_newtcpcb 
/home/elmarco/src/qemu/slirp/tcp_subr.c:250
#2 0x556f11fdb1d1 in tcp_listen /home/elmarco/src/qemu/slirp/socket.c:688
#3 0x556f11fca9d5 in slirp_add_hostfwd 
/home/elmarco/src/qemu/slirp/slirp.c:1052
#4 0x556f11f8db41 in slirp_hostfwd /home/elmarco/src/qemu/net/slirp.c:506
#5 0x556f11f8dd83 in hmp_hostfwd_add /home/elmarco/src/qemu/net/slirp.c:535

There might be a better way to fix this, but calling slirp tcp_close()
doesn't work.

Signed-off-by: Marc-André Lureau 
---
 slirp/socket.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/slirp/socket.c b/slirp/socket.c
index 86927722e1..3b49a69a93 100644
--- a/slirp/socket.c
+++ b/slirp/socket.c
@@ -100,6 +100,9 @@ sofree(struct socket *so)
   if(so->so_next && so->so_prev)
 remque(so);  /* crashes if so is not in a queue */
 
+  if (so->so_tcpcb) {
+  free(so->so_tcpcb);
+  }
   free(so);
 }
 
-- 
2.12.0.191.gc5d8de91d




[Qemu-devel] [PATCH v5 5/5] dump: fix memory_mapping_filter leak

2017-05-03 Thread Marc-André Lureau
Spotted by ASAN.

Signed-off-by: Marc-André Lureau 
---
 memory_mapping.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/memory_mapping.c b/memory_mapping.c
index 6a39d71da2..a5d38552a6 100644
--- a/memory_mapping.c
+++ b/memory_mapping.c
@@ -337,6 +337,7 @@ void memory_mapping_filter(MemoryMappingList *list, int64_t 
begin,
 if (cur->phys_addr >= begin + length ||
 cur->phys_addr + cur->length <= begin) {
 QTAILQ_REMOVE(>head, cur, next);
+g_free(cur);
 list->num--;
 continue;
 }
-- 
2.12.0.191.gc5d8de91d




[Qemu-devel] [PATCH v5 3/5] audio: fix WAVState leak

2017-05-03 Thread Marc-André Lureau
Spotted by ASAN.

Signed-off-by: Marc-André Lureau 
---
 audio/wavcapture.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/audio/wavcapture.c b/audio/wavcapture.c
index 8bfb9e7654..5863803584 100644
--- a/audio/wavcapture.c
+++ b/audio/wavcapture.c
@@ -88,6 +88,7 @@ static void wav_capture_destroy (void *opaque)
 WAVState *wav = opaque;
 
 AUD_del_capture (wav->cap, wav);
+g_free (wav);
 }
 
 static void wav_capture_info (void *opaque)
-- 
2.12.0.191.gc5d8de91d




Re: [Qemu-devel] [PATCH v5 0/5] Memory leak fixes

2017-05-03 Thread Marc-André Lureau

This is actually v1 (confused with git-publish)

- Original Message -
> Hi,
> 
> A new series of leaks spotted by ASAN. Mostly after introducing of the
> test-hmp. Would it be useful having a configure --enable-asan, and
> enabled by default with --enable-debug?
> 
> Marc-André Lureau (5):
>   test-keyval: fix leaks
>   audio: fix capture buffer leaks
>   audio: fix WAVState leak
>   slirp: fix leak
>   dump: fix memory_mapping_filter leak
> 
>  audio/audio.c   | 2 ++
>  audio/wavcapture.c  | 1 +
>  memory_mapping.c| 1 +
>  slirp/socket.c  | 3 +++
>  tests/test-keyval.c | 4 
>  5 files changed, 11 insertions(+)
> 
> --
> 2.12.0.191.gc5d8de91d
> 
> 



[Qemu-devel] [PATCH v5 2/5] audio: fix capture buffer leaks

2017-05-03 Thread Marc-André Lureau
Spotted by ASAN.

Signed-off-by: Marc-André Lureau 
---
 audio/audio.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/audio/audio.c b/audio/audio.c
index c8898d8422..beafed209b 100644
--- a/audio/audio.c
+++ b/audio/audio.c
@@ -2028,6 +2028,8 @@ void AUD_del_capture (CaptureVoiceOut *cap, void 
*cb_opaque)
 sw = sw1;
 }
 QLIST_REMOVE (cap, entries);
+g_free (cap->hw.mix_buf);
+g_free (cap->buf);
 g_free (cap);
 }
 return;
-- 
2.12.0.191.gc5d8de91d




[Qemu-devel] [PATCH v5 1/5] test-keyval: fix leaks

2017-05-03 Thread Marc-André Lureau
Spotted by ASAN.

Signed-off-by: Marc-André Lureau 
---
 tests/test-keyval.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/tests/test-keyval.c b/tests/test-keyval.c
index ba19560a22..c556b1b117 100644
--- a/tests/test-keyval.c
+++ b/tests/test-keyval.c
@@ -628,6 +628,7 @@ static void test_keyval_visit_alternate(void)
 visit_type_AltNumStr(v, "a", , _abort);
 g_assert_cmpint(ans->type, ==, QTYPE_QSTRING);
 g_assert_cmpstr(ans->u.s, ==, "1");
+qapi_free_AltNumStr(ans);
 visit_type_AltNumInt(v, "a", , );
 error_free_or_abort();
 visit_end_struct(v, NULL);
@@ -651,9 +652,12 @@ static void test_keyval_visit_any(void)
 g_assert(qlist);
 qstr = qobject_to_qstring(qlist_pop(qlist));
 g_assert_cmpstr(qstring_get_str(qstr), ==, "null");
+QDECREF(qstr);
 qstr = qobject_to_qstring(qlist_pop(qlist));
 g_assert_cmpstr(qstring_get_str(qstr), ==, "1");
 g_assert(qlist_empty(qlist));
+QDECREF(qstr);
+qobject_decref(any);
 visit_check_struct(v, _abort);
 visit_end_struct(v, NULL);
 visit_free(v);
-- 
2.12.0.191.gc5d8de91d




[Qemu-devel] [PATCH v5 0/5] Memory leak fixes

2017-05-03 Thread Marc-André Lureau
Hi,

A new series of leaks spotted by ASAN. Mostly after introducing of the
test-hmp. Would it be useful having a configure --enable-asan, and
enabled by default with --enable-debug?

Marc-André Lureau (5):
  test-keyval: fix leaks
  audio: fix capture buffer leaks
  audio: fix WAVState leak
  slirp: fix leak
  dump: fix memory_mapping_filter leak

 audio/audio.c   | 2 ++
 audio/wavcapture.c  | 1 +
 memory_mapping.c| 1 +
 slirp/socket.c  | 3 +++
 tests/test-keyval.c | 4 
 5 files changed, 11 insertions(+)

-- 
2.12.0.191.gc5d8de91d




[Qemu-devel] [PULL 3/3] vfio/pci: Fix incorrect error message

2017-05-03 Thread Alex Williamson
From: Dong Jia Shi 

When the "No host device provided" error occurs, the hint message
that starts with "Use -vfio-pci," makes no sense, since "-vfio-pci"
is not a valid command line parameter.

Correct this by replacing "-vfio-pci" with "-device vfio-pci".

Signed-off-by: Dong Jia Shi 
Reviewed-by: Eric Auger 
Signed-off-by: Alex Williamson 
---
 hw/vfio/pci.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 03a3d0154976..32aca7770177 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2625,8 +2625,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
 if (!(~vdev->host.domain || ~vdev->host.bus ||
   ~vdev->host.slot || ~vdev->host.function)) {
 error_setg(errp, "No provided host device");
-error_append_hint(errp, "Use -vfio-pci,host=:BB:DD.F "
-  "or -vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
+error_append_hint(errp, "Use -device vfio-pci,host=:BB:DD.F "
+  "or -device vfio-pci,sysfsdev=PATH_TO_DEVICE\n");
 return;
 }
 vdev->vbasedev.sysfsdev =




[Qemu-devel] [PULL 2/3] vfio: enable 8-byte reads/writes to vfio

2017-05-03 Thread Alex Williamson
From: Jose Ricardo Ziviani 

This patch enables 8-byte writes and reads to VFIO. Such implemention
is already done but it's missing the 'case' to handle such accesses in
both vfio_region_write and vfio_region_read and the MemoryRegionOps:
impl.max_access_size and impl.min_access_size.

After this patch, 8-byte writes such as:

qemu_mutex_lock locked mutex 0x10905ad8
vfio_region_write  (0001:03:00.0:region1+0xc0, 0x4140c, 4)
vfio_region_write  (0001:03:00.0:region1+0xc4, 0xa, 4)
qemu_mutex_unlock unlocked mutex 0x10905ad8

goes like this:

qemu_mutex_lock locked mutex 0x10905ad8
vfio_region_write  (0001:03:00.0:region1+0xc0, 0xbfd0008, 8)
qemu_mutex_unlock unlocked mutex 0x10905ad8

Signed-off-by: Jose Ricardo Ziviani 
Signed-off-by: Alex Williamson 
---
 hw/vfio/common.c |   10 ++
 1 file changed, 10 insertions(+)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 6572c0744cb5..a8f12eeb3589 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -119,6 +119,9 @@ void vfio_region_write(void *opaque, hwaddr addr,
 case 4:
 buf.dword = cpu_to_le32(data);
 break;
+case 8:
+buf.qword = cpu_to_le64(data);
+break;
 default:
 hw_error("vfio: unsupported write size, %d bytes", size);
 break;
@@ -173,6 +176,9 @@ uint64_t vfio_region_read(void *opaque,
 case 4:
 data = le32_to_cpu(buf.dword);
 break;
+case 8:
+data = le64_to_cpu(buf.qword);
+break;
 default:
 hw_error("vfio: unsupported read size, %d bytes", size);
 break;
@@ -194,6 +200,10 @@ const MemoryRegionOps vfio_region_ops = {
 .min_access_size = 1,
 .max_access_size = 8,
 },
+.impl = {
+.min_access_size = 1,
+.max_access_size = 8,
+},
 };
 
 /*




[Qemu-devel] [PULL 1/3] vfio: Set MemoryRegionOps:max_access_size and min_access_size

2017-05-03 Thread Alex Williamson
From: Jose Ricardo Ziviani 

Sets valid.max_access_size and valid.min_access_size to ensure safe
8-byte accesses to vfio. Today, 8-byte accesses are broken into pairs
of 4-byte calls that goes unprotected:

qemu_mutex_lock locked mutex 0x10905ad8
  vfio_region_write  (0001:03:00.0:region1+0xc0, 0x2020c, 4)
qemu_mutex_unlock unlocked mutex 0x10905ad8
qemu_mutex_lock locked mutex 0x10905ad8
  vfio_region_write  (0001:03:00.0:region1+0xc4, 0xa, 4)
qemu_mutex_unlock unlocked mutex 0x10905ad8

which occasionally leads to:

qemu_mutex_lock locked mutex 0x10905ad8
  vfio_region_write  (0001:03:00.0:region1+0xc0, 0x2030c, 4)
qemu_mutex_unlock unlocked mutex 0x10905ad8
qemu_mutex_lock locked mutex 0x10905ad8
  vfio_region_write  (0001:03:00.0:region1+0xc0, 0x1000c, 4)
qemu_mutex_unlock unlocked mutex 0x10905ad8
qemu_mutex_lock locked mutex 0x10905ad8
  vfio_region_write  (0001:03:00.0:region1+0xc4, 0xb, 4)
qemu_mutex_unlock unlocked mutex 0x10905ad8
qemu_mutex_lock locked mutex 0x10905ad8
  vfio_region_write  (0001:03:00.0:region1+0xc4, 0xa, 4)
qemu_mutex_unlock unlocked mutex 0x10905ad8

causing strange errors in guest OS. With this patch, such accesses
are protected by the same lock guard:

qemu_mutex_lock locked mutex 0x10905ad8
vfio_region_write  (0001:03:00.0:region1+0xc0, 0x2000c, 4)
vfio_region_write  (0001:03:00.0:region1+0xc4, 0xb, 4)
qemu_mutex_unlock unlocked mutex 0x10905ad8

This happens because the 8-byte write should be broken into 4-byte
writes by memory.c:access_with_adjusted_size() in order to be under
the same lock. Today, it's done in exec.c:address_space_write_continue()
which was able to handle only 4 bytes due to a zero'ed
valid.max_access_size (see exec.c:memory_access_size()).

Signed-off-by: Jose Ricardo Ziviani 
Signed-off-by: Alex Williamson 
---
 hw/vfio/common.c |4 
 1 file changed, 4 insertions(+)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 6b33b9f55df1..6572c0744cb5 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -190,6 +190,10 @@ const MemoryRegionOps vfio_region_ops = {
 .read = vfio_region_read,
 .write = vfio_region_write,
 .endianness = DEVICE_LITTLE_ENDIAN,
+.valid = {
+.min_access_size = 1,
+.max_access_size = 8,
+},
 };
 
 /*




[Qemu-devel] [PULL 0/3] VFIO fixes 2017-05-03

2017-05-03 Thread Alex Williamson
The following changes since commit e619b14746e5d8c0e53061661fd0e1da01fd4d60:

  Merge remote-tracking branch 'sthibault/tags/samuel-thibault' into staging 
(2017-05-02 15:16:29 +0100)

are available in the git repository at:


  git://github.com/awilliam/qemu-vfio.git tags/vfio-updates-20170503.0

for you to fetch changes up to 6e4e6f0d403b1fb25f9dfdbe17754c643997753d:

  vfio/pci: Fix incorrect error message (2017-05-03 14:52:35 -0600)


VFIO fixes 2017-05-03

 - Enable 8-byte memory region accesses (Jose Ricardo Ziviani)
 - Fix vfio-pci error message (Dong Jia Shi)


Dong Jia Shi (1):
  vfio/pci: Fix incorrect error message

Jose Ricardo Ziviani (2):
  vfio: Set MemoryRegionOps:max_access_size and min_access_size
  vfio: enable 8-byte reads/writes to vfio

 hw/vfio/common.c | 14 ++
 hw/vfio/pci.c|  4 ++--
 2 files changed, 16 insertions(+), 2 deletions(-)



Re: [Qemu-devel] [PATCH 06/13] arm: Don't let no-MPU PMSA cores write to SCTLR.M

2017-05-03 Thread Alistair Francis
On Tue, Apr 25, 2017 at 5:07 AM, Peter Maydell  wrote:
> If the CPU is a PMSA config with no MPU implemented, then the
> SCTLR.M bit should be RAZ/WI, so that the guest can never
> turn on the non-existent MPU.
>
> Signed-off-by: Peter Maydell 

Reviewed-by: Alistair Francis 

Thanks,

Alistair

> ---
>  target/arm/helper.c | 5 +
>  1 file changed, 5 insertions(+)
>
> diff --git a/target/arm/helper.c b/target/arm/helper.c
> index 404bfdb..f0f25c8 100644
> --- a/target/arm/helper.c
> +++ b/target/arm/helper.c
> @@ -3258,6 +3258,11 @@ static void sctlr_write(CPUARMState *env, const 
> ARMCPRegInfo *ri,
>  return;
>  }
>
> +if (arm_feature(env, ARM_FEATURE_PMSA) && !cpu->has_mpu) {
> +/* M bit is RAZ/WI for PMSA with no MPU implemented */
> +value &= ~SCTLR_M;
> +}
> +
>  raw_write(env, ri, value);
>  /* ??? Lots of these bits are not implemented.  */
>  /* This may enable/disable the MMU, so do a TLB flush.  */
> --
> 2.7.4
>
>



Re: [Qemu-devel] [PATCH 08/13] armv7m: Improve "-d mmu" tracing for PMSAv7 MPU

2017-05-03 Thread Alistair Francis
On Tue, Apr 25, 2017 at 5:07 AM, Peter Maydell  wrote:
> From: Michael Davidsaver 
>
> Improve the "-d mmu" tracing for the PMSAv7 MPU translation
> process as an aid in debugging guest MPU configurations:
>  * fix a missing newline for a guest-error log
>  * report the region number with guest-error or unimp
>logs of bad region register values
>  * add a log message for the overall result of the lookup
>  * print "0x" prefix for hex values
>
> Signed-off-by: Michael Davidsaver 
> [PMM: a little tidyup, report region number in all messages
>  rather than just one]
> Signed-off-by: Peter Maydell 

Reviewed-by: Alistair Francis 

Thanks,

Alistair

> ---
>  target/arm/helper.c | 39 +++
>  1 file changed, 27 insertions(+), 12 deletions(-)
>
> diff --git a/target/arm/helper.c b/target/arm/helper.c
> index 5c044d0..9e1ed1c 100644
> --- a/target/arm/helper.c
> +++ b/target/arm/helper.c
> @@ -8169,16 +8169,18 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, 
> uint32_t address,
>  }
>
>  if (!rsize) {
> -qemu_log_mask(LOG_GUEST_ERROR, "DRSR.Rsize field can not be 
> 0");
> +qemu_log_mask(LOG_GUEST_ERROR,
> +  "DRSR[%d]: Rsize field cannot be 0\n", n);
>  continue;
>  }
>  rsize++;
>  rmask = (1ull << rsize) - 1;
>
>  if (base & rmask) {
> -qemu_log_mask(LOG_GUEST_ERROR, "DRBAR %" PRIx32 " misaligned 
> "
> -  "to DRSR region size, mask = %" PRIx32,
> -  base, rmask);
> +qemu_log_mask(LOG_GUEST_ERROR,
> +  "DRBAR[%d]: 0x%" PRIx32 " misaligned "
> +  "to DRSR region size, mask = 0x%" PRIx32 "\n",
> +  n, base, rmask);
>  continue;
>  }
>
> @@ -8215,9 +8217,10 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, 
> uint32_t address,
>  }
>  }
>  if (rsize < TARGET_PAGE_BITS) {
> -qemu_log_mask(LOG_UNIMP, "No support for MPU (sub)region"
> +qemu_log_mask(LOG_UNIMP,
> +  "DRSR[%d]: No support for MPU (sub)region "
>"alignment of %" PRIu32 " bits. Minimum is 
> %d\n",
> -  rsize, TARGET_PAGE_BITS);
> +  n, rsize, TARGET_PAGE_BITS);
>  continue;
>  }
>  if (srdis) {
> @@ -8251,8 +8254,8 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, 
> uint32_t address,
>  break;
>  default:
>  qemu_log_mask(LOG_GUEST_ERROR,
> -  "Bad value for AP bits in DRACR %"
> -  PRIx32 "\n", ap);
> +  "DRACR[%d]: Bad value for AP bits: 0x%"
> +  PRIx32 "\n", n, ap);
>  }
>  } else { /* Priv. mode AP bits decoding */
>  switch (ap) {
> @@ -8269,8 +8272,8 @@ static bool get_phys_addr_pmsav7(CPUARMState *env, 
> uint32_t address,
>  break;
>  default:
>  qemu_log_mask(LOG_GUEST_ERROR,
> -  "Bad value for AP bits in DRACR %"
> -  PRIx32 "\n", ap);
> +  "DRACR[%d]: Bad value for AP bits: 0x%"
> +  PRIx32 "\n", n, ap);
>  }
>  }
>
> @@ -8448,9 +8451,21 @@ static bool get_phys_addr(CPUARMState *env, 
> target_ulong address,
>   */
>  if (arm_feature(env, ARM_FEATURE_PMSA) &&
>  arm_feature(env, ARM_FEATURE_V7)) {
> +bool ret;
>  *page_size = TARGET_PAGE_SIZE;
> -return get_phys_addr_pmsav7(env, address, access_type, mmu_idx,
> -phys_ptr, prot, fsr);
> +ret = get_phys_addr_pmsav7(env, address, access_type, mmu_idx,
> +   phys_ptr, prot, fsr);
> +qemu_log_mask(CPU_LOG_MMU, "PMSAv7 MPU lookup for %s at 0x%08" PRIx32
> +  " mmu_idx %u -> %s (prot %c%c%c)\n",
> +  access_type == 1 ? "reading" :
> +  (access_type == 2 ? "writing" : "execute"),
> +  (uint32_t)address, mmu_idx,
> +  ret ? "Miss" : "Hit",
> +  *prot & PAGE_READ ? 'r' : '-',
> +  *prot & PAGE_WRITE ? 'w' : '-',
> +  *prot & PAGE_EXEC ? 'x' : '-');
> +
> +return ret;
>  }
>
>  if (regime_translation_disabled(env, mmu_idx)) {
> --

[Qemu-devel] [PATCH v2] xen/mapcache: store dma information in revmapcache entries for debugging

2017-05-03 Thread Stefano Stabellini
The Xen mapcache is able to create long term mappings, they are called
"locked" mappings. The third parameter of the xen_map_cache call
specifies if a mapping is a "locked" mapping.


>From the QEMU point of view there are two kinds of long term mappings:

[a] device memory mappings, such as option roms and video memory
[b] dma mappings, created by dma_memory_map & friends

After certain operations, ballooning a VM in particular, Xen asks QEMU
kindly to destroy all mappings. However, certainly [a] mappings are
present and cannot be removed. That's not a problem as they are not
affected by balloonning. The *real* problem is that if there are any
mappings of type [b], any outstanding dma operations could fail. This is
a known shortcoming. In other words, when Xen asks QEMU to destroy all
mappings, it is an error if any [b] mappings exist.

However today we have no way of distinguishing [a] from [b]. Because of
that, we cannot even print a decent warning.

This patch introduces a new "dma" bool field to MapCacheRev entires, to
remember if a given mapping is for dma or is a long term device memory
mapping. When xen_invalidate_map_cache is called, we print a warning if
any [b] mappings exist. We ignore [a] mappings.

Mappings created by qemu_map_ram_ptr are assumed to be [a], while
mappings created by address_space_map->qemu_ram_ptr_length are assumed
to be [b].

The goal of the patch is to make debugging and system understanding
easier.

Signed-off-by: Stefano Stabellini 
Acked-by: Paolo Bonzini 

---

Changes in v2:
- add acked-by
- fix code style problem

diff --git a/exec.c b/exec.c
index eac6085..85769e1 100644
--- a/exec.c
+++ b/exec.c
@@ -2084,10 +2084,10 @@ void *qemu_map_ram_ptr(RAMBlock *ram_block, ram_addr_t 
addr)
  * In that case just map until the end of the page.
  */
 if (block->offset == 0) {
-return xen_map_cache(addr, 0, 0);
+return xen_map_cache(addr, 0, 0, false);
 }
 
-block->host = xen_map_cache(block->offset, block->max_length, 1);
+block->host = xen_map_cache(block->offset, block->max_length, 1, 
false);
 }
 return ramblock_ptr(block, addr);
 }
@@ -2117,10 +2117,10 @@ static void *qemu_ram_ptr_length(RAMBlock *ram_block, 
ram_addr_t addr,
  * In that case just map the requested area.
  */
 if (block->offset == 0) {
-return xen_map_cache(addr, *size, 1);
+return xen_map_cache(addr, *size, 1, true);
 }
 
-block->host = xen_map_cache(block->offset, block->max_length, 1);
+block->host = xen_map_cache(block->offset, block->max_length, 1, true);
 }
 
 return ramblock_ptr(block, addr);
diff --git a/hw/i386/xen/xen-mapcache.c b/hw/i386/xen/xen-mapcache.c
index 31debdf..e60156c 100644
--- a/hw/i386/xen/xen-mapcache.c
+++ b/hw/i386/xen/xen-mapcache.c
@@ -62,6 +62,7 @@ typedef struct MapCacheRev {
 hwaddr paddr_index;
 hwaddr size;
 QTAILQ_ENTRY(MapCacheRev) next;
+bool dma;
 } MapCacheRev;
 
 typedef struct MapCache {
@@ -202,7 +203,7 @@ static void xen_remap_bucket(MapCacheEntry *entry,
 }
 
 static uint8_t *xen_map_cache_unlocked(hwaddr phys_addr, hwaddr size,
-   uint8_t lock)
+   uint8_t lock, bool dma)
 {
 MapCacheEntry *entry, *pentry = NULL;
 hwaddr address_index;
@@ -289,6 +290,7 @@ tryagain:
 if (lock) {
 MapCacheRev *reventry = g_malloc0(sizeof(MapCacheRev));
 entry->lock++;
+reventry->dma = dma;
 reventry->vaddr_req = mapcache->last_entry->vaddr_base + 
address_offset;
 reventry->paddr_index = mapcache->last_entry->paddr_index;
 reventry->size = entry->size;
@@ -300,12 +302,12 @@ tryagain:
 }
 
 uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size,
-   uint8_t lock)
+   uint8_t lock, bool dma)
 {
 uint8_t *p;
 
 mapcache_lock();
-p = xen_map_cache_unlocked(phys_addr, size, lock);
+p = xen_map_cache_unlocked(phys_addr, size, lock, dma);
 mapcache_unlock();
 return p;
 }
@@ -426,8 +428,11 @@ void xen_invalidate_map_cache(void)
 mapcache_lock();
 
 QTAILQ_FOREACH(reventry, >locked_entries, next) {
-DPRINTF("There should be no locked mappings at this time, "
-"but "TARGET_FMT_plx" -> %p is present\n",
+if (!reventry->dma) {
+continue;
+}
+fprintf(stderr, "Locked DMA mapping while invalidating mapcache!"
+" "TARGET_FMT_plx" -> %p is present\n",
 reventry->paddr_index, reventry->vaddr_req);
 }
 
diff --git a/include/sysemu/xen-mapcache.h b/include/sysemu/xen-mapcache.h
index b8c93b9..01daaad 100644
--- a/include/sysemu/xen-mapcache.h
+++ b/include/sysemu/xen-mapcache.h
@@ -17,7 +17,7 @@ typedef hwaddr (*phys_offset_to_gaddr_t)(hwaddr start_addr,
 void 

Re: [Qemu-devel] QEMU build breakage on ARM against Xen 4.9 caused by libxendevicemodel

2017-05-03 Thread Stefano Stabellini
On Wed, 3 May 2017, Stefano Stabellini wrote:
> On Wed, 3 May 2017, Anthony PERARD wrote:
> > On Wed, May 03, 2017 at 10:20:59AM -0700, Stefano Stabellini wrote:
> > > On Wed, 3 May 2017, Anthony PERARD wrote:
> > > > On Thu, Apr 20, 2017 at 11:05:33AM -0700, Stefano Stabellini wrote:
> > > > > On Thu, 20 Apr 2017, Paul Durrant wrote:
> > > > > > I think xencall should be part of the base xen_stable_libs anyway.
> > > > > 
> > > > > Yes, you are right. However I noticed that -lxencall needs to come 
> > > > > after
> > > > > -lxendevicemodel. So, I'll have to move -lxendevicemodel before
> > > > > $xen_stable_libs, see below. I'll merge this patch into "configure:
> > > > > detect presence of libxendevicemodel", if that's OK.
> > > > > 
> > > > > diff --git a/configure b/configure
> > > > > index 99d6cbc..3133ef8 100755
> > > > > --- a/configure
> > > > > +++ b/configure
> > > > > @@ -1992,7 +1992,7 @@ if test "$xen" != "no" ; then
> > > > >else
> > > > >  
> > > > >  xen_libs="-lxenstore -lxenctrl -lxenguest"
> > > > > -xen_stable_libs="-lxenforeignmemory -lxengnttab -lxenevtchn"
> > > > > +xen_stable_libs="-lxencall -lxenforeignmemory -lxengnttab 
> > > > > -lxenevtchn"
> > > > >  
> > > > >  # First we test whether Xen headers and libraries are available.
> > > > >  # If no, we are done and there is no Xen support.
> > > > > @@ -2027,9 +2027,9 @@ int main(void) {
> > > > >return 0;
> > > > >  }
> > > > >  EOF
> > > > > -compile_prog "" "$xen_libs $xen_stable_libs -lxendevicemodel"
> > > > > +compile_prog "" "$xen_libs -lxendevicemodel $xen_stable_libs"
> > > > >then
> > > > > -  xen_stable_libs="$xen_stable_libs -lxendevicemodel"
> > > > > +  xen_stable_libs="-lxendevicemodel $xen_stable_libs"
> > > > >xen_ctrl_version=40900
> > > > >xen=yes
> > > > >  elif
> > > > 
> > > > Hey, now that this patch is merged, xen.git fail to build QEMU. (osstest
> > > > qemu-mainline branch fail.)
> > > > 
> > > > That's because -lxencall is not found because -L$path_to_libxencall is
> > > > missing in xen.git.
> > > > 
> > > > But I've notice something else, libxendevicemodel.so is not linked
> > > > against libxencall, that might be the root of the issues on arm.
> > > > (libxenctrl.so is linked against libxencall.)
> > > > 
> > > > Thought?
> > > > 
> > > > 
> > > > We probably need this patch in xen:
> > > 
> > > We also need to add -L$path_to_libxencall and -I$path_to_libxencall to
> > > tools/Makefile:subdir-all-qemu-xen-dir.
> > 
> > I don't think that needed because:
> > for -I, QEMU does not use anything from xencall, and any other includes
> > does not use xencall.h.
> > 
> > for -L, I think that would be usefull only if QEMU is built staticly.
> > 
> > Also, I don't think -lxencall is needed at all, if libxendevicemodel is
> > fixed. libxencall will only be a runtime dependency.
> 
> I tried the patch below: on ARM, if I remove -L$DIR/tools/libs/call, the
> QEMU configure script detects Xen 4.7 instead of 4.9. If I also remove
> -Wl,-rpath-link=$DIR/tools/libs/call, it cannot detect Xen at all.
> 
> You are right that we can avoid -I and -l, but I think we need both -L
> and -Wl,-rpath-link for tools/libs/call.

Correction: the need for -L$DIR/tools/libs/call comes from -lxencall in
the QEMU configure script. If I remove -lxencall from configure, then it
works OK without it (as it should). But
-Wl,-rpath-link=$DIR/tools/libs/call is still required, otherwise the
xencall library cannot be found and loaded.


> > > > diff --git a/tools/libs/devicemodel/Makefile 
> > > > b/tools/libs/devicemodel/Makefile
> > > > index 55626a5049..81fa5a4ac4 100644
> > > > --- a/tools/libs/devicemodel/Makefile
> > > > +++ b/tools/libs/devicemodel/Makefile
> > > > @@ -63,7 +63,7 @@ libxendevicemodel.so.$(MAJOR): 
> > > > libxendevicemodel.so.$(MAJOR).$(MINOR)
> > > > $(SYMLINK_SHLIB) $< $@
> > > >  
> > > >  libxendevicemodel.so.$(MAJOR).$(MINOR): $(PIC_OBJS) 
> > > > libxendevicemodel.map
> > > > -   $(CC) $(LDFLAGS) $(PTHREAD_LDFLAGS) -Wl,$(SONAME_LDFLAG) 
> > > > -Wl,libxendevicemodel.so.$(MAJOR) $(SHLIB_LDFLAGS) -o $@ $(PIC_OBJS) 
> > > > $(LDLIBS_libxentoollog) $(APPEND_LDFLAGS)
> > > > +   $(CC) $(LDFLAGS) $(PTHREAD_LDFLAGS) -Wl,$(SONAME_LDFLAG) 
> > > > -Wl,libxendevicemodel.so.$(MAJOR) $(SHLIB_LDFLAGS) -o $@ $(PIC_OBJS) 
> > > > $(LDLIBS_libxentoollog) $(LDLIBS_libxencall) $(APPEND_LDFLAGS)
> > > >  
> > > >  .PHONY: install
> > > >  install: build
> > > > 
> > > > -- 
> > > > Anthony PERARD
> > > > 
> > 
> > -- 
> > Anthony PERARD
> > 
> 



[Qemu-devel] [PATCH RESEND v2 18/21] sysbus-ohci: Remove user_creatable flag

2017-05-03 Thread Eduardo Habkost
sysbus-ohci needs to be mapped and wired by device or board code,
and won't work with -device. Remove the user_creatable flag from
the device class.

Cc: Marcel Apfelbaum 
Cc: Gerd Hoffmann 
Acked-by: Marcel Apfelbaum 
Signed-off-by: Eduardo Habkost 
---
Changes v1 -> v2:
* Commit message rewrite only
---
 hw/usb/hcd-ohci.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
index 18b31022a7..3ada35e954 100644
--- a/hw/usb/hcd-ohci.c
+++ b/hw/usb/hcd-ohci.c
@@ -2159,11 +2159,6 @@ static void ohci_sysbus_class_init(ObjectClass *klass, 
void *data)
 dc->desc = "OHCI USB Controller";
 dc->props = ohci_sysbus_properties;
 dc->reset = usb_ohci_reset_sysbus;
-/*
- * FIXME: Set only because we are not sure yet if this device
- * will be outside the q35 sysbus whitelist.
- */
-dc->user_creatable = true;
 }
 
 static const TypeInfo ohci_sysbus_info = {
-- 
2.11.0.259.g40922b1




[Qemu-devel] [PATCH RESEND v2 19/21] virtio-mmio: Remove user_creatable flag

2017-05-03 Thread Eduardo Habkost
virtio-mmio needs to be wired and mapped by other device or board
code, and won't work with -device. Remove the user_creatable flag
from the device class.

Cc: Laszlo Ersek 
Cc: Marcel Apfelbaum 
Cc: Peter Maydell 
Cc: Shannon Zhao 
Cc: "Michael S. Tsirkin" 
Reviewed-by: Laszlo Ersek 
Acked-by: Marcel Apfelbaum 
Signed-off-by: Eduardo Habkost 
---
Changes v1 -> v2:
* (none)
---
 hw/virtio/virtio-mmio.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
index 6491a771ff..5807aa87fe 100644
--- a/hw/virtio/virtio-mmio.c
+++ b/hw/virtio/virtio-mmio.c
@@ -450,11 +450,6 @@ static void virtio_mmio_class_init(ObjectClass *klass, 
void *data)
 dc->reset = virtio_mmio_reset;
 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 dc->props = virtio_mmio_properties;
-/*
- * FIXME: Set only because we are not sure yet if this device
- * will be outside the q35 sysbus whitelist.
- */
-dc->user_creatable = true;
 }
 
 static const TypeInfo virtio_mmio_info = {
-- 
2.11.0.259.g40922b1




[Qemu-devel] [PATCH RESEND v2 17/21] hpet: Remove user_creatable flag

2017-05-03 Thread Eduardo Habkost
hpet needs to be mapped and wired by the board code and won't
work with -device. Remove the user_creatable flag from the device
class.

Cc: Marcel Apfelbaum 
Cc: "Michael S. Tsirkin" 
Cc: Paolo Bonzini 
Acked-by: Marcel Apfelbaum 
Signed-off-by: Eduardo Habkost 
---
Changes v1 -> v2:
* (none)
---
 hw/timer/hpet.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/hw/timer/hpet.c b/hw/timer/hpet.c
index 4dcbd5bb3d..a2c18b30c3 100644
--- a/hw/timer/hpet.c
+++ b/hw/timer/hpet.c
@@ -771,11 +771,6 @@ static void hpet_device_class_init(ObjectClass *klass, 
void *data)
 dc->reset = hpet_reset;
 dc->vmsd = _hpet;
 dc->props = hpet_device_properties;
-/*
- * FIXME: Set only because we are not sure yet if this device
- * will be outside the q35 sysbus whitelist.
- */
-dc->user_creatable = true;
 }
 
 static const TypeInfo hpet_device_info = {
-- 
2.11.0.259.g40922b1




Re: [Qemu-devel] [PATCH RESEND v2 04/21] iommu: Remove FIXME comment about user_creatable=true

2017-05-03 Thread Michael S. Tsirkin
On Wed, May 03, 2017 at 05:35:47PM -0300, Eduardo Habkost wrote:
> amd-iommu and intel-iommu are really meant to be used with
> -device, so they need user_creatable=true. Remove the FIXME
> comment.
> 
> Cc: Marcel Apfelbaum 
> Cc: "Michael S. Tsirkin" 
> Reviewed-by: Marcel Apfelbaum 
> Acked-by: Marcel Apfelbaum 
> Signed-off-by: Eduardo Habkost 

Acked-by: Michael S. Tsirkin 

> ---
> Changes v1 -> v2:
> * (none)
> ---
>  hw/i386/amd_iommu.c   | 5 +
>  hw/i386/intel_iommu.c | 5 +
>  2 files changed, 2 insertions(+), 8 deletions(-)
> 
> diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
> index 7b92c8c15a..efcc93cbfd 100644
> --- a/hw/i386/amd_iommu.c
> +++ b/hw/i386/amd_iommu.c
> @@ -1186,10 +1186,7 @@ static void amdvi_class_init(ObjectClass *klass, void* 
> data)
>  dc->vmsd = _amdvi;
>  dc->hotpluggable = false;
>  dc_class->realize = amdvi_realize;
> -/*
> - * FIXME: Set only because we are not sure yet if this device
> - * will be outside the q35 sysbus whitelist.
> - */
> +/* Supported by the pc-q35-* machine types */
>  dc->user_creatable = true;
>  }
>  
> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
> index 54187a04a4..327a46cd19 100644
> --- a/hw/i386/intel_iommu.c
> +++ b/hw/i386/intel_iommu.c
> @@ -3009,10 +3009,7 @@ static void vtd_class_init(ObjectClass *klass, void 
> *data)
>  dc->hotpluggable = false;
>  x86_class->realize = vtd_realize;
>  x86_class->int_remap = vtd_int_remap;
> -/*
> - * FIXME: Set only because we are not sure yet if this device
> - * will be outside the q35 sysbus whitelist.
> - */
> +/* Supported by the pc-q35-* machine types */
>  dc->user_creatable = true;
>  }
>  
> -- 
> 2.11.0.259.g40922b1



[Qemu-devel] [PATCH RESEND v2 14/21] fw_cfg: Remove user_creatable flag

2017-05-03 Thread Eduardo Habkost
fw_cfg won't work with -device, as:
* fw_cfg_init1() won't get called for the device;
* The device won't appear at /machine/fw_cfg, and won't work with
  the -fw_cfg command-line option.

Remove the user_creatable flag from the device class.

Cc: Marcel Apfelbaum 
Cc: "Michael S. Tsirkin" 
Cc: Laszlo Ersek 
Cc: Gabriel L. Somlo 
Reviewed-by: Laszlo Ersek 
Acked-by: Marcel Apfelbaum 
Signed-off-by: Eduardo Habkost 
---
Changes v1 -> v2:
* (none)
---
 hw/nvram/fw_cfg.c | 10 --
 1 file changed, 10 deletions(-)

diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
index 7993aea792..316fca9bc1 100644
--- a/hw/nvram/fw_cfg.c
+++ b/hw/nvram/fw_cfg.c
@@ -1101,11 +1101,6 @@ static void fw_cfg_io_class_init(ObjectClass *klass, 
void *data)
 
 dc->realize = fw_cfg_io_realize;
 dc->props = fw_cfg_io_properties;
-/*
- * FIXME: Set only because we are not sure yet if this device
- * will be outside the q35 sysbus whitelist.
- */
-dc->user_creatable = true;
 }
 
 static const TypeInfo fw_cfg_io_info = {
@@ -1172,11 +1167,6 @@ static void fw_cfg_mem_class_init(ObjectClass *klass, 
void *data)
 
 dc->realize = fw_cfg_mem_realize;
 dc->props = fw_cfg_mem_properties;
-/*
- * FIXME: Set only because we are not sure yet if this device
- * will be outside the q35 sysbus whitelist.
- */
-dc->user_creatable = true;
 }
 
 static const TypeInfo fw_cfg_mem_info = {
-- 
2.11.0.259.g40922b1




Re: [Qemu-devel] [PATCH RESEND v2 14/21] fw_cfg: Remove user_creatable flag

2017-05-03 Thread Michael S. Tsirkin
On Wed, May 03, 2017 at 05:35:57PM -0300, Eduardo Habkost wrote:
> fw_cfg won't work with -device, as:
> * fw_cfg_init1() won't get called for the device;
> * The device won't appear at /machine/fw_cfg, and won't work with
>   the -fw_cfg command-line option.
> 
> Remove the user_creatable flag from the device class.
> 
> Cc: Marcel Apfelbaum 
> Cc: "Michael S. Tsirkin" 
> Cc: Laszlo Ersek 
> Cc: Gabriel L. Somlo 
> Reviewed-by: Laszlo Ersek 
> Acked-by: Marcel Apfelbaum 
> Signed-off-by: Eduardo Habkost 

Acked-by: Michael S. Tsirkin 

> ---
> Changes v1 -> v2:
> * (none)
> ---
>  hw/nvram/fw_cfg.c | 10 --
>  1 file changed, 10 deletions(-)
> 
> diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
> index 7993aea792..316fca9bc1 100644
> --- a/hw/nvram/fw_cfg.c
> +++ b/hw/nvram/fw_cfg.c
> @@ -1101,11 +1101,6 @@ static void fw_cfg_io_class_init(ObjectClass *klass, 
> void *data)
>  
>  dc->realize = fw_cfg_io_realize;
>  dc->props = fw_cfg_io_properties;
> -/*
> - * FIXME: Set only because we are not sure yet if this device
> - * will be outside the q35 sysbus whitelist.
> - */
> -dc->user_creatable = true;
>  }
>  
>  static const TypeInfo fw_cfg_io_info = {
> @@ -1172,11 +1167,6 @@ static void fw_cfg_mem_class_init(ObjectClass *klass, 
> void *data)
>  
>  dc->realize = fw_cfg_mem_realize;
>  dc->props = fw_cfg_mem_properties;
> -/*
> - * FIXME: Set only because we are not sure yet if this device
> - * will be outside the q35 sysbus whitelist.
> - */
> -dc->user_creatable = true;
>  }
>  
>  static const TypeInfo fw_cfg_mem_info = {
> -- 
> 2.11.0.259.g40922b1



Re: [Qemu-devel] [PATCH RESEND v2 19/21] virtio-mmio: Remove user_creatable flag

2017-05-03 Thread Michael S. Tsirkin
On Wed, May 03, 2017 at 05:36:02PM -0300, Eduardo Habkost wrote:
> virtio-mmio needs to be wired and mapped by other device or board
> code, and won't work with -device. Remove the user_creatable flag
> from the device class.
> 
> Cc: Laszlo Ersek 
> Cc: Marcel Apfelbaum 
> Cc: Peter Maydell 
> Cc: Shannon Zhao 
> Cc: "Michael S. Tsirkin" 
> Reviewed-by: Laszlo Ersek 
> Acked-by: Marcel Apfelbaum 
> Signed-off-by: Eduardo Habkost 

Acked-by: Michael S. Tsirkin 

> ---
> Changes v1 -> v2:
> * (none)
> ---
>  hw/virtio/virtio-mmio.c | 5 -
>  1 file changed, 5 deletions(-)
> 
> diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c
> index 6491a771ff..5807aa87fe 100644
> --- a/hw/virtio/virtio-mmio.c
> +++ b/hw/virtio/virtio-mmio.c
> @@ -450,11 +450,6 @@ static void virtio_mmio_class_init(ObjectClass *klass, 
> void *data)
>  dc->reset = virtio_mmio_reset;
>  set_bit(DEVICE_CATEGORY_MISC, dc->categories);
>  dc->props = virtio_mmio_properties;
> -/*
> - * FIXME: Set only because we are not sure yet if this device
> - * will be outside the q35 sysbus whitelist.
> - */
> -dc->user_creatable = true;
>  }
>  
>  static const TypeInfo virtio_mmio_info = {
> -- 
> 2.11.0.259.g40922b1



[Qemu-devel] [PATCH RESEND v2 12/21] isabus-bridge: Remove user_creatable flag

2017-05-03 Thread Eduardo Habkost
isabus-bridge needs to be created by isa_bus_new(), and won't
work with -device, as it won't create the TYPE_ISA_BUS bus
itself. Remove the user_creatable flag from the device class.

Cc: Marcel Apfelbaum 
Cc: "Michael S. Tsirkin" 
Acked-by: Marcel Apfelbaum 
Signed-off-by: Eduardo Habkost 
---
Changes v1 -> v2:
* (none)
---
 hw/isa/isa-bus.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/hw/isa/isa-bus.c b/hw/isa/isa-bus.c
index ad4ac3b4f6..348e0eab9d 100644
--- a/hw/isa/isa-bus.c
+++ b/hw/isa/isa-bus.c
@@ -221,11 +221,6 @@ static void isabus_bridge_class_init(ObjectClass *klass, 
void *data)
 
 set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories);
 dc->fw_name = "isa";
-/*
- * FIXME: Set only because we are not sure yet if this device
- * will be outside the q35 sysbus whitelist.
- */
-dc->user_creatable = true;
 }
 
 static const TypeInfo isabus_bridge_info = {
-- 
2.11.0.259.g40922b1




[Qemu-devel] [PATCH RESEND v2 21/21] s390-pcibus: No need to set user_creatable=false explicitly

2017-05-03 Thread Eduardo Habkost
TYPE_S390_PCI_HOST_BRIDGE is a subclass of TYPE_PCI_HOST_BRIDGE,
which is a subclass of TYPE_SYS_BUS_DEVICE. TYPE_SYS_BUS_DEVICE
already sets user_creatable=false, so we don't require an
explicit user_creatable=false assignment in
s390_pcihost_class_init().

Cc: Alexander Graf 
Cc: Christian Borntraeger 
Cc: Cornelia Huck 
Cc: Frank Blaschka 
Cc: Laszlo Ersek 
Cc: Marcel Apfelbaum 
Cc: Markus Armbruster 
Cc: Peter Maydell 
Cc: Pierre Morel 
Cc: Richard Henderson 
Cc: Thomas Huth 
Cc: Yi Min Zhao 
Acked-by: Cornelia Huck 
Acked-by: Marcel Apfelbaum 
Signed-off-by: Eduardo Habkost 
---
Changes series v1 -> v2:
* Previous patch was:
  "s390: Add FIXME for unexplained user_creatable=false line",
  but now we know we can remove the explicit user_creatable=false
  assignment
---
 hw/s390x/s390-pci-bus.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index b60a8f22b0..66a6fbeb8c 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -872,7 +872,6 @@ static void s390_pcihost_class_init(ObjectClass *klass, 
void *data)
 DeviceClass *dc = DEVICE_CLASS(klass);
 HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass);
 
-dc->user_creatable = false;
 dc->reset = s390_pcihost_reset;
 k->init = s390_pcihost_init;
 hc->plug = s390_pcihost_hot_plug;
-- 
2.11.0.259.g40922b1




[Qemu-devel] [PATCH RESEND v2 15/21] esp: Remove user_creatable flag

2017-05-03 Thread Eduardo Habkost
esp devices aren't going to work with -device, as they need IRQs
to be connected and mmio to be mapped (this is done by
esp_init()). Remove the user_creatable flag from the device
class.

Cc: Marcel Apfelbaum 
Cc: Paolo Bonzini 
Acked-by: Marcel Apfelbaum 
Signed-off-by: Eduardo Habkost 
---
Changes v1 -> v2:
* Commit message rewrite only
---
 hw/scsi/esp.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/hw/scsi/esp.c b/hw/scsi/esp.c
index 7bdc1e1b99..eee831efeb 100644
--- a/hw/scsi/esp.c
+++ b/hw/scsi/esp.c
@@ -728,11 +728,6 @@ static void sysbus_esp_class_init(ObjectClass *klass, void 
*data)
 dc->reset = sysbus_esp_hard_reset;
 dc->vmsd = _sysbus_esp_scsi;
 set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
-/*
- * FIXME: Set only because we are not sure yet if this device
- * will be outside the q35 sysbus whitelist.
- */
-dc->user_creatable = true;
 }
 
 static const TypeInfo sysbus_esp_info = {
-- 
2.11.0.259.g40922b1




[Qemu-devel] [PATCH RESEND v2 13/21] unimplemented-device: Remove user_creatable flag

2017-05-03 Thread Eduardo Habkost
unimplemented-device needs to be created and mapped using
create_unimplemented_device() (or equivalent code), and won't
work with -device. Remove the user_creatable flag from the device
class.

Cc: Marcel Apfelbaum 
Cc: Peter Maydell 
Cc: Philippe Mathieu-Daudé 
Reviewed-by: Philippe Mathieu-Daudé 
Acked-by: Marcel Apfelbaum 
Signed-off-by: Eduardo Habkost 
---
Changes v1 -> v2:
* Commit message rewrite only
---
 hw/misc/unimp.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/hw/misc/unimp.c b/hw/misc/unimp.c
index e446c1d652..bcbb585888 100644
--- a/hw/misc/unimp.c
+++ b/hw/misc/unimp.c
@@ -90,11 +90,6 @@ static void unimp_class_init(ObjectClass *klass, void *data)
 
 dc->realize = unimp_realize;
 dc->props = unimp_properties;
-/*
- * FIXME: Set only because we are not sure yet if this device
- * will be outside the q35 sysbus whitelist.
- */
-dc->user_creatable = true;
 }
 
 static const TypeInfo unimp_info = {
-- 
2.11.0.259.g40922b1




[Qemu-devel] [PATCH RESEND v2 07/21] kvmclock: Remove user_creatable flag

2017-05-03 Thread Eduardo Habkost
kvmclock should be used by guests only when the appropriate CPUID
feature flags are set on the VCPU, and it is automatically
created by kvmclock_create() when those feature flags are set.
This means creating a kvmclock device using -device is useless.
Remove user_creatable from its device class.

Cc: Marcel Apfelbaum 
Cc: "Michael S. Tsirkin" 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Thomas Huth 
Acked-by: Marcel Apfelbaum 
Reviewed-by: Thomas Huth 
Signed-off-by: Eduardo Habkost 
---
Changes v1 -> v2:
* (none)
---
 hw/i386/kvm/clock.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/hw/i386/kvm/clock.c b/hw/i386/kvm/clock.c
index 7665bef999..13eca374cd 100644
--- a/hw/i386/kvm/clock.c
+++ b/hw/i386/kvm/clock.c
@@ -286,11 +286,6 @@ static void kvmclock_class_init(ObjectClass *klass, void 
*data)
 dc->realize = kvmclock_realize;
 dc->vmsd = _vmsd;
 dc->props = kvmclock_properties;
-/*
- * FIXME: Set only because we are not sure yet if this device
- * will be outside the q35 sysbus whitelist.
- */
-dc->user_creatable = true;
 }
 
 static const TypeInfo kvmclock_info = {
-- 
2.11.0.259.g40922b1




[Qemu-devel] [PATCH RESEND v2 20/21] xen-sysdev: Remove user_creatable flag

2017-05-03 Thread Eduardo Habkost
TYPE_XENSYSDEV is only used internally by xen_be_init(), and is
not supposed to be plugged/unplugged dynamically. Remove the
user_creatable flag from the device class.

Cc: Juergen Gross ,
Cc: Peter Maydell ,
Cc: Thomas Huth 
Cc: sstabell...@kernel.org
Cc: Markus Armbruster ,
Cc: Marcel Apfelbaum ,
Cc: Laszlo Ersek 
Acked-by: Juergen Gross 
Acked-by: Marcel Apfelbaum 
Signed-off-by: Eduardo Habkost 
---
Changes series v1 -> v2:
* (New patch added to series)
---
 hw/xen/xen_backend.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/hw/xen/xen_backend.c b/hw/xen/xen_backend.c
index 2b91d2d458..f29b2b027b 100644
--- a/hw/xen/xen_backend.c
+++ b/hw/xen/xen_backend.c
@@ -664,11 +664,6 @@ static void xen_sysdev_class_init(ObjectClass *klass, void 
*data)
 k->init = xen_sysdev_init;
 dc->props = xen_sysdev_properties;
 dc->bus_type = TYPE_XENSYSBUS;
-/*
- * FIXME: Set only because we are not sure yet if this device
- * will be outside the q35 sysbus whitelist.
- */
-dc->user_creatable = true;
 }
 
 static const TypeInfo xensysdev_info = {
-- 
2.11.0.259.g40922b1




[Qemu-devel] [PATCH RESEND v2 10/21] sysbus-ahci: Remove user_creatable flag

2017-05-03 Thread Eduardo Habkost
The sysbus-ahci devices are supposed to be created and wired by
code from other devices, like calxeda_init() and
xlnx_zynqmp_realize(), and won't work with -device. Remove the
user_creatable flag from the device class.

Cc: John Snow 
Cc: qemu-bl...@nongnu.org
Cc: Rob Herring 
Cc: Peter Maydell 
Cc: Alistair Francis 
Cc: "Edgar E. Iglesias" 
Cc: Marcel Apfelbaum 
Reviewed-by: Alistair Francis 
Acked-by: John Snow 
Acked-by: Marcel Apfelbaum 
Signed-off-by: Eduardo Habkost 
---
Changes v1 -> v2:
* (none)
---
 hw/ide/ahci.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 7f10cda354..2ea1a282ef 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -1721,11 +1721,6 @@ static void sysbus_ahci_class_init(ObjectClass *klass, 
void *data)
 dc->props = sysbus_ahci_properties;
 dc->reset = sysbus_ahci_reset;
 set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
-/*
- * FIXME: Set only because we are not sure yet if this device
- * will be outside the q35 sysbus whitelist.
- */
-dc->user_creatable = true;
 }
 
 static const TypeInfo sysbus_ahci_info = {
-- 
2.11.0.259.g40922b1




[Qemu-devel] [PATCH RESEND v2 16/21] generic-sdhci: Remove user_creatable flag

2017-05-03 Thread Eduardo Habkost
generic-sdhci needs to be wired by other devices' code, so it
can't be used with -device. Remove the user_creatable flag from
the device class.

Cc: Peter Maydell 
Cc: "Edgar E. Iglesias" 
Cc: David Gibson 
Cc: Alexander Graf 
Cc: "Michael S. Tsirkin" 
Cc: Marcel Apfelbaum 
Cc: Prasad J Pandit 
Cc: Alistair Francis 
Reviewed-by: Alistair Francis 
Acked-by: Marcel Apfelbaum 
Signed-off-by: Eduardo Habkost 
---
Changes v1 -> v2:
* (none)
---
 hw/sd/sdhci.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/hw/sd/sdhci.c b/hw/sd/sdhci.c
index dbf61fccb8..6d6a791ee9 100644
--- a/hw/sd/sdhci.c
+++ b/hw/sd/sdhci.c
@@ -1360,11 +1360,6 @@ static void sdhci_sysbus_class_init(ObjectClass *klass, 
void *data)
 dc->props = sdhci_sysbus_properties;
 dc->realize = sdhci_sysbus_realize;
 dc->reset = sdhci_poweron_reset;
-/*
- * FIXME: Set only because we are not sure yet if this device
- * will be outside the q35 sysbus whitelist.
- */
-dc->user_creatable = true;
 }
 
 static const TypeInfo sdhci_sysbus_info = {
-- 
2.11.0.259.g40922b1




[Qemu-devel] [PATCH RESEND v2 06/21] pflash_cfi01: Remove user_creatable flag

2017-05-03 Thread Eduardo Habkost
TYPE_CFI_PFLASH01 devices need to be mapped by
pflash_cfi01_register() (or equivalent) and can't be used with
-device. Remove user_creatable from the device class.

Cc: Kevin Wolf 
Cc: Max Reitz 
Cc: qemu-bl...@nongnu.org
Cc: Laszlo Ersek 
Cc: Philippe Mathieu-Daudé 
Cc: Marcel Apfelbaum 
Reviewed-by: Philippe Mathieu-Daudé 
Reviewed-by: Laszlo Ersek 
Acked-by: Marcel Apfelbaum 
Signed-off-by: Eduardo Habkost 
---
Changes v1 -> v2:
* Commit message rewrite only
---
 hw/block/pflash_cfi01.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c
index ef71956433..594d4cf6fe 100644
--- a/hw/block/pflash_cfi01.c
+++ b/hw/block/pflash_cfi01.c
@@ -927,11 +927,6 @@ static void pflash_cfi01_class_init(ObjectClass *klass, 
void *data)
 dc->props = pflash_cfi01_properties;
 dc->vmsd = _pflash;
 set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
-/*
- * FIXME: Set only because we are not sure yet if this device
- * will be outside the q35 sysbus whitelist.
- */
-dc->user_creatable = true;
 }
 
 
-- 
2.11.0.259.g40922b1




[Qemu-devel] [PATCH RESEND v2 09/21] kvmvapic: Remove user_creatable flag

2017-05-03 Thread Eduardo Habkost
The kvmvapic device is only usable when created by
apic_common_realize(), not using -device. Remove the
user_creatable flag from the device class.

Cc: Igor Mammedov 
Cc: Marcel Apfelbaum 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Cc: "Michael S. Tsirkin" 
Acked-by: Marcel Apfelbaum 
Signed-off-by: Eduardo Habkost 
---
Changes v1 -> v2:
* Commit message rewrite only
---
 hw/i386/kvmvapic.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/hw/i386/kvmvapic.c b/hw/i386/kvmvapic.c
index 45f6267c93..82a49556af 100644
--- a/hw/i386/kvmvapic.c
+++ b/hw/i386/kvmvapic.c
@@ -856,11 +856,6 @@ static void vapic_class_init(ObjectClass *klass, void 
*data)
 dc->reset   = vapic_reset;
 dc->vmsd= _vapic;
 dc->realize = vapic_realize;
-/*
- * FIXME: Set only because we are not sure yet if this device
- * will be outside the q35 sysbus whitelist.
- */
-dc->user_creatable = true;
 }
 
 static const TypeInfo vapic_type = {
-- 
2.11.0.259.g40922b1




[Qemu-devel] [PATCH RESEND v2 11/21] allwinner-ahci: Remove user_creatable flag

2017-05-03 Thread Eduardo Habkost
allwinner-ahci needs its IRQ to be connected and mmio to be
mapped (this is done by the alwinner-a10 device realize method),
and won't work with -device. Remove the user_creatable flag from
the device class.

Cc: John Snow 
Cc: qemu-bl...@nongnu.org
Cc: Beniamino Galvani 
Cc: Peter Maydell 
Cc: qemu-...@nongnu.org
Cc: Marcel Apfelbaum 
Acked-by: John Snow 
Acked-by: Marcel Apfelbaum 
Signed-off-by: Eduardo Habkost 
---
Changes v1 -> v2:
* Commit message rewrite only
---
 hw/ide/ahci.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 2ea1a282ef..f60826d6e0 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -1815,11 +1815,6 @@ static void allwinner_ahci_class_init(ObjectClass 
*klass, void *data)
 DeviceClass *dc = DEVICE_CLASS(klass);
 
 dc->vmsd = _allwinner_ahci;
-/*
- * FIXME: Set only because we are not sure yet if this device
- * will be outside the q35 sysbus whitelist.
- */
-dc->user_creatable = true;
 }
 
 static const TypeInfo allwinner_ahci_info = {
-- 
2.11.0.259.g40922b1




[Qemu-devel] [PATCH RESEND v2 08/21] ioapic: Remove user_creatable flag

2017-05-03 Thread Eduardo Habkost
An ioapic device is already created by the q35 initialization
code, and using "-device ioapic" or "-device kvm-ioapic" will
always fail with "Only 1 ioapics allowed". Remove the
user_creatable flag from the ioapic device classes.

Cc: Igor Mammedov 
Cc: Marcel Apfelbaum 
Cc: "Michael S. Tsirkin" 
Cc: Paolo Bonzini 
Cc: Richard Henderson 
Cc: Eduardo Habkost 
Acked-by: Marcel Apfelbaum 
Signed-off-by: Eduardo Habkost 
---
Changes v1 -> v2:
* (none)
---
 hw/i386/kvm/ioapic.c | 5 -
 hw/intc/ioapic.c | 5 -
 2 files changed, 10 deletions(-)

diff --git a/hw/i386/kvm/ioapic.c b/hw/i386/kvm/ioapic.c
index 348c405180..98ca480792 100644
--- a/hw/i386/kvm/ioapic.c
+++ b/hw/i386/kvm/ioapic.c
@@ -167,11 +167,6 @@ static void kvm_ioapic_class_init(ObjectClass *klass, void 
*data)
 k->post_load = kvm_ioapic_put;
 dc->reset= kvm_ioapic_reset;
 dc->props= kvm_ioapic_properties;
-/*
- * FIXME: Set only because we are not sure yet if this device
- * will be outside the q35 sysbus whitelist.
- */
-dc->user_creatable = true;
 }
 
 static const TypeInfo kvm_ioapic_info = {
diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c
index f9e4f77def..37c4386ae3 100644
--- a/hw/intc/ioapic.c
+++ b/hw/intc/ioapic.c
@@ -448,11 +448,6 @@ static void ioapic_class_init(ObjectClass *klass, void 
*data)
 k->post_load = ioapic_update_kvm_routes;
 dc->reset = ioapic_reset_common;
 dc->props = ioapic_properties;
-/*
- * FIXME: Set only because we are not sure yet if this device
- * will be outside the q35 sysbus whitelist.
- */
-dc->user_creatable = true;
 }
 
 static const TypeInfo ioapic_info = {
-- 
2.11.0.259.g40922b1




[Qemu-devel] [PATCH RESEND v2 04/21] iommu: Remove FIXME comment about user_creatable=true

2017-05-03 Thread Eduardo Habkost
amd-iommu and intel-iommu are really meant to be used with
-device, so they need user_creatable=true. Remove the FIXME
comment.

Cc: Marcel Apfelbaum 
Cc: "Michael S. Tsirkin" 
Reviewed-by: Marcel Apfelbaum 
Acked-by: Marcel Apfelbaum 
Signed-off-by: Eduardo Habkost 
---
Changes v1 -> v2:
* (none)
---
 hw/i386/amd_iommu.c   | 5 +
 hw/i386/intel_iommu.c | 5 +
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c
index 7b92c8c15a..efcc93cbfd 100644
--- a/hw/i386/amd_iommu.c
+++ b/hw/i386/amd_iommu.c
@@ -1186,10 +1186,7 @@ static void amdvi_class_init(ObjectClass *klass, void* 
data)
 dc->vmsd = _amdvi;
 dc->hotpluggable = false;
 dc_class->realize = amdvi_realize;
-/*
- * FIXME: Set only because we are not sure yet if this device
- * will be outside the q35 sysbus whitelist.
- */
+/* Supported by the pc-q35-* machine types */
 dc->user_creatable = true;
 }
 
diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c
index 54187a04a4..327a46cd19 100644
--- a/hw/i386/intel_iommu.c
+++ b/hw/i386/intel_iommu.c
@@ -3009,10 +3009,7 @@ static void vtd_class_init(ObjectClass *klass, void 
*data)
 dc->hotpluggable = false;
 x86_class->realize = vtd_realize;
 x86_class->int_remap = vtd_int_remap;
-/*
- * FIXME: Set only because we are not sure yet if this device
- * will be outside the q35 sysbus whitelist.
- */
+/* Supported by the pc-q35-* machine types */
 dc->user_creatable = true;
 }
 
-- 
2.11.0.259.g40922b1




[Qemu-devel] [PATCH RESEND v2 02/21] sysbus: Set user_creatable=false by default on TYPE_SYS_BUS_DEVICE

2017-05-03 Thread Eduardo Habkost
commit 33cd52b5d7b9adfd009e95f07e6c64dd88ae2a31 unset
cannot_instantiate_with_device_add_yet in TYPE_SYSBUS, making all
sysbus devices appear on "-device help" and lack the "no-user"
flag in "info qdm".

To fix this, we can set user_creatable=false by default on
TYPE_SYS_BUS_DEVICE, but this requires setting
user_creatable=true explicitly on the sysbus devices that
actually work with -device.

Fortunately today we have just a few has_dynamic_sysbus=1
machines: virt, pc-q35-*, ppce500, and spapr.

virt, ppce500, and spapr have extra checks to ensure just a few
device types can be instantiated:

* virt supports only TYPE_VFIO_CALXEDA_XGMAC, TYPE_VFIO_AMD_XGBE.
* ppce500 supports only TYPE_ETSEC_COMMON.
* spapr supports only TYPE_SPAPR_PCI_HOST_BRIDGE.

This patch sets user_creatable=true explicitly on those 4 device
classes.

Now, the more complex cases:

pc-q35-*: q35 has no sysbus device whitelist yet (which is a
separate bug). We are in the process of fixing it and building a
sysbus whitelist on q35, but in the meantime we can fix the
"-device help" and "info qdm" bugs mentioned above. Also, despite
not being strictly necessary for fixing the q35 bug, reducing the
list of user_creatable=true devices will help us be more
confident when building the q35 whitelist.

xen: We also have a hack at xen_set_dynamic_sysbus(), that sets
has_dynamic_sysbus=true at runtime when using the Xen
accelerator. This hack is only used to allow xen-backend devices
to be dynamically plugged/unplugged.

This means today we can use -device with the following 22 device
types, that are the ones compiled into the qemu-system-x86_64 and
qemu-system-i386 binaries:

* allwinner-ahci
* amd-iommu
* cfi.pflash01
* esp
* fw_cfg_io
* fw_cfg_mem
* generic-sdhci
* hpet
* intel-iommu
* ioapic
* isabus-bridge
* kvmclock
* kvm-ioapic
* kvmvapic
* SUNW,fdtwo
* sysbus-ahci
* sysbus-fdc
* sysbus-ohci
* unimplemented-device
* virtio-mmio
* xen-backend
* xen-sysdev

This patch adds user_creatable=true explicitly to those devices,
temporarily, just to keep 100% compatibility with existing
behavior of q35. Subsequent patches will remove
user_creatable=true from the devices that are really not meant to
user-creatable on any machine, and remove the FIXME comment from
the ones that are really supposed to be user-creatable. This is
being done in separate patches because we still don't have an
obvious list of devices that will be whitelisted by q35, and I
would like to get each device reviewed individually.

Cc: Alexander Graf 
Cc: Alex Williamson 
Cc: Alistair Francis 
Cc: Beniamino Galvani 
Cc: Christian Borntraeger 
Cc: Cornelia Huck 
Cc: David Gibson 
Cc: "Edgar E. Iglesias" 
Cc: Eduardo Habkost 
Cc: Frank Blaschka 
Cc: Gabriel L. Somlo 
Cc: Gerd Hoffmann 
Cc: Igor Mammedov 
Cc: Jason Wang 
Cc: John Snow 
Cc: Juergen Gross 
Cc: Kevin Wolf 
Cc: Laszlo Ersek 
Cc: Marcel Apfelbaum 
Cc: Markus Armbruster 
Cc: Max Reitz 
Cc: "Michael S. Tsirkin" 
Cc: Paolo Bonzini 
Cc: Peter Maydell 
Cc: Pierre Morel 
Cc: Prasad J Pandit 
Cc: qemu-...@nongnu.org
Cc: qemu-bl...@nongnu.org
Cc: qemu-...@nongnu.org
Cc: Richard Henderson 
Cc: Rob Herring 
Cc: Shannon Zhao 
Cc: sstabell...@kernel.org
Cc: Thomas Huth 
Cc: Yi Min Zhao 
Acked-by: John Snow 
Acked-by: Juergen Gross 
Acked-by: Marcel Apfelbaum 
Signed-off-by: Eduardo Habkost 
---
Changes v1 -> v2:
* Rewrite commit message: don't pretend we are actually fixing
  the q35 issue. We're just fixing "info qdm" and "-device help".
  Making it easier to fix q35 is just a nice side-effect.
* Rewrite FIXME comments to make it clear that we just have
  user_creatable=true because we don't know yet if the device
  should be in the q35 whitelist
---
 hw/block/fdc.c   | 10 ++
 hw/block/pflash_cfi01.c  |  5 +
 hw/core/sysbus.c | 11 +++
 hw/i386/amd_iommu.c  |  5 +
 hw/i386/intel_iommu.c|  5 +
 hw/i386/kvm/clock.c  |  5 +
 hw/i386/kvm/ioapic.c |  5 +
 hw/i386/kvmvapic.c   |  5 +
 hw/ide/ahci.c| 10 ++
 hw/intc/ioapic.c |  5 +
 hw/isa/isa-bus.c |  5 +
 hw/misc/unimp.c  |  5 +
 hw/net/fsl_etsec/etsec.c |  2 ++
 hw/nvram/fw_cfg.c| 10 ++
 hw/ppc/spapr_pci.c 

[Qemu-devel] [PATCH RESEND v2 05/21] fdc: Remove user_creatable flag from sysbus-fdc & SUNW, fdtwo

2017-05-03 Thread Eduardo Habkost
sysbus-fdc and SUNW,fdtwo devices need IRQs to be wired and mmio
to be mapped, and can't be used with -device. Unset
user_creatable on their device classes.

Cc: John Snow 
Cc: Kevin Wolf 
Cc: Marcel Apfelbaum 
Cc: Max Reitz 
Cc: qemu-bl...@nongnu.org
Cc: Thomas Huth 
Acked-by: John Snow 
Reviewed-by: Thomas Huth 
Acked-by: Marcel Apfelbaum 
Signed-off-by: Eduardo Habkost 
---
Changes v1 -> v2:
* Commit message rewrite only
---
 hw/block/fdc.c | 10 --
 1 file changed, 10 deletions(-)

diff --git a/hw/block/fdc.c b/hw/block/fdc.c
index 5c28a0b0ad..2e629b398b 100644
--- a/hw/block/fdc.c
+++ b/hw/block/fdc.c
@@ -2880,11 +2880,6 @@ static void sysbus_fdc_class_init(ObjectClass *klass, 
void *data)
 
 dc->props = sysbus_fdc_properties;
 set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
-/*
- * FIXME: Set only because we are not sure yet if this device
- * will be outside the q35 sysbus whitelist.
- */
-dc->user_creatable = true;
 }
 
 static const TypeInfo sysbus_fdc_info = {
@@ -2911,11 +2906,6 @@ static void sun4m_fdc_class_init(ObjectClass *klass, 
void *data)
 
 dc->props = sun4m_fdc_properties;
 set_bit(DEVICE_CATEGORY_STORAGE, dc->categories);
-/*
- * FIXME: Set only because we are not sure yet if this device
- * will be outside the q35 sysbus whitelist.
- */
-dc->user_creatable = true;
 }
 
 static const TypeInfo sun4m_fdc_info = {
-- 
2.11.0.259.g40922b1




[Qemu-devel] [PATCH RESEND v2 03/21] xen-backend: Remove FIXME comment about user_creatable flag

2017-05-03 Thread Eduardo Habkost
xen-backend can be plugged/unplugged dynamically when using the
Xen accelerator, so keep the user_creatable flag on the device
class and remove the FIXME comment.

Cc: Juergen Gross ,
Cc: Peter Maydell ,
Cc: Thomas Huth 
Cc: sstabell...@kernel.org
Cc: Markus Armbruster ,
Cc: Marcel Apfelbaum ,
Cc: Laszlo Ersek 
Acked-by: Juergen Gross 
Acked-by: Marcel Apfelbaum 
Signed-off-by: Eduardo Habkost 
---
Changes series v1 -> v2:
* (New patch added to series)
---
 hw/xen/xen_backend.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/hw/xen/xen_backend.c b/hw/xen/xen_backend.c
index 67cb4cb9f0..2b91d2d458 100644
--- a/hw/xen/xen_backend.c
+++ b/hw/xen/xen_backend.c
@@ -619,10 +619,7 @@ static void xendev_class_init(ObjectClass *klass, void 
*data)
 
 dc->props = xendev_properties;
 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
-/*
- * FIXME: Set only because we are not sure yet if this device
- * will be outside the q35 sysbus whitelist.
- */
+/* xen-backend devices can be plugged/unplugged dynamically */
 dc->user_creatable = true;
 }
 
-- 
2.11.0.259.g40922b1




[Qemu-devel] [PATCH RESEND v2 01/21] qdev: Replace cannot_instantiate_with_device_add_yet with !user_creatable

2017-05-03 Thread Eduardo Habkost
cannot_instantiate_with_device_add_yet was introduced by commit
efec3dd631d94160288392721a5f9c39e50fb2bc to replace no_user. It was
supposed to be a temporary measure.

When it was introduced, we had 54
cannot_instantiate_with_device_add_yet=true lines in the code.
Today (3 years later) this number has not shrinked: we now have
57 cannot_instantiate_with_device_add_yet=true lines. I think it
is safe to say it is not a temporary measure, and we won't see
the flag go away soon.

Instead of a long field name that misleads people to believe it
is temporary, replace it a shorter and less misleading field:
user_creatable.

Except for code comments, changes were generated using the
following Coccinelle patch:

  @@
  expression DC;
  @@
  (
  -DC->cannot_instantiate_with_device_add_yet = false;
  +DC->user_creatable = true;
  |
  -DC->cannot_instantiate_with_device_add_yet = true;
  +DC->user_creatable = false;
  )

  @@
  typedef ObjectClass;
  expression dc;
  identifier class, data;
  @@
   static void device_class_init(ObjectClass *class, void *data)
   {
   ...
   dc->hotpluggable = true;
  +dc->user_creatable = true;
   ...
   }

  @@
  @@
   struct DeviceClass {
   ...
  -bool cannot_instantiate_with_device_add_yet;
  +bool user_creatable;
   ...
  }

  @@
  expression DC;
  @@
  (
  -!DC->cannot_instantiate_with_device_add_yet
  +DC->user_creatable
  |
  -DC->cannot_instantiate_with_device_add_yet
  +!DC->user_creatable
  )

Cc: Alistair Francis 
Cc: Laszlo Ersek 
Cc: Marcel Apfelbaum 
Cc: Markus Armbruster 
Cc: Peter Maydell 
Cc: Thomas Huth 
Acked-by: Alistair Francis 
Reviewed-by: Thomas Huth 
Reviewed-by: Marcel Apfelbaum 
Acked-by: Marcel Apfelbaum 
Signed-off-by: Eduardo Habkost 
---
Changes v1 -> v2:
* (none)

Changes v2 -> v3:
* Fixed commit ID reference on commit message
* (No code changes)
---
 include/hw/qdev-core.h  | 10 +-
 include/hw/qdev-properties.h|  4 ++--
 hw/acpi/piix4.c |  2 +-
 hw/arm/spitz.c  |  2 +-
 hw/audio/marvell_88w8618.c  |  2 +-
 hw/audio/pcspk.c|  2 +-
 hw/core/or-irq.c|  2 +-
 hw/core/qdev.c  |  1 +
 hw/core/register.c  |  2 +-
 hw/dma/i8257.c  |  2 +-
 hw/dma/sparc32_dma.c|  2 +-
 hw/gpio/omap_gpio.c |  4 ++--
 hw/i2c/omap_i2c.c   |  2 +-
 hw/i2c/smbus_eeprom.c   |  2 +-
 hw/i2c/smbus_ich9.c |  2 +-
 hw/i386/pc.c|  2 +-
 hw/input/vmmouse.c  |  2 +-
 hw/intc/apic_common.c   |  2 +-
 hw/intc/etraxfs_pic.c   |  2 +-
 hw/intc/grlib_irqmp.c   |  2 +-
 hw/intc/i8259_common.c  |  2 +-
 hw/intc/nios2_iic.c |  2 +-
 hw/intc/omap_intc.c |  4 ++--
 hw/isa/lpc_ich9.c   |  2 +-
 hw/isa/piix4.c  |  2 +-
 hw/isa/vt82c686.c   |  2 +-
 hw/mips/gt64xxx_pci.c   |  2 +-
 hw/misc/vmport.c|  2 +-
 hw/net/dp8393x.c|  2 +-
 hw/net/etraxfs_eth.c|  2 +-
 hw/net/lance.c  |  2 +-
 hw/pci-bridge/dec.c |  2 +-
 hw/pci-bridge/pci_expander_bridge.c |  2 +-
 hw/pci-host/apb.c   |  2 +-
 hw/pci-host/bonito.c|  2 +-
 hw/pci-host/gpex.c  |  2 +-
 hw/pci-host/grackle.c   |  2 +-
 hw/pci-host/piix.c  |  6 +++---
 hw/pci-host/ppce500.c   |  2 +-
 hw/pci-host/prep.c  |  2 +-
 hw/pci-host/q35.c   |  4 ++--
 hw/pci-host/uninorth.c  |  8 
 hw/pci-host/versatile.c |  2 +-
 hw/pci-host/xilinx-pcie.c   |  2 +-
 hw/ppc/ppc4xx_pci.c |  2 +-
 hw/ppc/spapr_drc.c  |  2 +-
 hw/s390x/s390-pci-bus.c |  2 +-
 hw/sd/milkymist-memcard.c   |  2 +-
 hw/sd/pl181.c   |  2 +-
 hw/sh4/sh_pci.c |  2 +-
 hw/timer/i8254_common.c |  2 +-
 hw/timer/mc146818rtc.c  |  2 +-
 monitor.c   |  2 +-
 qdev-monitor.c  |  6 +++---
 qom/cpu.c   |  2 +-
 target/i386/cpu.c   |  2 +-
 56 files changed, 71 insertions(+), 70 deletions(-)

diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index 4bf86b0ad8..6ee49fbe33 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -103,16 +103,16 @@ typedef struct DeviceClass {
 Property *props;
 
 /*
- * Shall we hide this device model from -device / device_add?
+ * Can this 

[Qemu-devel] [PATCH RESEND v2 00/21] qdev/sysbus: Set user_creatable=false by default on sysbus

2017-05-03 Thread Eduardo Habkost
No code changes since v2, just a rebase to latest qemu.git
master, a trivial commit message fix at patch 1, and added
Acked-by and Reviewed-by tags.

Changes v1 -> v2


* Rewrote series name and cover letter completely to not pretend
  we're fixing the q35 lack-of-sysbus-whitelist bug, and explain
  the motivation for the series.
  * Previous series name was:
"sysbus: Don't allow -device/device_add by default"
  * Rewrote description of patch 02/21, too
  * (I really hope people read this cover letter before
commenting on individual patches.)
* Rewrote FIXME comments to make it clear that we just set
  user_creatable=true temporarily because we don't know yet if
  the device should be in the q35 whitelist.
* Set user_creatable=true on xen-backend also
  (I didn't notice it was missing because I was building QEMU
  without xen support)
  * New patches:
* "xen-backend: Remove FIXME comment about user_creatable flag"
* "xen-sysdev: Remove user_creatable flag"
* Patch:
"s390: Add FIXME for unexplained user_creatable=false line"
  replaced with:
"s390-pcibus: No need to set user_creatable=false explicitly"

Description
---

This series refactor the cannot_instantiate_with_device_add code
for sysbus. First, the cannot_instantiate_with_device_add field
is replaced by !user_creatable.

Then, we change TYPE_SYS_BUS_DEVICE to set user_creatable=false
by default, and we set user_creatable=true explicitly only on the
devices that are really supposed to be user-creatable on some
machines.

Motivation
--

First of all, this makes the code less fragile: setting
user_creatable=false or cannot_instantiate_with_device_add=true
on all sysbus devices is incorrect, and makes code that looks at
cannot_instantiate_with_device_add/user_creatable easy to break.

This also fixes a regression introduced by commit
33cd52b5d7b9adfd009e95f07e6c64dd88ae2a31, that makes all sysbus
devices appear on "-device help" and lack the "no-user" flag on
"info qdm"[1].

This will also make it possible for automatic test code (like the
device-crash-test.py script I sent a while ago[2]) skip devices
that are not supposed to be user-creatable on any machine.

A note about the lack of sysbus whitelist on q35


This series won't make the per-machine whitelist of sysbus
devices unnecessary, but just makes the user_creatable field
consistent on the sys-bus-device classes. This means q35 and xen
still need to be fixed to implemented a sysbus device whitelist.

However, despite not being strictly necessary for fixing the q35
bug, reducing the list of user_creatable=true devices will help
us be more confident when building the q35 whitelist.

Full list of user_creatable=true sysbus devices
---

In the end of this series, the only remaining sysbus devices with
user_creatable=true will be:

* vfio-amd-xgbe (arm)
* vfio-calxeda-xgmac (arm)
* amd-iommu (x86)
* intel-iommu (x86)
* xen-backend (x86)
* spapr-pci-host-bridge (ppc)
* spapr-pci-vfio-host-bridge (ppc)
* eTSEC (ppc)

References/Notes


[1] For example, before this series, we had 174 sysbus devices
listed on qemu-system-arm -device help:
  $ qemu-system-arm -machine none -device help 2>&1 | grep 'bus System' | 
wc -l
  174
  $
after this series, we now have:
  $ ./arm-softmmu/qemu-system-arm -machine none -device help 2>&1 | grep 
'bus System'
  name "vfio-amd-xgbe", bus System, desc "VFIO AMD XGBE"
  name "vfio-calxeda-xgmac", bus System, desc "VFIO Calxeda XGMAC"
  $

[2] Subject: [PATCH 0/3] script for crash-testing -device

---
Cc: Alexander Graf 
Cc: Laszlo Ersek 
Cc: Markus Armbruster 
Cc: Marcel Apfelbaum 
Cc: Thomas Huth 
Cc: Peter Maydell 

Eduardo Habkost (21):
  qdev: Replace cannot_instantiate_with_device_add_yet with
!user_creatable
  sysbus: Set user_creatable=false by default on TYPE_SYS_BUS_DEVICE
  xen-backend: Remove FIXME comment about user_creatable flag
  iommu: Remove FIXME comment about user_creatable=true
  fdc: Remove user_creatable flag from sysbus-fdc & SUNW,fdtwo
  pflash_cfi01: Remove user_creatable flag
  kvmclock: Remove user_creatable flag
  ioapic: Remove user_creatable flag
  kvmvapic: Remove user_creatable flag
  sysbus-ahci: Remove user_creatable flag
  allwinner-ahci: Remove user_creatable flag
  isabus-bridge: Remove user_creatable flag
  unimplemented-device: Remove user_creatable flag
  fw_cfg: Remove user_creatable flag
  esp: Remove user_creatable flag
  generic-sdhci: Remove user_creatable flag
  hpet: Remove user_creatable flag
  sysbus-ohci: Remove user_creatable flag
  virtio-mmio: Remove user_creatable flag
  xen-sysdev: Remove user_creatable flag
  s390-pcibus: No need to set user_creatable=false explicitly

 include/hw/qdev-core.h

Re: [Qemu-devel] [Qemu-ppc] [PATCH 2/5] hw/ppc: removing spapr_drc_detach_cb opaques

2017-05-03 Thread Daniel Henrique Barboza
Update: I have talked with Michael Roth about the spapr_release_lmb 
callback, the flow
of the LMB releases and so on. He clarified to me that it is not 
possible to get rid of

the callback and put its code in the spapr_del_lmbs function.

The reason is that the callback is being executed by the guest via 
spapr_rtas.c:rtas_set_indicator(),
which in turn will follow the flow of the DRC releases and eventually 
will hit the spapr_release_lmb
callback, but this will not necessarily happen in the spam of the 
spapr_del_lmbs function. This means that
my idea of putting the cb code in the spapr_del_lmbs and removing the 
callback not possible.


On the other hand, Bharata raised the issue about the scan function in 
the callback being a problem.
My tests with a 1 Gb unplug didn't show any noticeable delay increase 
but in theory we support memory
unplugs of 1 Terabyte. With 1Gb of RAM we need 4 DRCs, so 1 Tb would 
require 4000 DRCs. Then we
would scan through them 4000 times. I don't think the scan inside the 
callback is feasible in this scenario

either.

In the v9 I'll migrate the sPAPRDIMMState structure like Michael Roth 
mentioned somewhere in the
v6 review to use it inside the spapr_lmb_release callback - looks like 
the best option we have.



Thanks,


Daniel



On 05/03/2017 10:56 AM, Daniel Henrique Barboza wrote:



On 05/03/2017 12:26 AM, Bharata B Rao wrote:
On Tue, May 2, 2017 at 1:13 PM, Daniel Henrique Barboza 
> 
wrote:




On 05/02/2017 12:40 AM, Bharata B Rao wrote:

On Sun, Apr 30, 2017 at 10:55 PM, Daniel Henrique Barboza
> wrote:

Following up the previous detach_cb change, this patch
removes the
detach_cb_opaque entirely from the code.

The reason is that the drc->detach_cb_opaque object can't be
restored in the post load of the upcoming DRC migration and
no detach
callbacks actually need this opaque. 'spapr_core_release' is
receiving it as NULL, 'spapr_phb_remove_pci_device_cb' is
receiving
a phb object as opaque but is't using it. These were trivial
removal
cases.

However, the LM removal callback 'spapr_lmb_release' is 
receiving

and using the opaque object, a 'sPAPRDIMMState' struct. This
struct
holds the number of LMBs the DIMM object contains and the
callback
was using this counter as a countdown to check if all LMB
DRCs were
release before proceeding to the DIMM unplug. To remove the
need of
this callback we have choices such as:

- migrate the 'sPAPRDIMMState' struct. This would require
creating a
QTAILQ to store all DIMMStates and an additional 'dimm_id'
field to
associate the DIMMState with the DIMM object. We could attach
this
QTAILQ to the 'sPAPRPHBState' and retrieve it later in the
callback.

- fetch the state of the LMB DRCs directly by scanning the
state of
them and, if all of them are released, proceed with the DIMM
unplug.

The second approach was chosen. The new
'spapr_all_lmbs_drcs_released'
function scans all LMBs of a given DIMM device to see if
their DRC
state are inactive. If all of them are inactive return
'true', 'false'
otherwise. This function is being called inside the
'spapr_lmb_release'
callback, replacing the role of the 'sPAPRDIMMState' opaque. 
The

'sPAPRDIMMState' struct was removed from the code given that
there are
no more uses for it.

After all these changes, there are no roles left for the
'detach_cb_opaque'
attribute of the 'sPAPRDRConnector' as well, so we can safely
remove
it from the code too.

Signed-off-by: Daniel Henrique Barboza
>
---
 hw/ppc/spapr.c | 46
+-
 hw/ppc/spapr_drc.c | 16 +---
 hw/ppc/spapr_pci.c |  4 ++--
 include/hw/ppc/spapr_drc.h |  6 ++
 4 files changed, 42 insertions(+), 30 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index bc11757..8b9a6cf 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1887,21 +1887,43 @@ static void spapr_drc_reset(void 
*opaque)

 }
 }

-typedef struct sPAPRDIMMState {
-uint32_t nr_lmbs;
-} sPAPRDIMMState;
+static bool spapr_all_lmbs_drcs_released(PCDIMMDevice *dimm)
+{
+Error *local_err = NULL;
+PCDIMMDeviceClass *ddc = PC_DIMM_GET_CLASS(dimm);
+MemoryRegion *mr = 

Re: [Qemu-devel] QEMU build breakage on ARM against Xen 4.9 caused by libxendevicemodel

2017-05-03 Thread Stefano Stabellini
On Wed, 3 May 2017, Anthony PERARD wrote:
> On Wed, May 03, 2017 at 10:20:59AM -0700, Stefano Stabellini wrote:
> > On Wed, 3 May 2017, Anthony PERARD wrote:
> > > On Thu, Apr 20, 2017 at 11:05:33AM -0700, Stefano Stabellini wrote:
> > > > On Thu, 20 Apr 2017, Paul Durrant wrote:
> > > > > I think xencall should be part of the base xen_stable_libs anyway.
> > > > 
> > > > Yes, you are right. However I noticed that -lxencall needs to come after
> > > > -lxendevicemodel. So, I'll have to move -lxendevicemodel before
> > > > $xen_stable_libs, see below. I'll merge this patch into "configure:
> > > > detect presence of libxendevicemodel", if that's OK.
> > > > 
> > > > diff --git a/configure b/configure
> > > > index 99d6cbc..3133ef8 100755
> > > > --- a/configure
> > > > +++ b/configure
> > > > @@ -1992,7 +1992,7 @@ if test "$xen" != "no" ; then
> > > >else
> > > >  
> > > >  xen_libs="-lxenstore -lxenctrl -lxenguest"
> > > > -xen_stable_libs="-lxenforeignmemory -lxengnttab -lxenevtchn"
> > > > +xen_stable_libs="-lxencall -lxenforeignmemory -lxengnttab 
> > > > -lxenevtchn"
> > > >  
> > > >  # First we test whether Xen headers and libraries are available.
> > > >  # If no, we are done and there is no Xen support.
> > > > @@ -2027,9 +2027,9 @@ int main(void) {
> > > >return 0;
> > > >  }
> > > >  EOF
> > > > -compile_prog "" "$xen_libs $xen_stable_libs -lxendevicemodel"
> > > > +compile_prog "" "$xen_libs -lxendevicemodel $xen_stable_libs"
> > > >then
> > > > -  xen_stable_libs="$xen_stable_libs -lxendevicemodel"
> > > > +  xen_stable_libs="-lxendevicemodel $xen_stable_libs"
> > > >xen_ctrl_version=40900
> > > >xen=yes
> > > >  elif
> > > 
> > > Hey, now that this patch is merged, xen.git fail to build QEMU. (osstest
> > > qemu-mainline branch fail.)
> > > 
> > > That's because -lxencall is not found because -L$path_to_libxencall is
> > > missing in xen.git.
> > > 
> > > But I've notice something else, libxendevicemodel.so is not linked
> > > against libxencall, that might be the root of the issues on arm.
> > > (libxenctrl.so is linked against libxencall.)
> > > 
> > > Thought?
> > > 
> > > 
> > > We probably need this patch in xen:
> > 
> > We also need to add -L$path_to_libxencall and -I$path_to_libxencall to
> > tools/Makefile:subdir-all-qemu-xen-dir.
> 
> I don't think that needed because:
> for -I, QEMU does not use anything from xencall, and any other includes
> does not use xencall.h.
> 
> for -L, I think that would be usefull only if QEMU is built staticly.
> 
> Also, I don't think -lxencall is needed at all, if libxendevicemodel is
> fixed. libxencall will only be a runtime dependency.

I tried the patch below: on ARM, if I remove -L$DIR/tools/libs/call, the
QEMU configure script detects Xen 4.7 instead of 4.9. If I also remove
-Wl,-rpath-link=$DIR/tools/libs/call, it cannot detect Xen at all.

You are right that we can avoid -I and -l, but I think we need both -L
and -Wl,-rpath-link for tools/libs/call.


> > > diff --git a/tools/libs/devicemodel/Makefile 
> > > b/tools/libs/devicemodel/Makefile
> > > index 55626a5049..81fa5a4ac4 100644
> > > --- a/tools/libs/devicemodel/Makefile
> > > +++ b/tools/libs/devicemodel/Makefile
> > > @@ -63,7 +63,7 @@ libxendevicemodel.so.$(MAJOR): 
> > > libxendevicemodel.so.$(MAJOR).$(MINOR)
> > > $(SYMLINK_SHLIB) $< $@
> > >  
> > >  libxendevicemodel.so.$(MAJOR).$(MINOR): $(PIC_OBJS) libxendevicemodel.map
> > > -   $(CC) $(LDFLAGS) $(PTHREAD_LDFLAGS) -Wl,$(SONAME_LDFLAG) 
> > > -Wl,libxendevicemodel.so.$(MAJOR) $(SHLIB_LDFLAGS) -o $@ $(PIC_OBJS) 
> > > $(LDLIBS_libxentoollog) $(APPEND_LDFLAGS)
> > > +   $(CC) $(LDFLAGS) $(PTHREAD_LDFLAGS) -Wl,$(SONAME_LDFLAG) 
> > > -Wl,libxendevicemodel.so.$(MAJOR) $(SHLIB_LDFLAGS) -o $@ $(PIC_OBJS) 
> > > $(LDLIBS_libxentoollog) $(LDLIBS_libxencall) $(APPEND_LDFLAGS)
> > >  
> > >  .PHONY: install
> > >  install: build
> > > 
> > > -- 
> > > Anthony PERARD
> > > 
> 
> -- 
> Anthony PERARD
> 



Re: [Qemu-devel] [PATCH v5 10/10] test-qga: Actually test 0xff sync bytes

2017-05-03 Thread Michael Roth
Quoting Markus Armbruster (2017-05-03 03:57:41)
> Michael Roth  writes:
> 
> > Quoting Michael Roth (2017-05-02 11:46:36)
> >> Quoting Eric Blake (2017-04-27 16:58:21)
> >> > Commit 62c39b3 introduced test-qga, and at face value, appears
> >> > to be testing the 'guest-sync' behavior that is recommended for
> >> > guests in sending 0xff to QGA to force the parser to reset.  But
> >> > this aspect of the test has never actually done anything: the
> >> > qmp_fd() call chain converts its string argument into QObject,
> >> > then converts that QObject back to the actual string that is
> >> > sent over the wire - and the conversion process silently drops
> >> > the 0xff byte from the string sent to QGA, thus never resetting
> >> > the QGA parser.
> >> > 
> >> > An upcoming patch will get rid of the wasteful round trip
> >> > through QObject, at which point the string in test-qga will be
> >> > directly sent over the wire.
> >> > 
> >> > But fixing qmp_fd() to actually send 0xff over the wire is not
> >> > all we have to do - the actual QMP parser loudly complains that
> >> > 0xff is not valid JSON, and sends an error message _prior_ to
> >> > actually parsing the 'guest-sync' or 'guest-sync-delimited'
> >> > command.  With 'guest-sync', we cannot easily tell if this error
> >> > message is a result of our command - which is WHY we invented
> >> > the 'guest-sync-delimited' command.  So for the testsuite, fix
> >> > things to only check 0xff behavior on 'guest-sync-delimited',
> >> > and to loop until we've consumed all garbage prior to the
> >> > requested delimiter, which matches the documented actions that
> >> > a real QGA client is supposed to do.
> >> 
> >> The full re-sync sequence is actually to perform that process,
> >> check if the response matches the client-provided sync token,
> >> and then repeat the procedure if it doesn't (in the odd case
> >> that a previous client initiated a guest-sync-delimited
> >> sequence and never consumed the response). The current
> >> implementation only performs one iteration so it's not quite
> >> a full implementation of the documentation procedure.
> >
> > Well, to be more accurate it's a full implementation of the
> > documented procedure, it's just that the procedure isn't
> > fully documented properly. I'll send a patch to address that.
> 
> Good.
> 
> >> For the immediate purpose of improving the handling to deal
> >> with the 0xFF-generated error the patch seems sound though,
> >> maybe just something worth noting in the commit msg or
> >> comments so that we might eventually test the full procedure.
> 
> Feel free to suggest something for me to add to the commit message.

Maybe change:

  "which matches the documented actions that a real QGA client
   is supposed to do."

to

  "which is compatible with the documented actions that a real
   QGA client is supposed to do."

and add the following comment to test_qga_sync_delimited

  /* 
   * Note that the full reset sequence would involve checking the
   * response of guest-sync-delimited and repeating the loop if
   * 'id' field of the response does not match the 'id' field of 
   * the request. Testing this fully would require inserting
   * garbage in the response stream and is left as a future test
   * to implement.
   */

> 
> >> In any case:
> >> 
> >> Reviewed-by: Michael Roth 
> 
> Noted, thanks!
> 




Re: [Qemu-devel] [PATCH v7 3/4] qemu-img: introduce --target-image-opts for 'convert' command

2017-05-03 Thread Max Reitz
On 02.05.2017 16:47, Daniel P. Berrange wrote:
> The '--image-opts' flag indicates whether the source filename
> includes options. The target filename has to remain in the
> plain filename format though, since it needs to be passed to
> bdrv_create().  When using --skip-create though, it would be
> possible to use image-opts syntax. This adds --target-image-opts
> to indicate that the target filename includes options. Currently
> this mandates use of the --skip-create flag too.
> 
> Reviewed-by: Fam Zheng 
> Reviewed-by: Eric Blake 

Sure you want to keep this, considering that there are quite some
changes since v5?

> Signed-off-by: Daniel P. Berrange 
> ---
>  qemu-img-cmds.hx |  4 +--
>  qemu-img.c   | 77 
> +---
>  qemu-img.texi| 12 +++--
>  3 files changed, 63 insertions(+), 30 deletions(-)

[...]

> diff --git a/qemu-img.c b/qemu-img.c
> index d8fdcb1..94c8cea 100644
> --- a/qemu-img.c
> +++ b/qemu-img.c

[...]

> @@ -1900,7 +1901,7 @@ static int img_convert(int argc, char **argv)
>  char *options = NULL;
>  Error *local_err = NULL;
>  bool writethrough, src_writethrough, quiet = false, image_opts = false,
> - skip_create = false, progress = false;
> +skip_create = false, progress = false, tgt_image_opts = false;

Not sure about the indentation here. (I personally don't like spanning
the declaration over multiple lines in the first place, but that's a
different topic.) Indenting consecutive lines by four spaces is
standard, but the indentation by five spaces had a reason.

I guess I'd personally rather keep the five-space indentation...

>  int64_t ret = -EINVAL;
>  
>  ImgConvertState s = (ImgConvertState) {

[...]

> @@ -2047,12 +2056,22 @@ static int img_convert(int argc, char **argv)
>  goto fail_getopt;
>  }
>  
> +if (tgt_image_opts && !skip_create) {
> +error_report("--target-image-opts requires use of -n flag");
> +goto fail_getopt;
> +}
> +
>  s.src_num = argc - optind - 1;
>  out_filename = s.src_num >= 1 ? argv[argc - 1] : NULL;
>  
>  if (options && has_help_option(options)) {
> -ret = print_block_option_help(out_filename, out_fmt);
> -goto fail_getopt;
> +if (out_fmt) {
> +ret = print_block_option_help(out_filename, out_fmt);
> +goto out;

Shouldn't this remain goto fail_getopt;?

> +} else {
> +error_report("Option help requires a format be specified");
> +goto fail_getopt;
> +}
>  }
>  
>  if (s.src_num < 1) {

[...]

Why did you remove the compress &&
!out_bs->drv->bdrv_co_pwritev_compressed check? I liked it. :-(

Max



signature.asc
Description: OpenPGP digital signature


Re: [Qemu-devel] [PATCH] tests: acpi: extend cphp and memhp testcase with numa distance check

2017-05-03 Thread Eduardo Habkost
On Wed, May 03, 2017 at 03:07:05PM +0200, Igor Mammedov wrote:
> On Wed,  3 May 2017 17:17:16 +0800
> He Chen  wrote:
> 
> > Signed-off-by: He Chen 
> Reviewed-by: Igor Mammedov 

Thanks!

> 
> Eduardo,
> 
> could you take it through your tree as well and
> add as part of this patch SLIT tables blobs for
> piix4/q35 that tests/acpi-test-data/rebuild-expected-aml.sh
> will generate.

Queued to numa-next, updated SRAT and SLIT blobs. diffstat of commit fixup:

 tests/acpi-test-data/pc/SLIT.cphp   | Bin 0 -> 48 bytes
 tests/acpi-test-data/pc/SLIT.memhp  | Bin 0 -> 48 bytes
 tests/acpi-test-data/pc/SRAT.memhp  | Bin 224 -> 264 bytes
 tests/acpi-test-data/q35/SLIT.cphp  | Bin 0 -> 48 bytes
 tests/acpi-test-data/q35/SLIT.memhp | Bin 0 -> 48 bytes
 tests/acpi-test-data/q35/SRAT.memhp | Bin 224 -> 264 bytes
 6 files changed, 0 insertions(+), 0 deletions(-)

Are you able to explain why SRAT.memhp changed, but SRAT.cphp didn't?

> 
> > ---
> >  tests/bios-tables-test.c | 16 
> >  1 file changed, 12 insertions(+), 4 deletions(-)
> > 
> > diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c
> > index 88dbf97..c593165 100644
> > --- a/tests/bios-tables-test.c
> > +++ b/tests/bios-tables-test.c
> > @@ -710,7 +710,8 @@ static void test_acpi_piix4_tcg_cphp(void)
> >  data.machine = MACHINE_PC;
> >  data.variant = ".cphp";
> >  test_acpi_one("-smp 2,cores=3,sockets=2,maxcpus=6"
> > -  " -numa node -numa node",
> > +  " -numa node -numa node"
> > +  " -numa dist,src=0,dst=1,val=21",
> >);
> >  free_test_data();
> >  }
> > @@ -723,7 +724,8 @@ static void test_acpi_q35_tcg_cphp(void)
> >  data.machine = MACHINE_Q35;
> >  data.variant = ".cphp";
> >  test_acpi_one(" -smp 2,cores=3,sockets=2,maxcpus=6"
> > -  " -numa node -numa node",
> > +  " -numa node -numa node"
> > +  " -numa dist,src=0,dst=1,val=21",
> >);
> >  free_test_data();
> >  }
> > @@ -772,7 +774,10 @@ static void test_acpi_q35_tcg_memhp(void)
> >  memset(, 0, sizeof(data));
> >  data.machine = MACHINE_Q35;
> >  data.variant = ".memhp";
> > -test_acpi_one(" -m 128,slots=3,maxmem=1G -numa node", );
> > +test_acpi_one(" -m 128,slots=3,maxmem=1G"
> > +  " -numa node -numa node"
> > +  " -numa dist,src=0,dst=1,val=21",
> > +  );
> >  free_test_data();
> >  }
> >  
> > @@ -783,7 +788,10 @@ static void test_acpi_piix4_tcg_memhp(void)
> >  memset(, 0, sizeof(data));
> >  data.machine = MACHINE_PC;
> >  data.variant = ".memhp";
> > -test_acpi_one(" -m 128,slots=3,maxmem=1G -numa node", );
> > +test_acpi_one(" -m 128,slots=3,maxmem=1G"
> > +  " -numa node -numa node"
> > +  " -numa dist,src=0,dst=1,val=21",
> > +  );
> >  free_test_data();
> >  }
> >  
> 

-- 
Eduardo



Re: [Qemu-devel] [PATCH v2] libvhost-user: fix crash when rings aren't ready

2017-05-03 Thread Philippe Mathieu-Daudé

On 05/03/2017 01:54 PM, Marc-André Lureau wrote:

Calling libvhost-user functions like vu_queue_get_avail_bytes() when the
queue doesn't yet have addresses will result in the crashes like the
following:

Program received signal SIGSEGV, Segmentation fault.
0x55c414112ce4 in vring_avail_idx (vq=0x55c41582fd68, vq=0x55c41582fd68)
at /home/dgilbert/git/qemu/contrib/libvhost-user/libvhost-user.c:940
940vq->shadow_avail_idx = vq->vring.avail->idx;
(gdb) p vq
$1 = (VuVirtq *) 0x55c41582fd68
(gdb) p vq->vring
$2 = {num = 0, desc = 0x0, avail = 0x0, used = 0x0, log_guest_addr = 0, flags = 
0}

at /home/dgilbert/git/qemu/contrib/libvhost-user/libvhost-user.c:940
No locals.
at /home/dgilbert/git/qemu/contrib/libvhost-user/libvhost-user.c:960
num_heads = 
out_bytes=out_bytes@entry=0x7fffd035d7c4, max_in_bytes=max_in_bytes@entry=0,
max_out_bytes=max_out_bytes@entry=0) at 
/home/dgilbert/git/qemu/contrib/libvhost-user/libvhost-user.c:1034

Add a pre-condition checks on vring.avail before accessing it.

Fix documentation and return type of vu_queue_empty() while at it.

Signed-off-by: Marc-André Lureau 


Reviewed-by: Philippe Mathieu-Daudé 


---
 contrib/libvhost-user/libvhost-user.h |  6 +++---
 contrib/libvhost-user/libvhost-user.c | 26 --
 2 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/contrib/libvhost-user/libvhost-user.h 
b/contrib/libvhost-user/libvhost-user.h
index 156b50e989..af02a31ebe 100644
--- a/contrib/libvhost-user/libvhost-user.h
+++ b/contrib/libvhost-user/libvhost-user.h
@@ -327,13 +327,13 @@ void vu_queue_set_notification(VuDev *dev, VuVirtq *vq, 
int enable);
 bool vu_queue_enabled(VuDev *dev, VuVirtq *vq);

 /**
- * vu_queue_enabled:
+ * vu_queue_empty:
  * @dev: a VuDev context
  * @vq: a VuVirtq queue
  *
- * Returns: whether the queue is empty.
+ * Returns: true if the queue is empty or not ready.
  */
-int vu_queue_empty(VuDev *dev, VuVirtq *vq);
+bool vu_queue_empty(VuDev *dev, VuVirtq *vq);

 /**
  * vu_queue_notify:
diff --git a/contrib/libvhost-user/libvhost-user.c 
b/contrib/libvhost-user/libvhost-user.c
index af4faad60b..e1bf9644b5 100644
--- a/contrib/libvhost-user/libvhost-user.c
+++ b/contrib/libvhost-user/libvhost-user.c
@@ -1031,6 +1031,11 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, 
unsigned int *in_bytes,
 idx = vq->last_avail_idx;

 total_bufs = in_total = out_total = 0;
+if (unlikely(dev->broken) ||
+unlikely(!vq->vring.avail)) {
+goto done;
+}
+
 while ((rc = virtqueue_num_heads(dev, vq, idx)) > 0) {
 unsigned int max, num_bufs, indirect = 0;
 struct vring_desc *desc;
@@ -1121,11 +1126,16 @@ vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned 
int in_bytes,

 /* Fetch avail_idx from VQ memory only when we really need to know if
  * guest has added some buffers. */
-int
+bool
 vu_queue_empty(VuDev *dev, VuVirtq *vq)
 {
+if (unlikely(dev->broken) ||
+unlikely(!vq->vring.avail)) {
+return true;
+}
+
 if (vq->shadow_avail_idx != vq->last_avail_idx) {
-return 0;
+return false;
 }

 return vring_avail_idx(vq) == vq->last_avail_idx;
@@ -1174,7 +1184,8 @@ vring_notify(VuDev *dev, VuVirtq *vq)
 void
 vu_queue_notify(VuDev *dev, VuVirtq *vq)
 {
-if (unlikely(dev->broken)) {
+if (unlikely(dev->broken) ||
+unlikely(!vq->vring.avail)) {
 return;
 }

@@ -1291,7 +1302,8 @@ vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
 struct vring_desc *desc;
 int rc;

-if (unlikely(dev->broken)) {
+if (unlikely(dev->broken) ||
+unlikely(!vq->vring.avail)) {
 return NULL;
 }

@@ -1445,7 +1457,8 @@ vu_queue_fill(VuDev *dev, VuVirtq *vq,
 {
 struct vring_used_elem uelem;

-if (unlikely(dev->broken)) {
+if (unlikely(dev->broken) ||
+unlikely(!vq->vring.avail)) {
 return;
 }

@@ -1474,7 +1487,8 @@ vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int 
count)
 {
 uint16_t old, new;

-if (unlikely(dev->broken)) {
+if (unlikely(dev->broken) ||
+unlikely(!vq->vring.avail)) {
 return;
 }






  1   2   3   4   >