date:20180528

[Qemu-devel] [PATCH] aspeed_scu: Implement RNG register

2018-05-28 Thread Joel Stanley

The ASPEED SoCs contain a single register that returns random data when
read. This models that register so that guests can use it.

Signed-off-by: Joel Stanley 
---
 hw/misc/aspeed_scu.c | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/hw/misc/aspeed_scu.c b/hw/misc/aspeed_scu.c
index 5e6d5744eeca..8fa0cecf0fa1 100644
--- a/hw/misc/aspeed_scu.c
+++ b/hw/misc/aspeed_scu.c
@@ -16,6 +16,7 @@
 #include "qapi/visitor.h"
 #include "qemu/bitops.h"
 #include "qemu/log.h"
+#include "crypto/random.h"
 #include "trace.h"
 
 #define TO_REG(offset) ((offset) >> 2)
@@ -154,6 +155,18 @@ static const uint32_t 
ast2500_a1_resets[ASPEED_SCU_NR_REGS] = {
  [BMC_DEV_ID]  = 0x2402U
 };
 
+static uint32_t aspeed_scu_get_random(void)
+{
+Error *err = NULL;
+uint32_t num;
+
+if (qcrypto_random_bytes((uint8_t *), sizeof(num), )) {
+error_report_err(err);
+}
+
+return num;
+}
+
 static uint64_t aspeed_scu_read(void *opaque, hwaddr offset, unsigned size)
 {
 AspeedSCUState *s = ASPEED_SCU(opaque);
@@ -167,6 +180,9 @@ static uint64_t aspeed_scu_read(void *opaque, hwaddr 
offset, unsigned size)
 }
 
 switch (reg) {
+case RNG_DATA:
+return aspeed_scu_get_random();
+break;
 case WAKEUP_EN:
 qemu_log_mask(LOG_GUEST_ERROR,
   "%s: Read of write-only offset 0x%" HWADDR_PRIx "\n",
@@ -287,6 +303,9 @@ static void aspeed_scu_realize(DeviceState *dev, Error 
**errp)
   TYPE_ASPEED_SCU, SCU_IO_REGION_SIZE);
 
 sysbus_init_mmio(sbd, >iomem);
+
+if (qcrypto_random_init(errp))
+return;
 }
 
 static const VMStateDescription vmstate_aspeed_scu = {
-- 
2.17.0

Re: [Qemu-devel] [PATCH 01/13] 9p: linux: Fix a couple Linux assumptions

2018-05-28 Thread Greg Kurz

On Sat, 26 May 2018 01:23:03 -0400
k...@juliacomputing.com wrote:

> From: Keno Fischer 
> 
>  - Guard two Linux only headers.
>  - Define `ENOATTR` only if not only defined
>(it's defined in system headers on Darwin).
> 
> Signed-off-by: Keno Fischer 
> ---
>  fsdev/file-op-9p.h   | 2 ++
>  hw/9pfs/9p-local.c   | 2 ++
>  include/qemu/xattr.h | 4 +++-

Irrespectively of the discussion on checking this in configure, 
there's another user of  in fsdev/virtfs-proxy-helper.c.
I see in patch 13 that the helper won't be built on Darwin, but
this could change, and anyway, I'd like the handling of 
to stay consistent across the VirtFS code.

>  3 files changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h
> index 3fa062b..a13e729 100644
> --- a/fsdev/file-op-9p.h
> +++ b/fsdev/file-op-9p.h
> @@ -16,7 +16,9 @@
>  
>  #include 
>  #include 
> +#ifdef CONFIG_LINUX
>  #include 
> +#endif
>  #include "qemu-fsdev-throttle.h"
>  
>  #define SM_LOCAL_MODE_BITS0600
> diff --git a/hw/9pfs/9p-local.c b/hw/9pfs/9p-local.c
> index b37b1db..f6c7526 100644
> --- a/hw/9pfs/9p-local.c
> +++ b/hw/9pfs/9p-local.c
> @@ -27,10 +27,12 @@
>  #include "qemu/error-report.h"
>  #include "qemu/option.h"
>  #include 
> +#ifdef CONFIG_LINUX
>  #include 
>  #ifdef CONFIG_LINUX_MAGIC_H
>  #include 
>  #endif
> +#endif
>  #include 
>  
>  #ifndef XFS_SUPER_MAGIC
> diff --git a/include/qemu/xattr.h b/include/qemu/xattr.h
> index a83fe8e..f1d0f7b 100644
> --- a/include/qemu/xattr.h
> +++ b/include/qemu/xattr.h
> @@ -22,7 +22,9 @@
>  #ifdef CONFIG_LIBATTR
>  #  include 
>  #else
> -#  define ENOATTR ENODATA
> +#  if !defined(ENOATTR)
> +#define ENOATTR ENODATA
> +#  endif
>  #  include 
>  #endif
>

Re: [Qemu-devel] [Qemu-ppc] [PATCH 1/4] spapr: remove irq_hint parameter from spapr_irq_alloc()

2018-05-28 Thread Greg Kurz

On Mon, 28 May 2018 11:20:36 +0200
Cédric Le Goater  wrote:

> On 05/28/2018 09:18 AM, Thomas Huth wrote:
> > On 28.05.2018 09:06, Cédric Le Goater wrote:  
> >> On 05/28/2018 08:17 AM, Thomas Huth wrote:  
> >>> On 25.05.2018 16:02, Greg Kurz wrote:  
>  On Fri, 18 May 2018 18:44:02 +0200
>  Cédric Le Goater  wrote:
>   
> > This IRQ number hint can possibly be used by the VIO devices if the
> > "irq" property is defined on the command line but it seems it is never
> > the case. It is not used in libvirt for instance. So, let's remove it
> > to simplify future changes.
> >  
> 
>  Setting an irq manually looks a bit anachronistic. I doubt anyone would
>  do that nowadays, and the patch does a nice cleanup. So this looks like
>  a good idea.  
> >>> [...]  
> > diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
> > index 472dd6f33a96..cc064f64fccf 100644
> > --- a/hw/ppc/spapr_vio.c
> > +++ b/hw/ppc/spapr_vio.c
> > @@ -455,7 +455,7 @@ static void spapr_vio_busdev_realize(DeviceState 
> > *qdev, Error **errp)
> >  dev->qdev.id = id;
> >  }
> >  
> > -dev->irq = spapr_irq_alloc(spapr, dev->irq, false, _err);
> > +dev->irq = spapr_irq_alloc(spapr, false, _err);  
> 
>  Silently breaking "irq" like this looks wrong. I'd rather officially 
>  remove
>  it first (ie, kill spapr_vio_props, -5 lines in spapr_vio.c).
> 
>  Of course, this raises the question of interface deprecation, and it 
>  should
>  theoretically follow the process described at:
> 
>  https://wiki.qemu.org/Features/LegacyRemoval#Rules_for_removing_an_interface
> 
>  Cc'ing Thomas, our Chief Deprecation Officer, for insights :)  
> >>>
> >>> The property is a public interface. Just because it's not used by
> >>> libvirt does not mean that nobody is using it. So yes, please follow the
> >>> rules and mark it as deprecated first for two release, before you really
> >>> remove it.  
> >>
> >> This "irq" property is a problem to introduce a new static layout of IRQ 
> >> numbers. It is in complete opposition. 
> >>
> >> Can we keep it as it is for old pseries machine (settable) and ignore it 
> >> for newer ? Would that be fine ?  
> > 
> > I think that would be fine, too. You likely need to keep the settable
> > IRQs around for the old machines anyway, to make sure that migration of
> > the old machine types still works, right?  
> 
> Yes, that is the goal of patch 3. It introduces a common sPAPR IRQ frontend,
> the first backend being the current one.
> 

If we keep "irq" but we ignore it with newer machine types, we should at
least print a warning then IMHO.

> C.
>

[Qemu-devel] [PATCH v4] block: fix QEMU crash with scsi-hd and drive_del

2018-05-28 Thread Greg Kurz

Removing a drive with drive_del while it is being used to run an I/O
intensive workload can cause QEMU to crash.

An AIO flush can yield at some point:

blk_aio_flush_entry()
 blk_co_flush(blk)
  bdrv_co_flush(blk->root->bs)
   ...
qemu_coroutine_yield()

and let the HMP command to run, free blk->root and give control
back to the AIO flush:

hmp_drive_del()
 blk_remove_bs()
  bdrv_root_unref_child(blk->root)
   child_bs = blk->root->bs
   bdrv_detach_child(blk->root)
bdrv_replace_child(blk->root, NULL)
 blk->root->bs = NULL
g_free(blk->root) <== blk->root becomes stale
   bdrv_unref(child_bs)
bdrv_delete(child_bs)
 bdrv_close()
  bdrv_drained_begin()
   bdrv_do_drained_begin()
bdrv_drain_recurse()
 aio_poll()
  ...
  qemu_coroutine_switch()

and the AIO flush completion ends up dereferencing blk->root:

  blk_aio_complete()
   scsi_aio_complete()
blk_get_aio_context(blk)
 bs = blk_bs(blk)
 ie, bs = blk->root ? blk->root->bs : NULL
^
stale

The problem is that we should avoid making block driver graph
changes while we have in-flight requests. Let's drain all I/O
for this BB before calling bdrv_root_unref_child().

Signed-off-by: Greg Kurz 
---
v4: - call blk_drain() in blk_remove_bs() (Kevin)

v3: - start drained section before modifying the graph (Stefan)

v2: - drain I/O requests when detaching the BDS (Stefan, Paolo)
---
 block/block-backend.c |5 +
 1 file changed, 5 insertions(+)

diff --git a/block/block-backend.c b/block/block-backend.c
index 89f47b00ea24..bee1f0e41461 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -768,6 +768,11 @@ void blk_remove_bs(BlockBackend *blk)
 
 blk_update_root_state(blk);
 
+/* bdrv_root_unref_child() will cause blk->root to become stale and may
+ * switch to a completion coroutine later on. Let's drain all I/O here
+ * to avoid that and a potential QEMU crash.
+ */
+blk_drain(blk);
 bdrv_root_unref_child(blk->root);
 blk->root = NULL;
 }

Re: [Qemu-devel] [PATCH v3] block: fix QEMU crash with scsi-hd and drive_del

2018-05-28 Thread Greg Kurz

On Fri, 25 May 2018 16:02:46 +0200
Kevin Wolf  wrote:

> Am 25.05.2018 um 13:53 hat Greg Kurz geschrieben:
> > On Fri, 25 May 2018 10:37:15 +0200
> > Kevin Wolf  wrote:
> >   
> > > Am 25.05.2018 um 00:53 hat Greg Kurz geschrieben:  
> > > > Removing a drive with drive_del while it is being used to run an I/O
> > > > intensive workload can cause QEMU to crash.
> > > > 
> > > > An AIO flush can yield at some point:
> > > > 
> > > > blk_aio_flush_entry()
> > > >  blk_co_flush(blk)
> > > >   bdrv_co_flush(blk->root->bs)
> > > >...
> > > > qemu_coroutine_yield()
> > > > 
> > > > and let the HMP command to run, free blk->root and give control
> > > > back to the AIO flush:
> > > > 
> > > > hmp_drive_del()
> > > >  blk_remove_bs()
> > > >   bdrv_root_unref_child(blk->root)
> > > >child_bs = blk->root->bs
> > > >bdrv_detach_child(blk->root)
> > > > bdrv_replace_child(blk->root, NULL)
> > > >  blk->root->bs = NULL
> > > > g_free(blk->root) <== blk->root becomes stale
> > > >bdrv_unref(child_bs)
> > > > bdrv_delete(child_bs)
> > > >  bdrv_close()
> > > >   bdrv_drained_begin()
> > > >bdrv_do_drained_begin()
> > > > bdrv_drain_recurse()
> > > >  aio_poll()
> > > >   ...
> > > >   qemu_coroutine_switch()
> > > > 
> > > > and the AIO flush completion ends up dereferencing blk->root:
> > > > 
> > > >   blk_aio_complete()
> > > >scsi_aio_complete()
> > > > blk_get_aio_context(blk)
> > > >  bs = blk_bs(blk)
> > > >  ie, bs = blk->root ? blk->root->bs : NULL
> > > > ^
> > > > stale
> > > > 
> > > > The problem is that we should avoid making block driver graph
> > > > changes while we have in-flight requests. This patch hence adds
> > > > a drained section to bdrv_detach_child(), so that we're sure
> > > > all requests have been drained before blk->root becomes stale.
> > > > 
> > > > Signed-off-by: Greg Kurz 
> > > > ---
> > > > v3: - start drained section before modifying the graph (Stefan)
> > > > 
> > > > v2: - drain I/O requests when detaching the BDS (Stefan, Paolo)
> > > > ---
> > > >  block.c |4 
> > > >  1 file changed, 4 insertions(+)
> > > > 
> > > > diff --git a/block.c b/block.c
> > > > index 501b64c8193f..715c1b56c1e2 100644
> > > > --- a/block.c
> > > > +++ b/block.c
> > > > @@ -2127,12 +2127,16 @@ BdrvChild *bdrv_attach_child(BlockDriverState 
> > > > *parent_bs,
> > > >  
> > > >  static void bdrv_detach_child(BdrvChild *child)
> > > >  {
> > > > +BlockDriverState *child_bs = child->bs;
> > > > +
> > > > +bdrv_drained_begin(child_bs);
> > > >  if (child->next.le_prev) {
> > > >  QLIST_REMOVE(child, next);
> > > >  child->next.le_prev = NULL;
> > > >  }
> > > >  
> > > >  bdrv_replace_child(child, NULL);
> > > > +bdrv_drained_end(child_bs);
> > > >  
> > > >  g_free(child->name);
> > > >  g_free(child);
> > > 
> > > I wonder if the better fix would be calling blk_drain() in
> > > blk_remove_bs() (which would also better be blk_drained_begin/end...).
> > >   
> > 
> > Hmm... would blk_drain() in blk_remove_bs() ensure we don't have
> > any new activity until the BDS and BB are actually dissociated ?
> > 
> > ie, something like the following ?
> > 
> > +blk_drain(blk);
> >  bdrv_root_unref_child(blk->root);
> >  blk->root = NULL;  
> 
> I think that should be enough, as long as we hold the AioContext lock.
> In theory, callers should hold it, but I'm not sure if it's always true.

Most users of blk_remove_bs() do explicitely call aio_context_acquire(),
but not all... especially blk_unref()->blk_delete(), which has itself
a lot of users, but...

> If we're in a drain_begin/end section (rather than just after a drain),
> that might be less important because AioContext events are disabled
> then.
> 

... blk_remove_bs() already calls bdrv_drained_begin()/bdrv_drained_end()
to ensure I/O completion before removing throttle timers. It seems to
indicate that the AIO context is always acquired before calling this
function, correct ?

> Well, actually, if they didn't hold it, any drain would crash, at least
> when there still is some activity.
> 
> > because we can't do anything like:
> > 
> > +bdrv_drained_begin(blk_bs(blk));
> >  bdrv_root_unref_child(blk->root);
> > +bdrv_drained_begin(blk_bs(blk));
> >  blk->root = NULL;
> > 
> > since g_free(blk->root) gets called from under bdrv_root_unref_child()
> > at some point.  
> 
> If that's the problem, I guess we could do something like this (even
> though it's a bit ugly):
> 
> bs = blk_bs(blk);
> bdrv_ref(bs);
> bdrv_drained_begin(bs);
> bdrv_root_unref_child(blk->root)
> bdrv_drained_end(bs)
> bdrv_unref(bs)
> 
> This assumes that we're in the main thread, but that should always be
> the case for

Re: [Qemu-devel] [PATCH 0/8] block: more byte-based cleanups: vectored I/O

2018-05-28 Thread Kevin Wolf

Am 25.04.2018 um 20:32 hat Eric Blake geschrieben:
> Based-on: <20180424192506.149089-1-ebl...@redhat.com>
> ([PATCH v2 0/6] block: byte-based AIO read/write)
> Based-on: <20180424220157.177385-1-ebl...@redhat.com>
> ([PATCH] block: Merge .bdrv_co_writev{, _flags} in drivers)
> 
> My quest continues.  I spent some time pruning qcow down as far
> as possible (and was dismayed at how long it took to prove no
> iotests regressions); so for the other drivers, I did the bare
> minimum to get rid of an interface, but will leave it to those
> file owners if they want to get rid of further pointless sector
> manipulations in their files.
> 
> Next on the chopping block: bdrv_read/bdrv_write.

Nice series, looks good apart from a few minor comments on the qcow1
conversion.

For v2, can you please make sure to have proper CCs also on the cover
letter?

Kevin

Re: [Qemu-devel] [PATCH v2 20/20] arch_init: sort architectures

2018-05-28 Thread Laszlo Ersek

On 05/25/18 18:48, Michael S. Tsirkin wrote:
> Sort alphabetically. Will help us see if anything is missing (e.g. tile
> is not there now).
> 
> Signed-off-by: Michael S. Tsirkin 
> ---
>  arch_init.c | 16 
>  1 file changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/arch_init.c b/arch_init.c
> index 9597218..f4f3f61 100644
> --- a/arch_init.c
> +++ b/arch_init.c
> @@ -52,14 +52,14 @@ int graphic_depth = 32;
>  #define QEMU_ARCH QEMU_ARCH_ARM
>  #elif defined(TARGET_CRIS)
>  #define QEMU_ARCH QEMU_ARCH_CRIS
> -#elif defined(TARGET_I386)
> -#define QEMU_ARCH QEMU_ARCH_I386
>  #elif defined(TARGET_HPPA)
>  #define QEMU_ARCH QEMU_ARCH_HPPA
> -#elif defined(TARGET_M68K)
> -#define QEMU_ARCH QEMU_ARCH_M68K
> +#elif defined(TARGET_I386)
> +#define QEMU_ARCH QEMU_ARCH_I386
>  #elif defined(TARGET_LM32)
>  #define QEMU_ARCH QEMU_ARCH_LM32
> +#elif defined(TARGET_M68K)
> +#define QEMU_ARCH QEMU_ARCH_M68K
>  #elif defined(TARGET_MICROBLAZE)
>  #define QEMU_ARCH QEMU_ARCH_MICROBLAZE
>  #elif defined(TARGET_MIPS)
> @@ -80,12 +80,12 @@ int graphic_depth = 32;
>  #define QEMU_ARCH QEMU_ARCH_SH4
>  #elif defined(TARGET_SPARC)
>  #define QEMU_ARCH QEMU_ARCH_SPARC
> -#elif defined(TARGET_XTENSA)
> -#define QEMU_ARCH QEMU_ARCH_XTENSA
> -#elif defined(TARGET_UNICORE32)
> -#define QEMU_ARCH QEMU_ARCH_UNICORE32
>  #elif defined(TARGET_TRICORE)
>  #define QEMU_ARCH QEMU_ARCH_TRICORE
> +#elif defined(TARGET_UNICORE32)
> +#define QEMU_ARCH QEMU_ARCH_UNICORE32
> +#elif defined(TARGET_XTENSA)
> +#define QEMU_ARCH QEMU_ARCH_XTENSA
>  #endif
>  
>  const uint32_t arch_type = QEMU_ARCH;
> 

I sorted the list from scratch, using some regexes to temporarily move
the #define's to the same lines as the #[el]if's. My results were
identical to those of this patch.

Reviewed-by: Laszlo Ersek 

Thanks
Laszlo

Re: [Qemu-devel] [PATCH 4/8] qcow: Switch qcow_co_writev to byte-based calls

2018-05-28 Thread Kevin Wolf

Am 25.04.2018 um 20:32 hat Eric Blake geschrieben:
> We are gradually moving away from sector-based interfaces, towards
> byte-based.  Make the change for the internals of the qcow
> driver write function, by iterating over offset/bytes instead of
> sector_num/nb_sectors, and repurposing index_in_cluster and n
> to be bytes instead of sectors.
> 
> A later patch will then switch the qcow driver as a whole over
> to byte-based operation.
> 
> Signed-off-by: Eric Blake 

The same comments from patch 3 apply here.

Kevin

Re: [Qemu-devel] [PATCH 3/8] qcow: Switch qcow_co_readv to byte-based calls

2018-05-28 Thread Kevin Wolf

Am 25.04.2018 um 20:32 hat Eric Blake geschrieben:
> We are gradually moving away from sector-based interfaces, towards
> byte-based.  Make the change for the internals of the qcow
> driver read function, by iterating over offset/bytes instead of
> sector_num/nb_sectors, and repurposing index_in_cluster and n
> to be bytes instead of sectors.
> 
> A later patch will then switch the qcow driver as a whole over
> to byte-based operation.
> 
> Signed-off-by: Eric Blake 
> ---
>  block/qcow.c | 39 +++
>  1 file changed, 19 insertions(+), 20 deletions(-)
> 
> diff --git a/block/qcow.c b/block/qcow.c
> index 32730a8dd91..bf9d80fd227 100644
> --- a/block/qcow.c
> +++ b/block/qcow.c
> @@ -623,6 +623,8 @@ static coroutine_fn int qcow_co_readv(BlockDriverState 
> *bs, int64_t sector_num,
>  QEMUIOVector hd_qiov;
>  uint8_t *buf;
>  void *orig_buf;
> +int64_t offset = sector_num << BDRV_SECTOR_BITS;
> +int64_t bytes = nb_sectors << BDRV_SECTOR_BITS;

This should be okay because nb_sectors is limited to INT_MAX / 512, but
I wouldn't be surprised if Coverity complained about a possible
truncation here. Multiplying with BDRV_SECTOR_SIZE instead wouldn't be
any less readable and would avoid the problem.

>  if (qiov->niov > 1) {
>  buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
> @@ -636,36 +638,36 @@ static coroutine_fn int qcow_co_readv(BlockDriverState 
> *bs, int64_t sector_num,
> 
>  qemu_co_mutex_lock(>lock);
> 
> -while (nb_sectors != 0) {
> +while (bytes != 0) {
>  /* prepare next request */
> -ret = get_cluster_offset(bs, sector_num << 9,
> +ret = get_cluster_offset(bs, offset,
>   0, 0, 0, 0, _offset);

This surely fits in a single line now?

>  if (ret < 0) {
>  break;
>  }
> -index_in_cluster = sector_num & (s->cluster_sectors - 1);
> -n = s->cluster_sectors - index_in_cluster;
> -if (n > nb_sectors) {
> -n = nb_sectors;
> +index_in_cluster = offset & (s->cluster_size - 1);
> +n = s->cluster_size - index_in_cluster;
> +if (n > bytes) {
> +n = bytes;
>  }

"index" suggests that it refers to an object larger than a byte. qcow2
renamed the variable to offset_in_cluster when the same change was made.
A new name for a unit change would also make review a bit easier.

The logic looks correct, though.

Kevin

Re: [Qemu-devel] [RFC 3/3] acpi-build: allocate mcfg for multiple host bridges

2018-05-28 Thread Laszlo Ersek

On 05/23/18 09:32, Laszlo Ersek wrote:
> On 05/23/18 01:40, Michael S. Tsirkin wrote:
>> On Wed, May 23, 2018 at 12:42:09AM +0200, Laszlo Ersek wrote:
>>> Hold on,
>>>
>>> On 05/22/18 21:51, Laszlo Ersek wrote:
>>>
 It had taken years until the edk2 core gained a universal
 PciHostBridgeDxe driver with a well-defined platform customization
 interface, and that interface doesn't support multiple domains /
 segments.
>>>
>>> after doing a bit more research: I was wrong about this. What I
>>> remembered was not the current state. Edk2 does seem to support multiple
>>> domains, with different segment numbers, ECAM base addresses, and bus
>>> number ranges. If we figure out a placement strategy or an easy to
>>> consume representation of these data for the firmware, it might be
>>> possible for OVMF to hook them into the edk2 core (although not in the
>>> earliest firmware phases, such as SEC and PEI).
>>>
>>> In retrospect, I'm honestly surprised that so much multi-segment support
>>> has been upstreamed to the edk2 core. Sorry about the FUD. (My general
>>> points remain valid, for the record... But perhaps they no longer matter
>>> for this discussion.)
>>>
>>> (I meant to send this message soon after
>>> http://mid.mail-archive.com/fc603491-7c41-e862-a583-2bae6f165b5a@redhat.com>,
>>> but my internet connection had to die right then.)
>>>
>>> Thanks
>>> Laszlo
>>
>> Is there support for any hardware which we could emulate?
> 
> I don't see any actual hw support in the edk2 project, but I'll ask.

I got an answer in the negative.

Thanks
Laszlo

Re: [Qemu-devel] [PATCH 2/8] qcow: Switch get_cluster_offset to be byte-based

2018-05-28 Thread Kevin Wolf

Am 25.04.2018 um 20:32 hat Eric Blake geschrieben:
> We are gradually moving away from sector-based interfaces, towards
> byte-based.  Make the change for the internal helper function
> get_cluster_offset(), by changing n_start and n_end to by byte
> offsets rather than sector indices within the cluster being
> allocated.
> 
> A later patch will then switch the qcow driver as a whole over
> to byte-based operation.
> 
> Signed-off-by: Eric Blake 
> ---
>  block/qcow.c | 28 ++--
>  1 file changed, 14 insertions(+), 14 deletions(-)
> 
> diff --git a/block/qcow.c b/block/qcow.c
> index dd042b8ddbe..32730a8dd91 100644
> --- a/block/qcow.c
> +++ b/block/qcow.c
> @@ -345,8 +345,8 @@ static int qcow_reopen_prepare(BDRVReopenState *state,
>   *
>   * 0 to not allocate.
>   *
> - * 1 to allocate a normal cluster (for sector indexes 'n_start' to
> - * 'n_end')
> + * 1 to allocate a normal cluster (for byte offsets 'n_start' to
> + * 'n_end' within the cluster)
>   *
>   * 2 to allocate a compressed cluster of size
>   * 'compressed_size'. 'compressed_size' must be > 0 and <
> @@ -442,7 +442,7 @@ static int get_cluster_offset(BlockDriverState *bs,
>  BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
>  /* allocate a new cluster */
>  if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
> -(n_end - n_start) < s->cluster_sectors) {
> +(n_end - n_start) < s->cluster_size) {
>  /* if the cluster is already compressed, we must
> decompress it in the case it is not completely
> overwritten */
> @@ -480,16 +480,15 @@ static int get_cluster_offset(BlockDriverState *bs,
>  /* if encrypted, we must initialize the cluster
> content which won't be written */
>  if (bs->encrypted &&
> -(n_end - n_start) < s->cluster_sectors) {
> -uint64_t start_sect;
> +(n_end - n_start) < s->cluster_size) {
> +uint64_t start_offset;
>  assert(s->crypto);
> -start_sect = (offset & ~(s->cluster_size - 1)) >> 9;
> -for(i = 0; i < s->cluster_sectors; i++) {
> +start_offset = offset & ~(s->cluster_size - 1);
> +for (i = 0; i < s->cluster_size; i += BDRV_SECTOR_SIZE) {
>  if (i < n_start || i >= n_end) {
> -memset(s->cluster_data, 0x00, 512);
> +memset(s->cluster_data, 0x00, BDRV_SECTOR_SIZE);
>  if (qcrypto_block_encrypt(s->crypto,
> -  (start_sect + i) *
> -  BDRV_SECTOR_SIZE,
> +  start_offset + i,
>s->cluster_data,
>BDRV_SECTOR_SIZE,
>NULL) < 0) {

This code is still working in blocks of BDRV_SECTOR_SIZE here - which
you do need to keep at least partially because that's the block size
that qcrypto_block_encrypt() works with. qcrypto_block_qcow_encrypt()
even asserts it.

However, this means that even though n_start and n_end are byte-based
now, the code only works correctly with encrypted images if they are
multiples of BDRV_SECTOR_SIZE. This is currently true and we could
assert it, but it would kind of defeat the purpose of the patch.

I suppose you could make unaligned n_start/n_end work if you round down
n_start and round up n_end to the next sector boundary for the
comparison with i. For unaligned requests, we would then write a bit
more than is actually necessary, but I think that's okay because we're
initialising a previously unallocated cluster, so we don't overwrite
valid data.

Kevin

Re: [Qemu-devel] [Bug 1773753] [NEW] virsh managed save fails with qemu version v2.12.0-813-g5a5c383b13-dirty on powerpc

2018-05-28 Thread Peter Xu

On Mon, May 28, 2018 at 09:12:21AM -, Satheesh Rajendran wrote:
> Public bug reported:
> 
> Host Env:
> IBM Power8 with Fedora28 base with compiled upstream kernel, qemu, libvirt.
> 
> Host Kernel: 4.17.0-rc5-00069-g3acf4e395260
> 
> qemu-kvm(5a5c383b1373aeb6c87a0d6060f6c3dc7c53082b):
> v2.12.0-813-g5a5c383b13-dirty
> 
> libvirt(4804a4db33a37f828d033733bc47f6eff5d262c3):
> 
> Guest Kernel: 4.17.0-rc7
> 
> Steps to recreate:
> Define a guest attached with above setup and start.
> # virsh start avocado-vt-vm1
> 
> guest console;...
> # uname -r
> 4.17.0-rc7
> [root@atest-guest ~]# lscpu
> Architecture:ppc64le
> Byte Order:  Little Endian
> CPU(s):  3
> On-line CPU(s) list: 0-2
> Thread(s) per core:  1
> Core(s) per socket:  1
> Socket(s):   3
> NUMA node(s):1
> Model:   2.1 (pvr 004b 0201)
> Model name:  POWER8 (architected), altivec supported
> Hypervisor vendor:   KVM
> Virtualization type: para
> L1d cache:   64K
> L1i cache:   32K
> NUMA node0 CPU(s):   0-2
> 
> 
> # virsh managedsave avocado-vt-vm1 
> 
> Domain avocado-vt-vm1 state saved by libvirt
> 
> # virsh list
>  IdName   State
> 
> 
> # virsh start avocado-vt-vm1 Hangs forever and vm state goes to
> paused.

Libvirt is using fd migration, right?  If so, I suspect this is the
same problem with the iotest failure, and the fix should be in a pull
request:

  Message-Id: <20180525133246.7839-2-quint...@redhat.com>
  Subject: [Qemu-devel] [PULL 1/2] migration: fix exec/fd migrations

Regards,

-- 
Peter Xu

Re: [Qemu-devel] [PATCH v7 00/11] enable numa configuration before machine_init() from QMP

2018-05-28 Thread Igor Mammedov

On Thu, 24 May 2018 17:42:03 +0200
Markus Armbruster  wrote:

> Eduardo Habkost  writes:
> 
> > I'm queueing this version (including v8 of patches 5-7) on
> > numa-next.
> >
> > Markus, Daniel: you were the people I remember expressing some
> > concerns about the new preconfig mechanism.  Do you have any
> > objections to this version?  
> 
> My gut's reaction to preconfig is as nauseous as ever.  It adds
> complexity we wouldn't want or need with sanely structured program
> startup.  It might still be the best we can do, and something we need to
> do.  You have to take on technical debt sometimes.  To know for sure
> this is the case here, we'd have to explore alternatives more seriously.
> Like repaying some of the existing technical debt around there.  Sadly,
> I lack the time to take on such a project.
Usually I do quite a bit of re-factoring to reduce our technical debt,
so while working on enabling VM start with libvirt using only --preconfig
(without -S) I most likely will have to improve code around VM creation.

[...]

Re: [Qemu-devel] [PATCH v2 0/2] vl: Partial support for non-scalar properties with -object

2018-05-28 Thread Markus Armbruster

Daniel P. Berrangé  writes:

> Just a ping to say I'd like us to restart work this patch and try to get
> it mergable for the 2.13 cycle, so I can rely it on for the ACL support
> I've had out of tree since 2.6 :-)

Fair enough.  I'll see what I can do.

Re: [Qemu-devel] [PATCH v7 04/11] hmp: disable monitor in preconfig state

2018-05-28 Thread Igor Mammedov

On Fri, 25 May 2018 16:39:34 -0300
Eduardo Habkost  wrote:

> On Fri, May 25, 2018 at 08:05:30AM +0200, Markus Armbruster wrote:
> > Eduardo Habkost  writes:
> >   
> > > On Thu, May 24, 2018 at 08:16:20PM +0200, Markus Armbruster wrote:  
> > >> Markus Armbruster  writes:
> > >>   
> > >> > Igor Mammedov  writes:
> > >> >  
> > >> >> Ban it for now, if someone would need it to work early,
> > >> >> one would have to implement checks if HMP command is valid
> > >> >> at preconfig state.
> > >> >>
> > >> >> Signed-off-by: Igor Mammedov 
> > >> >> Reviewed-by: Eric Blake 
> > >> >> ---
> > >> >> v5:
> > >> >>   * add 'use QMP instead" to error message, suggesting user
> > >> >> the right interface to use
> > >> >> v4:
> > >> >>   * v3 was only printing error but not preventing command execution,
> > >> >> Fix it by returning after printing error message.
> > >> >> ("Dr. David Alan Gilbert" )
> > >> >> ---
> > >> >>  monitor.c | 6 ++
> > >> >>  1 file changed, 6 insertions(+)
> > >> >>
> > >> >> diff --git a/monitor.c b/monitor.c
> > >> >> index 39f8ee1..0ffdf1d 100644
> > >> >> --- a/monitor.c
> > >> >> +++ b/monitor.c
> > >> >> @@ -3374,6 +3374,12 @@ static void handle_hmp_command(Monitor *mon, 
> > >> >> const char *cmdline)
> > >> >>  
> > >> >>  trace_handle_hmp_command(mon, cmdline);
> > >> >>  
> > >> >> +if (runstate_check(RUN_STATE_PRECONFIG)) {
> > >> >> +monitor_printf(mon, "HMP not available in preconfig state, "
> > >> >> +"use QMP instead\n");
> > >> >> +return;
> > >> >> +}
> > >> >> +
> > >> >>  cmd = monitor_parse_command(mon, cmdline, , 
> > >> >> mon->cmd_table);
> > >> >>  if (!cmd) {
> > >> >>  return;  
> > >> >
> > >> > So we offer the user an HMP monitor, but we summarily fail all 
> > >> > commands.
> > >> > I'm sorry, but that's... searching for polite word... embarrassing.  We
> > >> > should accept HMP output only when we're ready to accept it.  Yes, that
> > >> > would involve a bit more surgery rather than this cheap hack.  The 
> > >> > whole
> > >> > preconfig thing smells like a cheap hack to me, but let's not overdo 
> > >> > it.  
> > >> 
> > >> Clarification: I don't think we need to hold the series because of
> > >> this.  I do think you should investigate delaying HMP until it can work. 
> > >>  
> > >
> > > What would it mean to delay HMP?  Not creating the socket?
> > > Creating the socket but not accepting clients?  Accepting clients
> > > but not consuming any input from the socket until we are out of
> > > preconfig?
> > >
> > > I'm not sure if any of those options would be better.  If a human
> > > is already trying to talk on the other side, it seems better to
> > > show QEMU is alive (but not ready to hold a conversation yet)
> > > than staying silent.  
> > 
> > If this
> > 
> > QEMU 2.12.50 monitor - type 'help' for more information
> > (qemu) help
> > HMP not available in preconfig state, use QMP instead
> > (qemu) quit
> > HMP not available in preconfig state, use QMP instead
> > (qemu) let me out dammit
> > HMP not available in preconfig state, use QMP instead
> > (qemu) 
> > 
> > is better than the alternatives, then I wonder how much more
> > entertainment the alternatives could provide!
> > 
> > We *can* do better.  Start like this:
> > 
> > QEMU 2.12.50 monitor is not ready with -preconfig until you complete
> > configuration with QMP
> > 
> > and when we exit preconfig state, add:
> > 
> > QEMU 2.12.50 monitor - type 'help' for more information
> > (qemu) 
> > 
> > Note that this is upfront about the monitor not being ready, avoids
> > misleading the user about "help", talks to the user in the user's terms
> > (-preconfig) instead of internal terms (preconfig state), and is more
> > specific on how to ready the monitor.  
> 
> Yes, this sounds better than any of the options I have
> considered.
> 
> Making at least 'help', 'quit', and 'exit-preconfig' work might
> be even better, though.
I'll look into both options and try to come up a patch to make it better.

Re: [Qemu-devel] [PATCH v2 2/2] vl: Partial support for non-scalar properties with -object

2018-05-28 Thread Markus Armbruster

Daniel P. Berrangé  writes:

> On Mon, Aug 14, 2017 at 07:49:54AM +0200, Markus Armbruster wrote:
>> Eric Blake  writes:
>> 
>> > On 08/11/2017 11:05 AM, Markus Armbruster wrote:
>> >> We've wanted -object to support non-scalar properties for a while.
>> >> Dan Berrange tried in "[PATCH v4 00/10]Provide a QOM-based
>> >> authorization API".  Review led to the conclusion that we need to
>> >> replace rather than add to QemuOpts.  Initial work towards that goal
>> >> has been merged to provide -blockdev (commit 8746709), but there's
>> >> substantial work left, mostly due to an bewildering array of
>> >> compatibility problems.
>> >> 
>> >> Even if a full solution is still out of reach, we can have a partial
>> >> solution now: accept -object argument in JSON syntax.  This should
>> >> unblock development work that needs non-scalar properties with
>> >> -object.
>> >> 
>> >> The implementation is similar to -blockdev, except we use the new
>> >> infrastructure only for the new JSON case, and stick to QemuOpts for
>> >> the existing KEY=VALUE,... case, to sidestep compatibility problems.
>> >> 
>> >> If we did this for more options, we'd have to factor out common code.
>> >> But for one option, this will do.
>> >> 
>> >> Signed-off-by: Markus Armbruster 
>> >> ---
>> >>  qapi-schema.json | 14 +++---
>> >>  vl.c | 51 +++
>> >>  2 files changed, 62 insertions(+), 3 deletions(-)
>> >> 
>> >>  static void object_create(bool (*type_predicate)(const char *))
>> >>  {
>> >> +ObjectOptionsQueueEntry *e, *next;
>> >> +
>> >> +QSIMPLEQ_FOREACH_SAFE(e, _queue, entry, next) {
>> >> +if (!type_predicate(e->oo->qom_type)) {
>> >> +continue;
>> >> +}
>> >> +
>> >> +loc_push_restore(>loc);
>> >> +qmp_object_add(e->oo->qom_type, e->oo->id,
>> >> +   e->oo->has_props, e->oo->props, _fatal);
>> >> +loc_pop(>loc);
>> >> +
>> >> +QSIMPLEQ_REMOVE(_queue, e, ObjectOptionsQueueEntry, entry);
>> >> +qapi_free_ObjectOptions(e->oo);
>> >> +}
>> >> +
>> >>  if (qemu_opts_foreach(qemu_find_opts("object"),
>> >
>> > This handles all JSON forms prior to any QemuOpt forms (within the two
>> > priority levels), such that a command line using:
>> >
>> >  -object type,id=1,oldstyle... -object '{'id':2, 'type':..., newstyle...}'
>> >
>> > processes the arguments in a different order than
>> >
>> >  -object type,id=1,oldstyle... -object type,id=2,oldstyle
>> >
>> > But I don't see that as too bad (ideally, someone using the {} JSON
>> > style will use it consistently).
>> 
>> Yes, that's another restriction: if you need your -object evaluated in a
>> certain order, you may have to stick to one of the two syntax variants.
>> 
>> Aside: there are two sane evaluation orders: (1) strictly left to right,
>> and (2) order doesn't matter.  QEMU of course does (3) unpredicable for
>> humans without referring back to the source code.
>
> IIUC, to "fix" the ordering problem we need to be able to consider the
> ordering of all QEMU args, not just -object.  

Yes.

> The horrible hack with the two stage setup of -object in vl.c is driven by
> the fact that some objects are referenced by -device/-chardev args, while
> objects are referencing -device/-chardev args etc. This is the big problem
> to untangle, and understandable you don't want to tackle that for this
> patch. Until we can figure out how to address the big problem, it would be
> nice not to introduce yet another ordering though driven off usage of
> json vs non-json syntax.

I proposed the patch as a quick hack to unblock development.  We then
decided we had nothing worthwhile to unblock at the time.

You now want it as a stop gap so certain features don't have to wait
until I crack the more general command line expressiveness problem.  I
agree we should reconsider what warts and restrictions are acceptable
for this different, more serious purpose.

I doubt the reordering wart and its accompanying "don't mix dotted keys
with JSON if you need evaluation in order" restriction is inacceptable.
However, avoiding the reordering wart might be easier than debating it,
so let me look into that first.

[Qemu-devel] [Bug 1773753] Re: virsh managed save fails with qemu version v2.12.0-813-g5a5c383b13-dirty on powerpc

2018-05-28 Thread Satheesh Rajendran

To recover from the failed state it requires below steps to be run.

# virsh destroy avocado-vt-vm1
Domain avocado-vt-vm1 destroyed

# virsh undefine --managed-save avocado-vt-vm1
Domain avocado-vt-vm1 has been undefined

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1773753

Title:
  virsh managed save fails with qemu version
  v2.12.0-813-g5a5c383b13-dirty on powerpc

Status in QEMU:
  New

Bug description:
  Host Env:
  IBM Power8 with Fedora28 base with compiled upstream kernel, qemu, libvirt.

  Host Kernel: 4.17.0-rc5-00069-g3acf4e395260

  qemu-kvm(5a5c383b1373aeb6c87a0d6060f6c3dc7c53082b):
  v2.12.0-813-g5a5c383b13-dirty

  libvirt(4804a4db33a37f828d033733bc47f6eff5d262c3):

  Guest Kernel: 4.17.0-rc7

  Steps to recreate:
  Define a guest attached with above setup and start.
  # virsh start avocado-vt-vm1

  guest console;...
  # uname -r
  4.17.0-rc7
  [root@atest-guest ~]# lscpu
  Architecture:ppc64le
  Byte Order:  Little Endian
  CPU(s):  3
  On-line CPU(s) list: 0-2
  Thread(s) per core:  1
  Core(s) per socket:  1
  Socket(s):   3
  NUMA node(s):1
  Model:   2.1 (pvr 004b 0201)
  Model name:  POWER8 (architected), altivec supported
  Hypervisor vendor:   KVM
  Virtualization type: para
  L1d cache:   64K
  L1i cache:   32K
  NUMA node0 CPU(s):   0-2

  
  # virsh managedsave avocado-vt-vm1 

  Domain avocado-vt-vm1 state saved by libvirt

  # virsh list
   IdName   State
  

  # virsh start avocado-vt-vm1 Hangs forever and vm state goes to
  paused.

  
  # virsh list
   IdName   State
  
   87avocado-vt-vm1 paused

  
  P:S:- with same above setup, just changing the qemu-kvm comes bydefault with 
F28 works fine.

  /usr/bin/qemu-kvm --version
  QEMU emulator version 2.11.1(qemu-2.11.1-2.fc28)

  Summary: with above other setup.
  machine type pseries-2.12 and qemu-2.11.1-2.fc28 -Works fine.

  machine type pseries-2.12/pseries-2.13 and qemu
  5a5c383b1373aeb6c87a0d6060f6c3dc7c53082b - Does not work.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1773753/+subscriptions

Re: [Qemu-devel] release retrospective, next release timing, numbering

2018-05-28 Thread Paolo Bonzini

On 02/05/2018 11:10, Daniel P. Berrangé wrote:
>> it would allow to postpone incompatible removals to relatively seldom
>> major releases, add new features during more often minor releases, and
>> fix bugs during regular patch releases.
>>
>> major releases can be scheduled every 1-2 years, for example, minor
>> releases every 3-6 months, and patch releases when needed.
> No, we do not want to extend the deprecation period further just so that
> we can adopt semver.  We explicitly chose "2 releases", so that every
> deprecation warning has the same lifetime - we don't want some deprecations
> to be 4 months long, while other deprecation warnings are 1+1/2 years long.

In practice it's already "at least" 2 releases, if only because
sometimes 1) people forget, or 2) they are busy with more important
things, or 3) we want to provide a good replacement and it turns out to
be harder than 2 releases.

Paolo

Re: [Qemu-devel] [Qemu-ppc] [PATCH 1/4] spapr: remove irq_hint parameter from spapr_irq_alloc()

2018-05-28 Thread Cédric Le Goater

On 05/28/2018 09:18 AM, Thomas Huth wrote:
> On 28.05.2018 09:06, Cédric Le Goater wrote:
>> On 05/28/2018 08:17 AM, Thomas Huth wrote:
>>> On 25.05.2018 16:02, Greg Kurz wrote:
 On Fri, 18 May 2018 18:44:02 +0200
 Cédric Le Goater  wrote:

> This IRQ number hint can possibly be used by the VIO devices if the
> "irq" property is defined on the command line but it seems it is never
> the case. It is not used in libvirt for instance. So, let's remove it
> to simplify future changes.
>

 Setting an irq manually looks a bit anachronistic. I doubt anyone would
 do that nowadays, and the patch does a nice cleanup. So this looks like
 a good idea.
>>> [...]
> diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
> index 472dd6f33a96..cc064f64fccf 100644
> --- a/hw/ppc/spapr_vio.c
> +++ b/hw/ppc/spapr_vio.c
> @@ -455,7 +455,7 @@ static void spapr_vio_busdev_realize(DeviceState 
> *qdev, Error **errp)
>  dev->qdev.id = id;
>  }
>  
> -dev->irq = spapr_irq_alloc(spapr, dev->irq, false, _err);
> +dev->irq = spapr_irq_alloc(spapr, false, _err);

 Silently breaking "irq" like this looks wrong. I'd rather officially remove
 it first (ie, kill spapr_vio_props, -5 lines in spapr_vio.c).

 Of course, this raises the question of interface deprecation, and it should
 theoretically follow the process described at:

 https://wiki.qemu.org/Features/LegacyRemoval#Rules_for_removing_an_interface

 Cc'ing Thomas, our Chief Deprecation Officer, for insights :)
>>>
>>> The property is a public interface. Just because it's not used by
>>> libvirt does not mean that nobody is using it. So yes, please follow the
>>> rules and mark it as deprecated first for two release, before you really
>>> remove it.
>>
>> This "irq" property is a problem to introduce a new static layout of IRQ 
>> numbers. It is in complete opposition. 
>>
>> Can we keep it as it is for old pseries machine (settable) and ignore it 
>> for newer ? Would that be fine ?
> 
> I think that would be fine, too. You likely need to keep the settable
> IRQs around for the old machines anyway, to make sure that migration of
> the old machine types still works, right?

Yes, that is the goal of patch 3. It introduces a common sPAPR IRQ frontend,
the first backend being the current one.

C.

[Qemu-devel] [Bug 1773753] [NEW] virsh managed save fails with qemu version v2.12.0-813-g5a5c383b13-dirty on powerpc

2018-05-28 Thread Satheesh Rajendran

Public bug reported:

Host Env:
IBM Power8 with Fedora28 base with compiled upstream kernel, qemu, libvirt.

Host Kernel: 4.17.0-rc5-00069-g3acf4e395260

qemu-kvm(5a5c383b1373aeb6c87a0d6060f6c3dc7c53082b):
v2.12.0-813-g5a5c383b13-dirty

libvirt(4804a4db33a37f828d033733bc47f6eff5d262c3):

Guest Kernel: 4.17.0-rc7

Steps to recreate:
Define a guest attached with above setup and start.
# virsh start avocado-vt-vm1

guest console;...
# uname -r
4.17.0-rc7
[root@atest-guest ~]# lscpu
Architecture:ppc64le
Byte Order:  Little Endian
CPU(s):  3
On-line CPU(s) list: 0-2
Thread(s) per core:  1
Core(s) per socket:  1
Socket(s):   3
NUMA node(s):1
Model:   2.1 (pvr 004b 0201)
Model name:  POWER8 (architected), altivec supported
Hypervisor vendor:   KVM
Virtualization type: para
L1d cache:   64K
L1i cache:   32K
NUMA node0 CPU(s):   0-2


# virsh managedsave avocado-vt-vm1 

Domain avocado-vt-vm1 state saved by libvirt

# virsh list
 IdName   State


# virsh start avocado-vt-vm1 Hangs forever and vm state goes to
paused.


# virsh list
 IdName   State

 87avocado-vt-vm1 paused


P:S:- with same above setup, just changing the qemu-kvm comes bydefault with 
F28 works fine.

/usr/bin/qemu-kvm --version
QEMU emulator version 2.11.1(qemu-2.11.1-2.fc28)

Summary: with above other setup.
machine type pseries-2.12 and qemu-2.11.1-2.fc28 -Works fine.

machine type pseries-2.12/pseries-2.13 and qemu
5a5c383b1373aeb6c87a0d6060f6c3dc7c53082b - Does not work.

** Affects: qemu
 Importance: Undecided
 Status: New

** Attachment added: "vm xml"
   https://bugs.launchpad.net/bugs/1773753/+attachment/5145459/+files/vm.xml

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1773753

Title:
  virsh managed save fails with qemu version
  v2.12.0-813-g5a5c383b13-dirty on powerpc

Status in QEMU:
  New

Bug description:
  Host Env:
  IBM Power8 with Fedora28 base with compiled upstream kernel, qemu, libvirt.

  Host Kernel: 4.17.0-rc5-00069-g3acf4e395260

  qemu-kvm(5a5c383b1373aeb6c87a0d6060f6c3dc7c53082b):
  v2.12.0-813-g5a5c383b13-dirty

  libvirt(4804a4db33a37f828d033733bc47f6eff5d262c3):

  Guest Kernel: 4.17.0-rc7

  Steps to recreate:
  Define a guest attached with above setup and start.
  # virsh start avocado-vt-vm1

  guest console;...
  # uname -r
  4.17.0-rc7
  [root@atest-guest ~]# lscpu
  Architecture:ppc64le
  Byte Order:  Little Endian
  CPU(s):  3
  On-line CPU(s) list: 0-2
  Thread(s) per core:  1
  Core(s) per socket:  1
  Socket(s):   3
  NUMA node(s):1
  Model:   2.1 (pvr 004b 0201)
  Model name:  POWER8 (architected), altivec supported
  Hypervisor vendor:   KVM
  Virtualization type: para
  L1d cache:   64K
  L1i cache:   32K
  NUMA node0 CPU(s):   0-2

  
  # virsh managedsave avocado-vt-vm1 

  Domain avocado-vt-vm1 state saved by libvirt

  # virsh list
   IdName   State
  

  # virsh start avocado-vt-vm1 Hangs forever and vm state goes to
  paused.

  
  # virsh list
   IdName   State
  
   87avocado-vt-vm1 paused

  
  P:S:- with same above setup, just changing the qemu-kvm comes bydefault with 
F28 works fine.

  /usr/bin/qemu-kvm --version
  QEMU emulator version 2.11.1(qemu-2.11.1-2.fc28)

  Summary: with above other setup.
  machine type pseries-2.12 and qemu-2.11.1-2.fc28 -Works fine.

  machine type pseries-2.12/pseries-2.13 and qemu
  5a5c383b1373aeb6c87a0d6060f6c3dc7c53082b - Does not work.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1773753/+subscriptions

Re: [Qemu-devel] [PATCH v2 2/2] vl: Partial support for non-scalar properties with -object

2018-05-28 Thread Markus Armbruster

Daniel P. Berrangé  writes:

> On Mon, Aug 14, 2017 at 01:24:17PM +0200, Markus Armbruster wrote:
>> "Daniel P. Berrange"  writes:
>> 
>> > On Fri, Aug 11, 2017 at 12:47:44PM -0500, Eric Blake wrote:
>> >> On 08/11/2017 11:05 AM, Markus Armbruster wrote:
>> >> > We've wanted -object to support non-scalar properties for a while.
>> >> > Dan Berrange tried in "[PATCH v4 00/10]Provide a QOM-based
>> >> > authorization API".  Review led to the conclusion that we need to
>> >> > replace rather than add to QemuOpts.  Initial work towards that goal
>> >> > has been merged to provide -blockdev (commit 8746709), but there's
>> >> > substantial work left, mostly due to an bewildering array of
>> >> > compatibility problems.
>> >> > 
>> >> > Even if a full solution is still out of reach, we can have a partial
>> >> > solution now: accept -object argument in JSON syntax.  This should
>> >> > unblock development work that needs non-scalar properties with
>> >> > -object.
>> >> > 
>> >> > The implementation is similar to -blockdev, except we use the new
>> >> > infrastructure only for the new JSON case, and stick to QemuOpts for
>> >> > the existing KEY=VALUE,... case, to sidestep compatibility problems.
>> >> > 
>> >> > If we did this for more options, we'd have to factor out common code.
>> >> > But for one option, this will do.
>> >> > 
>> >> > Signed-off-by: Markus Armbruster 
>> >> > ---
>> >> >  qapi-schema.json | 14 +++---
>> >> >  vl.c | 51 
>> >> > +++
>> >> >  2 files changed, 62 insertions(+), 3 deletions(-)
>> >> > 
>> >> >  static void object_create(bool (*type_predicate)(const char *))
>> >> >  {
>> >> > +ObjectOptionsQueueEntry *e, *next;
>> >> > +
>> >> > +QSIMPLEQ_FOREACH_SAFE(e, _queue, entry, next) {
>> >> > +if (!type_predicate(e->oo->qom_type)) {
>> >> > +continue;
>> >> > +}
>> >> > +
>> >> > +loc_push_restore(>loc);
>> >> > +qmp_object_add(e->oo->qom_type, e->oo->id,
>> >> > +   e->oo->has_props, e->oo->props, _fatal);
>> >> > +loc_pop(>loc);
>> >> > +
>> >> > +QSIMPLEQ_REMOVE(_queue, e, ObjectOptionsQueueEntry, entry);
>> >> > +qapi_free_ObjectOptions(e->oo);
>> >> > +}
>> >> > +
>> >> >  if (qemu_opts_foreach(qemu_find_opts("object"),
>> >> 
>> >> This handles all JSON forms prior to any QemuOpt forms (within the two
>> >> priority levels), such that a command line using:
>> >> 
>> >>  -object type,id=1,oldstyle... -object '{'id':2, 'type':..., newstyle...}'
>> >> 
>> >> processes the arguments in a different order than
>> >> 
>> >>  -object type,id=1,oldstyle... -object type,id=2,oldstyle
>> >> 
>> >> But I don't see that as too bad (ideally, someone using the {} JSON
>> >> style will use it consistently).
>> >
>> > I don't really like such a constraint - the ordering of object
>> > creation is already complex with some objets created at a different
>> > point in startup to other objects. Adding yet another constraint
>> > feels like it is painting ourselves into a corner wrt future changes.
>> 
>> The full solution will evaluate -object left to right.
>> 
>> This partial solution doesn't, but it's not meant for use in anger, just
>> for unblocking development work.  Can add scary warnings to deter
>> premature use.
>> 
>> > In particular I think it is quite possible to use the dotted
>> > form primarily, and only use JSON for the immediate scenario
>> > where non-JSON form won't work - I expect that's how we would
>> > use it in libvirt - I don't see libvirt changing 100% to JSON
>> > based objects
>> 
>> You need the JSON form anyway for QMP, and for the cases where dotted
>> keys break down.  Doing both just for the command line requires code to
>> do dotted keys (which may already exist), and code to decide whether it
>> can be used (which probably doesn't exist, yet).
>> 
>> Wouldn't this add complexity?  For what benefit exactly?
>
> Surprisingly, it appears we do actually have code that generates the
> JSON syntax for (probably) all uses of -object today. In fact we are
> actually generating JSON and then converting it to dotted syntax in
> most cases, which I didn't realize when writing the above.
>
> We'll have to keep support for dotted syntax around a while for old
> QEMU versions, but it looks like we could reasonably easily switch
> to JSON syntax for all -object usage at the same time.

Excellent.

Re: [Qemu-devel] [PATCH] ARM: ACPI: Fix use-after-free due to memory realloc

2018-05-28 Thread Auger Eric

Hi Shannon,

On 05/28/2018 10:42 AM, Shannon Zhao wrote:
> acpi_data_push uses g_array_set_size to resize the memory size. If there
> is no enough contiguous memory, the address will be changed. So previous
> pointer could not be used any more. It must update the pointer and use
> the new one.
> 
> Signed-off-by: Shannon Zhao 
Reviewed-by: Eric Auger 

Thanks!

Eric
> ---
>  hw/arm/virt-acpi-build.c | 14 ++
>  1 file changed, 10 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
> index 92ceee9..30584ee 100644
> --- a/hw/arm/virt-acpi-build.c
> +++ b/hw/arm/virt-acpi-build.c
> @@ -400,7 +400,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, 
> VirtMachineState *vms)
>  AcpiIortItsGroup *its;
>  AcpiIortTable *iort;
>  AcpiIortSmmu3 *smmu;
> -size_t node_size, iort_length, smmu_offset = 0;
> +size_t node_size, iort_node_offset, iort_length, smmu_offset = 0;
>  AcpiIortRC *rc;
>  
>  iort = acpi_data_push(table_data, sizeof(*iort));
> @@ -414,6 +414,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, 
> VirtMachineState *vms)
>  iort_length = sizeof(*iort);
>  iort->node_count = cpu_to_le32(nb_nodes);
>  iort->node_offset = cpu_to_le32(sizeof(*iort));
> +iort_node_offset = iort->node_offset;
>  
>  /* ITS group node */
>  node_size =  sizeof(*its) + sizeof(uint32_t);
> @@ -429,7 +430,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, 
> VirtMachineState *vms)
>  int irq =  vms->irqmap[VIRT_SMMU];
>  
>  /* SMMUv3 node */
> -smmu_offset = iort->node_offset + node_size;
> +smmu_offset = iort_node_offset + node_size;
>  node_size = sizeof(*smmu) + sizeof(*idmap);
>  iort_length += node_size;
>  smmu = acpi_data_push(table_data, node_size);
> @@ -450,7 +451,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, 
> VirtMachineState *vms)
>  idmap->id_count = cpu_to_le32(0x);
>  idmap->output_base = 0;
>  /* output IORT node is the ITS group node (the first node) */
> -idmap->output_reference = cpu_to_le32(iort->node_offset);
> +idmap->output_reference = cpu_to_le32(iort_node_offset);
>  }
>  
>  /* Root Complex Node */
> @@ -479,9 +480,14 @@ build_iort(GArray *table_data, BIOSLinker *linker, 
> VirtMachineState *vms)
>  idmap->output_reference = cpu_to_le32(smmu_offset);
>  } else {
>  /* output IORT node is the ITS group node (the first node) */
> -idmap->output_reference = cpu_to_le32(iort->node_offset);
> +idmap->output_reference = cpu_to_le32(iort_node_offset);
>  }
>  
> +/*
> + * Update the pointer address in case table_data->data moved during above
> + * acpi_data_push operations.
> + */
> +iort = (AcpiIortTable *)(table_data->data + iort_start);
>  iort->length = cpu_to_le32(iort_length);
>  
>  build_header(linker, table_data, (void *)(table_data->data + iort_start),
>

Re: [Qemu-devel] [PATCH v5 43/49] docker: move debian-powerpc-cross to sid based build

2018-05-28 Thread Fam Zheng

On Fri, 05/25 15:19, Alex Bennée wrote:
> The original Jessie based cross builder hasn't worked for a while. The
> state of the libraries is still perilous for cross-building QEMU but
> we can use it for building TCG tests.
> 
> The debian-apt-fake.sh script can also be dropped as it is no longer
> used.
> 
> Signed-off-by: Alex Bennée 
> ---
>  tests/docker/Makefile.include |  4 +-
>  tests/docker/dockerfiles/debian-apt-fake.sh   | 46 ---
>  .../dockerfiles/debian-powerpc-cross.docker   | 39 +++-
>  tests/docker/dockerfiles/debian8.docker   |  3 --
>  4 files changed, 7 insertions(+), 85 deletions(-)
>  delete mode 100755 tests/docker/dockerfiles/debian-apt-fake.sh
> 
> diff --git a/tests/docker/Makefile.include b/tests/docker/Makefile.include
> index 36779645d7..d59314ee65 100644
> --- a/tests/docker/Makefile.include
> +++ b/tests/docker/Makefile.include
> @@ -46,8 +46,6 @@ docker-image-%: $(DOCKER_FILES_DIR)/%.docker
>   $(if $(EXECUTABLE),--include-executable=$(EXECUTABLE)),\
>   "BUILD","$*")
>  
> -docker-image-debian-powerpc-cross: 
> EXTRA_FILES:=$(SRC_PATH)/tests/docker/dockerfiles/debian-apt-fake.sh
> -
>  # Enforce dependencies for composite images
>  docker-image-debian: docker-image-debian9
>  docker-image-debian8-mxe: docker-image-debian8
> @@ -57,7 +55,6 @@ docker-image-debian-armhf-cross: docker-image-debian9
>  docker-image-debian-arm64-cross: docker-image-debian9
>  docker-image-debian-mips-cross: docker-image-debian9
>  docker-image-debian-mips64el-cross: docker-image-debian9
> -docker-image-debian-powerpc-cross: docker-image-debian8
>  docker-image-debian-ppc64el-cross: docker-image-debian9
>  docker-image-debian-s390x-cross: docker-image-debian9
>  docker-image-debian-win32-cross: docker-image-debian8-mxe
> @@ -69,6 +66,7 @@ docker-image-debian-sh4-cross: docker-image-debian-sid
>  docker-image-debian-sparc64-cross: docker-image-debian-sid
>  docker-image-debian-mips64-cross: docker-image-debian-sid
>  docker-image-debian-riscv64-cross: docker-image-debian-sid
> +docker-image-debian-powerpc-cross: docker-image-debian-sid
>  docker-image-travis: NOUSER=1
>  
>  # Expand all the pre-requistes for each docker image and test combination
> diff --git a/tests/docker/dockerfiles/debian-apt-fake.sh 
> b/tests/docker/dockerfiles/debian-apt-fake.sh
> deleted file mode 100755
> index 2ec0fdf47a..00
> --- a/tests/docker/dockerfiles/debian-apt-fake.sh
> +++ /dev/null
> @@ -1,46 +0,0 @@
> -#! /bin/sh
> -#
> -# Generate fake debian package to resolve unimportant unmet dependencies held
> -# by upstream multiarch broken packages.
> -#
> -# Copyright (c) 2017 Philippe Mathieu-Daudé 
> -#
> -# This work is licensed under the terms of the GNU GPL, version 2
> -# or (at your option) any later version. See the COPYING file in
> -# the top-level directory.
> -
> -test $1 = "install" && shift 1
> -
> -fake_install()
> -{
> -echo "Generating fake $2 $1 $3 ..."
> -(cd /var/cache/apt/archives
> -(cat << 'EOF'
> -Section: misc
> -Priority: optional
> -Standards-Version: 3.9.2
> -
> -Package: NAME
> -Version: VERSION
> -Maintainer: qemu-devel@nongnu.org
> -Architecture: any
> -Multi-Arch: same
> -Description: fake NAME
> -EOF
> -) | sed s/NAME/$2/g | sed s/VERSION/$3/g > $2.control
> -equivs-build -a $1 $2.control 1>/dev/null 2>/dev/null
> -dpkg -i --force-overwrite $2_$3_$1.deb
> -)
> -}
> -
> -try_install()
> -{
> -name=$(echo $1|sed "s/\(.*\):\(.*\)=\(.*\)/\1/")
> -arch=$(echo $1|sed "s/\(.*\):\(.*\)=\(.*\)/\2/")
> -vers=$(echo $1|sed "s/\(.*\):\(.*\)=\(.*\)/\3/")
> -apt-get install -q -yy $1 || fake_install $arch $name $vers
> -}
> -
> -for package in $*; do
> -try_install $package
> -done
> diff --git a/tests/docker/dockerfiles/debian-powerpc-cross.docker 
> b/tests/docker/dockerfiles/debian-powerpc-cross.docker
> index a5dd46b4ac..5e62ca0df1 100644
> --- a/tests/docker/dockerfiles/debian-powerpc-cross.docker
> +++ b/tests/docker/dockerfiles/debian-powerpc-cross.docker
> @@ -1,40 +1,13 @@
>  #
>  # Docker powerpc cross-compiler target
>  #
> -# This docker target builds on the debian Jessie base image.
> +# This docker target builds on the debian sid base image which
> +# contains cross compilers for Debian "ports" targets. The original
> +# Jessie based no longer builds.
>  #
> -FROM qemu:debian8
> -MAINTAINER Philippe Mathieu-Daudé 
> +FROM qemu:debian-sid
>  
> -# Add the foreign architecture we want and install dependencies
> -RUN dpkg --add-architecture powerpc
> -RUN apt-get update
>  RUN DEBIAN_FRONTEND=noninteractive eatmydata \
>  apt-get install -y --no-install-recommends \
> -crossbuild-essential-powerpc
> -
> -#  to fix "following packages have unmet dependencies" ...
> -ADD debian-apt-fake.sh /usr/local/bin/apt-fake
> -RUN apt-get install -y --no-install-recommends \
> -equivs

Re: [Qemu-devel] [PULL 20/37] qcow2: Give the refcount cache the minimum possible size by default

2018-05-28 Thread Alberto Garcia

On Mon 28 May 2018 10:38:55 AM CEST, Kevin Wolf wrote:
>> > +if (!refcount_cache_size_set) {
>> > +*refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE * 
>> > s->cluster_size;
>> 
>> ...but in the else clause down here, we don't have the cast, and
>> Coverity complains that we evaluate a 32-bit result and then put it
>> in a 64-bit variable. Should this have the (uint64_t) cast as well ?

I suppose that's not because we put a 32-bit result in a 64-bit
variable, but because it could theoretically overflow (if
s->cluster_size > INT32_MAX / 4).

> It's a false positive, MIN_REFCOUNT_CACHE_SIZE is 4 and
> s->cluster_size is at most 2 MB, so this will never overflow.
>
> I guess we can change the code anyway to silence it?

Same opinion here, no problem with adding a cast to silence the warning.

Berto

[Qemu-devel] [PATCH] ARM: ACPI: Fix use-after-free due to memory realloc

2018-05-28 Thread Shannon Zhao

acpi_data_push uses g_array_set_size to resize the memory size. If there
is no enough contiguous memory, the address will be changed. So previous
pointer could not be used any more. It must update the pointer and use
the new one.

Signed-off-by: Shannon Zhao 
---
 hw/arm/virt-acpi-build.c | 14 ++
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c
index 92ceee9..30584ee 100644
--- a/hw/arm/virt-acpi-build.c
+++ b/hw/arm/virt-acpi-build.c
@@ -400,7 +400,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
 AcpiIortItsGroup *its;
 AcpiIortTable *iort;
 AcpiIortSmmu3 *smmu;
-size_t node_size, iort_length, smmu_offset = 0;
+size_t node_size, iort_node_offset, iort_length, smmu_offset = 0;
 AcpiIortRC *rc;
 
 iort = acpi_data_push(table_data, sizeof(*iort));
@@ -414,6 +414,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
 iort_length = sizeof(*iort);
 iort->node_count = cpu_to_le32(nb_nodes);
 iort->node_offset = cpu_to_le32(sizeof(*iort));
+iort_node_offset = iort->node_offset;
 
 /* ITS group node */
 node_size =  sizeof(*its) + sizeof(uint32_t);
@@ -429,7 +430,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
 int irq =  vms->irqmap[VIRT_SMMU];
 
 /* SMMUv3 node */
-smmu_offset = iort->node_offset + node_size;
+smmu_offset = iort_node_offset + node_size;
 node_size = sizeof(*smmu) + sizeof(*idmap);
 iort_length += node_size;
 smmu = acpi_data_push(table_data, node_size);
@@ -450,7 +451,7 @@ build_iort(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
 idmap->id_count = cpu_to_le32(0x);
 idmap->output_base = 0;
 /* output IORT node is the ITS group node (the first node) */
-idmap->output_reference = cpu_to_le32(iort->node_offset);
+idmap->output_reference = cpu_to_le32(iort_node_offset);
 }
 
 /* Root Complex Node */
@@ -479,9 +480,14 @@ build_iort(GArray *table_data, BIOSLinker *linker, 
VirtMachineState *vms)
 idmap->output_reference = cpu_to_le32(smmu_offset);
 } else {
 /* output IORT node is the ITS group node (the first node) */
-idmap->output_reference = cpu_to_le32(iort->node_offset);
+idmap->output_reference = cpu_to_le32(iort_node_offset);
 }
 
+/*
+ * Update the pointer address in case table_data->data moved during above
+ * acpi_data_push operations.
+ */
+iort = (AcpiIortTable *)(table_data->data + iort_start);
 iort->length = cpu_to_le32(iort_length);
 
 build_header(linker, table_data, (void *)(table_data->data + iort_start),
-- 
2.0.4

Re: [Qemu-devel] [PATCH 00/14] block: Make blockdev-create a job and stable API

2018-05-28 Thread Kevin Wolf

Am 25.05.2018 um 20:13 hat Eric Blake geschrieben:
> On 05/25/2018 11:33 AM, Kevin Wolf wrote:
> > This changes the x-blockdev-create QMP command so that it doesn't block
> > the monitor and the main loop any more, but starts a background job that
> > performs the image creation.
> > 
> > The basic job as implemented here is all that is necessary to make image
> > creation asynchronous and to provide a QMP interface that can be marked
> > stable, but it still lacks a few features that jobs usually provide: The
> > job will ignore pause commands and it doesn't publish progress yet (so
> > both current-progress and total-progress stay at 0). These features can
> > be added later without breaking compatibility.
> 
> Can we at least have total-progress start at 1, and current-progress move
> from 0 to 1 at completion?  Seeing a 0/1 => 1/1 transition is better than a
> divide-by-zero 0/0 ratio throughout the entire job; and libvirt doesn't want
> to add any more special-casing of 0/0 than it already has (where it wants to
> treat that as "job not yet started" rather than the more usual sense that if
> total==current the job is hopefully complete).

Sure, I can do that.

Kevin

Re: [Qemu-devel] [PULL 20/37] qcow2: Give the refcount cache the minimum possible size by default

2018-05-28 Thread Kevin Wolf

Am 25.05.2018 um 19:10 hat Peter Maydell geschrieben:
> On 15 May 2018 at 16:40, Kevin Wolf  wrote:
> > From: Alberto Garcia 
> >
> > The L2 and refcount caches have default sizes that can be overridden
> > using the l2-cache-size and refcount-cache-size (an additional
> > parameter named cache-size sets the combined size of both caches).
> 
> Hi; Coverity raises a nit about this patch (CID 1391229):

CCing Berto as the patch author.

> > diff --git a/block/qcow2.c b/block/qcow2.c
> > index 2f36e632f9..6d532470a8 100644
> > --- a/block/qcow2.c
> > +++ b/block/qcow2.c
> > @@ -802,23 +802,30 @@ static void read_cache_sizes(BlockDriverState *bs, 
> > QemuOpts *opts,
> >  } else if (refcount_cache_size_set) {
> >  *l2_cache_size = combined_cache_size - *refcount_cache_size;
> >  } else {
> > -*refcount_cache_size = combined_cache_size
> > - / (DEFAULT_L2_REFCOUNT_SIZE_RATIO + 1);
> > -*l2_cache_size = combined_cache_size - *refcount_cache_size;
> > +uint64_t virtual_disk_size = bs->total_sectors * 
> > BDRV_SECTOR_SIZE;
> > +uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 
> > 8);
> > +uint64_t min_refcount_cache =
> > +(uint64_t) MIN_REFCOUNT_CACHE_SIZE * s->cluster_size;
> 
> Here we have a (uint64_t) cast that ensures we're doing a 64x64 multiply
> rather than a 32x32 one...
> 
> > +
> > +/* Assign as much memory as possible to the L2 cache, and
> > + * use the remainder for the refcount cache */
> > +if (combined_cache_size >= max_l2_cache + min_refcount_cache) {
> > +*l2_cache_size = max_l2_cache;
> > +*refcount_cache_size = combined_cache_size - 
> > *l2_cache_size;
> > +} else {
> > +*refcount_cache_size =
> > +MIN(combined_cache_size, min_refcount_cache);
> > +*l2_cache_size = combined_cache_size - 
> > *refcount_cache_size;
> > +}
> >  }
> >  } else {
> > -if (!l2_cache_size_set && !refcount_cache_size_set) {
> > +if (!l2_cache_size_set) {
> >  *l2_cache_size = MAX(DEFAULT_L2_CACHE_BYTE_SIZE,
> >   (uint64_t)DEFAULT_L2_CACHE_CLUSTERS
> >   * s->cluster_size);
> > -*refcount_cache_size = *l2_cache_size
> > - / DEFAULT_L2_REFCOUNT_SIZE_RATIO;
> > -} else if (!l2_cache_size_set) {
> > -*l2_cache_size = *refcount_cache_size
> > -   * DEFAULT_L2_REFCOUNT_SIZE_RATIO;
> > -} else if (!refcount_cache_size_set) {
> > -*refcount_cache_size = *l2_cache_size
> > - / DEFAULT_L2_REFCOUNT_SIZE_RATIO;
> > +}
> > +if (!refcount_cache_size_set) {
> > +*refcount_cache_size = MIN_REFCOUNT_CACHE_SIZE * 
> > s->cluster_size;
> 
> ...but in the else clause down here, we don't have the cast, and
> Coverity complains that we evaluate a 32-bit result and then
> put it in a 64-bit variable. Should this have the (uint64_t)
> cast as well ?

It's a false positive, MIN_REFCOUNT_CACHE_SIZE is 4 and s->cluster_size
is at most 2 MB, so this will never overflow.

I guess we can change the code anyway to silence it?

Kevin

Re: [Qemu-devel] [PATCH 0/6] Update Linux headers to 4.17-rc6

2018-05-28 Thread Paolo Bonzini

On 25/05/2018 16:00, Michael S. Tsirkin wrote:
> On Fri, May 25, 2018 at 02:27:49PM +0100, Peter Maydell wrote:
>> This series updates our copy of the Linux kernel headers to 4.17-rc6.
>> To do that we have to fix up some issues:
>>  * we had a hand-hacked definition of VIRTIO_GPU_CAPSET_VIRGL2
>>in our old header copy that needs to be moved to a header
>>that isn't auto-updated
>>  * we need to turn __aligned_u64 into a portable type
>>  * KVM_HINTS_DEDICATED was renamed to KVM_HINTS_REALTIME
>>  * the kernel's licensing info is no longer solely in the COPYING file
>>
>> thanks
>> -- PMM
> 
> Reviewed-by: Michael S. Tsirkin 

Queued, thanks.

Paolo

> 
>> Alex Williamson (1):
>>   virtio-gpu-3d: Define VIRTIO_GPU_CAPSET_VIRGL2 elsewhere
>>
>> Peter Maydell (5):
>>   scripts/update-linux-headers: Handle __aligned_u64
>>   scripts/update-linux-headers: Handle kernel license no longer being
>> one file
>>   target/i386/kvm.c: Handle renaming of KVM_HINTS_DEDICATED
>>   Update Linux headers to 4.17-rc6
>>   target/i386/kvm.c: Remove compatibility shim for KVM_HINTS_REALTIME
>>
>>  include/hw/virtio/virtio-gpu.h|   6 +
>>  include/standard-headers/asm-x86/hyperv.h |   1 -
>>  include/standard-headers/asm-x86/kvm_para.h   |   2 +-
>>  include/standard-headers/linux/ethtool.h  |  36 +-
>>  include/standard-headers/linux/input.h|   4 +-
>>  include/standard-headers/linux/pci_regs.h |   7 +-
>>  .../standard-headers/linux/virtio_balloon.h   |  15 +
>>  include/standard-headers/linux/virtio_gpu.h   |   1 -
>>  .../standard-headers/rdma/vmw_pvrdma-abi.h|  49 +--
>>  linux-headers/asm-arm/kvm.h   |  15 +
>>  linux-headers/asm-arm64/kvm.h |   6 +
>>  linux-headers/asm-x86/hyperv.h|   1 -
>>  linux-headers/asm-x86/kvm.h   |  19 +-
>>  linux-headers/linux/kvm.h |  30 +-
>>  linux-headers/linux/vfio.h|  27 ++
>>  target/i386/kvm.c |   2 +-
>>  linux-headers/COPYING | 358 +-
>>  .../LICENSES/exceptions/Linux-syscall-note|  25 ++
>>  linux-headers/LICENSES/preferred/BSD-2-Clause |  32 ++
>>  linux-headers/LICENSES/preferred/BSD-3-Clause |  36 ++
>>  .../{COPYING => LICENSES/preferred/GPL-2.0}   |  27 +-
>>  scripts/update-linux-headers.sh   |  17 +-
>>  22 files changed, 310 insertions(+), 406 deletions(-)
>>  delete mode 100644 include/standard-headers/asm-x86/hyperv.h
>>  delete mode 100644 linux-headers/asm-x86/hyperv.h
>>  create mode 100644 linux-headers/LICENSES/exceptions/Linux-syscall-note
>>  create mode 100644 linux-headers/LICENSES/preferred/BSD-2-Clause
>>  create mode 100644 linux-headers/LICENSES/preferred/BSD-3-Clause
>>  copy linux-headers/{COPYING => LICENSES/preferred/GPL-2.0} (96%)
>>
>> -- 
>> 2.17.0
>

Re: [Qemu-devel] [PATCH v7 1/3] qmp: adding 'wakeup-suspend-support' in query-target

2018-05-28 Thread Markus Armbruster

Eduardo Habkost  writes:

> On Fri, May 25, 2018 at 08:30:59AM +0200, Markus Armbruster wrote:
>> Eduardo Habkost  writes:
>> > On Wed, May 23, 2018 at 05:53:34PM +0200, Markus Armbruster wrote:
> [...]
>> >> >> Worse, a machine type property that is static for all machine types now
>> >> >> could conceivably become dynamic when we add a machine type
>> >> >> configuration knob.
>> >> >> 
>> >> >
>> >> > This isn't the first time a machine capability that seems static
>> >> > actually depends on other configuration arguments.  We will
>> >> > probably need to address this eventually.
>> >> 
>> >> Then the best time to address it is now, provided we can :)
>> >
>> > I'm not sure this is the best time.  If libvirt only needs the
>> > runtime value and don't need any info at query-machines time, I
>> > think support for this on query-machines will be left unused and
>> > they will only use the query-current-machine value.
>> >
>> > Just giving libvirt the runtime data it wants
>> > (query-current-machine) seems way better than requiring libvirt
>> > to interpret a set of rules and independently calculate something
>> > QEMU already knows.
>> 
>> I wouldn't mind adding a query-current-machine to report dynamic machine
>> capabilities if that helps QMP clients.  query-machines could continue
>> to report static machine capabilities then.
>> 
>> However, we do need a plan on how to distribute machine capabilities
>> between query-machines and query-current-machine, in particular how to
>> handle changing staticness.
>
> Handling dynamic data that becomes static is easy: we can keep it
> on both.

The duplication is less than elegant, but backward-compatibility
generally is.

>> wakeup-suspend-support is static for most machine types, but dynamic for
>> some.  Where should it go?
>
> I think it obviously should go on query-current-machine.  Maybe
> it can be added to query-machines in the future if it's deemed
> useful.
>
>> It needs to go into query-current-machine when its dynamic with the
>> current machine.  It may go there just to keep things regular even if
>> its static with the current machine.
>> 
>> Does it go into query-machines, too?  If not, clients lose the ability
>> to examine all machines efficiently.  Even if this isn't an issue for
>> wakeup-suspend-support: are we sure this can't be an issue for any
>> future capabilities?
>
> I don't think this will be an issue for wakup-suspend-support,
> but this is already an issue for existing capabilities.  See
> below[1].
>
>
>> 
>> If it goes into query-machines, what should its value be for the machine
>> types where it's dynamic?  Should it be absent, perhaps, letting clients
>> know they have to consult query-current-machine to find the value?
>> 
>> What if a capability previously thought static becomes dynamic?  We can
>> add it to query-current-machine just fine, but removing it from
>> query-machines would be a compatibility break.  Making it optional,
>> too.  Should all new members of MachineInfo be optional, just in case?
>> 
>
> The above are questions we must ponder if we are considering
> extending query-machines.  We have been avoiding them for a few
> years, by simply not extending query-machines very often and
> letting libvirt hardcode machine-type info.  :(
>
>
>> These are design questions we need to ponder *now*.  Picking a solution
>> that satisfies current needs while ignoring future needs has bitten us
>> in the posterior time and again.  We're not going to successfully
>> predict *all* future needs, but not even trying should be easy to beat.
>> 
>> That's what I meant by "the best time to address it is now".
>> 
>
> I agree.  I just think there are countless other use cases that
> require extending query-machines today.  I'd prefer to use one of
> them as a starting point for this design exercise, instead of
> wakeup-suspend-support.

Analyzing all the use cases we know makes sense.

> [1] Doing a:
>   $ git grep 'STR.*machine, "'
> on libvirt source is enough to find some code demonstrating where
> query-machines is already lacking today:
>
> src/libxl/libxl_capabilities.c:583:if (STREQ(machine, "xenpv"))
> src/libxl/libxl_capabilities.c:729:if (STREQ(domCaps->machine, "xenfv"))
> src/libxl/libxl_driver.c:6379:if (STRNEQ(machine, "xenpv") && 
> STRNEQ(machine, "xenfv")) {
> src/qemu/qemu_capabilities.c:2435:STREQ(def->os.machine, 
> "ppce500"))
> src/qemu/qemu_capabilities.c:2439:STREQ(def->os.machine, "prep"))
> src/qemu/qemu_capabilities.c:2443:STREQ(def->os.machine, 
> "bamboo"))
> src/qemu/qemu_capabilities.c:2446:if (STREQ(def->os.machine, 
> "mpc8544ds"))
> src/qemu/qemu_capabilities.c:5376:STREQ(def->os.machine, "isapc");
> src/qemu/qemu_command.c:3829:if (STRPREFIX(def->os.machine, 
> "s390-virtio") &&
> src/qemu/qemu_domain.c:2798:if (STREQ(def->os.machine, "isapc")) {
>

[Qemu-devel] [PATCH v8 4/5] monitor: fix comment for monitor_lock

2018-05-28 Thread Peter Xu

Fix typo in d622cb5879c.  Meanwhile move these variables close to each
other.  monitor_qapi_event_state can be declared static, add that.

Reported-by: Markus Armbruster 
Signed-off-by: Peter Xu 
---
 monitor.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/monitor.c b/monitor.c
index f23178951e..2504696d76 100644
--- a/monitor.c
+++ b/monitor.c
@@ -267,10 +267,11 @@ typedef struct QMPRequest QMPRequest;
 /* QMP checker flags */
 #define QMP_ACCEPT_UNKNOWNS 1
 
-/* Protects mon_list, monitor_event_state.  */
+/* Protects mon_list, monitor_qapi_event_state.  */
 static QemuMutex monitor_lock;
-
+static GHashTable *monitor_qapi_event_state;
 static QTAILQ_HEAD(mon_list, Monitor) mon_list;
+
 static QLIST_HEAD(mon_fdsets, MonFdset) mon_fdsets;
 static int mon_refcount;
 
@@ -572,8 +573,6 @@ static MonitorQAPIEventConf 
monitor_qapi_event_conf[QAPI_EVENT__MAX] = {
 [QAPI_EVENT_VSERPORT_CHANGE]   = { 1000 * SCALE_MS },
 };
 
-GHashTable *monitor_qapi_event_state;
-
 /*
  * Emits the event to every monitor instance, @event is only used for trace
  * Called with monitor_lock held.
-- 
2.17.0

[Qemu-devel] [PATCH v8 5/5] monitor: add lock to protect mon_fdsets

2018-05-28 Thread Peter Xu

Introduce a new global big lock for mon_fdsets.  Take it where needed.

The monitor_fdset_get_fd() handling is a bit tricky: now we need to call
qemu_mutex_unlock() which might pollute errno, so we need to make sure
the correct errno be passed up to the callers.  To make things simpler,
we let monitor_fdset_get_fd() return the -errno directly when error
happens, then in qemu_open() we move it back into errno.

Signed-off-by: Peter Xu 
---
 monitor.c | 61 ++-
 stubs/fdset.c |  2 +-
 util/osdep.c  |  3 ++-
 3 files changed, 54 insertions(+), 12 deletions(-)

diff --git a/monitor.c b/monitor.c
index 2504696d76..8709d136bd 100644
--- a/monitor.c
+++ b/monitor.c
@@ -272,7 +272,10 @@ static QemuMutex monitor_lock;
 static GHashTable *monitor_qapi_event_state;
 static QTAILQ_HEAD(mon_list, Monitor) mon_list;
 
+/* Protects mon_fdsets */
+static QemuMutex mon_fdsets_lock;
 static QLIST_HEAD(mon_fdsets, MonFdset) mon_fdsets;
+
 static int mon_refcount;
 
 static mon_cmd_t mon_cmds[];
@@ -287,6 +290,16 @@ static QEMUClockType event_clock_type = 
QEMU_CLOCK_REALTIME;
 static void monitor_command_cb(void *opaque, const char *cmdline,
void *readline_opaque);
 
+/*
+ * This lock can be used very early, even during CLI parsing.
+ * Meanwhile it can also be used even at the end of main.  Let's keep
+ * it initialized for the whole lifecycle of QEMU.
+ */
+static void __attribute__((constructor)) mon_fdsets_lock_init(void)
+{
+qemu_mutex_init(_fdsets_lock);
+}
+
 /**
  * Is @mon a QMP monitor?
  */
@@ -2314,9 +2327,11 @@ static void monitor_fdsets_cleanup(void)
 MonFdset *mon_fdset;
 MonFdset *mon_fdset_next;
 
+qemu_mutex_lock(_fdsets_lock);
 QLIST_FOREACH_SAFE(mon_fdset, _fdsets, next, mon_fdset_next) {
 monitor_fdset_cleanup(mon_fdset);
 }
+qemu_mutex_unlock(_fdsets_lock);
 }
 
 AddfdInfo *qmp_add_fd(bool has_fdset_id, int64_t fdset_id, bool has_opaque,
@@ -2351,6 +2366,7 @@ void qmp_remove_fd(int64_t fdset_id, bool has_fd, int64_t 
fd, Error **errp)
 MonFdsetFd *mon_fdset_fd;
 char fd_str[60];
 
+qemu_mutex_lock(_fdsets_lock);
 QLIST_FOREACH(mon_fdset, _fdsets, next) {
 if (mon_fdset->id != fdset_id) {
 continue;
@@ -2370,10 +2386,12 @@ void qmp_remove_fd(int64_t fdset_id, bool has_fd, 
int64_t fd, Error **errp)
 goto error;
 }
 monitor_fdset_cleanup(mon_fdset);
+qemu_mutex_unlock(_fdsets_lock);
 return;
 }
 
 error:
+qemu_mutex_unlock(_fdsets_lock);
 if (has_fd) {
 snprintf(fd_str, sizeof(fd_str), "fdset-id:%" PRId64 ", fd:%" PRId64,
  fdset_id, fd);
@@ -2389,6 +2407,7 @@ FdsetInfoList *qmp_query_fdsets(Error **errp)
 MonFdsetFd *mon_fdset_fd;
 FdsetInfoList *fdset_list = NULL;
 
+qemu_mutex_lock(_fdsets_lock);
 QLIST_FOREACH(mon_fdset, _fdsets, next) {
 FdsetInfoList *fdset_info = g_malloc0(sizeof(*fdset_info));
 FdsetFdInfoList *fdsetfd_list = NULL;
@@ -2418,6 +2437,7 @@ FdsetInfoList *qmp_query_fdsets(Error **errp)
 fdset_info->next = fdset_list;
 fdset_list = fdset_info;
 }
+qemu_mutex_unlock(_fdsets_lock);
 
 return fdset_list;
 }
@@ -2430,6 +2450,7 @@ AddfdInfo *monitor_fdset_add_fd(int fd, bool 
has_fdset_id, int64_t fdset_id,
 MonFdsetFd *mon_fdset_fd;
 AddfdInfo *fdinfo;
 
+qemu_mutex_lock(_fdsets_lock);
 if (has_fdset_id) {
 QLIST_FOREACH(mon_fdset, _fdsets, next) {
 /* Break if match found or match impossible due to ordering by ID 
*/
@@ -2450,6 +2471,7 @@ AddfdInfo *monitor_fdset_add_fd(int fd, bool 
has_fdset_id, int64_t fdset_id,
 if (fdset_id < 0) {
 error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "fdset-id",
"a non-negative value");
+qemu_mutex_unlock(_fdsets_lock);
 return NULL;
 }
 /* Use specified fdset ID */
@@ -2500,16 +2522,21 @@ AddfdInfo *monitor_fdset_add_fd(int fd, bool 
has_fdset_id, int64_t fdset_id,
 fdinfo->fdset_id = mon_fdset->id;
 fdinfo->fd = mon_fdset_fd->fd;
 
+qemu_mutex_unlock(_fdsets_lock);
 return fdinfo;
 }
 
 int monitor_fdset_get_fd(int64_t fdset_id, int flags)
 {
-#ifndef _WIN32
+#ifdef _WIN32
+return -ENOENT;
+#else
 MonFdset *mon_fdset;
 MonFdsetFd *mon_fdset_fd;
 int mon_fd_flags;
+int ret;
 
+qemu_mutex_lock(_fdsets_lock);
 QLIST_FOREACH(mon_fdset, _fdsets, next) {
 if (mon_fdset->id != fdset_id) {
 continue;
@@ -2517,20 +2544,24 @@ int monitor_fdset_get_fd(int64_t fdset_id, int flags)
 QLIST_FOREACH(mon_fdset_fd, _fdset->fds, next) {
 mon_fd_flags = fcntl(mon_fdset_fd->fd, F_GETFL);
 if (mon_fd_flags == -1) {
-return -1;
+ret = -errno;
+goto out;
 }

[Qemu-devel] [PATCH v8 0/5] monitor: let Monitor be thread safe

2018-05-28 Thread Peter Xu

v8:
- some wording changes according to previous comments [Markus]
- return -ENOENT too in stubs/fdset.c:monitor_fdset_get_fd() [Stefan]
- refactor the fdset functions a bit, drop "ret" where proper [Markus]
- one more patch to fix monitor_lock comment [Markus]
- regular rebase and torturing

v7:
- let monitor_fdset_get_fd() return -errno, touch up qemu_open() to
  translate that into errno.  [Markus]
- touch up comment for Monitor.rs
- rebase, and torture the code a bit with qtest.

v6:
- add/drop some r-bs
- address all the comments from Markus
- rebase, and run some simple qtests to make sure nothing breaks

v5:
- collect r-bs and rebase
- move two close()s outside critical section [Dave]
- move comment to end of line [Stefan]

v4:
- fix a s/cur_mon/mon/ typo

v3:
- add comment for fields that are protected by monitor lock [Stefan]
- drop most of patch 2, only keep the protections for mon->fds [Stefan]
- add one trivial patch to add some more comments for either readline
  and cpu_get/cpu_set [Stefan]
- add protection for monitor_fdset_get_fd() [Stefan]

v2:
- cc correct people... sorry.

Stefan reported this problem that in the future we might start to have
more threads operating on the same Monitor object.  This seris try to
add fundamental support for it.

Please review.  Thanks,

Peter Xu (5):
  monitor: rename out_lock to mon_lock
  monitor: protect mon->fds with mon_lock
  monitor: more comments on lock-free elements
  monitor: fix comment for monitor_lock
  monitor: add lock to protect mon_fdsets

 monitor.c | 152 --
 stubs/fdset.c |   2 +-
 util/osdep.c  |   3 +-
 3 files changed, 113 insertions(+), 44 deletions(-)

-- 
2.17.0

[Qemu-devel] [PATCH v4 18/19] replay: describe reverse debugging in docs/replay.txt

2018-05-28 Thread Pavel Dovgalyuk

This patch updates the documentation and describes usage of the reverse
debugging in QEMU+GDB.

Signed-off-by: Pavel Dovgalyuk 
---
 docs/replay.txt |   33 +
 1 file changed, 33 insertions(+)

diff --git a/docs/replay.txt b/docs/replay.txt
index f7def53..086d3f8 100644
--- a/docs/replay.txt
+++ b/docs/replay.txt
@@ -293,6 +293,39 @@ for recording and replaying must contain identical number 
of ports in record
 and replay modes, but their backends may differ.
 E.g., '-serial stdio' in record mode, and '-serial null' in replay mode.
 
+Reverse debugging
+-
+
+Reverse debugging allows "executing" the program in reverse direction.
+GDB remote protocol supports "reverse step" and "reverse continue"
+commands. The first one steps single instruction backwards in time,
+and the second one finds the last breakpoint in the past.
+
+Recorded executions may be used to enable reverse debugging. QEMU can't
+execute the code in backwards direction, but can load a snapshot and
+replay forward to find the desired position or breakpoint.
+
+The following GDB commands are supported:
+ - reverse-stepi (or rsi) - step one instruction backwards
+ - reverse-continue (or rc) - find last breakpoint in the past
+
+Reverse step loads the nearest snapshot and replays the execution until
+the required instruction is met.
+
+Reverse continue may include several passes of examining the execution
+between the snapshots. Each of the passes include the following steps:
+ 1. loading the snapshot
+ 2. replaying to examine the breakpoints
+ 3. if breakpoint or watchpoint was met
+- loading the snaphot again
+- replaying to the required breakpoint
+ 4. else
+- proceeding to the p.1 with the earlier snapshot
+
+Therefore usage of the reverse debugging requires at least one snapshot
+created in advance. See the "Snapshotting" section to learn about running
+record/replay and creating the snapshot in these modes.
+
 Replay log format
 -

[Qemu-devel] [PATCH v8 1/5] monitor: rename out_lock to mon_lock

2018-05-28 Thread Peter Xu

The out_lock is protecting a few Monitor fields.  In the future the
monitor code will start to run in multiple threads.  We are going to
turn it into a bigger lock to protect not only the out buffer but also
most of the rest.

Since at it, rearrange the Monitor struct a bit.

Reviewed-by: Stefan Hajnoczi 
Signed-off-by: Peter Xu 
---
 monitor.c | 53 +
 1 file changed, 29 insertions(+), 24 deletions(-)

diff --git a/monitor.c b/monitor.c
index 46814af533..14c681dc8a 100644
--- a/monitor.c
+++ b/monitor.c
@@ -207,15 +207,6 @@ struct Monitor {
 int suspend_cnt;/* Needs to be accessed atomically */
 bool skip_flush;
 bool use_io_thr;
-
-/* We can't access guest memory when holding the lock */
-QemuMutex out_lock;
-QString *outbuf;
-guint out_watch;
-
-/* Read under either BQL or out_lock, written with BQL+out_lock.  */
-int mux_out;
-
 ReadLineState *rs;
 MonitorQMP qmp;
 gchar *mon_cpu_path;
@@ -224,6 +215,20 @@ struct Monitor {
 mon_cmd_t *cmd_table;
 QLIST_HEAD(,mon_fd_t) fds;
 QTAILQ_ENTRY(Monitor) entry;
+
+/*
+ * The per-monitor lock. We can't access guest memory when holding
+ * the lock.
+ */
+QemuMutex mon_lock;
+
+/*
+ * Fields that are protected by the per-monitor lock.
+ */
+QString *outbuf;
+guint out_watch;
+/* Read under either BQL or mon_lock, written with BQL+mon_lock.  */
+int mux_out;
 };
 
 /* Let's add monitor global variables to this struct. */
@@ -366,14 +371,14 @@ static gboolean monitor_unblocked(GIOChannel *chan, 
GIOCondition cond,
 {
 Monitor *mon = opaque;
 
-qemu_mutex_lock(>out_lock);
+qemu_mutex_lock(>mon_lock);
 mon->out_watch = 0;
 monitor_flush_locked(mon);
-qemu_mutex_unlock(>out_lock);
+qemu_mutex_unlock(>mon_lock);
 return FALSE;
 }
 
-/* Called with mon->out_lock held.  */
+/* Called with mon->mon_lock held.  */
 static void monitor_flush_locked(Monitor *mon)
 {
 int rc;
@@ -411,9 +416,9 @@ static void monitor_flush_locked(Monitor *mon)
 
 void monitor_flush(Monitor *mon)
 {
-qemu_mutex_lock(>out_lock);
+qemu_mutex_lock(>mon_lock);
 monitor_flush_locked(mon);
-qemu_mutex_unlock(>out_lock);
+qemu_mutex_unlock(>mon_lock);
 }
 
 /* flush at every end of line */
@@ -421,7 +426,7 @@ static void monitor_puts(Monitor *mon, const char *str)
 {
 char c;
 
-qemu_mutex_lock(>out_lock);
+qemu_mutex_lock(>mon_lock);
 for(;;) {
 c = *str++;
 if (c == '\0')
@@ -434,7 +439,7 @@ static void monitor_puts(Monitor *mon, const char *str)
 monitor_flush_locked(mon);
 }
 }
-qemu_mutex_unlock(>out_lock);
+qemu_mutex_unlock(>mon_lock);
 }
 
 void monitor_vprintf(Monitor *mon, const char *fmt, va_list ap)
@@ -725,7 +730,7 @@ static void monitor_data_init(Monitor *mon, bool skip_flush,
   bool use_io_thr)
 {
 memset(mon, 0, sizeof(Monitor));
-qemu_mutex_init(>out_lock);
+qemu_mutex_init(>mon_lock);
 qemu_mutex_init(>qmp.qmp_queue_lock);
 mon->outbuf = qstring_new();
 /* Use *mon_cmds by default. */
@@ -745,7 +750,7 @@ static void monitor_data_destroy(Monitor *mon)
 }
 readline_free(mon->rs);
 qobject_unref(mon->outbuf);
-qemu_mutex_destroy(>out_lock);
+qemu_mutex_destroy(>mon_lock);
 qemu_mutex_destroy(>qmp.qmp_queue_lock);
 monitor_qmp_cleanup_req_queue_locked(mon);
 monitor_qmp_cleanup_resp_queue_locked(mon);
@@ -777,13 +782,13 @@ char *qmp_human_monitor_command(const char *command_line, 
bool has_cpu_index,
 handle_hmp_command(, command_line);
 cur_mon = old_mon;
 
-qemu_mutex_lock(_lock);
+qemu_mutex_lock(_lock);
 if (qstring_get_length(hmp.outbuf) > 0) {
 output = g_strdup(qstring_get_str(hmp.outbuf));
 } else {
 output = g_strdup("");
 }
-qemu_mutex_unlock(_lock);
+qemu_mutex_unlock(_lock);
 
 out:
 monitor_data_destroy();
@@ -4377,9 +4382,9 @@ static void monitor_event(void *opaque, int event)
 
 switch (event) {
 case CHR_EVENT_MUX_IN:
-qemu_mutex_lock(>out_lock);
+qemu_mutex_lock(>mon_lock);
 mon->mux_out = 0;
-qemu_mutex_unlock(>out_lock);
+qemu_mutex_unlock(>mon_lock);
 if (mon->reset_seen) {
 readline_restart(mon->rs);
 monitor_resume(mon);
@@ -4399,9 +4404,9 @@ static void monitor_event(void *opaque, int event)
 } else {
 atomic_inc(>suspend_cnt);
 }
-qemu_mutex_lock(>out_lock);
+qemu_mutex_lock(>mon_lock);
 mon->mux_out = 1;
-qemu_mutex_unlock(>out_lock);
+qemu_mutex_unlock(>mon_lock);
 break;
 
 case CHR_EVENT_OPENED:
-- 
2.17.0

[Qemu-devel] [PATCH v4 17/19] gdbstub: add reverse continue support in replay mode

2018-05-28 Thread Pavel Dovgalyuk

This patch adds support of the reverse continue operation for gdbstub.
Reverse continue finds the last breakpoint that would happen in normal
execution from the beginning to the current moment.
Implementation of the reverse continue replays the execution twice:
to find the breakpoints that were hit and to seek to the last breakpoint.
Reverse continue loads the previous snapshot and tries to find the breakpoint
since that moment. If there are no such breakpoints, it proceeds to
the earlier snapshot, and so on. When no breakpoints or watchpoints were
hit at all, execution stops at the beginning of the replay log.

Signed-off-by: Pavel Dovgalyuk 
---
 cpus.c|3 ++
 exec.c|1 +
 gdbstub.c |   10 ++-
 include/sysemu/replay.h   |6 
 replay/replay-debugging.c |   69 +
 stubs/replay.c|5 +++
 6 files changed, 93 insertions(+), 1 deletion(-)

diff --git a/cpus.c b/cpus.c
index 01016af..21bd26f 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1048,6 +1048,9 @@ static void cpu_handle_guest_debug(CPUState *cpu)
 cpu->stopped = true;
 } else {
 if (!cpu->singlestep_enabled) {
+/* Report about the breakpoint and
+   make a single step to skip it */
+replay_breakpoint();
 cpu_single_step(cpu, SSTEP_ENABLE);
 } else {
 cpu_single_step(cpu, 0);
diff --git a/exec.c b/exec.c
index fd037f8..947207c 100644
--- a/exec.c
+++ b/exec.c
@@ -2584,6 +2584,7 @@ static void check_watchpoint(int offset, int len, 
MemTxAttrs attrs, int flags)
 if (replay_running_debug()) {
 /* Don't process the watchpoints when we are
in a reverse debugging operation. */
+replay_breakpoint();
 return;
 }
 if (flags == BP_MEM_READ) {
diff --git a/gdbstub.c b/gdbstub.c
index 12aac79..7771fc1 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -,6 +,14 @@ static int gdb_handle_packet(GDBState *s, const char 
*line_buf)
 put_packet(s, "E14");
 break;
 }
+case 'c':
+if (replay_reverse_continue()) {
+gdb_continue(s);
+return RS_IDLE;
+} else {
+put_packet(s, "E14");
+break;
+}
 default:
 goto unknown_command;
 }
@@ -1381,7 +1389,7 @@ static int gdb_handle_packet(GDBState *s, const char 
*line_buf)
 pstrcat(buf, sizeof(buf), ";qXfer:features:read+");
 }
 if (replay_mode == REPLAY_MODE_PLAY) {
-pstrcat(buf, sizeof(buf), ";ReverseStep+");
+pstrcat(buf, sizeof(buf), ";ReverseStep+;ReverseContinue+");
 }
 put_packet(s, buf);
 break;
diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h
index 611eabb..a3113c1 100644
--- a/include/sysemu/replay.h
+++ b/include/sysemu/replay.h
@@ -78,9 +78,15 @@ void replay_break(int64_t step, QEMUTimerCB callback, void 
*opaque);
 Used by gdbstub for backwards debugging.
 Returns true on success. */
 bool replay_reverse_step(void);
+/*! Start searching the last breakpoint/watchpoint.
+Used by gdbstub for backwards debugging.
+Returns true if the process successfully started. */
+bool replay_reverse_continue(void);
 /*! Returns true if replay module is processing
 reverse_continue or reverse_step request */
 bool replay_running_debug(void);
+/*! Called in reverse debugging mode to collect breakpoint information */
+void replay_breakpoint(void);
 
 /* Processing the instructions */
 
diff --git a/replay/replay-debugging.c b/replay/replay-debugging.c
index 388cf12..edab98e 100644
--- a/replay/replay-debugging.c
+++ b/replay/replay-debugging.c
@@ -22,6 +22,8 @@
 #include "migration/snapshot.h"
 
 static bool replay_is_debugging;
+static int64_t replay_last_breakpoint;
+static int64_t replay_last_snapshot;
 
 bool replay_running_debug(void)
 {
@@ -216,3 +218,70 @@ bool replay_reverse_step(void)
 
 return false;
 }
+
+static void replay_continue_end(void)
+{
+replay_is_debugging = false;
+vm_stop(RUN_STATE_DEBUG);
+replay_break(-1LL, NULL, NULL);
+}
+
+static void replay_continue_stop(void *opaque)
+{
+Error *err = NULL;
+if (replay_last_breakpoint != -1LL) {
+replay_seek(replay_last_breakpoint, , replay_stop_vm_debug);
+if (err) {
+error_free(err);
+replay_continue_end();
+}
+return;
+}
+/* No breakpoints since the last snapshot.
+   Find previous snapshot and try again. */
+if (replay_last_snapshot != 0) {
+replay_seek(replay_last_snapshot - 1, , replay_continue_stop);
+if (err) {
+error_free(err);
+

[Qemu-devel] [PATCH v4 19/19] replay: allow loading any snapshots before recording

2018-05-28 Thread Pavel Dovgalyuk

This patch enables using -loadvm in recording mode to allow starting
the execution recording from any of the available snapshots.
It also fixes loading of the record/replay state, therefore snapshots
created in replay mode may also be used for starting the new recording.

Signed-off-by: Pavel Dovgalyuk 
---
 replay/replay-snapshot.c |   17 -
 vl.c |7 ---
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/replay/replay-snapshot.c b/replay/replay-snapshot.c
index 2ab85cf..16bacc9 100644
--- a/replay/replay-snapshot.c
+++ b/replay/replay-snapshot.c
@@ -33,11 +33,18 @@ static int replay_pre_save(void *opaque)
 static int replay_post_load(void *opaque, int version_id)
 {
 ReplayState *state = opaque;
-fseek(replay_file, state->file_offset, SEEK_SET);
-qemu_clock_set_last(QEMU_CLOCK_HOST, state->host_clock_last);
-/* If this was a vmstate, saved in recording mode,
-   we need to initialize replay data fields. */
-replay_fetch_data_kind();
+if (replay_mode == REPLAY_MODE_PLAY) {
+fseek(replay_file, state->file_offset, SEEK_SET);
+qemu_clock_set_last(QEMU_CLOCK_HOST, state->host_clock_last);
+/* If this was a vmstate, saved in recording mode,
+   we need to initialize replay data fields. */
+replay_fetch_data_kind();
+} else if (replay_mode == REPLAY_MODE_RECORD) {
+/* This is only useful for loading the initial state.
+   Therefore reset all the counters. */
+state->instructions_count = 0;
+state->block_request_id = 0;
+}
 
 return 0;
 }
diff --git a/vl.c b/vl.c
index 2aeaefb..04ef9ce 100644
--- a/vl.c
+++ b/vl.c
@@ -4651,15 +4651,16 @@ int main(int argc, char **argv, char **envp)
 replay_checkpoint(CHECKPOINT_RESET);
 qemu_system_reset(SHUTDOWN_CAUSE_NONE);
 register_global_state();
-if (replay_mode != REPLAY_MODE_NONE) {
-replay_vmstate_init();
-} else if (loadvm) {
+if (loadvm) {
 Error *local_err = NULL;
 if (load_snapshot(loadvm, _err) < 0) {
 error_report_err(local_err);
 autostart = 0;
 }
 }
+if (replay_mode != REPLAY_MODE_NONE) {
+replay_vmstate_init();
+}
 
 qdev_prop_check_globals();
 if (vmstate_dump_file) {

[Qemu-devel] [Bug 1773743] [NEW] qemu-user -g xxx -E LD_PROFILE=xxx segfault

2018-05-28 Thread mou

Public bug reported:

Here is two simple steps to reproduce the bug:

$ qemu-x86_64 -E LD_PROFILE=libc.so.6 -E LD_PROFILE_OUTPUT=. -g 12345 -L
/ /bin/ls

(libc.so and /bin/ls might change on your system, in this case we just
need a binary with a profilable needed library)

In a other window launch:

$ gdb
(gdb) target remote :12345
(gdb) c

At this point qemu will segfault.

It seems this problem is appends when sigprof passed to gdb.
One way I have found to bypass this:
patch gdbstub.c gdb_handlesig and ignore sig if
sig == TARGET_SIGPROF
(which means now I can't catch sigprof on gdb anymore)

** Affects: qemu
 Importance: Undecided
 Status: New

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1773743

Title:
  qemu-user -g xxx -E LD_PROFILE=xxx segfault

Status in QEMU:
  New

Bug description:
  Here is two simple steps to reproduce the bug:

  $ qemu-x86_64 -E LD_PROFILE=libc.so.6 -E LD_PROFILE_OUTPUT=. -g 12345
  -L / /bin/ls

  (libc.so and /bin/ls might change on your system, in this case we just
  need a binary with a profilable needed library)

  In a other window launch:

  $ gdb
  (gdb) target remote :12345
  (gdb) c

  At this point qemu will segfault.

  It seems this problem is appends when sigprof passed to gdb.
  One way I have found to bypass this:
  patch gdbstub.c gdb_handlesig and ignore sig if
  sig == TARGET_SIGPROF
  (which means now I can't catch sigprof on gdb anymore)

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1773743/+subscriptions

Re: [Qemu-devel] [Qemu-ppc] [PATCH 1/4] spapr: remove irq_hint parameter from spapr_irq_alloc()

2018-05-28 Thread Thomas Huth

On 28.05.2018 09:06, Cédric Le Goater wrote:
> On 05/28/2018 08:17 AM, Thomas Huth wrote:
>> On 25.05.2018 16:02, Greg Kurz wrote:
>>> On Fri, 18 May 2018 18:44:02 +0200
>>> Cédric Le Goater  wrote:
>>>
 This IRQ number hint can possibly be used by the VIO devices if the
 "irq" property is defined on the command line but it seems it is never
 the case. It is not used in libvirt for instance. So, let's remove it
 to simplify future changes.

>>>
>>> Setting an irq manually looks a bit anachronistic. I doubt anyone would
>>> do that nowadays, and the patch does a nice cleanup. So this looks like
>>> a good idea.
>> [...]
 diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
 index 472dd6f33a96..cc064f64fccf 100644
 --- a/hw/ppc/spapr_vio.c
 +++ b/hw/ppc/spapr_vio.c
 @@ -455,7 +455,7 @@ static void spapr_vio_busdev_realize(DeviceState 
 *qdev, Error **errp)
  dev->qdev.id = id;
  }
  
 -dev->irq = spapr_irq_alloc(spapr, dev->irq, false, _err);
 +dev->irq = spapr_irq_alloc(spapr, false, _err);
>>>
>>> Silently breaking "irq" like this looks wrong. I'd rather officially remove
>>> it first (ie, kill spapr_vio_props, -5 lines in spapr_vio.c).
>>>
>>> Of course, this raises the question of interface deprecation, and it should
>>> theoretically follow the process described at:
>>>
>>> https://wiki.qemu.org/Features/LegacyRemoval#Rules_for_removing_an_interface
>>>
>>> Cc'ing Thomas, our Chief Deprecation Officer, for insights :)
>>
>> The property is a public interface. Just because it's not used by
>> libvirt does not mean that nobody is using it. So yes, please follow the
>> rules and mark it as deprecated first for two release, before you really
>> remove it.
> 
> This "irq" property is a problem to introduce a new static layout of IRQ 
> numbers. It is in complete opposition. 
> 
> Can we keep it as it is for old pseries machine (settable) and ignore it 
> for newer ? Would that be fine ?

I think that would be fine, too. You likely need to keep the settable
IRQs around for the old machines anyway, to make sure that migration of
the old machine types still works, right?

 Thomas

[Qemu-devel] [PATCH v4 15/19] replay: flush rr queue before loading the vmstate

2018-05-28 Thread Pavel Dovgalyuk

Non-empty record/replay queue prevents saving and loading the VM state,
because it includes pending bottom halves and block coroutines.
But when the new VM state is loaded, we don't have to preserve the consistency
of the current state anymore. Therefore this patch just flushes the queue
allowing the coroutines to finish.

Signed-off-by: Pavel Dovgalyuk 
---
 include/sysemu/replay.h  |2 ++
 migration/savevm.c   |   10 --
 replay/replay-internal.h |2 --
 3 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h
index 98d709c..84a1ec5 100644
--- a/include/sysemu/replay.h
+++ b/include/sysemu/replay.h
@@ -132,6 +132,8 @@ void replay_disable_events(void);
 void replay_enable_events(void);
 /*! Returns true when saving events is enabled */
 bool replay_events_enabled(void);
+/*! Flushes events queue */
+void replay_flush_events(void);
 /*! Adds bottom half event to the queue */
 void replay_bh_schedule_event(QEMUBH *bh);
 /*! Adds input event to the queue */
diff --git a/migration/savevm.c b/migration/savevm.c
index 5c4747d..b0d6272 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -2610,12 +2610,6 @@ int load_snapshot(const char *name, Error **errp)
 AioContext *aio_context;
 MigrationIncomingState *mis = migration_incoming_get_current();
 
-if (!replay_can_snapshot()) {
-error_report("Record/replay does not allow loading snapshot "
- "right now. Try once more later.");
-return -EINVAL;
-}
-
 if (!bdrv_all_can_snapshot()) {
 error_setg(errp,
"Device '%s' is writable but does not support snapshots",
@@ -2649,6 +2643,10 @@ int load_snapshot(const char *name, Error **errp)
 return -EINVAL;
 }
 
+/* Flush the record/replay queue. Now the VM state is going
+   to change. Therefore we don't need to preserve its consistency */
+replay_flush_events();
+
 /* Flush all IO requests so they don't interfere with the new state.  */
 bdrv_drain_all_begin();
 
diff --git a/replay/replay-internal.h b/replay/replay-internal.h
index a2221e5..08ef2ec 100644
--- a/replay/replay-internal.h
+++ b/replay/replay-internal.h
@@ -146,8 +146,6 @@ void replay_read_next_clock(unsigned int kind);
 void replay_init_events(void);
 /*! Clears internal data structures for events handling */
 void replay_finish_events(void);
-/*! Flushes events queue */
-void replay_flush_events(void);
 /*! Returns true if there are any unsaved events in the queue */
 bool replay_has_events(void);
 /*! Saves events from queue into the file */

[Qemu-devel] [PATCH v4 14/19] translator: fix breakpoint processing

2018-05-28 Thread Pavel Dovgalyuk

QEMU cannot pass through the breakpoints when 'si' command is used
in remote gdb. This patch disables inserting the breakpoints
when we are already single stepping though the gdb remote protocol.
This patch also fixes icount calculation for the blocks that include
breakpoints - instruction with breakpoint is not executed and shouldn't
be used in icount calculation.

Signed-off-by: Pavel Dovgalyuk 
---
 accel/tcg/translator.c |8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 0f9dca9..afd0a49 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -34,6 +34,8 @@ void translator_loop_temp_check(DisasContextBase *db)
 void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
  CPUState *cpu, TranslationBlock *tb)
 {
+int bp_insn = 0;
+
 /* Initialize DisasContext */
 db->tb = tb;
 db->pc_first = tb->pc;
@@ -71,11 +73,13 @@ void translator_loop(const TranslatorOps *ops, 
DisasContextBase *db,
 tcg_debug_assert(db->is_jmp == DISAS_NEXT);  /* no early exit */
 
 /* Pass breakpoint hits to target for further processing */
-if (unlikely(!QTAILQ_EMPTY(>breakpoints))) {
+if (!db->singlestep_enabled
+&& unlikely(!QTAILQ_EMPTY(>breakpoints))) {
 CPUBreakpoint *bp;
 QTAILQ_FOREACH(bp, >breakpoints, entry) {
 if (bp->pc == db->pc_next) {
 if (ops->breakpoint_check(db, cpu, bp)) {
+bp_insn = 1;
 break;
 }
 }
@@ -118,7 +122,7 @@ void translator_loop(const TranslatorOps *ops, 
DisasContextBase *db,
 
 /* Emit code to exit the TB, as indicated by db->is_jmp.  */
 ops->tb_stop(db, cpu);
-gen_tb_end(db->tb, db->num_insns);
+gen_tb_end(db->tb, db->num_insns - bp_insn);
 
 /* The disas_log hook may use these values rather than recompute.  */
 db->tb->size = db->pc_next - db->pc_first;

[Qemu-devel] [PATCH v8 3/5] monitor: more comments on lock-free elements

2018-05-28 Thread Peter Xu

Add some explicit comments for both Readline and cpu_set/cpu_get helpers
that they do not need the mon_lock protection.

Signed-off-by: Peter Xu 
---
 monitor.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/monitor.c b/monitor.c
index d6c3c08932..f23178951e 100644
--- a/monitor.c
+++ b/monitor.c
@@ -207,7 +207,15 @@ struct Monitor {
 int suspend_cnt;/* Needs to be accessed atomically */
 bool skip_flush;
 bool use_io_thr;
+
+/*
+ * State used only in the thread "owning" the monitor.
+ * If @use_io_thr, this is mon_global.mon_iothread.
+ * Else, it's the main thread.
+ * These members can be safely accessed without locks.
+ */
 ReadLineState *rs;
+
 MonitorQMP qmp;
 gchar *mon_cpu_path;
 BlockCompletionFunc *password_completion_cb;
@@ -1313,7 +1321,7 @@ void qmp_qmp_capabilities(bool has_enable, 
QMPCapabilityList *enable,
 cur_mon->qmp.commands = _commands;
 }
 
-/* set the current CPU defined by the user */
+/* Set the current CPU defined by the user. Callers must hold BQL. */
 int monitor_set_cpu(int cpu_index)
 {
 CPUState *cpu;
@@ -1327,6 +1335,7 @@ int monitor_set_cpu(int cpu_index)
 return 0;
 }
 
+/* Callers must hold BQL. */
 static CPUState *mon_get_cpu_sync(bool synchronize)
 {
 CPUState *cpu;
-- 
2.17.0

[Qemu-devel] [PATCH v4 12/19] timer: remove replay clock probe in deadline calculation

2018-05-28 Thread Pavel Dovgalyuk

Ciro Santilli reported that commit a5ed352596a8b7eb2f9acce34371b944ac3056c4
breaks the execution replay. It happens due to the probing the clock
for the new instances of iothread.
However, this probing was made in replay mode for the timer lists that
are empty.
This patch removes clock probing in replay mode.
It is an artifact of the old version with another thread model.

Signed-off-by: Pavel Dovgalyuk 
---
 util/qemu-timer.c |   11 ++-
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/util/qemu-timer.c b/util/qemu-timer.c
index 2ed1bf2..86bfe84 100644
--- a/util/qemu-timer.c
+++ b/util/qemu-timer.c
@@ -578,17 +578,10 @@ int64_t timerlistgroup_deadline_ns(QEMUTimerListGroup 
*tlg)
 {
 int64_t deadline = -1;
 QEMUClockType type;
-bool play = replay_mode == REPLAY_MODE_PLAY;
 for (type = 0; type < QEMU_CLOCK_MAX; type++) {
 if (qemu_clock_use_for_deadline(type)) {
-if (!play || type == QEMU_CLOCK_REALTIME) {
-deadline = qemu_soonest_timeout(deadline,
-
timerlist_deadline_ns(tlg->tl[type]));
-} else {
-/* Read clock from the replay file and
-   do not calculate the deadline, based on virtual clock. */
-qemu_clock_get_ns(type);
-}
+deadline = qemu_soonest_timeout(deadline,
+
timerlist_deadline_ns(tlg->tl[type]));
 }
 }
 return deadline;

[Qemu-devel] [PATCH v4 13/19] replay: refine replay-time module

2018-05-28 Thread Pavel Dovgalyuk

This patch removes refactoring artifacts from the replay/replay-time.c

Signed-off-by: Pavel Dovgalyuk 
---
 replay/replay-time.c |   27 ++-
 1 file changed, 10 insertions(+), 17 deletions(-)

diff --git a/replay/replay-time.c b/replay/replay-time.c
index 6a7565e..40030b8 100644
--- a/replay/replay-time.c
+++ b/replay/replay-time.c
@@ -17,14 +17,12 @@
 
 int64_t replay_save_clock(ReplayClockKind kind, int64_t clock)
 {
+g_assert(replay_file);
+g_assert(replay_mutex_locked());
 
-if (replay_file) {
-g_assert(replay_mutex_locked());
-
-replay_save_instructions();
-replay_put_event(EVENT_CLOCK + kind);
-replay_put_qword(clock);
-}
+replay_save_instructions();
+replay_put_event(EVENT_CLOCK + kind);
+replay_put_qword(clock);
 
 return clock;
 }
@@ -46,20 +44,15 @@ void replay_read_next_clock(ReplayClockKind kind)
 /*! Reads next clock event from the input. */
 int64_t replay_read_clock(ReplayClockKind kind)
 {
+int64_t ret;
 g_assert(replay_file && replay_mutex_locked());
 
 replay_account_executed_instructions();
 
-if (replay_file) {
-int64_t ret;
-if (replay_next_event_is(EVENT_CLOCK + kind)) {
-replay_read_next_clock(kind);
-}
-ret = replay_state.cached_clock[kind];
-
-return ret;
+if (replay_next_event_is(EVENT_CLOCK + kind)) {
+replay_read_next_clock(kind);
 }
+ret = replay_state.cached_clock[kind];
 
-error_report("REPLAY INTERNAL ERROR %d", __LINE__);
-exit(1);
+return ret;
 }

[Qemu-devel] [PATCH v4 11/19] replay: flush events when exiting

2018-05-28 Thread Pavel Dovgalyuk

This patch adds events processing when emulation finishes instead
of just cleaning the queue. Now the bdrv coroutines will be in consistent
state when emulator closes. It allows correct polling of the block layer
at exit.

Signed-off-by: Pavel Dovgalyuk 
---
 replay/replay-events.c   |   14 +-
 replay/replay-internal.h |2 --
 2 files changed, 1 insertion(+), 15 deletions(-)

diff --git a/replay/replay-events.c b/replay/replay-events.c
index 707de38..0964a82 100644
--- a/replay/replay-events.c
+++ b/replay/replay-events.c
@@ -94,18 +94,6 @@ void replay_disable_events(void)
 }
 }
 
-void replay_clear_events(void)
-{
-g_assert(replay_mutex_locked());
-
-while (!QTAILQ_EMPTY(_list)) {
-Event *event = QTAILQ_FIRST(_list);
-QTAILQ_REMOVE(_list, event, events);
-
-g_free(event);
-}
-}
-
 /*! Adds specified async event to the queue */
 void replay_add_event(ReplayAsyncEventKind event_kind,
   void *opaque,
@@ -308,7 +296,7 @@ void replay_init_events(void)
 void replay_finish_events(void)
 {
 events_enabled = false;
-replay_clear_events();
+replay_flush_events();
 }
 
 bool replay_events_enabled(void)
diff --git a/replay/replay-internal.h b/replay/replay-internal.h
index 34d19eb..a2221e5 100644
--- a/replay/replay-internal.h
+++ b/replay/replay-internal.h
@@ -148,8 +148,6 @@ void replay_init_events(void);
 void replay_finish_events(void);
 /*! Flushes events queue */
 void replay_flush_events(void);
-/*! Clears events list before loading new VM state */
-void replay_clear_events(void);
 /*! Returns true if there are any unsaved events in the queue */
 bool replay_has_events(void);
 /*! Saves events from queue into the file */

[Qemu-devel] [PATCH v8 2/5] monitor: protect mon->fds with mon_lock

2018-05-28 Thread Peter Xu

mon->fds were protected by BQL.  Now protect it by mon_lock so that it
can even be used in monitor iothread.

Reviewed-by: Stefan Hajnoczi 
Reviewed-by: Markus Armbruster 
Signed-off-by: Peter Xu 
---
 monitor.c | 22 ++
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/monitor.c b/monitor.c
index 14c681dc8a..d6c3c08932 100644
--- a/monitor.c
+++ b/monitor.c
@@ -213,7 +213,6 @@ struct Monitor {
 BlockCompletionFunc *password_completion_cb;
 void *password_opaque;
 mon_cmd_t *cmd_table;
-QLIST_HEAD(,mon_fd_t) fds;
 QTAILQ_ENTRY(Monitor) entry;
 
 /*
@@ -225,6 +224,7 @@ struct Monitor {
 /*
  * Fields that are protected by the per-monitor lock.
  */
+QLIST_HEAD(, mon_fd_t) fds;
 QString *outbuf;
 guint out_watch;
 /* Read under either BQL or mon_lock, written with BQL+mon_lock.  */
@@ -2189,7 +2189,7 @@ static void hmp_acl_remove(Monitor *mon, const QDict 
*qdict)
 void qmp_getfd(const char *fdname, Error **errp)
 {
 mon_fd_t *monfd;
-int fd;
+int fd, tmp_fd;
 
 fd = qemu_chr_fe_get_msgfd(_mon->chr);
 if (fd == -1) {
@@ -2204,13 +2204,17 @@ void qmp_getfd(const char *fdname, Error **errp)
 return;
 }
 
+qemu_mutex_lock(_mon->mon_lock);
 QLIST_FOREACH(monfd, _mon->fds, next) {
 if (strcmp(monfd->name, fdname) != 0) {
 continue;
 }
 
-close(monfd->fd);
+tmp_fd = monfd->fd;
 monfd->fd = fd;
+qemu_mutex_unlock(_mon->mon_lock);
+/* Make sure close() is out of critical section */
+close(tmp_fd);
 return;
 }
 
@@ -2219,24 +2223,31 @@ void qmp_getfd(const char *fdname, Error **errp)
 monfd->fd = fd;
 
 QLIST_INSERT_HEAD(_mon->fds, monfd, next);
+qemu_mutex_unlock(_mon->mon_lock);
 }
 
 void qmp_closefd(const char *fdname, Error **errp)
 {
 mon_fd_t *monfd;
+int tmp_fd;
 
+qemu_mutex_lock(_mon->mon_lock);
 QLIST_FOREACH(monfd, _mon->fds, next) {
 if (strcmp(monfd->name, fdname) != 0) {
 continue;
 }
 
 QLIST_REMOVE(monfd, next);
-close(monfd->fd);
+tmp_fd = monfd->fd;
 g_free(monfd->name);
 g_free(monfd);
+qemu_mutex_unlock(_mon->mon_lock);
+/* Make sure close() is out of critical section */
+close(tmp_fd);
 return;
 }
 
+qemu_mutex_unlock(_mon->mon_lock);
 error_setg(errp, QERR_FD_NOT_FOUND, fdname);
 }
 
@@ -2244,6 +2255,7 @@ int monitor_get_fd(Monitor *mon, const char *fdname, 
Error **errp)
 {
 mon_fd_t *monfd;
 
+qemu_mutex_lock(>mon_lock);
 QLIST_FOREACH(monfd, >fds, next) {
 int fd;
 
@@ -2257,10 +2269,12 @@ int monitor_get_fd(Monitor *mon, const char *fdname, 
Error **errp)
 QLIST_REMOVE(monfd, next);
 g_free(monfd->name);
 g_free(monfd);
+qemu_mutex_unlock(>mon_lock);
 
 return fd;
 }
 
+qemu_mutex_unlock(>mon_lock);
 error_setg(errp, "File descriptor named '%s' has not been found", fdname);
 return -1;
 }
-- 
2.17.0

[Qemu-devel] [PATCH v4 05/19] replay: finish record/replay before closing the disks

2018-05-28 Thread Pavel Dovgalyuk

After recent updates block devices cannot be closed on qemu exit.
This happens due to the block request polling when replay is not finished.
Therefore now we stop execution recording before closing the block devices.

Signed-off-by: Pavel Dovgalyuk 
---
 replay/replay.c |2 ++
 vl.c|1 +
 2 files changed, 3 insertions(+)

diff --git a/replay/replay.c b/replay/replay.c
index 8228261..58a986f 100644
--- a/replay/replay.c
+++ b/replay/replay.c
@@ -366,6 +366,8 @@ void replay_finish(void)
 g_free(replay_snapshot);
 replay_snapshot = NULL;
 
+replay_mode = REPLAY_MODE_NONE;
+
 replay_finish_events();
 }
 
diff --git a/vl.c b/vl.c
index 61f5a41..2aeaefb 100644
--- a/vl.c
+++ b/vl.c
@@ -4688,6 +4688,7 @@ int main(int argc, char **argv, char **envp)
 
 /* No more vcpu or device emulation activity beyond this point */
 vm_shutdown();
+replay_finish();
 
 job_cancel_sync_all();
 bdrv_close_all();

[Qemu-devel] [PATCH v4 09/19] replay: introduce breakpoint at the specified step

2018-05-28 Thread Pavel Dovgalyuk

This patch introduces replay_break qmp and hmp commands.
These commands allow stopping at the specified instruction.
It may be useful for debugging when there are some known
events that should be investigated.
The commands have one argument - number of instructions
executed since the start of the replay.

Signed-off-by: Pavel Dovgalyuk 

--

v2:
 - renamed replay_break qmp command into replay-break
   (suggested by Eric Blake)
---
 hmp-commands.hx   |   15 
 hmp.h |1 +
 include/sysemu/replay.h   |3 ++
 qapi/misc.json|   17 ++
 replay/replay-debugging.c |   55 +
 replay/replay-internal.h  |4 +++
 replay/replay.c   |   17 ++
 7 files changed, 112 insertions(+)

diff --git a/hmp-commands.hx b/hmp-commands.hx
index 0734fea..a6d3f12 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1849,6 +1849,21 @@ Set QOM property @var{property} of object at location 
@var{path} to value @var{v
 ETEXI
 
 {
+.name   = "replay_break",
+.args_type  = "step:i",
+.params = "step",
+.help   = "sets breakpoint on the specified step of the replay",
+.cmd= hmp_replay_break,
+},
+
+STEXI
+@item replay_break @var{step}
+@findex replay_break
+Set breakpoint on the specified step of the replay.
+Execution stops when the specified step is reached.
+ETEXI
+
+{
 .name   = "info",
 .args_type  = "item:s?",
 .params = "[subcommand]",
diff --git a/hmp.h b/hmp.h
index aeec705..c6b8e37 100644
--- a/hmp.h
+++ b/hmp.h
@@ -147,5 +147,6 @@ void hmp_info_vm_generation_id(Monitor *mon, const QDict 
*qdict);
 void hmp_info_memory_size_summary(Monitor *mon, const QDict *qdict);
 void hmp_info_sev(Monitor *mon, const QDict *qdict);
 void hmp_info_replay(Monitor *mon, const QDict *qdict);
+void hmp_replay_break(Monitor *mon, const QDict *qdict);
 
 #endif
diff --git a/include/sysemu/replay.h b/include/sysemu/replay.h
index 3ced6bc..98d709c 100644
--- a/include/sysemu/replay.h
+++ b/include/sysemu/replay.h
@@ -71,6 +71,9 @@ void replay_start(void);
 void replay_finish(void);
 /*! Adds replay blocker with the specified error description */
 void replay_add_blocker(Error *reason);
+/*! Sets breakpoint at the specified step.
+If step = -1LL the existing breakpoint is removed. */
+void replay_break(int64_t step, QEMUTimerCB callback, void *opaque);
 
 /* Processing the instructions */
 
diff --git a/qapi/misc.json b/qapi/misc.json
index 50029b7..b02e776 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -3133,6 +3133,23 @@
   'returns': 'ReplayInfo' }
 
 ##
+# @replay-break:
+#
+# Set breakpoint on the specified step of the replay.
+# Execution stops when the specified step is reached.
+#
+# @step: execution step to stop at
+#
+# Since: 3.0
+#
+# Example:
+#
+# -> { "execute": "replay-break", "data": { "step": 220414 } }
+#
+##
+{ 'command': 'replay-break', 'data': { 'step': 'int' } }
+
+##
 # @xen-load-devices-state:
 #
 # Load the state of all devices from file. The RAM and the block devices
diff --git a/replay/replay-debugging.c b/replay/replay-debugging.c
index 03e7db8..819017e 100644
--- a/replay/replay-debugging.c
+++ b/replay/replay-debugging.c
@@ -16,6 +16,8 @@
 #include "hmp.h"
 #include "monitor/monitor.h"
 #include "qapi/qapi-commands-misc.h"
+#include "qapi/qmp/qdict.h"
+#include "qemu/timer.h"
 
 void hmp_info_replay(Monitor *mon, const QDict *qdict)
 {
@@ -39,3 +41,56 @@ ReplayInfo *qmp_query_replay(Error **errp)
 retval->step = replay_get_current_step();
 return retval;
 }
+
+void replay_break(int64_t step, QEMUTimerCB callback, void *opaque)
+{
+assert(replay_mode == REPLAY_MODE_PLAY);
+assert(replay_mutex_locked());
+
+replay_break_step = step;
+if (replay_break_timer) {
+timer_del(replay_break_timer);
+timer_free(replay_break_timer);
+replay_break_timer = NULL;
+}
+
+if (replay_break_step == -1LL) {
+return;
+}
+assert(replay_break_step >= replay_get_current_step());
+assert(callback);
+
+replay_break_timer = timer_new_ns(QEMU_CLOCK_REALTIME, callback, opaque);
+}
+
+static void replay_stop_vm(void *opaque)
+{
+vm_stop(RUN_STATE_PAUSED);
+replay_break(-1LL, NULL, NULL);
+}
+
+void qmp_replay_break(int64_t step, Error **errp)
+{
+if (replay_mode ==  REPLAY_MODE_PLAY) {
+if (step >= replay_get_current_step()) {
+replay_break(step, replay_stop_vm, NULL);
+} else {
+error_setg(errp, "cannot set break at the step in the past");
+}
+} else {
+error_setg(errp, "setting the break is allowed only in play mode");
+}
+}
+
+void hmp_replay_break(Monitor *mon, const QDict *qdict)
+{
+int64_t step = qdict_get_try_int(qdict, "step", -1LL);
+Error *err = NULL;
+
+qmp_replay_break(step, );
+if (err) {
+

[Qemu-devel] [PATCH v4 10/19] replay: implement replay-seek command to proceed to the desired step

2018-05-28 Thread Pavel Dovgalyuk

This patch adds hmp/qmp commands replay_seek/replay-seek that proceed
the execution to the specified step.
The commands automatically loads nearest snapshot and replay the execution
to find the desired step.

Signed-off-by: Pavel Dovgalyuk 

--

v2:
 - renamed replay_seek qmp command into replay-seek
   (suggested by Eric Blake)
---
 hmp-commands.hx   |   15 
 hmp.h |1 +
 qapi/misc.json|   16 
 replay/replay-debugging.c |   89 +
 4 files changed, 121 insertions(+)

diff --git a/hmp-commands.hx b/hmp-commands.hx
index a6d3f12..3bd418e 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1864,6 +1864,21 @@ Execution stops when the specified step is reached.
 ETEXI
 
 {
+.name   = "replay_seek",
+.args_type  = "step:i",
+.params = "step",
+.help   = "rewinds replay to the specified step",
+.cmd= hmp_replay_seek,
+},
+
+STEXI
+@item replay_seek @var{step}
+@findex replay_seek
+Automatically proceeds to the specified step, when replaying
+the execution.
+ETEXI
+
+{
 .name   = "info",
 .args_type  = "item:s?",
 .params = "[subcommand]",
diff --git a/hmp.h b/hmp.h
index c6b8e37..52ad7b9 100644
--- a/hmp.h
+++ b/hmp.h
@@ -148,5 +148,6 @@ void hmp_info_memory_size_summary(Monitor *mon, const QDict 
*qdict);
 void hmp_info_sev(Monitor *mon, const QDict *qdict);
 void hmp_info_replay(Monitor *mon, const QDict *qdict);
 void hmp_replay_break(Monitor *mon, const QDict *qdict);
+void hmp_replay_seek(Monitor *mon, const QDict *qdict);
 
 #endif
diff --git a/qapi/misc.json b/qapi/misc.json
index b02e776..5f2604c 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -3150,6 +3150,22 @@
 { 'command': 'replay-break', 'data': { 'step': 'int' } }
 
 ##
+# @replay-seek:
+#
+# Automatically proceeds to the specified step, when replaying
+# the execution.
+#
+# @step: destination execution step
+#
+# Since: 3.0
+#
+# Example:
+#
+# -> { "execute": "replay-seek", "data": { "step": 220414 } }
+##
+{ 'command': 'replay-seek', 'data': { 'step': 'int' } }
+
+##
 # @xen-load-devices-state:
 #
 # Load the state of all devices from file. The RAM and the block devices
diff --git a/replay/replay-debugging.c b/replay/replay-debugging.c
index 819017e..8d6c03d 100644
--- a/replay/replay-debugging.c
+++ b/replay/replay-debugging.c
@@ -18,6 +18,8 @@
 #include "qapi/qapi-commands-misc.h"
 #include "qapi/qmp/qdict.h"
 #include "qemu/timer.h"
+#include "block/snapshot.h"
+#include "migration/snapshot.h"
 
 void hmp_info_replay(Monitor *mon, const QDict *qdict)
 {
@@ -94,3 +96,90 @@ void hmp_replay_break(Monitor *mon, const QDict *qdict)
 return;
 }
 }
+
+static char *replay_find_nearest_snapshot(int64_t step, int64_t* snapshot_step)
+{
+BlockDriverState *bs;
+QEMUSnapshotInfo *sn_tab;
+QEMUSnapshotInfo *nearest = NULL;
+char *ret = NULL;
+int nb_sns, i;
+AioContext *aio_context;
+
+*snapshot_step = -1;
+
+bs = bdrv_all_find_vmstate_bs();
+if (!bs) {
+goto fail;
+}
+aio_context = bdrv_get_aio_context(bs);
+
+aio_context_acquire(aio_context);
+nb_sns = bdrv_snapshot_list(bs, _tab);
+aio_context_release(aio_context);
+
+for (i = 0; i < nb_sns; i++) {
+if (bdrv_all_find_snapshot(sn_tab[i].name, ) == 0) {
+if (sn_tab[i].icount != -1ULL
+&& sn_tab[i].icount <= step
+&& (!nearest || nearest->icount < sn_tab[i].icount)) {
+nearest = _tab[i];
+}
+}
+}
+if (nearest) {
+ret = g_strdup(nearest->name);
+*snapshot_step = nearest->icount;
+}
+g_free(sn_tab);
+
+fail:
+return ret;
+}
+
+static void replay_seek(int64_t step, Error **errp, QEMUTimerCB callback)
+{
+char *snapshot = NULL;
+if (replay_mode != REPLAY_MODE_PLAY) {
+error_setg(errp, "replay must be enabled to seek");
+return;
+}
+if (!replay_snapshot) {
+error_setg(errp, "snapshotting is disabled");
+return;
+}
+int64_t snapshot_step = -1;
+snapshot = replay_find_nearest_snapshot(step, _step);
+if (snapshot) {
+if (step < replay_get_current_step()
+|| replay_get_current_step() < snapshot_step) {
+vm_stop(RUN_STATE_RESTORE_VM);
+load_snapshot(snapshot, errp);
+}
+g_free(snapshot);
+}
+if (replay_get_current_step() <= step) {
+replay_break(step, callback, NULL);
+vm_start();
+} else {
+error_setg(errp, "cannot seek to the specified step");
+}
+}
+
+void qmp_replay_seek(int64_t step, Error **errp)
+{
+replay_seek(step, errp, replay_stop_vm);
+}
+
+void hmp_replay_seek(Monitor *mon, const QDict *qdict)
+{
+int64_t step = qdict_get_try_int(qdict, "step", -1LL);
+Error *err = NULL;
+
+

[Qemu-devel] [PATCH v4 16/19] gdbstub: add reverse step support in replay mode

2018-05-28 Thread Pavel Dovgalyuk

GDB remote protocol supports two reverse debugging commands:
reverse step and reverse continue.
This patch adds support of the first one to the gdbstub.
Reverse step is intended to step one instruction in the backwards
direction. This is not possible in regular execution.
But replayed execution is deterministic, therefore we can load one of
the prior snapshots and proceed to the desired step. It is equivalent
to stepping one instruction back.
There should be at least one snapshot preceding the debugged part of
the replay log.

Signed-off-by: Pavel Dovgalyuk 
---
 accel/tcg/translator.c|1 +
 cpus.c|   14 +++---
 exec.c|5 +
 gdbstub.c |   42 +++---
 include/sysemu/replay.h   |7 +++
 replay/replay-debugging.c |   33 +
 stubs/replay.c|5 +
 7 files changed, 101 insertions(+), 6 deletions(-)

diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index afd0a49..33a543e 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -17,6 +17,7 @@
 #include "exec/gen-icount.h"
 #include "exec/log.h"
 #include "exec/translator.h"
+#include "sysemu/replay.h"
 
 /* Pairs with tcg_clear_temp_count.
To be called by #TranslatorOps.{translate_insn,tb_stop} if
diff --git a/cpus.c b/cpus.c
index b3bf018..01016af 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1042,9 +1042,17 @@ static bool cpu_can_run(CPUState *cpu)
 
 static void cpu_handle_guest_debug(CPUState *cpu)
 {
-gdb_set_stop_cpu(cpu);
-qemu_system_debug_request();
-cpu->stopped = true;
+if (!replay_running_debug()) {
+gdb_set_stop_cpu(cpu);
+qemu_system_debug_request();
+cpu->stopped = true;
+} else {
+if (!cpu->singlestep_enabled) {
+cpu_single_step(cpu, SSTEP_ENABLE);
+} else {
+cpu_single_step(cpu, 0);
+}
+}
 }
 
 #ifdef CONFIG_LINUX
diff --git a/exec.c b/exec.c
index ffa1099..fd037f8 100644
--- a/exec.c
+++ b/exec.c
@@ -2581,6 +2581,11 @@ static void check_watchpoint(int offset, int len, 
MemTxAttrs attrs, int flags)
 QTAILQ_FOREACH(wp, >watchpoints, entry) {
 if (cpu_watchpoint_address_matches(wp, vaddr, len)
 && (wp->flags & flags)) {
+if (replay_running_debug()) {
+/* Don't process the watchpoints when we are
+   in a reverse debugging operation. */
+return;
+}
 if (flags == BP_MEM_READ) {
 wp->flags |= BP_WATCHPOINT_HIT_READ;
 } else {
diff --git a/gdbstub.c b/gdbstub.c
index e4ece2f..12aac79 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -39,6 +39,7 @@
 #include "sysemu/kvm.h"
 #include "exec/semihost.h"
 #include "exec/exec-all.h"
+#include "sysemu/replay.h"
 
 #ifdef CONFIG_USER_ONLY
 #define GDB_ATTACHED "0"
@@ -334,6 +335,19 @@ typedef struct GDBState {
  */
 static int sstep_flags = SSTEP_ENABLE|SSTEP_NOIRQ|SSTEP_NOTIMER;
 
+/*! Retrieves flags for single step mode. */
+static int get_sstep_flags(void)
+{
+/* In replay mode all events written into the log should be replayed.
+ * That is why NOIRQ flag is removed in this mode.
+ */
+if (replay_mode != REPLAY_MODE_NONE) {
+return SSTEP_ENABLE;
+} else {
+return sstep_flags;
+}
+}
+
 static GDBState *gdbserver_state;
 
 bool gdb_has_xml;
@@ -424,7 +438,7 @@ static int gdb_continue_partial(GDBState *s, char 
*newstates)
 CPU_FOREACH(cpu) {
 if (newstates[cpu->cpu_index] == 's') {
 trace_gdbstub_op_stepping(cpu->cpu_index);
-cpu_single_step(cpu, sstep_flags);
+cpu_single_step(cpu, get_sstep_flags());
 }
 }
 s->running_state = 1;
@@ -443,7 +457,7 @@ static int gdb_continue_partial(GDBState *s, char 
*newstates)
 break; /* nothing to do here */
 case 's':
 trace_gdbstub_op_stepping(cpu->cpu_index);
-cpu_single_step(cpu, sstep_flags);
+cpu_single_step(cpu, get_sstep_flags());
 cpu_resume(cpu);
 flag = 1;
 break;
@@ -1082,9 +1096,28 @@ static int gdb_handle_packet(GDBState *s, const char 
*line_buf)
 addr = strtoull(p, (char **), 16);
 gdb_set_cpu_pc(s, addr);
 }
-cpu_single_step(s->c_cpu, sstep_flags);
+cpu_single_step(s->c_cpu, get_sstep_flags());
 gdb_continue(s);
 return RS_IDLE;
+case 'b':
+/* Backward debugging commands */
+if (replay_mode == REPLAY_MODE_PLAY) {
+switch (*p) {
+case 's':
+if (replay_reverse_step()) {
+gdb_continue(s);
+return RS_IDLE;
+} else {
+put_packet(s, "E14");
+break;
+}
+default:
+

[Qemu-devel] [PATCH v4 06/19] qcow2: introduce icount field for snapshots

2018-05-28 Thread Pavel Dovgalyuk

This patch introduces the icount field for saving within the snapshot.
It is required for navigation between the snapshots in record/replay mode.

Signed-off-by: Pavel Dovgalyuk 

--

v2:
 - documented format changes in docs/interop/qcow2.txt
   (suggested by Eric Blake)
---
 block/qcow2-snapshot.c |7 +++
 block/qcow2.h  |2 ++
 docs/interop/qcow2.txt |4 
 3 files changed, 13 insertions(+)

diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index 74293be..d04553e 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -103,6 +103,12 @@ int qcow2_read_snapshots(BlockDriverState *bs)
 sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
 }
 
+if (extra_data_size >= 24) {
+sn->icount = be64_to_cpu(extra.icount);
+} else {
+sn->icount = -1ULL;
+}
+
 /* Read snapshot ID */
 sn->id_str = g_malloc(id_str_size + 1);
 ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
@@ -209,6 +215,7 @@ static int qcow2_write_snapshots(BlockDriverState *bs)
 memset(, 0, sizeof(extra));
 extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size);
 extra.disk_size = cpu_to_be64(sn->disk_size);
+extra.icount = cpu_to_be64(sn->icount);
 
 id_str_size = strlen(sn->id_str);
 name_size = strlen(sn->name);
diff --git a/block/qcow2.h b/block/qcow2.h
index 01b5250..dea3e43 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -147,6 +147,7 @@ typedef struct QEMU_PACKED QCowSnapshotHeader {
 typedef struct QEMU_PACKED QCowSnapshotExtraData {
 uint64_t vm_state_size_large;
 uint64_t disk_size;
+uint64_t icount;
 } QCowSnapshotExtraData;
 
 
@@ -160,6 +161,7 @@ typedef struct QCowSnapshot {
 uint32_t date_sec;
 uint32_t date_nsec;
 uint64_t vm_clock_nsec;
+uint64_t icount;
 } QCowSnapshot;
 
 struct Qcow2Cache;
diff --git a/docs/interop/qcow2.txt b/docs/interop/qcow2.txt
index 8e1547d..4619fb8 100644
--- a/docs/interop/qcow2.txt
+++ b/docs/interop/qcow2.txt
@@ -506,6 +506,10 @@ Snapshot table entry:
 
 Byte 48 - 55:   Virtual disk size of the snapshot in bytes
 
+Byte 56 - 63:   icount value which corresponds to
+the record/replay step when the snapshot
+was taken
+
 Version 3 images must include extra data at least up to
 byte 55.

[Qemu-devel] [PATCH v4 08/19] replay: introduce info hmp/qmp command

2018-05-28 Thread Pavel Dovgalyuk

This patch introduces 'info replay' monitor command and
corresponding qmp request.
These commands request the current record/replay mode, replay log file name,
and the execution step (number or recorded/replayed instructions).

Signed-off-by: Pavel Dovgalyuk 

--

v2:
 - renamed info_replay qmp into query-replay (suggested by Eric Blake)
---
 hmp-commands-info.hx  |   14 ++
 hmp.h |1 +
 qapi/misc.json|   35 +++
 replay/Makefile.objs  |3 ++-
 replay/replay-debugging.c |   41 +
 replay/replay-internal.h  |2 ++
 replay/replay.c   |3 +--
 7 files changed, 96 insertions(+), 3 deletions(-)
 create mode 100644 replay/replay-debugging.c

diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index ddfcd5a..f5631be 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -883,6 +883,20 @@ STEXI
 Show SEV information.
 ETEXI
 
+{
+.name   = "replay",
+.args_type  = "",
+.params = "",
+.help   = "show parameters of the record/replay",
+.cmd= hmp_info_replay,
+},
+
+STEXI
+@item info replay
+@findex info replay
+Display the current record/replay mode and the currently executing step.
+ETEXI
+
 STEXI
 @end table
 ETEXI
diff --git a/hmp.h b/hmp.h
index 20f2743..aeec705 100644
--- a/hmp.h
+++ b/hmp.h
@@ -146,5 +146,6 @@ void hmp_hotpluggable_cpus(Monitor *mon, const QDict 
*qdict);
 void hmp_info_vm_generation_id(Monitor *mon, const QDict *qdict);
 void hmp_info_memory_size_summary(Monitor *mon, const QDict *qdict);
 void hmp_info_sev(Monitor *mon, const QDict *qdict);
+void hmp_info_replay(Monitor *mon, const QDict *qdict);
 
 #endif
diff --git a/qapi/misc.json b/qapi/misc.json
index f5988cc..50029b7 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -3098,6 +3098,41 @@
   'data': [ 'none', 'record', 'play' ] }
 
 ##
+# @ReplayInfo:
+#
+# Status of the record/replay mode.
+#
+# @mode: current mode.
+#
+# @filename: name of the record/replay log file.
+#
+# @step: current step number.
+#
+# Since: 3.0
+#
+##
+{ 'struct': 'ReplayInfo',
+  'data': { 'mode': 'ReplayMode', '*filename': 'str', 'step': 'int' } }
+
+##
+# @query-replay:
+#
+# Retrieves the status of the execution record/replay.
+#
+# Returns: structure with the properties of the record/replay.
+#
+# Since: 3.0
+#
+# Example:
+#
+# -> { "execute": "query-replay" }
+# <- { "return": { "mode": "play", "filename": "log.rr", "step": 220414 } }
+#
+##
+{ 'command': 'query-replay',
+  'returns': 'ReplayInfo' }
+
+##
 # @xen-load-devices-state:
 #
 # Load the state of all devices from file. The RAM and the block devices
diff --git a/replay/Makefile.objs b/replay/Makefile.objs
index cee6539..6694e3e 100644
--- a/replay/Makefile.objs
+++ b/replay/Makefile.objs
@@ -6,4 +6,5 @@ common-obj-y += replay-input.o
 common-obj-y += replay-char.o
 common-obj-y += replay-snapshot.o
 common-obj-y += replay-net.o
-common-obj-y += replay-audio.o
\ No newline at end of file
+common-obj-y += replay-audio.o
+common-obj-y += replay-debugging.o
diff --git a/replay/replay-debugging.c b/replay/replay-debugging.c
new file mode 100644
index 000..03e7db8
--- /dev/null
+++ b/replay/replay-debugging.c
@@ -0,0 +1,41 @@
+/*
+ * replay-debugging.c
+ *
+ * Copyright (c) 2010-2018 Institute for System Programming
+ * of the Russian Academy of Sciences.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+#include "sysemu/replay.h"
+#include "replay-internal.h"
+#include "hmp.h"
+#include "monitor/monitor.h"
+#include "qapi/qapi-commands-misc.h"
+
+void hmp_info_replay(Monitor *mon, const QDict *qdict)
+{
+if (replay_mode == REPLAY_MODE_NONE) {
+monitor_printf(mon, "No record/replay\n");
+} else {
+monitor_printf(mon, "%s execution '%s': current step = %"PRId64"\n",
+replay_mode == REPLAY_MODE_RECORD ? "Recording" : "Replaying",
+replay_filename, replay_get_current_step());
+}
+}
+
+ReplayInfo *qmp_query_replay(Error **errp)
+{
+ReplayInfo *retval = g_new0(ReplayInfo, 1);
+retval->mode = replay_mode;
+if (replay_filename) {
+retval->filename = g_strdup(replay_filename);
+retval->has_filename = true;
+}
+retval->step = replay_get_current_step();
+return retval;
+}
diff --git a/replay/replay-internal.h b/replay/replay-internal.h
index ac4b27b..ef82b5e 100644
--- a/replay/replay-internal.h
+++ b/replay/replay-internal.h
@@ -91,6 +91,8 @@ extern ReplayState replay_state;
 
 /* File for replay writing */
 extern FILE *replay_file;
+/*! Name of replay file  */
+extern char *replay_filename;
 
 void replay_put_byte(uint8_t byte);
 void replay_put_event(uint8_t event);
diff --git a/replay/replay.c

[Qemu-devel] [PATCH v4 04/19] replay: don't drain/flush bdrv queue while RR is working

2018-05-28 Thread Pavel Dovgalyuk

In record/replay mode bdrv queue is controlled by replay mechanism.
It does not allow saving or loading the snapshots
when bdrv queue is not empty. Stopping the VM is not blocked by nonempty
queue, but flushing the queue is still impossible there,
because it may cause deadlocks in replay mode.
This patch disables bdrv_drain_all and bdrv_flush_all in
record/replay mode.

Signed-off-by: Pavel Dovgalyuk 
---
 block/io.c |   22 ++
 cpus.c |2 --
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/block/io.c b/block/io.c
index ca96b48..853d5a4 100644
--- a/block/io.c
+++ b/block/io.c
@@ -32,6 +32,7 @@
 #include "qemu/cutils.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
+#include "sysemu/replay.h"
 
 #define NOT_DONE 0x7fff /* used while emulated sync operation in progress 
*/
 
@@ -408,6 +409,13 @@ void bdrv_drain_all_begin(void)
 BdrvNextIterator it;
 GSList *aio_ctxs = NULL, *ctx;
 
+/* bdrv queue is managed by record/replay,
+   waiting for finishing the I/O requests may
+   be infinite */
+if (replay_events_enabled()) {
+return;
+}
+
 /* BDRV_POLL_WHILE() for a node can only be called from its own I/O thread
  * or the main loop AioContext. We potentially use BDRV_POLL_WHILE() on
  * nodes in several different AioContexts, so make sure we're in the main
@@ -459,6 +467,13 @@ void bdrv_drain_all_end(void)
 BlockDriverState *bs;
 BdrvNextIterator it;
 
+/* bdrv queue is managed by record/replay,
+   waiting for finishing the I/O requests may
+   be endless */
+if (replay_events_enabled()) {
+return;
+}
+
 for (bs = bdrv_first(); bs; bs = bdrv_next()) {
 AioContext *aio_context = bdrv_get_aio_context(bs);
 
@@ -1841,6 +1856,13 @@ int bdrv_flush_all(void)
 BlockDriverState *bs = NULL;
 int result = 0;
 
+/* bdrv queue is managed by record/replay,
+   creating new flush request for stopping
+   the VM may break the determinism */
+if (replay_events_enabled()) {
+return result;
+}
+
 for (bs = bdrv_first(); bs; bs = bdrv_next()) {
 AioContext *aio_context = bdrv_get_aio_context(bs);
 int ret;
diff --git a/cpus.c b/cpus.c
index d1f1629..b3bf018 100644
--- a/cpus.c
+++ b/cpus.c
@@ -1016,7 +1016,6 @@ static int do_vm_stop(RunState state, bool send_stop)
 }
 
 bdrv_drain_all();
-replay_disable_events();
 ret = bdrv_flush_all();
 
 return ret;
@@ -2063,7 +2062,6 @@ int vm_prepare_start(void)
 /* We are sending this now, but the CPUs will be resumed shortly later */
 qapi_event_send_resume(_abort);
 
-replay_enable_events();
 cpu_enable_ticks();
 runstate_set(RUN_STATE_RUNNING);
 vm_state_notify(1, RUN_STATE_RUNNING);

[Qemu-devel] [PATCH v4 07/19] migration: introduce icount field for snapshots

2018-05-28 Thread Pavel Dovgalyuk

Saving icount as a parameters of the snapshot allows navigation between
them in the execution replay scenario.
This information can be used for finding a specific snapshot for rewinding
the recorded execution to the specific moment of the time.
E.g., 'reverse step' action needs to load the nearest snapshot which is
prior to the current moment of time .

Signed-off-by: Pavel Dovgalyuk 

--

v2:
 - made icount in SnapshotInfo optional (suggested by Eric Blake)
---
 block/qapi.c |   17 +
 block/qcow2-snapshot.c   |2 ++
 blockdev.c   |   10 ++
 include/block/snapshot.h |1 +
 migration/savevm.c   |5 +
 qapi/block-core.json |5 -
 qapi/block.json  |3 ++-
 7 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/block/qapi.c b/block/qapi.c
index e12968f..a89e180 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -210,6 +210,7 @@ int bdrv_query_snapshot_info_list(BlockDriverState *bs,
 info->date_nsec = sn_tab[i].date_nsec;
 info->vm_clock_sec  = sn_tab[i].vm_clock_nsec / 10;
 info->vm_clock_nsec = sn_tab[i].vm_clock_nsec % 10;
+info->icount= sn_tab[i].icount;
 
 info_list = g_new0(SnapshotInfoList, 1);
 info_list->value = info;
@@ -648,14 +649,15 @@ void bdrv_snapshot_dump(fprintf_function func_fprintf, 
void *f,
 QEMUSnapshotInfo *sn)
 {
 char buf1[128], date_buf[128], clock_buf[128];
+char icount_buf[128] = {0};
 struct tm tm;
 time_t ti;
 int64_t secs;
 
 if (!sn) {
 func_fprintf(f,
- "%-10s%-20s%7s%20s%15s",
- "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK");
+ "%-10s%-18s%7s%20s%13s%11s",
+ "ID", "TAG", "VM SIZE", "DATE", "VM CLOCK", "ICOUNT");
 } else {
 ti = sn->date_sec;
 localtime_r(, );
@@ -668,13 +670,18 @@ void bdrv_snapshot_dump(fprintf_function func_fprintf, 
void *f,
  (int)((secs / 60) % 60),
  (int)(secs % 60),
  (int)((sn->vm_clock_nsec / 100) % 1000));
+if (sn->icount != -1ULL) {
+snprintf(icount_buf, sizeof(icount_buf),
+"%"PRId64, sn->icount);
+}
 func_fprintf(f,
- "%-10s%-20s%7s%20s%15s",
+ "%-10s%-18s%7s%20s%13s%11s",
  sn->id_str, sn->name,
  get_human_readable_size(buf1, sizeof(buf1),
  sn->vm_state_size),
  date_buf,
- clock_buf);
+ clock_buf,
+ icount_buf);
 }
 }
 
@@ -842,6 +849,8 @@ void bdrv_image_info_dump(fprintf_function func_fprintf, 
void *f,
 .date_nsec = elem->value->date_nsec,
 .vm_clock_nsec = elem->value->vm_clock_sec * 10ULL +
  elem->value->vm_clock_nsec,
+.icount = elem->value->has_icount ?
+  elem->value->icount : -1ULL,
 };
 
 pstrcpy(sn.id_str, sizeof(sn.id_str), elem->value->id);
diff --git a/block/qcow2-snapshot.c b/block/qcow2-snapshot.c
index d04553e..0af32c3 100644
--- a/block/qcow2-snapshot.c
+++ b/block/qcow2-snapshot.c
@@ -379,6 +379,7 @@ int qcow2_snapshot_create(BlockDriverState *bs, 
QEMUSnapshotInfo *sn_info)
 sn->date_sec = sn_info->date_sec;
 sn->date_nsec = sn_info->date_nsec;
 sn->vm_clock_nsec = sn_info->vm_clock_nsec;
+sn->icount = sn_info->icount;
 
 /* Allocate the L1 table of the snapshot and copy the current one there. */
 l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
@@ -698,6 +699,7 @@ int qcow2_snapshot_list(BlockDriverState *bs, 
QEMUSnapshotInfo **psn_tab)
 sn_info->date_sec = sn->date_sec;
 sn_info->date_nsec = sn->date_nsec;
 sn_info->vm_clock_nsec = sn->vm_clock_nsec;
+sn_info->icount = sn->icount;
 }
 *psn_tab = sn_tab;
 return s->nb_snapshots;
diff --git a/blockdev.c b/blockdev.c
index 8de95be..80225c4 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -56,6 +56,7 @@
 #include "block/trace.h"
 #include "sysemu/arch_init.h"
 #include "sysemu/qtest.h"
+#include "sysemu/replay.h"
 #include "qemu/cutils.h"
 #include "qemu/help_option.h"
 #include "qemu/throttle-options.h"
@@ -1348,6 +1349,10 @@ SnapshotInfo 
*qmp_blockdev_snapshot_delete_internal_sync(const char *device,
 info->vm_state_size = sn.vm_state_size;
 info->vm_clock_nsec = sn.vm_clock_nsec % 10;
 info->vm_clock_sec = sn.vm_clock_nsec / 10;
+if (sn.icount != -1ULL) {
+info->icount = sn.icount;
+info->has_icount = true;
+}
 
 return info;
 
@@ -1556,6 +1561,11 @@ static void internal_snapshot_prepare(BlkActionState 
*common,
 sn->date_sec = tv.tv_sec;

[Qemu-devel] [PATCH v4 03/19] replay: update docs for record/replay with block devices

2018-05-28 Thread Pavel Dovgalyuk

This patch updates the description of the command lines for using
record/replay with attached block devices.

Signed-off-by: Pavel Dovgalyuk 
---
 docs/replay.txt |   12 +---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/docs/replay.txt b/docs/replay.txt
index 2e21e9c..f7def53 100644
--- a/docs/replay.txt
+++ b/docs/replay.txt
@@ -27,7 +27,7 @@ Usage of the record/replay:
  * First, record the execution with the following command line:
 qemu-system-i386 \
  -icount shift=7,rr=record,rrfile=replay.bin \
- -drive file=disk.qcow2,if=none,id=img-direct \
+ -drive file=disk.qcow2,if=none,snapshot,id=img-direct \
  -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay \
  -device ide-hd,drive=img-blkreplay \
  -netdev user,id=net1 -device rtl8139,netdev=net1 \
@@ -35,7 +35,7 @@ Usage of the record/replay:
  * After recording, you can replay it by using another command line:
 qemu-system-i386 \
  -icount shift=7,rr=replay,rrfile=replay.bin \
- -drive file=disk.qcow2,if=none,id=img-direct \
+ -drive file=disk.qcow2,if=none,snapshot,id=img-direct \
  -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay \
  -device ide-hd,drive=img-blkreplay \
  -netdev user,id=net1 -device rtl8139,netdev=net1 \
@@ -223,7 +223,7 @@ Block devices record/replay module intercepts calls of
 bdrv coroutine functions at the top of block drivers stack.
 To record and replay block operations the drive must be configured
 as following:
- -drive file=disk.qcow2,if=none,id=img-direct
+ -drive file=disk.qcow2,if=none,snapshot,id=img-direct
  -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay
  -device ide-hd,drive=img-blkreplay
 
@@ -252,6 +252,12 @@ This snapshot is created at start of recording and 
restored at start
 of replaying. It also can be loaded while replaying to roll back
 the execution.
 
+'snapshot' flag of the disk image must be removed to save the snapshots
+in the overlay (or original image) instead of using the temporary overlay.
+ -drive file=disk.ovl,if=none,id=img-direct
+ -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay
+ -device ide-hd,drive=img-blkreplay
+
 Use QEMU monitor to create additional snapshots. 'savevm ' command
 created the snapshot and 'loadvm ' restores it. To prevent corruption
 of the original disk image, use overlay files linked to the original images.

[Qemu-devel] [PATCH v4 01/19] block: implement bdrv_snapshot_goto for blkreplay

2018-05-28 Thread Pavel Dovgalyuk

From: Pavel Dovgalyuk 

This patch enables making snapshots with blkreplay used in
block devices.
This function is required to make bdrv_snapshot_goto without
calling .bdrv_open which is not implemented.

Signed-off-by: Pavel Dovgalyuk 
---
 block/blkreplay.c |8 
 1 file changed, 8 insertions(+)

diff --git a/block/blkreplay.c b/block/blkreplay.c
index b016dbe..72b3c7b 100755
--- a/block/blkreplay.c
+++ b/block/blkreplay.c
@@ -130,6 +130,12 @@ static int coroutine_fn 
blkreplay_co_flush(BlockDriverState *bs)
 return ret;
 }
 
+static int blkreplay_snapshot_goto(BlockDriverState *bs,
+   const char *snapshot_id)
+{
+return bdrv_snapshot_goto(bs->file->bs, snapshot_id, NULL);
+}
+
 static BlockDriver bdrv_blkreplay = {
 .format_name= "blkreplay",
 .instance_size  = 0,
@@ -145,6 +151,8 @@ static BlockDriver bdrv_blkreplay = {
 .bdrv_co_pwrite_zeroes  = blkreplay_co_pwrite_zeroes,
 .bdrv_co_pdiscard   = blkreplay_co_pdiscard,
 .bdrv_co_flush  = blkreplay_co_flush,
+
+.bdrv_snapshot_goto = blkreplay_snapshot_goto,
 };
 
 static void bdrv_blkreplay_init(void)

[Qemu-devel] [PATCH v4 02/19] replay: disable default snapshot for record/replay

2018-05-28 Thread Pavel Dovgalyuk

From: Pavel Dovgalyuk 

This patch disables setting '-snapshot' option on by default
in record/replay mode. This is needed for creating vmstates in record
and replay modes.

Signed-off-by: Pavel Dovgalyuk 
---
 vl.c |   10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/vl.c b/vl.c
index 038d7f8..61f5a41 100644
--- a/vl.c
+++ b/vl.c
@@ -3187,7 +3187,13 @@ int main(int argc, char **argv, char **envp)
 drive_add(IF_PFLASH, -1, optarg, PFLASH_OPTS);
 break;
 case QEMU_OPTION_snapshot:
-snapshot = 1;
+{
+Error *blocker = NULL;
+snapshot = 1;
+error_setg(, QERR_REPLAY_NOT_SUPPORTED,
+   "-snapshot");
+replay_add_blocker(blocker);
+}
 break;
 case QEMU_OPTION_numa:
 opts = qemu_opts_parse_noisily(qemu_find_opts("numa"),
@@ -4476,7 +4482,7 @@ int main(int argc, char **argv, char **envp)
 qapi_free_BlockdevOptions(bdo->bdo);
 g_free(bdo);
 }
-if (snapshot || replay_mode != REPLAY_MODE_NONE) {
+if (snapshot) {
 qemu_opts_foreach(qemu_find_opts("drive"), drive_enable_snapshot,
   NULL, NULL);
 }

[Qemu-devel] [PATCH v4 00/19] reverse debugging

2018-05-28 Thread Pavel Dovgalyuk

GDB remote protocol supports reverse debugging of the targets.
It includes 'reverse step' and 'reverse continue' operations.
The first one finds the previous step of the execution,
and the second one is intended to stop at the last breakpoint that
would happen when the program is executed normally.

Reverse debugging is possible in the replay mode, when at least
one snapshot was created at the record or replay phase.
QEMU can use these snapshots for travelling back in time with GDB.

Running the execution in replay mode allows using GDB reverse debugging
commands:
 - reverse-stepi (or rsi): Steps one instruction to the past.
   QEMU loads on of the prior snapshots and proceeds to the desired
   instruction forward. When that step is reaches, execution stops.
 - reverse-continue (or rc): Runs execution "backwards".
   QEMU tries to find breakpoint or watchpoint by loaded prior snapshot
   and replaying the execution. Then QEMU loads snapshots again and
   replays to the latest breakpoint. When there are no breakpoints in
   the examined section of the execution, QEMU finds one more snapshot
   and tries again. After the first snapshot is processed, execution
   stops at this snapshot.

The set of patches include the following modifications:
 - gdbstub update for reverse debugging support
 - functions that automatically perform reverse step and reverse
   continue operations
 - hmp/qmp commands for manipulating the replay process
 - improvement of the snapshotting for saving the execution step
   in the snapshot parameters
 - other record/replay fixes

The patches are available in the repository:
https://github.com/ispras/qemu/tree/rr-180524

v4 changes:
 - changed 'since 2.13' to 'since 3.0' in json (as suggested by Eric Blake)

v3 changes:
 - Fixed PS/2 bug with save/load vm, which caused failures of the replay.
   The patch was sent separately.
 - Rebased to the new code base.
 - Minor fixes.

v2 changes:
 - documented reverse debugging
 - fixed start vmstate loading in record mode
 - documented qcow2 changes (as suggested by Eric Blake)
 - made icount SnapshotInfo field optional (as suggested by Eric Blake)
 - renamed qmp commands (as suggested by Eric Blake)
 - minor changes

---

Pavel Dovgalyuk (19):
  block: implement bdrv_snapshot_goto for blkreplay
  replay: disable default snapshot for record/replay
  replay: update docs for record/replay with block devices
  replay: don't drain/flush bdrv queue while RR is working
  replay: finish record/replay before closing the disks
  qcow2: introduce icount field for snapshots
  migration: introduce icount field for snapshots
  replay: introduce info hmp/qmp command
  replay: introduce breakpoint at the specified step
  replay: implement replay-seek command to proceed to the desired step
  replay: flush events when exiting
  timer: remove replay clock probe in deadline calculation
  replay: refine replay-time module
  translator: fix breakpoint processing
  replay: flush rr queue before loading the vmstate
  gdbstub: add reverse step support in replay mode
  gdbstub: add reverse continue support in replay mode
  replay: describe reverse debugging in docs/replay.txt
  replay: allow loading any snapshots before recording


 accel/tcg/translator.c|9 +
 block/blkreplay.c |8 +
 block/io.c|   22 +++
 block/qapi.c  |   17 ++-
 block/qcow2-snapshot.c|9 +
 block/qcow2.h |2 
 blockdev.c|   10 ++
 cpus.c|   19 ++-
 docs/interop/qcow2.txt|4 +
 docs/replay.txt   |   45 +++
 exec.c|6 +
 gdbstub.c |   50 +++-
 hmp-commands-info.hx  |   14 ++
 hmp-commands.hx   |   30 +
 hmp.h |3 
 include/block/snapshot.h  |1 
 include/sysemu/replay.h   |   18 +++
 migration/savevm.c|   15 +-
 qapi/block-core.json  |5 +
 qapi/block.json   |3 
 qapi/misc.json|   68 +++
 replay/Makefile.objs  |3 
 replay/replay-debugging.c |  287 +
 replay/replay-events.c|   14 --
 replay/replay-internal.h  |   10 +-
 replay/replay-snapshot.c  |   17 ++-
 replay/replay-time.c  |   27 ++--
 replay/replay.c   |   22 +++
 stubs/replay.c|   10 ++
 util/qemu-timer.c |   11 --
 vl.c  |   18 ++-
 31 files changed, 696 insertions(+), 81 deletions(-)
 create mode 100644 replay/replay-debugging.c

-- 
Pavel Dovgalyuk

Re: [Qemu-devel] [PATCH v7 4/4] monitor: add lock to protect mon_fdsets

2018-05-28 Thread Peter Xu

On Thu, May 24, 2018 at 11:03:55AM +0200, Markus Armbruster wrote:
> Peter Xu  writes:
> 
> > Similar to previous patch, but introduce a new global big lock for
> > mon_fdsets.  Take it where needed.
> 
> The previous patch is "monitor: more comments on lock-free
> fleids/funcs".  Sure you mean that one?

No... I'll remove that sentence directly:

  "Introduce a new global big lock for mon_fdsets.  Take it where needed."

> >
> > The monitor_fdset_get_fd() handling is a bit tricky: now we need to call
> > qemu_mutex_unlock() which might pollute errno, so we need to make sure
> > the correct errno be passed up to the callers.  To make things simpler,
> > we let monitor_fdset_get_fd() return the -errno directly when error
> > happens, then in qemu_open() we translate it back into errno.
> 
> Suggest s/translate/move/.

Okay.

> >
> > Signed-off-by: Peter Xu 
> > ---
> >  monitor.c| 70 +---
> >  util/osdep.c |  3 ++-
> >  2 files changed, 58 insertions(+), 15 deletions(-)
> >
> > diff --git a/monitor.c b/monitor.c
> > index f01589f945..6266ff65c4 100644
> > --- a/monitor.c
> > +++ b/monitor.c
> > @@ -271,6 +271,9 @@ typedef struct QMPRequest QMPRequest;
> >  /* Protects mon_list, monitor_event_state.  */
> 
> Not this patch's problem: there is no monitor_event_state.  Screwed up
> in commit d622cb5879c.  I guess it's monitor_qapi_event_state.

I'll append another patch to do that, and move these fields together.

> 
> >  static QemuMutex monitor_lock;
> >  
> > +/* Protects mon_fdsets */
> > +static QemuMutex mon_fdsets_lock;
> > +
> >  static QTAILQ_HEAD(mon_list, Monitor) mon_list;
> >  static QLIST_HEAD(mon_fdsets, MonFdset) mon_fdsets;
> >  static int mon_refcount;
> 
> Suggest to move mon_fdsets next to the lock protecting it.

Will do.

> 
> > @@ -287,6 +290,16 @@ static QEMUClockType event_clock_type = 
> > QEMU_CLOCK_REALTIME;
> >  static void monitor_command_cb(void *opaque, const char *cmdline,
> > void *readline_opaque);
> >  
> > +/*
> > + * This lock can be used very early, even during param parsing.
> 
> Do you mean CLI parsing?

Yes, will s/param/CLI/.

> 
> > + * Meanwhile it can also be used even at the end of main.  Let's keep
> > + * it initialized for the whole lifecycle of QEMU.
> > + */
> 
> Awkward question, since our main() is such a tangled mess, but here goes
> anyway...  The existing place to initialize monitor.c's globals is
> monitor_init_globals().  But that one runs too late, I guess:
> parse_add_fd() runs earlier, and calls monitor_fdset_add_fd().  Unclean
> even without this lock; no module should be used before its
> initialization function runs.  Sure we can't run monitor_init_globals()
> sufficiently early?

Please see the comment for monitor_init_globals():

/*
 * Note: qtest_enabled() (which is used in monitor_qapi_event_init())
 * depends on configure_accelerator() above.
 */
monitor_init_globals();

So I guess it won't work to directly move it earlier.  The init
dependency of QEMU is really complicated.  I'll be fine now that we
mark totally independent init functions (like this one, which is a
mutex init only) as constructor, then we can save some time on
ordering issue.

> 
> > +static void __attribute__((constructor)) mon_fdsets_lock_init(void)
> > +{
> > +qemu_mutex_init(_fdsets_lock);
> > +}
> > +
> >  /**
> >   * Is @mon a QMP monitor?
> >   */
> > @@ -2316,9 +2329,11 @@ static void monitor_fdsets_cleanup(void)
> >  MonFdset *mon_fdset;
> >  MonFdset *mon_fdset_next;
> >  
> > +qemu_mutex_lock(_fdsets_lock);
> >  QLIST_FOREACH_SAFE(mon_fdset, _fdsets, next, mon_fdset_next) {
> >  monitor_fdset_cleanup(mon_fdset);
> >  }
> > +qemu_mutex_unlock(_fdsets_lock);
> >  }
> >  
> >  AddfdInfo *qmp_add_fd(bool has_fdset_id, int64_t fdset_id, bool has_opaque,
> > @@ -2353,6 +2368,7 @@ void qmp_remove_fd(int64_t fdset_id, bool has_fd, 
> > int64_t fd, Error **errp)
> >  MonFdsetFd *mon_fdset_fd;
> >  char fd_str[60];
> >  
> > +qemu_mutex_lock(_fdsets_lock);
> >  QLIST_FOREACH(mon_fdset, _fdsets, next) {
> >  if (mon_fdset->id != fdset_id) {
> >  continue;
> > @@ -2372,10 +2388,12 @@ void qmp_remove_fd(int64_t fdset_id, bool has_fd, 
> > int64_t fd, Error **errp)
> >  goto error;
> >  }
> >  monitor_fdset_cleanup(mon_fdset);
> > +qemu_mutex_unlock(_fdsets_lock);
> >  return;
> >  }
> >  
> >  error:
> > +qemu_mutex_unlock(_fdsets_lock);
> >  if (has_fd) {
> >  snprintf(fd_str, sizeof(fd_str), "fdset-id:%" PRId64 ", fd:%" 
> > PRId64,
> >   fdset_id, fd);
> > @@ -2391,6 +2409,7 @@ FdsetInfoList *qmp_query_fdsets(Error **errp)
> >  MonFdsetFd *mon_fdset_fd;
> >  FdsetInfoList *fdset_list = NULL;
> >  
> > +qemu_mutex_lock(_fdsets_lock);
> >  QLIST_FOREACH(mon_fdset,

Re: [Qemu-devel] [PATCH 1/4] spapr: remove irq_hint parameter from spapr_irq_alloc()

2018-05-28 Thread Cédric Le Goater

On 05/28/2018 08:17 AM, Thomas Huth wrote:
> On 25.05.2018 16:02, Greg Kurz wrote:
>> On Fri, 18 May 2018 18:44:02 +0200
>> Cédric Le Goater  wrote:
>>
>>> This IRQ number hint can possibly be used by the VIO devices if the
>>> "irq" property is defined on the command line but it seems it is never
>>> the case. It is not used in libvirt for instance. So, let's remove it
>>> to simplify future changes.
>>>
>>
>> Setting an irq manually looks a bit anachronistic. I doubt anyone would
>> do that nowadays, and the patch does a nice cleanup. So this looks like
>> a good idea.
> [...]
>>> diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
>>> index 472dd6f33a96..cc064f64fccf 100644
>>> --- a/hw/ppc/spapr_vio.c
>>> +++ b/hw/ppc/spapr_vio.c
>>> @@ -455,7 +455,7 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, 
>>> Error **errp)
>>>  dev->qdev.id = id;
>>>  }
>>>  
>>> -dev->irq = spapr_irq_alloc(spapr, dev->irq, false, _err);
>>> +dev->irq = spapr_irq_alloc(spapr, false, _err);
>>
>> Silently breaking "irq" like this looks wrong. I'd rather officially remove
>> it first (ie, kill spapr_vio_props, -5 lines in spapr_vio.c).
>>
>> Of course, this raises the question of interface deprecation, and it should
>> theoretically follow the process described at:
>>
>> https://wiki.qemu.org/Features/LegacyRemoval#Rules_for_removing_an_interface
>>
>> Cc'ing Thomas, our Chief Deprecation Officer, for insights :)
> 
> The property is a public interface. Just because it's not used by
> libvirt does not mean that nobody is using it. So yes, please follow the
> rules and mark it as deprecated first for two release, before you really
> remove it.

This "irq" property is a problem to introduce a new static layout of IRQ 
numbers. It is in complete opposition. 

Can we keep it as it is for old pseries machine (settable) and ignore it 
for newer ? Would that be fine ?

Thanks,

C.

Re: [Qemu-devel] [Qemu-block] Some question about savem/qcow2 incremental snapshot

2018-05-28 Thread He, Junyan

Hi yang,

Alibaba made this proposal for NVDimm snapshot optimization, 
can you give some advice about this discussion?

Thanks



-Original Message-
From: Stefan Hajnoczi [mailto:stefa...@gmail.com] 
Sent: Monday, May 14, 2018 9:49 PM
To: Kevin Wolf 
Cc: Stefan Hajnoczi ; Pankaj Gupta ; 
He, Junyan ; qemu-devel@nongnu.org; qemu block 
; Max Reitz 
Subject: Re: [Qemu-block] [Qemu-devel] Some question about savem/qcow2 
incremental snapshot

On Fri, May 11, 2018 at 07:25:31PM +0200, Kevin Wolf wrote:
> Am 10.05.2018 um 10:26 hat Stefan Hajnoczi geschrieben:
> > On Wed, May 09, 2018 at 07:54:31PM +0200, Max Reitz wrote:
> > > On 2018-05-09 12:16, Stefan Hajnoczi wrote:
> > > > On Tue, May 08, 2018 at 05:03:09PM +0200, Kevin Wolf wrote:
> > > >> Am 08.05.2018 um 16:41 hat Eric Blake geschrieben:
> > > >>> On 12/25/2017 01:33 AM, He Junyan wrote:
> > > >> 2. Make the nvdimm device use the QEMU block layer so that it is backed
> > > >>by a non-raw disk image (such as a qcow2 file representing the
> > > >>content of the nvdimm) that supports snapshots.
> > > >>
> > > >>This part is hard because it requires some completely new
> > > >>infrastructure such as mapping clusters of the image file to guest
> > > >>pages, and doing cluster allocation (including the copy on write
> > > >>logic) by handling guest page faults.
> > > >>
> > > >> I think it makes sense to invest some effort into such 
> > > >> interfaces, but be prepared for a long journey.
> > > > 
> > > > I like the suggestion but it needs to be followed up with a 
> > > > concrete design that is feasible and fair for Junyan and others to 
> > > > implement.
> > > > Otherwise the "long journey" is really just a way of rejecting 
> > > > this feature.
> > > > 
> > > > Let's discuss the details of using the block layer for NVDIMM 
> > > > and try to come up with a plan.
> > > > 
> > > > The biggest issue with using the block layer is that persistent 
> > > > memory applications use load/store instructions to directly 
> > > > access data.  This is fundamentally different from the block 
> > > > layer, which transfers blocks of data to and from the device.
> > > > 
> > > > Because of block DMA, QEMU is able to perform processing at each 
> > > > block driver graph node.  This doesn't exist for persistent 
> > > > memory because software does not trap I/O.  Therefore the 
> > > > concept of filter nodes doesn't make sense for persistent memory 
> > > > - we certainly do not want to trap every I/O because performance would 
> > > > be terrible.
> > > > 
> > > > Another difference is that persistent memory I/O is synchronous.
> > > > Load/store instructions execute quickly.  Perhaps we could use 
> > > > KVM async page faults in cases where QEMU needs to perform 
> > > > processing, but again the performance would be bad.
> > > 
> > > Let me first say that I have no idea how the interface to NVDIMM looks.
> > > I just assume it works pretty much like normal RAM (so the 
> > > interface is just that it’s a part of the physical address space).
> > > 
> > > Also, it sounds a bit like you are already discarding my idea, but 
> > > here goes anyway.
> > > 
> > > Would it be possible to introduce a buffering block driver that 
> > > presents the guest an area of RAM/NVDIMM through an NVDIMM 
> > > interface (so I suppose as part of the guest address space)?  For 
> > > writing, we’d keep a dirty bitmap on it, and then we’d 
> > > asynchronously move the dirty areas through the block layer, so 
> > > basically like mirror.  On flushing, we’d block until everything is clean.
> > > 
> > > For reading, we’d follow a COR/stream model, basically, where 
> > > everything is unpopulated in the beginning and everything is 
> > > loaded through the block layer both asynchronously all the time 
> > > and on-demand whenever the guest needs something that has not been loaded 
> > > yet.
> > > 
> > > Now I notice that that looks pretty much like a backing file model 
> > > where we constantly run both a stream and a commit job at the same time.
> > > 
> > > The user could decide how much memory to use for the buffer, so it 
> > > could either hold everything or be partially unallocated.
> > > 
> > > You’d probably want to back the buffer by NVDIMM normally, so that 
> > > nothing is lost on crashes (though this would imply that for 
> > > partial allocation the buffering block driver would need to know 
> > > the mapping between the area in real NVDIMM and its virtual 
> > > representation of it).
> > > 
> > > Just my two cents while scanning through qemu-block to find emails 
> > > that don’t actually concern me...
> > 
> > The guest kernel already implements this - it's the page cache and 
> > the block layer!
> > 
> > Doing it in QEMU with dirty memory logging enabled is less efficient 
> > than doing it in the guest.
> > 
> >

Re: [Qemu-devel] [PATCH v7 4/4] monitor: add lock to protect mon_fdsets

2018-05-28 Thread Peter Xu

On Fri, May 25, 2018 at 10:01:57AM +0100, Stefan Hajnoczi wrote:
> On Fri, May 25, 2018 at 11:30:22AM +0800, Peter Xu wrote:
> > On Thu, May 24, 2018 at 10:28:37AM +0100, Stefan Hajnoczi wrote:
> > > On Thu, May 24, 2018 at 12:39:52PM +0800, Peter Xu wrote:
> > > >  int monitor_fdset_get_fd(int64_t fdset_id, int flags)
> > > >  {
> > > > -#ifndef _WIN32
> > > > +#ifdef _WIN32
> > > > +return -ENOENT;
> > > 
> > > stubs/fdset.c:monitor_fdset_get_fd() should return -ENOENT instead of -1
> > > now.
> > 
> > Yes that's intended.  That's actually a suggestion from Markus since
> > changing the return code will simplify the code.
> 
> No, I understand that part.  I'm pointing out that
> stubs/fdset.c:monitor_fdset_get_fd() still returns -1 because it was not
> updated by this patch.
> 
> Since this patch changes the return value to -errno, the stub function
> should be updated to return a sensible -errno value too.

Sorry I overlooked.  You are right, I'll touch that up.

Regards,

-- 
Peter Xu

Re: [Qemu-devel] [PATCH v7 3/4] monitor: more comments on lock-free fleids/funcs

2018-05-28 Thread Peter Xu

On Thu, May 24, 2018 at 01:16:11PM +0200, Markus Armbruster wrote:
> Peter Xu  writes:
> 
> > On Thu, May 24, 2018 at 10:41:09AM +0200, Markus Armbruster wrote:
> >> Regarding the subject: what are "fleids"?
> >
> > Ouch. :( I meant the word "fields".
> 
> Can touch that up in my tree.

[...]

> >> > +/*
> >> > + * State used only in the thread "owning" the monitor.
> >> > + * If @use_io_thr, this is mon_global.mon_iothread.
> >> > + * Else, it's the main thread.
> >> > + * These members can be safely accessed without locks.
> >> > + */
> >> >  ReadLineState *rs;
> >> > +// other members that aren't shared
> >> 
> >> Whoops, misunderstanding!  I meant this line as a placeholder, to
> >> further illustrate my intent.  It should not be committed.  If we need a
> >> comment here, it should use /* traditional comment syntax */.
> >
> > My fault.
> >
> > Please let me know if you want me to repost...  Thanks,
> 
> Can touch that up, too.  No respin needed unless something more complex
> comes up.

I'll modify these two parts in my next post.

Regards,

-- 
Peter Xu

Re: [Qemu-devel] [PATCH 1/4] spapr: remove irq_hint parameter from spapr_irq_alloc()

2018-05-28 Thread Thomas Huth

On 25.05.2018 16:02, Greg Kurz wrote:
> On Fri, 18 May 2018 18:44:02 +0200
> Cédric Le Goater  wrote:
> 
>> This IRQ number hint can possibly be used by the VIO devices if the
>> "irq" property is defined on the command line but it seems it is never
>> the case. It is not used in libvirt for instance. So, let's remove it
>> to simplify future changes.
>>
> 
> Setting an irq manually looks a bit anachronistic. I doubt anyone would
> do that nowadays, and the patch does a nice cleanup. So this looks like
> a good idea.
[...]
>> diff --git a/hw/ppc/spapr_vio.c b/hw/ppc/spapr_vio.c
>> index 472dd6f33a96..cc064f64fccf 100644
>> --- a/hw/ppc/spapr_vio.c
>> +++ b/hw/ppc/spapr_vio.c
>> @@ -455,7 +455,7 @@ static void spapr_vio_busdev_realize(DeviceState *qdev, 
>> Error **errp)
>>  dev->qdev.id = id;
>>  }
>>  
>> -dev->irq = spapr_irq_alloc(spapr, dev->irq, false, _err);
>> +dev->irq = spapr_irq_alloc(spapr, false, _err);
> 
> Silently breaking "irq" like this looks wrong. I'd rather officially remove
> it first (ie, kill spapr_vio_props, -5 lines in spapr_vio.c).
> 
> Of course, this raises the question of interface deprecation, and it should
> theoretically follow the process described at:
> 
> https://wiki.qemu.org/Features/LegacyRemoval#Rules_for_removing_an_interface
> 
> Cc'ing Thomas, our Chief Deprecation Officer, for insights :)

The property is a public interface. Just because it's not used by
libvirt does not mean that nobody is using it. So yes, please follow the
rules and mark it as deprecated first for two release, before you really
remove it.

 Thomas

[Qemu-devel] [RFC PATCH] hw/misc/unimp: Add create_unimplemented_subregion_device()

2018-05-28 Thread Philippe Mathieu-Daudé

The create_unimplemented_device() function is very useful to
register unimplemented devices to the SysBus 'absolute' address.

Some devices are modelled as container of memory subregions,
the subregions being mmio-mapped relatively to the container
base address.

With these container devices, the current create_unimplemented_device()
does not work.
Add a function to remove the current limitation, and allow
containerized devices to use the helpful UnimplementedDevice.

Signed-off-by: Philippe Mathieu-Daudé 
---
I don't like the function name much, I first tried with
create_unimplemented_device_mr() :( Would it be cleaner renaming current
create_unimplemented_device() -> create_unimplemented_sysbus_device()?

Also, is this useful to have this code inlined?
---
 include/hw/misc/unimp.h | 42 +++--
 1 file changed, 36 insertions(+), 6 deletions(-)

diff --git a/include/hw/misc/unimp.h b/include/hw/misc/unimp.h
index 2a291ca42d..7ae1ace885 100644
--- a/include/hw/misc/unimp.h
+++ b/include/hw/misc/unimp.h
@@ -23,9 +23,12 @@ typedef struct {
 } UnimplementedDeviceState;
 
 /**
- * create_unimplemented_device: create and map a dummy device
+ * create_unimplemented_subregion_device: create and map a dummy device
+ *
+ * @mr: the #MemoryRegion to contain the new device.
  * @name: name of the device for debug logging
- * @base: base address of the device's MMIO region
+ * @addr: base address of the device's MMIO region, or
+ *offset relative to @mr where the device is added.
  * @size: size of the device's MMIO region
  *
  * This utility function creates and maps an instance of unimplemented-device,
@@ -35,9 +38,10 @@ typedef struct {
  * use it to cover a large region and then map other devices on top of it
  * if necessary.
  */
-static inline void create_unimplemented_device(const char *name,
-   hwaddr base,
-   hwaddr size)
+static inline void create_unimplemented_subregion_device(MemoryRegion *mr,
+ const char *name,
+ hwaddr addr,
+ hwaddr size)
 {
 DeviceState *dev = qdev_create(NULL, TYPE_UNIMPLEMENTED_DEVICE);
 
@@ -45,7 +49,33 @@ static inline void create_unimplemented_device(const char 
*name,
 qdev_prop_set_uint64(dev, "size", size);
 qdev_init_nofail(dev);
 
-sysbus_mmio_map_overlap(SYS_BUS_DEVICE(dev), 0, base, -1000);
+if (mr) {
+MemoryRegion *submr = sysbus_mmio_get_region(SYS_BUS_DEVICE(dev), 0);
+memory_region_add_subregion_overlap(mr, addr, submr, -1000);
+} else {
+sysbus_mmio_map_overlap(SYS_BUS_DEVICE(dev), 0, addr, -1000);
+}
+}
+
+/**
+ * create_unimplemented_device: create and map a dummy SysBus device
+ *
+ * @name: name of the device for debug logging
+ * @base: base address of the device's MMIO region
+ * @size: size of the device's MMIO region
+ *
+ * This utility function creates and maps an instance of unimplemented-device,
+ * which is a dummy device which simply logs all guest accesses to
+ * it via the qemu_log LOG_UNIMP debug log.
+ * The device is mapped at priority -1000, which means that you can
+ * use it to cover a large region and then map other devices on top of it
+ * if necessary.
+ */
+static inline void create_unimplemented_device(const char *name,
+   hwaddr base,
+   hwaddr size)
+{
+create_unimplemented_subregion_device(NULL, name, base, size);
 }
 
 #endif
-- 
2.17.0

< 1 2

101 - 164 of 164 matches

Mail list logo