date:20181114

Re: [Qemu-devel] [PATCH] simple firmware.json test tool

2018-11-14 Thread Gerd Hoffmann

  Hi,

> > Perhaps print a few shell commands first? Such as:
> > 
> >(
> >  VARSTORE=$(mktemp)
> >  trap 'rm -f -- "$VARSTORE"' EXIT
> >  cat -- '[VARSTORE_TEMPLATE]' >> "$VARSTORE"
> >  qemu ...
> >)
> > 
> > It really does take separate actions, just like when you create a new
> > disk image with "qemu-img".

> Any plan to respin this patch?

-ENOTIME[1].  Also no really good idea how to handle varstore.

I'd like to have something simple, so it is easy run qemu with the
correct arguments.  Just ...

  qemu-system-$arch $(qemu-firmware /path/to/firmware.json)

... would be cool, but varstore handling isn't going to fly that way.

Maybe we really have to print some shell commands and set some specific
variables, so one can do something like this:

  eval $(qemu-firmware /path/to/firmware.json)
  qemu-system-${QEMU_FIRMWARE_ARCH} ${QEMU_FIRMWARE_ARGS}

cheers,
  Gerd

[1] feel free to pick it up.

Re: [Qemu-devel] [PATCH 5/6] accel/tcg: Return -1 for execution from MMIO regions in get_page_addr_code()

2018-11-14 Thread Richard Henderson

On 11/14/18 6:19 PM, Thomas Huth wrote:
> Program received signal SIGSEGV, Segmentation fault.
> [...]
> (gdb) bt
> #0  0x55addc68 in onenand_read (opaque=0x57600600, addr=98304, 
> size=4) at hw/block/onenand.c:612

So the crash is an off-by-one on the line above:

--- a/hw/block/onenand.c
+++ b/hw/block/onenand.c
@@ -608,7 +608,7 @@ static uint64_t onenand_read(void *opaque, hwaddr addr,
 int offset = addr >> s->shift;

 switch (offset) {
-case 0x ... 0xc000:
+case 0x ... 0xbfff:
 return lduw_le_p(s->boot[0] + addr);

 case 0xf000:   /* Manufacturer ID */

as the memory segment has size 0xc000.

The guest will now eventually crash with

onenand_read: unknown OneNAND register c000
...
onenand_read: unknown OneNAND register fefe
qemu: hardware error: onenand_read: implement ECC

CPU #0:
R00= R01= R02= R03=
R04= R05= R06= R07=
R08= R09= R10= R11=
R12= R13= R14= R15=0001fe04
PSR=41d3 -Z-- A svc32
s00= s01= d00=
s02= s03= d01=
s04= s05= d02=
s06= s07= d03=
s08= s09= d04=
s10= s11= d05=
s12= s13= d06=
s14= s15= d07=
s16= s17= d08=
s18= s19= d09=
s20= s21= d10=
s22= s23= d11=
s24= s25= d12=
s26= s27= d13=
s28= s29= d14=
s30= s31= d15=
FPSCR: 
Aborted (core dumped)

I'll note that fprintf at the end of onenand_read should be
qemu_log(LOG_GUEST_ERROR) instead.


r~

[Qemu-devel] [PATCH] 9p: take write lock on fid path updates

2018-11-14 Thread Greg Kurz

Recent commit 5b76ef50f62079a fixed a race where v9fs_co_open2() could
possibly overwrite a fid path with v9fs_path_copy() while it is being
accessed by some other thread, ie, use-after-free that can be detected
by ASAN with a custom 9p client.

It turns out that the same can happen at several locations where
v9fs_path_copy() is used to set the fid path. The fix is again to
take the write lock.

Cc: P J P 
Reported-by: zhibin hu 
Signed-off-by: Greg Kurz 
---
 hw/9pfs/9p.c |   15 +++
 1 file changed, 15 insertions(+)

diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
index eef289e394d4..267a25533b77 100644
--- a/hw/9pfs/9p.c
+++ b/hw/9pfs/9p.c
@@ -1391,7 +1391,9 @@ static void coroutine_fn v9fs_walk(void *opaque)
 err = -EINVAL;
 goto out;
 }
+v9fs_path_write_lock(s);
 v9fs_path_copy(>path, );
+v9fs_path_unlock(s);
 } else {
 newfidp = alloc_fid(s, newfid);
 if (newfidp == NULL) {
@@ -2160,6 +2162,7 @@ static void coroutine_fn v9fs_create(void *opaque)
 V9fsString extension;
 int iounit;
 V9fsPDU *pdu = opaque;
+V9fsState *s = pdu->s;
 
 v9fs_path_init();
 v9fs_string_init();
@@ -2200,7 +2203,9 @@ static void coroutine_fn v9fs_create(void *opaque)
 if (err < 0) {
 goto out;
 }
+v9fs_path_write_lock(s);
 v9fs_path_copy(>path, );
+v9fs_path_unlock(s);
 err = v9fs_co_opendir(pdu, fidp);
 if (err < 0) {
 goto out;
@@ -2216,7 +2221,9 @@ static void coroutine_fn v9fs_create(void *opaque)
 if (err < 0) {
 goto out;
 }
+v9fs_path_write_lock(s);
 v9fs_path_copy(>path, );
+v9fs_path_unlock(s);
 } else if (perm & P9_STAT_MODE_LINK) {
 int32_t ofid = atoi(extension.data);
 V9fsFidState *ofidp = get_fid(pdu, ofid);
@@ -2234,7 +2241,9 @@ static void coroutine_fn v9fs_create(void *opaque)
 fidp->fid_type = P9_FID_NONE;
 goto out;
 }
+v9fs_path_write_lock(s);
 v9fs_path_copy(>path, );
+v9fs_path_unlock(s);
 err = v9fs_co_lstat(pdu, >path, );
 if (err < 0) {
 fidp->fid_type = P9_FID_NONE;
@@ -2272,7 +2281,9 @@ static void coroutine_fn v9fs_create(void *opaque)
 if (err < 0) {
 goto out;
 }
+v9fs_path_write_lock(s);
 v9fs_path_copy(>path, );
+v9fs_path_unlock(s);
 } else if (perm & P9_STAT_MODE_NAMED_PIPE) {
 err = v9fs_co_mknod(pdu, fidp, , fidp->uid, -1,
 0, S_IFIFO | (perm & 0777), );
@@ -2283,7 +2294,9 @@ static void coroutine_fn v9fs_create(void *opaque)
 if (err < 0) {
 goto out;
 }
+v9fs_path_write_lock(s);
 v9fs_path_copy(>path, );
+v9fs_path_unlock(s);
 } else if (perm & P9_STAT_MODE_SOCKET) {
 err = v9fs_co_mknod(pdu, fidp, , fidp->uid, -1,
 0, S_IFSOCK | (perm & 0777), );
@@ -2294,7 +2307,9 @@ static void coroutine_fn v9fs_create(void *opaque)
 if (err < 0) {
 goto out;
 }
+v9fs_path_write_lock(s);
 v9fs_path_copy(>path, );
+v9fs_path_unlock(s);
 } else {
 err = v9fs_co_open2(pdu, fidp, , -1,
 omode_to_uflags(mode)|O_CREAT, perm, );

[Qemu-devel] [PATCH] target: hax: replace g_malloc with g_malloc0

2018-11-14 Thread Li Qiang

And also the g_malloc doesn't need check return value,
remove it.

Cc: qemu-triv...@nongnu.org

Signed-off-by: Li Qiang 
---
 target/i386/hax-all.c | 15 +++
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/target/i386/hax-all.c b/target/i386/hax-all.c
index d2e512856b..a460c605af 100644
--- a/target/i386/hax-all.c
+++ b/target/i386/hax-all.c
@@ -154,13 +154,7 @@ int hax_vcpu_create(int id)
 return 0;
 }
 
-vcpu = g_malloc(sizeof(struct hax_vcpu_state));
-if (!vcpu) {
-fprintf(stderr, "Failed to alloc vcpu state\n");
-return -ENOMEM;
-}
-
-memset(vcpu, 0, sizeof(struct hax_vcpu_state));
+vcpu = g_malloc0(sizeof(struct hax_vcpu_state));
 
 ret = hax_host_create_vcpu(hax_global.vm->fd, id);
 if (ret) {
@@ -250,11 +244,8 @@ struct hax_vm *hax_vm_create(struct hax_state *hax)
 return hax->vm;
 }
 
-vm = g_malloc(sizeof(struct hax_vm));
-if (!vm) {
-return NULL;
-}
-memset(vm, 0, sizeof(struct hax_vm));
+vm = g_malloc0(sizeof(struct hax_vm));
+
 ret = hax_host_create_vm(hax, _id);
 if (ret) {
 fprintf(stderr, "Failed to create vm %x\n", ret);
-- 
2.11.0

Re: [Qemu-devel] [RFC for-3.2 PATCH 0/7] pcie: Enhanced link speed and width support

2018-11-14 Thread geoff--- via Qemu-devel

I can confirm that these patches work as expected. Thank you kindly Alex 
for your hard work!


Tested-by: Geoffrey McRae 

On 2018-11-15 07:50, Alex Williamson wrote:

QEMU exposes gen1 PCI-express interconnect devices supporting only
2.5GT/s and x1 width.  It might not seem obvious that a virtual
bandwidth limitation can result in a real performance degradation, but
it's been reported that in some configurations assigned GPUs might not
scale their link speed up to the maximum supported value if the
downstream port above it only advertises limited link support.

As proposed[1] this series effectively implements virtual link
negotiation on downstream ports and enhances the generic PCIe root
port to allow user configurable speeds and widths.  The "negotiation"
simply mirrors the link status of the connected downstream device
providing the appearance of dynamic link speed scaling to match the
endpoint device.  Not yet implemented from the proposal is support
for globally updating defaults based on machine type, though the
foundation is provided here by allowing supporting PCIESlots to
implement an instance_init callback which can call into a common
helper for this.

I have not specifically tested migration with this, but we already
consider LNKSTA to be dynamic and the other changes implemented here
are static config space changes with no changes being implemented for
devices using default values, ie. they should be compatible by virtue
of existing config space migration support.

I think I've covered the required link related registers to support
PCIe 4.0, but please let me know if I've missed any.

Testing and feedback appreciated, patch 6/7 provides example qemu:arg
options and requirements to use with existing libvirt.  Native libvirt
support TBD.  Thanks,

Alex

[1] https://lists.gnu.org/archive/html/qemu-devel/2018-10/msg03086.html

---

Alex Williamson (7):
  pcie: Create enums for link speed and width
  pci: Sync PCIe downstream port LNKSTA on read
  qapi: Define PCIe link speed and width properties
  pcie: Add link speed and width fields to PCIESlot
  pcie: Fill PCIESlot link fields to support higher speeds and 
widths
  pcie: Allow generic PCIe root port to specify link speed and 
width

  vfio/pci: Remove PCIe Link Status emulation


 hw/core/qdev-properties.c  |  178 


 hw/pci-bridge/gen_pcie_root_port.c |2
 hw/pci-bridge/pcie_root_port.c |   14 +++
 hw/pci/pci.c   |4 +
 hw/pci/pcie.c  |  118 +++-
 hw/vfio/pci.c  |9 --
 include/hw/pci/pci.h   |   13 +++
 include/hw/pci/pcie.h  |1
 include/hw/pci/pcie_port.h |4 +
 include/hw/pci/pcie_regs.h |   23 -
 include/hw/qdev-properties.h   |8 ++
 qapi/common.json   |   42 
 12 files changed, 404 insertions(+), 12 deletions(-)

[Qemu-devel] [PATCH qemu] configure/fdt: Use more strict test for libfdt version

2018-11-14 Thread Alexey Kardashevskiy

The libfdt installed in the system is preferred to the dtc submodule by
default. The recent libfdt update added a new symbol - fdt_check_full -
and this breaks compile if there is an older libfdt installed in
the system.

This changes the test to force ./configure into using newer libfdt.

Signed-off-by: Alexey Kardashevskiy 
---
 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure b/configure
index 74e313a..7e16a6c 100755
--- a/configure
+++ b/configure
@@ -3826,7 +3826,7 @@ if test "$fdt" != "no" ; then
   cat > $TMPC << EOF
 #include 
 #include 
-int main(void) { fdt_first_subnode(0, 0); return 0; }
+int main(void) { fdt_check_full(NULL, 0); return 0; }
 EOF
   if compile_prog "" "$fdt_libs" ; then
 # system DTC is good - use it
-- 
2.17.1

Re: [Qemu-devel] [PATCH] migration/migration.c: Add COLO dependency checks

2018-11-14 Thread Peter Xu

On Thu, Nov 15, 2018 at 03:09:12AM +0800, Zhang Chen wrote:
> From: Zhang Chen 
> 
> Current COLO mode(independent disk mode) need replication module work
> together. Suggested by Dr. David Alan Gilbert .
> 
> Signed-off-by: Zhang Chen 

Reviewed-by: Peter Xu 

Regards,

-- 
Peter Xu

Re: [Qemu-devel] [PATCH V2] migration/colo.c: Fix compilation issue when disable replication

2018-11-14 Thread Peter Xu

On Thu, Nov 15, 2018 at 03:16:25AM +0800, Zhang Chen wrote:
> On Wed, Nov 14, 2018 at 7:17 PM Peter Maydell 
> wrote:
> 
> > On 14 November 2018 at 11:06, Thomas Huth  wrote:
> > > On 2018-11-14 11:47, Peter Xu wrote:
> > >> On Thu, Nov 01, 2018 at 10:12:26AM +0800, Zhang Chen wrote:
> > >>> This compilation issue will occur when user use --disable-replication
> > >>> to config Qemu.
> > >>>
> > >>> Reported-by: Thomas Huth 
> > >>> Signed-off-by: Zhang Chen 
> > >>
> > >> Hi,
> > >>
> > >> How's the status of this patch?  Are we gonna merge it for 3.1?
> > >>
> > >> I just posted a similar one without knowing this (until Dave pointed
> > >> it out).  IMHO it can be a good candidate for 3.1.
> > >
> > > Maybe Peter Maydell could apply this directly to the repo as a built fix?
> >
> > I'd rather it just went through the migration tree, really...
> > it isn't actually breaking any of our travis jobs or other CI.
> >
> >
> Hi Dave,
> 
> What do you think about peter's comments?

AFAIK Juan has queued the v2 of the patch into the next migration
pull, so it'll reach 3.1 soon if nothing goes wrong.  Thanks,

-- 
Peter Xu

[Qemu-devel] [QEMU-PPC] [PATCH] target/ppc: tcg: Implement addex instruction

2018-11-14 Thread Suraj Jitindar Singh

Implement the addex instruction introduced in ISA V3.00 in qemu tcg.

The add extended using alternate carry bit (addex) instruction performs
the same operation as the add extended (adde) instruction, but using the
overflow (ov) field in the fixed point exception register (xer) as the
carry in and out instead of the carry (ca) field.

The instruction has a Z23-form, not an XO form, as follows:

--
|   31   |   RT   |   RA   |   RB   |   CY   | 170 |  0  |
--
0611   16   21   233132

However since the only valid form of the instruction defined so far is
CY = 0, we can treat this like an XO form instruction.

There is no dot form (addex.) of the instruction and the summary overflow
(so) bit in the xer is not modified by this instruction.

For simplicity we reuse the gen_op_arith_add function and add a function
argument to specify where the carry in input should come from and the
carry out output be stored (note must be the same location).

Signed-off-by: Suraj Jitindar Singh 
---
 disas/ppc.c|  2 ++
 target/ppc/translate.c | 60 +++---
 2 files changed, 35 insertions(+), 27 deletions(-)

diff --git a/disas/ppc.c b/disas/ppc.c
index 5ab9c35a84..da1140ba2b 100644
--- a/disas/ppc.c
+++ b/disas/ppc.c
@@ -3734,6 +3734,8 @@ const struct powerpc_opcode powerpc_opcodes[] = {
 { "addmeo.", XO(31,234,1,1), XORB_MASK, PPCCOM,{ RT, RA } },
 { "ameo.",   XO(31,234,1,1), XORB_MASK, PWRCOM,{ RT, RA } },
 
+{ "addex",   XO(31,170,0,0), XO_MASK,   POWER9, { RT, RA, RB } },
+
 { "mullw",   XO(31,235,0,0), XO_MASK,  PPCCOM, { RT, RA, RB } },
 { "muls",XO(31,235,0,0), XO_MASK,  PWRCOM, { RT, RA, RB } },
 { "mullw.",  XO(31,235,0,1), XO_MASK,  PPCCOM, { RT, RA, RB } },
diff --git a/target/ppc/translate.c b/target/ppc/translate.c
index 2b37910248..96894ab9a8 100644
--- a/target/ppc/translate.c
+++ b/target/ppc/translate.c
@@ -849,7 +849,7 @@ static inline void gen_op_arith_compute_ov(DisasContext 
*ctx, TCGv arg0,
 
 static inline void gen_op_arith_compute_ca32(DisasContext *ctx,
  TCGv res, TCGv arg0, TCGv arg1,
- int sub)
+ TCGv ca32, int sub)
 {
 TCGv t0;
 
@@ -864,13 +864,14 @@ static inline void gen_op_arith_compute_ca32(DisasContext 
*ctx,
 tcg_gen_xor_tl(t0, arg0, arg1);
 }
 tcg_gen_xor_tl(t0, t0, res);
-tcg_gen_extract_tl(cpu_ca32, t0, 32, 1);
+tcg_gen_extract_tl(ca32, t0, 32, 1);
 tcg_temp_free(t0);
 }
 
 /* Common add function */
 static inline void gen_op_arith_add(DisasContext *ctx, TCGv ret, TCGv arg1,
-TCGv arg2, bool add_ca, bool compute_ca,
+TCGv arg2, TCGv ca, TCGv ca32,
+bool add_ca, bool compute_ca,
 bool compute_ov, bool compute_rc0)
 {
 TCGv t0 = ret;
@@ -888,29 +889,29 @@ static inline void gen_op_arith_add(DisasContext *ctx, 
TCGv ret, TCGv arg1,
 tcg_gen_xor_tl(t1, arg1, arg2);/* add without carry */
 tcg_gen_add_tl(t0, arg1, arg2);
 if (add_ca) {
-tcg_gen_add_tl(t0, t0, cpu_ca);
+tcg_gen_add_tl(t0, t0, ca);
 }
-tcg_gen_xor_tl(cpu_ca, t0, t1);/* bits changed w/ carry */
+tcg_gen_xor_tl(ca, t0, t1);/* bits changed w/ carry */
 tcg_temp_free(t1);
-tcg_gen_extract_tl(cpu_ca, cpu_ca, 32, 1);
+tcg_gen_extract_tl(ca, ca, 32, 1);
 if (is_isa300(ctx)) {
-tcg_gen_mov_tl(cpu_ca32, cpu_ca);
+tcg_gen_mov_tl(ca32, ca);
 }
 } else {
 TCGv zero = tcg_const_tl(0);
 if (add_ca) {
-tcg_gen_add2_tl(t0, cpu_ca, arg1, zero, cpu_ca, zero);
-tcg_gen_add2_tl(t0, cpu_ca, t0, cpu_ca, arg2, zero);
+tcg_gen_add2_tl(t0, ca, arg1, zero, ca, zero);
+tcg_gen_add2_tl(t0, ca, t0, ca, arg2, zero);
 } else {
-tcg_gen_add2_tl(t0, cpu_ca, arg1, zero, arg2, zero);
+tcg_gen_add2_tl(t0, ca, arg1, zero, arg2, zero);
 }
-gen_op_arith_compute_ca32(ctx, t0, arg1, arg2, 0);
+gen_op_arith_compute_ca32(ctx, t0, arg1, arg2, ca32, 0);
 tcg_temp_free(zero);
 }
 } else {
 tcg_gen_add_tl(t0, arg1, arg2);
 if (add_ca) {
-tcg_gen_add_tl(t0, t0, cpu_ca);
+tcg_gen_add_tl(t0, t0, ca);
 }
 }
 
@@ -927,40 +928,44 @@ static inline void gen_op_arith_add(DisasContext *ctx, 
TCGv ret,

Re: [Qemu-devel] [PATCH] nvme: fix oob access issue(CVE-2018-16847)

2018-11-14 Thread Li Qiang

Paolo Bonzini  于2018年11月14日周三 下午11:44写道：

> On 14/11/2018 02:38, Li Qiang wrote:
> >
> >
> > Paolo Bonzini mailto:pbonz...@redhat.com>> 于2018
> > 年11月14日周三 上午2:27写道：
> >
> > On 13/11/2018 11:17, Kevin Wolf wrote:
> > > Am 13.11.2018 um 02:45 hat Li Qiang geschrieben:
> > >> Ping what't the status of this patch.
> > >>
> > >> I see Kevin's new pr doesn't contain this patch.
> > >
> > > Oh, I thought you said that you wanted to fix this at a higher
> > level so
> > > that the problem is caught before even getting into nvme code? If
> you
> > > don't, I can apply the patch for my next pull request.
> >
> > As far as I know the bug doesn't exist.  Li Qiang, if you have a
> > reproducer please send it.
> >
> >
> > Hello Paolo,
> > Though I've send the debug information and ASAN output in the mail to
> > secal...@redhat.com , I'm glad provide here.
> > This is for read, I think the write the same but as the PoC is in
> > userspace, the mmap can only map the exact size of the MMIO,
> > So we can only write within the area. But if we using a module we can
> > write the out of MMIO I think
> > The nvme device's parameter should set as 'cmb_size_mb=2' and the PCI
> > address may differ in your system.
>
> Ok, thanks.  I've created a reproducer using qtest (though I have to run
> now and cannot post it properly).
>
> The patch for the fix is simply:
>
>
So do you send this or me?



> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index fc7dacb816..6385033af3 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -1192,7 +1192,7 @@ static const MemoryRegionOps nvme_cmb_ops = {
>  .write = nvme_cmb_write,
>  .endianness = DEVICE_LITTLE_ENDIAN,
>  .impl = {
> -.min_access_size = 2,
> +.min_access_size = 1,
>  .max_access_size = 8,
>  },
>  };
>
>

> The memory subsystem _is_ recognizing the out-of-bounds 32-bit access,
>

Thanks, this strengthen my understanding of memory subsystem.

Thanks,
Li Qiang


> but because min_access_size=2 it sends down a write at offset 2097151
> and size 2.


> Paolo
>

Re: [Qemu-devel] [PATCH v11 01/31] block: Use bdrv_refresh_filename() to pull

2018-11-14 Thread Eric Blake


On 10/5/18 6:39 PM, Max Reitz wrote:

Before this patch, bdrv_refresh_filename() is used in a pushing manner:
Whenever the BDS graph is modified, the parents of the modified edges
are supposed to be updated (recursively upwards).  However, that is
nonviable, considering that we want child changes not to concern
parents.




  include/block/block.h |  1 -
  block.c   | 31 +++
  block/qapi.c  |  4 
  block/raw-format.c|  1 +
  block/replication.c   |  2 --
  block/vhdx-log.c  |  1 +
  block/vmdk.c  |  6 ++
  block/vpc.c   |  4 +++-
  blockdev.c|  8 
  9 files changed, 38 insertions(+), 20 deletions(-)




+++ b/block/vpc.c
@@ -284,9 +284,11 @@ static int vpc_open(BlockDriverState *bs, QDict *options, 
int flags,
  
  checksum = be32_to_cpu(footer->checksum);

  footer->checksum = 0;
-if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum)
+if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum) {
+bdrv_refresh_filename(bs);
  fprintf(stderr, "block-vpc: The header checksum of '%s' is "
  "incorrect.\n", bs->filename);
+}


In the intervening month, this now conflicts. I'm guessing we'll see a 
v12 targeted for 4.0, since your other series on "block: Deal with 
filters" depends on this one?


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [Qemu-devel] [PATCH v11 07/31] iotests.py: Add node_info()

2018-11-14 Thread Eric Blake


On 10/5/18 6:39 PM, Max Reitz wrote:

This function queries a node; since we cannot do that right now, it
executes query-named-block-nodes and returns the matching node's object.

Signed-off-by: Max Reitz 
---
  tests/qemu-iotests/iotests.py | 7 +++
  1 file changed, 7 insertions(+)



Reviewed-by: Eric Blake 

--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [Qemu-devel] [PULL V2 24/26] net: ignore packet size greater than INT_MAX

2018-11-14 Thread Jason Wang




On 2018/11/15 上午12:23, Dima Stepanov wrote:

On Wed, Nov 14, 2018 at 10:59:32AM +0800, Jason Wang wrote:

On 2018/11/13 下午11:41, Dima Stepanov wrote:

Hi Jason,

I know that this patch has been already merged to stable, but i have a
question:

On Fri, Oct 19, 2018 at 11:22:23AM +0800, Jason Wang wrote:

There should not be a reason for passing a packet size greater than
INT_MAX. It's usually a hint of bug somewhere, so ignore packet size
greater than INT_MAX in qemu_deliver_packet_iov()

CC:qemu-sta...@nongnu.org
Reported-by: Daniel Shapira
Reviewed-by: Michael S. Tsirkin
Signed-off-by: Jason Wang
---
  net/net.c | 7 ++-
  1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/net.c b/net/net.c
index c66847e..07c194a 100644
--- a/net/net.c
+++ b/net/net.c
@@ -712,10 +712,15 @@ ssize_t qemu_deliver_packet_iov(NetClientState *sender,
  void *opaque)
  {
  NetClientState *nc = opaque;
+size_t size = iov_size(iov, iovcnt);
  int ret;
+if (size > INT_MAX) {
+return size;

Is it okay that the function returns ssize_t (signed), but the type of the
size variable is size_t (unsigned)? For now the top level routine checks
the return value only for 0, but anyway we can return negative value
here instead of positive. What do you think?

Regards, Dima.


Any non zero value should be ok here. Actually I think because of the
conversion from size_t to ssize_t, caller actually see negative value?

I believe it depends. If long (ssize_t and size_t type) is 8 bytes, then
the routine can sometimes return positive values and sometimes negative.
I fully agree that in the current case any non zero value should be
okay. I just wanted to point on the inconsistency in types and as a
result a return value.



I see, want to post a patch for this?

Thanks



Dima.

Thanks

Re: [Qemu-devel] [PATCH] RFC: net/socket: learn to talk with a unix dgram socket

2018-11-14 Thread Jason Wang




On 2018/11/14 下午9:01, Marc-André Lureau wrote:

Hi

On Wed, Nov 14, 2018 at 7:46 AM Jason Wang  wrote:


On 2018/11/10 上午3:56, Marc-André Lureau wrote:

-net socket has a fd argument, and may be passed pre-opened sockets.

TCP sockets use framing.
UDP sockets have datagram boundaries.

When given a unix dgram socket, it will be able to read from it, but
will attempt to send on the dgram_dst, which is unset. The other end
will not receive the data.

Let's teach -net socket to recognize a UNIX DGRAM socket, and use the
regular send() command (without dgram_dst).

This makes running slirp out-of-process possible that
way (python pseudo-code):

a, b = socket.socketpair(socket.AF_UNIX, socket.SOCK_DGRAM)

subprocess.Popen('qemu -net socket,fd=%d -net user' % a.fileno(), shell=True)
subprocess.Popen('qemu ... -net nic -net socket,fd=%d' % b.fileno(), shell=True)

(to make slirp a seperate project altogether, we would have to have
some compatibility code and/or deprecate various options & HMP
commands for dynamic port forwarding etc - but this looks like a
reachable goal)

Signed-off-by: Marc-André Lureau 


I believe instead of supporting unnamed sockets, we should also support
named one through cli?

This could be a later patch, I have no need for it yet. Perhaps it
should be a chardev then?



I mean something like: -socket id=ud0,path=/tmp/XXX





---
   net/socket.c | 25 +
   1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index 7095eb749f..8a9c30892d 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -119,9 +119,13 @@ static ssize_t net_socket_receive_dgram(NetClientState 
*nc, const uint8_t *buf,
   ssize_t ret;

   do {
-ret = qemu_sendto(s->fd, buf, size, 0,
-  (struct sockaddr *)>dgram_dst,
-  sizeof(s->dgram_dst));
+if (s->dgram_dst.sin_family != AF_UNIX) {
+ret = qemu_sendto(s->fd, buf, size, 0,
+  (struct sockaddr *)>dgram_dst,
+  sizeof(s->dgram_dst));
+} else {
+ret = send(s->fd, buf, size, 0);
+}


Any reason that send is a must here? send(2) said:
 call

 send(sockfd, buf, len, flags);

 is equivalent to

 sendto(sockfd, buf, len, flags, NULL, 0);

Yes they should be equivalent, but then we need to add ?: operators
for the dest arguments. I preferred to have an if() instead.

thanks



One possible issue here is I'm not sure there's a equivalent send() in 
e.g non POSIX system.


Thanks





   } while (ret == -1 && errno == EINTR);

   if (ret == -1 && errno == EAGAIN) {
@@ -322,6 +326,15 @@ static NetSocketState 
*net_socket_fd_init_dgram(NetClientState *peer,
   int newfd;
   NetClientState *nc;
   NetSocketState *s;
+SocketAddress *sa;
+SocketAddressType sa_type;
+
+sa = socket_local_address(fd, errp);
+if (!sa) {
+return NULL;
+}
+sa_type = sa->type;
+qapi_free_SocketAddress(sa);

   /* fd passed: multicast: "learn" dgram_dst address from bound address 
and save it
* Because this may be "shared" socket from a "master" process, 
datagrams would be recv()
@@ -365,8 +378,12 @@ static NetSocketState 
*net_socket_fd_init_dgram(NetClientState *peer,
"socket: fd=%d (cloned mcast=%s:%d)",
fd, inet_ntoa(saddr.sin_addr), ntohs(saddr.sin_port));
   } else {
+if (sa_type == SOCKET_ADDRESS_TYPE_UNIX) {
+s->dgram_dst.sin_family = AF_UNIX;
+}
+
   snprintf(nc->info_str, sizeof(nc->info_str),
- "socket: fd=%d", fd);
+ "socket: fd=%d %s", fd, SocketAddressType_str(sa_type));
   }

   return s;

Re: [Qemu-devel] [RFC for-3.2 PATCH 0/7] pcie: Enhanced link speed and width support

2018-11-14 Thread no-reply

Hi,

This series seems to have some coding style problems. See output below for
more information:

Message-id: 154222737752.9288.484557356059052047.st...@gimli.home
Type: series
Subject: [Qemu-devel] [RFC for-3.2 PATCH 0/7] pcie: Enhanced link speed and 
width support

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
a592f52 vfio/pci: Remove PCIe Link Status emulation
ee466ea pcie: Allow generic PCIe root port to specify link speed and width
7d242bd pcie: Fill PCIESlot link fields to support higher speeds and widths
24a8568 pcie: Add link speed and width fields to PCIESlot
826c404 qapi: Define PCIe link speed and width properties
dd67d2e pci: Sync PCIe downstream port LNKSTA on read
9aea030 pcie: Create enums for link speed and width

=== OUTPUT BEGIN ===
Checking PATCH 1/7: pcie: Create enums for link speed and width...
Checking PATCH 2/7: pci: Sync PCIe downstream port LNKSTA on read...
ERROR: code indent should never use tabs
#41: FILE: hw/pci/pci.c:1358:
+^Ipcie_sync_bridge_lnk(d);$

total: 1 errors, 0 warnings, 81 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Checking PATCH 3/7: qapi: Define PCIe link speed and width properties...
Checking PATCH 4/7: pcie: Add link speed and width fields to PCIESlot...
Checking PATCH 5/7: pcie: Fill PCIESlot link fields to support higher speeds 
and widths...
Checking PATCH 6/7: pcie: Allow generic PCIe root port to specify link speed 
and width...
Checking PATCH 7/7: vfio/pci: Remove PCIe Link Status emulation...
=== OUTPUT END ===

Test command exited with code: 1


---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

[Qemu-devel] [PATCH v2 10/13] file-posix: Audit for read/write 64-bit cleanness

2018-11-14 Thread Eric Blake

Any use of aio is inherently limited by size_t aio_nbytes in struct
aiocb.  read() is similarly limited to size_t bytes, although in
practice, the ssize_t return means any read attempt on a 32-bit
platform for more than 2G will likely return a short read (if that
much memory was even available to begin with).  And while preadv()
can technically read more than size_t bytes by use of more than one
iov, the fact that you can only pass a finite number of iov each of
which is limited to size_t bytes is a limiting factor.

While we already attempt other methods at populating a more reasonable
max_transfer limit in the cases where the kernel makes that
information available, it is important that we at least let the block
layer know about our hard limitation of size_t bytes (mainly
applicable to 32-bit compilation).  At the same time, on 64-bit
platforms, that means we are now advertising that we don't have any
other unintended size-botching problems, if the block layer were to
start handing us requests larger than 2G.

Signed-off-by: Eric Blake 
---
 block/file-posix.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/block/file-posix.c b/block/file-posix.c
index 48ad3bb372a..4b43ff8cb5c 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1073,6 +1073,9 @@ static void raw_refresh_limits(BlockDriverState *bs, 
Error **errp)
 raw_probe_alignment(bs, s->fd, errp);
 bs->bl.min_mem_alignment = s->buf_align;
 bs->bl.opt_mem_alignment = MAX(s->buf_align, getpagesize());
+if (!bs->bl.max_transfer) {
+bs->bl.max_transfer = SIZE_MAX;
+}
 }

 static int check_for_dasd(int fd)
-- 
2.17.2

[Qemu-devel] [PATCH v2 12/13] block: Document need for audit of read/write 64-bit cleanness

2018-11-14 Thread Eric Blake

At this time, any block driver that has not been audited for
64-bit cleanness, but which uses byte-based callbacks, should
explicitly document that the driver wants the block layer to
cap things at 2G.  This patch has no semantic change.  And it
shows that the things I'm not interested in auditing are:
bochs, cloop, dmg, file-win32, qcow, rbd, vvfat, vxhs

Signed-off-by: Eric Blake 
---
 block/bochs.c  | 1 +
 block/cloop.c  | 1 +
 block/dmg.c| 1 +
 block/file-win32.c | 2 ++
 block/qcow.c   | 1 +
 block/rbd.c| 1 +
 block/vvfat.c  | 1 +
 block/vxhs.c   | 1 +
 8 files changed, 9 insertions(+)

diff --git a/block/bochs.c b/block/bochs.c
index 22e7d442113..cfa449ffb6f 100644
--- a/block/bochs.c
+++ b/block/bochs.c
@@ -202,6 +202,7 @@ fail:
 static void bochs_refresh_limits(BlockDriverState *bs, Error **errp)
 {
 bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
+bs->bl.max_transfer = BDRV_REQUEST_MAX_BYTES;
 }

 static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
diff --git a/block/cloop.c b/block/cloop.c
index df2b85f7234..1ac0ed234d8 100644
--- a/block/cloop.c
+++ b/block/cloop.c
@@ -211,6 +211,7 @@ fail:
 static void cloop_refresh_limits(BlockDriverState *bs, Error **errp)
 {
 bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
+bs->bl.max_transfer = BDRV_REQUEST_MAX_BYTES;
 }

 static inline int cloop_read_block(BlockDriverState *bs, int block_num)
diff --git a/block/dmg.c b/block/dmg.c
index 50e91aef6d9..ea5553b6bf2 100644
--- a/block/dmg.c
+++ b/block/dmg.c
@@ -555,6 +555,7 @@ fail:
 static void dmg_refresh_limits(BlockDriverState *bs, Error **errp)
 {
 bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
+bs->bl.max_transfer = BDRV_REQUEST_MAX_BYTES;
 }

 static inline int is_sector_in_chunk(BDRVDMGState* s,
diff --git a/block/file-win32.c b/block/file-win32.c
index f1e2187f3bd..91b2dd9ed88 100644
--- a/block/file-win32.c
+++ b/block/file-win32.c
@@ -256,6 +256,7 @@ static void raw_probe_alignment(BlockDriverState *bs, Error 
**errp)

 /* XXX Does Windows support AIO on less than 512-byte alignment? */
 bs->bl.request_alignment = 512;
+bs->bl.max_transfer = BDRV_REQUEST_MAX_BYTES;
 }

 static void raw_parse_flags(int flags, bool use_aio, int *access_flags,
@@ -716,6 +717,7 @@ static void hdev_refresh_limits(BlockDriverState *bs, Error 
**errp)
 {
 /* XXX Does Windows support AIO on less than 512-byte alignment? */
 bs->bl.request_alignment = 512;
+bs->bl.max_transfer = BDRV_REQUEST_MAX_BYTES;
 }

 static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
diff --git a/block/qcow.c b/block/qcow.c
index 4518cb4c35e..ae6deacdb1f 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -618,6 +618,7 @@ static void qcow_refresh_limits(BlockDriverState *bs, Error 
**errp)
  * it's easier to let the block layer handle rounding than to
  * audit this code further. */
 bs->bl.request_alignment = BDRV_SECTOR_SIZE;
+bs->bl.max_transfer = BDRV_REQUEST_MAX_BYTES;
 }

 static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, uint64_t offset,
diff --git a/block/rbd.c b/block/rbd.c
index 8a1a9f4b6e2..248e635b077 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -236,6 +236,7 @@ static void qemu_rbd_refresh_limits(BlockDriverState *bs, 
Error **errp)
 {
 /* XXX Does RBD support AIO on less than 512-byte alignment? */
 bs->bl.request_alignment = 512;
+bs->bl.max_transfer = BDRV_REQUEST_MAX_BYTES;
 }


diff --git a/block/vvfat.c b/block/vvfat.c
index 915a05ae4f2..35f35328baf 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -1304,6 +1304,7 @@ fail:
 static void vvfat_refresh_limits(BlockDriverState *bs, Error **errp)
 {
 bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
+bs->bl.max_transfer = BDRV_REQUEST_MAX_BYTES;
 }

 static inline void vvfat_close_current_file(BDRVVVFATState *s)
diff --git a/block/vxhs.c b/block/vxhs.c
index 0cb0a007e90..5828f53432c 100644
--- a/block/vxhs.c
+++ b/block/vxhs.c
@@ -221,6 +221,7 @@ static void vxhs_refresh_limits(BlockDriverState *bs, Error 
**errp)
 {
 /* XXX Does VXHS support AIO on less than 512-byte alignment? */
 bs->bl.request_alignment = 512;
+bs->bl.max_transfer = BDRV_REQUEST_MAX_BYTES;
 }

 static int vxhs_init_and_ref(void)
-- 
2.17.2

[Qemu-devel] [PATCH v2 05/13] block: Switch to 64-bit bl.max_transfer

2018-11-14 Thread Eric Blake

This change has no semantic impact: all drivers either leave the
value at 0 (no inherent 32-bit limit is still translated into
fragmentation below 2G; see the previous patch for that audit), or
set it to a value less than 2G.  However, switching to a larger
type and enforcing the 2G cap at the block layer makes it easier
to audit specific drivers for their robustness to larger sizing,
by letting them specify a value larger than INT_MAX if they have
been audited to be 64-bit clean.

Signed-off-by: Eric Blake 
---
 include/block/block_int.h | 8 +---
 block/io.c| 7 +++
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index f605622216d..b19d94dac5f 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -549,9 +549,11 @@ typedef struct BlockLimits {

 /* Maximal transfer length in bytes.  Need not be power of 2, but
  * must be multiple of opt_transfer and bl.request_alignment, or 0
- * for no 32-bit limit.  For now, anything larger than INT_MAX is
- * clamped down. */
-uint32_t max_transfer;
+ * for no 32-bit limit.  The block layer fragments all actions
+ * below 2G, so setting this value to anything larger than INT_MAX
+ * implies that the driver has been audited for 64-bit
+ * cleanness. */
+uint64_t max_transfer;

 /* memory alignment, in bytes so that no bounce buffer is needed */
 size_t min_mem_alignment;
diff --git a/block/io.c b/block/io.c
index 39d4e7a3ae1..4911a1123eb 100644
--- a/block/io.c
+++ b/block/io.c
@@ -159,6 +159,13 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error 
**errp)
 if (drv->bdrv_refresh_limits) {
 drv->bdrv_refresh_limits(bs, errp);
 }
+
+/* Clamp max_transfer to 2G */
+if (bs->bl.max_transfer > UINT32_MAX) {
+bs->bl.max_transfer = QEMU_ALIGN_DOWN(BDRV_REQUEST_MAX_BYTES,
+  MAX(bs->bl.opt_transfer,
+  bs->bl.request_alignment));
+}
 }

 /**
-- 
2.17.2

[Qemu-devel] [PATCH v2 09/13] RFC: crypto: Rely on block layer for fragmentation

2018-11-14 Thread Eric Blake

No need to reimplement fragmentation to BLOCK_CRYPTO_MAX_IO_SIZE
ourselves when we can ask the block layer to do it for us.

Signed-off-by: Eric Blake 

---
Question - is this patch for 'crypto' acceptable, or should we stick
with just the previous one that marks things as 64-bit clean?
---
 block/crypto.c | 80 +++---
 1 file changed, 30 insertions(+), 50 deletions(-)

diff --git a/block/crypto.c b/block/crypto.c
index 259ef2649e1..b004cef22c2 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -328,8 +328,6 @@ block_crypto_co_preadv(BlockDriverState *bs, uint64_t 
offset, uint64_t bytes,
QEMUIOVector *qiov, int flags)
 {
 BlockCrypto *crypto = bs->opaque;
-uint64_t cur_bytes; /* number of bytes in current iteration */
-uint64_t bytes_done = 0;
 uint8_t *cipher_data = NULL;
 QEMUIOVector hd_qiov;
 int ret = 0;
@@ -346,38 +344,30 @@ block_crypto_co_preadv(BlockDriverState *bs, uint64_t 
offset, uint64_t bytes,
 /* Bounce buffer because we don't wish to expose cipher text
  * in qiov which points to guest memory.
  */
-cipher_data =
-qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_IO_SIZE,
-  qiov->size));
+assert(qiov->size <= BLOCK_CRYPTO_MAX_IO_SIZE);
+cipher_data = qemu_try_blockalign(bs->file->bs, qiov->size);
 if (cipher_data == NULL) {
 ret = -ENOMEM;
 goto cleanup;
 }

-while (bytes) {
-cur_bytes = MIN(bytes, BLOCK_CRYPTO_MAX_IO_SIZE);
+qemu_iovec_reset(_qiov);
+qemu_iovec_add(_qiov, cipher_data, bytes);

-qemu_iovec_reset(_qiov);
-qemu_iovec_add(_qiov, cipher_data, cur_bytes);
-
-ret = bdrv_co_preadv(bs->file, payload_offset + offset + bytes_done,
- cur_bytes, _qiov, 0);
-if (ret < 0) {
-goto cleanup;
-}
-
-if (qcrypto_block_decrypt(crypto->block, offset + bytes_done,
-  cipher_data, cur_bytes, NULL) < 0) {
-ret = -EIO;
-goto cleanup;
-}
-
-qemu_iovec_from_buf(qiov, bytes_done, cipher_data, cur_bytes);
+ret = bdrv_co_preadv(bs->file, payload_offset + offset, bytes,
+ _qiov, 0);
+if (ret < 0) {
+goto cleanup;
+}

-bytes -= cur_bytes;
-bytes_done += cur_bytes;
+if (qcrypto_block_decrypt(crypto->block, offset,
+  cipher_data, bytes, NULL) < 0) {
+ret = -EIO;
+goto cleanup;
 }

+qemu_iovec_from_buf(qiov, 0, cipher_data, bytes);
+
  cleanup:
 qemu_iovec_destroy(_qiov);
 qemu_vfree(cipher_data);
@@ -391,8 +381,6 @@ block_crypto_co_pwritev(BlockDriverState *bs, uint64_t 
offset, uint64_t bytes,
 QEMUIOVector *qiov, int flags)
 {
 BlockCrypto *crypto = bs->opaque;
-uint64_t cur_bytes; /* number of bytes in current iteration */
-uint64_t bytes_done = 0;
 uint8_t *cipher_data = NULL;
 QEMUIOVector hd_qiov;
 int ret = 0;
@@ -409,36 +397,28 @@ block_crypto_co_pwritev(BlockDriverState *bs, uint64_t 
offset, uint64_t bytes,
 /* Bounce buffer because we're not permitted to touch
  * contents of qiov - it points to guest memory.
  */
-cipher_data =
-qemu_try_blockalign(bs->file->bs, MIN(BLOCK_CRYPTO_MAX_IO_SIZE,
-  qiov->size));
+assert(qiov->size <= BLOCK_CRYPTO_MAX_IO_SIZE);
+cipher_data = qemu_try_blockalign(bs->file->bs, qiov->size);
 if (cipher_data == NULL) {
 ret = -ENOMEM;
 goto cleanup;
 }

-while (bytes) {
-cur_bytes = MIN(bytes, BLOCK_CRYPTO_MAX_IO_SIZE);
+qemu_iovec_to_buf(qiov, 0, cipher_data, bytes);

-qemu_iovec_to_buf(qiov, bytes_done, cipher_data, cur_bytes);
+if (qcrypto_block_encrypt(crypto->block, offset,
+  cipher_data, bytes, NULL) < 0) {
+ret = -EIO;
+goto cleanup;
+}

-if (qcrypto_block_encrypt(crypto->block, offset + bytes_done,
-  cipher_data, cur_bytes, NULL) < 0) {
-ret = -EIO;
-goto cleanup;
-}
+qemu_iovec_reset(_qiov);
+qemu_iovec_add(_qiov, cipher_data, bytes);

-qemu_iovec_reset(_qiov);
-qemu_iovec_add(_qiov, cipher_data, cur_bytes);
-
-ret = bdrv_co_pwritev(bs->file, payload_offset + offset + bytes_done,
-  cur_bytes, _qiov, flags);
-if (ret < 0) {
-goto cleanup;
-}
-
-bytes -= cur_bytes;
-bytes_done += cur_bytes;
+ret = bdrv_co_pwritev(bs->file, payload_offset + offset,
+  bytes, _qiov, flags);
+if (ret < 0) {
+goto cleanup;
 }

  cleanup:
@@ -453,7 +433,7 @@ static void block_crypto_refresh_limits(BlockDriverState 
*bs, Error **errp)
 BlockCrypto

[Qemu-devel] [PATCH v2 04/13] block: Removed unused sector-based blocking I/O

2018-11-14 Thread Eric Blake

We are gradually moving away from sector-based interfaces, towards
byte-based.  Now that all callers of blocking I/O have been converted
to use our preferred byte-based bdrv_p{read,write}(), we can
delete the unused bdrv_{read,write}().

Note that the old byte-based code checked that callers passed values
for transactions that did not exceed 2G limits (to prevent scaling
the caller's input by the sector size from exceeding a signed 32-bit
length). We can double-check that the block layer does the same thing
for byte-based code, by noting that even though the newer signatures
allow a 64-bit length parameter, the block layer is actually
fragmenting things to honor both bl.max_transfer (0 if the driver
has no inherent limit, or has not been audited for 64-bit cleanness;
non-zero if the driver wants to enforce a particular cap) as well as
some sanity sizing, as follows:

- bdrv_merge_limits(): Uses the smaller cap of either driver (or leaves things
uncapped if both drivers enforce no cap)
- bdrv_co_do_copy_on_readv(): Caps things to 2G if driver did not request cap
- bdrv_aligned_preadv(): Caps things to 2G if driver did not request cap
- bdrv_co_do_pwrite_zeroes(): Caps things to MAX_BOUNCE_BUFFER if driver
did not request cap
- bdrv_aligned_pwritev(): Caps things to 2G if driver did not request cap

Signed-off-by: Eric Blake 
Reviewed-by: Stefan Hajnoczi 

---
v2: Enhance commit message with audit results
---
 include/block/block.h |  4 
 block/io.c| 48 ---
 2 files changed, 8 insertions(+), 44 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index 7f5453b45b6..52b18373807 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -307,10 +307,6 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
 BlockReopenQueue *queue, Error **errp);
 void bdrv_reopen_commit(BDRVReopenState *reopen_state);
 void bdrv_reopen_abort(BDRVReopenState *reopen_state);
-int bdrv_read(BdrvChild *child, int64_t sector_num,
-  uint8_t *buf, int nb_sectors);
-int bdrv_write(BdrvChild *child, int64_t sector_num,
-   const uint8_t *buf, int nb_sectors);
 int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
int bytes, BdrvRequestFlags flags);
 int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
diff --git a/block/io.c b/block/io.c
index bd9d688f8ba..39d4e7a3ae1 100644
--- a/block/io.c
+++ b/block/io.c
@@ -836,45 +836,6 @@ static int bdrv_prwv_co(BdrvChild *child, int64_t offset,
 return rwco.ret;
 }

-/*
- * Process a synchronous request using coroutines
- */
-static int bdrv_rw_co(BdrvChild *child, int64_t sector_num, uint8_t *buf,
-  int nb_sectors, bool is_write, BdrvRequestFlags flags)
-{
-QEMUIOVector qiov;
-struct iovec iov = {
-.iov_base = (void *)buf,
-.iov_len = nb_sectors * BDRV_SECTOR_SIZE,
-};
-
-if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
-return -EINVAL;
-}
-
-qemu_iovec_init_external(, , 1);
-return bdrv_prwv_co(child, sector_num << BDRV_SECTOR_BITS,
-, is_write, flags);
-}
-
-/* return < 0 if error. See bdrv_write() for the return codes */
-int bdrv_read(BdrvChild *child, int64_t sector_num,
-  uint8_t *buf, int nb_sectors)
-{
-return bdrv_rw_co(child, sector_num, buf, nb_sectors, false, 0);
-}
-
-/* Return < 0 if error. Important errors are:
-  -EIO generic I/O error (may happen for all errors)
-  -ENOMEDIUM   No media inserted.
-  -EINVAL  Invalid sector number or nb_sectors
-  -EACCES  Trying to write a read-only device
-*/
-int bdrv_write(BdrvChild *child, int64_t sector_num,
-   const uint8_t *buf, int nb_sectors)
-{
-return bdrv_rw_co(child, sector_num, (uint8_t *)buf, nb_sectors, true, 0);
-}

 int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
int bytes, BdrvRequestFlags flags)
@@ -897,7 +858,7 @@ int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
  * flags are passed through to bdrv_pwrite_zeroes (e.g. BDRV_REQ_MAY_UNMAP,
  * BDRV_REQ_FUA).
  *
- * Returns < 0 on error, 0 on success. For error codes see bdrv_write().
+ * Returns < 0 on error, 0 on success. For error codes see bdrv_pwrite().
  */
 int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
 {
@@ -935,6 +896,7 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
 }
 }

+/* return < 0 if error. See bdrv_pwrite() for the return codes */
 int bdrv_preadv(BdrvChild *child, int64_t offset, QEMUIOVector *qiov)
 {
 int ret;
@@ -975,6 +937,12 @@ int bdrv_pwritev(BdrvChild *child, int64_t offset, 
QEMUIOVector *qiov)
 return qiov->size;
 }

+/* Return < 0 if error. Important errors are:
+  -EIO generic I/O error (may happen for all errors)
+  -ENOMEDIUM   No media inserted.
+  -EINVAL  Invalid sector number or nb_sectors
+  -EACCES  Trying

[Qemu-devel] [PATCH v2 13/13] block: Enforce non-zero bl.max_transfer

2018-11-14 Thread Eric Blake

The raw format driver and the filter drivers default to picking
up the same limits as what they wrap, and I've audited that they
are otherwise simple enough in their passthrough to be 64-bit
clean; it's not worth changing their .bdrv_refresh_limits to
advertise anything different.  Previous patches changed all
remaining byte-based format and protocol drivers to supply an
explicit max_transfer consistent with an audit (or lack thereof)
of their code.  And for drivers still using sector-based
callbacks (gluster, iscsi, parallels, qed, replication, sheepdog,
ssh, vhdx), we can easily supply a 2G default limit while waiting
for a later per-driver conversion and auditing while moving to
byte-based callbacks.

With that, we can assert that max_transfer is now always set (either
by sector-based default, by merge with a backing layer, or by
explicit settings), and enforce that it be non-zero by the end
of bdrv_refresh_limits().  This in turn lets us simplify portions
of the block layer to use MIN() instead of MIN_NON_ZERO(), while
still fragmenting things to no larger than 2G chunks.  Also, we no
longer need to rewrite the driver's bl.max_transfer to a value below
2G.  There should be no semantic change from this patch, although
it does open the door if a future patch wants to let the block layer
choose a larger value than 2G while still honoring bl.max_transfer
for fragmentation.

Signed-off-by: Eric Blake 
---

[hmm - in sending this email, I realize that I didn't specifically
check whether quorum always picks up a sane limit from its children,
since it is byte-based but lacks a .bdrv_refresh_limits]

 include/block/block_int.h | 10 +-
 block/io.c| 25 -
 2 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/include/block/block_int.h b/include/block/block_int.h
index b19d94dac5f..410553bb0cc 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -548,11 +548,11 @@ typedef struct BlockLimits {
 uint32_t opt_transfer;

 /* Maximal transfer length in bytes.  Need not be power of 2, but
- * must be multiple of opt_transfer and bl.request_alignment, or 0
- * for no 32-bit limit.  The block layer fragments all actions
- * below 2G, so setting this value to anything larger than INT_MAX
- * implies that the driver has been audited for 64-bit
- * cleanness. */
+ * must be larger than opt_transfer and request_alignment (the
+ * block layer rounds it down as needed).  The block layer
+ * fragments all actions below 2G, so setting this value to
+ * anything larger than INT_MAX implies that the driver has been
+ * audited for 64-bit cleanness. */
 uint64_t max_transfer;

 /* memory alignment, in bytes so that no bounce buffer is needed */
diff --git a/block/io.c b/block/io.c
index 4911a1123eb..0024be0bf31 100644
--- a/block/io.c
+++ b/block/io.c
@@ -129,6 +129,9 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
 /* Default alignment based on whether driver has byte interface */
 bs->bl.request_alignment = (drv->bdrv_co_preadv ||
 drv->bdrv_aio_preadv) ? 1 : 512;
+/* Default cap at 2G for drivers without byte interface */
+if (bs->bl.request_alignment == 1)
+bs->bl.max_transfer = BDRV_REQUEST_MAX_BYTES;

 /* Take some limits from the children as a default */
 if (bs->file) {
@@ -160,12 +163,11 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error 
**errp)
 drv->bdrv_refresh_limits(bs, errp);
 }

-/* Clamp max_transfer to 2G */
-if (bs->bl.max_transfer > UINT32_MAX) {
-bs->bl.max_transfer = QEMU_ALIGN_DOWN(BDRV_REQUEST_MAX_BYTES,
-  MAX(bs->bl.opt_transfer,
-  bs->bl.request_alignment));
-}
+/* Check that we got a maximum cap, and round it down as needed */
+assert(bs->bl.max_transfer);
+bs->bl.max_transfer = QEMU_ALIGN_DOWN(bs->bl.max_transfer,
+  MAX(bs->bl.opt_transfer,
+  bs->bl.request_alignment));
 }

 /**
@@ -1145,8 +1147,7 @@ static int coroutine_fn 
bdrv_co_do_copy_on_readv(BdrvChild *child,
 int64_t cluster_bytes;
 size_t skip_bytes;
 int ret;
-int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
-BDRV_REQUEST_MAX_BYTES);
+int max_transfer = MIN(bs->bl.max_transfer, BDRV_REQUEST_MAX_BYTES);
 unsigned int progress = 0;

 if (!drv) {
@@ -1286,8 +1287,7 @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild 
*child,
 assert((bytes & (align - 1)) == 0);
 assert(!qiov || bytes == qiov->size);
 assert((bs->open_flags & BDRV_O_NO_IO) == 0);
-max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
-   align);
+max_transfer = QEMU_ALIGN_DOWN(MIN(bs->bl.max_transfer,

[Qemu-devel] [PATCH v2 11/13] qcow2: Audit for read/write 64-bit cleanness

2018-11-14 Thread Eric Blake

The qcow2 read/write functions do their own fragmentation (because
of cluster remapping); while we could advertise s->cluster_size
and let the block layer do fragmentation for us, that would NOT
solve the issue of the block layer handing us a length less than
a cluster but at an offset which overlaps a cluster boundary. Thus,
we still have to fragment ourselves, at which point it is easiest
to just document that this driver is 64-bit clean.

Signed-off-by: Eric Blake 
---
 block/qcow2.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/qcow2.c b/block/qcow2.c
index 0b5ad130060..1dd3491f77f 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -1687,6 +1687,7 @@ static void qcow2_refresh_limits(BlockDriverState *bs, 
Error **errp)
 }
 bs->bl.pwrite_zeroes_alignment = s->cluster_size;
 bs->bl.pdiscard_alignment = s->cluster_size;
+bs->bl.max_transfer = INT64_MAX;
 }

 static int qcow2_reopen_prepare(BDRVReopenState *state,
-- 
2.17.2

[Qemu-devel] [PATCH v2 06/13] blkdebug: Audit for read/write 64-bit cleanness

2018-11-14 Thread Eric Blake

Since the block layer is never supposed to hand us an offset + bytes
that would exceed off_t, we can assert this in rule_check(). With
that in place, there is nothing else in the pread, pwrite, or
pwrite_zeroes code paths that can't handle inputs larger than 2G
(even if the block layer currently never hands us something
that large); update the refresh_limits callback to document this
fact, when the user doesn't specify an override.

For a user override, we have to change the QAPI type to 'uint64'
instead of 'int'. At the same time, we can also change 'align'
to 'int32' to match the existing checks in blkdebug_open() that
alignment is always smaller than 2G.

Signed-off-by: Eric Blake 
---
 qapi/block-core.json |  2 +-
 block/blkdebug.c | 17 +
 2 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index d4fe710836e..32f0edd189f 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3122,7 +3122,7 @@
 { 'struct': 'BlockdevOptionsBlkdebug',
   'data': { 'image': 'BlockdevRef',
 '*config': 'str',
-'*align': 'int', '*max-transfer': 'int32',
+'*align': 'int32', '*max-transfer': 'uint64',
 '*opt-write-zero': 'int32', '*max-write-zero': 'int32',
 '*opt-discard': 'int32', '*max-discard': 'int32',
 '*inject-error': ['BlkdebugInjectErrorOptions'],
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 0759452925b..be4d65f86a0 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -415,9 +415,7 @@ static int blkdebug_open(BlockDriverState *bs, QDict 
*options, int flags,
 align = MAX(s->align, bs->file->bs->bl.request_alignment);

 s->max_transfer = qemu_opt_get_size(opts, "max-transfer", 0);
-if (s->max_transfer &&
-(s->max_transfer >= INT_MAX ||
- !QEMU_IS_ALIGNED(s->max_transfer, align))) {
+if (s->max_transfer && !QEMU_IS_ALIGNED(s->max_transfer, align)) {
 error_setg(errp, "Cannot meet constraints with max-transfer %" PRIu64,
s->max_transfer);
 goto out;
@@ -477,6 +475,7 @@ static int rule_check(BlockDriverState *bs, uint64_t 
offset, uint64_t bytes)
 int error;
 bool immediately;

+assert(offset <= INT64_MAX - bytes);
 QSIMPLEQ_FOREACH(rule, >active_rules, active_next) {
 uint64_t inject_offset = rule->options.inject.offset;

@@ -517,9 +516,7 @@ blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, 
uint64_t bytes,
 /* Sanity check block layer guarantees */
 assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
 assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
-if (bs->bl.max_transfer) {
-assert(bytes <= bs->bl.max_transfer);
-}
+assert(bytes <= bs->bl.max_transfer);

 err = rule_check(bs, offset, bytes);
 if (err) {
@@ -538,9 +535,7 @@ blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, 
uint64_t bytes,
 /* Sanity check block layer guarantees */
 assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
 assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
-if (bs->bl.max_transfer) {
-assert(bytes <= bs->bl.max_transfer);
-}
+assert(bytes <= bs->bl.max_transfer);

 err = rule_check(bs, offset, bytes);
 if (err) {
@@ -865,9 +860,7 @@ static void blkdebug_refresh_limits(BlockDriverState *bs, 
Error **errp)
 if (s->align) {
 bs->bl.request_alignment = s->align;
 }
-if (s->max_transfer) {
-bs->bl.max_transfer = s->max_transfer;
-}
+bs->bl.max_transfer = s->max_transfer ?: INT64_MAX;
 if (s->opt_write_zero) {
 bs->bl.pwrite_zeroes_alignment = s->opt_write_zero;
 }
-- 
2.17.2

[Qemu-devel] [PATCH v2 08/13] crypto: Audit for read/write 64-bit cleanness

2018-11-14 Thread Eric Blake

The crypto read/write functions do their own fragmentation (because
everything has to go through a bounce buffer); while we could
advertise BLOCK_CRYPTO_MAX_IO_SIZE as our max_transfer (and let the
block layer do our fragmentation for us), I'm instead choosing to
document that this driver is 64-bit clean.

Signed-off-by: Eric Blake 
---
 block/crypto.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/crypto.c b/block/crypto.c
index 33ee01bebd9..259ef2649e1 100644
--- a/block/crypto.c
+++ b/block/crypto.c
@@ -453,6 +453,7 @@ static void block_crypto_refresh_limits(BlockDriverState 
*bs, Error **errp)
 BlockCrypto *crypto = bs->opaque;
 uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block);
 bs->bl.request_alignment = sector_size; /* No sub-sector I/O */
+bs->bl.max_transfer = INT64_MAX;
 }


-- 
2.17.2

[Qemu-devel] [PATCH v2 03/13] vvfat: Switch to byte-based calls

2018-11-14 Thread Eric Blake

We are gradually moving away from sector-based interfaces, towards
byte-based.  Make the change for the last few sector-based calls
into the block layer from the vvfat driver.

Ideally, the vvfat driver should switch to doing everything
byte-based, but that's a more invasive change that requires a
bit more auditing.

Signed-off-by: Eric Blake 
Reviewed-by: Alberto Garcia 
---
 block/vvfat.c | 20 +++-
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/block/vvfat.c b/block/vvfat.c
index b7b61ea8b78..915a05ae4f2 100644
--- a/block/vvfat.c
+++ b/block/vvfat.c
@@ -1494,8 +1494,8 @@ static int vvfat_read(BlockDriverState *bs, int64_t 
sector_num,
 DLOG(fprintf(stderr, "sectors %" PRId64 "+%" PRId64
  " allocated\n", sector_num,
  n >> BDRV_SECTOR_BITS));
-if (bdrv_read(s->qcow, sector_num, buf + i * 0x200,
-  n >> BDRV_SECTOR_BITS)) {
+if (bdrv_pread(s->qcow, sector_num * BDRV_SECTOR_BITS,
+   buf + i * 0x200, n) < 0) {
 return -1;
 }
 i += (n >> BDRV_SECTOR_BITS) - 1;
@@ -1983,7 +1983,8 @@ static uint32_t 
get_cluster_count_for_direntry(BDRVVVFATState* s,
 if (res) {
 return -1;
 }
-res = bdrv_write(s->qcow, offset, s->cluster_buffer, 
1);
+res = bdrv_pwrite(s->qcow, offset * BDRV_SECTOR_SIZE,
+  s->cluster_buffer, BDRV_SECTOR_SIZE);
 if (res) {
 return -2;
 }
@@ -2146,7 +2147,7 @@ DLOG(checkpoint());
  * - get modified FAT
  * - compare the two FATs (TODO)
  * - get buffer for marking used clusters
- * - recurse direntries from root (using bs->bdrv_read to make
+ * - recurse direntries from root (using bs->bdrv_pread to make
  *sure to get the new data)
  *   - check that the FAT agrees with the size
  *   - count the number of clusters occupied by this directory and
@@ -2911,9 +2912,9 @@ static int handle_deletes(BDRVVVFATState* s)
 /*
  * synchronize mapping with new state:
  *
- * - copy FAT (with bdrv_read)
+ * - copy FAT (with bdrv_pread)
  * - mark all filenames corresponding to mappings as deleted
- * - recurse direntries from root (using bs->bdrv_read)
+ * - recurse direntries from root (using bs->bdrv_pread)
  * - delete files corresponding to mappings marked as deleted
  */
 static int do_commit(BDRVVVFATState* s)
@@ -2933,10 +2934,10 @@ static int do_commit(BDRVVVFATState* s)
 return ret;
 }

-/* copy FAT (with bdrv_read) */
+/* copy FAT (with bdrv_pread) */
 memcpy(s->fat.pointer, s->fat2, 0x200 * s->sectors_per_fat);

-/* recurse direntries from root (using bs->bdrv_read) */
+/* recurse direntries from root (using bs->bdrv_pread) */
 ret = commit_direntries(s, 0, -1);
 if (ret) {
 fprintf(stderr, "Fatal: error while committing (%d)\n", ret);
@@ -3050,7 +3051,8 @@ DLOG(checkpoint());
  * Use qcow backend. Commit later.
  */
 DLOG(fprintf(stderr, "Write to qcow backend: %d + %d\n", (int)sector_num, 
nb_sectors));
-ret = bdrv_write(s->qcow, sector_num, buf, nb_sectors);
+ret = bdrv_pwrite(s->qcow, sector_num * BDRV_SECTOR_SIZE, buf,
+  nb_sectors * BDRV_SECTOR_SIZE);
 if (ret < 0) {
 fprintf(stderr, "Error writing to qcow backend\n");
 return ret;
-- 
2.17.2

[Qemu-devel] [PATCH v2 for-4.0 00/13] block: byte-based blocking read/write

2018-11-14 Thread Eric Blake

Based-on: <20181114210548.1098207-1-ebl...@redhat.com>
[file-posix: Better checks of 64-bit copy_range]
Based-on: <20181101182738.70462-1-vsement...@virtuozzo.com>
[0/7 qcow2 decompress in threads] - more specifically, on Kevin's block-next 
branch

Also available at
https://repo.or.cz/qemu/ericb.git/shortlog/refs/tags/block-byte-blocking-v2

This series is a logical followup to other byte-based cleanups I have
done. The only remaining mention of sectors in public block.h APIs after
this series are in bdrv_nb_sectors() (converting those callser to use
bdrv_getlength() not only requires more work, but would be our
opportunity to see if we can quit rounding block sizes up beyond
request_align sizes for protocols that support sub-sector sizes), and
for computing geometries (where sectors actually make sense).

v1 was: https://lists.gnu.org/archive/html/qemu-devel/2018-04/msg04686.html
Since then:

- fold in a straggler patch that dropped from my v8 qcow2 compression
series for 3.1
- improve commit message of 4/13, to show what auditing I performed [Berto]
- based on that audit, add a lot more patches to change bl.max_transfer
to be guaranteed non-zero and allow > 2G values where safe (note that
the block layer still fragments < 2G, so a larger advertisement doesn't
actually cause large read/write transactions)

001/13:[0005] [FC] 'qcow2: Prefer byte-based calls into bs->file'
002/13:[] [--] 'vdi: Switch to byte-based calls'
003/13:[] [--] 'vvfat: Switch to byte-based calls'
004/13:[] [--] 'block: Removed unused sector-based blocking I/O'
005/13:[down] 'block: Switch to 64-bit bl.max_transfer'
006/13:[down] 'blkdebug: Audit for read/write 64-bit cleanness'
007/13:[down] 'blklogwrites: Audit for read/write 64-bit cleanness'
008/13:[down] 'crypto: Audit for read/write 64-bit cleanness'
009/13:[down] 'RFC: crypto: Rely on block layer for fragmentation'
010/13:[down] 'file-posix: Audit for read/write 64-bit cleanness'
011/13:[down] 'qcow2: Audit for read/write 64-bit cleanness'
012/13:[down] 'block: Document need for audit of read/write 64-bit cleanness'
013/13:[down] 'block: Enforce non-zero bl.max_transfer'

Patch 9/13 is marked RFC; if we like it, I'd rather squash 8 and 9
into a single patch; if we don't like it, it can be dropped without
affecting the rest of the series.

Eric Blake (13):
  qcow2: Prefer byte-based calls into bs->file
  vdi: Switch to byte-based calls
  vvfat: Switch to byte-based calls
  block: Removed unused sector-based blocking I/O
  block: Switch to 64-bit bl.max_transfer
  blkdebug: Audit for read/write 64-bit cleanness
  blklogwrites: Audit for read/write 64-bit cleanness
  crypto: Audit for read/write 64-bit cleanness
  RFC: crypto: Rely on block layer for fragmentation
  file-posix: Audit for read/write 64-bit cleanness
  qcow2: Audit for read/write 64-bit cleanness
  block: Document need for audit of read/write 64-bit cleanness
  block: Enforce non-zero bl.max_transfer

 qapi/block-core.json  |  2 +-
 include/block/block.h |  4 --
 include/block/block_int.h | 10 +++--
 block/io.c| 68 +++--
 block/blkdebug.c  | 17 +++--
 block/blklogwrites.c  |  1 +
 block/bochs.c |  1 +
 block/cloop.c |  1 +
 block/crypto.c| 79 +++
 block/dmg.c   |  1 +
 block/file-posix.c|  3 ++
 block/file-win32.c|  2 +
 block/qcow.c  |  1 +
 block/qcow2-refcount.c|  6 +--
 block/qcow2.c |  1 +
 block/rbd.c   |  1 +
 block/vdi.c   | 14 +++
 block/vvfat.c | 21 ++-
 block/vxhs.c  |  1 +
 19 files changed, 98 insertions(+), 136 deletions(-)

-- 
2.17.2

[Qemu-devel] [PATCH v2 02/13] vdi: Switch to byte-based calls

2018-11-14 Thread Eric Blake

We are gradually moving away from sector-based interfaces, towards
byte-based.  Make the change for the last few sector-based calls
into the block layer from the vdi driver.

Ideally, the vdi driver should switch to doing everything
byte-based, but that's a more invasive change that requires a
bit more auditing.

Signed-off-by: Eric Blake 
Reviewed-by: Alberto Garcia 
---
 block/vdi.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/block/vdi.c b/block/vdi.c
index 2380daa583e..b5d699f62c2 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -377,7 +377,7 @@ static int vdi_open(BlockDriverState *bs, QDict *options, 
int flags,

 logout("\n");

-ret = bdrv_read(bs->file, 0, (uint8_t *), 1);
+ret = bdrv_pread(bs->file, 0, , sizeof(header));
 if (ret < 0) {
 goto fail;
 }
@@ -467,15 +467,14 @@ static int vdi_open(BlockDriverState *bs, QDict *options, 
int flags,
 s->header = header;

 bmap_size = header.blocks_in_image * sizeof(uint32_t);
-bmap_size = DIV_ROUND_UP(bmap_size, SECTOR_SIZE);
-s->bmap = qemu_try_blockalign(bs->file->bs, bmap_size * SECTOR_SIZE);
+s->bmap = qemu_try_blockalign(bs->file->bs, bmap_size);
 if (s->bmap == NULL) {
 ret = -ENOMEM;
 goto fail;
 }

-ret = bdrv_read(bs->file, s->bmap_sector, (uint8_t *)s->bmap,
-bmap_size);
+ret = bdrv_pread(bs->file, s->bmap_sector * SECTOR_SIZE, s->bmap,
+ bmap_size);
 if (ret < 0) {
 goto fail_free_bmap;
 }
@@ -694,7 +693,7 @@ nonallocating_write:
 assert(VDI_IS_ALLOCATED(bmap_first));
 *header = s->header;
 vdi_header_to_le(header);
-ret = bdrv_write(bs->file, 0, block, 1);
+ret = bdrv_pwrite(bs->file, 0, block, SECTOR_SIZE);
 g_free(block);
 block = NULL;

@@ -712,7 +711,8 @@ nonallocating_write:
 base = ((uint8_t *)>bmap[0]) + bmap_first * SECTOR_SIZE;
 logout("will write %u block map sectors starting from entry %u\n",
n_sectors, bmap_first);
-ret = bdrv_write(bs->file, offset, base, n_sectors);
+ret = bdrv_pwrite(bs->file, offset * SECTOR_SIZE, base,
+  n_sectors * SECTOR_SIZE);
 }

 return ret;
-- 
2.17.2

[Qemu-devel] [PATCH v2 01/13] qcow2: Prefer byte-based calls into bs->file

2018-11-14 Thread Eric Blake

We had only a few sector-based stragglers left; convert them to use
our preferred byte-based accesses.

Signed-off-by: Eric Blake 
Reviewed-by: Alberto Garcia 

---
v2: rebased to threaded decompression handling
[moved from a different series]
v5: commit message tweak
v2: indentation fix
---
 block/qcow2-refcount.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 1c63ac244ac..3ef53c0b2c6 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -2374,8 +2374,8 @@ write_refblocks:
 on_disk_refblock = (void *)((char *) *refcount_table +
 refblock_index * s->cluster_size);

-ret = bdrv_write(bs->file, refblock_offset / BDRV_SECTOR_SIZE,
- on_disk_refblock, s->cluster_sectors);
+ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock,
+  s->cluster_size);
 if (ret < 0) {
 fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
 goto fail;
@@ -2597,7 +2597,7 @@ fail:
  * - 0 if writing to this offset will not affect the mentioned metadata
  * - a positive QCow2MetadataOverlap value indicating one overlapping section
  * - a negative value (-errno) indicating an error while performing a check,
- *   e.g. when bdrv_read failed on QCOW2_OL_INACTIVE_L2
+ *   e.g. when bdrv_pread failed on QCOW2_OL_INACTIVE_L2
  */
 int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
  int64_t size)
-- 
2.17.2

[Qemu-devel] [PATCH v2 07/13] blklogwrites: Audit for read/write 64-bit cleanness

2018-11-14 Thread Eric Blake

Nothing in blk_log_writes_co_do_log() is inherently limited by
a 32-bit type; document this by updating the refresh_limits
callback to document that this driver is 64-bit clean.

Signed-off-by: Eric Blake 
---
 block/blklogwrites.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/blklogwrites.c b/block/blklogwrites.c
index ff98cd55333..c945d3d77d7 100644
--- a/block/blklogwrites.c
+++ b/block/blklogwrites.c
@@ -328,6 +328,7 @@ static void blk_log_writes_refresh_limits(BlockDriverState 
*bs, Error **errp)
 {
 BDRVBlkLogWritesState *s = bs->opaque;
 bs->bl.request_alignment = s->sectorsize;
+bs->bl.max_transfer = INT64_MAX;
 }

 static int coroutine_fn
-- 
2.17.2

[Qemu-devel] [PATCH] hax: Support for Linux hosts

2018-11-14 Thread Alexandro Sanchez Bach

Intel HAXM supports now 32-bit and 64-bit Linux hosts. This patch includes
the corresponding userland changes.

Since the Darwin userland backend is POSIX-compliant, the hax-darwin.{c,h}
files have been renamed to hax-posix.{c,h}. This prefix is consistent with
the naming used in the rest of QEMU.

Signed-off-by: Alexandro Sanchez Bach 
---
 target/i386/Makefile.objs | 4 +++-
 target/i386/hax-i386.h| 6 +++---
 target/i386/{hax-darwin.c => hax-posix.c} | 0
 target/i386/{hax-darwin.h => hax-posix.h} | 0
 4 files changed, 6 insertions(+), 4 deletions(-)
 rename target/i386/{hax-darwin.c => hax-posix.c} (100%)
 rename target/i386/{hax-darwin.h => hax-posix.h} (100%)

diff --git a/target/i386/Makefile.objs b/target/i386/Makefile.objs
index 04678f5503..070e701856 100644
--- a/target/i386/Makefile.objs
+++ b/target/i386/Makefile.objs
@@ -12,8 +12,10 @@ obj-$(call lnot,$(CONFIG_SEV)) += sev-stub.o
 ifdef CONFIG_WIN32
 obj-$(CONFIG_HAX) += hax-all.o hax-mem.o hax-windows.o
 endif
+ifdef CONFIG_POSIX
+obj-$(CONFIG_HAX) += hax-all.o hax-mem.o hax-posix.o
+endif
 ifdef CONFIG_DARWIN
-obj-$(CONFIG_HAX) += hax-all.o hax-mem.o hax-darwin.o
 obj-$(CONFIG_HVF) += hvf/
 endif
 obj-$(CONFIG_WHPX) += whpx-all.o
diff --git a/target/i386/hax-i386.h b/target/i386/hax-i386.h
index 6abc156f88..f13fa4638f 100644
--- a/target/i386/hax-i386.h
+++ b/target/i386/hax-i386.h
@@ -16,7 +16,7 @@
 #include "cpu.h"
 #include "sysemu/hax.h"
 
-#ifdef CONFIG_DARWIN
+#ifdef CONFIG_POSIX
 typedef int hax_fd;
 #endif
 
@@ -82,8 +82,8 @@ hax_fd hax_mod_open(void);
 void hax_memory_init(void);
 
 
-#ifdef CONFIG_DARWIN
-#include "target/i386/hax-darwin.h"
+#ifdef CONFIG_POSIX
+#include "target/i386/hax-posix.h"
 #endif
 
 #ifdef CONFIG_WIN32
diff --git a/target/i386/hax-darwin.c b/target/i386/hax-posix.c
similarity index 100%
rename from target/i386/hax-darwin.c
rename to target/i386/hax-posix.c
diff --git a/target/i386/hax-darwin.h b/target/i386/hax-posix.h
similarity index 100%
rename from target/i386/hax-darwin.h
rename to target/i386/hax-posix.h
-- 
2.19.1

[Qemu-devel] [PATCH for-4.0 v2] virtio: Provide version-specific variants of virtio PCI devices

2018-11-14 Thread Eduardo Habkost

Many of the current virtio-*-pci device types actually represent
3 different types of devices:
* virtio 1.0 non-transitional devices
* virtio 1.0 transitional devices
* virtio 0.9 ("legacy device" in virtio 1.0 terminology)

That would be just an annoyance if it didn't break our device/bus
compatibility QMP interfaces.  With this multi-purpose device
type, there's no way to tell management software that
transitional devices and legacy devices require a Conventional
PCI bus.

The multi-purpose device types would also prevent us from telling
management software what's the PCI vendor/device ID for them,
because their PCI IDs change at runtime depending on the bus
where they were plugged.

This patch adds separate device types for each of those virtio
device flavors:

- virtio-*-pci: the existing multi-purpose device types
  - Configurable using `disable-legacy` and `disable-modern`
properties
  - Legacy driver support is automatically enabled/disabled
depending on the bus where it is plugged
  - Supports Conventional PCI and PCI Express buses
(but Conventional PCI is incompatible with
disable-legacy=off)
  - Changes PCI vendor/device IDs at runtime
- virtio-*-pci-transitional: virtio-1.0 device supporting legacy drivers
  - Supports Conventional PCI buses only, because
it has a PIO BAR
- virtio-*-pci-non-transitional: modern-only
  - Supports both Conventional PCI and PCI Express buses

All the types above will inherit from an abstract
"virtio-*-pci-base" type, so existing code that doesn't care
about the virtio version can use "virtio-*-pci-base" on type
casts.

Note that devices that are always non-transitional won't have the
-non-transitional variants added, because it would be redundant.

A simple test script (tests/acceptance/virtio_version.py) is
included, to check if the new device types are equivalent to
using the `disable-legacy` and `disable-modern` options.

Signed-off-by: Eduardo Habkost 
---
Reference to previous discussion that originated this idea:
  https://www.mail-archive.com/qemu-devel@nongnu.org/msg558389.html

Changes v1 -> v2:
* Removed *-0.9 devices.  Nobody will want to use them, if
  transitional devices work with legacy drivers
  (Gerd Hoffmann, Michael S. Tsirkin)
* Drop virtio version from name: rename -1.0-transitional to
  -transitional (Michael S. Tsirkin)
* Renamed -1.0 to -non-transitional
* Don't add any extra variants to modern-only device types
  (they don't need it)
* Fix typo on TYPE_VIRTIO_INPUT_HOST_PCI (crash reported by
  Cornelia Huck)
* No need to change cast macros for modern-only devices
* Rename virtio_register_types() to virtio_pci_types_register()

Note about points discussed in the v1 thread:

Andrea suggested making separate transitional Conventional PCi
and transitional PCI Express device types[1].  I didn't do that
because I don't see which problems this solves.  We have many
other device types that are hybrid (support both PCI Express and
Conventional PCI) and this was never a problem for us[2].

[1] 
http://mid.mail-archive.com/6f8d59b8ee2d92d73d2957b520bd8bac989fc796.camel@redhat.com
[2] http://mid.mail-archive.com/20181017155637.GC31060@habkost.net
---
 hw/virtio/virtio-pci.h |  80 +--
 hw/display/virtio-gpu-pci.c|   8 +-
 hw/display/virtio-vga.c|   8 +-
 hw/virtio/virtio-crypto-pci.c  |   8 +-
 hw/virtio/virtio-pci.c | 215 -
 tests/acceptance/virtio_version.py | 177 
 6 files changed, 406 insertions(+), 90 deletions(-)
 create mode 100644 tests/acceptance/virtio_version.py

diff --git a/hw/virtio/virtio-pci.h b/hw/virtio/virtio-pci.h
index 813082b0d7..1d2a11504f 100644
--- a/hw/virtio/virtio-pci.h
+++ b/hw/virtio/virtio-pci.h
@@ -216,7 +216,8 @@ static inline void virtio_pci_disable_modern(VirtIOPCIProxy 
*proxy)
 /*
  * virtio-scsi-pci: This extends VirtioPCIProxy.
  */
-#define TYPE_VIRTIO_SCSI_PCI "virtio-scsi-pci"
+#define TYPE_VIRTIO_SCSI_PCI_PREFIX "virtio-scsi-pci"
+#define TYPE_VIRTIO_SCSI_PCI (TYPE_VIRTIO_SCSI_PCI_PREFIX "-base")
 #define VIRTIO_SCSI_PCI(obj) \
 OBJECT_CHECK(VirtIOSCSIPCI, (obj), TYPE_VIRTIO_SCSI_PCI)
 
@@ -229,7 +230,8 @@ struct VirtIOSCSIPCI {
 /*
  * vhost-scsi-pci: This extends VirtioPCIProxy.
  */
-#define TYPE_VHOST_SCSI_PCI "vhost-scsi-pci"
+#define TYPE_VHOST_SCSI_PCI_PREFIX "vhost-scsi-pci"
+#define TYPE_VHOST_SCSI_PCI (TYPE_VHOST_SCSI_PCI_PREFIX "-base")
 #define VHOST_SCSI_PCI(obj) \
 OBJECT_CHECK(VHostSCSIPCI, (obj), TYPE_VHOST_SCSI_PCI)
 
@@ -239,7 +241,8 @@ struct VHostSCSIPCI {
 };
 #endif
 
-#define TYPE_VHOST_USER_SCSI_PCI "vhost-user-scsi-pci"
+#define TYPE_VHOST_USER_SCSI_PCI_PREFIX "vhost-user-scsi-pci"
+#define TYPE_VHOST_USER_SCSI_PCI (TYPE_VHOST_USER_SCSI_PCI_PREFIX "-base")
 #define VHOST_USER_SCSI_PCI(obj) \
 OBJECT_CHECK(VHostUserSCSIPCI, (obj), TYPE_VHOST_USER_SCSI_PCI)
 
@@ -252,7 +255,8 @@ struct VHostUserSCSIPCI {
 /*
  * vhost-user-blk-pci: This

[Qemu-devel] [PATCH for-3.1?] file-posix: Better checks of 64-bit copy_range

2018-11-14 Thread Eric Blake

file-posix.c was taking a 64-bit bytes in raw_co_copy_range_to(),
passing it through a 32-bit parameter of paio_submit_co_full(),
then widening it back to size_t when assigning into acb->aio_nbytes.

Looking at io.c, I can't quickly tell if bdrv_co_copy_range_internal()
is fragmenting things to honor bs->bl.max_transfer, or if it can
accidentally send requests larger than 2G down to the driver. If
the former, then this is a no-op; if the latter, then someone needs
to find a way to trigger this assertion and patch the block layer
to properly fragment copy_range requests.  Either way, we're better
off with an assertion than the risk of silent data corruption.

Signed-off-by: Eric Blake 
---
 block/file-posix.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index 58c86a01eaa..48ad3bb372a 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -1821,7 +1821,7 @@ static int aio_worker(void *arg)
 static int paio_submit_co_full(BlockDriverState *bs, int fd,
int64_t offset, int fd2, int64_t offset2,
QEMUIOVector *qiov,
-   int bytes, int type)
+   uint64_t bytes, int type)
 {
 RawPosixAIOData *acb = g_new(RawPosixAIOData, 1);
 ThreadPool *pool;
@@ -1832,6 +1832,7 @@ static int paio_submit_co_full(BlockDriverState *bs, int 
fd,
 acb->aio_fd2 = fd2;
 acb->aio_offset2 = offset2;

+assert(bytes <= SIZE_MAX);
 acb->aio_nbytes = bytes;
 acb->aio_offset = offset;

@@ -1848,7 +1849,7 @@ static int paio_submit_co_full(BlockDriverState *bs, int 
fd,

 static inline int paio_submit_co(BlockDriverState *bs, int fd,
  int64_t offset, QEMUIOVector *qiov,
- int bytes, int type)
+ uint64_t bytes, int type)
 {
 return paio_submit_co_full(bs, fd, offset, -1, 0, qiov, bytes, type);
 }
-- 
2.17.2

[Qemu-devel] [RFC for-3.2 PATCH 7/7] vfio/pci: Remove PCIe Link Status emulation

2018-11-14 Thread Alex Williamson

Now that the downstream port will virtually negotiate itself to the
link status of the downstream devie, we can remove this emulation.
It's not clear that it was every terribly useful anyway.

Signed-off-by: Alex Williamson 
---
 hw/vfio/pci.c |6 --
 1 file changed, 6 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 08aab328b442..0d1dd4666fd1 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1899,12 +1899,6 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int 
pos, uint8_t size,
QEMU_PCI_EXP_LNKCAP_MLS(QEMU_PCI_EXP_LNK_2_5GT), 
~0);
 vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKCTL, 0, ~0);
 }
-
-/* Mark the Link Status bits as emulated to allow virtual negotiation 
*/
-vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKSTA,
-   pci_get_word(vdev->pdev.config + pos +
-PCI_EXP_LNKSTA),
-   PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS);
 }
 
 /*

[Qemu-devel] [RFC for-3.2 PATCH 6/7] pcie: Allow generic PCIe root port to specify link speed and width

2018-11-14 Thread Alex Williamson

Allow users to specify speed and width values for the generic PCIe
root port.  Defaults remain at 2.5GT/s & x1 for compatiblity.  Machine
based defaults to increase this, such as if we wanted a pc-q35-3.2
machine to default to 16GT/s & x32, can be triggered by implementing
an instance_init callback here.

Note for libvirt testing that pcie-root-port controllers are given
default names like "pci.7" which don't play well with using the
"-set device.$name.$prop=$value" options accessible to us via
 options.  The solution is to add an  to the
pcie-root-port , for example:


  
  
  
  


The "ua-" here is a mandatory prefix.  We can then use:

  





  

Signed-off-by: Alex Williamson 
---
 hw/pci-bridge/gen_pcie_root_port.c |2 ++
 1 file changed, 2 insertions(+)

diff --git a/hw/pci-bridge/gen_pcie_root_port.c 
b/hw/pci-bridge/gen_pcie_root_port.c
index 299de429ec1e..e3bba2ab68ef 100644
--- a/hw/pci-bridge/gen_pcie_root_port.c
+++ b/hw/pci-bridge/gen_pcie_root_port.c
@@ -124,6 +124,8 @@ static Property gen_rp_props[] = {
  res_reserve.mem_pref_32, -1),
 DEFINE_PROP_SIZE("pref64-reserve", GenPCIERootPort,
  res_reserve.mem_pref_64, -1),
+DEFINE_PROP_PCIE_LINK_SPEED("speed", PCIESlot, speed, PCIE_LINK_SPEED_2_5),
+DEFINE_PROP_PCIE_LINK_WIDTH("width", PCIESlot, width, PCIE_LINK_WIDTH_1),
 DEFINE_PROP_END_OF_LIST()
 };

[Qemu-devel] [RFC for-3.2 PATCH 3/7] qapi: Define PCIe link speed and width properties

2018-11-14 Thread Alex Williamson

Create properties to be able to define speeds and widths for PCIe
links.  The only tricky bit here is that our get and set callbacks
translate from the fixed QAPI automagic enums to those we define
in PCI code to represent the actual register segment value.

Signed-off-by: Alex Williamson 
---
 hw/core/qdev-properties.c|  178 ++
 include/hw/qdev-properties.h |8 ++
 qapi/common.json |   42 ++
 3 files changed, 228 insertions(+)

diff --git a/hw/core/qdev-properties.c b/hw/core/qdev-properties.c
index 35072dec1ecf..f5ca5b821a79 100644
--- a/hw/core/qdev-properties.c
+++ b/hw/core/qdev-properties.c
@@ -1327,3 +1327,181 @@ const PropertyInfo qdev_prop_off_auto_pcibar = {
 .set = set_enum,
 .set_default_value = set_default_value_enum,
 };
+
+/* --- PCIELinkSpeed 2_5/5/8/16 -- */
+
+static void get_prop_pcielinkspeed(Object *obj, Visitor *v, const char *name,
+   void *opaque, Error **errp)
+{
+DeviceState *dev = DEVICE(obj);
+Property *prop = opaque;
+PCIExpLinkSpeed *p = qdev_get_prop_ptr(dev, prop);
+PCIELinkSpeed speed;
+
+switch (*p) {
+case QEMU_PCI_EXP_LNK_2_5GT:
+speed = PCIE_LINK_SPEED_2_5;
+break;
+case QEMU_PCI_EXP_LNK_5GT:
+speed = PCIE_LINK_SPEED_5;
+break;
+case QEMU_PCI_EXP_LNK_8GT:
+speed = PCIE_LINK_SPEED_8;
+break;
+case QEMU_PCI_EXP_LNK_16GT:
+speed = PCIE_LINK_SPEED_16;
+break;
+default:
+/* Unreachable */
+abort();
+}
+
+visit_type_enum(v, prop->name, (int *), prop->info->enum_table, 
errp);
+}
+
+static void set_prop_pcielinkspeed(Object *obj, Visitor *v, const char *name,
+   void *opaque, Error **errp)
+{
+DeviceState *dev = DEVICE(obj);
+Property *prop = opaque;
+PCIExpLinkSpeed *p = qdev_get_prop_ptr(dev, prop);
+PCIELinkSpeed speed;
+Error *local_err = NULL;
+
+if (dev->realized) {
+qdev_prop_set_after_realize(dev, name, errp);
+return;
+}
+
+visit_type_enum(v, prop->name, (int *),
+prop->info->enum_table, _err);
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
+
+switch (speed) {
+case PCIE_LINK_SPEED_2_5:
+*p = QEMU_PCI_EXP_LNK_2_5GT;
+break;
+case PCIE_LINK_SPEED_5:
+*p = QEMU_PCI_EXP_LNK_5GT;
+break;
+case PCIE_LINK_SPEED_8:
+*p = QEMU_PCI_EXP_LNK_8GT;
+break;
+case PCIE_LINK_SPEED_16:
+*p = QEMU_PCI_EXP_LNK_16GT;
+break;
+default:
+/* Unreachable */
+abort();
+}
+}
+
+const PropertyInfo qdev_prop_pcie_link_speed = {
+.name = "PCIELinkSpeed",
+.description = "2_5/5/8/16",
+.enum_table = _lookup,
+.get = get_prop_pcielinkspeed,
+.set = set_prop_pcielinkspeed,
+.set_default_value = set_default_value_enum,
+};
+
+/* --- PCIELinkWidth 1/2/4/8/12/16/32 -- */
+
+static void get_prop_pcielinkwidth(Object *obj, Visitor *v, const char *name,
+   void *opaque, Error **errp)
+{
+DeviceState *dev = DEVICE(obj);
+Property *prop = opaque;
+PCIExpLinkWidth *p = qdev_get_prop_ptr(dev, prop);
+PCIELinkWidth width;
+
+switch (*p) {
+case QEMU_PCI_EXP_LNK_X1:
+width = PCIE_LINK_WIDTH_1;
+break;
+case QEMU_PCI_EXP_LNK_X2:
+width = PCIE_LINK_WIDTH_2;
+break;
+case QEMU_PCI_EXP_LNK_X4:
+width = PCIE_LINK_WIDTH_4;
+break;
+case QEMU_PCI_EXP_LNK_X8:
+width = PCIE_LINK_WIDTH_8;
+break;
+case QEMU_PCI_EXP_LNK_X12:
+width = PCIE_LINK_WIDTH_12;
+break;
+case QEMU_PCI_EXP_LNK_X16:
+width = PCIE_LINK_WIDTH_16;
+break;
+case QEMU_PCI_EXP_LNK_X32:
+width = PCIE_LINK_WIDTH_32;
+break;
+default:
+/* Unreachable */
+abort();
+}
+
+visit_type_enum(v, prop->name, (int *), prop->info->enum_table, 
errp);
+}
+
+static void set_prop_pcielinkwidth(Object *obj, Visitor *v, const char *name,
+   void *opaque, Error **errp)
+{
+DeviceState *dev = DEVICE(obj);
+Property *prop = opaque;
+PCIExpLinkWidth *p = qdev_get_prop_ptr(dev, prop);
+PCIELinkWidth width;
+Error *local_err = NULL;
+
+if (dev->realized) {
+qdev_prop_set_after_realize(dev, name, errp);
+return;
+}
+
+visit_type_enum(v, prop->name, (int *),
+prop->info->enum_table, _err);
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
+
+switch (width) {
+case PCIE_LINK_WIDTH_1:
+*p = QEMU_PCI_EXP_LNK_X1;
+break;
+case PCIE_LINK_WIDTH_2:
+*p = QEMU_PCI_EXP_LNK_X2;
+break;
+case PCIE_LINK_WIDTH_4:
+*p = QEMU_PCI_EXP_LNK_X4;
+break;
+case PCIE_LINK_WIDTH_8:

[Qemu-devel] [RFC for-3.2 PATCH 5/7] pcie: Fill PCIESlot link fields to support higher speeds and widths

2018-11-14 Thread Alex Williamson

Make use of the PCIESlot speed and width fields to update link
information beyond those configured in pcie_cap_v1_fill().  This is
only called for devices supporting a version 2 capability and
automatically skips any non-PCIESlot devices.  Only devices with
increased link values generate any visible config space differences.

Signed-off-by: Alex Williamson 
---
 hw/pci/pcie.c |   72 +
 1 file changed, 72 insertions(+)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 61b7b96c52cd..556ec19925b9 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -27,6 +27,7 @@
 #include "hw/pci/msi.h"
 #include "hw/pci/pci_bus.h"
 #include "hw/pci/pcie_regs.h"
+#include "hw/pci/pcie_port.h"
 #include "qemu/range.h"
 
 //#define DEBUG_PCIE
@@ -87,6 +88,74 @@ pcie_cap_v1_fill(PCIDevice *dev, uint8_t port, uint8_t type, 
uint8_t version)
 pci_set_word(cmask + PCI_EXP_LNKSTA, 0);
 }
 
+static void pcie_cap_fill_slot_lnk(PCIDevice *dev)
+{
+PCIESlot *s = (PCIESlot *)object_dynamic_cast(OBJECT(dev), TYPE_PCIE_SLOT);
+uint8_t *exp_cap = dev->config + dev->exp.exp_cap;
+
+/* Skip anything that isn't a PCIESlot */
+if (!s) {
+return;
+}
+
+/* Clear and fill LNKCAP from what was configured above */
+pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKCAP,
+ PCI_EXP_LNKCAP_MLW | PCI_EXP_LNKCAP_SLS);
+pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP,
+   QEMU_PCI_EXP_LNKCAP_MLW(s->width) |
+   QEMU_PCI_EXP_LNKCAP_MLS(s->speed));
+
+/*
+ * Link bandwidth notification is required for all root ports and
+ * downstream ports supporting links wider than x1.
+ */
+if (s->width > QEMU_PCI_EXP_LNK_X1) {
+pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP,
+   PCI_EXP_LNKCAP_LBNC);
+}
+
+if (s->speed > QEMU_PCI_EXP_LNK_2_5GT) {
+/*
+ * Hot-plug capable downstream ports and downstream ports supporting
+ * link speeds greater than 5GT/s must hardwire PCI_EXP_LNKCAP_DLLLARC
+ * to 1b.  PCI_EXP_LNKCAP_DLLLARC implies PCI_EXP_LNKSTA_DLLLA, which
+ * we also hardwire to 1b here.  2.5GT/s hot-plug slots should also
+ * technically implement this, but it's not done here for 
compatibility.
+ */
+pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP,
+   PCI_EXP_LNKCAP_DLLLARC);
+pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA,
+   PCI_EXP_LNKSTA_DLLLA);
+
+/*
+ * Target Link Speed defaults to the highest link speed supported by
+ * the component.  2.5GT/s devices are permitted to hardwire to zero.
+ */
+pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKCTL2,
+ PCI_EXP_LNKCTL2_TLS);
+pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKCTL2,
+   QEMU_PCI_EXP_LNKCAP_MLS(s->speed) &
+   PCI_EXP_LNKCTL2_TLS);
+}
+
+/*
+ * 2.5 & 5.0GT/s can be fully described by LNKCAP, but 8.0GT/s is
+ * actually a reference to the highest bit supported in this register.
+ * We assume the device supports all link speeds.
+ */
+if (s->speed > QEMU_PCI_EXP_LNK_5GT) {
+pci_long_test_and_clear_mask(exp_cap + PCI_EXP_LNKCAP2, ~0U);
+pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2,
+   PCI_EXP_LNKCAP2_SLS_2_5GB |
+   PCI_EXP_LNKCAP2_SLS_5_0GB |
+   PCI_EXP_LNKCAP2_SLS_8_0GB);
+if (s->speed > QEMU_PCI_EXP_LNK_8GT) {
+pci_long_test_and_set_mask(exp_cap + PCI_EXP_LNKCAP2,
+   PCI_EXP_LNKCAP2_SLS_16_0GB);
+}
+}
+}
+
 int pcie_cap_init(PCIDevice *dev, uint8_t offset,
   uint8_t type, uint8_t port,
   Error **errp)
@@ -108,6 +177,9 @@ int pcie_cap_init(PCIDevice *dev, uint8_t offset,
 /* Filling values common with v1 */
 pcie_cap_v1_fill(dev, port, type, PCI_EXP_FLAGS_VER2);
 
+/* Fill link speed and width options */
+pcie_cap_fill_slot_lnk(dev);
+
 /* Filling v2 specific values */
 pci_set_long(exp_cap + PCI_EXP_DEVCAP2,
  PCI_EXP_DEVCAP2_EFF | PCI_EXP_DEVCAP2_EETLPP);

Re: [Qemu-devel] [PATCH for-3.2 29/41] slirp: improve a bit the debug macros

2018-11-14 Thread Marc-André Lureau

Hi

On Wed, Nov 14, 2018 at 6:04 PM Daniel P. Berrangé  wrote:
>
> On Wed, Nov 14, 2018 at 04:36:31PM +0400, Marc-André Lureau wrote:
> > Let them accept multiple arguments. Simplify the inner argument
> > handling of DEBUG_ARGS/DEBUG_MISC_DEBUG_ERROR.
> >
> > Signed-off-by: Marc-André Lureau 
> > ---
> >  slirp/debug.h  | 47 --
> >  slirp/arp_table.c  | 12 ++--
> >  slirp/bootp.c  |  3 +--
> >  slirp/cksum.c  |  4 ++--
> >  slirp/dhcpv6.c | 11 +--
> >  slirp/ip6_icmp.c   |  2 +-
> >  slirp/ip_icmp.c| 18 +-
> >  slirp/mbuf.c   |  2 +-
> >  slirp/ndp_table.c  | 18 +-
> >  slirp/slirp.c  | 12 ++--
> >  slirp/socket.c | 32 +++
> >  slirp/tcp_input.c  | 15 +++
> >  slirp/tcp_output.c |  2 +-
> >  slirp/tcp_subr.c   |  4 ++--
> >  slirp/udp.c|  6 +++---
> >  slirp/udp6.c   |  6 +++---
> >  16 files changed, 109 insertions(+), 85 deletions(-)
> >
> > diff --git a/slirp/debug.h b/slirp/debug.h
> > index 6cfa61edb3..ca3a4b04da 100644
> > --- a/slirp/debug.h
> > +++ b/slirp/debug.h
> > @@ -17,18 +17,45 @@
> >
> >  extern int slirp_debug;
> >
> > -#define DEBUG_CALL(x) if (slirp_debug & DBG_CALL) { fprintf(dfd, 
> > "%s...\n", x); fflush(dfd); }
> > -#define DEBUG_ARG(x, y) if (slirp_debug & DBG_CALL) { fputc(' ', dfd); 
> > fprintf(dfd, x, y); fputc('\n', dfd); fflush(dfd); }
> > -#define DEBUG_ARGS(x) if (slirp_debug & DBG_CALL) { fprintf x ; 
> > fflush(dfd); }
> > -#define DEBUG_MISC(x) if (slirp_debug & DBG_MISC) { fprintf x ; 
> > fflush(dfd); }
> > -#define DEBUG_ERROR(x) if (slirp_debug & DBG_ERROR) {fprintf x ; 
> > fflush(dfd); }
> > +#define DEBUG_CALL(fmt, ...) do {   \
> > +if (slirp_debug & DBG_CALL) {   \
> > +fprintf(dfd, fmt, ##__VA_ARGS__);   \
> > +fprintf(dfd, "...\n");  \
> > +fflush(dfd);\
> > +}   \
> > +} while (0)
> > +
> > +#define DEBUG_ARG(fmt, ...) do {\
> > +if (slirp_debug & DBG_CALL) {   \
> > +fputc(' ', dfd);\
> > +fprintf(dfd, fmt, ##__VA_ARGS__);   \
> > +fputc('\n', dfd);   \
> > +fflush(dfd);\
> > +}   \
> > +} while (0)
> > +
> > +#define DEBUG_ARGS(fmt, ...) DEBUG_ARG(fmt, ##__VA_ARGS__)
> > +
> > +#define DEBUG_MISC(fmt, ...) do {   \
> > +if (slirp_debug & DBG_MISC) {   \
> > +fprintf(dfd, fmt, ##__VA_ARGS__);   \
> > +fflush(dfd);\
> > +}   \
> > +} while (0)
> > +
> > +#define DEBUG_ERROR(fmt, ...) do {  \
> > +if (slirp_debug & DBG_ERROR) {  \
> > +fprintf(dfd, fmt, ##__VA_ARGS__);   \
> > +fflush(dfd);\
> > +}   \
> > +} while (0)
>
> I tend to think it would be nicer to change these to
> use  g_debug, and #define G_LOG_DOMAIN  "libslirp" in
> the .c files.
>
> This would allow apps to intercept the debug messages
> via a custom log handler.

Yes, even better would be structured logging, but it requires glib 2.50

One issue with the replacement is that we may want to keep the
slirp_debug filtering. If not, then we lose the classification.
Second issue is that slirp issues several debug calls and write \n in
the end. This is something you can't do with g_log. So it would need
some debug calls rewrite.

I would like to see this clean up patch applied before we decide how
to replace it with glog.

And yes, we should have a Slirp G_LOG_DOMAIN (I meant to do that, and forgot)

>
> Regards,
> Daniel
> --
> |: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
> |: https://libvirt.org -o-https://fstop138.berrange.com :|
> |: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

[Qemu-devel] [RFC for-3.2 PATCH 2/7] pci: Sync PCIe downstream port LNKSTA on read

2018-11-14 Thread Alex Williamson

The PCIe link speed and width between a downstream device and its
upstream port is negotiated on real hardware and susceptible to
dynamic changes due to signal issues and power management.  In the
emulated device case there is no real hardware link, but we still
might wish to have some consistency between endpoint and downstream
port via a virtual negotiation.  There is of course a real link for
assigned devices and this same virtual negotiation allows the
downstream port to match the endpoint, synchronizing on every read
to support underlying physical hardware dynamically adjusting the
link.

This negotiation is intentionally unidirectional for compatibility.
If the endpoint exceeds the capabilities of the downstream port or
there is no endpoint device, the downstream port reports negotiation
to its maximum speed and width, matching the previous case where
negotiation was absent.  De-tuning the endpoint to match a virtual
link doesn't seem to benefit anyone and is a condition we've thus
far reported without functional issues.

Note that PCI_EXP_LNKSTA is already ignored for migration
compatibility via pcie_cap_v1_fill().

Signed-off-by: Alex Williamson 
---
 hw/pci/pci.c  |4 
 hw/pci/pcie.c |   39 +++
 include/hw/pci/pci.h  |   13 +
 include/hw/pci/pcie.h |1 +
 4 files changed, 57 insertions(+)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index b937f0dc0af4..576c85f376b6 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1353,6 +1353,10 @@ uint32_t pci_default_read_config(PCIDevice *d,
 {
 uint32_t val = 0;
 
+if (pci_is_express_downstream_port(d) &&
+ranges_overlap(address, len, d->exp.exp_cap + PCI_EXP_LNKSTA, 2)) {
+   pcie_sync_bridge_lnk(d);
+}
 memcpy(, d->config + address, len);
 return le32_to_cpu(val);
 }
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 914a5261a79b..61b7b96c52cd 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -729,6 +729,45 @@ void pcie_add_capability(PCIDevice *dev,
 memset(dev->cmask + offset, 0xFF, size);
 }
 
+/*
+ * Sync the PCIe Link Status negotiated speed and width of a bridge with the
+ * downstream device.  If downstream device is not present, re-write with the
+ * Link Capability fields.  Limit width and speed to bridge capabilities for
+ * compatibility.  Use config_read to access the downstream device since it
+ * could be an assigned device with volatile link information.
+ */
+void pcie_sync_bridge_lnk(PCIDevice *bridge_dev)
+{
+PCIBridge *br = PCI_BRIDGE(bridge_dev);
+PCIBus *bus = pci_bridge_get_sec_bus(br);
+PCIDevice *target = bus->devices[0];
+uint8_t *exp_cap = bridge_dev->config + bridge_dev->exp.exp_cap;
+uint16_t lnksta, lnkcap = pci_get_word(exp_cap + PCI_EXP_LNKCAP);
+
+if (!target || !target->exp.exp_cap) {
+lnksta = lnkcap;
+} else {
+lnksta = target->config_read(target,
+ target->exp.exp_cap + PCI_EXP_LNKSTA,
+ sizeof(lnksta));
+
+if ((lnksta & PCI_EXP_LNKSTA_NLW) > (lnkcap & PCI_EXP_LNKCAP_MLW)) {
+lnksta &= ~PCI_EXP_LNKSTA_NLW;
+lnksta |= lnkcap & PCI_EXP_LNKCAP_MLW;
+}
+
+if ((lnksta & PCI_EXP_LNKSTA_CLS) > (lnkcap & PCI_EXP_LNKCAP_SLS)) {
+lnksta &= ~PCI_EXP_LNKSTA_CLS;
+lnksta |= lnkcap & PCI_EXP_LNKCAP_SLS;
+}
+}
+
+pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
+ PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW);
+pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, lnksta &
+   (PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW));
+}
+
 /**
  * pci express extended capability helper functions
  */
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index e6514bba23aa..eb12fa112ed2 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -737,6 +737,19 @@ static inline int pci_is_express(const PCIDevice *d)
 return d->cap_present & QEMU_PCI_CAP_EXPRESS;
 }
 
+static inline int pci_is_express_downstream_port(const PCIDevice *d)
+{
+uint8_t type;
+
+if (!pci_is_express(d) || !d->exp.exp_cap) {
+return 0;
+}
+
+type = pcie_cap_get_type(d);
+
+return type == PCI_EXP_TYPE_DOWNSTREAM || type == PCI_EXP_TYPE_ROOT_PORT;
+}
+
 static inline uint32_t pci_config_size(const PCIDevice *d)
 {
 return pci_is_express(d) ? PCIE_CONFIG_SPACE_SIZE : PCI_CONFIG_SPACE_SIZE;
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index b71e36970345..1976909ab4c8 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -126,6 +126,7 @@ uint16_t pcie_find_capability(PCIDevice *dev, uint16_t 
cap_id);
 void pcie_add_capability(PCIDevice *dev,
  uint16_t cap_id, uint8_t cap_ver,
  uint16_t offset, uint16_t size);
+void

[Qemu-devel] [RFC for-3.2 PATCH 4/7] pcie: Add link speed and width fields to PCIESlot

2018-11-14 Thread Alex Williamson

Add fields allowing the PCIe link speed and width of a PCIESlot to
be configured, with an instance_post_init callback on the root port
parent class to set defaults.  This allows child classes to via
properties, without requiring all implementions to support arbitrary
user selected values, and also allows child class instance_init
callbacks to set defaults, for example using a machine defined link
default.

Signed-off-by: Alex Williamson 
---
 hw/pci-bridge/pcie_root_port.c |   14 ++
 include/hw/pci/pcie_port.h |4 
 2 files changed, 18 insertions(+)

diff --git a/hw/pci-bridge/pcie_root_port.c b/hw/pci-bridge/pcie_root_port.c
index 45f9e8cd4a36..34ad76743c44 100644
--- a/hw/pci-bridge/pcie_root_port.c
+++ b/hw/pci-bridge/pcie_root_port.c
@@ -140,6 +140,19 @@ static Property rp_props[] = {
 DEFINE_PROP_END_OF_LIST()
 };
 
+static void rp_instance_post_init(Object *obj)
+{
+PCIESlot *s = PCIE_SLOT(obj);
+
+if (!s->speed) {
+s->speed = QEMU_PCI_EXP_LNK_2_5GT;
+}
+
+if (!s->width) {
+s->width = QEMU_PCI_EXP_LNK_X1;
+}
+}
+
 static void rp_class_init(ObjectClass *klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
@@ -157,6 +170,7 @@ static void rp_class_init(ObjectClass *klass, void *data)
 static const TypeInfo rp_info = {
 .name  = TYPE_PCIE_ROOT_PORT,
 .parent= TYPE_PCIE_SLOT,
+.instance_post_init = rp_instance_post_init,
 .class_init= rp_class_init,
 .abstract  = true,
 .class_size = sizeof(PCIERootPortClass),
diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h
index 0736014bfdb4..df242a0cafff 100644
--- a/include/hw/pci/pcie_port.h
+++ b/include/hw/pci/pcie_port.h
@@ -49,6 +49,10 @@ struct PCIESlot {
 /* pci express switch port with slot */
 uint8_t chassis;
 uint16_tslot;
+
+PCIExpLinkSpeed speed;
+PCIExpLinkWidth width;
+
 QLIST_ENTRY(PCIESlot) next;
 };

[Qemu-devel] [RFC for-3.2 PATCH 1/7] pcie: Create enums for link speed and width

2018-11-14 Thread Alex Williamson

In preparation for reporting higher virtual link speeds and widths,
create enums and macros to help us manage them.

Signed-off-by: Alex Williamson 
---
 hw/pci/pcie.c  |7 ---
 hw/vfio/pci.c  |3 ++-
 include/hw/pci/pcie_regs.h |   23 +--
 3 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 6c91bd44a0a5..914a5261a79b 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -68,11 +68,12 @@ pcie_cap_v1_fill(PCIDevice *dev, uint8_t port, uint8_t 
type, uint8_t version)
 pci_set_long(exp_cap + PCI_EXP_LNKCAP,
  (port << PCI_EXP_LNKCAP_PN_SHIFT) |
  PCI_EXP_LNKCAP_ASPMS_0S |
- PCI_EXP_LNK_MLW_1 |
- PCI_EXP_LNK_LS_25);
+ QEMU_PCI_EXP_LNKCAP_MLW(QEMU_PCI_EXP_LNK_X1) |
+ QEMU_PCI_EXP_LNKCAP_MLS(QEMU_PCI_EXP_LNK_2_5GT));
 
 pci_set_word(exp_cap + PCI_EXP_LNKSTA,
- PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25);
+ QEMU_PCI_EXP_LNKSTA_NLW(QEMU_PCI_EXP_LNK_X1) |
+ QEMU_PCI_EXP_LNKSTA_CLS(QEMU_PCI_EXP_LNK_2_5GT));
 
 if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) {
 pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA,
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 866f0deeb7eb..08aab328b442 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1895,7 +1895,8 @@ static int vfio_setup_pcie_cap(VFIOPCIDevice *vdev, int 
pos, uint8_t size,
PCI_EXP_TYPE_ENDPOINT << 4,
PCI_EXP_FLAGS_TYPE);
 vfio_add_emulated_long(vdev, pos + PCI_EXP_LNKCAP,
-   PCI_EXP_LNK_MLW_1 | PCI_EXP_LNK_LS_25, ~0);
+   QEMU_PCI_EXP_LNKCAP_MLW(QEMU_PCI_EXP_LNK_X1) |
+   QEMU_PCI_EXP_LNKCAP_MLS(QEMU_PCI_EXP_LNK_2_5GT), 
~0);
 vfio_add_emulated_word(vdev, pos + PCI_EXP_LNKCTL, 0, ~0);
 }
 
diff --git a/include/hw/pci/pcie_regs.h b/include/hw/pci/pcie_regs.h
index a95522a13b04..ad4e7808b8ac 100644
--- a/include/hw/pci/pcie_regs.h
+++ b/include/hw/pci/pcie_regs.h
@@ -34,10 +34,29 @@
 
 /* PCI_EXP_LINK{CAP, STA} */
 /* link speed */
-#define PCI_EXP_LNK_LS_25   1
+typedef enum PCIExpLinkSpeed {
+QEMU_PCI_EXP_LNK_2_5GT = 1,
+QEMU_PCI_EXP_LNK_5GT,
+QEMU_PCI_EXP_LNK_8GT,
+QEMU_PCI_EXP_LNK_16GT,
+} PCIExpLinkSpeed;
+
+#define QEMU_PCI_EXP_LNKCAP_MLS(speed)  (speed)
+#define QEMU_PCI_EXP_LNKSTA_CLS QEMU_PCI_EXP_LNKCAP_MLS
+
+typedef enum PCIExpLinkWidth {
+QEMU_PCI_EXP_LNK_X1 = 1,
+QEMU_PCI_EXP_LNK_X2 = 2,
+QEMU_PCI_EXP_LNK_X4 = 4,
+QEMU_PCI_EXP_LNK_X8 = 8,
+QEMU_PCI_EXP_LNK_X12 = 12,
+QEMU_PCI_EXP_LNK_X16 = 16,
+QEMU_PCI_EXP_LNK_X32 = 32,
+} PCIExpLinkWidth;
 
 #define PCI_EXP_LNK_MLW_SHIFT   ctz32(PCI_EXP_LNKCAP_MLW)
-#define PCI_EXP_LNK_MLW_1   (1 << PCI_EXP_LNK_MLW_SHIFT)
+#define QEMU_PCI_EXP_LNKCAP_MLW(width)  (width << PCI_EXP_LNK_MLW_SHIFT)
+#define QEMU_PCI_EXP_LNKSTA_NLW QEMU_PCI_EXP_LNKCAP_MLW
 
 /* PCI_EXP_LINKCAP */
 #define PCI_EXP_LNKCAP_ASPMS_SHIFT  ctz32(PCI_EXP_LNKCAP_ASPMS)

[Qemu-devel] [RFC for-3.2 PATCH 0/7] pcie: Enhanced link speed and width support

2018-11-14 Thread Alex Williamson

QEMU exposes gen1 PCI-express interconnect devices supporting only
2.5GT/s and x1 width.  It might not seem obvious that a virtual
bandwidth limitation can result in a real performance degradation, but
it's been reported that in some configurations assigned GPUs might not
scale their link speed up to the maximum supported value if the
downstream port above it only advertises limited link support.

As proposed[1] this series effectively implements virtual link
negotiation on downstream ports and enhances the generic PCIe root
port to allow user configurable speeds and widths.  The "negotiation"
simply mirrors the link status of the connected downstream device
providing the appearance of dynamic link speed scaling to match the
endpoint device.  Not yet implemented from the proposal is support
for globally updating defaults based on machine type, though the
foundation is provided here by allowing supporting PCIESlots to
implement an instance_init callback which can call into a common
helper for this.

I have not specifically tested migration with this, but we already
consider LNKSTA to be dynamic and the other changes implemented here
are static config space changes with no changes being implemented for
devices using default values, ie. they should be compatible by virtue
of existing config space migration support.

I think I've covered the required link related registers to support
PCIe 4.0, but please let me know if I've missed any.

Testing and feedback appreciated, patch 6/7 provides example qemu:arg
options and requirements to use with existing libvirt.  Native libvirt
support TBD.  Thanks,

Alex

[1] https://lists.gnu.org/archive/html/qemu-devel/2018-10/msg03086.html

---

Alex Williamson (7):
  pcie: Create enums for link speed and width
  pci: Sync PCIe downstream port LNKSTA on read
  qapi: Define PCIe link speed and width properties
  pcie: Add link speed and width fields to PCIESlot
  pcie: Fill PCIESlot link fields to support higher speeds and widths
  pcie: Allow generic PCIe root port to specify link speed and width
  vfio/pci: Remove PCIe Link Status emulation


 hw/core/qdev-properties.c  |  178 
 hw/pci-bridge/gen_pcie_root_port.c |2 
 hw/pci-bridge/pcie_root_port.c |   14 +++
 hw/pci/pci.c   |4 +
 hw/pci/pcie.c  |  118 +++-
 hw/vfio/pci.c  |9 --
 include/hw/pci/pci.h   |   13 +++
 include/hw/pci/pcie.h  |1 
 include/hw/pci/pcie_port.h |4 +
 include/hw/pci/pcie_regs.h |   23 -
 include/hw/qdev-properties.h   |8 ++
 qapi/common.json   |   42 
 12 files changed, 404 insertions(+), 12 deletions(-)

Re: [Qemu-devel] [PATCH for-3.1 v2] build: qga: add macro to force use of native mingw32 assert()

2018-11-14 Thread Michael Roth

Quoting Daniel P. Berrangé (2018-11-14 10:54:56)
> On Wed, Nov 14, 2018 at 10:44:38AM -0600, Michael Roth wrote:
> > When building qemu-ga for w32 with VSS support, some parts of qemu-ga
> > are not linked against glib, specifically the C++ bits used to
> > create the VSS provider DLL. With 3ebee3b191e, we now define assert()
> > as g_assert() for all mingw32 builds via osdep.h, which results in the
> > following build breakage:
> > 
> >   x86_64-w64-mingw32-g++ -o qga/vss-win32/qga-vss.dll 
> > qga/vss-win32/requester.o qga/vss-win32/provider.o qga/vss-win32/install.o 
> > /home/mdroth/w/qemu4.git/qga/vss-win32/qga-vss.def  -shared 
> > -Wl,--add-stdcall-alias,--enable-stdcall-fixup -lole32 -loleaut32 -lshlwapi 
> > -luuid -static
> >   qga/vss-win32/requester.o: In function `requester_freeze':
> >   /home/mdroth/w/qemu4.git/qga/vss-win32/requester.cpp:284: undefined 
> > reference to `g_assertion_message_expr'
> >   qga/vss-win32/requester.o: In function `requester_thaw':
> >   /home/mdroth/w/qemu4.git/qga/vss-win32/requester.cpp:508: undefined 
> > reference to `g_assertion_message_expr'
> >   /home/mdroth/w/qemu4.git/qga/vss-win32/requester.cpp:509: undefined 
> > reference to `g_assertion_message_expr'
> >   collect2: error: ld returned 1 exit status
> >   make: *** [/home/mdroth/w/qemu4.git/qga/vss-win32/Makefile.objs:10: 
> > qga/vss-win32/qga-vss.dll] Error 1
> >   make: *** Waiting for unfinished jobs
> > 
> > Fix this by introducing a USE_NATIVE_MINGW32_ASSERT macro that can
> > be defined prior to inclusion of osdep.h for build targets that
> > don't link against glib.
> 
> Why doesn't it link to glib and can that be fixed ?

Not sure, the original commit (b39297ae) takes some steps specifically to
avoid a glib dependency, but I'm not sure what the underlying reasons were.
One downside is that it would require a static mingw glib dependency even
for non-static qemu/qemu-ga builds, but that's not too hard to deal with if
we add the appropriate configure checks/errors for it (especially
compared to satisfying the VSS SDK dependency...)

> 
> Glib is considered a mandatory dependancy on anything in QEMU that
> uses osdep.h, since that header pulls in the glib.h header unconditionally.

Of the 3 source files in qga/vss-win32/:

install.cpp uses it for:

  #include 
  #include "config-host.h"
  #include "qemu-version.h"

requester.cpp uses it for:

  #include 
  #include 
  #include "qemu/compiler.h" //for GCC_FMT_ATTR

and provider.cpp doesn't need it.

It's not much, but if linking with static glib isn't too bad that seems
like the simplest option, and might allow for some cleanups later (like
removing the struct Error stubs re-implementations in requester.h).

Thanks for the suggestions.

> 
> > Cc: Richard Henderson 
> > Cc: Philippe Mathieu-Daudé 
> > Signed-off-by: Michael Roth 
> > ---
> > v2:
> >  * define USE_NATIVE_MINGW32_ASSERT via build recipe and avoid moving
> >#include's before osdep.h (Philippe)
> > ---
> >  include/qemu/osdep.h| 2 +-
> >  qga/vss-win32/Makefile.objs | 2 +-
> >  2 files changed, 2 insertions(+), 2 deletions(-)
> > 
> > diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
> > index 3bf48bcdec..59364bfeb0 100644
> > --- a/include/qemu/osdep.h
> > +++ b/include/qemu/osdep.h
> > @@ -129,7 +129,7 @@ extern int daemon(int, int);
> >   * code that is unreachable when features are disabled.
> >   * All supported versions of Glib's g_assert() satisfy this requirement.
> >   */
> > -#ifdef __MINGW32__
> > +#if defined(__MINGW32__) && !defined(USE_NATIVE_MINGW32_ASSERT)
> 
> I don't think we should have these kind of hacks.
> 
> Either make vss-win32 link to glib, or stop it importing
> osdep.h entirely if it really must be built completely
> standalone from normal QEMU dependancies.
> 
> >  #undef assert
> >  #define assert(x)  g_assert(x)
> >  #endif
> > diff --git a/qga/vss-win32/Makefile.objs b/qga/vss-win32/Makefile.objs
> > index 23d08da225..5773bfd868 100644
> > --- a/qga/vss-win32/Makefile.objs
> > +++ b/qga/vss-win32/Makefile.objs
> > @@ -3,7 +3,7 @@
> >  qga-vss-dll-obj-y += requester.o provider.o install.o
> >  
> >  obj-qga-vss-dll-obj-y = $(addprefix $(obj)/, $(qga-vss-dll-obj-y))
> > -$(obj-qga-vss-dll-obj-y): QEMU_CXXFLAGS = $(filter-out -Wstrict-prototypes 
> > -Wmissing-prototypes -Wnested-externs -Wold-style-declaration 
> > -Wold-style-definition -Wredundant-decls -fstack-protector-all 
> > -fstack-protector-strong, $(QEMU_CFLAGS)) -Wno-unknown-pragmas 
> > -Wno-delete-non-virtual-dtor
> > +$(obj-qga-vss-dll-obj-y): QEMU_CXXFLAGS = $(filter-out -Wstrict-prototypes 
> > -Wmissing-prototypes -Wnested-externs -Wold-style-declaration 
> > -Wold-style-definition -Wredundant-decls -fstack-protector-all 
> > -fstack-protector-strong, $(QEMU_CFLAGS)) -Wno-unknown-pragmas 
> > -Wno-delete-non-virtual-dtor -DUSE_NATIVE_MINGW32_ASSERT
> >  
> >  $(obj)/qga-vss.dll: LDFLAGS = -shared 
> > -Wl,--add-stdcall-alias,--enable-stdcall-fixup -lole32

Re: [Qemu-devel] [PATCH RFC 5/6] test-string-input-visitor: split off uint64 list tests

2018-11-14 Thread David Hildenbrand

On 14.11.18 17:21, Markus Armbruster wrote:
> David Hildenbrand  writes:
> 
>> Basically copy all int64 list tests but adapt them to work on uint64
>> instead.
>>
>> Signed-off-by: David Hildenbrand 
>> ---
>>  tests/test-string-input-visitor.c | 71 +--
>>  1 file changed, 67 insertions(+), 4 deletions(-)
>>
>> diff --git a/tests/test-string-input-visitor.c 
>> b/tests/test-string-input-visitor.c
>> index 2f6360e9ca..731094f789 100644
>> --- a/tests/test-string-input-visitor.c
>> +++ b/tests/test-string-input-visitor.c
>> @@ -111,7 +111,6 @@ static void test_visitor_in_intList(TestInputVisitorData 
>> *data,
>>6, 7, 8 };
>>  int64_t expect2[] = { 32767, -32768, -32767 };
>>  int64_t expect3[] = { INT64_MIN, INT64_MAX };
>> -uint64_t expect4[] = { UINT64_MAX };
>>  Error *err = NULL;
>>  int64List *res = NULL;
>>  Visitor *v;
>> @@ -129,9 +128,6 @@ static void test_visitor_in_intList(TestInputVisitorData 
>> *data,
>>  "-9223372036854775808,9223372036854775807");
>>  check_ilist(v, expect3, ARRAY_SIZE(expect3));
>>  
>> -v = visitor_input_test_init(data, "18446744073709551615");
>> -check_ulist(v, expect4, ARRAY_SIZE(expect4));
>> -
> 
> Hmm.  Testing behavior for this input is still worthwhile, isn't it?
> 
> The use of check_ulist() here is admittedly unclean.

This check has been moved to the other function where we test ulists.

Or do you want this check here to test again ilist and see that an error
gets reported as the value is too big? Will add such range checks.

> 
>>  /* Empty list */
>>  
>>  v = visitor_input_test_init(data, "");
>> @@ -174,6 +170,71 @@ static void 
>> test_visitor_in_intList(TestInputVisitorData *data,
>>  visit_end_list(v, NULL);
>>  }
>>  
>> +static void test_visitor_in_uintList(TestInputVisitorData *data,
>> + const void *unused)
>> +{
>> +uint64_t expect1[] = { 1, 2, 0, 2, 3, 4, 20, 5, 6, 7, 8, 9, 1, 2, 3, 4, 
>> 5,
> 
> Please wrap this line a bit earlier.

Yes.

> 
>> +   6, 7, 8 };
>> +uint64_t expect2[] = { 32767, -32768, -32767 };
>> +uint64_t expect3[] = { UINT64_MAX };
>> +Error *err = NULL;
>> +uint64List *res = NULL;
>> +Visitor *v;
>> +uint64_t val;
>> +
>> +/* Valid lists */
>> +
>> +v = visitor_input_test_init(data, "1,2,0,2-4,20,5-9,1-8");
>> +check_ulist(v, expect1, ARRAY_SIZE(expect1));
>> +
>> +v = visitor_input_test_init(data, "32767,-32768--32767");
>> +check_ulist(v, expect2, ARRAY_SIZE(expect2));
>> +
>> +v = visitor_input_test_init(data, "18446744073709551615");
>> +check_ulist(v, expect3, ARRAY_SIZE(expect3));
> 
> Test behavior for large negative numbers?

Yes, will add.



-- 

Thanks,

David / dhildenb

Re: [Qemu-devel] [PATCH v2 07/11] block: Leave BDS.backing_file constant

2018-11-14 Thread Max Reitz

On 13.11.18 00:08, Eric Blake wrote:
> On 8/9/18 5:31 PM, Max Reitz wrote:
>> Parts of the block layer treat BDS.backing_file as if it were whatever
>> the image header says (i.e., if it is a relative path, it is relative to
>> the overlay), other parts treat it like a cache for
>> bs->backing->bs->filename (relative paths are relative to the CWD).
>> Considering bs->backing->bs->filename exists, let us make it mean the
>> former.
>>
>> Among other things, this now allows the user to specify a base when
>> using qemu-img to commit an image file in a directory that is not the
>> CWD (assuming, everything uses relative filenames).
>>
>> Before this patch:
>>
>> $ ./qemu-img create -f qcow2 foo/bot.qcow2 1M
>> $ ./qemu-img create -f qcow2 -b bot.qcow2 foo/mid.qcow2
>> $ ./qemu-img create -f qcow2 -b mid.qcow2 foo/top.qcow2
>> $ ./qemu-img commit -b mid.qcow2 foo/top.qcow2
>> qemu-img: Did not find 'mid.qcow2' in the backing chain of
>> 'foo/top.qcow2'
>> $ ./qemu-img commit -b foo/mid.qcow2 foo/top.qcow2
>> qemu-img: Did not find 'foo/mid.qcow2' in the backing chain of
>> 'foo/top.qcow2'
>> $ ./qemu-img commit -b $PWD/foo/mid.qcow2 foo/top.qcow2
>> qemu-img: Did not find '[...]/foo/mid.qcow2' in the backing chain of
>> 'foo/top.qcow2'
> 
> Three failures in a row - no way to commit short of changing your
> working directory.
> 
>>
>> After this patch:
>>
>> $ ./qemu-img commit -b mid.qcow2 foo/top.qcow2
>> Image committed.
>> $ ./qemu-img commit -b foo/mid.qcow2 foo/top.qcow2
>> qemu-img: Did not find 'foo/mid.qcow2' in the backing chain of
>> 'foo/top.qcow2'
>> $ ./qemu-img commit -b $PWD/foo/mid.qcow2 foo/top.qcow2
>> Image committed.
> 
> Yay, that looks saner.
> 
>>
>> With this change, bdrv_find_backing_image() must look at whether the
>> user has overridden a BDS's backing file.  If so, it can no longer use
>> bs->backing_file, but must instead compare the given filename against
>> the backing node's filename directly.
>>
>> Note that this changes the QAPI output for a node's backing_file.  We
>> had very inconsistent output there (sometimes what the image header
>> said, sometimes the actual filename of the backing image).  This
>> inconsistent output was effectively useless, so we have to decide one
>> way or the other.  Considering that bs->backing_file usually at runtime
>> contained the path to the image relative to qemu's CWD (or absolute),
>> this patch changes QAPI's backing_file to always report the
>> bs->backing->bs->filename from now on.  If you want to receive the image
>> header information, you have to refer to full-backing-filename.
>>
>> This necessitates a change to iotest 228.  The interesting information
>> it really wanted is the image header, and it can get that now, but it
>> has to use full-backing-filename instead of backing_file.  Because of
>> this patch's changes to bs->backing_file's behavior, we also need some
>> reference output changes.
>>
>> Along with the changes to bs->backing_file, stop updating
>> BDS.backing_format in bdrv_backing_attach() as well.  This necessitates
>> a change to the reference output of iotest 191.
> 
> Good explanations for the test changes.
> 
>>
>> Signed-off-by: Max Reitz 
>> ---
>>   include/block/block_int.h  | 14 +-
>>   block.c    | 29 ++---
>>   block/qapi.c   |  7 ---
>>   qemu-img.c | 12 ++--
>>   tests/qemu-iotests/191.out |  1 -
>>   tests/qemu-iotests/228 |  6 +++---
>>   tests/qemu-iotests/228.out |  6 +++---
>>   7 files changed, 51 insertions(+), 24 deletions(-)
>>
>> diff --git a/include/block/block_int.h b/include/block/block_int.h
>> index d3d8b22155..8f2c515ec1 100644
>> --- a/include/block/block_int.h
>> +++ b/include/block/block_int.h
>> @@ -737,11 +737,15 @@ struct BlockDriverState {
>>   bool walking_aio_notifiers; /* to make removal during iteration
>> safe */
>>     char filename[PATH_MAX];
>> -    char backing_file[PATH_MAX]; /* if non zero, the image is a diff of
>> -    this file image */
>> -    /* The backing filename indicated by the image header; if we ever
>> - * open this file, then this is replaced by the resulting BDS's
>> - * filename (i.e. after a bdrv_refresh_filename() run). */
>> +    /* If non-zero, the image is a diff of this image file.  Note that
> 
> Pre-existing, but that sentence might read nicer as:
> 
> If not empty, this image is a diff in relation to backing_file.
> 
>> + * this the name given in the image header and may therefore not
> 
> "this the name" is wrong; did you mean "this is the name" or "this name"
> or "the name"?

Would any of the latter two make more grammatical sense? O:-)

Will fix.

I'll also fix the "may" wording.  Like this it sounds as if this is not
allowed to be equal to .backing->bs->filename, which of course is not
true.  It may or may not be equal.  So, I'll reword to:

"If not empty, this image is a diff in relation to

Re: [Qemu-devel] [PATCH v2 04/11] block: Storage child access function

2018-11-14 Thread Max Reitz

On 12.11.18 23:32, Eric Blake wrote:
> On 8/9/18 5:31 PM, Max Reitz wrote:
>> For completeness' sake, add a function for accessing a node's storage
>> child, too.  For filters, this is there filtered child; for non-filters,
> 
> s/there/their/
> 
>> this is bs->file.
>>
>> Some places are deliberately left unconverted:
>> - BDS opening/closing functions where bs->file is handled specially
>>    (which is basically wrong, but at least simplifies probing)
>> - bdrv_co_block_status_from_file(), because its name implies that it
>>    points to ->file
> 
> I'm wondering if we can clean up block_status to let filters have a NULL
> callback and io.c do the right thing automatically, rather than the
> current approach of filters assigning the callback to the common helper
> routine.  Maybe later in the series.

Hm!  I didn't even think of that.  Yes, everything that uses
bdrv_co_block_status_from_*() seems to be an R/W filter, and which they
use simply depends on whether they use bs->backing or bs->file.  Well,
blkdebug is the exception, because it really wants to put an assertion
there, but inlining the code there shouldn't be the showstopper.

Max



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH RFC 3/6] qapi: rewrite string-input-visitor

2018-11-14 Thread David Hildenbrand

On 14.11.18 18:38, Markus Armbruster wrote:
> David Hildenbrand  writes:
> 
>> The input visitor has some problems right now, especially
>> - unsigned type "Range" is used to process signed ranges, resulting in
>>   inconsistent behavior and ugly/magical code
>> - uint64_t are parsed like int64_t, so big uint64_t values are not
>>   supported and error messages are misleading
>> - lists/ranges of int64_t are accepted although no list is parsed and
>>   we should rather report an error
>> - lists/ranges are preparsed using int64_t, making it hard to
>>   implement uint64_t values or uint64_t lists
>> - types that don't support lists don't bail out
> 
> Known weirdness: empty list is invalid (test-string-input-visitor.c
> demonstates).  Your patch doesn't change that (or else it would update
> the test).  Should it be changed?
> 

I don't change the test, so the old behavior still works.
(empty string -> error)

>>
>> So let's rewrite it by getting rid of usage of the type "Range" and
>> properly supporting lists of int64_t and uint64_t (including ranges of
>> both types), fixing the above mentioned issues.
>>
>> Lists of other types are not supported and will properly report an
>> error. Virtual walks are now supported.
>>
>> Tests have to be fixed up:
>> - Two BUGs were hardcoded that are fixed now
>> - The string-input-visitor now actually returns a parsed list and not
>>   an ordered set.
>>
>> While at it, do some smaller style changes (e.g. use g_assert).
> 
> Please don't replace assert() by g_assert() in files that consistently
> use assert() now.

Alright, will use assert() and don't replace. (I thought g_assert() was
the new fancy thing to do it in QEMU).

> 
> In my opinion, g_assert() is one of the cases where GLib pointlessly
> reinvents wheels that have been carrying load since forever.
> 
>> Signed-off-by: David Hildenbrand 
>> ---
>>  include/qapi/string-input-visitor.h |   3 +-
>>  qapi/string-input-visitor.c | 436 +---
>>  tests/test-string-input-visitor.c   |  18 +-
>>  3 files changed, 264 insertions(+), 193 deletions(-)
>>
>> diff --git a/include/qapi/string-input-visitor.h 
>> b/include/qapi/string-input-visitor.h
>> index 33551340e3..2c40f7f5a6 100644
>> --- a/include/qapi/string-input-visitor.h
>> +++ b/include/qapi/string-input-visitor.h
>> @@ -19,8 +19,7 @@ typedef struct StringInputVisitor StringInputVisitor;
>>  
>>  /*
>>   * The string input visitor does not implement support for visiting
>> - * QAPI structs, alternates, null, or arbitrary QTypes.  It also
>> - * requires a non-null list argument to visit_start_list().
>> + * QAPI structs, alternates, null, or arbitrary QTypes.
> 
> Preexisting: neglects to spell out the list limitations, i.e. can do
> only flat lists of integers.  Care do fix that, too?

Added "Only flat lists of integers (int64/uint64) are supported."

> 
>>   */
>>  Visitor *string_input_visitor_new(const char *str);
>>  
>> diff --git a/qapi/string-input-visitor.c b/qapi/string-input-visitor.c
>> index dee708d384..16da47409e 100644
>> --- a/qapi/string-input-visitor.c
>> +++ b/qapi/string-input-visitor.c
>> @@ -4,10 +4,10 @@
>>   * Copyright Red Hat, Inc. 2012-2016
>>   *
>>   * Author: Paolo Bonzini 
>> + * David Hildenbrand 
>>   *
>>   * This work is licensed under the terms of the GNU LGPL, version 2.1 or 
>> later.
>>   * See the COPYING.LIB file in the top-level directory.
>> - *
>>   */
>>  
>>  #include "qemu/osdep.h"
>> @@ -18,21 +18,39 @@
>>  #include "qapi/qmp/qerror.h"
>>  #include "qapi/qmp/qnull.h"
>>  #include "qemu/option.h"
>> -#include "qemu/queue.h"
>> -#include "qemu/range.h"
>>  #include "qemu/cutils.h"
>>  
>> +typedef enum ListMode {
>> +/* no list parsing active / no list expected */
>> +LM_NONE,
>> +/* we have an unparsed string remaining */
>> +LM_UNPARSED,
>> +/* we have an unfinished int64 range */
>> +LM_INT64_RANGE,
>> +/* we have an unfinished uint64 range */
>> +LM_UINT64_RANGE,
>> +/* we have parsed the string completely and no range is remaining */
>> +LM_END,
>> +} ListMode;
>> +
>> +typedef union RangeLimit {
>> +int64_t i64;
>> +uint64_t u64;
>> +} RangeLimit;
>>  
>>  struct StringInputVisitor
>>  {
>>  Visitor visitor;
>>  
>> -GList *ranges;
>> -GList *cur_range;
>> -int64_t cur;
>> +/* Porperties related to list processing */
> 
> "Properties"
> 
> Suggest
> 
>/* List parsing state */

Yes, will use that.

> 
>> +ListMode lm;
>> +RangeLimit rangeNext;
>> +RangeLimit rangeEnd;
> 
> RangeLimit is a good name for rangeEnd, but not so hot for rangeNext.
> Naming is hard.

Initially I had two unnamed unions to not have to give a name ;)

RangeElement ? Will use that for now.

> 
>> +const char *unparsed_string;
>> +void *list;
> 
> You drop /* Only needed for sanity checking the caller */.
> Double-checking: is that not true anymore?

I consider such comments bad as they can easily become

Re: [Qemu-devel] [PATCH v2 03/11] block: Filtered children access functions

2018-11-14 Thread Max Reitz

On 12.11.18 23:17, Eric Blake wrote:
> On 8/9/18 5:31 PM, Max Reitz wrote:
>> What bs->file and bs->backing mean depends on the node.  For filter
>> nodes, both signify a node that will eventually receive all R/W
>> accesses.  For format nodes, bs->file contains metadata and data, and
>> bs->backing will not receive writes -- instead, writes are COWed to
>> bs->file.  Usually.
>>
>> In any case, it is not trivial to guess what a child means exactly with
>> our currently limited form of expression.  It is better to introduce
>> some functions that actually guarantee a meaning:
>>
>> - bdrv_filtered_cow_child() will return the child that receives requests
>>    filtered through COW.  That is, reads may or may not be forwarded
>>    (depending on the overlay's allocation status), but writes never go to
>>    this child.
>>
>> - bdrv_filtered_rw_child() will return the child that receives requests
>>    filtered through some very plain process.  Reads and writes issued to
>>    the parent will go to the child as well (although timing, etc. may be
>>    modified).
>>
>> - All drivers but quorum (but quorum is pretty opaque to the general
>>    block layer anyway) always only have one of these children: All read
>>    requests must be served from the filtered_rw_child (if it exists), so
>>    if there was a filtered_cow_child in addition, it would not receive
>>    any requests at all.
>>    (The closest here is mirror, where all requests are passed on to the
>>    source, but with write-blocking, write requests are "COWed" to the
>>    target.  But that just means that the target is a special child that
>>    cannot be introspected by the generic block layer functions, and that
>>    source is a filtered_rw_child.)
>>    Therefore, we can also add bdrv_filtered_child() which returns that
>>    one child (or NULL, if there is no filtered child).
>>
>> Also, many places in the current block layer should be skipping filters
>> (all filters or just the ones added implicitly, it depends) when going
>> through a block node chain.  They do not do that currently, but this
>> patch makes them.
> 
> The description makes sense; now on to the code.
> 
>>
>> Signed-off-by: Max Reitz 
>> ---
>>   qapi/block-core.json   |   4 +
>>   include/block/block.h  |   1 +
>>   include/block/block_int.h  |  33 +-
>>   block.c    | 184 -
>>   block/backup.c |   8 +-
>>   block/block-backend.c  |  16 ++-
>>   block/commit.c |  36 ---
>>   block/io.c |  27 ++---
>>   block/mirror.c |  37 ---
>>   block/qapi.c   |  26 ++---
>>   block/stream.c |  15 ++-
>>   blockdev.c |  84 ---
>>   migration/block-dirty-bitmap.c |   4 +-
>>   nbd/server.c   |   8 +-
>>   qemu-img.c |  12 ++-
>>   15 files changed, 363 insertions(+), 132 deletions(-)
>>
>> diff --git a/qapi/block-core.json b/qapi/block-core.json
>> index f20efc97f7..a71df88eb2 100644
>> --- a/qapi/block-core.json
>> +++ b/qapi/block-core.json
>> @@ -2248,6 +2248,10 @@
>>   # On successful completion the image file is updated to drop the
>> backing file
>>   # and the BLOCK_JOB_COMPLETED event is emitted.
> 
> Context: this is part of block-stream.
> 
>>   #
>> +# In case @device is a filter node, block-stream modifies the first
>> non-filter
>> +# overlay node below it to point to base's backing node (or NULL if
>> @base was
>> +# not specified) instead of modifying @device itself.
> 
> That is, if we have:
> 
> base <- filter1 <- active <- filter2
> 
> and request a block-stream with "top":"filter2", it is no different in
> effect than if we had requested "top":"active", since filter nodes can't
> be stream targets.  Makes sense.
> 
> What happens if we request "base":"filter1"? Do we want to require base
> to be a non-filter node?

Well, then you get this after streaming:

base <- active <- filter2

There is no good reason why you'd stream to remove filters (just doing a
reopen should be enough), but why not.  We can make the backing pointer
point to any child, so it doesn't matter what the child is.  The problem
is that we can only write backing file strings into actual COW overlay
nodes, so it does matter what the parent is.

>> +++ b/include/block/block_int.h
>> @@ -91,6 +91,7 @@ struct BlockDriver {
>>    * certain callbacks that refer to data (see block.c) to their
>> bs->file if
>>    * the driver doesn't implement them. Drivers that do not wish
>> to forward
>>    * must implement them and return -ENOTSUP.
>> + * Note that filters are not allowed to modify data.
> 
> They can modify offsets and timing, but not data?  Even if it is an
> encryption filter?  I'm trying to figure out if LUKS behaves like a filter.

It doesn't.  It's a format.

First of all, LUKS has metadata, so it definitely is a format.

Re: [Qemu-devel] [PATCH] cpus: run work items for all vCPUs if single-threaded

2018-11-14 Thread Emilio G. Cota

On Wed, Nov 14, 2018 at 12:44:00 +0100, Paolo Bonzini wrote:
> This avoids the following deadlock:
> 
> 1) a thread calls run_on_cpu for CPU 2 from a timer, and single_tcg_halt_cond
> is signaled
> 
> 2) CPU 1 is running and exits.  It finds no work item and enters CPU 2
> 
> 3) because the I/O thread is stuck in run_on_cpu, the round-robin kick
> timer never triggers, and CPU 2 never runs the work item
> 
> 4) run_on_cpu never completes

I'm having trouble understanding (2)->(3).

When the vCPU thread enters CPU 2, shouldn't it detect that work is
pending? As in:

/* assume cpu == cpu2 in the example above */
while (cpu && !cpu->queued_work_first && !cpu->exit_request) {

Both cpu->queued_work_first and cpu->exit_request will be set for cpu2.

I can see though how with an additional CPU the deadlock
could happen. For example, the I/O thread does run_on_cpu(cpu3),
which kicks cpu1 (i.e. the tcg_current_rr_cpu) and cpu3, but not cpu2.
Then cpu1 exits, and cpu2 starts executing; unless cpu2 exits on its
own volition, it will run forever.

Thanks,

Emilio

Re: [Qemu-devel] [Qemu-block] KVM Forum block no[td]es

2018-11-14 Thread John Snow




On 11/12/18 10:25 AM, Max Reitz wrote:
> On 12.11.18 00:36, Nir Soffer wrote:
>> On Mon, Nov 12, 2018 at 12:25 AM Max Reitz > > wrote:
>>
>> This is what I’ve taken from two or three BoF-like get-togethers on
>> blocky things.  Amendments are more than welcome, of course.
>>
>> ... 
>>
>> Bitmaps
>>
>> ===
>>
>> (Got this section from sneaking into a BoF I wasn’t invited to.  Oh
>> well.  Won’t hurt to include them here.)
>>
>> Currently, when dirty bitmaps are loaded, all IN_USE bitmaps are just
>> not loaded at all and completely ignored.  That isn’t correct, though,
>> they should either still be loaded (and automatically treated and
>> written back as fully dirty), or at least qemu-img check should
>> “repair” them (i.e. fully dirtying them).
>>
>>
>> I'm not sure making bitmaps dirty is better.
>>
>> When bitmap is marked IN_USE, it means that we cannot use it for
>> backup. Deleting the bitmap or making it as bad so it cannot be used
>> for the next backup is the same. Making the bitmap as dirty will full
>> the management layer that everything was fine when the next backup
>> includes the entire disk. It is better to cause the next backup to fail
>> in a verbose way, since the backup software can recover by doing
>> a full backup.
> 
> But making the dirty bitmap fully dirty will automatically enforce a
> full backup.
> 

I lean towards just deleting the bitmap in these cases but *not*
automatically.

If you do check -r, though, deleting them seems correct. This way you
don't get any nasty surprises when the cronjob goes to make an
incremental and it's accidentally 2TB and still running when you arrive
on Monday morning.

>> Sometimes qemu (running in a mode as bare as possible) is better than
>> using qemu-img convert, for instance.  It gives you more control
>> (through QMP; you get e.g. better progress reporting), you get all of
>> the mirror optimizations (we do have optimizations for convert, too,
>> but whether it’s any good to write the same (or different?)
>> optimizations twice is another question), and you get a common
>> interface for everything (online and offline).
>> Note that besides a bare qemu we’ve also always wanted to convert as
>> many qemu-img operations into frontends for block jobs as possible.
>> We have only done this for commit, however, even though convert looked
>> like basically the ideal target.  It was just too hard with too little
>> apparent gain, like always (and convert supports additional features
>> like concatenation which we don’t have in the runtime block layer
>> yet).
>>
>>
>> I'm not sure it is better to run qemu and use qemu-img as thin wrapper
>> over qmp.
>>
>> For example, management system may ascociate qemu-img
>> with a sanlock lease, and sanlock may try to terminate qemu-img when
>> a lease is invalidated. In this case sanlock will succeed while qemu is till
>> accessing storage it should not access.
>> ...
> 
> The point was not to run two processes, but to link the necessary bits
> of qemu into qemu-img and then use them inside the single qemu-img
> process itself.  As hinted at above, we've been doing this for qemu-img
> commit for quite some time; that command actually runs a block job under
> the hood (inside of qemu-img).
> 
> Max
>

Re: [Qemu-devel] [PATCH V2] migration/colo.c: Fix compilation issue when disable replication

2018-11-14 Thread Zhang Chen

On Wed, Nov 14, 2018 at 7:17 PM Peter Maydell 
wrote:

> On 14 November 2018 at 11:06, Thomas Huth  wrote:
> > On 2018-11-14 11:47, Peter Xu wrote:
> >> On Thu, Nov 01, 2018 at 10:12:26AM +0800, Zhang Chen wrote:
> >>> This compilation issue will occur when user use --disable-replication
> >>> to config Qemu.
> >>>
> >>> Reported-by: Thomas Huth 
> >>> Signed-off-by: Zhang Chen 
> >>
> >> Hi,
> >>
> >> How's the status of this patch?  Are we gonna merge it for 3.1?
> >>
> >> I just posted a similar one without knowing this (until Dave pointed
> >> it out).  IMHO it can be a good candidate for 3.1.
> >
> > Maybe Peter Maydell could apply this directly to the repo as a built fix?
>
> I'd rather it just went through the migration tree, really...
> it isn't actually breaking any of our travis jobs or other CI.
>
>
Hi Dave,

What do you think about peter's comments?

Thanks
Zhang Chen


> thanks
> -- PMM
>

[Qemu-devel] [PATCH] migration/migration.c: Add COLO dependency checks

2018-11-14 Thread Zhang Chen

From: Zhang Chen 

Current COLO mode(independent disk mode) need replication module work
together. Suggested by Dr. David Alan Gilbert .

Signed-off-by: Zhang Chen 
---
 migration/migration.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/migration/migration.c b/migration/migration.c
index b261c1e4ce..49ffb9997a 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -918,6 +918,15 @@ static bool migrate_caps_check(bool *cap_list,
 }
 #endif
 
+#ifndef CONFIG_REPLICATION
+if (cap_list[MIGRATION_CAPABILITY_X_COLO]) {
+error_setg(errp, "QEMU compiled without replication module"
+   " can't enable COLO");
+error_append_hint(errp, "Please enable replication before COLO.\n");
+return false;
+}
+#endif
+
 if (cap_list[MIGRATION_CAPABILITY_POSTCOPY_RAM]) {
 if (cap_list[MIGRATION_CAPABILITY_COMPRESS]) {
 /* The decompression threads asynchronously write into RAM
-- 
2.17.GIT

Re: [Qemu-devel] [RFC PATCH 0/3] Series short description

2018-11-14 Thread no-reply

Hi,

This series seems to have some coding style problems. See output below for
more information:

Message-id: 154219299016.19470.9372139354280787961.stgit@wayrath
Type: series
Subject: [Qemu-devel] [RFC PATCH 0/3] Series short description

=== TEST SCRIPT BEGIN ===
#!/bin/bash

BASE=base
n=1
total=$(git log --oneline $BASE.. | wc -l)
failed=0

git config --local diff.renamelimit 0
git config --local diff.renames True
git config --local diff.algorithm histogram

commits="$(git log --format=%H --reverse $BASE..)"
for c in $commits; do
echo "Checking PATCH $n/$total: $(git log -n 1 --format=%s $c)..."
if ! git show $c --format=email | ./scripts/checkpatch.pl --mailback -; then
failed=1
echo
fi
n=$((n+1))
done

exit $failed
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
28cb7a9 i386: custom cache size in AMD's CPUID descriptors too
1708b37 i386: custom cache size in CPUID2 and CPUID4 descriptors
ed1b0e0 i386: add properties for customizing L2 and L3 caches size

=== OUTPUT BEGIN ===
Checking PATCH 1/3: i386: add properties for customizing L2 and L3 caches 
size...
Checking PATCH 2/3: i386: custom cache size in CPUID2 and CPUID4 descriptors...
ERROR: braces {} are necessary for all arms of this statement
#38: FILE: target/i386/cpu.c:440:
+if (sets == 0)
[...]

total: 1 errors, 0 warnings, 56 lines checked

Your patch has style problems, please review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Checking PATCH 3/3: i386: custom cache size in AMD's CPUID descriptors too...
=== OUTPUT END ===

Test command exited with code: 1


---
Email generated automatically by Patchew [http://patchew.org/].
Please send your feedback to patchew-de...@redhat.com

Re: [Qemu-devel] [RFC 44/48] cpus: lockstep execution support

2018-11-14 Thread Emilio G. Cota

On Wed, Nov 14, 2018 at 16:43:22 +, Alex Bennée wrote:
> 
> Emilio G. Cota  writes:
> 
> > Signed-off-by: Emilio G. Cota 
> > ---
> 
> >
> >  void cpu_interrupt(CPUState *cpu, int mask);
> > diff --git a/cpus.c b/cpus.c
> > index 3efe89354d..a446632a5c 100644
> > --- a/cpus.c
> > +++ b/cpus.c
> 
> > +
> > +static void cpu_lockstep_init(CPUState *cpu)
> > +{
> > +if (!lockstep_enabled) {
> > +return;
> > +}
> > +qemu_mutex_lock(_lock);
> > +/*
> > + * HACK: avoid racing with a wakeup, which would miss the addition
> > + * of this CPU; just wait until no wakeup is ongoing.
> > + */
> > +while (unlikely(lockstep_ongoing_wakeup)) {
> > +qemu_mutex_unlock(_lock);
> > +sched_yield();
> 
> This breaks Windows builds. I suspect if we do want to this sort of
> functionality we'll need to expose a utility function in
> oslib-posix/oslib-win32

This was just a quick hack to avoid adding a condvar to the
wake-up fast path. Fixed in v2 with the below, which only calls
cond_broadcast if needed (i.e. very rarely).

Thanks,

Emilio

diff --git a/cpus.c b/cpus.c
index d7d1bd3e00..06a952e504 100644
--- a/cpus.c
+++ b/cpus.c
@@ -84,8 +84,10 @@ static unsigned int throttle_percentage;
 static bool lockstep_enabled;
 static bool lockstep_ongoing_wakeup;
 static QemuMutex lockstep_lock;
+static QemuCond lockstep_cond;
 static int n_lockstep_running_cpus;
 static int n_lockstep_cpus;
+static int n_lockstep_initializing_cpus;
 static CPUState **lockstep_cpus;
 
 #define CPU_THROTTLE_PCT_MIN 1
@@ -1260,6 +1262,7 @@ void qemu_init_cpu_loop(void)
 qemu_init_sigbus();
 qemu_mutex_init(_global_mutex);
 qemu_mutex_init(_lock);
+qemu_cond_init(_cond);
 
 qemu_thread_get_self(_thread);
 }
@@ -1369,6 +1372,13 @@ static void lockstep_check_stop(CPUState *cpu)
 cpu_mutex_lock(cpu);
 qemu_mutex_lock(_lock);
 lockstep_ongoing_wakeup = false;
+/*
+ * If newly spawned CPUs are waiting to be added to the wait list,
+ * let them do so now.
+ */
+if (unlikely(n_lockstep_initializing_cpus)) {
+qemu_cond_broadcast(_cond);
+}
 }
 qemu_mutex_unlock(_lock);
 }
@@ -1379,16 +1389,15 @@ static void cpu_lockstep_init(CPUState *cpu)
 if (!lockstep_enabled) {
 return;
 }
+
 qemu_mutex_lock(_lock);
-/*
- * HACK: avoid racing with a wakeup, which would miss the addition
- * of this CPU; just wait until no wakeup is ongoing.
- */
-while (unlikely(lockstep_ongoing_wakeup)) {
-qemu_mutex_unlock(_lock);
-sched_yield();
-qemu_mutex_lock(_lock);
+/* avoid racing with a wakeup, which would miss the addition of this CPU */
+n_lockstep_initializing_cpus++;
+while (lockstep_ongoing_wakeup) {
+qemu_cond_wait(_cond, _lock);
 }
+n_lockstep_initializing_cpus--;
+
 lockstep_cpus = g_realloc(lockstep_cpus,
   (n_lockstep_cpus + 1) * sizeof(CPUState *));
 lockstep_cpus[n_lockstep_cpus++] = cpu;

Re: [Qemu-devel] [RFC 01/48] cpu: introduce run_on_cpu_no_bql

2018-11-14 Thread Alex Bennée



Emilio G. Cota  writes:

> On Wed, Nov 14, 2018 at 11:30:19 +, Alex Bennée wrote:
>>
>> Emilio G. Cota  writes:
>>
>> > This allows us to queue synchronous CPU work without the BQL.
>> >
>> > Will gain a user soon.
>>
>> This is also in the cpu-lock series right?
>
> No, in the cpu-lock series we add async_run_on_cpu_no_bql;
> here we add the sync version.

My mistake, being word blind:

Reviewed-by: Alex Bennée 

--
Alex Bennée

Re: [Qemu-devel] [PATCH] virtio: Provide version-specific variants of virtio PCI devices

2018-11-14 Thread Eduardo Habkost

On Thu, Oct 18, 2018 at 12:25:12PM +0200, Andrea Bolognani wrote:
> On Wed, 2018-10-17 at 12:01 -0300, Eduardo Habkost wrote:
> > On Wed, Oct 17, 2018 at 12:43:02PM +0200, Andrea Bolognani wrote:
> > > The proposal doesn't directly address the interaction between virtio
> > > protocol version and slot type. [...]
> > 
> > It does.  See the interface names added to each device type.
> 
> Sorry, I might be blind but I'm still not seeing it... I see a bunch
> of *-pci devices and exactly zero *-pcie devices. See below.
> 
> [...]
> > The difference between the devices is not just the bus type: it
> > is a different type of device with different behavior.  Using the
> > bus type to differentiate them would be misleading.
> 
> I'm not arguing that we should use the bus type *only* to
> differentiate them, but rather that we should *also* differentiate
> them by bus type; failing to do so will mean that...
> 
> > e.g. both modern and transitional virtio devices can be plugged
> > to Conventional PCI buses, but they have different PCI IDs.
> 
> ... even people who should be very familiar with the topic by now,
> like you and me, will get it wrong from time to time, as Michael
> helpfully pointed out ;)
> 
> > I'm considering doing this in v2:
> > 
> > * Remove the -0.9 device type, because nobody seems to need it
> > * Add two device types:
> >   * virtio-1-blk-pci-non-transitional
> >   * virtio-1-blk-pci-transitional
> > 
> > This way, we:
> > * Include only the major version of the spec (so
> >   we don't require new device types for virtio 1.1, 1.2, etc),
> > * Use terms that come from the Virtio spec itself, to avoid
> >   ambiguity.
> 
> That's quite a mouthful :)
> 
> Anyway, whether a device or not is transitional should go next to
> the spec version rather than at the end: this will also help with
> consistency because we will need -device and -ccw variants of all
> these, no?

Answering this question: we won't need this on the other variants
they don't have the code that silently breaks compatibility with
legacy drivers depending on the bus it's plugged.  The device is
either always transitional or always non-transitional.

The root of the problem is in virtio-pci.  More precisely, it
begins at virtio_pci_realize():

if (proxy->disable_legacy == ON_OFF_AUTO_AUTO) {
proxy->disable_legacy = pcie_port ? ON_OFF_AUTO_ON : ON_OFF_AUTO_OFF;
}

combined with virtio_pci_device_plugged():

if (legacy) {
[...]
pci_set_word(config + PCI_SUBSYSTEM_ID, virtio_bus_get_vdev_id(bus));
} else {
pci_set_word(config + PCI_VENDOR_ID,
 PCI_VENDOR_ID_REDHAT_QUMRANET);
pci_set_word(config + PCI_DEVICE_ID,
 0x1040 + virtio_bus_get_vdev_id(bus));
pci_config_set_revision(config, 1);
}


> 
> Can we assume if and when virtio 2.0 comes around it will also have
> both a pure variant and a transitional one which is compatible with
> virtio 1.0? If so, I would suggest the names should be
> 
>   virtio-1-blk-pcie   (1.x only, PCIe slot)
>   virtio-1-transitional-blk-pci   (transitional, PCI  slot)
>   virtio-1-transitional-blk-pcie  (transitional, PCIe slot)

I don't see any need to make separate transitional-pci and
transitional-pcie device types.  A -transitional device type that
supports both Conventional PCI and PCI Express will work and I
don't see any problem with that.

For reference, the next version of this patch will have 3 device
variants:

* virtio-blk-pci (existing device, for compatibility. Contains
  magic transitional/non-transitional logic depending on bus)
* virtio-blk-pci-transitional (1.x transitional, supports legacy
  drivers, Conventional PCI only)
* virtio-blk-pci-non-transitional (1.x non-transitional, no
  legacy driver support, supports both Conventional PCI and PCI
  Express)

-- 
Eduardo

Re: [Qemu-devel] [PATCH for-3.2 00/41] RFC: slirp: make it again a standalone project

2018-11-14 Thread Dr. David Alan Gilbert

* Markus Armbruster (arm...@redhat.com) wrote:
> Thomas Huth  writes:
> 
> > On 2018-11-14 15:46, Markus Armbruster wrote:
> >> Thomas Huth  writes:
> >> 
> >>> On 2018-11-14 13:59, Markus Armbruster wrote:
>  Marc-André Lureau  writes:
> 
> > Hi,
> >
> > Based-on: https://people.debian.org/~sthibault/qemu.git/ slirp branch
> >
> > This series goal is to allow building libslirp as an independent 
> > library.
> >
> > While looking at making SLIRP a seperate running process, I thought
> > that having an independent library from QEMU would be a first step.
> >
> > There has been some attempts to make slirp a seperate project in the 
> > past.
> > (https://lists.gnu.org/archive/html/qemu-devel/2017-02/msg01092.html)
> > Unfortunately, they forked from QEMU and didn't provide enough
> > compatibility for QEMU to make use of it (in particular, vmstate
> > handling was removed, they lost git history etc). Furthermore, they
> > are not maintained as far as I can see.
> >
> > I would propose to make slirp a seperate project, that can initially
> > be used by QEMU as a submodule, keeping Makefile.objs until a proper
> > shared library with stability guarantees etc is ready..
> >
> > The subproject could created by preserving git tags, and cleaning up 
> > the code style, this way:
> >
> > git filter-branch --tree-filter "if ls * 1> /dev/null 2>&1; then 
> > clang-format -i * /dev/null; fi " -f --subdirectory-filter "slirp" 
> > --prune-empty --tag-name-filter cat -- --all
> > (my clang-format 
> > https://gist.github.com/elmarco/cb20c8d92007df0e2fb8a2404678ac73)
> >
> > What do you think?
> 
>  Has the slirp code been improved to be generally useful?  I still got it
>  filed under "friends don't let friends use that, except for testing"...
> >>>
> >>> The slirp code is already used in a lot of other projects:
> >> 
> >> The issue I have with SLIRP isn't that it solves a useless problem (au
> >> contraire!), it's that it's a useless solution.
> >
> > Ouch, that was completely arrogant and inappropriate. It's far away from
> > being useless,
> 
> ... as I immediately admit ...
> 
> >and Samuel is doing a very good job in picking up all the
> > patches and fixes that have been posted in the past months. Have you had
> > a look at the changelog at all before you wrote that sentence?
> 
> ... right in the next sentence:
> 
> >> Okay, that's an unfair
> >> exaggeration, it's not useless, I just wouldn't trust it in production,
> >> unless it has improved significantly since I last looked at it.
> 
> If my joke offended Samuel, or anyone, I offer my sincere apologies.
> 
> > Nobody said that the slirp code would suddenly be perfect, but if it's
> > getting even more traction and attention as a separate library (since
> > other projects might contribute their fixes back "upstream" in that
> > case), it could really get a solid building block for a lot of emulators
> > and similar software.
> [...]
> 
> I'm afraid we're in violent agreement there.  I wrote:
> 
> >> No objections to spinning it out, as long as it comes with a fair
> >> assessment of its limitations.
> >> 
> >> Turning it into a proper project might even improve its chances to
> >> get improved towards production quality, compared to its current
> >> existence as a corner of QEMU next to nobody wants to touch.

One thought is that it might actually be the best standalone stack
around at the moment; I mean while I'm seeing people objecting
to bits of SLIRP, I'm not seeing anyone saying

'Why don't you just use '

Dave

--
Dr. David Alan Gilbert / dgilb...@redhat.com / Manchester, UK

Re: [Qemu-devel] [PATCH] MAINTAINERS: list myself as maintainer for various Arm boards

2018-11-14 Thread Alistair Francis


On 14/11/2018 3:12 am, Peter Maydell wrote:

On 13 November 2018 at 20:10, Alistair Francis  wrote:

These two and the Xilinx boards seem a little out of place in this
patch. I agree they probably aren't maintained as well as they should
be, but the patch talks about orphaned boards and these four all have
active QEMU maintainers listed.


Yeah, I was planning to improve the commit message,
something along the lines of an extra para:

"The Xilinx boards are maintained, but patches to
them go via the target-arm tree, so add myself as a
maintainer there too."

Really this is because we're overloading "maintainer"
to mean both "this person cares about and is the
primary reviewer and yes/no decision for them" and
also "this person is responsible for getting the
patches into the tree". Perhaps we should instead
define a new letter tag for the latter ?


What about adding a "P:" tag for pull request sender?

Alistair



thanks
-- PMM

Re: [Qemu-devel] [RFC 09/48] tcg: reset runtime helpers when flushing the code cache

2018-11-14 Thread Emilio G. Cota

On Wed, Nov 14, 2018 at 17:01:13 +, Alex Bennée wrote:
> 
> Emilio G. Cota  writes:
> 
> > In preparation for adding plugin support. One of the clean-up
> > actions when uninstalling plugins will be to flush the code
> > cache. We'll also have to clear the runtime helpers, since
> > some of those runtime helpers may belong to the plugin
> > being uninstalled.
> >
> > Signed-off-by: Emilio G. Cota 
> > ---
(snip)
> > +void tcg_reset_runtime_helpers(void);
> 
> I know tcg.h doesn't have and API doc blocks but perhaps we should
> start?
> 
>   /**
>* tcg_reset_runtime_helpers:
>*
>* Remove all runtime registered helpers. Should only be called from a
>* quiescent system while flushing old code.
>*/

Better late than never. Added to v2.

(snip)
> Otherwise:
> 
> Reviewed-by: Alex Bennée 

Thanks,

E.

Re: [Qemu-devel] [RFC 06/48] tcg: use QHT for helper_table

2018-11-14 Thread Emilio G. Cota

On Wed, Nov 14, 2018 at 16:11:35 +, Alex Bennée wrote:
> 
> Emilio G. Cota  writes:
(snip)
> I needed to do this:
> 
> modified   tcg/tcg.c
> @@ -884,7 +884,7 @@ static TCGTemp *tcg_global_reg_new_internal(TCGContext 
> *s, TCGType type,
> 
>  static inline uint32_t tcg_helper_func_hash(const void *func)
>  {
> -return qemu_xxhash2((uint64_t)func);
> +return qemu_xxhash2((uint64_t)(uintptr_t)func);
>  }
> 
> To prevent the compiler complaining about:
> 
>  tcg.c:887:25: error: cast from pointer to integer of different size 
> [-Werror=pointer-to-int-cast]

Fixed, thanks.

E.

Re: [Qemu-devel] [PATCH for-3.2 00/41] RFC: slirp: make it again a standalone project

2018-11-14 Thread Markus Armbruster

Thomas Huth  writes:

> On 2018-11-14 15:46, Markus Armbruster wrote:
>> Thomas Huth  writes:
>> 
>>> On 2018-11-14 13:59, Markus Armbruster wrote:
 Marc-André Lureau  writes:

> Hi,
>
> Based-on: https://people.debian.org/~sthibault/qemu.git/ slirp branch
>
> This series goal is to allow building libslirp as an independent library.
>
> While looking at making SLIRP a seperate running process, I thought
> that having an independent library from QEMU would be a first step.
>
> There has been some attempts to make slirp a seperate project in the past.
> (https://lists.gnu.org/archive/html/qemu-devel/2017-02/msg01092.html)
> Unfortunately, they forked from QEMU and didn't provide enough
> compatibility for QEMU to make use of it (in particular, vmstate
> handling was removed, they lost git history etc). Furthermore, they
> are not maintained as far as I can see.
>
> I would propose to make slirp a seperate project, that can initially
> be used by QEMU as a submodule, keeping Makefile.objs until a proper
> shared library with stability guarantees etc is ready..
>
> The subproject could created by preserving git tags, and cleaning up the 
> code style, this way:
>
> git filter-branch --tree-filter "if ls * 1> /dev/null 2>&1; then 
> clang-format -i * /dev/null; fi " -f --subdirectory-filter "slirp" 
> --prune-empty --tag-name-filter cat -- --all
> (my clang-format 
> https://gist.github.com/elmarco/cb20c8d92007df0e2fb8a2404678ac73)
>
> What do you think?

 Has the slirp code been improved to be generally useful?  I still got it
 filed under "friends don't let friends use that, except for testing"...
>>>
>>> The slirp code is already used in a lot of other projects:
>> 
>> The issue I have with SLIRP isn't that it solves a useless problem (au
>> contraire!), it's that it's a useless solution.
>
> Ouch, that was completely arrogant and inappropriate. It's far away from
> being useless,

... as I immediately admit ...

>and Samuel is doing a very good job in picking up all the
> patches and fixes that have been posted in the past months. Have you had
> a look at the changelog at all before you wrote that sentence?

... right in the next sentence:

>> Okay, that's an unfair
>> exaggeration, it's not useless, I just wouldn't trust it in production,
>> unless it has improved significantly since I last looked at it.

If my joke offended Samuel, or anyone, I offer my sincere apologies.

> Nobody said that the slirp code would suddenly be perfect, but if it's
> getting even more traction and attention as a separate library (since
> other projects might contribute their fixes back "upstream" in that
> case), it could really get a solid building block for a lot of emulators
> and similar software.
[...]

I'm afraid we're in violent agreement there.  I wrote:

>> No objections to spinning it out, as long as it comes with a fair
>> assessment of its limitations.
>> 
>> Turning it into a proper project might even improve its chances to
>> get improved towards production quality, compared to its current
>> existence as a corner of QEMU next to nobody wants to touch.

Re: [Qemu-devel] [RFC 06/48] tcg: use QHT for helper_table

2018-11-14 Thread Emilio G. Cota

On Wed, Nov 14, 2018 at 14:41:53 +, Alex Bennée wrote:
> Emilio G. Cota  writes:
(snip)
> > -static GHashTable *helper_table;
> > +static struct qht helper_table;
> > +static bool helper_table_inited;
> 
> Having a flag for initialisation seems a little excessive considering
> we've moved that initialisation into tcg_context_init() which has to be
> called before we do anything TCG related.

(snip)
> > +helper_table_inited = true;
> 
> so I think we can drop this and...

(snip)
> > +static inline const char *tcg_helper_find(TCGContext *s, uintptr_t val)
> >  {
> >  const char *ret = NULL;
> > -if (helper_table) {
> > -TCGHelperInfo *info = g_hash_table_lookup(helper_table, 
> > (gpointer)val);
> > +if (helper_table_inited) {
> 
> change this to a assert(helper_table.cmp) if you really want to.

I like this suggestion. The only caller of tcg_helper_find
is tcg_dump_ops, which is unlikely to be called on an
uninitialized TCGContext.

I have added this to v2, without the assert.

(snip)
> Otherwise:
> 
> Reviewed-by: Alex Bennée 

Thanks!

E.

Re: [Qemu-devel] [PATCH] block/nvme: call blk_drain in NVMe reset code to avoid lockups

2018-11-14 Thread Igor Druzhinin

On 06/11/2018 12:16, Igor Druzhinin wrote:
> When blk_flush called in NVMe reset path S/C queues are already freed
> which means that re-entering AIO handling loop having some IO requests
> unfinished will lockup or crash as their SG structures being potentially
> reused. Call blk_drain before freeing the queues to avoid this nasty
> scenario.
> 
> Signed-off-by: Igor Druzhinin 
> ---
>  hw/block/nvme.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index fc7dacb..cdf836e 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -797,6 +797,8 @@ static void nvme_clear_ctrl(NvmeCtrl *n)
>  {
>  int i;
>  
> +blk_drain(n->conf.blk);
> +
>  for (i = 0; i < n->num_queues; i++) {
>  if (n->sq[i] != NULL) {
>  nvme_free_sq(n->sq[i], n);
> 

ping?

Re: [Qemu-devel] [PATCH RFC 3/6] qapi: rewrite string-input-visitor

2018-11-14 Thread Markus Armbruster

David Hildenbrand  writes:

> The input visitor has some problems right now, especially
> - unsigned type "Range" is used to process signed ranges, resulting in
>   inconsistent behavior and ugly/magical code
> - uint64_t are parsed like int64_t, so big uint64_t values are not
>   supported and error messages are misleading
> - lists/ranges of int64_t are accepted although no list is parsed and
>   we should rather report an error
> - lists/ranges are preparsed using int64_t, making it hard to
>   implement uint64_t values or uint64_t lists
> - types that don't support lists don't bail out

Known weirdness: empty list is invalid (test-string-input-visitor.c
demonstates).  Your patch doesn't change that (or else it would update
the test).  Should it be changed?

>
> So let's rewrite it by getting rid of usage of the type "Range" and
> properly supporting lists of int64_t and uint64_t (including ranges of
> both types), fixing the above mentioned issues.
>
> Lists of other types are not supported and will properly report an
> error. Virtual walks are now supported.
>
> Tests have to be fixed up:
> - Two BUGs were hardcoded that are fixed now
> - The string-input-visitor now actually returns a parsed list and not
>   an ordered set.
>
> While at it, do some smaller style changes (e.g. use g_assert).

Please don't replace assert() by g_assert() in files that consistently
use assert() now.

In my opinion, g_assert() is one of the cases where GLib pointlessly
reinvents wheels that have been carrying load since forever.

> Signed-off-by: David Hildenbrand 
> ---
>  include/qapi/string-input-visitor.h |   3 +-
>  qapi/string-input-visitor.c | 436 +---
>  tests/test-string-input-visitor.c   |  18 +-
>  3 files changed, 264 insertions(+), 193 deletions(-)
>
> diff --git a/include/qapi/string-input-visitor.h 
> b/include/qapi/string-input-visitor.h
> index 33551340e3..2c40f7f5a6 100644
> --- a/include/qapi/string-input-visitor.h
> +++ b/include/qapi/string-input-visitor.h
> @@ -19,8 +19,7 @@ typedef struct StringInputVisitor StringInputVisitor;
>  
>  /*
>   * The string input visitor does not implement support for visiting
> - * QAPI structs, alternates, null, or arbitrary QTypes.  It also
> - * requires a non-null list argument to visit_start_list().
> + * QAPI structs, alternates, null, or arbitrary QTypes.

Preexisting: neglects to spell out the list limitations, i.e. can do
only flat lists of integers.  Care do fix that, too?

>   */
>  Visitor *string_input_visitor_new(const char *str);
>  
> diff --git a/qapi/string-input-visitor.c b/qapi/string-input-visitor.c
> index dee708d384..16da47409e 100644
> --- a/qapi/string-input-visitor.c
> +++ b/qapi/string-input-visitor.c
> @@ -4,10 +4,10 @@
>   * Copyright Red Hat, Inc. 2012-2016
>   *
>   * Author: Paolo Bonzini 
> + * David Hildenbrand 
>   *
>   * This work is licensed under the terms of the GNU LGPL, version 2.1 or 
> later.
>   * See the COPYING.LIB file in the top-level directory.
> - *
>   */
>  
>  #include "qemu/osdep.h"
> @@ -18,21 +18,39 @@
>  #include "qapi/qmp/qerror.h"
>  #include "qapi/qmp/qnull.h"
>  #include "qemu/option.h"
> -#include "qemu/queue.h"
> -#include "qemu/range.h"
>  #include "qemu/cutils.h"
>  
> +typedef enum ListMode {
> +/* no list parsing active / no list expected */
> +LM_NONE,
> +/* we have an unparsed string remaining */
> +LM_UNPARSED,
> +/* we have an unfinished int64 range */
> +LM_INT64_RANGE,
> +/* we have an unfinished uint64 range */
> +LM_UINT64_RANGE,
> +/* we have parsed the string completely and no range is remaining */
> +LM_END,
> +} ListMode;
> +
> +typedef union RangeLimit {
> +int64_t i64;
> +uint64_t u64;
> +} RangeLimit;
>  
>  struct StringInputVisitor
>  {
>  Visitor visitor;
>  
> -GList *ranges;
> -GList *cur_range;
> -int64_t cur;
> +/* Porperties related to list processing */

"Properties"

Suggest

   /* List parsing state */

> +ListMode lm;
> +RangeLimit rangeNext;
> +RangeLimit rangeEnd;

RangeLimit is a good name for rangeEnd, but not so hot for rangeNext.
Naming is hard.

> +const char *unparsed_string;
> +void *list;

You drop /* Only needed for sanity checking the caller */.
Double-checking: is that not true anymore?

>  
> +/* the original string to parse */
>  const char *string;
> -void *list; /* Only needed for sanity checking the caller */
>  };
>  
>  static StringInputVisitor *to_siv(Visitor *v)
> @@ -40,136 +58,48 @@ static StringInputVisitor *to_siv(Visitor *v)
>  return container_of(v, StringInputVisitor, visitor);
>  }
>  
> -static void free_range(void *range, void *dummy)
> -{
> -g_free(range);
> -}
> -
> -static int parse_str(StringInputVisitor *siv, const char *name, Error **errp)
> -{
> -char *str = (char *) siv->string;
> -long long start, end;
> -Range *cur;
> -char *endptr;
> -
> -if (siv->ranges) {
> -

Re: [Qemu-devel] [PATCH v3 0/4] fsdev-throttle-qmp: qmp interface for fsdev io throttling

2018-11-14 Thread no-reply

Hi,

This series failed docker-quick@centos7 build test. Please find the testing 
commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.

Message-id: cover.1542187291.git.xiezh...@huawei.com
Type: series
Subject: [Qemu-devel] [PATCH v3 0/4] fsdev-throttle-qmp: qmp interface for 
fsdev io throttling

=== TEST SCRIPT BEGIN ===
#!/bin/bash
time make docker-test-quick@centos7 SHOW_ENV=1 J=8
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
Switched to a new branch 'test'
416820d fsdev-throttle-qmp: hmp interface for fsdev io throttling
ee45ae0f fsdev-throttle-qmp: qmp interface for fsdev io throttling
6c6b122 fsdev-throttle-qmp: move struct ThrottleLimits to new file
9e8624e fsdev-throttle-qmp: factor out throttle code to reuse code

=== OUTPUT BEGIN ===
  BUILD   centos7
make[1]: Entering directory `/var/tmp/patchew-tester-tmp-iweax89d/src'
  GEN 
/var/tmp/patchew-tester-tmp-iweax89d/src/docker-src.2018-11-14-12.08.57.2432/qemu.tar
Cloning into 
'/var/tmp/patchew-tester-tmp-iweax89d/src/docker-src.2018-11-14-12.08.57.2432/qemu.tar.vroot'...
done.
Checking out files:   9% (621/6457)   
Checking out files:  10% (646/6457)   
Checking out files:  11% (711/6457)   
Checking out files:  12% (775/6457)   
Checking out files:  13% (840/6457)   
Checking out files:  14% (904/6457)   
Checking out files:  15% (969/6457)   
Checking out files:  16% (1034/6457)   
Checking out files:  17% (1098/6457)   
Checking out files:  18% (1163/6457)   
Checking out files:  19% (1227/6457)   
Checking out files:  20% (1292/6457)   
Checking out files:  21% (1356/6457)   
Checking out files:  22% (1421/6457)   
Checking out files:  23% (1486/6457)   
Checking out files:  24% (1550/6457)   
Checking out files:  25% (1615/6457)   
Checking out files:  26% (1679/6457)   
Checking out files:  27% (1744/6457)   
Checking out files:  28% (1808/6457)   
Checking out files:  29% (1873/6457)   
Checking out files:  30% (1938/6457)   
Checking out files:  31% (2002/6457)   
Checking out files:  32% (2067/6457)   
Checking out files:  33% (2131/6457)   
Checking out files:  34% (2196/6457)   
Checking out files:  35% (2260/6457)   
Checking out files:  36% (2325/6457)   
Checking out files:  37% (2390/6457)   
Checking out files:  38% (2454/6457)   
Checking out files:  39% (2519/6457)   
Checking out files:  40% (2583/6457)   
Checking out files:  41% (2648/6457)   
Checking out files:  42% (2712/6457)   
Checking out files:  43% (2777/6457)   
Checking out files:  44% (2842/6457)   
Checking out files:  45% (2906/6457)   
Checking out files:  45% (2967/6457)   
Checking out files:  46% (2971/6457)   
Checking out files:  47% (3035/6457)   
Checking out files:  48% (3100/6457)   
Checking out files:  49% (3164/6457)   
Checking out files:  50% (3229/6457)   
Checking out files:  51% (3294/6457)   
Checking out files:  52% (3358/6457)   
Checking out files:  53% (3423/6457)   
Checking out files:  54% (3487/6457)   
Checking out files:  55% (3552/6457)   
Checking out files:  56% (3616/6457)   
Checking out files:  57% (3681/6457)   
Checking out files:  58% (3746/6457)   
Checking out files:  59% (3810/6457)   
Checking out files:  60% (3875/6457)   
Checking out files:  61% (3939/6457)   
Checking out files:  62% (4004/6457)   
Checking out files:  63% (4068/6457)   
Checking out files:  64% (4133/6457)   
Checking out files:  65% (4198/6457)   
Checking out files:  66% (4262/6457)   
Checking out files:  67% (4327/6457)   
Checking out files:  68% (4391/6457)   
Checking out files:  69% (4456/6457)   
Checking out files:  70% (4520/6457)   
Checking out files:  71% (4585/6457)   
Checking out files:  72% (4650/6457)   
Checking out files:  73% (4714/6457)   
Checking out files:  74% (4779/6457)   
Checking out files:  75% (4843/6457)   
Checking out files:  76% (4908/6457)   
Checking out files:  77% (4972/6457)   
Checking out files:  78% (5037/6457)   
Checking out files:  79% (5102/6457)   
Checking out files:  80% (5166/6457)   
Checking out files:  81% (5231/6457)   
Checking out files:  82% (5295/6457)   
Checking out files:  83% (5360/6457)   
Checking out files:  84% (5424/6457)   
Checking out files:  85% (5489/6457)   
Checking out files:  86% (5554/6457)   
Checking out files:  87% (5618/6457)   
Checking out files:  88% (5683/6457)   
Checking out files:  89% (5747/6457)   
Checking out files:  90% (5812/6457)   
Checking out files:  91% (5876/6457)   
Checking out files:  92% (5941/6457)   
Checking out files:  93% (6006/6457)   
Checking out files:  94% (6070/6457)   
Checking out files:  95% (6135/6457)   
Checking out files:  96% (6199/6457)   
Checking out files:  97% (6264/6457)   
Checking out files:  98% (6328/6457)   
Checking out files:  99% (6393/6457)   
Checking out files: 100% (6457/6457)   
Checking out files: 100% (6457/6457), done.
Submodule 'dtc' (https://git.qemu.org/git/dtc.git) registered for path 'dtc'
Cloning into 'dtc'...

Re: [Qemu-devel] [Qemu-block] [PATCH 3/3] block: Removed unused sector-based blocking I/O

2018-11-14 Thread Eric Blake


[Reviving an old series]

On 5/3/18 8:36 AM, Alberto Garcia wrote:

On Fri 27 Apr 2018 05:43:33 PM CEST, Eric Blake wrote:

-static int bdrv_rw_co(BdrvChild *child, int64_t sector_num, uint8_t *buf,
-  int nb_sectors, bool is_write, BdrvRequestFlags flags)
-{
-QEMUIOVector qiov;
-struct iovec iov = {
-.iov_base = (void *)buf,
-.iov_len = nb_sectors * BDRV_SECTOR_SIZE,
-};
-
-if (nb_sectors < 0 || nb_sectors > BDRV_REQUEST_MAX_SECTORS) {
-return -EINVAL;
-}


Do we have a check for BDRV_REQUEST_MAX_BYTES in the byte-based API?


No, but we don't need one.  First, note that bs->bl.max_transfer is
currently uint32_t, so right now, no driver can set it any larger than
4G


But note that the old limit was based on a signed integer.

BDRV_REQUEST_MAX_SECTORS is 2147483136 bytes (a bit less than 2GB).

For 32-bit integers, (INT_MAX - 1) & ~511 = 2147483136


The whole point of the byte-based callbacks is that they pass a 64-bit 
length BUT also honor the driver's max_transfer.  As long as drivers 
default to (or explicitly set) a max_transfer of 
BDRV_REQUEST_MAX_SECTORS or smaller, then the block layer has already 
taken care of fragmenting things so that the callers no longer have to 
worry about artificially fragmenting things themselves before handing 
something to the block layer that might overflow.  And you snipped the 
rest of my mail, where I argued:




But, having said that, you've made me wonder if it's time to increase
max_transfer to [u]int64_t, then audit ALL drivers to ensure that they
either properly handle requests >=4G or else set max_transfer <4G
(ideally, the block layer will default max_transfer to the value of
BDRV_REQUEST_MAX_BYTES, even if the audit means we no longer need that
macro).  Should be a separate series, though.


I will concede that you are right that because we previously used a 
signed type, I should amend my argument to state that we should audit 
ALL drivers to ensure that they either properly handle requests >= 2G 
or else set max_transfer <2G, even though max_transfer is unsigned. 
Maybe it's time that I attempt that audit, before posting a v2 of this 
series for 4.0. (Fingers crossed that it will be a quick audit)


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

Re: [Qemu-devel] KVM Forum block no[td]es

2018-11-14 Thread Max Reitz

On 13.11.18 16:12, Alberto Garcia wrote:
> On Sun 11 Nov 2018 11:25:00 PM CET, Max Reitz wrote:
> 
>> Permission system
>> =
>>
>> GRAPH_MOD
>> -
>>
>> We need some way for the commit job to prevent graph changes on its
>> chain while it is running.  Our current blocker doesn’t do the job,
>> however.  What to do?
>>
>> - We have no idea how to make a *permission* work.  Maybe the biggest
>>   problem is that it just doesn’t work as a permission, because the
>>   commit job doesn’t own the BdrvChildren that would need to be
>>   blocked (namely the @backing BdrvChild).
> 
> What I'm doing at the moment in my blockdev-reopen series is check
> whether all parents of the node I want to change share the GRAPH_MOD
> permission. If any of them doesn't then the operation fails.
> 
> This can be checked by calling bdrv_get_cumulative_perm() or simply
> bdrv_check_update_perm(..., BLK_PERM_GRAPH_MOD, ...).

Sure.

> It solves the problem because e.g. the stream block job only allows
> BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED, so no graph
> modifications allowed.

But the problem is that the commit job needs to unshare the permission
on all backing links between base and top, so none of the backing links
can be broken.  But it cannot do that because those backing links do not
belong to it (but to the respective overlay nodes).

>>   (We never quite knew what “taking the GRAPH_PERMISSION” or
>>   “unsharing the GRPAH_MOD permission” was supposed to mean.  Figuring
>>   that out always took like half an our in any face-to-face meeting,
>>   and then we decided it was pretty much useless for any case we had
>>   at hand.)
> 
> Yeah it's a bit unclear. It can mean "none of this node's children can
> be replaced / removed", which is what I'm using it for at the moment.

Yes, but that's just not useful.  I don't think we have any case where
something would be annoyed by having its immediate child replaced (other
than the visible data changing, potentially).  Usually when we say
something (e.g. a block job) wants to prevent graph modification, it's
about changing edges that exist independently of the job (such as a
backing chain).

>> Reopen
>> --
>>
>> How should permissions be handled while the reopen is under way?
>> Maybe we should take the union of @perm before and after, and the
>> intersection of @shared before and after?
> 
> Do you have an example of a case in which you're reopening a node and
> the change of permissions causes a problem?

I do not.  So currently we switch from the permissions before to the
ones after, right?  Which should be safe because that switch is a
transaction that is integrated into reopen.

However, I suppose it's possible the protocol layer gives us some issues
again.  It cannot switch the locks before commit, but committing may
fail.  What to do then?

It would be safer if we took the union/intersection in
bdrv_reopen_prepare() and then released the unnecessary flags in
bdrv_reopen_commit().  We can still get errors (as discussed), but those
can be safely ignored.  (They're just annoying, but not harmful.)

>> - Is it possible that changing an option may require taking an
>>   intermediate permission that is required neither before nor after
>>   the reopen process?
>>   Changing a child link comes to mind (like changing a child from one
>>   BDS to another, where the visible data changes, which would mean we
>>   may want to e.g. unshare CONSISTENT_READ during the reopen).
>>   However:
>>   1. It is unfeasible to unshare that for all child changes.
>>  Effectively everything requires CONSISTENT_READ, and for good
>>  reason.
>>   2. Why would a user even change a BDS to something of a different
>>  content?
>>   3. Anything that currently allows you to change a child node assumes
>>  that the user always changes it to something of the same content
>>  (some take extra care to verify this, like mirror, which makes
>>  sure that @replaces and the target are connected, and there are
>>  only filter nodes in between).
> 
> I don't think the user wants to change a BDS to something of a different
> content, but it may happen that QEMU doesn't have a way to verify
> whether the content is the same or not.
> 
> I think we have one such case already with 'blockdev-snapshot', in which
> you add a BDS with blockdev-add and then add its contents on top of an
> existing BDS.

Yeah.  The new overlay is expected to be empty so when you replace the
now-snapshot with it, the parent nodes don't see any change.  If it's
not empty...  Well, your problem.

Max

signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] [PATCH 5/6] accel/tcg: Return -1 for execution from MMIO regions in get_page_addr_code()

2018-11-14 Thread Thomas Huth

On 2018-07-10 18:00, Peter Maydell wrote:
> Now that all the callers can handle get_page_addr_code() returning -1,
> remove all the code which tries to handle execution from MMIO regions
> or small-MMU-region RAM areas. This will mean that we can correctly
> execute from these areas, rather than ending up either aborting QEMU
> or delivering an incorrect guest exception.
> 
> Signed-off-by: Peter Maydell 
> ---
>  accel/tcg/cputlb.c | 95 +-
>  1 file changed, 10 insertions(+), 85 deletions(-)
> 
> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> index c491703f15f..abb0225dc79 100644
> --- a/accel/tcg/cputlb.c
> +++ b/accel/tcg/cputlb.c
> @@ -741,39 +741,6 @@ void tlb_set_page(CPUState *cpu, target_ulong vaddr,
>  prot, mmu_idx, size);
>  }
>  
> -static void report_bad_exec(CPUState *cpu, target_ulong addr)
> -{
> -/* Accidentally executing outside RAM or ROM is quite common for
> - * several user-error situations, so report it in a way that
> - * makes it clear that this isn't a QEMU bug and provide suggestions
> - * about what a user could do to fix things.
> - */
> -error_report("Trying to execute code outside RAM or ROM at 0x"
> - TARGET_FMT_lx, addr);
> -error_printf("This usually means one of the following happened:\n\n"
> - "(1) You told QEMU to execute a kernel for the wrong 
> machine "
> - "type, and it crashed on startup (eg trying to run a "
> - "raspberry pi kernel on a versatilepb QEMU machine)\n"
> - "(2) You didn't give QEMU a kernel or BIOS filename at all, 
> "
> - "and QEMU executed a ROM full of no-op instructions until "
> - "it fell off the end\n"
> - "(3) Your guest kernel has a bug and crashed by jumping "
> - "off into nowhere\n\n"
> - "This is almost always one of the first two, so check your "
> - "command line and that you are using the right type of 
> kernel "
> - "for this machine.\n"
> - "If you think option (3) is likely then you can try 
> debugging "
> - "your guest with the -d debug options; in particular "
> - "-d guest_errors will cause the log to include a dump of 
> the "
> - "guest register state at this point.\n\n"
> - "Execution cannot continue; stopping here.\n\n");

 Hi Peter!

Looks like this patch now causes QEMU to segfault instead of printing the
above error message in certain cases, e.g.:

$ gdb --args aarch64-softmmu/qemu-system-aarch64 -M n800
[...]
(gdb) r
Starting program: aarch64-softmmu/qemu-system-aarch64 -M n800
[...]
Program received signal SIGSEGV, Segmentation fault.
[...]
(gdb) bt
#0  0x55addc68 in onenand_read (opaque=0x57600600, addr=98304, 
size=4) at hw/block/onenand.c:612
#1  0x558b175c in memory_region_read_accessor (mr=0x57600b80, 
addr=98304, value=0x7fffdbffe360, size=4, shift=0, mask=4294967295, attrs=...)
at memory.c:440
#2  0x558ae669 in access_with_adjusted_size (addr=addr@entry=98304, 
value=value@entry=0x7fffdbffe360, size=size@entry=4, access_size_min=, access_size_max=, access_fn=access_fn@entry=0x558b1720 
, mr=mr@entry=0x57600b80, 
attrs=attrs@entry=...) at memory.c:570
#3  0x558b3016 in memory_region_dispatch_read (attrs=..., size=4, 
pval=0x7fffdbffe360, addr=98304, mr=0x57600b80) at memory.c:1375
#4  0x558b3016 in memory_region_dispatch_read (mr=0x57600b80, 
addr=addr@entry=98304, pval=pval@entry=0x7fffdbffe360, size=size@entry=4, 
attrs=...)
at memory.c:1402
#5  0x5583cb23 in io_readx (env=env@entry=0x56b58a30, 
iotlbentry=iotlbentry@entry=0x56b6d6b0, mmu_idx=mmu_idx@entry=1, 
addr=addr@entry=98304, retaddr=retaddr@entry=0, recheck=, 
access_type=access_type@entry=MMU_INST_FETCH, size=size@entry=4) at 
accel/tcg/cputlb.c:729
#6  0x558d79cd in helper_le_ldl_cmmu (access_type=MMU_INST_FETCH, 
recheck=, retaddr=0, addr=98304, index=96, mmu_idx=1, 
env=0x56b58a30)
at accel/tcg/softmmu_template.h:106
#7  0x558d79cd in helper_le_ldl_cmmu (env=env@entry=0x56b58a30, 
addr=addr@entry=98304, oi=33, retaddr=retaddr@entry=0)
at accel/tcg/softmmu_template.h:144
#8  0x559d2595 in arm_tr_translate_insn (retaddr=0, ptr=98304, 
env=0x56b58a30) at include/exec/cpu_ldst_template.h:102

Any clue what's going on here?

 Thomas

Re: [Qemu-devel] [RFC 02/48] trace: expand mem_info:size_shift to 3 bits

2018-11-14 Thread Emilio G. Cota

On Wed, Nov 14, 2018 at 13:03:19 +, Alex Bennée wrote:
> 
> Emilio G. Cota  writes:
> 
> > This will allow us to trace 16B-long memory accesses.
> >
> > While at it, add some defines for the mem_info bits and simplify
> > trace_mem_get_info by making it a wrapper around trace_mem_build_info.
> >
> > Signed-off-by: Emilio G. Cota 
> 
> Currently we ignore atomic operation but I assume we'll want to
> represent them at some point here.

We "ignore" them in that we just trace them without marking them
as atomic. We do trace them since d071f4cd55 ("trace: enable tracing
of TCG atomics", 2018-06-27), which BTW is a spin-off of early
iterations of this series.

> I don't know if memory barrier behaviour would also be worth exporting?

I don't see much value in that from a tracing viewpoint; if
users need such instruction-specific details they can just
use a plugin, where they can disassemble TB's.

Thanks,

E.

Re: [Qemu-devel] [PATCH v3 0/4] fsdev-throttle-qmp: qmp interface for fsdev io throttling

2018-11-14 Thread no-reply

Hi,

This series failed docker-mingw@fedora build test. Please find the testing 
commands and
their output below. If you have Docker installed, you can probably reproduce it
locally.

Message-id: cover.1542187291.git.xiezh...@huawei.com
Type: series
Subject: [Qemu-devel] [PATCH v3 0/4] fsdev-throttle-qmp: qmp interface for 
fsdev io throttling

=== TEST SCRIPT BEGIN ===
#!/bin/bash
time make docker-test-mingw@fedora SHOW_ENV=1 J=8
=== TEST SCRIPT END ===

Updating 3c8cf5a9c21ff8782164d1def7f44bd888713384
From https://github.com/patchew-project/qemu
 * [new tag] patchew/20181114164438.6412-1-mdr...@linux.vnet.ibm.com -> 
patchew/20181114164438.6412-1-mdr...@linux.vnet.ibm.com
Switched to a new branch 'test'
416820d fsdev-throttle-qmp: hmp interface for fsdev io throttling
ee45ae0f fsdev-throttle-qmp: qmp interface for fsdev io throttling
6c6b122 fsdev-throttle-qmp: move struct ThrottleLimits to new file
9e8624e fsdev-throttle-qmp: factor out throttle code to reuse code

=== OUTPUT BEGIN ===
  BUILD   fedora
make[1]: Entering directory `/var/tmp/patchew-tester-tmp-313llwek/src'
  GEN 
/var/tmp/patchew-tester-tmp-313llwek/src/docker-src.2018-11-14-12.01.19.18139/qemu.tar
Cloning into 
'/var/tmp/patchew-tester-tmp-313llwek/src/docker-src.2018-11-14-12.01.19.18139/qemu.tar.vroot'...
done.
Checking out files:  46% (3007/6457)   
Checking out files:  47% (3035/6457)   
Checking out files:  48% (3100/6457)   
Checking out files:  49% (3164/6457)   
Checking out files:  50% (3229/6457)   
Checking out files:  51% (3294/6457)   
Checking out files:  52% (3358/6457)   
Checking out files:  53% (3423/6457)   
Checking out files:  54% (3487/6457)   
Checking out files:  55% (3552/6457)   
Checking out files:  56% (3616/6457)   
Checking out files:  57% (3681/6457)   
Checking out files:  58% (3746/6457)   
Checking out files:  59% (3810/6457)   
Checking out files:  60% (3875/6457)   
Checking out files:  61% (3939/6457)   
Checking out files:  62% (4004/6457)   
Checking out files:  63% (4068/6457)   
Checking out files:  64% (4133/6457)   
Checking out files:  65% (4198/6457)   
Checking out files:  66% (4262/6457)   
Checking out files:  67% (4327/6457)   
Checking out files:  68% (4391/6457)   
Checking out files:  69% (4456/6457)   
Checking out files:  70% (4520/6457)   
Checking out files:  71% (4585/6457)   
Checking out files:  72% (4650/6457)   
Checking out files:  73% (4714/6457)   
Checking out files:  74% (4779/6457)   
Checking out files:  75% (4843/6457)   
Checking out files:  76% (4908/6457)   
Checking out files:  77% (4972/6457)   
Checking out files:  78% (5037/6457)   
Checking out files:  79% (5102/6457)   
Checking out files:  80% (5166/6457)   
Checking out files:  81% (5231/6457)   
Checking out files:  82% (5295/6457)   
Checking out files:  83% (5360/6457)   
Checking out files:  84% (5424/6457)   
Checking out files:  85% (5489/6457)   
Checking out files:  86% (5554/6457)   
Checking out files:  87% (5618/6457)   
Checking out files:  88% (5683/6457)   
Checking out files:  89% (5747/6457)   
Checking out files:  90% (5812/6457)   
Checking out files:  91% (5876/6457)   
Checking out files:  92% (5941/6457)   
Checking out files:  93% (6006/6457)   
Checking out files:  94% (6070/6457)   
Checking out files:  95% (6135/6457)   
Checking out files:  96% (6199/6457)   
Checking out files:  97% (6264/6457)   
Checking out files:  98% (6328/6457)   
Checking out files:  99% (6393/6457)   
Checking out files: 100% (6457/6457)   
Checking out files: 100% (6457/6457), done.
Submodule 'dtc' (https://git.qemu.org/git/dtc.git) registered for path 'dtc'
Cloning into 'dtc'...
Submodule path 'dtc': checked out '88f18909db731a627456f26d779445f84e449536'
Submodule 'ui/keycodemapdb' (https://git.qemu.org/git/keycodemapdb.git) 
registered for path 'ui/keycodemapdb'
Cloning into 'ui/keycodemapdb'...
Submodule path 'ui/keycodemapdb': checked out 
'6b3d716e2b6472eb7189d3220552280ef3d832ce'
  COPYRUNNER
RUN test-mingw in qemu:fedora 
Packages installed:
SDL2-devel-2.0.9-1.fc28.x86_64
bc-1.07.1-5.fc28.x86_64
bison-3.0.4-9.fc28.x86_64
bluez-libs-devel-5.50-1.fc28.x86_64
brlapi-devel-0.6.7-19.fc28.x86_64
bzip2-1.0.6-26.fc28.x86_64
bzip2-devel-1.0.6-26.fc28.x86_64
ccache-3.4.2-2.fc28.x86_64
clang-6.0.1-2.fc28.x86_64
device-mapper-multipath-devel-0.7.4-3.git07e7bd5.fc28.x86_64
findutils-4.6.0-19.fc28.x86_64
flex-2.6.1-7.fc28.x86_64
gcc-8.2.1-5.fc28.x86_64
gcc-c++-8.2.1-5.fc28.x86_64
gettext-0.19.8.1-14.fc28.x86_64
git-2.17.2-1.fc28.x86_64
glib2-devel-2.56.3-2.fc28.x86_64
glusterfs-api-devel-4.1.5-1.fc28.x86_64
gnutls-devel-3.6.4-1.fc28.x86_64
gtk3-devel-3.22.30-1.fc28.x86_64
hostname-3.20-3.fc28.x86_64
libaio-devel-0.3.110-11.fc28.x86_64
libasan-8.2.1-5.fc28.x86_64
libattr-devel-2.4.48-3.fc28.x86_64
libcap-devel-2.25-9.fc28.x86_64
libcap-ng-devel-0.7.9-4.fc28.x86_64
libcurl-devel-7.59.0-8.fc28.x86_64
libfdt-devel-1.4.7-1.fc28.x86_64
libpng-devel-1.6.34-6.fc28.x86_64

Re: [Qemu-devel] [RFC 01/48] cpu: introduce run_on_cpu_no_bql

2018-11-14 Thread Emilio G. Cota

On Wed, Nov 14, 2018 at 11:30:19 +, Alex Bennée wrote:
> 
> Emilio G. Cota  writes:
> 
> > This allows us to queue synchronous CPU work without the BQL.
> >
> > Will gain a user soon.
> 
> This is also in the cpu-lock series right?

No, in the cpu-lock series we add async_run_on_cpu_no_bql;
here we add the sync version.

E.

Re: [Qemu-devel] [RFC 09/48] tcg: reset runtime helpers when flushing the code cache

2018-11-14 Thread Alex Bennée



Emilio G. Cota  writes:

> In preparation for adding plugin support. One of the clean-up
> actions when uninstalling plugins will be to flush the code
> cache. We'll also have to clear the runtime helpers, since
> some of those runtime helpers may belong to the plugin
> being uninstalled.
>
> Signed-off-by: Emilio G. Cota 
> ---
>  tcg/tcg.h |  1 +
>  accel/tcg/translate-all.c |  1 +
>  tcg/tcg.c | 21 +
>  3 files changed, 23 insertions(+)
>
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index 3fa434d891..2c378415d2 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -1079,6 +1079,7 @@ bool tcg_op_supported(TCGOpcode op);
>  void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args);
>  void tcg_gen_runtime_helper(const TCGHelperInfo *orig, TCGTemp *ret, int 
> nargs,
>  TCGTemp **args);
> +void tcg_reset_runtime_helpers(void);

I know tcg.h doesn't have and API doc blocks but perhaps we should
start?

  /**
   * tcg_reset_runtime_helpers:
   *
   * Remove all runtime registered helpers. Should only be called from a
   * quiescent system while flushing old code.
   */

>
>  TCGOp *tcg_emit_op(TCGOpcode opc);
>  void tcg_op_remove(TCGContext *s, TCGOp *op);
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index 038d82fdb5..c8b3e0a491 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -1255,6 +1255,7 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data 
> tb_flush_count)
>
>  qht_reset_size(_ctx.htable, CODE_GEN_HTABLE_SIZE);
>  page_flush_tb();
> +tcg_reset_runtime_helpers();
>
>  tcg_region_reset_all();
>  /* XXX: flush processor icache at this point if cache flush is
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index 87e02da740..a6824145b0 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -874,6 +874,7 @@ static const TCGHelperInfo all_helpers[] = {
>  #include "exec/helper-tcg.h"
>  };
>  static struct qht helper_table;
> +static struct qht runtime_helper_table;
>  static bool helper_table_inited;
>
>  static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
> @@ -913,6 +914,22 @@ static void tcg_helper_insert(const TCGHelperInfo *info)
>  g_assert(inserted);
>  }
>
> +static void
> +rm_from_helper_table_and_free(void *p, uint32_t h, void *userp)
> +{
> +bool success;
> +
> +success = qht_remove(_table, p, h);
> +g_assert(success);
> +g_free(p);
> +}
> +
> +void tcg_reset_runtime_helpers(void)
> +{
> +qht_iter(_helper_table, rm_from_helper_table_and_free, NULL);
> +qht_reset(_helper_table);
> +}
> +
>  void tcg_context_init(TCGContext *s)
>  {
>  int op, total_args, n, i;
> @@ -948,6 +965,7 @@ void tcg_context_init(TCGContext *s)
>  /* Register helpers.  */
>  qht_init(_table, tcg_helper_cmp, ARRAY_SIZE(all_helpers),
>   QHT_MODE_AUTO_RESIZE);
> +qht_init(_helper_table, tcg_helper_cmp, 1, QHT_MODE_AUTO_RESIZE);
>
>  for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
>  tcg_helper_insert(_helpers[i]);
> @@ -1847,6 +1865,9 @@ void tcg_gen_runtime_helper(const TCGHelperInfo *orig, 
> TCGTemp *ret, int nargs,
>  if (unlikely(existing)) {
>  g_free(info);
>  info = existing;
> +} else {
> +qht_insert(_helper_table, info, hash, );
> +g_assert(existing == NULL);
>  }
>  }
>  do_tcg_gen_callN(info, ret, nargs, args);

Otherwise:

Reviewed-by: Alex Bennée 

--
Alex Bennée

Re: [Qemu-devel] [PATCH] vmstate: constify VMStateField

2018-11-14 Thread Christian Borntraeger




On 11/14/2018 05:56 PM, Thomas Huth wrote:
> On 2018-11-14 17:49, Peter Maydell wrote:
>> On 14 November 2018 at 16:39, Philippe Mathieu-Daudé  
>> wrote:
>>> Hi Thomas,
>>>
>>> On 14/11/18 17:29, Thomas Huth wrote:
 Please don't. For rationale, see:
 https://www.kernel.org/doc/html/v4.19/process/coding-style.html#typedefs
>>>
>>>
>>> Thanks for the pointer, I am interested in understanding why not do that.
>>> However in the link you pasted I don't see a rational about enforcing
>>> constness, I understand that since this case doesn't match the 5 rules, we
>>> should use 'struct VMStateField' directly and remove the typedef.
>>
>> QEMU's coding style is not the kernel's. In the kernel, yes,
>> they prefer "struct foo". In QEMU we generally prefer to use
>> a typedef for most structs.
> 
> Yes - my point was simply: Let's do not start to hide more things beside
> "struct" in a typedef. If I look at QEMU source code containing just a
> "VMStateField", I expect it to be a shortcut for "struct VMStateField".
> I'd never expect that it also contains the "const" attribute. I'm pretty
> sure that this would cause some confusion in the future otherwise.

I agree. typedefing a struct is one thing, typedefing qualifiers in (like const)
is getting really unexpected.

Re: [Qemu-devel] [PATCH resend for-3.1] make-release: add skiboot .version file

2018-11-14 Thread Michael Roth

Quoting Michael Roth (2018-11-09 10:13:52)
> This is needed to build skiboot from tarball-distributed sources
> since the git data the make_release.sh script relies on to generate
> it is not available.
> 
> Cc: qemu-sta...@nongnu.org
> Reported-by: Michael Tokarev 
> Signed-off-by: Michael Roth 

Hi Peter,

Should I go ahead and send this as a standalone pull or should it go in
through someone else?

> ---
>  scripts/make-release | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/scripts/make-release b/scripts/make-release
> index 04fa9defdc..c14f75b12c 100755
> --- a/scripts/make-release
> +++ b/scripts/make-release
> @@ -19,6 +19,7 @@ pushd ${destination}
>  git checkout "v${version}"
>  git submodule update --init
>  (cd roms/seabios && git describe --tags --long --dirty > .version)
> +(cd roms/skiboot && ./make_version.sh > .version)
>  # FIXME: The following line is a workaround for avoiding filename collisions
>  # when unpacking u-boot sources on case-insensitive filesystems. Once we
>  # update to something with u-boot commit 610eec7f0 we can drop this line.
> -- 
> 2.17.1
>

Re: [Qemu-devel] [PATCH] vmstate: constify VMStateField

2018-11-14 Thread Thomas Huth

On 2018-11-14 17:49, Peter Maydell wrote:
> On 14 November 2018 at 16:39, Philippe Mathieu-Daudé  
> wrote:
>> Hi Thomas,
>>
>> On 14/11/18 17:29, Thomas Huth wrote:
>>> Please don't. For rationale, see:
>>> https://www.kernel.org/doc/html/v4.19/process/coding-style.html#typedefs
>>
>>
>> Thanks for the pointer, I am interested in understanding why not do that.
>> However in the link you pasted I don't see a rational about enforcing
>> constness, I understand that since this case doesn't match the 5 rules, we
>> should use 'struct VMStateField' directly and remove the typedef.
> 
> QEMU's coding style is not the kernel's. In the kernel, yes,
> they prefer "struct foo". In QEMU we generally prefer to use
> a typedef for most structs.

Yes - my point was simply: Let's do not start to hide more things beside
"struct" in a typedef. If I look at QEMU source code containing just a
"VMStateField", I expect it to be a shortcut for "struct VMStateField".
I'd never expect that it also contains the "const" attribute. I'm pretty
sure that this would cause some confusion in the future otherwise.

 Thomas

Re: [Qemu-devel] [PATCH for-3.1 v2] build: qga: add macro to force use of native mingw32 assert()

2018-11-14 Thread Daniel P . Berrangé

On Wed, Nov 14, 2018 at 10:44:38AM -0600, Michael Roth wrote:
> When building qemu-ga for w32 with VSS support, some parts of qemu-ga
> are not linked against glib, specifically the C++ bits used to
> create the VSS provider DLL. With 3ebee3b191e, we now define assert()
> as g_assert() for all mingw32 builds via osdep.h, which results in the
> following build breakage:
> 
>   x86_64-w64-mingw32-g++ -o qga/vss-win32/qga-vss.dll 
> qga/vss-win32/requester.o qga/vss-win32/provider.o qga/vss-win32/install.o 
> /home/mdroth/w/qemu4.git/qga/vss-win32/qga-vss.def  -shared 
> -Wl,--add-stdcall-alias,--enable-stdcall-fixup -lole32 -loleaut32 -lshlwapi 
> -luuid -static
>   qga/vss-win32/requester.o: In function `requester_freeze':
>   /home/mdroth/w/qemu4.git/qga/vss-win32/requester.cpp:284: undefined 
> reference to `g_assertion_message_expr'
>   qga/vss-win32/requester.o: In function `requester_thaw':
>   /home/mdroth/w/qemu4.git/qga/vss-win32/requester.cpp:508: undefined 
> reference to `g_assertion_message_expr'
>   /home/mdroth/w/qemu4.git/qga/vss-win32/requester.cpp:509: undefined 
> reference to `g_assertion_message_expr'
>   collect2: error: ld returned 1 exit status
>   make: *** [/home/mdroth/w/qemu4.git/qga/vss-win32/Makefile.objs:10: 
> qga/vss-win32/qga-vss.dll] Error 1
>   make: *** Waiting for unfinished jobs
> 
> Fix this by introducing a USE_NATIVE_MINGW32_ASSERT macro that can
> be defined prior to inclusion of osdep.h for build targets that
> don't link against glib.

Why doesn't it link to glib and can that be fixed ?

Glib is considered a mandatory dependancy on anything in QEMU that
uses osdep.h, since that header pulls in the glib.h header unconditionally.

> Cc: Richard Henderson 
> Cc: Philippe Mathieu-Daudé 
> Signed-off-by: Michael Roth 
> ---
> v2:
>  * define USE_NATIVE_MINGW32_ASSERT via build recipe and avoid moving
>#include's before osdep.h (Philippe)
> ---
>  include/qemu/osdep.h| 2 +-
>  qga/vss-win32/Makefile.objs | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
> index 3bf48bcdec..59364bfeb0 100644
> --- a/include/qemu/osdep.h
> +++ b/include/qemu/osdep.h
> @@ -129,7 +129,7 @@ extern int daemon(int, int);
>   * code that is unreachable when features are disabled.
>   * All supported versions of Glib's g_assert() satisfy this requirement.
>   */
> -#ifdef __MINGW32__
> +#if defined(__MINGW32__) && !defined(USE_NATIVE_MINGW32_ASSERT)

I don't think we should have these kind of hacks.

Either make vss-win32 link to glib, or stop it importing
osdep.h entirely if it really must be built completely
standalone from normal QEMU dependancies.

>  #undef assert
>  #define assert(x)  g_assert(x)
>  #endif
> diff --git a/qga/vss-win32/Makefile.objs b/qga/vss-win32/Makefile.objs
> index 23d08da225..5773bfd868 100644
> --- a/qga/vss-win32/Makefile.objs
> +++ b/qga/vss-win32/Makefile.objs
> @@ -3,7 +3,7 @@
>  qga-vss-dll-obj-y += requester.o provider.o install.o
>  
>  obj-qga-vss-dll-obj-y = $(addprefix $(obj)/, $(qga-vss-dll-obj-y))
> -$(obj-qga-vss-dll-obj-y): QEMU_CXXFLAGS = $(filter-out -Wstrict-prototypes 
> -Wmissing-prototypes -Wnested-externs -Wold-style-declaration 
> -Wold-style-definition -Wredundant-decls -fstack-protector-all 
> -fstack-protector-strong, $(QEMU_CFLAGS)) -Wno-unknown-pragmas 
> -Wno-delete-non-virtual-dtor
> +$(obj-qga-vss-dll-obj-y): QEMU_CXXFLAGS = $(filter-out -Wstrict-prototypes 
> -Wmissing-prototypes -Wnested-externs -Wold-style-declaration 
> -Wold-style-definition -Wredundant-decls -fstack-protector-all 
> -fstack-protector-strong, $(QEMU_CFLAGS)) -Wno-unknown-pragmas 
> -Wno-delete-non-virtual-dtor -DUSE_NATIVE_MINGW32_ASSERT
>  
>  $(obj)/qga-vss.dll: LDFLAGS = -shared 
> -Wl,--add-stdcall-alias,--enable-stdcall-fixup -lole32 -loleaut32 -lshlwapi 
> -luuid -static
>  $(obj)/qga-vss.dll: $(obj-qga-vss-dll-obj-y) $(SRC_PATH)/$(obj)/qga-vss.def
> -- 
> 2.17.1
> 
> 

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [Qemu-devel] [PATCH for-3.2 00/41] RFC: slirp: make it again a standalone project

2018-11-14 Thread Thomas Huth

On 2018-11-14 15:46, Markus Armbruster wrote:
> Thomas Huth  writes:
> 
>> On 2018-11-14 13:59, Markus Armbruster wrote:
>>> Marc-André Lureau  writes:
>>>
 Hi,

 Based-on: https://people.debian.org/~sthibault/qemu.git/ slirp branch

 This series goal is to allow building libslirp as an independent library.

 While looking at making SLIRP a seperate running process, I thought
 that having an independent library from QEMU would be a first step.

 There has been some attempts to make slirp a seperate project in the past.
 (https://lists.gnu.org/archive/html/qemu-devel/2017-02/msg01092.html)
 Unfortunately, they forked from QEMU and didn't provide enough
 compatibility for QEMU to make use of it (in particular, vmstate
 handling was removed, they lost git history etc). Furthermore, they
 are not maintained as far as I can see.

 I would propose to make slirp a seperate project, that can initially
 be used by QEMU as a submodule, keeping Makefile.objs until a proper
 shared library with stability guarantees etc is ready..

 The subproject could created by preserving git tags, and cleaning up the 
 code style, this way:

 git filter-branch --tree-filter "if ls * 1> /dev/null 2>&1; then 
 clang-format -i * /dev/null; fi " -f --subdirectory-filter "slirp" 
 --prune-empty --tag-name-filter cat -- --all
 (my clang-format 
 https://gist.github.com/elmarco/cb20c8d92007df0e2fb8a2404678ac73)

 What do you think?
>>>
>>> Has the slirp code been improved to be generally useful?  I still got it
>>> filed under "friends don't let friends use that, except for testing"...
>>
>> The slirp code is already used in a lot of other projects:
> 
> The issue I have with SLIRP isn't that it solves a useless problem (au
> contraire!), it's that it's a useless solution.

Ouch, that was completely arrogant and inappropriate. It's far away from
being useless, and Samuel is doing a very good job in picking up all the
patches and fixes that have been posted in the past months. Have you had
a look at the changelog at all before you wrote that sentence?

> Okay, that's an unfair
> exaggeration, it's not useless, I just wouldn't trust it in production,
> unless it has improved significantly since I last looked at it.

Nobody said that the slirp code would suddenly be perfect, but if it's
getting even more traction and attention as a separate library (since
other projects might contribute their fixes back "upstream" in that
case), it could really get a solid building block for a lot of emulators
and similar software.

 Thomas

Re: [Qemu-devel] [PATCH v2 1/1] qga: update docs with systemd suspend support info

2018-11-14 Thread Michael Roth

Quoting Daniel Henrique Barboza (2018-11-13 10:55:39)
> Commit 067927d62e ("qga: systemd hibernate/suspend/hybrid-sleep
> support") failed to update qapi-schema.json after adding systemd
> hibernate/suspend/hybrid-sleep capabilities to guest-suspend-* QGA
> commands.
> 
> Signed-off-by: Daniel Henrique Barboza 

Thanks, applied to qga tree:
  https://github.com/mdroth/qemu/commits/qga

> ---
>  qga/qapi-schema.json | 25 ++---
>  1 file changed, 14 insertions(+), 11 deletions(-)
> 
> diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json
> index c6725b3ec8..61f66fc461 100644
> --- a/qga/qapi-schema.json
> +++ b/qga/qapi-schema.json
> @@ -532,12 +532,12 @@
>  #
>  # Suspend guest to disk.
>  #
> -# This command tries to execute the scripts provided by the pm-utils package.
> -# If it's not available, the suspend operation will be performed by manually
> -# writing to a sysfs file.
> +# This command attempts to suspend the guest using three strategies, in this
> +# order:
>  #
> -# For the best results it's strongly recommended to have the pm-utils
> -# package installed in the guest.
> +# - systemd hibernate
> +# - pm-utils (via pm-hibernate)
> +# - manual write into sysfs
>  #
>  # This command does NOT return a response on success. There is a high chance
>  # the command succeeded if the VM exits with a zero exit status or, when
> @@ -560,12 +560,12 @@
>  #
>  # Suspend guest to ram.
>  #
> -# This command tries to execute the scripts provided by the pm-utils package.
> -# If it's not available, the suspend operation will be performed by manually
> -# writing to a sysfs file.
> +# This command attempts to suspend the guest using three strategies, in this
> +# order:
>  #
> -# For the best results it's strongly recommended to have the pm-utils
> -# package installed in the guest.
> +# - systemd suspend
> +# - pm-utils (via pm-suspend)
> +# - manual write into sysfs
>  #
>  # IMPORTANT: guest-suspend-ram requires QEMU to support the 'system_wakeup'
>  # command.  Thus, it's *required* to query QEMU for the presence of the
> @@ -592,7 +592,10 @@
>  #
>  # Save guest state to disk and suspend to ram.
>  #
> -# This command requires the pm-utils package to be installed in the guest.
> +# This command attempts to suspend the guest by executing, in this order:
> +#
> +# - systemd hybrid-sleep
> +# - pm-utils (via pm-suspend-hybrid)
>  #
>  # IMPORTANT: guest-suspend-hybrid requires QEMU to support the 
> 'system_wakeup'
>  # command.  Thus, it's *required* to query QEMU for the presence of the
> -- 
> 2.19.1
>

Re: [Qemu-devel] [PATCH] vmstate: constify VMStateField

2018-11-14 Thread Peter Maydell

On 14 November 2018 at 16:39, Philippe Mathieu-Daudé  wrote:
> Hi Thomas,
>
> On 14/11/18 17:29, Thomas Huth wrote:
>> Please don't. For rationale, see:
>> https://www.kernel.org/doc/html/v4.19/process/coding-style.html#typedefs
>
>
> Thanks for the pointer, I am interested in understanding why not do that.
> However in the link you pasted I don't see a rational about enforcing
> constness, I understand that since this case doesn't match the 5 rules, we
> should use 'struct VMStateField' directly and remove the typedef.

QEMU's coding style is not the kernel's. In the kernel, yes,
they prefer "struct foo". In QEMU we generally prefer to use
a typedef for most structs.

thanks
-- PMM

[Qemu-devel] [PATCH for-3.1 v2] build: qga: add macro to force use of native mingw32 assert()

2018-11-14 Thread Michael Roth

When building qemu-ga for w32 with VSS support, some parts of qemu-ga
are not linked against glib, specifically the C++ bits used to
create the VSS provider DLL. With 3ebee3b191e, we now define assert()
as g_assert() for all mingw32 builds via osdep.h, which results in the
following build breakage:

  x86_64-w64-mingw32-g++ -o qga/vss-win32/qga-vss.dll qga/vss-win32/requester.o 
qga/vss-win32/provider.o qga/vss-win32/install.o 
/home/mdroth/w/qemu4.git/qga/vss-win32/qga-vss.def  -shared 
-Wl,--add-stdcall-alias,--enable-stdcall-fixup -lole32 -loleaut32 -lshlwapi 
-luuid -static
  qga/vss-win32/requester.o: In function `requester_freeze':
  /home/mdroth/w/qemu4.git/qga/vss-win32/requester.cpp:284: undefined reference 
to `g_assertion_message_expr'
  qga/vss-win32/requester.o: In function `requester_thaw':
  /home/mdroth/w/qemu4.git/qga/vss-win32/requester.cpp:508: undefined reference 
to `g_assertion_message_expr'
  /home/mdroth/w/qemu4.git/qga/vss-win32/requester.cpp:509: undefined reference 
to `g_assertion_message_expr'
  collect2: error: ld returned 1 exit status
  make: *** [/home/mdroth/w/qemu4.git/qga/vss-win32/Makefile.objs:10: 
qga/vss-win32/qga-vss.dll] Error 1
  make: *** Waiting for unfinished jobs

Fix this by introducing a USE_NATIVE_MINGW32_ASSERT macro that can
be defined prior to inclusion of osdep.h for build targets that
don't link against glib.

Cc: Richard Henderson 
Cc: Philippe Mathieu-Daudé 
Signed-off-by: Michael Roth 
---
v2:
 * define USE_NATIVE_MINGW32_ASSERT via build recipe and avoid moving
   #include's before osdep.h (Philippe)
---
 include/qemu/osdep.h| 2 +-
 qga/vss-win32/Makefile.objs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index 3bf48bcdec..59364bfeb0 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -129,7 +129,7 @@ extern int daemon(int, int);
  * code that is unreachable when features are disabled.
  * All supported versions of Glib's g_assert() satisfy this requirement.
  */
-#ifdef __MINGW32__
+#if defined(__MINGW32__) && !defined(USE_NATIVE_MINGW32_ASSERT)
 #undef assert
 #define assert(x)  g_assert(x)
 #endif
diff --git a/qga/vss-win32/Makefile.objs b/qga/vss-win32/Makefile.objs
index 23d08da225..5773bfd868 100644
--- a/qga/vss-win32/Makefile.objs
+++ b/qga/vss-win32/Makefile.objs
@@ -3,7 +3,7 @@
 qga-vss-dll-obj-y += requester.o provider.o install.o
 
 obj-qga-vss-dll-obj-y = $(addprefix $(obj)/, $(qga-vss-dll-obj-y))
-$(obj-qga-vss-dll-obj-y): QEMU_CXXFLAGS = $(filter-out -Wstrict-prototypes 
-Wmissing-prototypes -Wnested-externs -Wold-style-declaration 
-Wold-style-definition -Wredundant-decls -fstack-protector-all 
-fstack-protector-strong, $(QEMU_CFLAGS)) -Wno-unknown-pragmas 
-Wno-delete-non-virtual-dtor
+$(obj-qga-vss-dll-obj-y): QEMU_CXXFLAGS = $(filter-out -Wstrict-prototypes 
-Wmissing-prototypes -Wnested-externs -Wold-style-declaration 
-Wold-style-definition -Wredundant-decls -fstack-protector-all 
-fstack-protector-strong, $(QEMU_CFLAGS)) -Wno-unknown-pragmas 
-Wno-delete-non-virtual-dtor -DUSE_NATIVE_MINGW32_ASSERT
 
 $(obj)/qga-vss.dll: LDFLAGS = -shared 
-Wl,--add-stdcall-alias,--enable-stdcall-fixup -lole32 -loleaut32 -lshlwapi 
-luuid -static
 $(obj)/qga-vss.dll: $(obj-qga-vss-dll-obj-y) $(SRC_PATH)/$(obj)/qga-vss.def
-- 
2.17.1

Re: [Qemu-devel] [PATCH v6 11/11] authz: delete existing ACL implementation

2018-11-14 Thread Daniel P . Berrangé

On Thu, Nov 08, 2018 at 12:15:54PM +0400, Marc-André Lureau wrote:
> Hi
> 
> On Fri, Oct 19, 2018 at 5:51 PM Daniel P. Berrangé  
> wrote:
> >
> > From: "Daniel P. Berrange" 
> >
> > The 'qemu_acl' type was a previous non-QOM based attempt to provide an
> > authorization facility in QEMU. Because it is non-QOM based it cannot be
> > created via the command line and requires special monitor commands to
> > manipulate it.
> >
> > The new QAuthZ subclasses provide a superset of the functionality in
> > qemu_acl, so the latter can now be deleted. The HMP 'acl_*' monitor
> > commands are converted to use the new QAuthZSimple data type instead
> > in order to provide temporary backwards compatibility.
> >
> > Signed-off-by: Daniel P. Berrange 
> > +monitor_printf(mon, "policy: %s\n",
> > +   QAuthZListPolicy_lookup.array[auth->policy]);
> 
> please use QAuthZListPolicy_str()
> 
> > +
> > +rules = auth->rules;
> > +while (rules) {
> > +QAuthZListRule *rule = rules->value;
> > +i++;
> > +monitor_printf(mon, "%zu: %s %s\n", i,
> > +   QAuthZListPolicy_lookup.array[rule->policy],
> 
> QAuthZListPolicy_str

Yes.


> > @@ -163,12 +165,19 @@ static int vnc_auth_sasl_check_access(VncState *vs)
> >  vs->sasl.username = g_strdup((const char*)val);
> >  trace_vnc_auth_sasl_username(vs, vs->sasl.username);
> >
> > -if (vs->vd->sasl.acl == NULL) {
> > +if (vs->vd->sasl.authzid == NULL) {
> >  trace_vnc_auth_sasl_acl(vs, 1);
> >  return 0;
> >  }
> >
> > -allow = qemu_acl_party_is_allowed(vs->vd->sasl.acl, vs->sasl.username);
> > +allow = qauthz_is_allowed_by_id(vs->vd->sasl.authzid,
> > +vs->sasl.username, );
> 
> Why not use qauthz_is_allowed() with .authz ?

The .authz object is only non-NULL when using the legacy "-vnc ..,acl"
flag syntax. When using the modern syntax (introduced by the followup
series mentioned in the cover letter) we want to resolve "authzid"
every time. This allows the user to safely delete & recreate the
authorization objects on the fly.


Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [Qemu-devel] [RFC 08/48] tcg: export tcg_gen_runtime_helper

2018-11-14 Thread Alex Bennée



Emilio G. Cota  writes:

> This takes the TCGHelperInfo directly, which will allow us to generate
> helpers at run-time.
>
> Signed-off-by: Emilio G. Cota 

Reviewed-by: Alex Bennée 

> ---
>  tcg/tcg.h |  2 ++
>  tcg/tcg.c | 50 +-
>  2 files changed, 47 insertions(+), 5 deletions(-)
>
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index 9f9643b470..3fa434d891 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -1077,6 +1077,8 @@ do {\
>  bool tcg_op_supported(TCGOpcode op);
>
>  void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args);
> +void tcg_gen_runtime_helper(const TCGHelperInfo *orig, TCGTemp *ret, int 
> nargs,
> +TCGTemp **args);
>
>  TCGOp *tcg_emit_op(TCGOpcode opc);
>  void tcg_op_remove(TCGContext *s, TCGOp *op);
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index 08b6926894..87e02da740 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -1642,15 +1642,13 @@ bool tcg_op_supported(TCGOpcode op)
>  /* Note: we convert the 64 bit args to 32 bit and do some alignment
> and endian swap. Maybe it would be better to do the alignment
> and endian swap in tcg_reg_alloc_call(). */
> -void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
> +static void do_tcg_gen_callN(TCGHelperInfo *info, TCGTemp *ret, int nargs,
> + TCGTemp **args)
>  {
>  int i, real_args, nb_rets, pi;
>  unsigned sizemask, flags;
> -TCGHelperInfo *info;
> -uint32_t hash = tcg_helper_func_hash(func);
>  TCGOp *op;
>
> -info = qht_lookup_custom(_table, func, hash, 
> tcg_helper_lookup_cmp);
>  flags = info->flags;
>  sizemask = info->sizemask;
>
> @@ -1774,7 +1772,7 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, 
> TCGTemp **args)
>  op->args[pi++] = temp_arg(args[i]);
>  real_args++;
>  }
> -op->args[pi++] = (uintptr_t)func;
> +op->args[pi++] = (uintptr_t)info->func;
>  op->args[pi++] = flags;
>  TCGOP_CALLI(op) = real_args;
>
> @@ -1812,6 +1810,48 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int 
> nargs, TCGTemp **args)
>  #endif /* TCG_TARGET_EXTEND_ARGS */
>  }
>
> +void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
> +{
> +TCGHelperInfo *info;
> +uint32_t hash = tcg_helper_func_hash(func);
> +
> +/*
> + * Here we can get away with tcg_helper_lookup_cmp, which only looks
> + * at the function pointer, since we have the compile-time guarantee
> + * that @func can only be in one TCGHelperInfo.
> + */
> +info = qht_lookup_custom(_table, func, hash, 
> tcg_helper_lookup_cmp);
> +do_tcg_gen_callN(info, ret, nargs, args);
> +}
> +
> +void tcg_gen_runtime_helper(const TCGHelperInfo *orig, TCGTemp *ret, int 
> nargs,
> +TCGTemp **args)
> +{
> +TCGHelperInfo *info;
> +uint32_t hash = tcg_helper_func_hash(orig->func);
> +
> +/*
> + * Use the full TCGHelperInfo lookup, since there is no guarantee that 
> func
> + * will be unique to each TCGHelperInfo. For instance, we could have the
> + * same helper function registered in several TCGHelperInfo's, each of 
> them
> + * with different flags.
> + */
> +info = qht_lookup(_table, orig, hash);
> +if (info == NULL) {
> +void *existing = NULL;
> +
> +/* @orig might be in the stack, so we need to allocate a new struct 
> */
> +info = g_new(TCGHelperInfo, 1);
> +memcpy(info, orig, sizeof(TCGHelperInfo));
> +qht_insert(_table, info, hash, );
> +if (unlikely(existing)) {
> +g_free(info);
> +info = existing;
> +}
> +}
> +do_tcg_gen_callN(info, ret, nargs, args);
> +}
> +
>  static void tcg_reg_alloc_start(TCGContext *s)
>  {
>  int i, n;


--
Alex Bennée

Re: [Qemu-devel] [RFC 44/48] cpus: lockstep execution support

2018-11-14 Thread Alex Bennée



Emilio G. Cota  writes:

> Signed-off-by: Emilio G. Cota 
> ---

>
>  void cpu_interrupt(CPUState *cpu, int mask);
> diff --git a/cpus.c b/cpus.c
> index 3efe89354d..a446632a5c 100644
> --- a/cpus.c
> +++ b/cpus.c

> +
> +static void cpu_lockstep_init(CPUState *cpu)
> +{
> +if (!lockstep_enabled) {
> +return;
> +}
> +qemu_mutex_lock(_lock);
> +/*
> + * HACK: avoid racing with a wakeup, which would miss the addition
> + * of this CPU; just wait until no wakeup is ongoing.
> + */
> +while (unlikely(lockstep_ongoing_wakeup)) {
> +qemu_mutex_unlock(_lock);
> +sched_yield();

This breaks Windows builds. I suspect if we do want to this sort of
functionality we'll need to expose a utility function in
oslib-posix/oslib-win32


--
Alex Bennée

Re: [Qemu-devel] [RFC v8 15/18] hw/arm/virt: Add virtio-iommu to the virt board

2018-11-14 Thread Auger Eric

Hi Jean,

On 11/14/18 5:01 PM, Jean-Philippe Brucker wrote:
> On 09/11/2018 11:29, Eric Auger wrote:
>> +static void create_virtio_iommu(VirtMachineState *vms,
>> +const char *pciehb_nodename, PCIBus *bus)
>> +{
>> +const char compat[] = "virtio,pci-iommu";
>> +uint16_t bdf = 0x8; /* 00:01.0 */
>> +DeviceState *dev;
>> +char *node;
>> +
>> +dev = qdev_create(BUS(bus), TYPE_VIRTIO_IOMMU_PCI);
>> +object_property_set_bool(OBJECT(dev), true, "realized", _fatal);
> 
> For the Arm virt platform, should msi_bypass default to false? Otherwise
> I don't think pass-through to guest userpace will work.
That's correct. It's a regression compared to v7. I will fix this soon
while doing the pc machine integration + resv regions flaws you reported
in the meantime.

Thanks!

Eric
> 
> Thanks,
> Jean
>

Re: [Qemu-devel] [PATCH] vmstate: constify VMStateField

2018-11-14 Thread Philippe Mathieu-Daudé


Hi Thomas,

On 14/11/18 17:29, Thomas Huth wrote:

On 2018-11-14 16:32, Philippe Mathieu-Daudé wrote:


What about enforcing the constness in the typedef?

-- >8 --
@@ -32 +32 @@ typedef struct VMStateDescription VMStateDescription;
-typedef struct VMStateField VMStateField;
+typedef const struct VMStateField VMStateField;
---


Please don't. For rationale, see:
https://www.kernel.org/doc/html/v4.19/process/coding-style.html#typedefs


Thanks for the pointer, I am interested in understanding why not do 
that. However in the link you pasted I don't see a rational about 
enforcing constness, I understand that since this case doesn't match the 
5 rules, we should use 'struct VMStateField' directly and remove the 
typedef.

Re: [Qemu-devel] [PATCH] vmstate: constify VMStateField

2018-11-14 Thread Thomas Huth

On 2018-11-14 16:32, Philippe Mathieu-Daudé wrote:
> Hi Marc-André,
> 
> On 14/11/18 14:29, Marc-André Lureau wrote:
>> Because they are supposed to remain const.
>>
>> Signed-off-by: Marc-André Lureau 
>> ---
>>   include/migration/vmstate.h |   6 +-
>>   hw/display/virtio-gpu.c |   4 +-
>>   hw/intc/s390_flic_kvm.c |   4 +-
>>   hw/nvram/eeprom93xx.c   |   6 +-
>>   hw/nvram/fw_cfg.c   |   6 +-
>>   hw/pci/msix.c   |   4 +-
>>   hw/pci/pci.c    |   8 +--
>>   hw/pci/shpc.c   |   7 ++-
>>   hw/scsi/scsi-bus.c  |   4 +-
>>   hw/timer/twl92230.c |   4 +-
>>   hw/usb/redirect.c   |  12 ++--
>>   hw/virtio/virtio.c  |   8 +--
>>   migration/savevm.c  |   7 ++-
>>   migration/vmstate-types.c   | 119 
>>   migration/vmstate.c |  31 +-
>>   target/alpha/machine.c  |   5 +-
>>   target/arm/machine.c    |  12 ++--
>>   target/hppa/machine.c   |  10 +--
>>   target/mips/machine.c   |  14 +++--
>>   target/openrisc/machine.c   |   5 +-
>>   target/ppc/machine.c    |  14 +++--
>>   target/sparc/machine.c  |   7 ++-
>>   22 files changed, 162 insertions(+), 135 deletions(-)
>>
>> diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
>> index 2b501d0466..61bef3ef5c 100644
>> --- a/include/migration/vmstate.h
>> +++ b/include/migration/vmstate.h
>> @@ -40,8 +40,8 @@ typedef struct VMStateField VMStateField;
> 
> What about enforcing the constness in the typedef?
> 
> -- >8 --
> @@ -32 +32 @@ typedef struct VMStateDescription VMStateDescription;
> -typedef struct VMStateField VMStateField;
> +typedef const struct VMStateField VMStateField;
> ---

Please don't. For rationale, see:
https://www.kernel.org/doc/html/v4.19/process/coding-style.html#typedefs

 Thomas

Re: [Qemu-devel] [PULL V2 24/26] net: ignore packet size greater than INT_MAX

2018-11-14 Thread Dima Stepanov

On Wed, Nov 14, 2018 at 10:59:32AM +0800, Jason Wang wrote:
> 
> On 2018/11/13 下午11:41, Dima Stepanov wrote:
> >Hi Jason,
> >
> >I know that this patch has been already merged to stable, but i have a
> >question:
> >
> >On Fri, Oct 19, 2018 at 11:22:23AM +0800, Jason Wang wrote:
> >>There should not be a reason for passing a packet size greater than
> >>INT_MAX. It's usually a hint of bug somewhere, so ignore packet size
> >>greater than INT_MAX in qemu_deliver_packet_iov()
> >>
> >>CC:qemu-sta...@nongnu.org
> >>Reported-by: Daniel Shapira
> >>Reviewed-by: Michael S. Tsirkin
> >>Signed-off-by: Jason Wang
> >>---
> >>  net/net.c | 7 ++-
> >>  1 file changed, 6 insertions(+), 1 deletion(-)
> >>
> >>diff --git a/net/net.c b/net/net.c
> >>index c66847e..07c194a 100644
> >>--- a/net/net.c
> >>+++ b/net/net.c
> >>@@ -712,10 +712,15 @@ ssize_t qemu_deliver_packet_iov(NetClientState 
> >>*sender,
> >>  void *opaque)
> >>  {
> >>  NetClientState *nc = opaque;
> >>+size_t size = iov_size(iov, iovcnt);
> >>  int ret;
> >>+if (size > INT_MAX) {
> >>+return size;
> >Is it okay that the function returns ssize_t (signed), but the type of the
> >size variable is size_t (unsigned)? For now the top level routine checks
> >the return value only for 0, but anyway we can return negative value
> >here instead of positive. What do you think?
> >
> >Regards, Dima.
> >
> 
> Any non zero value should be ok here. Actually I think because of the
> conversion from size_t to ssize_t, caller actually see negative value?
I believe it depends. If long (ssize_t and size_t type) is 8 bytes, then
the routine can sometimes return positive values and sometimes negative.
I fully agree that in the current case any non zero value should be
okay. I just wanted to point on the inconsistency in types and as a
result a return value.

Dima.
> 
> Thanks
>

Re: [Qemu-devel] [PATCH RFC 5/6] test-string-input-visitor: split off uint64 list tests

2018-11-14 Thread Markus Armbruster

David Hildenbrand  writes:

> Basically copy all int64 list tests but adapt them to work on uint64
> instead.
>
> Signed-off-by: David Hildenbrand 
> ---
>  tests/test-string-input-visitor.c | 71 +--
>  1 file changed, 67 insertions(+), 4 deletions(-)
>
> diff --git a/tests/test-string-input-visitor.c 
> b/tests/test-string-input-visitor.c
> index 2f6360e9ca..731094f789 100644
> --- a/tests/test-string-input-visitor.c
> +++ b/tests/test-string-input-visitor.c
> @@ -111,7 +111,6 @@ static void test_visitor_in_intList(TestInputVisitorData 
> *data,
>6, 7, 8 };
>  int64_t expect2[] = { 32767, -32768, -32767 };
>  int64_t expect3[] = { INT64_MIN, INT64_MAX };
> -uint64_t expect4[] = { UINT64_MAX };
>  Error *err = NULL;
>  int64List *res = NULL;
>  Visitor *v;
> @@ -129,9 +128,6 @@ static void test_visitor_in_intList(TestInputVisitorData 
> *data,
>  "-9223372036854775808,9223372036854775807");
>  check_ilist(v, expect3, ARRAY_SIZE(expect3));
>  
> -v = visitor_input_test_init(data, "18446744073709551615");
> -check_ulist(v, expect4, ARRAY_SIZE(expect4));
> -

Hmm.  Testing behavior for this input is still worthwhile, isn't it?

The use of check_ulist() here is admittedly unclean.

>  /* Empty list */
>  
>  v = visitor_input_test_init(data, "");
> @@ -174,6 +170,71 @@ static void test_visitor_in_intList(TestInputVisitorData 
> *data,
>  visit_end_list(v, NULL);
>  }
>  
> +static void test_visitor_in_uintList(TestInputVisitorData *data,
> + const void *unused)
> +{
> +uint64_t expect1[] = { 1, 2, 0, 2, 3, 4, 20, 5, 6, 7, 8, 9, 1, 2, 3, 4, 
> 5,

Please wrap this line a bit earlier.

> +   6, 7, 8 };
> +uint64_t expect2[] = { 32767, -32768, -32767 };
> +uint64_t expect3[] = { UINT64_MAX };
> +Error *err = NULL;
> +uint64List *res = NULL;
> +Visitor *v;
> +uint64_t val;
> +
> +/* Valid lists */
> +
> +v = visitor_input_test_init(data, "1,2,0,2-4,20,5-9,1-8");
> +check_ulist(v, expect1, ARRAY_SIZE(expect1));
> +
> +v = visitor_input_test_init(data, "32767,-32768--32767");
> +check_ulist(v, expect2, ARRAY_SIZE(expect2));
> +
> +v = visitor_input_test_init(data, "18446744073709551615");
> +check_ulist(v, expect3, ARRAY_SIZE(expect3));

Test behavior for large negative numbers?

> +
> +/* Empty list */
> +
> +v = visitor_input_test_init(data, "");
> +visit_type_uint64List(v, NULL, , _abort);
> +g_assert(!res);
> +
> +/* Not a list */
> +
> +v = visitor_input_test_init(data, "not an uint list");
> +
> +visit_type_uint64List(v, NULL, , );
> +error_free_or_abort();
> +g_assert(!res);
> +
> +/* Unvisited list tail */
> +
> +v = visitor_input_test_init(data, "0,2-3");
> +
> +visit_start_list(v, NULL, NULL, 0, _abort);
> +visit_type_uint64(v, NULL, , _abort);
> +g_assert_cmpuint(val, ==, 0);
> +visit_type_uint64(v, NULL, , _abort);
> +g_assert_cmpuint(val, ==, 2);
> +
> +visit_check_list(v, );
> +error_free_or_abort();
> +visit_end_list(v, NULL);
> +
> +/* Visit beyond end of list */
> +
> +v = visitor_input_test_init(data, "0");
> +
> +visit_start_list(v, NULL, NULL, 0, _abort);
> +visit_type_uint64(v, NULL, , );
> +g_assert_cmpuint(val, ==, 0);
> +visit_type_uint64(v, NULL, , );
> +error_free_or_abort();
> +
> +visit_check_list(v, _abort);
> +visit_end_list(v, NULL);
> +}
> +
>  static void test_visitor_in_bool(TestInputVisitorData *data,
>   const void *unused)
>  {
> @@ -334,6 +395,8 @@ int main(int argc, char **argv)
> _visitor_data, test_visitor_in_int);
>  input_visitor_test_add("/string-visitor/input/intList",
> _visitor_data, test_visitor_in_intList);
> +input_visitor_test_add("/string-visitor/input/uintList",
> +   _visitor_data, test_visitor_in_uintList);
>  input_visitor_test_add("/string-visitor/input/bool",
> _visitor_data, test_visitor_in_bool);
>  input_visitor_test_add("/string-visitor/input/number",

Re: [Qemu-devel] [RFC 07/48] tcg: export TCGHelperInfo

2018-11-14 Thread Alex Bennée



Emilio G. Cota  writes:

> Signed-off-by: Emilio G. Cota 


Reviewed-by: Alex Bennée 

> ---
>  tcg/tcg.h | 7 +++
>  tcg/tcg.c | 7 ---
>  2 files changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index f4efbaa680..9f9643b470 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -480,6 +480,13 @@ typedef TCGv_ptr TCGv_env;
>  /* Used to align parameters.  See the comment before tcgv_i32_temp.  */
>  #define TCG_CALL_DUMMY_ARG  ((TCGArg)0)
>
> +typedef struct TCGHelperInfo {
> +void *func;
> +const char *name;
> +unsigned flags;
> +unsigned sizemask;
> +} TCGHelperInfo;
> +
>  /* Conditions.  Note that these are laid out for easy manipulation by
> the functions below:
>   bit 0 is used for inverting;
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index 65da3c5dbf..08b6926894 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -868,13 +868,6 @@ void tcg_pool_reset(TCGContext *s)
>  s->pool_current = NULL;
>  }
>
> -typedef struct TCGHelperInfo {
> -void *func;
> -const char *name;
> -unsigned flags;
> -unsigned sizemask;
> -} TCGHelperInfo;
> -
>  #include "exec/helper-proto.h"
>
>  static const TCGHelperInfo all_helpers[] = {


--
Alex Bennée

Re: [Qemu-devel] [RFC 06/48] tcg: use QHT for helper_table

2018-11-14 Thread Alex Bennée



Emilio G. Cota  writes:

> This will allow us to add TCG helpers at run-time.
>
> While at it, rename tcg_find_helper to tcg_helper_find for consistency
> with the added tcg_helper_foo functions.
>
> Signed-off-by: Emilio G. Cota 
> ---
>  tcg/tcg.c | 59 +--
>  1 file changed, 49 insertions(+), 10 deletions(-)
>
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index e85133ef05..65da3c5dbf 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -33,6 +33,7 @@
>  #include "qemu/error-report.h"
>  #include "qemu/cutils.h"
>  #include "qemu/host-utils.h"
> +#include "qemu/xxhash.h"
>  #include "qemu/timer.h"
>
>  /* Note: the long term plan is to reduce the dependencies on the QEMU
> @@ -879,13 +880,46 @@ typedef struct TCGHelperInfo {
>  static const TCGHelperInfo all_helpers[] = {
>  #include "exec/helper-tcg.h"
>  };
> -static GHashTable *helper_table;
> +static struct qht helper_table;
> +static bool helper_table_inited;
>
>  static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
>  static void process_op_defs(TCGContext *s);
>  static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
>  TCGReg reg, const char *name);
>
> +static inline uint32_t tcg_helper_func_hash(const void *func)
> +{
> +return qemu_xxhash2((uint64_t)func);
> +}

I needed to do this:

modified   tcg/tcg.c
@@ -884,7 +884,7 @@ static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, 
TCGType type,

 static inline uint32_t tcg_helper_func_hash(const void *func)
 {
-return qemu_xxhash2((uint64_t)func);
+return qemu_xxhash2((uint64_t)(uintptr_t)func);
 }

To prevent the compiler complaining about:

 tcg.c:887:25: error: cast from pointer to integer of different size 
[-Werror=pointer-to-int-cast]

> +
> +static bool tcg_helper_cmp(const void *ap, const void *bp)
> +{
> +const TCGHelperInfo *a = ap;
> +const TCGHelperInfo *b = bp;
> +
> +return a->func == b->func &&
> +a->flags == b->flags &&
> +a->sizemask == b->sizemask &&
> +!strcmp(a->name, b->name);
> +}
> +
> +static bool tcg_helper_lookup_cmp(const void *obj, const void *func)
> +{
> +const TCGHelperInfo *info = obj;
> +
> +return info->func == func;
> +}
> +
> +static void tcg_helper_insert(const TCGHelperInfo *info)
> +{
> +uint32_t hash = tcg_helper_func_hash(info->func);
> +bool inserted;
> +
> +inserted = qht_insert(_table, (void *)info, hash, NULL);
> +g_assert(inserted);
> +}
> +
>  void tcg_context_init(TCGContext *s)
>  {
>  int op, total_args, n, i;
> @@ -919,13 +953,13 @@ void tcg_context_init(TCGContext *s)
>  }
>
>  /* Register helpers.  */
> -/* Use g_direct_hash/equal for direct pointer comparisons on func.  */
> -helper_table = g_hash_table_new(NULL, NULL);
> +qht_init(_table, tcg_helper_cmp, ARRAY_SIZE(all_helpers),
> + QHT_MODE_AUTO_RESIZE);
>
>  for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
> -g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
> -(gpointer)_helpers[i]);
> +tcg_helper_insert(_helpers[i]);
>  }
> +helper_table_inited = true;
>
>  tcg_target_init(s);
>  process_op_defs(s);
> @@ -1620,9 +1654,10 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int 
> nargs, TCGTemp **args)
>  int i, real_args, nb_rets, pi;
>  unsigned sizemask, flags;
>  TCGHelperInfo *info;
> +uint32_t hash = tcg_helper_func_hash(func);
>  TCGOp *op;
>
> -info = g_hash_table_lookup(helper_table, (gpointer)func);
> +info = qht_lookup_custom(_table, func, hash, 
> tcg_helper_lookup_cmp);
>  flags = info->flags;
>  sizemask = info->sizemask;
>
> @@ -1825,11 +1860,15 @@ static char *tcg_get_arg_str(TCGContext *s, char *buf,
>  }
>
>  /* Find helper name.  */
> -static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
> +static inline const char *tcg_helper_find(TCGContext *s, uintptr_t val)
>  {
>  const char *ret = NULL;
> -if (helper_table) {
> -TCGHelperInfo *info = g_hash_table_lookup(helper_table, 
> (gpointer)val);
> +if (helper_table_inited) {
> +uint32_t hash = tcg_helper_func_hash((void *)val);
> +TCGHelperInfo *info;
> +
> +info = qht_lookup_custom(_table, (void *)val, hash,
> + tcg_helper_lookup_cmp);
>  if (info) {
>  ret = info->name;
>  }
> @@ -1919,7 +1958,7 @@ void tcg_dump_ops(TCGContext *s)
>
>  /* function name, flags, out args */
>  col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
> -tcg_find_helper(s, op->args[nb_oargs + 
> nb_iargs]),
> +tcg_helper_find(s, op->args[nb_oargs + 
> nb_iargs]),
>  op->args[nb_oargs + nb_iargs + 1], nb_oargs);
>  for (i = 0; i < nb_oargs; i++) {
>

Re: [Qemu-devel] [PATCH RFC 2/6] qapi: use qemu_strtod() in string-input-visitor

2018-11-14 Thread Markus Armbruster

David Hildenbrand  writes:

> Let's use the new function.
>
> Signed-off-by: David Hildenbrand 
> ---
>  qapi/string-input-visitor.c | 6 ++
>  1 file changed, 2 insertions(+), 4 deletions(-)
>
> diff --git a/qapi/string-input-visitor.c b/qapi/string-input-visitor.c
> index b3fdd0827d..dee708d384 100644
> --- a/qapi/string-input-visitor.c
> +++ b/qapi/string-input-visitor.c
> @@ -20,6 +20,7 @@
>  #include "qemu/option.h"
>  #include "qemu/queue.h"
>  #include "qemu/range.h"
> +#include "qemu/cutils.h"
>  
>  
>  struct StringInputVisitor
> @@ -313,12 +314,9 @@ static void parse_type_number(Visitor *v, const char 
> *name, double *obj,
>Error **errp)
>  {
>  StringInputVisitor *siv = to_siv(v);
> -char *endp = (char *) siv->string;
>  double val;
>  
> -errno = 0;
> -val = strtod(siv->string, );
> -if (errno || endp == siv->string || *endp) {
> +if (qemu_strtod(siv->string, NULL, )) {
>  error_setg(errp, QERR_INVALID_PARAMETER_TYPE, name ? name : "null",
> "number");
>  return;

Three more: in qobject-input-visitor.c's
qobject_input_type_number_keyval(), cutil.c's do_strtosz(), and
json-parser.c's parse_literal().

The latter doesn't check for errors since the lexer ensures the input is
sane.  Overflow can still happen, and is silently ignored.  Feel free
not to convert this one.

Re: [Qemu-devel] [RFC v8 11/18] virtio-iommu: Add an msi_bypass property

2018-11-14 Thread Jean-Philippe Brucker

On 09/11/2018 11:29, Eric Auger wrote:
> +static void virtio_iommu_register_resv_region(viommu_endpoint *ep,
> +  uint8_t subtype,
> +  uint64_t start, uint64_t end)
> +{
> +viommu_interval *interval;
> +struct virtio_iommu_probe_resv_mem *reg;
> +
> +interval = g_malloc0(sizeof(*interval));
> +interval->low = start;
> +interval->high = end;
> +
> +reg = g_malloc0(sizeof(*reg));
> +reg->subtype = subtype;
> +reg->start = cpu_to_le64(start);
> +reg->end = cpu_to_le64(end);
> +
> +g_tree_insert(ep->reserved_regions, interval, reg);
> +}
> +
>  static viommu_endpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
>uint32_t ep_id)
>  {
> @@ -120,6 +142,12 @@ static viommu_endpoint 
> *virtio_iommu_get_endpoint(VirtIOIOMMU *s,
>  ep->reserved_regions = g_tree_new_full((GCompareDataFunc)interval_cmp,
>  NULL, (GDestroyNotify)g_free,
>  (GDestroyNotify)g_free);
> +if (s->msi_bypass) {
> +virtio_iommu_register_resv_region(ep, VIRTIO_IOMMU_RESV_MEM_T_MSI,
> +  IOAPIC_RANGE_START,
> +  IOAPIC_RANGE_SIZE);

The last argument of register_resv_region is 'end' but you're passing a size

Thanks,
Jean

Re: [Qemu-devel] [RFC v8 15/18] hw/arm/virt: Add virtio-iommu to the virt board

2018-11-14 Thread Jean-Philippe Brucker

On 09/11/2018 11:29, Eric Auger wrote:
> +static void create_virtio_iommu(VirtMachineState *vms,
> +const char *pciehb_nodename, PCIBus *bus)
> +{
> +const char compat[] = "virtio,pci-iommu";
> +uint16_t bdf = 0x8; /* 00:01.0 */
> +DeviceState *dev;
> +char *node;
> +
> +dev = qdev_create(BUS(bus), TYPE_VIRTIO_IOMMU_PCI);
> +object_property_set_bool(OBJECT(dev), true, "realized", _fatal);

For the Arm virt platform, should msi_bypass default to false? Otherwise
I don't think pass-through to guest userpace will work.

Thanks,
Jean

Re: [Qemu-devel] [RFC v8 10/18] virtio-iommu: Implement probe request

2018-11-14 Thread Jean-Philippe Brucker

Hi Eric,

A few issues creeped in when the resv_mem structure changed

On 09/11/2018 11:29, Eric Auger wrote:
> +#define SUPPORTED_PROBE_PROPERTIES (\
> +VIRTIO_IOMMU_PROBE_T_NONE | \
> +VIRTIO_IOMMU_PROBE_T_RESV_MEM)

You might be missing "1 <<" here, since the properties types are normal
values

[...]
> +/**
> + * virtio_iommu_fill_resv_mem_prop - Add a RESV_MEM probe
> + * property into the probe request buffer
> + *
> + * @key: interval handle
> + * @value: handle to the reserved memory region
> + * @data: handle to the probe request buffer state
> + */
> +static gboolean virtio_iommu_fill_resv_mem_prop(gpointer key,
> +gpointer value,
> +gpointer data)
> +{
> +struct virtio_iommu_probe_resv_mem *resv =
> +(struct virtio_iommu_probe_resv_mem *)value;
> +struct virtio_iommu_probe_property *prop;
> +struct virtio_iommu_probe_resv_mem *current;
> +viommu_property_buffer *bufstate = (viommu_property_buffer *)data;
> +size_t size = sizeof(*resv), total_size;
> +uint8_t *prop_value;
> +
> +total_size = size + sizeof(*prop);

size already contains sizeof(*prop)

> +
> +if (bufstate->filled + total_size >= VIOMMU_PROBE_SIZE) {
> +bufstate->error = true;
> +/* get the traversal stopped by returning true */
> +return true;
> +}
> +prop = (struct virtio_iommu_probe_property *)
> +(bufstate->start + bufstate->filled);
> +prop->type = cpu_to_le16(VIRTIO_IOMMU_PROBE_T_RESV_MEM) &
> +VIRTIO_IOMMU_PROBE_T_MASK;
> +prop->length = cpu_to_le16(size);

Should be size-4

> +
> +prop_value = (uint8_t *)prop + 4;
> +current = (struct virtio_iommu_probe_resv_mem *)prop_value;
> +*current = *resv;

*resv includes the property header, but *current doesn't, so the resv
property is corrupted.

Thanks,
Jean

Re: [Qemu-devel] [PATCH] target/mips: Disable R5900 support

2018-11-14 Thread Aleksandar Markovic

Fredrik wrote:

> Philippe wrote:
>
> > Then we will fix this for the 4.0 release.
> 
> What exactly needs to be fixed regarding the psABI? The relevant opcodes
> would need to stay, and not be prohibited and removed as Aleksandar has
> suggested, since such opcode removal breaks the psABI requirements.
> 
> Finally, as Maciej explained in some detail, the document that Aleksandar
> just recently requested is known to not exist, for any MIPS implementation,
> so we are not going to make any progress on that either.
>

I am going to list all the issues that needs to be clarified and
resolved, including the reasons why they are issues at all, and
possible ways to resolve them. I can't do it right now, but it'll
happen relatively soon.

Sincerely,
Aleksandar

Re: [Qemu-devel] [PATCH] target/mips: Disable R5900 support

2018-11-14 Thread Philippe Mathieu-Daudé

Hi Fredrik,

On Wed, Nov 14, 2018 at 4:31 PM Fredrik Noring  wrote:
> On Tue, Nov 13, 2018 at 11:51:54PM +0100, Philippe Mathieu-Daudé wrote:
[...]
> > At some point while reading your reviews, I understood the R5900
> > patches introduced incorrect behaviors for the non-R5900 cpus. In this
> > case this patch wouldn't suffice.
>
> No, that was never the case and I'm not aware of any such problems.
> However, there is a refactoring series, and we have observed preexisting
> bugs in the MIPS emulation, unrelated to the R5900. The opcode decoder
> could also be improved, such as in asserting reserved instructions in
> more cases where opcodes are invalid, etc.

Yes, we figure that out with Richard Henderson while playing with the
decodetree script.

> > Hoping I misinterpreted your reviews, then this patch is OK.
> > With one of the suggested comments:
> > Reviewed-by: Philippe Mathieu-Daudé 
> >
> > Then we will fix this for the 4.0 release.
>
> What exactly needs to be fixed regarding the psABI? The relevant opcodes
> would need to stay, and not be prohibited and removed as Aleksandar has
> suggested, since such opcode removal breaks the psABI requirements.
>
> Finally, as Maciej explained in some detail, the document that Aleksandar
> just recently requested is known to not exist, for any MIPS implementation,
> so we are not going to make any progress on that either.

I think this was just bad timing with the QEMU release cycles, which
stressed Aleksandar which is the only MIPS maintainer.

Since the R5900 User is a new feature, no downstream distributions
ship it, so it is safer for the community to disable/delay it,
this means we have now 3 months to improve this code, while being more relaxed.

The QEMU community is big, we have different cultures, live in many
timezones, speak different languages, and beside all we are humans
with our daily lives :)
I guess we all sometime stress out, that's why I think, even if
disabling this feature is technically not required (as you seem to
suggest), it is sane to disable it and get back on track together.

I believe Aleksandar is paid to maintain the MIPS TCG/KVM codebase,
and the Malta/Boston boards, all code used by his employer, which is a
huge quantity of C code.
Now the project is community based, and I understand this as it is
open as contributions like yours, or boards not backed by the MIPS
shareholders/company.
The TCG maintenance is shared with Richard, but I think the rest is
too much for a single maintainer.
Somehow the community is failing here, putting too much pressure on Aleksandar.

Let's see how we can improve that during the next merge window,
hopefully the R5900 will be fixed, and eventually his little sister
R3900 will be joining too!

Regards,

Phil.

Re: [Qemu-devel] [PATCH] nvme: fix oob access issue(CVE-2018-16847)

2018-11-14 Thread Paolo Bonzini

On 14/11/2018 02:38, Li Qiang wrote:
> 
> 
> Paolo Bonzini mailto:pbonz...@redhat.com>> 于2018
> 年11月14日周三 上午2:27写道：
> 
> On 13/11/2018 11:17, Kevin Wolf wrote:
> > Am 13.11.2018 um 02:45 hat Li Qiang geschrieben:
> >> Ping what't the status of this patch.
> >>
> >> I see Kevin's new pr doesn't contain this patch.
> >
> > Oh, I thought you said that you wanted to fix this at a higher
> level so
> > that the problem is caught before even getting into nvme code? If you
> > don't, I can apply the patch for my next pull request.
> 
> As far as I know the bug doesn't exist.  Li Qiang, if you have a
> reproducer please send it.
> 
> 
> Hello Paolo,
> Though I've send the debug information and ASAN output in the mail to
> secal...@redhat.com , I'm glad provide here.
> This is for read, I think the write the same but as the PoC is in
> userspace, the mmap can only map the exact size of the MMIO,
> So we can only write within the area. But if we using a module we can
> write the out of MMIO I think
> The nvme device's parameter should set as 'cmb_size_mb=2' and the PCI
> address may differ in your system.

Ok, thanks.  I've created a reproducer using qtest (though I have to run
now and cannot post it properly).

The patch for the fix is simply:

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index fc7dacb816..6385033af3 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -1192,7 +1192,7 @@ static const MemoryRegionOps nvme_cmb_ops = {
 .write = nvme_cmb_write,
 .endianness = DEVICE_LITTLE_ENDIAN,
 .impl = {
-.min_access_size = 2,
+.min_access_size = 1,
 .max_access_size = 8,
 },
 };


The memory subsystem _is_ recognizing the out-of-bounds 32-bit access,
but because min_access_size=2 it sends down a write at offset 2097151
and size 2.

Paolo

Re: [Qemu-devel] [RFC PATCH 1/3] i386: add properties for customizing L2 and L3 caches size

2018-11-14 Thread Dario Faggioli

On Wed, 2018-11-14 at 08:14 -0600, Eric Blake wrote:
> On 11/14/18 4:56 AM, Dario Faggioli wrote:
> > ---
> >   0 files changed
> 
> That's an odd diffstat. Why is git not giving you the normal
> diffstat 
> with an actual summary of files changed?
> 
Ah, more weirdness about this submission. :-O

I've just tried re-sending the series to myself and, for this patch, I
do see a diffstat that makes sense:

 include/hw/i386/pc.h |8 
 target/i386/cpu.c|8 
 target/i386/cpu.h|3 +++
 3 files changed, 19 insertions(+)

And same is true for all other series I've sent around in the same way
and with the same tool (I went double checking a couple of them! :-P).

So, clearly, something went wrong this time. :-/

Regards,
Dario
-- 
<> (Raistlin Majere)
-
Dario Faggioli, Ph.D, http://about.me/dario.faggioli
Software Engineer @ SUSE https://www.suse.com/

signature.asc
Description: This is a digitally signed message part

Re: [Qemu-devel] [PATCH] target/mips: Disable R5900 support

2018-11-14 Thread Philippe Mathieu-Daudé

On Wed, Nov 14, 2018 at 3:02 PM Aleksandar Markovic
 wrote:
> Philippe Mathieu-Daudé  wrote:
>
> > Can you add:
> >
> > This reverts commit ed4f49ba9bb56ebca6987b1083255daf6c89b5de.
> >
> > Or
> >
> > Fixes: ed4f49ba9
> >
> > when applying?
>
> Sure, I'll add something along that line.
>
> > If the issues you mentioned are "the R5900 tcg opcodes are not
> > implemented correctly", then this patch is OK, because no cpu
> > can use the R5900 opcodes.
> >
> > At some point while reading your reviews, I understood the R5900
> > patches introduced incorrect behaviors for the non-R5900 cpus.
> > In this case this patch wouldn't suffice.
>
> This patch will be applied after the patches that Fredrik kindly
> submitted under "Fix decoding..." series. Fredrik's patches will
> remove any doubt about R5900 interfering other architectures, so,
> as a consequence, this patch is sufficient and appropriate for
> disabling R5900 support.

Ah OK, now it makes sens.

> Additional clarification: This patch is to give more time for
> all R5900 issues to be cleared in 3.1+ time frame. If R5900 code
> was left enabled, we could have unpleasant backward compatibility
> issues between R5900 support and different versions of QEMU.
>
> The R5900 support is intended to be enabled again when all issues
> are resolved and agreed upon.

The commit description is clear enough, this is what I understood from it :)

Regards,

Phil.

Re: [Qemu-devel] [PATCH] tests: add /vmstate/simple/array

2018-11-14 Thread Philippe Mathieu-Daudé


On 14/11/18 14:21, Marc-André Lureau wrote:

A very simple test to show VMSTATE_*_ARRAY usage and result. It could
be systematically extended to other primitives, but I leave that as an
exercise for others :).


If this patch get accepted, please add this good idea to the 
BiteSizedTasks wiki entry:


https://wiki.qemu.org/Contribute/BiteSizedTasks



Signed-off-by: Marc-André Lureau 
---
  tests/test-vmstate.c | 50 
  1 file changed, 50 insertions(+)

diff --git a/tests/test-vmstate.c b/tests/test-vmstate.c
index 37a7a93784..2a960dedbe 100644
--- a/tests/test-vmstate.c
+++ b/tests/test-vmstate.c
@@ -284,6 +284,55 @@ static void test_simple_primitive(void)
  FIELD_EQUAL(i64_2);
  }
  
+typedef struct TestSimpleArray {

+uint16_t u16_1[3];
+} TestSimpleArray;
+
+/* Object instantiation, we are going to use it in more than one test */
+
+TestSimpleArray obj_simple_arr = {
+.u16_1 = { 0x42, 0x43, 0x44 },
+};
+
+/* Description of the values.  If you add a primitive type
+   you are expected to add a test here */
+
+static const VMStateDescription vmstate_simple_arr = {
+.name = "simple/array",
+.version_id = 1,
+.minimum_version_id = 1,
+.fields = (VMStateField[]) {
+VMSTATE_UINT16_ARRAY(u16_1, TestSimpleArray, 3),
+VMSTATE_END_OF_LIST()
+}
+};
+
+uint8_t wire_simple_arr[] = {
+/* u16_1 */ 0x00, 0x42,
+/* u16_1 */ 0x00, 0x43,
+/* u16_1 */ 0x00, 0x44,
+QEMU_VM_EOF, /* just to ensure we won't get EOF reported prematurely */
+};
+
+static void obj_simple_arr_copy(void *target, void *source)
+{
+memcpy(target, source, sizeof(TestSimpleArray));
+}
+
+static void test_simple_array(void)
+{
+TestSimpleArray obj, obj_clone;
+
+memset(, 0, sizeof(obj));
+save_vmstate(_simple_arr, _simple_arr);
+
+compare_vmstate(wire_simple_arr, sizeof(wire_simple_arr));
+
+SUCCESS(load_vmstate(_simple_arr, , _clone,
+ obj_simple_arr_copy, 1, wire_simple_arr,
+ sizeof(wire_simple_arr)));
+}
+
  typedef struct TestStruct {
  uint32_t a, b, c, e;
  uint64_t d, f;
@@ -863,6 +912,7 @@ int main(int argc, char **argv)
  
  g_test_init(, , NULL);

  g_test_add_func("/vmstate/simple/primitive", test_simple_primitive);
+g_test_add_func("/vmstate/simple/array", test_simple_array);
  g_test_add_func("/vmstate/versioned/load/v1", test_load_v1);
  g_test_add_func("/vmstate/versioned/load/v2", test_load_v2);
  g_test_add_func("/vmstate/field_exists/load/noskip", test_load_noskip);

Re: [Qemu-devel] [PATCH] vmstate: constify VMStateField

2018-11-14 Thread Philippe Mathieu-Daudé


Hi Marc-André,

On 14/11/18 14:29, Marc-André Lureau wrote:

Because they are supposed to remain const.

Signed-off-by: Marc-André Lureau 
---
  include/migration/vmstate.h |   6 +-
  hw/display/virtio-gpu.c |   4 +-
  hw/intc/s390_flic_kvm.c |   4 +-
  hw/nvram/eeprom93xx.c   |   6 +-
  hw/nvram/fw_cfg.c   |   6 +-
  hw/pci/msix.c   |   4 +-
  hw/pci/pci.c|   8 +--
  hw/pci/shpc.c   |   7 ++-
  hw/scsi/scsi-bus.c  |   4 +-
  hw/timer/twl92230.c |   4 +-
  hw/usb/redirect.c   |  12 ++--
  hw/virtio/virtio.c  |   8 +--
  migration/savevm.c  |   7 ++-
  migration/vmstate-types.c   | 119 
  migration/vmstate.c |  31 +-
  target/alpha/machine.c  |   5 +-
  target/arm/machine.c|  12 ++--
  target/hppa/machine.c   |  10 +--
  target/mips/machine.c   |  14 +++--
  target/openrisc/machine.c   |   5 +-
  target/ppc/machine.c|  14 +++--
  target/sparc/machine.c  |   7 ++-
  22 files changed, 162 insertions(+), 135 deletions(-)

diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h
index 2b501d0466..61bef3ef5c 100644
--- a/include/migration/vmstate.h
+++ b/include/migration/vmstate.h
@@ -40,8 +40,8 @@ typedef struct VMStateField VMStateField;


What about enforcing the constness in the typedef?

-- >8 --
@@ -32 +32 @@ typedef struct VMStateDescription VMStateDescription;
-typedef struct VMStateField VMStateField;
+typedef const struct VMStateField VMStateField;
---


   */
  struct VMStateInfo {
  const char *name;
-int (*get)(QEMUFile *f, void *pv, size_t size, VMStateField *field);
-int (*put)(QEMUFile *f, void *pv, size_t size, VMStateField *field,
+int (*get)(QEMUFile *f, void *pv, size_t size, const VMStateField *field);
+int (*put)(QEMUFile *f, void *pv, size_t size, const VMStateField *field,
 QJSON *vmdesc);
  };
  
@@ -186,7 +186,7 @@ struct VMStateDescription {

  int (*post_load)(void *opaque, int version_id);
  int (*pre_save)(void *opaque);
  bool (*needed)(void *opaque);
-VMStateField *fields;
+const VMStateField *fields;
  const VMStateDescription **subsections;
  };
  
diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c

index 7be3a9d404..c6fab56f9b 100644
--- a/hw/display/virtio-gpu.c
+++ b/hw/display/virtio-gpu.c
@@ -1073,7 +1073,7 @@ static const VMStateDescription 
vmstate_virtio_gpu_scanouts = {
  };
  
  static int virtio_gpu_save(QEMUFile *f, void *opaque, size_t size,

-   VMStateField *field, QJSON *vmdesc)
+   const VMStateField *field, QJSON *vmdesc)
  {
  VirtIOGPU *g = opaque;
  struct virtio_gpu_simple_resource *res;
@@ -1101,7 +1101,7 @@ static int virtio_gpu_save(QEMUFile *f, void *opaque, 
size_t size,
  }
  
  static int virtio_gpu_load(QEMUFile *f, void *opaque, size_t size,

-   VMStateField *field)
+   const VMStateField *field)
  {
  VirtIOGPU *g = opaque;
  struct virtio_gpu_simple_resource *res;
diff --git a/hw/intc/s390_flic_kvm.c b/hw/intc/s390_flic_kvm.c
index 3f804ad52e..a03df37560 100644
--- a/hw/intc/s390_flic_kvm.c
+++ b/hw/intc/s390_flic_kvm.c
@@ -376,7 +376,7 @@ static void kvm_s390_release_adapter_routes(S390FLICState 
*fs,
   * reached
   */
  static int kvm_flic_save(QEMUFile *f, void *opaque, size_t size,
- VMStateField *field, QJSON *vmdesc)
+ const VMStateField *field, QJSON *vmdesc)
  {
  KVMS390FLICState *flic = opaque;
  int len = FLIC_SAVE_INITIAL_SIZE;
@@ -426,7 +426,7 @@ static int kvm_flic_save(QEMUFile *f, void *opaque, size_t 
size,
   * in QEMUFile
   */
  static int kvm_flic_load(QEMUFile *f, void *opaque, size_t size,
- VMStateField *field)
+ const VMStateField *field)
  {
  uint64_t len = 0;
  uint64_t count = 0;
diff --git a/hw/nvram/eeprom93xx.c b/hw/nvram/eeprom93xx.c
index 2fd0e3c29f..2db3d7cce6 100644
--- a/hw/nvram/eeprom93xx.c
+++ b/hw/nvram/eeprom93xx.c
@@ -95,15 +95,15 @@ struct _eeprom_t {
   */
  
  static int get_uint16_from_uint8(QEMUFile *f, void *pv, size_t size,

- VMStateField *field)
+ const VMStateField *field)
  {
  uint16_t *v = pv;
  *v = qemu_get_ubyte(f);
  return 0;
  }
  
-static int put_unused(QEMUFile *f, void *pv, size_t size, VMStateField *field,

-  QJSON *vmdesc)
+static int put_unused(QEMUFile *f, void *pv, size_t size,
+  const VMStateField *field, QJSON *vmdesc)
  {
  fprintf(stderr, "uint16_from_uint8 is used only for backwards 
compatibility.\n");
  fprintf(stderr, "Never should be used to write a new state.\n");
diff --git a/hw/nvram/fw_cfg.c b/hw/nvram/fw_cfg.c
index 946f765f7f..3cb726ff68 100644
---

1 2 3 >

1 - 100 of 265 matches

Mail list logo