Re: [Qemu-devel] [PATCH V3 2/3] net/filter-mirror:Add filter-redirector func

2016-03-06 Thread Jason Wang


On 03/04/2016 08:01 PM, Zhang Chen wrote:
> Filter-redirector is a netfilter plugin.
> It gives qemu the ability to redirect net packet.
> redirector can redirect filter's net packet to outdev.
> and redirect indev's packet to filter.
>
>   filter
> +
> |
> |
> redirector  |
>+--+
>|| |
>|| |
>|| |
>   indev +---+   +-->  outdev
>|| |
>|| |
>|| |
>+--+
> |
> |
> v
>   filter
>
> usage:
>
> -netdev user,id=hn0
> -chardev socket,id=s0,host=ip_primary,port=X,server,nowait
> -chardev socket,id=s1,host=ip_primary,port=Y,server,nowait
> -filter-redirector,id=r0,netdev=hn0,queue=tx/rx/all,indev=s0,outdev=s1
>
> Signed-off-by: Zhang Chen 
> Signed-off-by: Wen Congyang 
> ---
>  net/filter-mirror.c | 211 
> 
>  qemu-options.hx |   8 ++
>  vl.c|   3 +-
>  3 files changed, 221 insertions(+), 1 deletion(-)
>
> diff --git a/net/filter-mirror.c b/net/filter-mirror.c
> index 4ff7619..d137168 100644
> --- a/net/filter-mirror.c
> +++ b/net/filter-mirror.c
> @@ -25,11 +25,19 @@
>  #define FILTER_MIRROR(obj) \
>  OBJECT_CHECK(MirrorState, (obj), TYPE_FILTER_MIRROR)
>  
> +#define FILTER_REDIRECTOR(obj) \
> +OBJECT_CHECK(MirrorState, (obj), TYPE_FILTER_REDIRECTOR)
> +
>  #define TYPE_FILTER_MIRROR "filter-mirror"
> +#define TYPE_FILTER_REDIRECTOR "filter-redirector"
> +#define REDIRECT_HEADER_LEN sizeof(uint32_t)
>  
>  typedef struct MirrorState {
>  NetFilterState parent_obj;
> +NetQueue *incoming_queue;
> +char *indev;
>  char *outdev;
> +CharDriverState *chr_in;
>  CharDriverState *chr_out;
>  } MirrorState;
>  
> @@ -67,6 +75,68 @@ err:
>  return ret < 0 ? ret : -EIO;
>  }
>  
> +static int redirector_chr_can_read(void *opaque)
> +{
> +return REDIRECT_HEADER_LEN;
> +}
> +
> +static void redirector_chr_read(void *opaque, const uint8_t *buf, int size)
> +{
> +NetFilterState *nf = opaque;
> +MirrorState *s = FILTER_REDIRECTOR(nf);
> +uint32_t len;
> +int ret = 0;
> +uint8_t *recv_buf;
> +
> +memcpy(, buf, size);

stack overflow if size > sizeof(len)?

> +if (size < REDIRECT_HEADER_LEN) {
> +ret = qemu_chr_fe_read_all(s->chr_in, ((uint8_t *)) + size,
> +REDIRECT_HEADER_LEN - size);

There maybe some misunderstanding for my previous reply. You can have a
look at net_socket_send() for reference. You could

- use a buffer for storing len
- each time when you receive partial len, store them in the buffer and
advance the pointer until you receive at least sizeof(len) bytes.

Then you can proceed and do something similar when you're receiving the
packet itself.

> +if (ret != (REDIRECT_HEADER_LEN - size)) {
> +error_report("filter-redirector recv size failed");
> +return;
> +}
> +}
> +
> +len = ntohl(len);
> +
> +if (len > 0 && len < NET_BUFSIZE) {
> +recv_buf = g_malloc(len);
> +ret = qemu_chr_fe_read_all(s->chr_in, recv_buf, len);
> +if (ret != len) {
> +error_report("filter-redirector recv buf failed");
> +g_free(recv_buf);
> +return;
> +}
> +
> +if (nf->direction == NET_FILTER_DIRECTION_ALL ||
> +nf->direction == NET_FILTER_DIRECTION_TX) {
> +ret = qemu_net_queue_send(s->incoming_queue, nf->netdev,
> +0, (const uint8_t *)recv_buf, len, NULL);

Rethink about this, we don't queue any packet in incoming_queue, so
probably there's no need to have a incoming_queue for redirector. We can
just use qemu_netfilter_pass_to_next() here.

> +
> +if (ret < 0) {
> +/*
> +  *FIXME: just report an error, let NET_FILTER_DIRECTION_RX 
> have chance to pass
> +  * to next filter ?
> +  */
> +error_report("filter-redirector out to guest failed");
> +}
> +}
> +
> +if (nf->direction == NET_FILTER_DIRECTION_ALL ||
> +nf->direction == NET_FILTER_DIRECTION_RX) {
> +struct iovec iov = {
> +.iov_base = recv_buf,
> +.iov_len = sizeof(recv_buf)
> +};
> +qemu_netfilter_pass_to_next(nf->netdev->peer, 0, , 1, nf);

Another issue not relate to this patch. Looks like we can rename this to
qemu_netfilter_iterate(), which seems better. Care to send a patch of this?

> +}
> +g_free(recv_buf);
> +} else {
> +

Re: [Qemu-devel] [PATCH] net.c: Moved large array in nc_sendv_compat from the stack to the heap

2016-03-06 Thread Alex Bennée

Nikos Filippakis  writes:

> Thank you for your comments!
>
> On Sun, Mar 6, 2016 at 9:47 AM, Alex Bennée  wrote:
>>
>>
>> Nikos Filippakis  writes:
>>
>> > Hello everyone! I am interested in getting to know the codebase a little 
>> > better
>> > so that I can eventually apply for a GSOC position.
>> > This is my first attempt at posting a patch to a mailing list, any feedback
>> > is greatly appreciated.
>>
>> OK first things first this sort of meta comment belongs in the cover
>> letter. However for a single patch you may want to put such things below
>> the --- in the commit message as that will get stripped when the
>> maintainer eventually applies the patch. Otherwise your meta-comments
>> will end up in the version log ;-)
>>
>> You'll see people use the --- area to keep version notes as patches go
>> through revisions.
>>
>
> I thought that could be considered part of the cover letter, didn't
> realize it would end up on the version log. Sorry about that (:

When you use git format-patch with --cover-letter to format a series of
patches you'll get exactly that. For individual patches like this one
then bellow the --- works. The fact your a potential GSOC student is
useful information to us on the list, just not in the actual commit log
in git ;-)

>
>> >
>> > Signed-off-by: Nikos Filippakis 
>> > ---
>> >  net/net.c | 17 -
>> >  1 file changed, 12 insertions(+), 5 deletions(-)
>> >
>> > diff --git a/net/net.c b/net/net.c
>> > index aebf753..79e9d7c 100644
>> > --- a/net/net.c
>> > +++ b/net/net.c
>> > @@ -710,23 +710,30 @@ ssize_t qemu_send_packet_raw(NetClientState *nc, 
>> > const uint8_t *buf, int size)
>> >  static ssize_t nc_sendv_compat(NetClientState *nc, const struct iovec 
>> > *iov,
>> > int iovcnt, unsigned flags)
>> >  {
>> > -uint8_t buf[NET_BUFSIZE];
>> >  uint8_t *buffer;
>> >  size_t offset;
>> > +ssize_t ret;
>>
>> With that said your comment needs to explain why you've just made the
>> change. I see NET_BUFSIZE is quite large so maybe this should be a
>> clean-up across the rest of the code-base, what's so special about this
>> function? Have you measured any difference in performance?
>>
>
> This method is one of several mentioned on the wiki as having big
> stack frames because of such arrays, something
> someone new to the project could easily fix, either by moving it to
> the heap or reducing the array size. Since further
> down there is a call to memcpy with NET_BUFSIZE length, I thought I'd
> just move it to the heap.

That's fine. In fact referencing the wiki bite-sized tasks would
probably be enough context for the commit message.

> Nothing special about this method, I'm planning to do the same with
> the others, just trying to get some
> familiarity with the mailing list.

Don't worry too much, it usually takes a few attempts to get your first
patch applied and the workflow sorted out.

> Besides, I'm not sure if I should put such small changes to different
> files in many small commits, or a large one.

The key byword here is bisectability. If regressions get introduced we
want to be able to quickly identify the culprit with git-bisect. So it
is important that every commit in the project builds cleanly. For
something like this I'd argue a series of patches would make sense as
they are likely in different functional places in the code.

>
>> >
>> >  if (iovcnt == 1) {
>> >  buffer = iov[0].iov_base;
>> >  offset = iov[0].iov_len;
>> >  } else {
>> > -buffer = buf;
>> > -offset = iov_to_buf(iov, iovcnt, 0, buf, sizeof(buf));
>> > +buffer = g_malloc(NET_BUFSIZE * sizeof(uint8_t));
>> > +offset = iov_to_buf(iov, iovcnt, 0, buffer,
>> > +NET_BUFSIZE * sizeof(uint8_t));
>> >  }
>> >
>> >  if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) {
>> > -return nc->info->receive_raw(nc, buffer, offset);
>> > +ret = nc->info->receive_raw(nc, buffer, offset);
>> >  } else {
>> > -return nc->info->receive(nc, buffer, offset);
>> > +ret = nc->info->receive(nc, buffer, offset);
>> >  }
>> > +
>> > +if (iovcnt != 1) {
>> > +g_free(buffer);
>> > +}
>>
>> This is a short function so you can get away with it but this sort of
>> logic can be confusing ("The iovec count was 1 therefor I should have
>> allocated a buffer" vs "I have an allocated buffer"). In general you
>> should know the various g_* functions tolerate NULLs well so maybe you
>> can structure the code differently (skipping the details ;-):
>>
>> uint8_t *buffer, *dynbuf = NULL;
>>
>> if (iovcnt == 1)
>> {
>>   buffer = ...
>> } else {
>>   buffer = dynbuf = g_malloc(NET_BUFSIZE * sizeof(uint8_t));
>>   ...
>> }
>> ...
>>
>> g_free(dynbuf)
>>
>
> You're right, I didn't quite like the way I 

Re: [Qemu-devel] [PATCH v3 3/3] arm: implement query-gic-capability

2016-03-06 Thread Andrew Jones
On Mon, Mar 07, 2016 at 01:38:24PM +0800, Peter Xu wrote:
> On Mon, Mar 07, 2016 at 06:12:38AM +0100, Andrew Jones wrote:
> > On Mon, Mar 07, 2016 at 12:23:28PM +0800, Peter Xu wrote:
> > > +#ifdef CONFIG_KVM
> > > +/*
> > > + * This is merely the same as kvm_create_device(). The only
> > > + * difference is we are using raw fds rather than KVMState, so that
> > > + * we can use it even without kvm_state initialized.
> > > + */
> > > +static int kvm_create_device_fds(int kvm_fd, int vmfd,
> > > + uint64_t type, bool test)
> > 
> > I don't think we need this helper function. Who else will call it?
> > Particularly without test==true? Anyway, I think three ioctls directly
> > called from qmp_query_gic_capability should be OK.
> 
> Right. However, I would still consider using a helper function if
> you would not mind. E.g, how about this:

Fine by me.

Thanks,
drew

> 
> #ifdef CONFIG_KVM
> /* Test whether KVM support specific device. */
> static inline int kvm_support_device(int vmfd, uint64_t type)
> {
> struct kvm_create_device create_dev = {
> .type = type,
> .fd = -1,
> .flags = KVM_CREATE_DEVICE_TEST,
> };
> return ioctl(vmfd, KVM_CREATE_DEVICE, _dev);
> }
> #endif
> 
> Thanks.
> Peter
> 



Re: [Qemu-devel] [PATCH v9 4/7] s390x/cpu: Tolerate max_cpus

2016-03-06 Thread David Hildenbrand
> Once hotplug is enabled, interrupts may come in for CPUs
> with an address > smp_cpus.  Allocate for this and allow
> search routines to look beyond smp_cpus.
> 
> Signed-off-by: Matthew Rosato 

Reviewed-by: David Hildenbrand 

> ---
>  hw/s390x/s390-virtio.c | 11 ++-
>  1 file changed, 6 insertions(+), 5 deletions(-)
> 
> diff --git a/hw/s390x/s390-virtio.c b/hw/s390x/s390-virtio.c
> index c501a48..57c3c88 100644
> --- a/hw/s390x/s390-virtio.c
> +++ b/hw/s390x/s390-virtio.c
> @@ -58,15 +58,16 @@
>  #define S390_TOD_CLOCK_VALUE_MISSING0x00
>  #define S390_TOD_CLOCK_VALUE_PRESENT0x01
> 
> -static S390CPU **ipi_states;
> +static S390CPU **cpu_states;
> 
>  S390CPU *s390_cpu_addr2state(uint16_t cpu_addr)
>  {
> -if (cpu_addr >= smp_cpus) {
> +if (cpu_addr >= max_cpus) {
>  return NULL;
>  }
> 
> -return ipi_states[cpu_addr];
> +/* Fast lookup via CPU ID */
> +return cpu_states[cpu_addr];
>  }
> 
>  void s390_init_ipl_dev(const char *kernel_filename,
> @@ -101,14 +102,14 @@ void s390_init_cpus(MachineState *machine)
>  machine->cpu_model = "host";
>  }
> 
> -ipi_states = g_malloc(sizeof(S390CPU *) * smp_cpus);
> +cpu_states = g_malloc0(sizeof(S390CPU *) * max_cpus);
> 
>  for (i = 0; i < smp_cpus; i++) {
>  S390CPU *cpu;
> 
>  cpu = cpu_s390x_init(machine->cpu_model);
> 
> -ipi_states[i] = cpu;
> +cpu_states[i] = cpu;
>  }
>  }
> 



David




Re: [Qemu-devel] [PATCH V3 1/3] net/filter-mirror: Change filter_mirror_send interface

2016-03-06 Thread Jason Wang


On 03/04/2016 08:01 PM, Zhang Chen wrote:
> Change filter_mirror_send interface to make it easier
> to used by other filter
>
> Signed-off-by: Zhang Chen 
> Signed-off-by: Wen Congyang 
> ---
>  net/filter-mirror.c | 10 +-
>  1 file changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/net/filter-mirror.c b/net/filter-mirror.c
> index ee13d94..4ff7619 100644
> --- a/net/filter-mirror.c
> +++ b/net/filter-mirror.c
> @@ -33,11 +33,10 @@ typedef struct MirrorState {
>  CharDriverState *chr_out;
>  } MirrorState;
>  
> -static int filter_mirror_send(NetFilterState *nf,
> +static int filter_mirror_send(CharDriverState *chr_out,
> const struct iovec *iov,
> int iovcnt)

The indent looks odds here, please send an independent patch to fix this
by the way.

Thanks



Re: [Qemu-devel] [RFC PATCH v2 3/3] VFIO: Type1 IOMMU mapping support for vGPU

2016-03-06 Thread Jike Song
Hi Neo,

On Fri, Mar 4, 2016 at 3:00 PM, Neo Jia  wrote:
> On Wed, Mar 02, 2016 at 04:38:34PM +0800, Jike Song wrote:
>> On 02/24/2016 12:24 AM, Kirti Wankhede wrote:
>> > +   vgpu_dma->size = map->size;
>> > +
>> > +   vgpu_link_dma(vgpu_iommu, vgpu_dma);
>>
>> Hi Kirti & Neo,
>>
>> seems that no one actually setup mappings for IOMMU here?
>>
>
> Hi Jike,
>
> Yes.
>
> The actual mapping should be done by the host kernel driver after calling the
> translation/pinning API vgpu_dma_do_translate.

Thanks for the reply. I mis-deleted the mail in my intel account, so
reply with private mail account, sorry for that.


In vgpu_dma_do_translate():

for (i = 0; i < count; i++) {
   {snip}
   dma_addr_t iova = gfn_buffer[i] << PAGE_SHIFT;
   vgpu_dma = vgpu_find_dma(vgpu_iommu, iova, 0 /*  size */);

remote_vaddr = vgpu_dma->vaddr + iova - vgpu_dma->iova;
if (get_user_pages_unlocked(NULL, mm, remote_vaddr, 1, 1, 0, page) == 1) {
pfn = page_to_pfn(page[0]);
}
gfn_buffer[i] = pfn;
}

If I understand correctly, the purpose of above code, is given an
array of gfns, try to pin & return associated pfns. There is still no
IOMMU mappings here.  Is it supposed to be the caller who should set
up IOMMU by DMA api such as dma_map_page(), after calling
vgpu_dma_do_translate()?


-- 
Thanks,
Jike



[Qemu-devel] broken socket events on win32 qemu

2016-03-06 Thread Andrew Baumann
Hi Daniel,

This commit ("char: convert from GIOChannel to QIOChannel"):
https://github.com/qemu/qemu/commit/9894dc0cdcc397ee5b26370bc53da6d360a363c2
... appears to have broken socket events for character devices on Win32. For 
example, I can no longer connect to a GDB stub (started with: "-gdb 
tcp:127.0.0.1:1234"), since tcp_chr_accept is never called.

Without having looked very closely at the code, I suspect the problem may be 
that we've lost the special-case treatment of socket handles as distinct from 
file descriptors on Win32 (they are different namespaces, and different APIs 
are needed). The previous version of qemu-char.c special-cased sockets in 
io_channel_from_socket():

-#ifdef _WIN32
-chan = g_io_channel_win32_new_socket(fd);
-#else
-chan = g_io_channel_unix_new(fd);
-#endif

... but I don't see anything equivalent in io/channel-socket.c. Am I looking in 
the wrong place?

BTW, The same change introduces another problem on win32: server sockets like 
the GDB example above fail on getpeername() with "Unable to query remote socket 
address: Unknown error". This seems to be caused by a definition of ENOTCONN 
that is not WSAENOTCONN. I'm still trying to figure out why that is, and how to 
best fix it.

Regards,
Andrew



Re: [Qemu-devel] [Xen-devel] RFC: configuring QEMU virtfs for Xen PV(H) guests

2016-03-06 Thread Juergen Gross
Hi Wei,

On 15/02/16 14:44, Wei Liu wrote:
> On Mon, Feb 15, 2016 at 02:33:05PM +0100, Juergen Gross wrote:
>> On 15/02/16 14:16, Wei Liu wrote:
>>> On Mon, Feb 15, 2016 at 09:07:13AM +, Paul Durrant wrote:
>
>>> [...]
> # Option 2: Invent a xen-9p device
>
> Another way of doing it is to expose a dummy xen-9p device, so that we
> can use -fsdev XXX -device xen-9p,YYY.  This simple device should be
> used to capture the parameters like mount_tag and fsdev_id, and then
> chained itself to a known location.  Later Xen transport can traverse
> this known location. This xen-9p device doesn't seem to fit well into
> the hierarchy. The best I can think of its parent should be
> TYPE_DEVICE.  In this case:
>
> 1. Toolstack arranges some xenstore entries.
> 2. Toolstack arranges command line options for QEMU:
>   -fsdev XXX -device xen-9p,XXX
> 3. QEMU starts up in xen-attach mode, scans xenstore for relevant
>entries, then traverses the known location.
>
> Downside: Inventing a dummy device looks suboptimal to me.
>>
>> Sorry, didn't notice this thread before.
>>
> 
> No need to be sorry. I posted this last Friday night. I wouldn't expect
> many replies on Monady.
> 
>> For Xen pvUSB backend in qemu I need a Xen system device acting as
>> parent for being able to attach/detach virtual USB busses.
>>
>> I haven't had time to update my patches for some time, but the patch
>> for this system device is rather easy. It could be used as a parent
>> of the xen-9p devices, too.
>>
>> I've attached the patch for reference.
>>
> 
> Thanks. I will have a look at your patch.

Did you have some time to look at the patch? I'm asking because I
finally found some time to start working on V2 of my qemu based pvUSB
backend. Stefano asked me to hide the system device in my backend and
I want to avoid that in case you are needing it, too.

Juergen



Re: [Qemu-devel] [PATCH v3 0/5] Deterministic replay extensions

2016-03-06 Thread dovgaluk

Ping?

Pavel Dovgalyuk

Pavel Dovgalyuk писал 2016-03-01 14:07:
This set of patches is related to the reverse execution and 
deterministic
replay of qemu execution. It includes recording and replaying of serial 
devices

and block devices operations.

With these patches one can record and deterministically replay behavior
of the system with connected disk drives and serial communication ports
(e.g., telnet terminal).

Patches for deterministic replay of the block devices intercept calls 
of

bdrv coroutine functions at the top of block drivers stack.
To record and replay block operations the drive must be configured
as following:
 -drive file=disk.qcow,if=none,id=img-direct
 -drive driver=blkreplay,if=none,image=img-direct,id=img-blkreplay
 -device ide-hd,drive=img-blkreplay

blkreplay driver should be inserted between disk image and virtual 
driver

controller. Therefore all disk requests may be recorded and replayed.

v3 changes:
 - introduced bdrv_flush callback for block drivers
 - introduced block driver for recording block operations (as
suggested by Kevin Wolf)
 - added documentation for block record/replay

v2 changes:
 - removed obsolete call of qemu_clock_warp
 - fixed record/replay of aio_cancel
 - simplified call sequence for blk_aio_ functions in non-replay mode
(as suggested by Kevin Wolf)

---

Pavel Dovgalyuk (5):
  replay: character devices
  icount: remove obsolete warp call
  replay: introduce new checkpoint for icount warp
  block: add flush callback
  replay: introduce block devices record/replay


 block/Makefile.objs   |2 -
 block/blkreplay.c |  156 
+

 block/io.c|6 ++
 cpus.c|   10 +--
 docs/replay.txt   |   20 ++
 gdbstub.c |2 -
 include/block/block_int.h |7 ++
 include/qemu/timer.h  |3 +
 include/sysemu/char.h |   26 
 include/sysemu/replay.h   |   15 
 main-loop.c   |2 -
 qemu-char.c   |   56 ++--
 qemu-timer.c  |2 -
 replay/Makefile.objs  |1
 replay/replay-char.c  |   97 
 replay/replay-events.c|   41 ++--
 replay/replay-internal.h  |   16 +
 replay/replay.c   |   25 +++
 stubs/clock-warp.c|2 -
 stubs/replay.c|4 +
 20 files changed, 469 insertions(+), 24 deletions(-)
 create mode 100755 block/blkreplay.c
 create mode 100755 replay/replay-char.c





[Qemu-devel] [PATCH 8/9] hw/arm: QOM'ify strongarm.c

2016-03-06 Thread xiaoqiang zhao
Drop the use of old SysBus init function and use instance_init

Signed-off-by: xiaoqiang zhao 
---
 hw/arm/strongarm.c | 66 +++---
 1 file changed, 28 insertions(+), 38 deletions(-)

diff --git a/hw/arm/strongarm.c b/hw/arm/strongarm.c
index 3b17a21..78f58c6 100644
--- a/hw/arm/strongarm.c
+++ b/hw/arm/strongarm.c
@@ -177,19 +177,18 @@ static const MemoryRegionOps strongarm_pic_ops = {
 .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static int strongarm_pic_initfn(SysBusDevice *sbd)
+static void strongarm_pic_initfn(Object *obj)
 {
-DeviceState *dev = DEVICE(sbd);
-StrongARMPICState *s = STRONGARM_PIC(dev);
+DeviceState *dev = DEVICE(obj);
+StrongARMPICState *s = STRONGARM_PIC(obj);
+SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
 
 qdev_init_gpio_in(dev, strongarm_pic_set_irq, SA_PIC_SRCS);
-memory_region_init_io(>iomem, OBJECT(s), _pic_ops, s,
+memory_region_init_io(>iomem, obj, _pic_ops, s,
   "pic", 0x1000);
 sysbus_init_mmio(sbd, >iomem);
 sysbus_init_irq(sbd, >irq);
 sysbus_init_irq(sbd, >fiq);
-
-return 0;
 }
 
 static int strongarm_pic_post_load(void *opaque, int version_id)
@@ -215,9 +214,7 @@ static VMStateDescription vmstate_strongarm_pic_regs = {
 static void strongarm_pic_class_init(ObjectClass *klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
-SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
-k->init = strongarm_pic_initfn;
 dc->desc = "StrongARM PIC";
 dc->vmsd = _strongarm_pic_regs;
 }
@@ -226,6 +223,7 @@ static const TypeInfo strongarm_pic_info = {
 .name  = TYPE_STRONGARM_PIC,
 .parent= TYPE_SYS_BUS_DEVICE,
 .instance_size = sizeof(StrongARMPICState),
+.instance_init = strongarm_pic_initfn,
 .class_init= strongarm_pic_class_init,
 };
 
@@ -379,9 +377,10 @@ static const MemoryRegionOps strongarm_rtc_ops = {
 .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static int strongarm_rtc_init(SysBusDevice *dev)
+static void strongarm_rtc_init(Object *obj)
 {
-StrongARMRTCState *s = STRONGARM_RTC(dev);
+StrongARMRTCState *s = STRONGARM_RTC(obj);
+SysBusDevice *dev = SYS_BUS_DEVICE(obj);
 struct tm tm;
 
 s->rttr = 0x0;
@@ -398,11 +397,9 @@ static int strongarm_rtc_init(SysBusDevice *dev)
 sysbus_init_irq(dev, >rtc_irq);
 sysbus_init_irq(dev, >rtc_hz_irq);
 
-memory_region_init_io(>iomem, OBJECT(s), _rtc_ops, s,
+memory_region_init_io(>iomem, obj, _rtc_ops, s,
   "rtc", 0x1);
 sysbus_init_mmio(dev, >iomem);
-
-return 0;
 }
 
 static void strongarm_rtc_pre_save(void *opaque)
@@ -441,9 +438,7 @@ static const VMStateDescription vmstate_strongarm_rtc_regs 
= {
 static void strongarm_rtc_sysbus_class_init(ObjectClass *klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
-SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
-k->init = strongarm_rtc_init;
 dc->desc = "StrongARM RTC Controller";
 dc->vmsd = _strongarm_rtc_regs;
 }
@@ -452,6 +447,7 @@ static const TypeInfo strongarm_rtc_sysbus_info = {
 .name  = TYPE_STRONGARM_RTC,
 .parent= TYPE_SYS_BUS_DEVICE,
 .instance_size = sizeof(StrongARMRTCState),
+.instance_init = strongarm_rtc_init,
 .class_init= strongarm_rtc_sysbus_class_init,
 };
 
@@ -644,16 +640,17 @@ static DeviceState *strongarm_gpio_init(hwaddr base,
 return dev;
 }
 
-static int strongarm_gpio_initfn(SysBusDevice *sbd)
+static void strongarm_gpio_initfn(Object *obj)
 {
-DeviceState *dev = DEVICE(sbd);
-StrongARMGPIOInfo *s = STRONGARM_GPIO(dev);
+DeviceState *dev = DEVICE(obj);
+StrongARMGPIOInfo *s = STRONGARM_GPIO(obj);
+SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
 int i;
 
 qdev_init_gpio_in(dev, strongarm_gpio_set, 28);
 qdev_init_gpio_out(dev, s->handler, 28);
 
-memory_region_init_io(>iomem, OBJECT(s), _gpio_ops, s,
+memory_region_init_io(>iomem, obj, _gpio_ops, s,
   "gpio", 0x1000);
 
 sysbus_init_mmio(sbd, >iomem);
@@ -661,8 +658,6 @@ static int strongarm_gpio_initfn(SysBusDevice *sbd)
 sysbus_init_irq(sbd, >irqs[i]);
 }
 sysbus_init_irq(sbd, >irqX);
-
-return 0;
 }
 
 static const VMStateDescription vmstate_strongarm_gpio_regs = {
@@ -685,9 +680,7 @@ static const VMStateDescription vmstate_strongarm_gpio_regs 
= {
 static void strongarm_gpio_class_init(ObjectClass *klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
-SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
-k->init = strongarm_gpio_initfn;
 dc->desc = "StrongARM GPIO controller";
 dc->vmsd = _strongarm_gpio_regs;
 }
@@ -696,6 +689,7 @@ static const TypeInfo strongarm_gpio_info = {
 .name  = TYPE_STRONGARM_GPIO,
 .parent= TYPE_SYS_BUS_DEVICE,
 .instance_size = sizeof(StrongARMGPIOInfo),
+.instance_init = 

[Qemu-devel] [PATCH 7/9] hw/arm: QOM'ify stellaris.c

2016-03-06 Thread xiaoqiang zhao
* Drop the use of old SysBus init function and use instance_init
* Use DeviceClass::vmsd instead of 'vmstate_register' function

Signed-off-by: xiaoqiang zhao 
---
 hw/arm/stellaris.c | 48 
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/hw/arm/stellaris.c b/hw/arm/stellaris.c
index de8dbb2..cc0e0ae 100644
--- a/hw/arm/stellaris.c
+++ b/hw/arm/stellaris.c
@@ -315,23 +315,22 @@ static const VMStateDescription vmstate_stellaris_gptm = {
 }
 };
 
-static int stellaris_gptm_init(SysBusDevice *sbd)
+static void stellaris_gptm_init(Object *obj)
 {
-DeviceState *dev = DEVICE(sbd);
-gptm_state *s = STELLARIS_GPTM(dev);
+DeviceState *dev = DEVICE(obj);
+gptm_state *s = STELLARIS_GPTM(obj);
+SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
 
 sysbus_init_irq(sbd, >irq);
 qdev_init_gpio_out(dev, >trigger, 1);
 
-memory_region_init_io(>iomem, OBJECT(s), _ops, s,
+memory_region_init_io(>iomem, obj, _ops, s,
   "gptm", 0x1000);
 sysbus_init_mmio(sbd, >iomem);
 
 s->opaque[0] = s->opaque[1] = s;
 s->timer[0] = timer_new_ns(QEMU_CLOCK_VIRTUAL, gptm_tick, >opaque[0]);
 s->timer[1] = timer_new_ns(QEMU_CLOCK_VIRTUAL, gptm_tick, >opaque[1]);
-vmstate_register(dev, -1, _stellaris_gptm, s);
-return 0;
 }
 
 
@@ -872,23 +871,22 @@ static const VMStateDescription vmstate_stellaris_i2c = {
 }
 };
 
-static int stellaris_i2c_init(SysBusDevice *sbd)
+static void stellaris_i2c_init(Object *obj)
 {
-DeviceState *dev = DEVICE(sbd);
-stellaris_i2c_state *s = STELLARIS_I2C(dev);
+DeviceState *dev = DEVICE(obj);
+stellaris_i2c_state *s = STELLARIS_I2C(obj);
+SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
 I2CBus *bus;
 
 sysbus_init_irq(sbd, >irq);
 bus = i2c_init_bus(dev, "i2c");
 s->bus = bus;
 
-memory_region_init_io(>iomem, OBJECT(s), _i2c_ops, s,
+memory_region_init_io(>iomem, obj, _i2c_ops, s,
   "i2c", 0x1000);
 sysbus_init_mmio(sbd, >iomem);
 /* ??? For now we only implement the master interface.  */
 stellaris_i2c_reset(s);
-vmstate_register(dev, -1, _stellaris_i2c, s);
-return 0;
 }
 
 /* Analogue to Digital Converter.  This is only partially implemented,
@@ -1159,23 +1157,22 @@ static const VMStateDescription vmstate_stellaris_adc = 
{
 }
 };
 
-static int stellaris_adc_init(SysBusDevice *sbd)
+static void stellaris_adc_init(Object *obj)
 {
-DeviceState *dev = DEVICE(sbd);
-stellaris_adc_state *s = STELLARIS_ADC(dev);
+DeviceState *dev = DEVICE(obj);
+stellaris_adc_state *s = STELLARIS_ADC(obj);
+SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
 int n;
 
 for (n = 0; n < 4; n++) {
 sysbus_init_irq(sbd, >irq[n]);
 }
 
-memory_region_init_io(>iomem, OBJECT(s), _adc_ops, s,
+memory_region_init_io(>iomem, obj, _adc_ops, s,
   "adc", 0x1000);
 sysbus_init_mmio(sbd, >iomem);
 stellaris_adc_reset(s);
 qdev_init_gpio_in(dev, stellaris_adc_trigger, 1);
-vmstate_register(dev, -1, _stellaris_adc, s);
-return 0;
 }
 
 static
@@ -1424,43 +1421,46 @@ machine_init(stellaris_machine_init)
 
 static void stellaris_i2c_class_init(ObjectClass *klass, void *data)
 {
-SysBusDeviceClass *sdc = SYS_BUS_DEVICE_CLASS(klass);
+DeviceClass *dc = DEVICE_CLASS(klass);
 
-sdc->init = stellaris_i2c_init;
+dc->vmsd = _stellaris_i2c;
 }
 
 static const TypeInfo stellaris_i2c_info = {
 .name  = TYPE_STELLARIS_I2C,
 .parent= TYPE_SYS_BUS_DEVICE,
 .instance_size = sizeof(stellaris_i2c_state),
+.instance_init = stellaris_i2c_init,
 .class_init= stellaris_i2c_class_init,
 };
 
 static void stellaris_gptm_class_init(ObjectClass *klass, void *data)
 {
-SysBusDeviceClass *sdc = SYS_BUS_DEVICE_CLASS(klass);
+DeviceClass *dc = DEVICE_CLASS(klass);
 
-sdc->init = stellaris_gptm_init;
+dc->vmsd = _stellaris_gptm;
 }
 
 static const TypeInfo stellaris_gptm_info = {
 .name  = TYPE_STELLARIS_GPTM,
 .parent= TYPE_SYS_BUS_DEVICE,
 .instance_size = sizeof(gptm_state),
+.instance_init = stellaris_gptm_init,
 .class_init= stellaris_gptm_class_init,
 };
 
 static void stellaris_adc_class_init(ObjectClass *klass, void *data)
 {
-SysBusDeviceClass *sdc = SYS_BUS_DEVICE_CLASS(klass);
+DeviceClass *dc = DEVICE_CLASS(klass);
 
-sdc->init = stellaris_adc_init;
+dc->vmsd = _stellaris_adc;
 }
 
 static const TypeInfo stellaris_adc_info = {
 .name  = TYPE_STELLARIS_ADC,
 .parent= TYPE_SYS_BUS_DEVICE,
 .instance_size = sizeof(stellaris_adc_state),
+.instance_init = stellaris_adc_init,
 .class_init= stellaris_adc_class_init,
 };
 
-- 
2.1.4





[Qemu-devel] [PATCH 3/9] hw/arm: QOM'ify integratorcp.c

2016-03-06 Thread xiaoqiang zhao
* Drop the use of old SysBus init function and use instance_init
* Remove the empty 'icp_pic_class_init' from Typeinfo

Signed-off-by: xiaoqiang zhao 
---
 hw/arm/integratorcp.c | 32 
 1 file changed, 12 insertions(+), 20 deletions(-)

diff --git a/hw/arm/integratorcp.c b/hw/arm/integratorcp.c
index c6656a8..782201a 100644
--- a/hw/arm/integratorcp.c
+++ b/hw/arm/integratorcp.c
@@ -239,9 +239,10 @@ static const MemoryRegionOps integratorcm_ops = {
 .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static int integratorcm_init(SysBusDevice *dev)
+static void integratorcm_init(Object *obj)
 {
-IntegratorCMState *s = INTEGRATOR_CM(dev);
+IntegratorCMState *s = INTEGRATOR_CM(obj);
+SysBusDevice *dev = SYS_BUS_DEVICE(obj);
 
 s->cm_osc = 0x0148;
 /* ??? What should the high bits of this value be?  */
@@ -266,17 +267,16 @@ static int integratorcm_init(SysBusDevice *dev)
 s->cm_init = 0x0112;
 s->cm_refcnt_offset = muldiv64(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL), 24,
1000);
-memory_region_init_ram(>flash, OBJECT(s), "integrator.flash", 0x10,
+memory_region_init_ram(>flash, obj, "integrator.flash", 0x10,
_fatal);
 vmstate_register_ram_global(>flash);
 
-memory_region_init_io(>iomem, OBJECT(s), _ops, s,
+memory_region_init_io(>iomem, obj, _ops, s,
   "integratorcm", 0x0080);
 sysbus_init_mmio(dev, >iomem);
 
 integratorcm_do_remap(s);
 /* ??? Save/restore.  */
-return 0;
 }
 
 /* Integrator/CP hardware emulation.  */
@@ -391,18 +391,18 @@ static const MemoryRegionOps icp_pic_ops = {
 .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static int icp_pic_init(SysBusDevice *sbd)
+static void icp_pic_init(Object *obj)
 {
-DeviceState *dev = DEVICE(sbd);
-icp_pic_state *s = INTEGRATOR_PIC(dev);
+DeviceState *dev = DEVICE(obj);
+icp_pic_state *s = INTEGRATOR_PIC(obj);
+SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
 
 qdev_init_gpio_in(dev, icp_pic_set_irq, 32);
 sysbus_init_irq(sbd, >parent_irq);
 sysbus_init_irq(sbd, >parent_fiq);
-memory_region_init_io(>iomem, OBJECT(s), _pic_ops, s,
+memory_region_init_io(>iomem, obj, _pic_ops, s,
   "icp-pic", 0x0080);
 sysbus_init_mmio(sbd, >iomem);
-return 0;
 }
 
 /* CP control registers.  */
@@ -627,9 +627,7 @@ static Property core_properties[] = {
 static void core_class_init(ObjectClass *klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
-SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
-k->init = integratorcm_init;
 dc->props = core_properties;
 }
 
@@ -637,21 +635,15 @@ static const TypeInfo core_info = {
 .name  = TYPE_INTEGRATOR_CM,
 .parent= TYPE_SYS_BUS_DEVICE,
 .instance_size = sizeof(IntegratorCMState),
+.instance_init = integratorcm_init,
 .class_init= core_class_init,
 };
 
-static void icp_pic_class_init(ObjectClass *klass, void *data)
-{
-SysBusDeviceClass *sdc = SYS_BUS_DEVICE_CLASS(klass);
-
-sdc->init = icp_pic_init;
-}
-
 static const TypeInfo icp_pic_info = {
 .name  = TYPE_INTEGRATOR_PIC,
 .parent= TYPE_SYS_BUS_DEVICE,
 .instance_size = sizeof(icp_pic_state),
-.class_init= icp_pic_class_init,
+.instance_init = icp_pic_init,
 };
 
 static const TypeInfo icp_ctrl_regs_info = {
-- 
2.1.4





[Qemu-devel] [PATCH 4/9] hw/arm: QOM'ify pxa2xx.c

2016-03-06 Thread xiaoqiang zhao
Drop the use of old SysBus init function and use instance_init

Signed-off-by: xiaoqiang zhao 
---
 hw/arm/pxa2xx.c | 26 +++---
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/hw/arm/pxa2xx.c b/hw/arm/pxa2xx.c
index ff6ac7a..8bdffca 100644
--- a/hw/arm/pxa2xx.c
+++ b/hw/arm/pxa2xx.c
@@ -1103,9 +1103,10 @@ static const MemoryRegionOps pxa2xx_rtc_ops = {
 .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static int pxa2xx_rtc_init(SysBusDevice *dev)
+static void pxa2xx_rtc_init(Object *obj)
 {
-PXA2xxRTCState *s = PXA2XX_RTC(dev);
+PXA2xxRTCState *s = PXA2XX_RTC(obj);
+SysBusDevice *dev = SYS_BUS_DEVICE(obj);
 struct tm tm;
 int wom;
 
@@ -1134,11 +1135,9 @@ static int pxa2xx_rtc_init(SysBusDevice *dev)
 
 sysbus_init_irq(dev, >rtc_irq);
 
-memory_region_init_io(>iomem, OBJECT(s), _rtc_ops, s,
+memory_region_init_io(>iomem, obj, _rtc_ops, s,
   "pxa2xx-rtc", 0x1);
 sysbus_init_mmio(dev, >iomem);
-
-return 0;
 }
 
 static void pxa2xx_rtc_pre_save(void *opaque)
@@ -1191,9 +1190,7 @@ static const VMStateDescription vmstate_pxa2xx_rtc_regs = 
{
 static void pxa2xx_rtc_sysbus_class_init(ObjectClass *klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
-SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
-k->init = pxa2xx_rtc_init;
 dc->desc = "PXA2xx RTC Controller";
 dc->vmsd = _pxa2xx_rtc_regs;
 }
@@ -1202,6 +1199,7 @@ static const TypeInfo pxa2xx_rtc_sysbus_info = {
 .name  = TYPE_PXA2XX_RTC,
 .parent= TYPE_SYS_BUS_DEVICE,
 .instance_size = sizeof(PXA2xxRTCState),
+.instance_init = pxa2xx_rtc_init,
 .class_init= pxa2xx_rtc_sysbus_class_init,
 };
 
@@ -1497,19 +1495,18 @@ PXA2xxI2CState *pxa2xx_i2c_init(hwaddr base,
 return s;
 }
 
-static int pxa2xx_i2c_initfn(SysBusDevice *sbd)
+static void pxa2xx_i2c_initfn(Object *obj)
 {
-DeviceState *dev = DEVICE(sbd);
-PXA2xxI2CState *s = PXA2XX_I2C(dev);
+DeviceState *dev = DEVICE(obj);
+PXA2xxI2CState *s = PXA2XX_I2C(obj);
+SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
 
 s->bus = i2c_init_bus(dev, "i2c");
 
-memory_region_init_io(>iomem, OBJECT(s), _i2c_ops, s,
+memory_region_init_io(>iomem, obj, _i2c_ops, s,
   "pxa2xx-i2c", s->region_size);
 sysbus_init_mmio(sbd, >iomem);
 sysbus_init_irq(sbd, >irq);
-
-return 0;
 }
 
 I2CBus *pxa2xx_i2c_bus(PXA2xxI2CState *s)
@@ -1526,9 +1523,7 @@ static Property pxa2xx_i2c_properties[] = {
 static void pxa2xx_i2c_class_init(ObjectClass *klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
-SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
-k->init = pxa2xx_i2c_initfn;
 dc->desc = "PXA2xx I2C Bus Controller";
 dc->vmsd = _pxa2xx_i2c;
 dc->props = pxa2xx_i2c_properties;
@@ -1538,6 +1533,7 @@ static const TypeInfo pxa2xx_i2c_info = {
 .name  = TYPE_PXA2XX_I2C,
 .parent= TYPE_SYS_BUS_DEVICE,
 .instance_size = sizeof(PXA2xxI2CState),
+.instance_init = pxa2xx_i2c_initfn,
 .class_init= pxa2xx_i2c_class_init,
 };
 
-- 
2.1.4





[Qemu-devel] [PATCH 5/9] hw/arm: QOM'ify pxa2xx_pic.c

2016-03-06 Thread xiaoqiang zhao
Remove the empty 'pxa2xx_pic_initfn' and it's
setup code in the 'pxa2xx_pic_class_init'

Signed-off-by: xiaoqiang zhao 
---
 hw/arm/pxa2xx_pic.c | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/hw/arm/pxa2xx_pic.c b/hw/arm/pxa2xx_pic.c
index 8a39b1c..9e08c55 100644
--- a/hw/arm/pxa2xx_pic.c
+++ b/hw/arm/pxa2xx_pic.c
@@ -308,17 +308,10 @@ static VMStateDescription vmstate_pxa2xx_pic_regs = {
 },
 };
 
-static int pxa2xx_pic_initfn(SysBusDevice *dev)
-{
-return 0;
-}
-
 static void pxa2xx_pic_class_init(ObjectClass *klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
-SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
-k->init = pxa2xx_pic_initfn;
 dc->desc = "PXA2xx PIC";
 dc->vmsd = _pxa2xx_pic_regs;
 }
-- 
2.1.4





[Qemu-devel] [PATCH 9/9] hw/arm: QOM'ify versatilepb.c

2016-03-06 Thread xiaoqiang zhao
Drop the use of old SysBus init function and use instance_init

Signed-off-by: xiaoqiang zhao 
---
 hw/arm/versatilepb.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/hw/arm/versatilepb.c b/hw/arm/versatilepb.c
index d061f0f..a2411e3 100644
--- a/hw/arm/versatilepb.c
+++ b/hw/arm/versatilepb.c
@@ -150,10 +150,11 @@ static const MemoryRegionOps vpb_sic_ops = {
 .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-static int vpb_sic_init(SysBusDevice *sbd)
+static void vpb_sic_init(Object *obj)
 {
-DeviceState *dev = DEVICE(sbd);
-vpb_sic_state *s = VERSATILE_PB_SIC(dev);
+DeviceState *dev = DEVICE(obj);
+vpb_sic_state *s = VERSATILE_PB_SIC(obj);
+SysBusDevice *sbd = SYS_BUS_DEVICE(obj);
 int i;
 
 qdev_init_gpio_in(dev, vpb_sic_set_irq, 32);
@@ -161,10 +162,9 @@ static int vpb_sic_init(SysBusDevice *sbd)
 sysbus_init_irq(sbd, >parent[i]);
 }
 s->irq = 31;
-memory_region_init_io(>iomem, OBJECT(s), _sic_ops, s,
+memory_region_init_io(>iomem, obj, _sic_ops, s,
   "vpb-sic", 0x1000);
 sysbus_init_mmio(sbd, >iomem);
-return 0;
 }
 
 /* Board init.  */
@@ -424,9 +424,7 @@ machine_init(versatile_machine_init)
 static void vpb_sic_class_init(ObjectClass *klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
-SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
-k->init = vpb_sic_init;
 dc->vmsd = _vpb_sic;
 }
 
@@ -434,6 +432,7 @@ static const TypeInfo vpb_sic_info = {
 .name  = TYPE_VERSATILE_PB_SIC,
 .parent= TYPE_SYS_BUS_DEVICE,
 .instance_size = sizeof(vpb_sic_state),
+.instance_init = vpb_sic_init,
 .class_init= vpb_sic_class_init,
 };
 
-- 
2.1.4





[Qemu-devel] [PATCH 0/9] some QOM'ify work under hw/arm

2016-03-06 Thread xiaoqiang zhao
This patch set trying to QOM'ify code under hw/arm directory.
As previous patches to hw/timer/*, we use instance_init instead of
the SysBus's init function. 


xiaoqiang zhao (9):
  hw/arm: QOM'ify armv7m.c
  hw/arm: QOM'ify highbank.c
  hw/arm: QOM'ify integratorcp.c
  hw/arm: QOM'ify pxa2xx.c
  hw/arm: QOM'ify pxa2xx_pic.c
  hw/arm: QOM'ify spitz.c
  hw/arm: QOM'ify stellaris.c
  hw/arm: QOM'ify strongarm.c
  hw/arm: QOM'ify versatilepb.c

 hw/arm/armv7m.c   | 11 -
 hw/arm/highbank.c | 12 --
 hw/arm/integratorcp.c | 32 ++---
 hw/arm/pxa2xx.c   | 26 +---
 hw/arm/pxa2xx_pic.c   |  7 --
 hw/arm/spitz.c| 23 +++---
 hw/arm/stellaris.c| 48 ++---
 hw/arm/strongarm.c| 66 ++-
 hw/arm/versatilepb.c  | 13 +-
 9 files changed, 100 insertions(+), 138 deletions(-)

-- 
2.1.4





[Qemu-devel] [PATCH 2/9] hw/arm: QOM'ify highbank.c

2016-03-06 Thread xiaoqiang zhao
Drop the use of old SysBus init function and use instance_init

Signed-off-by: xiaoqiang zhao 
---
 hw/arm/highbank.c | 12 +---
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/hw/arm/highbank.c b/hw/arm/highbank.c
index e25cf5e..8f38dff 100644
--- a/hw/arm/highbank.c
+++ b/hw/arm/highbank.c
@@ -167,23 +167,20 @@ static void highbank_regs_reset(DeviceState *dev)
 s->regs[0x43] = 0x05F40121;
 }
 
-static int highbank_regs_init(SysBusDevice *dev)
+static void highbank_regs_init(Object *obj)
 {
-HighbankRegsState *s = HIGHBANK_REGISTERS(dev);
+HighbankRegsState *s = HIGHBANK_REGISTERS(obj);
+SysBusDevice *dev = SYS_BUS_DEVICE(obj);
 
-memory_region_init_io(>iomem, OBJECT(s), _mem_ops, s->regs,
+memory_region_init_io(>iomem, obj, _mem_ops, s->regs,
   "highbank_regs", 0x1000);
 sysbus_init_mmio(dev, >iomem);
-
-return 0;
 }
 
 static void highbank_regs_class_init(ObjectClass *klass, void *data)
 {
-SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
 DeviceClass *dc = DEVICE_CLASS(klass);
 
-sbc->init = highbank_regs_init;
 dc->desc = "Calxeda Highbank registers";
 dc->vmsd = _highbank_regs;
 dc->reset = highbank_regs_reset;
@@ -193,6 +190,7 @@ static const TypeInfo highbank_regs_info = {
 .name  = TYPE_HIGHBANK_REGISTERS,
 .parent= TYPE_SYS_BUS_DEVICE,
 .instance_size = sizeof(HighbankRegsState),
+.instance_init = highbank_regs_init,
 .class_init= highbank_regs_class_init,
 };
 
-- 
2.1.4





[Qemu-devel] [PATCH 1/9] hw/arm: QOM'ify armv7m.c

2016-03-06 Thread xiaoqiang zhao
Drop the use of old SysBus init function and use instance_init

Signed-off-by: xiaoqiang zhao 
---
 hw/arm/armv7m.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/hw/arm/armv7m.c b/hw/arm/armv7m.c
index ed7d97f..139247e 100644
--- a/hw/arm/armv7m.c
+++ b/hw/arm/armv7m.c
@@ -129,14 +129,14 @@ typedef struct {
 uint32_t base;
 } BitBandState;
 
-static int bitband_init(SysBusDevice *dev)
+static void bitband_init(Object *obj)
 {
-BitBandState *s = BITBAND(dev);
+BitBandState *s = BITBAND(obj);
+SysBusDevice *dev = SYS_BUS_DEVICE(obj);
 
-memory_region_init_io(>iomem, OBJECT(s), _ops, >base,
+memory_region_init_io(>iomem, obj, _ops, >base,
   "bitband", 0x0200);
 sysbus_init_mmio(dev, >iomem);
-return 0;
 }
 
 static void armv7m_bitband_init(void)
@@ -241,9 +241,7 @@ static Property bitband_properties[] = {
 static void bitband_class_init(ObjectClass *klass, void *data)
 {
 DeviceClass *dc = DEVICE_CLASS(klass);
-SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
 
-k->init = bitband_init;
 dc->props = bitband_properties;
 }
 
@@ -251,6 +249,7 @@ static const TypeInfo bitband_info = {
 .name  = TYPE_BITBAND,
 .parent= TYPE_SYS_BUS_DEVICE,
 .instance_size = sizeof(BitBandState),
+.instance_init = bitband_init,
 .class_init= bitband_class_init,
 };
 
-- 
2.1.4





Re: [Qemu-devel] [PATCHv9 09/10] qapi-schema, qemu-options & slirp: Adding Qemu options for IPv6 addresses

2016-03-06 Thread Jason Wang


On 02/23/2016 03:28 AM, Samuel Thibault wrote:
> From: Yann Bordenave 
>
> This patch adds parameters to manage some new options in the qemu -net
> command.
> Slirp IPv6 address, network prefix, and DNS IPv6 address can be given in
> argument to the qemu command.
> Defaults parameters are respectively fec0::2, fec0::, /64 and fec0::3.
>
> Signed-off-by: Yann Bordenave 
> Signed-off-by: Samuel Thibault 
> ---
>  net/net.c| 31 +
>  net/slirp.c  | 69 
> +++-
>  qapi-schema.json | 40 
>  qemu-options.hx  | 18 +--
>  slirp/libslirp.h |  8 ---
>  slirp/slirp.c| 16 +++--
>  6 files changed, 150 insertions(+), 32 deletions(-)
>
> diff --git a/net/net.c b/net/net.c
> index aebf753..d67904f 100644

[...]

>  
>  while (slirp_configs) {
> diff --git a/qapi-schema.json b/qapi-schema.json
> index 8d04897..a62c2ec 100644
> --- a/qapi-schema.json
> +++ b/qapi-schema.json
> @@ -2399,6 +2399,14 @@
>  # @dnssearch: #optional list of DNS suffixes to search, passed as DHCP option
>  # to the guest
>  #
> +# @ip6-prefix: #optional IPv6 network prefix (default is fec0::) (since 2.6)
> +#
> +# @ip6-prefixlen: #optional IPv6 network prefix length (default is 64) 
> (since 2.6)
> +#
> +# @ip6-host: #optional guest-visible IPv6 address of the host (since 2.6)
> +#
> +# @ip6-dns: #optional guest-visible IPv6 address of the virtual nameserver 
> (since 2.6)
> +#
>  # @smb: #optional root directory of the built-in SMB server
>  #
>  # @smbserver: #optional IP address of the built-in SMB server
> @@ -2412,20 +2420,24 @@
>  ##
>  { 'struct': 'NetdevUserOptions',
>'data': {
> -'*hostname':  'str',
> -'*restrict':  'bool',
> -'*ip':'str',
> -'*net':   'str',
> -'*host':  'str',
> -'*tftp':  'str',
> -'*bootfile':  'str',
> -'*dhcpstart': 'str',
> -'*dns':   'str',
> -'*dnssearch': ['String'],
> -'*smb':   'str',
> -'*smbserver': 'str',
> -'*hostfwd':   ['String'],
> -'*guestfwd':  ['String'] } }
> +'*hostname':'str',
> +'*restrict':'bool',
> +'*ip':  'str',
> +'*net': 'str',
> +'*host':'str',
> +'*tftp':'str',
> +'*bootfile':'str',
> +'*dhcpstart':   'str',
> +'*dns': 'str',
> +'*dnssearch':   ['String'],
> +'*ip6-prefix':  'str',
> +'*ip6-prefixlen':   'int',
> +'*ip6-host':'str',
> +'*ip6-dns': 'str',
> +'*smb': 'str',
> +'*smbserver':   'str',
> +'*hostfwd': ['String'],
> +'*guestfwd':['String'] } }

Please do unnecessary changes like this since it will cause extra effort
for git blame and other things.




Re: [Qemu-devel] [PATCHv9 06/10] slirp: Reindent after refactoring

2016-03-06 Thread Jason Wang


On 02/23/2016 03:28 AM, Samuel Thibault wrote:
> From: Guillaume Subiron 
>
> No code change.
>
> Signed-off-by: Guillaume Subiron 
> Signed-off-by: Samuel Thibault 
> Reviewed-by: Thomas Huth 
> ---
>  slirp/tcp_input.c  | 92 
> +++---
>  slirp/tcp_output.c | 29 +
>  slirp/tcp_subr.c   | 50 ++---

How about other files in slirp/ ?



Re: [Qemu-devel] [PATCHv9 03/10] slirp: Adding IPv6 UDP support

2016-03-06 Thread Jason Wang


On 02/23/2016 03:28 AM, Samuel Thibault wrote:
> From: Guillaume Subiron 
>
> This adds the sin6 case in the fhost and lhost unions and related macros.
> It adds udp6_input() and udp6_output().
> It adds the IPv6 case in sorecvfrom().
> Finally, udp_input() is called by ip6_input().
>
> Signed-off-by: Guillaume Subiron 
> Signed-off-by: Samuel Thibault 
> Reviewed-by: Thomas Huth 
> ---
>  slirp/Makefile.objs |   2 +-
>  slirp/ip6_input.c   |   2 +-
>  slirp/socket.c  |   5 ++
>  slirp/socket.h  |   6 ++
>  slirp/udp.h |   5 ++
>  slirp/udp6.c| 157 
> 
>  6 files changed, 175 insertions(+), 2 deletions(-)
>  create mode 100644 slirp/udp6.c
>
> diff --git a/slirp/Makefile.objs b/slirp/Makefile.objs
> index 4e3a289..6748e4f 100644
> --- a/slirp/Makefile.objs
> +++ b/slirp/Makefile.objs
> @@ -1,5 +1,5 @@
>  common-obj-y = cksum.o if.o ip_icmp.o ip6_icmp.o ip6_input.o ip6_output.o \
> ip_input.o ip_output.o dnssearch.o
>  common-obj-y += slirp.o mbuf.o misc.o sbuf.o socket.o tcp_input.o 
> tcp_output.o
> -common-obj-y += tcp_subr.o tcp_timer.o udp.o bootp.o tftp.o arp_table.o \
> +common-obj-y += tcp_subr.o tcp_timer.o udp.o udp6.o bootp.o tftp.o 
> arp_table.o \
>  ndp_table.o
> diff --git a/slirp/ip6_input.c b/slirp/ip6_input.c
> index b6a438d..9ca6d32 100644
> --- a/slirp/ip6_input.c
> +++ b/slirp/ip6_input.c
> @@ -58,7 +58,7 @@ void ip6_input(struct mbuf *m)
>  icmp6_send_error(m, ICMP6_UNREACH, ICMP6_UNREACH_NO_ROUTE);
>  break;
>  case IPPROTO_UDP:
> -icmp6_send_error(m, ICMP6_UNREACH, ICMP6_UNREACH_NO_ROUTE);
> +udp6_input(m);
>  break;
>  case IPPROTO_ICMPV6:
>  icmp6_input(m);
> diff --git a/slirp/socket.c b/slirp/socket.c
> index 32b1ba3..b79ddec 100644
> --- a/slirp/socket.c
> +++ b/slirp/socket.c
> @@ -541,7 +541,12 @@ sorecvfrom(struct socket *so)
>  (struct sockaddr_in *) ,
>  so->so_iptos);
>   break;
> + case AF_INET6:
> + udp6_output(so, m, (struct sockaddr_in6 *) ,
> + (struct sockaddr_in6 *) );
> + break;
>   default:
> + g_assert_not_reached();
>   break;
>   }
> } /* rx error */
> diff --git a/slirp/socket.h b/slirp/socket.h
> index bcebce1..e9c9b05 100644
> --- a/slirp/socket.h
> +++ b/slirp/socket.h
> @@ -34,17 +34,23 @@ struct socket {
>union {   /* foreign host */
>struct sockaddr_storage ss;
>struct sockaddr_in sin;
> +  struct sockaddr_in6 sin6;
>} fhost;
>  #define so_faddr fhost.sin.sin_addr
>  #define so_fport fhost.sin.sin_port
> +#define so_faddr6 fhost.sin6.sin6_addr
> +#define so_fport6 fhost.sin6.sin6_port
>  #define so_ffamily fhost.ss.ss_family
>  
>union {   /* local host */
>struct sockaddr_storage ss;
>struct sockaddr_in sin;
> +  struct sockaddr_in6 sin6;
>} lhost;
>  #define so_laddr lhost.sin.sin_addr
>  #define so_lport lhost.sin.sin_port
> +#define so_laddr6 lhost.sin6.sin6_addr
> +#define so_lport6 lhost.sin6.sin6_port
>  #define so_lfamily lhost.ss.ss_family
>  
>uint8_tso_iptos;   /* Type of service */
> diff --git a/slirp/udp.h b/slirp/udp.h
> index 2f9de38..10cc780 100644
> --- a/slirp/udp.h
> +++ b/slirp/udp.h
> @@ -83,4 +83,9 @@ struct socket * udp_listen(Slirp *, uint32_t, u_int, 
> uint32_t, u_int,
>  int udp_output(struct socket *so, struct mbuf *m,
>  struct sockaddr_in *saddr, struct sockaddr_in *daddr,
>  int iptos);
> +
> +void udp6_input(register struct mbuf *);
> +int udp6_output(struct socket *so, struct mbuf *m,
> +struct sockaddr_in6 *saddr, struct sockaddr_in6 *daddr);
> +
>  #endif
> diff --git a/slirp/udp6.c b/slirp/udp6.c
> new file mode 100644
> index 000..8952de2
> --- /dev/null
> +++ b/slirp/udp6.c
> @@ -0,0 +1,157 @@
> +/*
> + * Copyright (c) 2013
> + * Guillaume Subiron
> + */
> +
> +#include "slirp.h"
> +#include "qemu/osdep.h"
> +#include "udp.h"
> +
> +void udp6_input(struct mbuf *m)
> +{
> +Slirp *slirp = m->slirp;
> +struct ip6 *ip, save_ip;
> +struct udphdr *uh;
> +int hlen = sizeof(struct ip6);
> +int len;
> +struct socket *so;
> +struct sockaddr_in6 lhost;
> +
> +DEBUG_CALL("udp6_input");
> +DEBUG_ARG("m = %lx", (long)m);
> +
> +if (slirp->restricted) {
> +goto bad;
> +}
> +
> +ip = mtod(m, struct ip6 *);
> +m->m_len -= hlen;
> +m->m_data += hlen;
> +uh = mtod(m, struct udphdr *);
> +m->m_len += hlen;
> +m->m_data -= hlen;
> +
> +if (ip6_cksum(m)) {
> +goto bad;
> +}
> +
> +len = ntohs((uint16_t)uh->uh_ulen);
> +
> +/*
> + * Make mbuf data length reflect UDP length.
> + * If not enough data to 

Re: [Qemu-devel] [PATCHv9 02/10] slirp: Adding ICMPv6 error sending

2016-03-06 Thread Jason Wang


On 02/23/2016 03:28 AM, Samuel Thibault wrote:
> From: Yann Bordenave 
>
> Disambiguation : icmp_error is renamed into icmp_send_error, since it
> doesn't manage errors, but only sends ICMP Error messages.
>
> Adding icmp6_send_error to send ICMPv6 Error messages. This function is
> simpler than the v4 version.
> Adding some calls in various functions to send ICMP errors, when a
> received packet is too big, or when its hop limit is 0.

Let's split this into two patches to avoid mixing renaming and new
function introduction.



Re: [Qemu-devel] [PATCHv9 01/10] slirp: Adding IPv6, ICMPv6 Echo and NDP autoconfiguration

2016-03-06 Thread Jason Wang


On 02/23/2016 03:28 AM, Samuel Thibault wrote:
> From: Guillaume Subiron 
>
> This patch adds the functions needed to handle IPv6 packets. ICMPv6 and
> NDP headers are implemented.
>
> Slirp is now able to send NDP Router or Neighbor Advertisement when it
> receives Router or Neighbor Solicitation. Using a 64bit-sized IPv6
> prefix, the guest is now able to perform stateless autoconfiguration
> (SLAAC) and to compute its IPv6 address.
>
> This patch adds an ndp_table, mainly inspired by arp_table, to keep an
> NDP cache and manage network address resolution.
> Slirp regularly sends NDP Neighbor Advertisement, as recommended by the
> RFC, to make the guest refresh its route.
>
> This also adds ip6_cksum() to compute ICMPv6 checksums using IPv6
> pseudo-header.
>
> Some #define ETH_* are moved upper in slirp.h to make them accessible to
> other slirp/*.h
>
> Signed-off-by: Guillaume Subiron 
> Signed-off-by: Samuel Thibault 
> Reviewed-by: Thomas Huth 
> ---
>  slirp/Makefile.objs |   6 +-
>  slirp/cksum.c   |  25 
>  slirp/if.c  |   2 +-
>  slirp/ip6.h | 138 +
>  slirp/ip6_icmp.c| 347 
> 
>  slirp/ip6_icmp.h| 203 ++
>  slirp/ip6_input.c   |  69 +++
>  slirp/ip6_output.c  |  38 ++
>  slirp/ndp_table.c   |  84 +
>  slirp/slirp.c   |  51 +++-
>  slirp/slirp.h   |  37 ++
>  slirp/socket.h  |   7 ++
>  12 files changed, 1001 insertions(+), 6 deletions(-)
>  create mode 100644 slirp/ip6.h
>  create mode 100644 slirp/ip6_icmp.c
>  create mode 100644 slirp/ip6_icmp.h
>  create mode 100644 slirp/ip6_input.c
>  create mode 100644 slirp/ip6_output.c
>  create mode 100644 slirp/ndp_table.c
>

[...]

> +
> +/*
> + * Switch out to protocol's input routine.
> + */
> +switch (ip6->ip_nh) {
> +#if 0
> +case IPPROTO_TCP:
> +tcp_input(m, hlen, (struct socket *)NULL);
> +break;
> +case IPPROTO_UDP:
> +udp_input(m, hlen);
> +break;
> +#endif

This looks odd, why need this?





Re: [Qemu-devel] [RFC qemu 0/4] A PV solution for live migration optimization

2016-03-06 Thread Li, Liang Z
> > No. And it's exactly what I mean. The ballooned memory is still
> > processed during live migration without skipping. The live migration code is
> in migration/ram.c.
> 
> So if guest acknowledged VIRTIO_BALLOON_F_MUST_TELL_HOST, we can
> teach qemu to skip these pages.
> Want to write a patch to do this?
> 

Yes, we really can teach qemu to skip these pages and it's not hard.  
The problem is the poor performance, this PV solution is aimed to make it more
efficient and reduce the performance impact on guest.

> > >
> > > > > > The only advantage of ' inflating the balloon before live
> > > > > > migration' is simple,
> > > > > nothing more.
> > > > >
> > > > > That's a big advantage.  Another one is that it does something
> > > > > useful in real- world scenarios.
> > > > >
> > > >
> > > > I don't think the heave performance impaction is something useful
> > > > in real
> > > world scenarios.
> > > >
> > > > Liang
> > > > > Roman.
> > >
> > > So fix the performance then. You will have to try harder if you want
> > > to convince people that the performance is due to bad host/guest
> > > interface, and so we have to change *that*.
> > >
> >
> > Actually, the PV solution is irrelevant with the balloon mechanism, I
> > just use it to transfer information between host and guest.
> > I am not sure if I should implement a new virtio device, and I want to
> > get the answer from the community.
> > In this RFC patch, to make things simple, I choose to extend the
> > virtio-balloon and use the extended interface to transfer the request and
> free_page_bimap content.
> >
> > I am not intend to change the current virtio-balloon implementation.
> >
> > Liang
> 
> And the answer would depend on the answer to my question above.
> Does balloon need an interface passing page bitmaps around?

Yes, I need a new interface.

> Does this speed up any operations?

No, a new interface will not speed up anything, but it is the easiest way to 
solve the compatibility issue.

> OTOH what if you use the regular balloon interface with your patches?
>

The regular balloon interfaces have their specific function and I can't use 
them in my patches.
If using these regular interface, I have to do a lot of changes to keep the 
compatibility. 

> 
> > > --
> > > MST



Re: [Qemu-devel] [PATCHv9 0/10] slirp: Adding IPv6 support to Qemu -net user mode

2016-03-06 Thread Jason Wang


On 02/23/2016 03:28 AM, Samuel Thibault wrote:
> Hello,
>
> This is another respin of IPv6 in Qemu -net user mode.
>
>
> These patches add ICMPv6, NDP, make UDP and TCP compatible with IPv6, and add
> TFTP over IPv6.
>

Thanks a lot for the patches. Looks good overall, but see some issues:

- the series does not build on w32:
In file included from ./slirp/slirp.h:126:0,
 from stubs/slirp.c:3:
./slirp/ip6.h:9:24: fatal error: netinet/in.h: No such file or directory
compilation terminated.
- Lots of checkpatch warnings, let's try to silent it.
- The patches do not apply to master cleanly.
- I expects a unit-test for this. You may want to have a look at the
pxe-test in tests/, I think it could be extended to test ipv6 slirp somehow.

And also some nits, see individual patches.

> Difference with version 8 is:
> - Fix freeing random number generator
> - Fix coding style
> - Fix prefix lengths in in6_equal_dns macro
>
> Here is a summary of the patches:
> Guillaume Subiron (7):
>   slirp: Adding IPv6, ICMPv6 Echo and NDP autoconfiguration
>   slirp: Adding IPv6 UDP support
>   slirp: Factorizing tcpiphdr structure with an union
>   slirp: Generalizing and neutralizing various TCP functions before
> adding IPv6 stuff
>   slirp: Reindent after refactoring
>   slirp: Handle IPv6 in TCP functions
>   slirp: Adding IPv6 address for DNS relay
>
> Thomas Huth (1):
>   slirp: Add IPv6 support to the TFTP code
>
> Yann Bordenave (2):
>   slirp: Adding ICMPv6 error sending
>   qapi-schema, qemu-options & slirp: Adding Qemu options for IPv6
> addresses
>
>  net/net.c   |  31 
>  net/slirp.c |  69 -
>  qapi-schema.json|  40 +++--
>  qemu-options.hx |  18 ++-
>  slirp/Makefile.objs |   6 +-
>  slirp/cksum.c   |  25 
>  slirp/if.c  |   2 +-
>  slirp/if.h  |   4 +-
>  slirp/ip6.h | 142 ++
>  slirp/ip6_icmp.c| 411 
> 
>  slirp/ip6_icmp.h| 213 +++
>  slirp/ip6_input.c   |  73 ++
>  slirp/ip6_output.c  |  38 +
>  slirp/ip_icmp.c |  12 +-
>  slirp/ip_icmp.h |   4 +-
>  slirp/ip_input.c|  10 +-
>  slirp/libslirp.h|   8 +-
>  slirp/mbuf.c|   4 +-
>  slirp/ndp_table.c   |  84 +++
>  slirp/slirp.c   |  81 +--
>  slirp/slirp.h   |  43 +-
>  slirp/socket.c  |  54 ++-
>  slirp/socket.h  |  13 ++
>  slirp/tcp.h |   2 +
>  slirp/tcp_input.c   | 181 ---
>  slirp/tcp_output.c  |  51 +--
>  slirp/tcp_subr.c| 114 +++
>  slirp/tcp_timer.c   |   3 +-
>  slirp/tcpip.h   |  40 -
>  slirp/tftp.c| 133 +
>  slirp/tftp.h|   7 +-
>  slirp/udp.c |  19 ++-
>  slirp/udp.h |   5 +
>  slirp/udp6.c| 167 +
>  34 files changed, 1877 insertions(+), 230 deletions(-)
>  create mode 100644 slirp/ip6.h
>  create mode 100644 slirp/ip6_icmp.c
>  create mode 100644 slirp/ip6_icmp.h
>  create mode 100644 slirp/ip6_input.c
>  create mode 100644 slirp/ip6_output.c
>  create mode 100644 slirp/ndp_table.c
>  create mode 100644 slirp/udp6.c
>




Re: [Qemu-devel] [RFC PATCH v1 08/10] spapr: CPU hotplug support

2016-03-06 Thread Bharata B Rao
On Mon, Mar 07, 2016 at 02:49:06PM +1100, David Gibson wrote:
> On Fri, Mar 04, 2016 at 12:24:19PM +0530, Bharata B Rao wrote:
> > Set up device tree entries for the hotplugged CPU core and use the
> > exising EPOW event infrastructure to send CPU hotplug notification to
> > the guest.
> > 
> > Signed-off-by: Bharata B Rao 
> > ---
> >  hw/ppc/spapr.c  | 73 
> > -
> >  hw/ppc/spapr_cpu_core.c | 60 +
> >  hw/ppc/spapr_events.c   |  3 ++
> >  hw/ppc/spapr_rtas.c | 24 ++
> >  include/hw/ppc/spapr.h  |  4 +++
> >  include/hw/ppc/spapr_cpu_core.h |  2 ++
> >  6 files changed, 165 insertions(+), 1 deletion(-)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index 5acb612..6c4ac50 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -603,6 +603,18 @@ static void spapr_populate_cpu_dt(CPUState *cs, void 
> > *fdt, int offset,
> >  size_t page_sizes_prop_size;
> >  uint32_t vcpus_per_socket = smp_threads * smp_cores;
> >  uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
> > +sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
> > +sPAPRDRConnector *drc;
> > +sPAPRDRConnectorClass *drck;
> > +int drc_index;
> > +
> > +if (smc->dr_cpu_enabled) {
> > +drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index);
> > +g_assert(drc);
> > +drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> > +drc_index = drck->get_index(drc);
> > +_FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", 
> > drc_index)));
> > +}
> >  
> >  /* Note: we keep CI large pages off for now because a 64K capable guest
> >   * provisioned with large pages might otherwise try to map a qemu
> > @@ -987,6 +999,16 @@ static void spapr_finalize_fdt(sPAPRMachineState 
> > *spapr,
> >  _FDT(spapr_drc_populate_dt(fdt, 0, NULL, 
> > SPAPR_DR_CONNECTOR_TYPE_LMB));
> >  }
> >  
> > +if (smc->dr_cpu_enabled) {
> > +int offset = fdt_path_offset(fdt, "/cpus");
> > +ret = spapr_drc_populate_dt(fdt, offset, NULL,
> > +SPAPR_DR_CONNECTOR_TYPE_CPU);
> > +if (ret < 0) {
> > +error_report("Couldn't set up CPU DR device tree properties");
> > +exit(1);
> > +}
> > +}
> > +
> >  _FDT((fdt_pack(fdt)));
> >  
> >  if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
> > @@ -1181,7 +1203,7 @@ static void ppc_spapr_reset(void)
> >  
> >  }
> >  
> > -static void spapr_cpu_reset(void *opaque)
> > +void spapr_cpu_reset(void *opaque)
> >  {
> >  sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> >  PowerPCCPU *cpu = opaque;
> > @@ -1622,6 +1644,8 @@ static void spapr_boot_set(void *opaque, const char 
> > *boot_device,
> >  void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu, Error 
> > **errp)
> >  {
> >  CPUPPCState *env = >env;
> > +CPUState *cs = CPU(cpu);
> > +int i;
> >  
> >  /* Set time-base frequency to 512 MHz */
> >  cpu_ppc_tb_init(env, TIMEBASE_FREQ);
> > @@ -1646,6 +1670,14 @@ void spapr_cpu_init(sPAPRMachineState *spapr, 
> > PowerPCCPU *cpu, Error **errp)
> >  }
> >  }
> >  
> > +/* Set NUMA node for the added CPUs  */
> > +for (i = 0; i < nb_numa_nodes; i++) {
> > +if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) {
> > +cs->numa_node = i;
> > +break;
> > +}
> > +}
> > +
> 
> This hunk seems like it belongs in a different patch.

It appears that this would be needed by other archs also to set the
NUMA node for the hot-plugged CPU. How about make an API out of this
and use this something like below ? Igor ?

---
diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 0aeefd2..8347234 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -1112,6 +1112,7 @@ void pc_hot_add_cpu(const int64_t id, Error **errp)
 error_propagate(errp, local_err);
 return;
 }
+numa_set_cpu(CPU(cpu));
 object_unref(OBJECT(cpu));
 }
 
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index a42f8c0..f2b3b67 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1645,7 +1645,6 @@ void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU 
*cpu, Error **errp)
 {
 CPUPPCState *env = >env;
 CPUState *cs = CPU(cpu);
-int i;
 
 /* Set time-base frequency to 512 MHz */
 cpu_ppc_tb_init(env, TIMEBASE_FREQ);
@@ -1671,12 +1670,7 @@ void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU 
*cpu, Error **errp)
 }
 
 /* Set NUMA node for the added CPUs  */
-for (i = 0; i < nb_numa_nodes; i++) {
-if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) {
-cs->numa_node = i;
-break;
-}
-}
+numa_set_cpu(cs);
 
 xics_cpu_setup(spapr->icp, cpu);

Re: [Qemu-devel] [PATCH v4 07/26] crypto: add support for the serpent cipher algorithm

2016-03-06 Thread Fam Zheng
On Mon, 02/29 12:00, Daniel P. Berrange wrote:
> New cipher algorithms 'serpent-128', 'serpent-192' and
> 'serpent-256' are defined for the Serpent algorithm.
> 
> The nettle and gcrypt cipher backends are updated to
> support the new cipher and a test vector added to the
> cipher test suite. The new algorithm is enabled in the
> LUKS block encryption driver.
> 
> Signed-off-by: Daniel P. Berrange 

Reviewed-by: Fam Zheng 



Re: [Qemu-devel] [PATCH v4 06/26] crypto: add support for the cast5-128 cipher algorithm

2016-03-06 Thread Fam Zheng
On Mon, 02/29 12:00, Daniel P. Berrange wrote:
> A new cipher algorithm 'cast-5-128' is defined for the
> Cast-5 algorithm with 128 bit key size. Smaller key sizes
> are supported by Cast-5, but nothing in QEMU should use
> them, so only 128 bit keys are permitted.
> 
> The nettle and gcrypt cipher backends are updated to
> support the new cipher and a test vector added to the
> cipher test suite. The new algorithm is enabled in the
> LUKS block encryption driver.
> 
> Signed-off-by: Daniel P. Berrange 

Reviewed-by: Fam Zheng 



Re: [Qemu-devel] [PATCH v4 05/26] crypto: skip testing of unsupported cipher algorithms

2016-03-06 Thread Fam Zheng
On Mon, 02/29 12:00, Daniel P. Berrange wrote:
> We don't guarantee that all crypto backends will support
> all cipher algorithms, so we should skip tests unless
> the crypto backend indicates support.
> 
> Signed-off-by: Daniel P. Berrange 
> ---
>  tests/test-crypto-cipher.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
> 
> diff --git a/tests/test-crypto-cipher.c b/tests/test-crypto-cipher.c
> index 9f912ec..7a073e9 100644
> --- a/tests/test-crypto-cipher.c
> +++ b/tests/test-crypto-cipher.c
> @@ -380,7 +380,9 @@ int main(int argc, char **argv)
>  g_assert(qcrypto_init(NULL) == 0);
>  
>  for (i = 0; i < G_N_ELEMENTS(test_data); i++) {
> -g_test_add_data_func(test_data[i].path, _data[i], test_cipher);
> +if (qcrypto_cipher_supports(test_data[i].alg)) {
> +g_test_add_data_func(test_data[i].path, _data[i], 
> test_cipher);
> +}
>  }
>  
>  g_test_add_func("/crypto/cipher/null-iv",
> -- 
> 2.5.0
> 

Reviewed-by: Fam Zheng 



Re: [Qemu-devel] [PATCH v4 04/26] crypto: add support for anti-forensic split algorithm

2016-03-06 Thread Fam Zheng
On Mon, 02/29 12:00, Daniel P. Berrange wrote:
> The LUKS format specifies an anti-forensic split algorithm which
> is used to artificially expand the size of the key material on
> disk. This is an implementation of that algorithm.
> 
> Signed-off-by: Daniel P. Berrange 
> ---
>  crypto/Makefile.objs|   1 +
>  crypto/afsplit.c| 158 
>  include/crypto/afsplit.h| 135 +++
>  tests/.gitignore|   1 +
>  tests/Makefile  |   2 +
>  tests/test-crypto-afsplit.c | 190 
> 
>  6 files changed, 487 insertions(+)
>  create mode 100644 crypto/afsplit.c
>  create mode 100644 include/crypto/afsplit.h
>  create mode 100644 tests/test-crypto-afsplit.c
> 
> diff --git a/crypto/Makefile.objs b/crypto/Makefile.objs
> index 3040989..5136737 100644
> --- a/crypto/Makefile.objs
> +++ b/crypto/Makefile.objs
> @@ -17,6 +17,7 @@ crypto-obj-y += ivgen.o
>  crypto-obj-y += ivgen-essiv.o
>  crypto-obj-y += ivgen-plain.o
>  crypto-obj-y += ivgen-plain64.o
> +crypto-obj-y += afsplit.o
>  
>  # Let the userspace emulators avoid linking gnutls/etc
>  crypto-aes-obj-y = aes.o
> diff --git a/crypto/afsplit.c b/crypto/afsplit.c
> new file mode 100644
> index 000..8074913
> --- /dev/null
> +++ b/crypto/afsplit.c
> @@ -0,0 +1,158 @@
> +/*
> + * QEMU Crypto anti forensic information splitter
> + *
> + * Copyright (c) 2015-2016 Red Hat, Inc.
> + *
> + * Derived from cryptsetup package lib/luks1/af.c
> + *
> + * Copyright (C) 2004, Clemens Fruhwirth 
> + * Copyright (C) 2009-2012, Red Hat, Inc. All rights reserved.
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see 
> .
> + *
> + */
> +
> +#include "qemu/osdep.h"
> +#include "crypto/afsplit.h"
> +#include "crypto/random.h"
> +
> +
> +static void qcrypto_afsplit_xor(size_t blocklen,
> +const uint8_t *in1,
> +const uint8_t *in2,
> +uint8_t *out)
> +{
> +size_t i;
> +for (i = 0; i < blocklen; i++) {
> +out[i] = in1[i] ^ in2[i];
> +}
> +}
> +
> +
> +static int qcrypto_afsplit_hash(QCryptoHashAlgorithm hash,
> +size_t blocklen,
> +uint8_t *block,
> +Error **errp)
> +{
> +size_t digestlen = qcrypto_hash_digest_len(hash);
> +
> +size_t hashcount = blocklen / digestlen;

Do you want to use DIV_ROUND_UP? Because if blocklen < digestlen, hashcount is
0, and your for loop below will be skipped.

Fam

> +size_t finallen = blocklen % digestlen;
> +uint32_t i;
> +
> +if (finallen) {
> +hashcount++;
> +} else {
> +finallen = digestlen;
> +}
> +
> +for (i = 0; i < hashcount; i++) {
> +uint8_t *out = NULL;
> +size_t outlen = 0;
> +uint32_t iv = cpu_to_be32(i);
> +struct iovec in[] = {
> +{ .iov_base = ,
> +  .iov_len = sizeof(iv) },
> +{ .iov_base = block + (i * digestlen),
> +  .iov_len = (i == (hashcount - 1)) ? finallen : digestlen },
> +};
> +
> +if (qcrypto_hash_bytesv(hash,
> +in,
> +G_N_ELEMENTS(in),
> +, ,
> +errp) < 0) {
> +return -1;
> +}
> +
> +assert(outlen == digestlen);
> +memcpy(block + (i * digestlen), out,
> +   (i == (hashcount - 1)) ? finallen : digestlen);
> +g_free(out);
> +}
> +
> +return 0;
> +}
> +
> +
> +int qcrypto_afsplit_encode(QCryptoHashAlgorithm hash,
> +   size_t blocklen,
> +   uint32_t stripes,
> +   const uint8_t *in,
> +   uint8_t *out,
> +   Error **errp)
> +{
> +uint8_t *block = g_new0(uint8_t, blocklen);
> +size_t i;
> +int ret = -1;
> +
> +for (i = 0; i < (stripes - 1); i++) {
> +if (qcrypto_random_bytes(out + (i * blocklen), blocklen, errp) < 0) {
> +goto cleanup;
> +}
> +
> +qcrypto_afsplit_xor(blocklen,
> + 

Re: [Qemu-devel] [PATCH v4 03/26] crypto: add support for generating initialization vectors

2016-03-06 Thread Fam Zheng
On Mon, 02/29 12:00, Daniel P. Berrange wrote:
> There are a number of different algorithms that can be used
> to generate initialization vectors for disk encryption. This
> introduces a simple internal QCryptoBlockIV object to provide
> a consistent internal API to the different algorithms. The
> initially implemented algorithms are 'plain', 'plain64' and
> 'essiv', each matching the same named algorithm provided
> by the Linux kernel dm-crypt driver.
> 
> Signed-off-by: Daniel P. Berrange 

Reviewed-by: Fam Zheng 



Re: [Qemu-devel] [PATCH v3 3/3] arm: implement query-gic-capability

2016-03-06 Thread Peter Xu
On Mon, Mar 07, 2016 at 06:12:38AM +0100, Andrew Jones wrote:
> On Mon, Mar 07, 2016 at 12:23:28PM +0800, Peter Xu wrote:
> > +#ifdef CONFIG_KVM
> > +/*
> > + * This is merely the same as kvm_create_device(). The only
> > + * difference is we are using raw fds rather than KVMState, so that
> > + * we can use it even without kvm_state initialized.
> > + */
> > +static int kvm_create_device_fds(int kvm_fd, int vmfd,
> > + uint64_t type, bool test)
> 
> I don't think we need this helper function. Who else will call it?
> Particularly without test==true? Anyway, I think three ioctls directly
> called from qmp_query_gic_capability should be OK.

Right. However, I would still consider using a helper function if
you would not mind. E.g, how about this:

#ifdef CONFIG_KVM
/* Test whether KVM support specific device. */
static inline int kvm_support_device(int vmfd, uint64_t type)
{
struct kvm_create_device create_dev = {
.type = type,
.fd = -1,
.flags = KVM_CREATE_DEVICE_TEST,
};
return ioctl(vmfd, KVM_CREATE_DEVICE, _dev);
}
#endif

Thanks.
Peter



Re: [Qemu-devel] [PATCH v2][Outreachy Round 12]

2016-03-06 Thread Sarah Khan
Thank you for your feedback.
Will do the required.
Sorry for the delay as I was out of station.


Thanks,
Sarah

On Sat, Mar 5, 2016 at 4:12 PM, Markus Armbruster  wrote:

> Your commit message isn't quite right, yet.  The first line is empty,
> which leads to
>
> Subject: [PATCH v2][Outreachy Round 12]
>
> in e-mail, which isn't really useful.  It should be a line of the form
>
> subsystem: Headline describing the patch briefly
>
> In e-mail, this becomes something like
>
> Subject: [PATCH v2] subsystem: Headline describing the patch briefly
>
> The [PATCH v2] gets inserted by git-format-patch.
>
> Finding the appropriate subsystem is unfortunately less than
> straightforward.  You can use "git log --oneline FILENAME..." for ideas.
> For your patch, I'd use linux-user.
>
> Since this is a rather busy list, it's important to cc the right people
> on patches.  Start with "scripts/get_maintainer PATCH-FILE".  The script
> looks up the files you patch in MAINTAINERS for you.  In your case, its
> output is
>
> Riku Voipio  (maintainer:Overall)
> qemu-devel@nongnu.org (open list:All patches CC here)
>
> Send the patch to qemu-devel, cc: Riku.
>
> All of this and much more is documented at
> .  It's a learning curve,
> but we'll gladly help you along.
>
> Sarah Khan  writes:
>
> > Replaced g_malloc() with g_new() as it would detect multiplication
> overflow if nb_fields ever oversize.
>
> Long line, please break it like you do in the next paragraph.
>
> > There is no corresponding free(). thunk_register_struct() is called
> > only at startup from the linux-user code in order to populate the
> > struct_entries array; this data structure then remains live for
> > the entire lifetime of the program and is automatically freed when
> > QEMU exits.
> >
> > This replacement was suggested as part of the bite-sized tasks.
> >
> > Signed-off-by: Sarah Khan 
> > ---
> > Changes in v2 :Make commit message clearer
> >  Replace g_malloc() with g_new()
> > ---
> >  thunk.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/thunk.c b/thunk.c
> > index bddabae..6237702 100644
> > --- a/thunk.c
> > +++ b/thunk.c
> > @@ -88,7 +88,7 @@ void thunk_register_struct(int id, const char *name,
> const argtype *types)
> >  for(i = 0;i < 2; i++) {
> >  offset = 0;
> >  max_align = 1;
> > -se->field_offsets[i] = g_malloc(nb_fields * sizeof(int));
> > +se->field_offsets[i] = g_new(int,nb_fields);
> >  type_ptr = se->field_types;
> >  for(j = 0;j < nb_fields; j++) {
> >  size = thunk_type_size(type_ptr, i);
>
> Oh, this is an *incremental* patch: it applies on top of your v1.
> That's awkward.  Your v2 should *replace* v1, not add to it.
>
> Style nitpick: we want a space after comma.  Recommend to check your
> patches with scripts/checkpatch.pl.  Output for this one is:
>
> ERROR: space required after that ',' (ctx:VxV)
> #127: FILE: thunk.c:91:
> +se->field_offsets[i] = g_new(int,nb_fields);
>  ^
>
> total: 1 errors, 0 warnings, 8 lines checked
>
> /home/armbru/Mail/mail/redhat/xlst/qemu-devel/385421 has style
> problems, please review.  If any of these errors
> are false positives report them to the maintainer, see
> CHECKPATCH in MAINTAINERS.
>
> Welcome to qemu-devel, Sarah!
>


Re: [Qemu-devel] [RFC qemu 0/4] A PV solution for live migration optimization

2016-03-06 Thread Li, Liang Z
> > On 04/03/2016 15:26, Li, Liang Z wrote:
> > >> >
> > >> > The memory usage will keep increasing due to ever growing caches,
> > >> > etc, so you'll be left with very little free memory fairly soon.
> > >> >
> > > I don't think so.
> > >
> >
> > Roman is right.  For example, here I am looking at a 64 GB (physical)
> > machine which was booted about 30 minutes ago, and which is running
> > disk-heavy workloads (installing VMs).
> >
> > Since I have started writing this email (2 minutes?), the amount of
> > free memory has already gone down from 37 GB to 33 GB.  I expect that
> > by the time I have finished running the workload, in two hours, it
> > will not have any free memory.
> 
> But what about a VM sitting idle, or that just has more RAM assigned to it
> than is currently using.
>  I've got a host here that's been up for 46 days and has been doing some
> heavy VM debugging a few days ago, but today:
> 
> # free -m
>   totalusedfree  shared  buff/cache   
> available
> Mem:  965361146   44834 184   50555   
> 94735
> 
> I very rarely use all it's RAM, so it's got a big chunk of free RAM, and yes 
> it's
> got a big chunk of cache as well.
> 
> Dave
> 
> >
> > Paolo

I begin to realize Roman's opinions. The PV solution can't handle the cache 
memory while inflating balloon could.
Inflating balloon so as to skipping the cache memory is no good for guest's 
performance.

How much of the free memory in the guest depends on the workload in the VM  and 
the time VM has already run
before live migration. Even the memory usage will keep increasing due to ever 
growing caches, but we don't know
when the live migration will happen, assuming there are no or very little free 
pages in the guest is not quite right.

The advantage of the pv solution is the smaller performance impact, comparing 
with inflating the balloon.

Liang






Re: [Qemu-devel] [PATCH v4 02/26] crypto: add support for PBKDF2 algorithm

2016-03-06 Thread Fam Zheng
On Mon, 02/29 12:00, Daniel P. Berrange wrote:
> The LUKS data format includes use of PBKDF2 (Password-Based
> Key Derivation Function). The Nettle library can provide
> an implementation of this, but we don't want code directly
> depending on a specific crypto library backend. Introduce
> a new include/crypto/pbkdf.h header which defines a QEMU
> API for invoking PBKDK2. The initial implementations are
> backed by nettle & gcrypt, which are commonly available
> with distros shipping GNUTLS.
> 
> The test suite data is taken from the cryptsetup codebase
> under the LGPLv2.1+ license. This merely aims to verify
> that whatever backend we provide for this function in QEMU
> will comply with the spec.
> 
> Signed-off-by: Daniel P. Berrange 

Reviewed-by: Fam Zheng 



Re: [Qemu-devel] [PATCH v3 3/3] arm: implement query-gic-capability

2016-03-06 Thread Andrew Jones
On Mon, Mar 07, 2016 at 12:23:28PM +0800, Peter Xu wrote:
> For emulated GIC capabilities, currently only gicv2 is supported. We
> need to add gicv3 in when emulated gicv3 ready. For KVM accelerated ARM
> VM, we detect the capability bits using ioctls.
> 
> When probing the KVM capabilities, we cannot leverage existing helper
> functions like kvm_create_device() since QEMU might be using TCG while
> probing (actually this is the case for libvirt probing). So, one
> temporary VM is created to do the probing.
> 
> Signed-off-by: Peter Xu 
> ---
>  target-arm/machine.c | 105 
> ++-
>  1 file changed, 104 insertions(+), 1 deletion(-)
> 
> diff --git a/target-arm/machine.c b/target-arm/machine.c
> index 60bd5c1..ff50411 100644
> --- a/target-arm/machine.c
> +++ b/target-arm/machine.c
> @@ -1,3 +1,5 @@
> +#include 
> +#include 
>  #include "qemu/osdep.h"
>  #include "hw/hw.h"
>  #include "hw/boards.h"
> @@ -347,7 +349,108 @@ const char *gicv3_class_name(void)
>  exit(1);
>  }
>  
> +static GICCapability *gic_cap_new(int version)
> +{
> +GICCapability *cap = g_new0(GICCapability, 1);
> +cap->version = version;
> +/* by default, support none */
> +cap->emulated = false;
> +cap->kernel = false;
> +return cap;
> +}
> +
> +static GICCapabilityList *gic_cap_list_add(GICCapabilityList *head,
> +   GICCapability *cap)
> +{
> +GICCapabilityList *item = g_new0(GICCapabilityList, 1);
> +item->value = cap;
> +item->next = head;
> +return item;
> +}
> +
> +#ifdef CONFIG_KVM
> +/*
> + * This is merely the same as kvm_create_device(). The only
> + * difference is we are using raw fds rather than KVMState, so that
> + * we can use it even without kvm_state initialized.
> + */
> +static int kvm_create_device_fds(int kvm_fd, int vmfd,
> + uint64_t type, bool test)

I don't think we need this helper function. Who else will call it?
Particularly without test==true? Anyway, I think three ioctls directly
called from qmp_query_gic_capability should be OK.

> +{
> +int ret;
> +struct kvm_create_device create_dev;
> +
> +create_dev.type = type;
> +create_dev.fd = -1;
> +create_dev.flags = test ? KVM_CREATE_DEVICE_TEST : 0;
> +
> +if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_DEVICE_CTRL) <= 0) {
> +return -ENOTSUP;
> +}
> +
> +ret = ioctl(vmfd, KVM_CREATE_DEVICE, _dev);
> +if (ret) {
> +return ret;
> +}
> +
> +return test ? 0 : create_dev.fd;
> +}
> +#endif
> +
>  GICCapabilityList *qmp_query_gic_capability(Error **errp)
>  {
> -return NULL;
> +GICCapabilityList *head = NULL;
> +GICCapability *v2 = gic_cap_new(2), *v3 = gic_cap_new(3);
> +
> +v2->emulated = true;
> +/* FIXME: we'd change to true after we get emulated GICv3. */
> +v3->emulated = false;
> +
> +#ifdef CONFIG_KVM
> +{
> +/*
> + * HACK: here we create one temporary VM, do the probing,
> + * then release it properly.
> + */
> +int kvm_fd = -1;
> +int vmfd = -1;
> +
> +kvm_fd = qemu_open("/dev/kvm", O_RDWR);
> +if (kvm_fd == -1) {
> +/* KVM may not enabled on host, which is fine. */
> +goto out;
> +}
> +
> +do {
> +/* For ARM, VM type could only be zero now. */
> +vmfd = ioctl(kvm_fd, KVM_CREATE_VM, 0);
> +} while (vmfd == -EINTR);
> +
> +if (vmfd < 0) {
> +goto kvm_fd_close;
> +}
> +
> +/* Test KVM GICv2 */
> +if (kvm_create_device_fds(kvm_fd, vmfd, KVM_DEV_TYPE_ARM_VGIC_V2,
> +  true) >= 0) {
> +v2->kernel = true;
> +}
> +
> +/* Test KVM GICv3 */
> +if (kvm_create_device_fds(kvm_fd, vmfd, KVM_DEV_TYPE_ARM_VGIC_V3,
> +  true) >= 0) {
> +v3->kernel = true;
> +}
> +
> +close(vmfd);
> +kvm_fd_close:
> +close(kvm_fd);
> +}
> +#endif
> +
> +out:
> +head = gic_cap_list_add(head, v2);
> +head = gic_cap_list_add(head, v3);
> +
> +return head;
>  }
> -- 
> 2.4.3
> 
> 



Re: [Qemu-devel] [PATCHv9 0/10] slirp: Adding IPv6 support to Qemu -net user mode

2016-03-06 Thread Jason Wang


On 03/07/2016 12:59 AM, Samuel Thibault wrote:
> Hello,
>
> Jan Kiszka, on Fri 04 Mar 2016 16:50:32 +0100, wrote:
>> On 2016-03-04 09:41, Thomas Huth wrote:
>>> On 22.02.2016 20:28, Samuel Thibault wrote:
 Hello,

 This is another respin of IPv6 in Qemu -net user mode.

 These patches add ICMPv6, NDP, make UDP and TCP compatible with IPv6, and 
 add
 TFTP over IPv6.
>>> *ping*
>>>
>>>  Jan, Jason,
>>>
>>> could you please have a look at this series? Would be cool to include it
>>> for 2.6 so that we'd finally have IPv6 support in Slirp, too.
>>> As far as I could see, the patches look fine now - there was just one
>>> rather cosmetic issue left in patch 9/10, but it should also be ok in
>>> the current shape already, I think, so IMHO no need for a respin.
>> As indicated before, we need someone else than me to manage the slirp
>> system. I'm out of bandwidth for this task, sorry.
> Ok, Jason?
>
> I'm really surprised that a patch that has been reviewed two times (by
> me then by Thomas) seems so difficult to get it.  I understand that
> it's far from trivial, but I have concerns with how qemu can manage
> contributions.
>
> Samuel

Sorry. I wasn't aware of needing a new maintainer for slirp before. Will
look at this series soon and take slirp until we have a new maintainer.

Btw, do you want to be the maintainer of slirp? (Or maybe Tohmas want to
do this?)

Thanks



Re: [Qemu-devel] [PATCH v2 3/3] arm: implement query-gic-capability

2016-03-06 Thread Peter Xu
On Fri, Mar 04, 2016 at 09:43:24AM +0100, Andrea Bolognani wrote:
> On Fri, 2016-03-04 at 10:52 +0800, Peter Xu wrote:
> > Andrea, do you know how much effort we need to add this support for
> > libvirt, say, we can specify "accel=" or "-enable-kvm" as extra
> > parameter when probing?
> 
> I'm afraid this is not going to be possible for the same reason
> we have to use '-M none' when probing: at that point in time, we
> simply have no idea what the guests will look like. Actually,
> it's the other way around, in that the result of probing (host
> and domain capabilities) will influence the guest configuration
> created by the user / management tool.
> 
> And we definitely can't use 'accel=kvm' unconditionally, because
> then we won't be able to probe eg. the qemu-system-aarch64 binary
> installed on a x86_64 host.

Agreed. It is awkward to specify these for probing.

Posting v3 to allow it work even without kvm enabled. I am using the
hacky way to do it since I still have no better way to do it. Let's
see whether we can get more review comments on that.

Thanks!
Peter



[Qemu-devel] [PATCH v3 3/3] arm: implement query-gic-capability

2016-03-06 Thread Peter Xu
For emulated GIC capabilities, currently only gicv2 is supported. We
need to add gicv3 in when emulated gicv3 ready. For KVM accelerated ARM
VM, we detect the capability bits using ioctls.

When probing the KVM capabilities, we cannot leverage existing helper
functions like kvm_create_device() since QEMU might be using TCG while
probing (actually this is the case for libvirt probing). So, one
temporary VM is created to do the probing.

Signed-off-by: Peter Xu 
---
 target-arm/machine.c | 105 ++-
 1 file changed, 104 insertions(+), 1 deletion(-)

diff --git a/target-arm/machine.c b/target-arm/machine.c
index 60bd5c1..ff50411 100644
--- a/target-arm/machine.c
+++ b/target-arm/machine.c
@@ -1,3 +1,5 @@
+#include 
+#include 
 #include "qemu/osdep.h"
 #include "hw/hw.h"
 #include "hw/boards.h"
@@ -347,7 +349,108 @@ const char *gicv3_class_name(void)
 exit(1);
 }
 
+static GICCapability *gic_cap_new(int version)
+{
+GICCapability *cap = g_new0(GICCapability, 1);
+cap->version = version;
+/* by default, support none */
+cap->emulated = false;
+cap->kernel = false;
+return cap;
+}
+
+static GICCapabilityList *gic_cap_list_add(GICCapabilityList *head,
+   GICCapability *cap)
+{
+GICCapabilityList *item = g_new0(GICCapabilityList, 1);
+item->value = cap;
+item->next = head;
+return item;
+}
+
+#ifdef CONFIG_KVM
+/*
+ * This is merely the same as kvm_create_device(). The only
+ * difference is we are using raw fds rather than KVMState, so that
+ * we can use it even without kvm_state initialized.
+ */
+static int kvm_create_device_fds(int kvm_fd, int vmfd,
+ uint64_t type, bool test)
+{
+int ret;
+struct kvm_create_device create_dev;
+
+create_dev.type = type;
+create_dev.fd = -1;
+create_dev.flags = test ? KVM_CREATE_DEVICE_TEST : 0;
+
+if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_DEVICE_CTRL) <= 0) {
+return -ENOTSUP;
+}
+
+ret = ioctl(vmfd, KVM_CREATE_DEVICE, _dev);
+if (ret) {
+return ret;
+}
+
+return test ? 0 : create_dev.fd;
+}
+#endif
+
 GICCapabilityList *qmp_query_gic_capability(Error **errp)
 {
-return NULL;
+GICCapabilityList *head = NULL;
+GICCapability *v2 = gic_cap_new(2), *v3 = gic_cap_new(3);
+
+v2->emulated = true;
+/* FIXME: we'd change to true after we get emulated GICv3. */
+v3->emulated = false;
+
+#ifdef CONFIG_KVM
+{
+/*
+ * HACK: here we create one temporary VM, do the probing,
+ * then release it properly.
+ */
+int kvm_fd = -1;
+int vmfd = -1;
+
+kvm_fd = qemu_open("/dev/kvm", O_RDWR);
+if (kvm_fd == -1) {
+/* KVM may not enabled on host, which is fine. */
+goto out;
+}
+
+do {
+/* For ARM, VM type could only be zero now. */
+vmfd = ioctl(kvm_fd, KVM_CREATE_VM, 0);
+} while (vmfd == -EINTR);
+
+if (vmfd < 0) {
+goto kvm_fd_close;
+}
+
+/* Test KVM GICv2 */
+if (kvm_create_device_fds(kvm_fd, vmfd, KVM_DEV_TYPE_ARM_VGIC_V2,
+  true) >= 0) {
+v2->kernel = true;
+}
+
+/* Test KVM GICv3 */
+if (kvm_create_device_fds(kvm_fd, vmfd, KVM_DEV_TYPE_ARM_VGIC_V3,
+  true) >= 0) {
+v3->kernel = true;
+}
+
+close(vmfd);
+kvm_fd_close:
+close(kvm_fd);
+}
+#endif
+
+out:
+head = gic_cap_list_add(head, v2);
+head = gic_cap_list_add(head, v3);
+
+return head;
 }
-- 
2.4.3




[Qemu-devel] [PATCH v3 1/3] arm: qmp: add GICCapability struct

2016-03-06 Thread Peter Xu
Define new struct to describe whether we support specific GIC version.

Signed-off-by: Peter Xu 
---
 qapi-schema.json | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/qapi-schema.json b/qapi-schema.json
index 7b8f2a1..0b2de6c 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4135,3 +4135,25 @@
 ##
 { 'enum': 'ReplayMode',
   'data': [ 'none', 'record', 'play' ] }
+
+##
+# @GICCapability:
+#
+# This struct describes capability for a specific GIC version. These
+# bits are not only decided by QEMU/KVM software version, but also
+# decided by the hardware that the program is running upon.
+#
+# @version:  version of GIC to be described.
+#
+# @emulated: whether current QEMU/hardware supports emulated GIC
+#device in user space.
+#
+# @kernel:   whether current QEMU/hardware supports hardware
+#accelerated GIC device in kernel.
+#
+# Since: 2.6
+##
+{ 'struct': 'GICCapability',
+  'data': { 'version': 'int',
+'emulated': 'bool',
+'kernel': 'bool' } }
-- 
2.4.3




[Qemu-devel] [PATCH v3 2/3] arm: qmp: add query-gic-capability interface

2016-03-06 Thread Peter Xu
This patch adds the command "query-gic-capability" but not implemnet
it. The command is ARM-only. Return of the command is a list of
GICCapability struct that describes all GIC versions that current QEMU
and system support.

Signed-off-by: Peter Xu 
---
 monitor.c|  8 
 qapi-schema.json | 11 +++
 qmp-commands.hx  | 26 ++
 scripts/qapi.py  |  1 +
 target-arm/machine.c |  6 ++
 5 files changed, 52 insertions(+)

diff --git a/monitor.c b/monitor.c
index 73eac17..3b34feb 100644
--- a/monitor.c
+++ b/monitor.c
@@ -4241,3 +4241,11 @@ void qmp_dump_skeys(const char *filename, Error **errp)
 error_setg(errp, QERR_FEATURE_DISABLED, "dump-skeys");
 }
 #endif
+
+#ifndef TARGET_ARM
+GICCapabilityList *qmp_query_gic_capability(Error **errp)
+{
+error_setg(errp, QERR_FEATURE_DISABLED, "query-gic-capability");
+return NULL;
+}
+#endif
diff --git a/qapi-schema.json b/qapi-schema.json
index 0b2de6c..f42c8f7 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -4157,3 +4157,14 @@
   'data': { 'version': 'int',
 'emulated': 'bool',
 'kernel': 'bool' } }
+
+##
+# @query-gic-capability:
+#
+# Return a list of supported GIC version capabilities.
+#
+# Returns: a list of GICCapability.
+#
+# Since: 2.6
+##
+{ 'command': 'query-gic-capability', 'returns': ['GICCapability'] }
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 13f158d..5e843f2 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -4852,3 +4852,29 @@ Example:
  {"type": 0, "out-pport": 0, "pport": 0, "vlan-id": 3840,
   "pop-vlan": 1, "id": 251658240}
]}
+
+EQMP
+
+#if defined TARGET_ARM
+{
+.name   = "query-gic-capability",
+.args_type  = "",
+.mhandler.cmd_new = qmp_marshal_query_gic_capability,
+},
+#endif
+
+SQMP
+query-gic-capability
+---
+
+Return a list of supported ARM GIC versions and their capabilities.
+
+Arguments: None
+
+Example:
+
+-> { "execute": "query-gic-capability" }
+<- { "return": [{ "version": 2, "emulated": true, "kernel": false },
+{ "version": 3, "emulated": false, "kernel": true } ] }
+
+EQMP
diff --git a/scripts/qapi.py b/scripts/qapi.py
index 849..9dc8f73 100644
--- a/scripts/qapi.py
+++ b/scripts/qapi.py
@@ -46,6 +46,7 @@ returns_whitelist = [
 'query-tpm-models',
 'query-tpm-types',
 'ringbuf-read',
+'query-gic-capability',
 
 # From QGA:
 'guest-file-open',
diff --git a/target-arm/machine.c b/target-arm/machine.c
index 03a73d9..60bd5c1 100644
--- a/target-arm/machine.c
+++ b/target-arm/machine.c
@@ -5,6 +5,7 @@
 #include "sysemu/kvm.h"
 #include "kvm_arm.h"
 #include "internals.h"
+#include "qmp-commands.h"
 
 static bool vfp_needed(void *opaque)
 {
@@ -345,3 +346,8 @@ const char *gicv3_class_name(void)
 
 exit(1);
 }
+
+GICCapabilityList *qmp_query_gic_capability(Error **errp)
+{
+return NULL;
+}
-- 
2.4.3




[Qemu-devel] [PATCH v3 0/3] ARM: add query-gic-capability SMP command

2016-03-06 Thread Peter Xu
v3 changes:
- patch 2: remove func declaration, add qmp header [Drew]
- patch 3: being able to detect KVM GIC capabilities even without
  kvm enabled [Andrea]: this is a little bit hacky, need some more
  review on this.

v2 changes:
- result layout change: use array and dict for the capability bits
  rather than a single array of strings [Andrea/Markus]
- spelling out what GIC is in doc [Eric]

This patch is to add ARM-specific command "query-gic-capability".

The new command can report which kind of GIC device the host/QEMU
support. The returned result is in the form of array.

Sample command and output:

{"execute": "query-gic-capability"}
{"return": [{"emulated": false, "version": 3, "kernel": false},
{"emulated": true, "version": 2, "kernel": true}]}

Testing:

Smoke tests on both x86 (emulated) and another moonshot ARM server.

Peter Xu (3):
  arm: qmp: add GICCapability struct
  arm: qmp: add query-gic-capability interface
  arm: implement query-gic-capability

 monitor.c|   8 
 qapi-schema.json |  33 
 qmp-commands.hx  |  26 
 scripts/qapi.py  |   1 +
 target-arm/machine.c | 109 +++
 5 files changed, 177 insertions(+)

-- 
2.4.3




Re: [Qemu-devel] [PATCH v10 1/3] Add new block driver interface to add/delete a BDS's child

2016-03-06 Thread Changlong Xie

On 03/06/2016 01:27 AM, Max Reitz wrote:

Sorry that I wasn't so pedantic last time; or maybe I should rather be
sorry that I'm so pedantic this time.


Hi Max
Welcome all your comments : )



On 16.02.2016 10:37, Changlong Xie wrote:

From: Wen Congyang 

In some cases, we want to take a quorum child offline, and take
another child online.

Signed-off-by: Wen Congyang 
Signed-off-by: zhanghailiang 
Signed-off-by: Gonglei 
Signed-off-by: Changlong Xie 
Reviewed-by: Eric Blake 
Reviewed-by: Alberto Garcia 
---
  block.c   | 50 +++
  include/block/block.h |  5 +
  include/block/block_int.h |  5 +
  3 files changed, 60 insertions(+)

diff --git a/block.c b/block.c
index efc3c43..08aa979 100644
--- a/block.c
+++ b/block.c
@@ -4399,3 +4399,53 @@ void bdrv_refresh_filename(BlockDriverState *bs)
  QDECREF(json);
  }
  }
+
+/*
+ * Hot add/remove a BDS's child. So the user can take a child offline when
+ * it is broken and take a new child online
+ */
+void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs,
+Error **errp)
+{
+
+if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) {
+error_setg(errp, "The node %s doesn't support adding a child",


As I said in my reply to v9's patch 3 (and I see you've followed it), I
don't quite like contractions in error messages, so I'd rather like this
to be "does not" instead of "doesn't".


Okay



If you don't decide to change this patch, then feel free to leave this
as it is, because that way you can keep Eric's and Berto's R-bs.


+   bdrv_get_device_or_node_name(parent_bs));
+return;
+}
+
+if (!QLIST_EMPTY(_bs->parents)) {
+error_setg(errp, "The node %s already has a parent",
+   child_bs->node_name);
+return;
+}
+
+parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp);
+}
+
+void bdrv_del_child(BlockDriverState *parent_bs, BlockDriverState *child_bs,
+Error **errp)


I wondered before and now I'm wondering why I didn't say anything. Why
is this function taking a BDS pointer as child_bs? Why not just
"BdrvChild *child"? Its only caller will be qmp_x_blockdev_change()
which gets the child BDS from bdrv_find_child(), which could just as
well return a BdrvChild instead of a BDS pointer.

I see two advantages to this: First, it's a bit clearer since this
operation actually does not delete the child *BDS* but only the *edge*
between parent and child, and that edge is what a BdrvChild is.


That's convincing.



And second, some block drivers may prefer the BdrvChild over the BDS
itself. They can always trivially get the BDS from the BdrvChild
structure, but the other way around is difficult.

The only disadvantage I can see is that it then becomes asymmetric to
bdrv_add_child(). But that's fine, it's a different function after all.


As you said, they are different fuctions at all. So i don't think it's a 
big deal.



bdrv_add_child() creates a BdrvChild object (an edge), bdrv_del_child()
deletes it.

(I don't know whether you had this discussion with someone else before,
though. Sorry if you did, I missed it, then.)


+{
+BdrvChild *child;
+
+if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) {
+error_setg(errp, "The node %s doesn't support removing a child",
+   bdrv_get_device_or_node_name(parent_bs));


Again, optional s/doesn't/does not/.


okay




+return;
+}
+
+QLIST_FOREACH(child, _bs->children, next) {
+if (child->bs == child_bs) {
+break;
+}
+}
+
+if (!child) {
+error_setg(errp, "The node %s is not a child of %s",
+   bdrv_get_device_or_node_name(child_bs),
+   bdrv_get_device_or_node_name(parent_bs));


Is there a special reason why you are using
bdrv_get_device_or_node_name() for the child here, while in
bdrv_add_child() you directly use the node name?



Although we directly use the node name in bdrv_add_child(), but we still 
need treat bdrv_del_child() as a separate function, right? In up 
condition, we can't determine if child->bs supports BB or not, so i 
think bdrv_get_device_or_node_name(child->bs) is ok here.



Neither seems wrong to me. A child is unlikely to have a BB of its own,
but if it does, printing its name instead of the node name may be


bdrv_get_device_or_node_name() can do that.

Thanks
-Xie


appropriate. I'm just wondering about the difference between
bdrv_add_child() and bdrv_del_child().

Max


+return;
+}
+
+parent_bs->drv->bdrv_del_child(parent_bs, child_bs, errp);
+}
diff --git a/include/block/block.h b/include/block/block.h
index 1c4f4d8..ecde190 100644
--- a/include/block/block.h

Re: [Qemu-devel] [RFC PATCH v1 07/10] spapr: Represent boot CPUs as spapr-cpu-core devices

2016-03-06 Thread David Gibson
On Fri, Mar 04, 2016 at 12:24:18PM +0530, Bharata B Rao wrote:
> Initialize boot CPUs as spapr-cpu-core devices and create links from
> machine object to these core devices. These links can be considered
> as CPU slots in which core devices will get hot-plugged. spapr-cpu-core
> device's slot property indicates the slot where it is plugged. Information
> about all the CPU slots can be obtained by walking these links.
> 
> Also prevent topologies that have or can result in incomplete cores.
> 
> Signed-off-by: Bharata B Rao 
> ---
>  hw/ppc/spapr.c  | 85 
> ++---
>  hw/ppc/spapr_cpu_core.c |  9 ++
>  include/hw/ppc/spapr.h  |  4 +++
>  3 files changed, 87 insertions(+), 11 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index e9d4abf..5acb612 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -64,6 +64,7 @@
>  
>  #include "hw/compat.h"
>  #include "qemu-common.h"
> +#include "hw/ppc/spapr_cpu_core.h"
>  
>  #include 
>  
> @@ -1614,8 +1615,11 @@ static void spapr_boot_set(void *opaque, const char 
> *boot_device,
>  machine->boot_order = g_strdup(boot_device);
>  }
>  
> -static void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu,
> -   Error **errp)
> +/*
> + * TODO: Check if some of these can be moved to rtas_start_cpu() where
> + * a few other things required for hotplugged CPUs are being done.
> + */
> +void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu, Error **errp)
>  {
>  CPUPPCState *env = >env;
>  
> @@ -1643,7 +1647,6 @@ static void spapr_cpu_init(sPAPRMachineState *spapr, 
> PowerPCCPU *cpu,
>  }
>  
>  xics_cpu_setup(spapr->icp, cpu);
> -
>  qemu_register_reset(spapr_cpu_reset, cpu);
>  }
>  
> @@ -1720,6 +1723,28 @@ static void spapr_validate_node_memory(MachineState 
> *machine, Error **errp)
>  }
>  }
>  
> +/*
> + * Check to see if core is being hot-plugged into an already populated slot.
> + */
> +static void spapr_cpu_core_allow_set_link(Object *obj, const char *name,
> +  Object *val, Error **errp)
> +{
> +Object *core = object_property_get_link(qdev_get_machine(), name, NULL);
> +
> +/*
> + * Allow the link to be unset when the core is unplugged.
> + */
> +if (!val) {
> +return;
> +}
> +
> +if (core) {
> +char *path = object_get_canonical_path(core);
> +error_setg(errp, "Slot %s already populated with %s", name, path);
> +g_free(path);
> +}
> +}
> +
>  /* pSeries LPAR / sPAPR hardware init */
>  static void ppc_spapr_init(MachineState *machine)
>  {
> @@ -1728,7 +1753,6 @@ static void ppc_spapr_init(MachineState *machine)
>  const char *kernel_filename = machine->kernel_filename;
>  const char *kernel_cmdline = machine->kernel_cmdline;
>  const char *initrd_filename = machine->initrd_filename;
> -PowerPCCPU *cpu;
>  PCIHostState *phb;
>  int i;
>  MemoryRegion *sysmem = get_system_memory();
> @@ -1742,6 +1766,20 @@ static void ppc_spapr_init(MachineState *machine)
>  long load_limit, fw_size;
>  bool kernel_le = false;
>  char *filename;
> +int spapr_cores = smp_cpus / smp_threads;
> +int spapr_max_cores = max_cpus / smp_threads;
> +
> +if (smp_cpus % smp_threads) {
> +error_report("smp_cpus (%u) must be multiple of threads (%u)",
> + smp_cpus, smp_threads);
> +exit(1);
> +}
> +
> +if (max_cpus % smp_threads) {
> +error_report("max_cpus (%u) must be multiple of threads (%u)",
> + max_cpus, smp_threads);
> +exit(1);
> +}
>  
>  msi_supported = true;
>  
> @@ -1800,13 +1838,37 @@ static void ppc_spapr_init(MachineState *machine)
>  if (machine->cpu_model == NULL) {
>  machine->cpu_model = kvm_enabled() ? "host" : "POWER7";
>  }
> -for (i = 0; i < smp_cpus; i++) {
> -cpu = cpu_ppc_init(machine->cpu_model);
> -if (cpu == NULL) {
> -error_report("Unable to find PowerPC CPU definition");
> -exit(1);
> +
> +spapr->cores = g_new0(Object *, spapr_max_cores);
> +
> +for (i = 0; i < spapr_max_cores; i++) {
> +char name[32];
> +
> +/*
> + * Create links from machine objects to all possible cores.
> + */
> +snprintf(name, sizeof(name), "%s[%d]", SPAPR_MACHINE_CPU_CORE_PROP, 
> i);
> +object_property_add_link(OBJECT(spapr), name, TYPE_SPAPR_CPU_CORE,
> + (Object **)>cores[i],
> + spapr_cpu_core_allow_set_link,
> + OBJ_PROP_LINK_UNREF_ON_RELEASE,
> + _fatal);

These links no longer make sense to me.  When it was a set of links to
the hotpluggable units on the machine, I could see why they might be
useful.  But now that they're links explicitly to 

Re: [Qemu-devel] [RFC PATCH v1 00/10] Core based CPU hotplug for PowerPC sPAPR

2016-03-06 Thread David Gibson
On Fri, Mar 04, 2016 at 11:57:18AM +0100, Igor Mammedov wrote:
> On Fri,  4 Mar 2016 12:24:11 +0530
> Bharata B Rao  wrote:
> 
> > Hi,
> > 
> > This is the next version of "Core based CPU hotplug for PowerPC sPAPR" that
> > was posted at
> > https://lists.gnu.org/archive/html/qemu-ppc/2016-02/msg00286.html
> > 
> > Here is a quick summary on how this approach is different from the
> > previous approaches that I have been pursuing with the last one being
> > v7 that posted here:
> > https://lists.gnu.org/archive/html/qemu-ppc/2016-01/msg00477.html
> > 
> > - Earlier approaches used an independent PowerPC specific core device while
> >   this approach uses an sPAPR specific core device that is derived from
> >   generic core device.
> > - The earlier approach didn't have the notion of where the boot time as
> >   well as hot-plugged cores would sit. In this approach QOM links are
> >   created from machine object to all possible core objects. While the
> >   links are set for boot time cores during machine init, the same is done
> >   for hotplugged cores at hotplug time. The name of this link property
> >   is standardized as "core[N]" since these links link the machine object
> >   with core devices. The link name ("core[N]") is used with core device's
> >   "slot" property to identify the QOM link to set for this core.
> >   (qemu) device_add 
> > spapr-cpu-core,id=core2,nr_threads=8,cpu_model=host,slot=core[2]
> > - The ealier approach created threads from instance_init of the core object
> >   using a modified cpu_generic_init() routine. It used smp_threads and
> >   MachineState->cpu_model globals. In the current approach, nr_threads and
> >   cpu_model are obtained as properties and threads are created from
> >   property setters. The thread objects are allocated as part of core
> >   device and object_initialize() is used to initialize them.
> > 
> > Some open questions remain:
> > 
> > - Does this device_add semantics look ok ?
> >   (qemu) device_add 
> > spapr-cpu-core,id=core2,nr_threads=8,cpu_model=host,slot=core[2]
> looks fine to me, only I'd do following changes:
> s/nr_threads/threads/
> s/slot/core/ and make it numeric property
> 
> > - Is there need for machine to core object links ? If not, what would
> >   determine the slot/location of the core device ?
> I'd drop links altogether for this series as they were a part of an attempt
> to implement QOM based introspection interface, which is not must have
> provided QMP interface would provide all necessary for hotplug information
> which links aren't capable of to do.

Yeah, I tend to agree.

> Back in the days Anthony suggested that we 'might' use links to implement
> hotplug because there wasn't any other infrastructure hotplug
> infrastructure for bus-less devices. But now we have HOTPLUG_HANDLER
> interface that obsoleted, what links were supposed to handle at
> link set time hooks.
> 
> But somehow we stuck on links idea, though their initial role in hotplug
> were obsoleted and we still trying to fit them in design where
> they are not really necessary.
> 
> > - How to fit this with CPUSlotProperties HMP interface that Igor is
> >   working on ?
> I'll try to re-factor QMP interface RFC on top of this series
> so it would be easier for you to review it wrt spapr.
> 
> 
> > This version has the following changes:
> > 
> > - Dropped QMP and HMP enumeration patches for now since it isn't clear
> >   if the approach I took would be preferrable by all archs. Will wait
> >   and see how Igor's patches evolve here.
> > - Added the following pre-req patches to support CPU removal:
> >   exec: Remove cpu from cpus list during cpu_exec_exit()
> >   exec: Do vmstate unregistration from cpu_exec_exit()
> >   cpu: Reclaim vCPU objects
> >   cpu: Add a sync version of cpu_remove()
> > - Added sPAPR CPU hot removal support
> >   xics,xics_kvm: Handle CPU unplug correctly
> >   spapr: CPU hot unplug support
> > - Moved up the "slot" property into abstract cpu-core device.
> > - Recover when thread creation fails (spapr_cpu_core_create_threads())
> > - cpu_model property in spapr-cpu-core deivce is now tracked using
> >   ObjectClass pointer instead of string.
> > - Fail topologies with incomplete cores from within sPAPR's machine init.
> > - Fixes in spapr-cpu-core object creation from machine init to create
> >   only boottime CPUs.
> > - Moved board specific wiring code for CPU threads (spapr_cpu_init)
> >   into core's realize method to be called after each thread's realization.
> > - No specific action under TYPE_CPU in ->plug() handler either for hotplug
> >   or hot removal.
> > - Moved all core related CPU hotplug routines into spapr_cpu_core.c where
> >   it truly belongs.
> > - Setting of NUMA node for hotplugged CPUs moved to spapr_cpu_init()).
> > - Compared to previous implementation of hot removal that was part of
> >   different series earlier, this implementation moves all core removal
> >   logic into 

Re: [Qemu-devel] [RFC PATCH v1 05/10] cpu: Abstract CPU core type

2016-03-06 Thread David Gibson
On Fri, Mar 04, 2016 at 07:07:20PM +0100, Igor Mammedov wrote:
> On Fri, 4 Mar 2016 16:32:53 +0530
> Bharata B Rao  wrote:
> 
> > On Fri, Mar 04, 2016 at 11:38:45AM +0100, Igor Mammedov wrote:
> > > On Fri,  4 Mar 2016 12:24:16 +0530
> > > Bharata B Rao  wrote:
> > >   
> > > > Add an abstract CPU core type that could be used by machines that want
> > > > to define and hotplug CPUs in core granularity.
> > > > 
> > > > Signed-off-by: Bharata B Rao 
> > > > ---
> > > >  hw/cpu/Makefile.objs  |  1 +
> > > >  hw/cpu/core.c | 44 
> > > >  include/hw/cpu/core.h | 30 ++
> > > >  3 files changed, 75 insertions(+)
> > > >  create mode 100644 hw/cpu/core.c
> > > >  create mode 100644 include/hw/cpu/core.h
> > > > 
> > > > diff --git a/hw/cpu/Makefile.objs b/hw/cpu/Makefile.objs
> > > > index 0954a18..942a4bb 100644
> > > > --- a/hw/cpu/Makefile.objs
> > > > +++ b/hw/cpu/Makefile.objs
> > > > @@ -2,4 +2,5 @@ obj-$(CONFIG_ARM11MPCORE) += arm11mpcore.o
> > > >  obj-$(CONFIG_REALVIEW) += realview_mpcore.o
> > > >  obj-$(CONFIG_A9MPCORE) += a9mpcore.o
> > > >  obj-$(CONFIG_A15MPCORE) += a15mpcore.o
> > > > +obj-y += core.o
> > > >  
> > > > diff --git a/hw/cpu/core.c b/hw/cpu/core.c
> > > > new file mode 100644
> > > > index 000..d8caf37
> > > > --- /dev/null
> > > > +++ b/hw/cpu/core.c
> > > > @@ -0,0 +1,44 @@
> > > > +/*
> > > > + * CPU core abstract device
> > > > + *
> > > > + * Copyright (C) 2016 Bharata B Rao 
> > > > + *
> > > > + * This work is licensed under the terms of the GNU GPL, version 2 or 
> > > > later.
> > > > + * See the COPYING file in the top-level directory.
> > > > + */
> > > > +#include "hw/cpu/core.h"
> > > > +
> > > > +static char *core_prop_get_slot(Object *obj, Error **errp)
> > > > +{
> > > > +CPUCore *core = CPU_CORE(obj);
> > > > +
> > > > +return g_strdup(core->slot);
> > > > +}
> > > > +
> > > > +static void core_prop_set_slot(Object *obj, const char *val, Error 
> > > > **errp)
> > > > +{
> > > > +CPUCore *core = CPU_CORE(obj);
> > > > +
> > > > +core->slot = g_strdup(val);
> > > > +}
> > > > +
> > > > +static void cpu_core_instance_init(Object *obj)
> > > > +{
> > > > +object_property_add_str(obj, "slot", core_prop_get_slot, 
> > > > core_prop_set_slot,
> > > > +NULL);
> > > > +}
> > > > +
> > > > +static const TypeInfo cpu_core_type_info = {
> > > > +.name = TYPE_CPU_CORE,
> > > > +.parent = TYPE_DEVICE,
> > > > +.abstract = true,
> > > > +.instance_size = sizeof(CPUCore),
> > > > +.instance_init = cpu_core_instance_init,
> > > > +};
> > > > +
> > > > +static void cpu_core_register_types(void)
> > > > +{
> > > > +type_register_static(_core_type_info);
> > > > +}
> > > > +
> > > > +type_init(cpu_core_register_types)
> > > > diff --git a/include/hw/cpu/core.h b/include/hw/cpu/core.h
> > > > new file mode 100644
> > > > index 000..2daa724
> > > > --- /dev/null
> > > > +++ b/include/hw/cpu/core.h
> > > > @@ -0,0 +1,30 @@
> > > > +/*
> > > > + * CPU core abstract device
> > > > + *
> > > > + * Copyright (C) 2016 Bharata B Rao 
> > > > + *
> > > > + * This work is licensed under the terms of the GNU GPL, version 2 or 
> > > > later.
> > > > + * See the COPYING file in the top-level directory.
> > > > + */
> > > > +#ifndef HW_CPU_CORE_H
> > > > +#define HW_CPU_CORE_H
> > > > +
> > > > +#include "qemu/osdep.h"
> > > > +#include "hw/qdev.h"
> > > > +
> > > > +#define TYPE_CPU_CORE "cpu-core"
> > > > +
> > > > +#define CPU_CORE(obj) \
> > > > +OBJECT_CHECK(CPUCore, (obj), TYPE_CPU_CORE)
> > > > +
> > > > +typedef struct CPUCore {
> > > > +/*< private >*/
> > > > +DeviceState parent_obj;
> > > > +
> > > > +/*< public >*/
> > > > +char *slot;
> > > > +} CPUCore;
> > > > +
> > > > +#define CPU_CORE_SLOT_PROP "slot"  
> > > as it's generic property I'd rename to 'core' so it would fit all users  
> > 
> > Ok. Also note that this is a string property which is associated with the
> > link name (string) that we created from machine object to this core. I think
> > it would be ideal if this becomes an interger  property in which case it
> > becomes easier to feed the core location into your CPUSlotProperties.core.
> agreed, it should be core number.

The slot stuff is continuing to confuse me a bit.  I see that we need
some kind of "address" value, but how best to do it is not clear to
me.

Changing this to an integer sounds like it's probably a good idea.
I'm a bit wary of just calling it "core" though.  Do all platforms
even necessarily have a core id?

I'm wondering if the addressing is something that needs to move the
the platform specific subtypes, while some other stuff can move to the
generic base type.

> > > on top of that I'd add numeric 'threads' property to base class so
> > 

Re: [Qemu-devel] [RFC PATCH v1 08/10] spapr: CPU hotplug support

2016-03-06 Thread David Gibson
On Fri, Mar 04, 2016 at 12:24:19PM +0530, Bharata B Rao wrote:
> Set up device tree entries for the hotplugged CPU core and use the
> exising EPOW event infrastructure to send CPU hotplug notification to
> the guest.
> 
> Signed-off-by: Bharata B Rao 
> ---
>  hw/ppc/spapr.c  | 73 
> -
>  hw/ppc/spapr_cpu_core.c | 60 +
>  hw/ppc/spapr_events.c   |  3 ++
>  hw/ppc/spapr_rtas.c | 24 ++
>  include/hw/ppc/spapr.h  |  4 +++
>  include/hw/ppc/spapr_cpu_core.h |  2 ++
>  6 files changed, 165 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 5acb612..6c4ac50 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -603,6 +603,18 @@ static void spapr_populate_cpu_dt(CPUState *cs, void 
> *fdt, int offset,
>  size_t page_sizes_prop_size;
>  uint32_t vcpus_per_socket = smp_threads * smp_cores;
>  uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
> +sPAPRMachineClass *smc = SPAPR_MACHINE_GET_CLASS(qdev_get_machine());
> +sPAPRDRConnector *drc;
> +sPAPRDRConnectorClass *drck;
> +int drc_index;
> +
> +if (smc->dr_cpu_enabled) {
> +drc = spapr_dr_connector_by_id(SPAPR_DR_CONNECTOR_TYPE_CPU, index);
> +g_assert(drc);
> +drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
> +drc_index = drck->get_index(drc);
> +_FDT((fdt_setprop_cell(fdt, offset, "ibm,my-drc-index", drc_index)));
> +}
>  
>  /* Note: we keep CI large pages off for now because a 64K capable guest
>   * provisioned with large pages might otherwise try to map a qemu
> @@ -987,6 +999,16 @@ static void spapr_finalize_fdt(sPAPRMachineState *spapr,
>  _FDT(spapr_drc_populate_dt(fdt, 0, NULL, 
> SPAPR_DR_CONNECTOR_TYPE_LMB));
>  }
>  
> +if (smc->dr_cpu_enabled) {
> +int offset = fdt_path_offset(fdt, "/cpus");
> +ret = spapr_drc_populate_dt(fdt, offset, NULL,
> +SPAPR_DR_CONNECTOR_TYPE_CPU);
> +if (ret < 0) {
> +error_report("Couldn't set up CPU DR device tree properties");
> +exit(1);
> +}
> +}
> +
>  _FDT((fdt_pack(fdt)));
>  
>  if (fdt_totalsize(fdt) > FDT_MAX_SIZE) {
> @@ -1181,7 +1203,7 @@ static void ppc_spapr_reset(void)
>  
>  }
>  
> -static void spapr_cpu_reset(void *opaque)
> +void spapr_cpu_reset(void *opaque)
>  {
>  sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
>  PowerPCCPU *cpu = opaque;
> @@ -1622,6 +1644,8 @@ static void spapr_boot_set(void *opaque, const char 
> *boot_device,
>  void spapr_cpu_init(sPAPRMachineState *spapr, PowerPCCPU *cpu, Error **errp)
>  {
>  CPUPPCState *env = >env;
> +CPUState *cs = CPU(cpu);
> +int i;
>  
>  /* Set time-base frequency to 512 MHz */
>  cpu_ppc_tb_init(env, TIMEBASE_FREQ);
> @@ -1646,6 +1670,14 @@ void spapr_cpu_init(sPAPRMachineState *spapr, 
> PowerPCCPU *cpu, Error **errp)
>  }
>  }
>  
> +/* Set NUMA node for the added CPUs  */
> +for (i = 0; i < nb_numa_nodes; i++) {
> +if (test_bit(cs->cpu_index, numa_info[i].node_cpu)) {
> +cs->numa_node = i;
> +break;
> +}
> +}
> +

This hunk seems like it belongs in a different patch.

>  xics_cpu_setup(spapr->icp, cpu);
>  qemu_register_reset(spapr_cpu_reset, cpu);
>  }
> @@ -1768,6 +1800,7 @@ static void ppc_spapr_init(MachineState *machine)
>  char *filename;
>  int spapr_cores = smp_cpus / smp_threads;
>  int spapr_max_cores = max_cpus / smp_threads;
> +int smt = kvmppc_smt_threads();
>  
>  if (smp_cpus % smp_threads) {
>  error_report("smp_cpus (%u) must be multiple of threads (%u)",
> @@ -1834,6 +1867,15 @@ static void ppc_spapr_init(MachineState *machine)
>  spapr_validate_node_memory(machine, _fatal);
>  }
>  
> +if (smc->dr_cpu_enabled) {
> +for (i = 0; i < spapr_max_cores; i++) {
> +sPAPRDRConnector *drc =
> +spapr_dr_connector_new(OBJECT(spapr),
> +   SPAPR_DR_CONNECTOR_TYPE_CPU, i * smt);
> +qemu_register_reset(spapr_drc_reset, drc);
> +}
> +}
> +

Nit: would this be cleaner to include in the same loop that constructs
the (empty) links and boot-time cpu cores?

>  /* init CPUs */
>  if (machine->cpu_model == NULL) {
>  machine->cpu_model = kvm_enabled() ? "host" : "POWER7";
> @@ -2267,6 +2309,27 @@ out:
>  error_propagate(errp, local_err);
>  }
>  
> +void *spapr_populate_hotplug_cpu_dt(DeviceState *dev, CPUState *cs,
> +int *fdt_offset, sPAPRMachineState 
> *spapr)
> +{
> +PowerPCCPU *cpu = POWERPC_CPU(cs);
> +DeviceClass *dc = DEVICE_GET_CLASS(cs);
> +int id = ppc_get_vcpu_dt_id(cpu);
> +void *fdt;
> +int offset, 

Re: [Qemu-devel] [RFC PATCH v1 02/10] exec: Do vmstate unregistration from cpu_exec_exit()

2016-03-06 Thread Bharata B Rao
On Mon, Mar 07, 2016 at 01:51:43PM +1100, David Gibson wrote:
> On Fri, Mar 04, 2016 at 12:24:13PM +0530, Bharata B Rao wrote:
> > cpu_exec_init() does vmstate_register and register_savevm for the CPU 
> > device.
> > These need to be undone from cpu_exec_exit(). These changes are needed to
> > support CPU hot removal.
> > 
> > Signed-off-by: Bharata B Rao 
> > ---
> >  exec.c | 12 
> >  1 file changed, 12 insertions(+)
> > 
> > diff --git a/exec.c b/exec.c
> > index 7c3f747..b8eeb54 100644
> > --- a/exec.c
> > +++ b/exec.c
> > @@ -613,6 +613,8 @@ static void cpu_release_index(CPUState *cpu)
> >  
> >  void cpu_exec_exit(CPUState *cpu)
> >  {
> > +CPUClass *cc = CPU_GET_CLASS(cpu);
> > +
> >  #if defined(CONFIG_USER_ONLY)
> >  cpu_list_lock();
> >  #endif
> > @@ -630,6 +632,16 @@ void cpu_exec_exit(CPUState *cpu)
> >  #if defined(CONFIG_USER_ONLY)
> >  cpu_list_unlock();
> >  #endif
> > +
> > +if (cc->vmsd != NULL) {
> > +vmstate_unregister(NULL, cc->vmsd, cpu);
> > +}
> > +#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
> > +unregister_savevm(NULL, "cpu", cpu->env_ptr);
> > +#endif
> > +if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
> > +vmstate_unregister(NULL, _cpu_common, cpu);
> > +}
> >  }
> >  
> >  void cpu_exec_init(CPUState *cpu, Error **errp)
> 
> Looking at the current cpu_exec_init() I'm seeing the
> vmstate_register() calls which are balacned here, but not a
> register_savevm().  Is that due to a change in cpu_exec_init() since
> you first wrote this patch?

Yes register_savevm() was removed by 945123a554c3. Will fix this patch
in the next iteration.

Regards,
Bharata.




[Qemu-devel] [PATCH v2 11/11] vfio: add 'aer' property to expose aercap

2016-03-06 Thread Cao jin
From: Chen Fan 

add 'aer' property to let user able to decide whether expose
the aer capability. by default we should disable aer feature,
because it needs configuration restrictions.

Signed-off-by: Chen Fan 
---
 hw/vfio/pci.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 0ac5a70..87903af 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3178,6 +3178,8 @@ static Property vfio_pci_dev_properties[] = {
sub_vendor_id, PCI_ANY_ID),
 DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
sub_device_id, PCI_ANY_ID),
+DEFINE_PROP_BIT("aer", VFIOPCIDevice, features,
+VFIO_FEATURE_ENABLE_AER_BIT, false),
 /*
  * TODO - support passed fds... is this necessary?
  * DEFINE_PROP_STRING("vfiofd", VFIOPCIDevice, vfiofd_name),
-- 
1.9.3






[Qemu-devel] [PATCH v2 02/11] vfio: squeeze out vfio_pci_do_hot_reset for support bus reset

2016-03-06 Thread Cao jin
From: Chen Fan 

squeeze out vfio_pci_do_hot_reset to do host bus reset when AER recovery.

Signed-off-by: Chen Fan 
---
 hw/vfio/pci.c | 75 +++
 1 file changed, 44 insertions(+), 31 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 9ddaa99..b03c394 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1736,6 +1736,48 @@ error:
 return ret;
 }
 
+static int vfio_pci_do_hot_reset(VFIOPCIDevice *vdev,
+ struct vfio_pci_hot_reset_info *info)
+{
+VFIOGroup *group;
+struct vfio_pci_hot_reset *reset;
+int32_t *fds;
+int ret, i, count;
+struct vfio_pci_dependent_device *devices;
+
+/* Determine how many group fds need to be passed */
+count = 0;
+devices = >devices[0];
+QLIST_FOREACH(group, _group_list, next) {
+for (i = 0; i < info->count; i++) {
+if (group->groupid == devices[i].group_id) {
+count++;
+break;
+}
+}
+}
+
+reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
+reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
+fds = >group_fds[0];
+
+/* Fill in group fds */
+QLIST_FOREACH(group, _group_list, next) {
+for (i = 0; i < info->count; i++) {
+if (group->groupid == devices[i].group_id) {
+fds[reset->count++] = group->fd;
+break;
+}
+}
+}
+
+/* Bus reset! */
+ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
+g_free(reset);
+
+return ret;
+}
+
 static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
 {
 PCIDevice *pdev = >pdev;
@@ -1877,9 +1919,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool 
single)
 VFIOGroup *group;
 struct vfio_pci_hot_reset_info *info = NULL;
 struct vfio_pci_dependent_device *devices;
-struct vfio_pci_hot_reset *reset;
-int32_t *fds;
-int ret, i, count;
+int ret, i;
 bool multi = false;
 
 trace_vfio_pci_hot_reset(vdev->vbasedev.name, single ? "one" : "multi");
@@ -1958,34 +1998,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool 
single)
 goto out_single;
 }
 
-/* Determine how many group fds need to be passed */
-count = 0;
-QLIST_FOREACH(group, _group_list, next) {
-for (i = 0; i < info->count; i++) {
-if (group->groupid == devices[i].group_id) {
-count++;
-break;
-}
-}
-}
-
-reset = g_malloc0(sizeof(*reset) + (count * sizeof(*fds)));
-reset->argsz = sizeof(*reset) + (count * sizeof(*fds));
-fds = >group_fds[0];
-
-/* Fill in group fds */
-QLIST_FOREACH(group, _group_list, next) {
-for (i = 0; i < info->count; i++) {
-if (group->groupid == devices[i].group_id) {
-fds[reset->count++] = group->fd;
-break;
-}
-}
-}
-
-/* Bus reset! */
-ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_PCI_HOT_RESET, reset);
-g_free(reset);
+ret = vfio_pci_do_hot_reset(vdev, info);
 
 trace_vfio_pci_hot_reset_result(vdev->vbasedev.name,
 ret ? "%m" : "Success");
-- 
1.9.3






[Qemu-devel] [PATCH v2 09/11] vfio: vote a device to do host bus reset

2016-03-06 Thread Cao jin
From: Chen Fan 

Signed-off-by: Chen Fan 
---
 hw/vfio/pci.c | 57 +
 hw/vfio/pci.h |  1 +
 2 files changed, 58 insertions(+)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 24848c9..8e902d2 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1937,6 +1937,8 @@ static void vfio_check_host_bus_reset(VFIOPCIDevice 
*vdev, Error **errp)
 /* List all affected devices by bus reset */
 devices = >devices[0];
 
+vdev->single_depend_dev = (info->count == 1);
+
 /* Verify that we have all the groups required */
 for (i = 0; i < info->count; i++) {
 PCIHostDeviceAddress host;
@@ -2998,6 +3000,49 @@ static void vfio_exitfn(PCIDevice *pdev)
 vfio_unregister_bars(vdev);
 }
 
+static VFIOPCIDevice *vfio_pci_get_do_reset_device(VFIOPCIDevice *vdev)
+{
+struct vfio_pci_hot_reset_info *info = NULL;
+struct vfio_pci_dependent_device *devices;
+VFIOGroup *group;
+VFIODevice *vbasedev_iter;
+int ret;
+
+ret = vfio_get_hot_reset_info(vdev, );
+if (ret) {
+error_report("vfio: Cannot enable AER for device %s,"
+ " device does not support hot reset.",
+ vdev->vbasedev.name);
+return NULL;
+}
+
+devices = >devices[0];
+
+QLIST_FOREACH(group, _group_list, next) {
+if (group->groupid == devices[0].group_id) {
+break;
+}
+}
+
+if (!group) {
+error_report("vfio: Cannot enable AER for device %s, "
+"depends on group %d which is not owned.",
+vdev->vbasedev.name, devices[0].group_id);
+return NULL;
+}
+
+QLIST_FOREACH(vbasedev_iter, >device_list, next) {
+if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI ||
+!(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
+continue;
+}
+
+return container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
+}
+
+return NULL;
+}
+
 static void vfio_pci_reset(DeviceState *dev)
 {
 PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev);
@@ -3005,6 +3050,18 @@ static void vfio_pci_reset(DeviceState *dev)
 
 trace_vfio_pci_reset(vdev->vbasedev.name);
 
+if (pdev->bus->in_hot_reset) {
+VFIOPCIDevice *tmp;
+
+tmp = vfio_pci_get_do_reset_device(vdev);
+if (tmp) {
+if (tmp == vdev) {
+vfio_pci_hot_reset(vdev, vdev->single_depend_dev);
+}
+return;
+}
+}
+
 vfio_pci_pre_reset(vdev);
 
 if (vdev->resetfn && !vdev->resetfn(vdev)) {
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index aff46c2..32bd31f 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -143,6 +143,7 @@ typedef struct VFIOPCIDevice {
 bool no_kvm_intx;
 bool no_kvm_msi;
 bool no_kvm_msix;
+bool single_depend_dev;
 } VFIOPCIDevice;
 
 uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
-- 
1.9.3






[Qemu-devel] [PATCH v2 08/11] pci: introduce pci bus pre reset

2016-03-06 Thread Cao jin
From: Chen Fan 

in order to distinguish a hot reset with a normal reset. we add this
pre reset call back to notice that we should do a hot reset for all devices.

Signed-off-by: Chen Fan 
---
 hw/core/qdev.c   |  4 ++--
 hw/pci/pci.c | 11 +++
 hw/pci/pci_bridge.c  |  5 -
 include/hw/pci/pci_bus.h |  5 +
 include/hw/qdev-core.h   |  3 +++
 5 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/hw/core/qdev.c b/hw/core/qdev.c
index 779de2b..e36fa07 100644
--- a/hw/core/qdev.c
+++ b/hw/core/qdev.c
@@ -304,14 +304,14 @@ void qdev_unplug(DeviceState *dev, Error **errp)
 }
 }
 
-static int qdev_reset_one(DeviceState *dev, void *opaque)
+int qdev_reset_one(DeviceState *dev, void *opaque)
 {
 device_reset(dev);
 
 return 0;
 }
 
-static int qbus_reset_one(BusState *bus, void *opaque)
+int qbus_reset_one(BusState *bus, void *opaque)
 {
 BusClass *bc = BUS_GET_CLASS(bus);
 if (bc->reset) {
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 72650c5..7c79767 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -257,6 +257,15 @@ void pci_device_reset(PCIDevice *dev)
 pci_do_device_reset(dev);
 }
 
+int pcibus_pre_reset(BusState *qbus, void *opaque)
+{
+PCIBus *pci_bus = DO_UPCAST(PCIBus, qbus, qbus);
+
+pci_bus->in_hot_reset = true;
+
+return 0;
+}
+
 /*
  * Trigger pci bus reset under a given bus.
  * Called via qbus_reset_all on RST# assert, after the devices
@@ -276,6 +285,8 @@ static void pcibus_reset(BusState *qbus)
 for (i = 0; i < bus->nirq; i++) {
 assert(bus->irq_count[i] == 0);
 }
+
+bus->in_hot_reset = false;
 }
 
 static void pci_host_bus_register(PCIBus *bus, DeviceState *parent)
diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c
index 7eab9d5..6c35171 100644
--- a/hw/pci/pci_bridge.c
+++ b/hw/pci/pci_bridge.c
@@ -269,7 +269,10 @@ void pci_bridge_write_config(PCIDevice *d,
 newctl = pci_get_word(d->config + PCI_BRIDGE_CONTROL);
 if (~oldctl & newctl & PCI_BRIDGE_CTL_BUS_RESET) {
 /* Trigger hot reset on 0->1 transition. */
-qbus_reset_all(>sec_bus.qbus);
+qbus_walk_children(>sec_bus.qbus, NULL,
+   pcibus_pre_reset,
+   qdev_reset_one,
+   qbus_reset_one, >sec_bus.qbus);
 }
 }
 
diff --git a/include/hw/pci/pci_bus.h b/include/hw/pci/pci_bus.h
index 403fec6..1a87179 100644
--- a/include/hw/pci/pci_bus.h
+++ b/include/hw/pci/pci_bus.h
@@ -39,8 +39,13 @@ struct PCIBus {
Keep a count of the number of devices with raised IRQs.  */
 int nirq;
 int *irq_count;
+
+/* The bus need to do a hot reset */
+bool in_hot_reset;
 };
 
+int pcibus_pre_reset(BusState *qbus, void *opaque);
+
 typedef struct PCIBridgeWindows PCIBridgeWindows;
 
 /*
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index abcdee8..3d71c17 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -401,4 +401,7 @@ static inline bool qbus_is_hotpluggable(BusState *bus)
 void device_listener_register(DeviceListener *listener);
 void device_listener_unregister(DeviceListener *listener);
 
+int qdev_reset_one(DeviceState *dev, void *opaque);
+int qbus_reset_one(BusState *bus, void *opaque);
+
 #endif
-- 
1.9.3






[Qemu-devel] [PATCH v2 03/11] vfio: add pcie extended capability support

2016-03-06 Thread Cao jin
From: Chen Fan 

For vfio pcie device, we could expose the extended capability on
PCIE bus. due to add a new pcie capability at the tail of the chain,
in order to avoid config space overwritten, we introduce a copy config
for parsing extended caps. and rebuild the pcie extended config space.

Signed-off-by: Chen Fan 
---
 hw/vfio/pci.c | 72 ++-
 1 file changed, 71 insertions(+), 1 deletion(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index b03c394..e64cce3 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1518,6 +1518,21 @@ static uint8_t vfio_std_cap_max_size(PCIDevice *pdev, 
uint8_t pos)
 return next - pos;
 }
 
+
+static uint16_t vfio_ext_cap_max_size(const uint8_t *config, uint16_t pos)
+{
+uint16_t tmp, next = PCIE_CONFIG_SPACE_SIZE;
+
+for (tmp = PCI_CONFIG_SPACE_SIZE; tmp;
+tmp = PCI_EXT_CAP_NEXT(pci_get_long(config + tmp))) {
+if (tmp > pos && tmp < next) {
+next = tmp;
+}
+}
+
+return next - pos;
+}
+
 static void vfio_set_word_bits(uint8_t *buf, uint16_t val, uint16_t mask)
 {
 pci_set_word(buf, (pci_get_word(buf) & ~mask) | val);
@@ -1853,16 +1868,71 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, 
uint8_t pos)
 return 0;
 }
 
+static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
+{
+PCIDevice *pdev = >pdev;
+uint32_t header;
+uint16_t cap_id, next, size;
+uint8_t cap_ver;
+uint8_t *config;
+
+/*
+ * pcie_add_capability always inserts the new capability at the tail
+ * of the chain.  Therefore to end up with a chain that matches the
+ * physical device, we cache the config space to avoid overwriting
+ * the original config space when we parse the extended capabilities.
+ */
+config = g_memdup(pdev->config, vdev->config_size);
+
+for (next = PCI_CONFIG_SPACE_SIZE; next;
+ next = PCI_EXT_CAP_NEXT(pci_get_long(config + next))) {
+header = pci_get_long(config + next);
+cap_id = PCI_EXT_CAP_ID(header);
+cap_ver = PCI_EXT_CAP_VER(header);
+
+/*
+ * If it becomes important to configure extended capabilities to their
+ * actual size, use this as the default when it's something we don't
+ * recognize. Since QEMU doesn't actually handle many of the config
+ * accesses, exact size doesn't seem worthwhile.
+ */
+size = vfio_ext_cap_max_size(config, next);
+
+pcie_add_capability(pdev, cap_id, cap_ver, next, size);
+pci_set_long(dev->config + next, PCI_EXT_CAP(cap_id, cap_ver, 0));
+
+/* Use emulated next pointer to allow dropping extended caps */
+pci_long_test_and_set_mask(vdev->emulated_config_bits + next,
+   PCI_EXT_CAP_NEXT_MASK);
+}
+
+g_free(config);
+return 0;
+}
+
 static int vfio_add_capabilities(VFIOPCIDevice *vdev)
 {
 PCIDevice *pdev = >pdev;
+int ret;
 
 if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST) ||
 !pdev->config[PCI_CAPABILITY_LIST]) {
 return 0; /* Nothing to add */
 }
 
-return vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
+ret = vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
+if (ret) {
+return ret;
+}
+
+/* on PCI bus, it doesn't make sense to expose extended capabilities. */
+if (!pci_is_express(pdev) ||
+!pci_bus_is_express(pdev->bus) ||
+!pci_get_long(pdev->config + PCI_CONFIG_SPACE_SIZE)) {
+return 0;
+}
+
+return vfio_add_ext_cap(vdev);
 }
 
 static void vfio_pci_pre_reset(VFIOPCIDevice *vdev)
-- 
1.9.3






[Qemu-devel] [PATCH v2 06/11] pci: add a is_valid_func callback to check device if complete

2016-03-06 Thread Cao jin
From: Chen Fan 

Signed-off-by: Chen Fan 
---
 hw/pci/pci.c | 39 +++
 include/hw/pci/pci.h |  1 +
 2 files changed, 40 insertions(+)

diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index d940f79..72650c5 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -1836,6 +1836,31 @@ PCIDevice *pci_find_device(PCIBus *bus, int bus_num, 
uint8_t devfn)
 return bus->devices[devfn];
 }
 
+static void pci_bus_check_device(PCIDevice *pdev, Error **errp)
+{
+PCIBus *bus = pdev->bus;
+PCIDeviceClass *pc;
+int i;
+Error *local_err = NULL;
+
+for (i = 0; i < ARRAY_SIZE(bus->devices); ++i) {
+if (!bus->devices[i]) {
+continue;
+}
+
+pc = PCI_DEVICE_GET_CLASS(bus->devices[i]);
+if (!pc->is_valid_func) {
+continue;
+}
+
+pc->is_valid_func(bus->devices[i], _err);
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
+}
+}
+
 static void pci_qdev_realize(DeviceState *qdev, Error **errp)
 {
 PCIDevice *pci_dev = (PCIDevice *)qdev;
@@ -1878,6 +1903,20 @@ static void pci_qdev_realize(DeviceState *qdev, Error 
**errp)
 pci_qdev_unrealize(DEVICE(pci_dev), NULL);
 return;
 }
+
+/*
+ *  If the function is func 0, indicate the closure of the slot.
+ *  then we get the chance to check all functions on same device
+ *  if valid.
+ */
+if (pci_get_function_0(pci_dev) == pci_dev) {
+pci_bus_check_device(pci_dev, _err);
+if (local_err) {
+error_propagate(errp, local_err);
+pci_qdev_unrealize(DEVICE(pci_dev), NULL);
+return;
+}
+}
 }
 
 static void pci_default_realize(PCIDevice *dev, Error **errp)
diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h
index dedf277..4e56256 100644
--- a/include/hw/pci/pci.h
+++ b/include/hw/pci/pci.h
@@ -191,6 +191,7 @@ typedef struct PCIDeviceClass {
 
 void (*realize)(PCIDevice *dev, Error **errp);
 int (*init)(PCIDevice *dev);/* TODO convert to realize() and remove */
+void (*is_valid_func)(PCIDevice *dev, Error **errp);
 PCIUnregisterFunc *exit;
 PCIConfigReadFunc *config_read;
 PCIConfigWriteFunc *config_write;
-- 
1.9.3






[Qemu-devel] [PATCH v2 01/11] vfio: extract vfio_get_hot_reset_info as a single function

2016-03-06 Thread Cao jin
From: Chen Fan 

the function is used to get affected devices by bus reset.
so here extract it, and can used for aer soon.

Signed-off-by: Chen Fan 
---
 hw/vfio/pci.c | 66 +++
 1 file changed, 48 insertions(+), 18 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index e671506..9ddaa99 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1691,6 +1691,51 @@ static void vfio_check_af_flr(VFIOPCIDevice *vdev, 
uint8_t pos)
 }
 }
 
+/*
+ * return negative with errno, return 0 on success.
+ * if success, the point of ret_info fill with the affected device reset info.
+ *
+ */
+static int vfio_get_hot_reset_info(VFIOPCIDevice *vdev,
+   struct vfio_pci_hot_reset_info **ret_info)
+{
+struct vfio_pci_hot_reset_info *info;
+int ret, count;
+
+*ret_info = NULL;
+
+info = g_malloc0(sizeof(*info));
+info->argsz = sizeof(*info);
+
+ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
+if (ret && errno != ENOSPC) {
+ret = -errno;
+goto error;
+}
+
+count = info->count;
+
+info = g_realloc(info, sizeof(*info) +
+ (count * sizeof(struct vfio_pci_dependent_device)));
+info->argsz = sizeof(*info) +
+  (count * sizeof(struct vfio_pci_dependent_device));
+
+ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
+if (ret) {
+ret = -errno;
+error_report("vfio: hot reset info failed: %m");
+goto error;
+}
+
+*ret_info = info;
+info = NULL;
+
+return 0;
+error:
+g_free(info);
+return ret;
+}
+
 static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t pos)
 {
 PCIDevice *pdev = >pdev;
@@ -1830,7 +1875,7 @@ static bool vfio_pci_host_match(PCIHostDeviceAddress 
*host1,
 static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool single)
 {
 VFIOGroup *group;
-struct vfio_pci_hot_reset_info *info;
+struct vfio_pci_hot_reset_info *info = NULL;
 struct vfio_pci_dependent_device *devices;
 struct vfio_pci_hot_reset *reset;
 int32_t *fds;
@@ -1842,12 +1887,8 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool 
single)
 vfio_pci_pre_reset(vdev);
 vdev->vbasedev.needs_reset = false;
 
-info = g_malloc0(sizeof(*info));
-info->argsz = sizeof(*info);
-
-ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
-if (ret && errno != ENOSPC) {
-ret = -errno;
+ret = vfio_get_hot_reset_info(vdev, );
+if (ret) {
 if (!vdev->has_pm_reset) {
 error_report("vfio: Cannot reset device %04x:%02x:%02x.%x, "
  "no available reset mechanism.", vdev->host.domain,
@@ -1856,18 +1897,7 @@ static int vfio_pci_hot_reset(VFIOPCIDevice *vdev, bool 
single)
 goto out_single;
 }
 
-count = info->count;
-info = g_realloc(info, sizeof(*info) + (count * sizeof(*devices)));
-info->argsz = sizeof(*info) + (count * sizeof(*devices));
 devices = >devices[0];
-
-ret = ioctl(vdev->vbasedev.fd, VFIO_DEVICE_GET_PCI_HOT_RESET_INFO, info);
-if (ret) {
-ret = -errno;
-error_report("vfio: hot reset info failed: %m");
-goto out_single;
-}
-
 trace_vfio_pci_hot_reset_has_dep_devices(vdev->vbasedev.name);
 
 /* Verify that we have all the groups required */
-- 
1.9.3






[Qemu-devel] [PATCH v2 07/11] vfio: add check aer functionality for hotplug device

2016-03-06 Thread Cao jin
From: Chen Fan 

Signed-off-by: Chen Fan 
---
 hw/vfio/pci.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 0898e34..24848c9 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3034,6 +3034,20 @@ post_reset:
 vfio_pci_post_reset(vdev);
 }
 
+static void vfio_pci_is_valid(PCIDevice *dev, Error **errp)
+{
+VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, dev);
+Error *local_err = NULL;
+
+if (DEVICE(dev)->hotplugged &&
+(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
+vfio_check_host_bus_reset(vdev, _err);
+if (local_err) {
+error_propagate(errp, local_err);
+}
+}
+}
+
 static void vfio_instance_init(Object *obj)
 {
 PCIDevice *pci_dev = PCI_DEVICE(obj);
@@ -3087,6 +3101,7 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, 
void *data)
 set_bit(DEVICE_CATEGORY_MISC, dc->categories);
 pdc->init = vfio_initfn;
 pdc->exit = vfio_exitfn;
+pdc->is_valid_func = vfio_pci_is_valid;
 pdc->config_read = vfio_pci_read_config;
 pdc->config_write = vfio_pci_write_config;
 pdc->is_express = 1; /* We might be */
-- 
1.9.3






[Qemu-devel] [PATCH v2 05/11] vfio: add check host bus reset is support or not

2016-03-06 Thread Cao jin
From: Chen Fan 

when boot up a VM that assigning vfio devices with aer enabled, we
must check the vfio device whether support host bus reset. because
when one error occur. OS driver always recover the device by do a
bus reset, in order to recover the vfio device, qemu must to do a
host bus reset to reset the device to default status. and for all
affected devices by the bus reset. we must check them whether all
are assigned to the VM.

Signed-off-by: Chen Fan 
---
 hw/vfio/pci.c | 218 --
 hw/vfio/pci.h |   1 +
 2 files changed, 212 insertions(+), 7 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 8ec9b25..0898e34 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1868,6 +1868,197 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, 
uint8_t pos)
 return 0;
 }
 
+static bool vfio_pci_host_slot_match(PCIHostDeviceAddress *host1,
+ PCIHostDeviceAddress *host2)
+{
+return (host1->domain == host2->domain && host1->bus == host2->bus &&
+host1->slot == host2->slot);
+}
+
+static bool vfio_pci_host_match(PCIHostDeviceAddress *host1,
+PCIHostDeviceAddress *host2)
+{
+return (vfio_pci_host_slot_match(host1, host2) &&
+host1->function == host2->function);
+}
+
+struct VFIODeviceFind {
+PCIDevice *pdev;
+bool found;
+};
+
+static void vfio_check_device_noreset(PCIBus *bus, PCIDevice *pdev,
+  void *opaque)
+{
+DeviceState *dev = DEVICE(pdev);
+DeviceClass *dc = DEVICE_GET_CLASS(dev);
+VFIOPCIDevice *vdev;
+struct VFIODeviceFind *find = opaque;
+
+if (find->found) {
+return;
+}
+
+if (!object_dynamic_cast(OBJECT(dev), "vfio-pci")) {
+if (!dc->reset) {
+goto found;
+}
+return;
+}
+vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+if (!(vdev->features & VFIO_FEATURE_ENABLE_AER) &&
+!vdev->vbasedev.reset_works) {
+goto found;
+}
+
+return;
+found:
+find->pdev = pdev;
+find->found = true;
+}
+
+static void vfio_check_host_bus_reset(VFIOPCIDevice *vdev, Error **errp)
+{
+PCIBus *bus = vdev->pdev.bus;
+struct vfio_pci_hot_reset_info *info = NULL;
+struct vfio_pci_dependent_device *devices;
+VFIOGroup *group;
+struct VFIODeviceFind find;
+int ret, i;
+
+ret = vfio_get_hot_reset_info(vdev, );
+if (ret) {
+error_setg(errp, "vfio: Cannot enable AER for device %s,"
+   " device does not support hot reset.",
+   vdev->vbasedev.name);
+return;
+}
+
+/* List all affected devices by bus reset */
+devices = >devices[0];
+
+/* Verify that we have all the groups required */
+for (i = 0; i < info->count; i++) {
+PCIHostDeviceAddress host;
+VFIOPCIDevice *tmp;
+VFIODevice *vbasedev_iter;
+bool found = false;
+
+host.domain = devices[i].segment;
+host.bus = devices[i].bus;
+host.slot = PCI_SLOT(devices[i].devfn);
+host.function = PCI_FUNC(devices[i].devfn);
+
+/* Skip the current device */
+if (vfio_pci_host_match(, >host)) {
+continue;
+}
+
+/* Ensure we own the group of the affected device */
+QLIST_FOREACH(group, _group_list, next) {
+if (group->groupid == devices[i].group_id) {
+break;
+}
+}
+
+if (!group) {
+error_setg(errp, "vfio: Cannot enable AER for device %s, "
+   "depends on group %d which is not owned.",
+   vdev->vbasedev.name, devices[i].group_id);
+goto out;
+}
+
+/* Ensure affected devices for reset on the same slot */
+QLIST_FOREACH(vbasedev_iter, >device_list, next) {
+if (vbasedev_iter->type != VFIO_DEVICE_TYPE_PCI) {
+continue;
+}
+tmp = container_of(vbasedev_iter, VFIOPCIDevice, vbasedev);
+if (vfio_pci_host_match(, >host)) {
+/*
+ * AER errors may be broadcast to all functions of a multi-
+ * function endpoint.  If any of those sibling functions are
+ * also assigned, they need to have AER enabled or else an
+ * error may continue to cause a vm_stop condition.  IOW,
+ * AER setup of this function would be pointless.
+ */
+if (vfio_pci_host_slot_match(>host, >host) &&
+!(tmp->features & VFIO_FEATURE_ENABLE_AER)) {
+error_setg(errp, "vfio: Cannot enable AER for device %s, 
on same slot"
+   " the dependent device %s which does not enable 
AER.",
+   vdev->vbasedev.name, tmp->vbasedev.name);
+  

[Qemu-devel] [PATCH v2 10/11] vfio-pci: pass the aer error to guest

2016-03-06 Thread Cao jin
From: Chen Fan 

when the vfio device encounters an uncorrectable error in host,
the vfio_pci driver will signal the eventfd registered by this
vfio device, resulting in the qemu eventfd handler getting
invoked.

this patch is to pass the error to guest and let the guest driver
recover from the error.

Signed-off-by: Chen Fan 
---
 hw/vfio/pci.c | 57 +++--
 1 file changed, 51 insertions(+), 6 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 8e902d2..0ac5a70 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2552,18 +2552,63 @@ static void vfio_put_device(VFIOPCIDevice *vdev)
 static void vfio_err_notifier_handler(void *opaque)
 {
 VFIOPCIDevice *vdev = opaque;
+PCIDevice *dev = >pdev;
+Error *local_err = NULL;
+PCIEAERMsg msg = {
+.severity = 0,
+.source_id = (pci_bus_num(dev->bus) << 8) | dev->devfn,
+};
 
 if (!event_notifier_test_and_clear(>err_notifier)) {
 return;
 }
 
 /*
- * TBD. Retrieve the error details and decide what action
- * needs to be taken. One of the actions could be to pass
- * the error to the guest and have the guest driver recover
- * from the error. This requires that PCIe capabilities be
- * exposed to the guest. For now, we just terminate the
- * guest to contain the error.
+ * in case the real hardware configuration has been changed,
+ * here we should recheck the bus reset capability.
+ */
+if (vdev->features & VFIO_FEATURE_ENABLE_AER) {
+vfio_check_host_bus_reset(vdev, _err);
+if (local_err) {
+error_report_err(local_err);
+}
+goto stop;
+}
+/*
+ * we should read the error details from the real hardware
+ * configuration spaces, here we only need to do is signaling
+ * to guest an uncorrectable error has occurred.
+ */
+if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
+dev->exp.aer_cap) {
+uint8_t *aer_cap = dev->config + dev->exp.aer_cap;
+uint32_t uncor_status;
+bool isfatal;
+
+uncor_status = vfio_pci_read_config(dev,
+   dev->exp.aer_cap + PCI_ERR_UNCOR_STATUS, 4);
+
+/*
+ * if the error is not emitted by this device, we can
+ * just ignore it.
+ */
+if (!(uncor_status & ~0UL)) {
+return;
+}
+
+isfatal = uncor_status & pci_get_long(aer_cap + PCI_ERR_UNCOR_SEVER);
+
+msg.severity = isfatal ? PCI_ERR_ROOT_CMD_FATAL_EN :
+ PCI_ERR_ROOT_CMD_NONFATAL_EN;
+
+pcie_aer_msg(dev, );
+return;
+}
+
+stop:
+/*
+ * If the aer capability is not exposed to the guest. we just
+ * terminate the guest to contain the error.
  */
 
 error_report("%s(%04x:%02x:%02x.%x) Unrecoverable error detected.  "
-- 
1.9.3






[Qemu-devel] [PATCH v2 04/11] vfio: add aer support for vfio device

2016-03-06 Thread Cao jin
From: Chen Fan 

Calling pcie_aer_init to initilize aer related registers for
vfio device, then reload physical related registers to expose
device capability.

Signed-off-by: Chen Fan 
---
 hw/vfio/pci.c | 81 ---
 hw/vfio/pci.h |  3 +++
 2 files changed, 81 insertions(+), 3 deletions(-)

diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index e64cce3..8ec9b25 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -1868,6 +1868,62 @@ static int vfio_add_std_cap(VFIOPCIDevice *vdev, uint8_t 
pos)
 return 0;
 }
 
+static int vfio_setup_aer(VFIOPCIDevice *vdev, uint8_t cap_ver,
+  int pos, uint16_t size)
+{
+PCIDevice *pdev = >pdev;
+PCIDevice *dev_iter;
+uint8_t type;
+uint32_t errcap;
+
+if (!(vdev->features & VFIO_FEATURE_ENABLE_AER)) {
+pcie_add_capability(pdev, PCI_EXT_CAP_ID_ERR,
+cap_ver, pos, size);
+return 0;
+}
+
+dev_iter = pci_bridge_get_device(pdev->bus);
+if (!dev_iter) {
+goto error;
+}
+
+while (dev_iter) {
+type = pcie_cap_get_type(dev_iter);
+if ((type != PCI_EXP_TYPE_ROOT_PORT &&
+ type != PCI_EXP_TYPE_UPSTREAM &&
+ type != PCI_EXP_TYPE_DOWNSTREAM)) {
+goto error;
+}
+
+if (!dev_iter->exp.aer_cap) {
+goto error;
+}
+
+dev_iter = pci_bridge_get_device(dev_iter->bus);
+}
+
+errcap = vfio_pci_read_config(pdev, pos + PCI_ERR_CAP, 4);
+/*
+ * The ability to record multiple headers is depending on
+ * the state of the Multiple Header Recording Capable bit and
+ * enabled by the Multiple Header Recording Enable bit.
+ */
+if ((errcap & PCI_ERR_CAP_MHRC) &&
+(errcap & PCI_ERR_CAP_MHRE)) {
+pdev->exp.aer_log.log_max = PCIE_AER_LOG_MAX_DEFAULT;
+} else {
+pdev->exp.aer_log.log_max = 0;
+}
+
+pcie_cap_deverr_init(pdev);
+return pcie_aer_init(pdev, pos, size);
+
+error:
+error_report("vfio: Unable to enable AER for device %s, parent bus "
+ "does not support AER signaling", vdev->vbasedev.name);
+return -1;
+}
+
 static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
 {
 PCIDevice *pdev = >pdev;
@@ -1875,6 +1931,7 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
 uint16_t cap_id, next, size;
 uint8_t cap_ver;
 uint8_t *config;
+int ret = 0;
 
 /*
  * pcie_add_capability always inserts the new capability at the tail
@@ -1898,16 +1955,29 @@ static int vfio_add_ext_cap(VFIOPCIDevice *vdev)
  */
 size = vfio_ext_cap_max_size(config, next);
 
-pcie_add_capability(pdev, cap_id, cap_ver, next, size);
-pci_set_long(dev->config + next, PCI_EXT_CAP(cap_id, cap_ver, 0));
+switch (cap_id) {
+case PCI_EXT_CAP_ID_ERR:
+ret = vfio_setup_aer(vdev, cap_ver, next, size);
+break;
+default:
+pcie_add_capability(pdev, cap_id, cap_ver, next, size);
+break;
+}
+
+if (ret) {
+goto out;
+}
+
+pci_set_long(pdev->config + next, PCI_EXT_CAP(cap_id, cap_ver, 0));
 
 /* Use emulated next pointer to allow dropping extended caps */
 pci_long_test_and_set_mask(vdev->emulated_config_bits + next,
PCI_EXT_CAP_NEXT_MASK);
 }
 
+out:
 g_free(config);
-return 0;
+return ret;
 }
 
 static int vfio_add_capabilities(VFIOPCIDevice *vdev)
@@ -2662,6 +2732,11 @@ static int vfio_initfn(PCIDevice *pdev)
 goto out_teardown;
 }
 
+if ((vdev->features & VFIO_FEATURE_ENABLE_AER) &&
+!pdev->exp.aer_cap) {
+goto out_teardown;
+}
+
 /* QEMU emulates all of MSI & MSIX */
 if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
 memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff,
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 6256587..e0c53f2 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -15,6 +15,7 @@
 #include "qemu-common.h"
 #include "exec/memory.h"
 #include "hw/pci/pci.h"
+#include "hw/pci/pci_bridge.h"
 #include "hw/vfio/vfio-common.h"
 #include "qemu/event_notifier.h"
 #include "qemu/queue.h"
@@ -128,6 +129,8 @@ typedef struct VFIOPCIDevice {
 #define VFIO_FEATURE_ENABLE_VGA (1 << VFIO_FEATURE_ENABLE_VGA_BIT)
 #define VFIO_FEATURE_ENABLE_REQ_BIT 1
 #define VFIO_FEATURE_ENABLE_REQ (1 << VFIO_FEATURE_ENABLE_REQ_BIT)
+#define VFIO_FEATURE_ENABLE_AER_BIT 2
+#define VFIO_FEATURE_ENABLE_AER (1 << VFIO_FEATURE_ENABLE_AER_BIT)
 int32_t bootindex;
 uint8_t pm_cap;
 bool has_vga;
-- 
1.9.3






[Qemu-devel] [PATCH v2 Resend 00/11] vfio-pci: pass the aer error to guest, part2

2016-03-06 Thread Cao jin
From: Chen Fan 

v1-v2:
   1. limit all devices on same bus in guest are on same bus in host in patch 
5/11.
   2. patch 05/11 ~ 09/11 has been changed.

Chen Fan (11):
  vfio: extract vfio_get_hot_reset_info as a single function
  vfio: squeeze out vfio_pci_do_hot_reset for support bus reset
  vfio: add pcie extended capability support
  vfio: add aer support for vfio device
  vfio: add check host bus reset is support or not
  pci: add a is_valid_func callback to check device if complete
  vfio: add check aer functionality for hotplug device
  pci: introduce pci bus pre reset
  vfio: vote a device to do host bus reset
  vfio-pci: pass the aer error to guest
  vfio: add 'aer' property to expose aercap

 hw/core/qdev.c   |   4 +-
 hw/pci/pci.c |  50 
 hw/pci/pci_bridge.c  |   5 +-
 hw/vfio/pci.c| 638 ++-
 hw/vfio/pci.h|   5 +
 include/hw/pci/pci.h |   1 +
 include/hw/pci/pci_bus.h |   5 +
 include/hw/qdev-core.h   |   3 +
 8 files changed, 645 insertions(+), 66 deletions(-)

-- 
1.9.3






[Qemu-devel] [PULL 08/14] rocker: forbid to change world type

2016-03-06 Thread Jason Wang
From: Jiri Pirko 

Port to world assignment should be permitted only by qemu user. Driver
should not be able to do it, so forbid that possibility.

Signed-off-by: Jiri Pirko 
Signed-off-by: Jason Wang 
---
 hw/net/rocker/rocker.c| 8 +++-
 hw/net/rocker/rocker_fp.c | 5 +
 hw/net/rocker/rocker_fp.h | 1 +
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c
index f3e994d..a1d921d 100644
--- a/hw/net/rocker/rocker.c
+++ b/hw/net/rocker/rocker.c
@@ -400,7 +400,13 @@ static int cmd_set_port_settings(Rocker *r,
 
 if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]) {
 mode = rocker_tlv_get_u8(tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_MODE]);
-fp_port_set_world(fp_port, r->worlds[mode]);
+if (mode >= ROCKER_WORLD_TYPE_MAX) {
+return -ROCKER_EINVAL;
+}
+/* We don't support world change. */
+if (!fp_port_check_world(fp_port, r->worlds[mode])) {
+return -ROCKER_EINVAL;
+}
 }
 
 if (tlvs[ROCKER_TLV_CMD_PORT_SETTINGS_LEARNING]) {
diff --git a/hw/net/rocker/rocker_fp.c b/hw/net/rocker/rocker_fp.c
index af37fef..0149899 100644
--- a/hw/net/rocker/rocker_fp.c
+++ b/hw/net/rocker/rocker_fp.c
@@ -186,6 +186,11 @@ void fp_port_set_world(FpPort *port, World *world)
 port->world = world;
 }
 
+bool fp_port_check_world(FpPort *port, World *world)
+{
+return port->world == world;
+}
+
 bool fp_port_enabled(FpPort *port)
 {
 return port->enabled;
diff --git a/hw/net/rocker/rocker_fp.h b/hw/net/rocker/rocker_fp.h
index ab80fd8..04592bb 100644
--- a/hw/net/rocker/rocker_fp.h
+++ b/hw/net/rocker/rocker_fp.h
@@ -40,6 +40,7 @@ int fp_port_set_settings(FpPort *port, uint32_t speed,
 bool fp_port_from_pport(uint32_t pport, uint32_t *port);
 World *fp_port_get_world(FpPort *port);
 void fp_port_set_world(FpPort *port, World *world);
+bool fp_port_check_world(FpPort *port, World *world);
 bool fp_port_enabled(FpPort *port);
 void fp_port_enable(FpPort *port);
 void fp_port_disable(FpPort *port);
-- 
2.5.0




[Qemu-devel] [PULL 09/14] rocker: return -ENOMEM in case of some world alloc fails

2016-03-06 Thread Jason Wang
From: Jiri Pirko 

Until now, 0 is returned in this error case. Fix it ro return -ENOMEM.

Signed-off-by: Jiri Pirko 
Signed-off-by: Jason Wang 
---
 hw/net/rocker/rocker.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c
index a1d921d..104c097 100644
--- a/hw/net/rocker/rocker.c
+++ b/hw/net/rocker/rocker.c
@@ -1301,6 +1301,7 @@ static int pci_rocker_init(PCIDevice *dev)
 
 for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
 if (!r->worlds[i]) {
+err = -ENOMEM;
 goto err_world_alloc;
 }
 }
-- 
2.5.0




[Qemu-devel] [PULL 10/14] rocker: add name field into WorldOps ale let world specify its name

2016-03-06 Thread Jason Wang
From: Jiri Pirko 

Also use this in world_name getter function.

Signed-off-by: Jiri Pirko 
Signed-off-by: Jason Wang 
---
 hw/net/rocker/rocker_of_dpa.c | 1 +
 hw/net/rocker/rocker_world.c  | 7 +--
 hw/net/rocker/rocker_world.h  | 1 +
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/hw/net/rocker/rocker_of_dpa.c b/hw/net/rocker/rocker_of_dpa.c
index da3fc54..0a134eb 100644
--- a/hw/net/rocker/rocker_of_dpa.c
+++ b/hw/net/rocker/rocker_of_dpa.c
@@ -2614,6 +2614,7 @@ RockerOfDpaGroupList 
*qmp_query_rocker_of_dpa_groups(const char *name,
 }
 
 static WorldOps of_dpa_ops = {
+.name = "ofdpa",
 .init = of_dpa_init,
 .uninit = of_dpa_uninit,
 .ig = of_dpa_ig,
diff --git a/hw/net/rocker/rocker_world.c b/hw/net/rocker/rocker_world.c
index 1ed0fcd..89777e9 100644
--- a/hw/net/rocker/rocker_world.c
+++ b/hw/net/rocker/rocker_world.c
@@ -98,10 +98,5 @@ enum rocker_world_type world_type(World *world)
 
 const char *world_name(World *world)
 {
-switch (world->type) {
-case ROCKER_WORLD_TYPE_OF_DPA:
-return "OF_DPA";
-default:
-return "unknown";
-}
+return world->ops->name;
 }
diff --git a/hw/net/rocker/rocker_world.h b/hw/net/rocker/rocker_world.h
index 18d277b..58ade47 100644
--- a/hw/net/rocker/rocker_world.h
+++ b/hw/net/rocker/rocker_world.h
@@ -33,6 +33,7 @@ typedef int (world_cmd)(World *world, DescInfo *info,
 RockerTlv *cmd_info_tlv);
 
 typedef struct world_ops {
+const char *name;
 world_init *init;
 world_uninit *uninit;
 world_ig *ig;
-- 
2.5.0




[Qemu-devel] [PULL 06/14] tests/test-filter-mirror:add filter-mirror unit test

2016-03-06 Thread Jason Wang
From: Zhang Chen 

In this unit test we will test the mirror function.

start qemu with:
 -netdev socket,id=qtest-bn0,fd=sockfd
 -device e1000,netdev=qtest-bn0,id=qtest-e0
 -chardev socket,id=mirror0,path=/tmp/filter-mirror-test.sock,server,nowait
 -object filter-mirror,id=qtest-f0,netdev=qtest-bn0,queue=tx,outdev=mirror0

We inject packet through socket netdev(qtest-bn0),
filter-mirror(qtest-f0) will mirror the packet to chardev mirror0.
Then we try read packet from mirror0 and then compare it with what
we've injected.

Signed-off-by: Zhang Chen 
Signed-off-by: Wen Congyang 
Signed-off-by: Jason Wang 
---
 tests/.gitignore   |  1 +
 tests/Makefile |  2 ++
 tests/test-filter-mirror.c | 90 ++
 3 files changed, 93 insertions(+)
 create mode 100644 tests/test-filter-mirror.c

diff --git a/tests/.gitignore b/tests/.gitignore
index 787c95c..10df017 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -63,5 +63,6 @@ test-write-threshold
 test-x86-cpuid
 test-xbzrle
 test-netfilter
+test-filter-mirror
 *-test
 qapi-schema/*.test.*
diff --git a/tests/Makefile b/tests/Makefile
index cd4bbd4..5a8f590 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -213,6 +213,7 @@ ifeq ($(CONFIG_VHOST_NET_TEST_i386),)
 check-qtest-x86_64-$(CONFIG_VHOST_NET_TEST_x86_64) += 
tests/vhost-user-test$(EXESUF)
 endif
 check-qtest-i386-y += tests/test-netfilter$(EXESUF)
+check-qtest-i386-y += tests/test-filter-mirror$(EXESUF)
 check-qtest-x86_64-y = $(check-qtest-i386-y)
 gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c
 gcov-files-x86_64-y = $(subst 
i386-softmmu/,x86_64-softmmu/,$(gcov-files-i386-y))
@@ -566,6 +567,7 @@ tests/qemu-iotests/socket_scm_helper$(EXESUF): 
tests/qemu-iotests/socket_scm_hel
 tests/test-qemu-opts$(EXESUF): tests/test-qemu-opts.o $(test-util-obj-y)
 tests/test-write-threshold$(EXESUF): tests/test-write-threshold.o 
$(test-block-obj-y)
 tests/test-netfilter$(EXESUF): tests/test-netfilter.o $(qtest-obj-y)
+tests/test-filter-mirror$(EXESUF): tests/test-filter-mirror.o $(qtest-obj-y)
 tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o 
contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y)
 tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o
 
diff --git a/tests/test-filter-mirror.c b/tests/test-filter-mirror.c
new file mode 100644
index 000..a68dc62
--- /dev/null
+++ b/tests/test-filter-mirror.c
@@ -0,0 +1,90 @@
+/*
+ * QTest testcase for filter-mirror
+ *
+ * Copyright (c) 2016 FUJITSU LIMITED
+ * Author: Zhang Chen 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include 
+#include "libqtest.h"
+#include "qemu/iov.h"
+#include "qemu/sockets.h"
+#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+
+static void test_mirror(void)
+{
+#ifndef _WIN32
+/* socketpair(PF_UNIX) which does not exist on windows */
+
+int send_sock[2], recv_sock;
+char *cmdline;
+uint32_t ret = 0, len = 0;
+char send_buf[] = "Hello! filter-mirror~";
+char sock_path[] = "filter-mirror.XX";
+char *recv_buf;
+uint32_t size = sizeof(send_buf);
+size = htonl(size);
+
+ret = socketpair(PF_UNIX, SOCK_STREAM, 0, send_sock);
+g_assert_cmpint(ret, !=, -1);
+
+ret = mkstemp(sock_path);
+g_assert_cmpint(ret, !=, -1);
+
+cmdline = g_strdup_printf("-netdev socket,id=qtest-bn0,fd=%d "
+ "-device e1000,netdev=qtest-bn0,id=qtest-e0 "
+ "-chardev socket,id=mirror0,path=%s,server,nowait "
+ "-object 
filter-mirror,id=qtest-f0,netdev=qtest-bn0,queue=tx,outdev=mirror0 "
+ , send_sock[1], sock_path);
+qtest_start(cmdline);
+g_free(cmdline);
+
+recv_sock = unix_connect(sock_path, NULL);
+g_assert_cmpint(recv_sock, !=, -1);
+
+struct iovec iov[] = {
+{
+.iov_base = ,
+.iov_len = sizeof(size),
+}, {
+.iov_base = send_buf,
+.iov_len = sizeof(send_buf),
+},
+};
+ret = iov_send(send_sock[0], iov, 2, 0, sizeof(size) + sizeof(send_buf));
+g_assert_cmpint(ret, ==, sizeof(send_buf) + sizeof(size));
+close(send_sock[0]);
+
+ret = qemu_recv(recv_sock, , sizeof(len), 0);
+g_assert_cmpint(ret, ==, sizeof(len));
+len = ntohl(len);
+
+g_assert_cmpint(len, ==, sizeof(send_buf));
+recv_buf = g_malloc(len);
+ret = qemu_recv(recv_sock, recv_buf, len, 0);
+g_assert_cmpstr(recv_buf, ==, send_buf);
+
+g_free(recv_buf);
+close(recv_sock);
+unlink(sock_path);
+
+#endif
+}
+
+int main(int argc, char **argv)
+{
+int ret;
+
+g_test_init(, , NULL);
+
+qtest_add_func("/netfilter/mirror", test_mirror);
+ret = g_test_run();
+qtest_end();
+
+   

[Qemu-devel] [PULL 07/14] net: netmap: probe netmap interface for virtio-net header

2016-03-06 Thread Jason Wang
From: Vincenzo Maffione 

Previous implementation of has_ufo, has_vnet_hdr, has_vnet_hdr_len, etc.
did not really probe for virtio-net header support for the netmap
interface attached to the backend. These callbacks were correct for
VALE ports, but incorrect for hardware NICs, pipes, monitors, etc.

This patch fixes the implementation to work properly with all kinds
of netmap ports.

Signed-off-by: Vincenzo Maffione 
Signed-off-by: Jason Wang 
---
 net/netmap.c | 59 ++-
 1 file changed, 38 insertions(+), 21 deletions(-)

diff --git a/net/netmap.c b/net/netmap.c
index 9710321..1b42728 100644
--- a/net/netmap.c
+++ b/net/netmap.c
@@ -323,20 +323,47 @@ static void netmap_cleanup(NetClientState *nc)
 }
 
 /* Offloading manipulation support callbacks. */
-static bool netmap_has_ufo(NetClientState *nc)
+static int netmap_fd_set_vnet_hdr_len(NetmapState *s, int len)
 {
-return true;
+struct nmreq req;
+
+/* Issue a NETMAP_BDG_VNET_HDR command to change the virtio-net header
+ * length for the netmap adapter associated to 's->ifname'.
+ */
+memset(, 0, sizeof(req));
+pstrcpy(req.nr_name, sizeof(req.nr_name), s->ifname);
+req.nr_version = NETMAP_API;
+req.nr_cmd = NETMAP_BDG_VNET_HDR;
+req.nr_arg1 = len;
+
+return ioctl(s->nmd->fd, NIOCREGIF, );
 }
 
-static bool netmap_has_vnet_hdr(NetClientState *nc)
+static bool netmap_has_vnet_hdr_len(NetClientState *nc, int len)
 {
+NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
+int prev_len = s->vnet_hdr_len;
+
+/* Check that we can set the new length. */
+if (netmap_fd_set_vnet_hdr_len(s, len)) {
+return false;
+}
+
+/* Restore the previous length. */
+if (netmap_fd_set_vnet_hdr_len(s, prev_len)) {
+error_report("Failed to restore vnet-hdr length %d on %s: %s",
+ prev_len, s->ifname, strerror(errno));
+abort();
+}
+
 return true;
 }
 
-static bool netmap_has_vnet_hdr_len(NetClientState *nc, int len)
+/* A netmap interface that supports virtio-net headers always
+ * supports UFO, so we use this callback also for the has_ufo hook. */
+static bool netmap_has_vnet_hdr(NetClientState *nc)
 {
-return len == 0 || len == sizeof(struct virtio_net_hdr) ||
-len == sizeof(struct virtio_net_hdr_mrg_rxbuf);
+return netmap_has_vnet_hdr_len(nc, sizeof(struct virtio_net_hdr));
 }
 
 static void netmap_using_vnet_hdr(NetClientState *nc, bool enable)
@@ -347,20 +374,11 @@ static void netmap_set_vnet_hdr_len(NetClientState *nc, 
int len)
 {
 NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
 int err;
-struct nmreq req;
 
-/* Issue a NETMAP_BDG_VNET_HDR command to change the virtio-net header
- * length for the netmap adapter associated to 's->ifname'.
- */
-memset(, 0, sizeof(req));
-pstrcpy(req.nr_name, sizeof(req.nr_name), s->ifname);
-req.nr_version = NETMAP_API;
-req.nr_cmd = NETMAP_BDG_VNET_HDR;
-req.nr_arg1 = len;
-err = ioctl(s->nmd->fd, NIOCREGIF, );
+err = netmap_fd_set_vnet_hdr_len(s, len);
 if (err) {
-error_report("Unable to execute NETMAP_BDG_VNET_HDR on %s: %s",
- s->ifname, strerror(errno));
+error_report("Unable to set vnet-hdr length %d on %s: %s",
+ len, s->ifname, strerror(errno));
 } else {
 /* Keep track of the current length. */
 s->vnet_hdr_len = len;
@@ -373,8 +391,7 @@ static void netmap_set_offload(NetClientState *nc, int 
csum, int tso4, int tso6,
 NetmapState *s = DO_UPCAST(NetmapState, nc, nc);
 
 /* Setting a virtio-net header length greater than zero automatically
- * enables the offloadings.
- */
+ * enables the offloadings. */
 if (!s->vnet_hdr_len) {
 netmap_set_vnet_hdr_len(nc, sizeof(struct virtio_net_hdr));
 }
@@ -388,7 +405,7 @@ static NetClientInfo net_netmap_info = {
 .receive_iov = netmap_receive_iov,
 .poll = netmap_poll,
 .cleanup = netmap_cleanup,
-.has_ufo = netmap_has_ufo,
+.has_ufo = netmap_has_vnet_hdr,
 .has_vnet_hdr = netmap_has_vnet_hdr,
 .has_vnet_hdr_len = netmap_has_vnet_hdr_len,
 .using_vnet_hdr = netmap_using_vnet_hdr,
-- 
2.5.0




[Qemu-devel] [PULL 05/14] net/filter-mirror:Add filter-mirror

2016-03-06 Thread Jason Wang
From: Zhang Chen 

Filter-mirror is a netfilter plugin.
It gives qemu the ability to mirror
packets to a chardev.

usage:

-netdev tap,id=hn0
-chardev socket,id=mirror0,host=ip_primary,port=X,server,nowait
-filter-mirror,id=m0,netdev=hn0,queue=tx/rx/all,outdev=mirror0

Signed-off-by: Zhang Chen 
Signed-off-by: Wen Congyang 
Reviewed-by: Yang Hongyang 
Reviewed-by: zhanghailiang 
Signed-off-by: Jason Wang 
---
 net/Makefile.objs   |   1 +
 net/filter-mirror.c | 182 
 qemu-options.hx |   5 ++
 vl.c|   3 +-
 4 files changed, 190 insertions(+), 1 deletion(-)
 create mode 100644 net/filter-mirror.c

diff --git a/net/Makefile.objs b/net/Makefile.objs
index 5fa2f97..b7c22fd 100644
--- a/net/Makefile.objs
+++ b/net/Makefile.objs
@@ -15,3 +15,4 @@ common-obj-$(CONFIG_VDE) += vde.o
 common-obj-$(CONFIG_NETMAP) += netmap.o
 common-obj-y += filter.o
 common-obj-y += filter-buffer.o
+common-obj-y += filter-mirror.o
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
new file mode 100644
index 000..625b940
--- /dev/null
+++ b/net/filter-mirror.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2016 HUAWEI TECHNOLOGIES CO., LTD.
+ * Copyright (c) 2016 FUJITSU LIMITED
+ * Copyright (c) 2016 Intel Corporation
+ *
+ * Author: Zhang Chen 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "net/filter.h"
+#include "net/net.h"
+#include "qemu-common.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi-visit.h"
+#include "qom/object.h"
+#include "qemu/main-loop.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+#include "sysemu/char.h"
+#include "qemu/iov.h"
+#include "qemu/sockets.h"
+
+#define FILTER_MIRROR(obj) \
+OBJECT_CHECK(MirrorState, (obj), TYPE_FILTER_MIRROR)
+
+#define TYPE_FILTER_MIRROR "filter-mirror"
+
+typedef struct MirrorState {
+NetFilterState parent_obj;
+char *outdev;
+CharDriverState *chr_out;
+} MirrorState;
+
+static int filter_mirror_send(NetFilterState *nf,
+   const struct iovec *iov,
+   int iovcnt)
+{
+MirrorState *s = FILTER_MIRROR(nf);
+int ret = 0;
+ssize_t size = 0;
+uint32_t len =  0;
+char *buf;
+
+size = iov_size(iov, iovcnt);
+if (!size) {
+return 0;
+}
+
+len = htonl(size);
+ret = qemu_chr_fe_write_all(s->chr_out, (uint8_t *), sizeof(len));
+if (ret != sizeof(len)) {
+goto err;
+}
+
+buf = g_malloc(size);
+iov_to_buf(iov, iovcnt, 0, buf, size);
+ret = qemu_chr_fe_write_all(s->chr_out, (uint8_t *)buf, size);
+g_free(buf);
+if (ret != size) {
+goto err;
+}
+
+return 0;
+
+err:
+return ret < 0 ? ret : -EIO;
+}
+
+static ssize_t filter_mirror_receive_iov(NetFilterState *nf,
+ NetClientState *sender,
+ unsigned flags,
+ const struct iovec *iov,
+ int iovcnt,
+ NetPacketSent *sent_cb)
+{
+int ret;
+
+ret = filter_mirror_send(nf, iov, iovcnt);
+if (ret) {
+error_report("filter_mirror_send failed(%s)", strerror(-ret));
+}
+
+/*
+ * we don't hope this error interrupt the normal
+ * path of net packet, so we always return zero.
+ */
+return 0;
+}
+
+static void filter_mirror_cleanup(NetFilterState *nf)
+{
+MirrorState *s = FILTER_MIRROR(nf);
+
+if (s->chr_out) {
+qemu_chr_fe_release(s->chr_out);
+}
+}
+
+static void filter_mirror_setup(NetFilterState *nf, Error **errp)
+{
+MirrorState *s = FILTER_MIRROR(nf);
+
+if (!s->outdev) {
+error_setg(errp, "filter filter mirror needs 'outdev' "
+"property set");
+return;
+}
+
+s->chr_out = qemu_chr_find(s->outdev);
+if (s->chr_out == NULL) {
+error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
+  "Device '%s' not found", s->outdev);
+return;
+}
+
+if (qemu_chr_fe_claim(s->chr_out) != 0) {
+error_setg(errp, QERR_DEVICE_IN_USE, s->outdev);
+return;
+}
+}
+
+static void filter_mirror_class_init(ObjectClass *oc, void *data)
+{
+NetFilterClass *nfc = NETFILTER_CLASS(oc);
+
+nfc->setup = filter_mirror_setup;
+nfc->cleanup = filter_mirror_cleanup;
+nfc->receive_iov = filter_mirror_receive_iov;
+}
+
+static char *filter_mirror_get_outdev(Object *obj, Error **errp)
+{
+MirrorState *s = FILTER_MIRROR(obj);
+
+return g_strdup(s->outdev);
+}
+
+static void
+filter_mirror_set_outdev(Object *obj, const char *value, Error 

[Qemu-devel] [PULL 13/14] filter-buffer: Add status_changed callback processing

2016-03-06 Thread Jason Wang
From: zhanghailiang 

While the status of filter-buffer changing from 'on' to 'off',
it need to release all the buffered packets, and delete the related
timer, while switch from 'off' to 'on', it need to resume the release
packets timer.

Here, we extract the process of setup timer into a new helper,
which will be used in the new status_changed callback.

Signed-off-by: zhanghailiang 
Cc: Jason Wang 
Cc: Yang Hongyang 
Signed-off-by: Jason Wang 
---
 net/filter-buffer.c | 34 --
 1 file changed, 28 insertions(+), 6 deletions(-)

diff --git a/net/filter-buffer.c b/net/filter-buffer.c
index 12ad2e3..972177b 100644
--- a/net/filter-buffer.c
+++ b/net/filter-buffer.c
@@ -100,6 +100,19 @@ static void filter_buffer_cleanup(NetFilterState *nf)
 }
 }
 
+static void filter_buffer_setup_timer(NetFilterState *nf)
+{
+FilterBufferState *s = FILTER_BUFFER(nf);
+
+if (s->interval) {
+timer_init_us(>release_timer, QEMU_CLOCK_VIRTUAL,
+  filter_buffer_release_timer, nf);
+/* Timer armed to fire in s->interval microseconds. */
+timer_mod(>release_timer,
+  qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval);
+}
+}
+
 static void filter_buffer_setup(NetFilterState *nf, Error **errp)
 {
 FilterBufferState *s = FILTER_BUFFER(nf);
@@ -115,12 +128,20 @@ static void filter_buffer_setup(NetFilterState *nf, Error 
**errp)
 }
 
 s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf);
-if (s->interval) {
-timer_init_us(>release_timer, QEMU_CLOCK_VIRTUAL,
-  filter_buffer_release_timer, nf);
-/* Timer armed to fire in s->interval microseconds. */
-timer_mod(>release_timer,
-  qemu_clock_get_us(QEMU_CLOCK_VIRTUAL) + s->interval);
+filter_buffer_setup_timer(nf);
+}
+
+static void filter_buffer_status_changed(NetFilterState *nf, Error **errp)
+{
+FilterBufferState *s = FILTER_BUFFER(nf);
+
+if (!nf->on) {
+if (s->interval) {
+timer_del(>release_timer);
+}
+filter_buffer_flush(nf);
+} else {
+filter_buffer_setup_timer(nf);
 }
 }
 
@@ -131,6 +152,7 @@ static void filter_buffer_class_init(ObjectClass *oc, void 
*data)
 nfc->setup = filter_buffer_setup;
 nfc->cleanup = filter_buffer_cleanup;
 nfc->receive_iov = filter_buffer_receive_iov;
+nfc->status_changed = filter_buffer_status_changed;
 }
 
 static void filter_buffer_get_interval(Object *obj, Visitor *v,
-- 
2.5.0




[Qemu-devel] [PULL 14/14] net: check packet payload length

2016-03-06 Thread Jason Wang
From: Prasad J Pandit 

While computing IP checksum, 'net_checksum_calculate' reads
payload length from the packet. It could exceed the given 'data'
buffer size. Add a check to avoid it.

Reported-by: Liu Ling 
Signed-off-by: Prasad J Pandit 
Signed-off-by: Jason Wang 
---
 net/checksum.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/net/checksum.c b/net/checksum.c
index b5016ab..d0fa424 100644
--- a/net/checksum.c
+++ b/net/checksum.c
@@ -60,6 +60,11 @@ void net_checksum_calculate(uint8_t *data, int length)
 int hlen, plen, proto, csum_offset;
 uint16_t csum;
 
+/* Ensure data has complete L2 & L3 headers. */
+if (length < 14 + 20) {
+return;
+}
+
 if ((data[14] & 0xf0) != 0x40)
return; /* not IPv4 */
 hlen  = (data[14] & 0x0f) * 4;
@@ -77,8 +82,9 @@ void net_checksum_calculate(uint8_t *data, int length)
return;
 }
 
-if (plen < csum_offset+2)
-   return;
+if (plen < csum_offset + 2 || 14 + hlen + plen > length) {
+return;
+}
 
 data[14+hlen+csum_offset]   = 0;
 data[14+hlen+csum_offset+1] = 0;
-- 
2.5.0




[Qemu-devel] [PULL 02/14] net: filter: correctly remove filter from the list during finalization

2016-03-06 Thread Jason Wang
Qemu may crash when we want to add two filters on the same netdev but
the initialization of second fails (e.g missing parameters):

./qemu-system-x86_64 -netdev user,id=un0 \
 -object filter-buffer,id=f0,netdev=un0,interval=10 \
 -object filter-buffer,id=f1,netdev=un0
Segmentation fault (core dumped)

This is because we don't check whether or not the filter was in the
list of netdev. This patch fixes this.

Cc: Yang Hongyang 
Reviewed-by: Yang Hongyang 
Signed-off-by: Jason Wang 
---
 net/filter.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/filter.c b/net/filter.c
index d2a514e..7cdbc6c 100644
--- a/net/filter.c
+++ b/net/filter.c
@@ -196,7 +196,8 @@ static void netfilter_finalize(Object *obj)
 nfc->cleanup(nf);
 }
 
-if (nf->netdev && !QTAILQ_EMPTY(>netdev->filters)) {
+if (nf->netdev && !QTAILQ_EMPTY(>netdev->filters) &&
+nf->next.tqe_prev) {
 QTAILQ_REMOVE(>netdev->filters, nf, next);
 }
 g_free(nf->netdev_id);
-- 
2.5.0




[Qemu-devel] [PULL 11/14] rocker: allow user to specify rocker world by property

2016-03-06 Thread Jason Wang
From: Jiri Pirko 

Add property to specify rocker world. All ports will be assigned to this
world.

Signed-off-by: Jiri Pirko 
Signed-off-by: Jason Wang 
---
 hw/net/rocker/rocker.c | 29 -
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/hw/net/rocker/rocker.c b/hw/net/rocker/rocker.c
index 104c097..30f2ce4 100644
--- a/hw/net/rocker/rocker.c
+++ b/hw/net/rocker/rocker.c
@@ -43,6 +43,7 @@ struct rocker {
 
 /* switch configuration */
 char *name;  /* switch name */
+char *world_name;/* world name */
 uint32_t fp_ports;   /* front-panel port count */
 NICPeers *fp_ports_peers;
 MACAddr fp_start_macaddr;/* front-panel port 0 mac addr */
@@ -1286,6 +1287,18 @@ static void rocker_msix_uninit(Rocker *r)
 rocker_msix_vectors_unuse(r, ROCKER_MSIX_VEC_COUNT(r->fp_ports));
 }
 
+static World *rocker_world_type_by_name(Rocker *r, const char *name)
+{
+int i;
+
+for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
+if (strcmp(name, world_name(r->worlds[i])) == 0) {
+return r->worlds[i];
+   }
+}
+return NULL;
+}
+
 static int pci_rocker_init(PCIDevice *dev)
 {
 Rocker *r = to_rocker(dev);
@@ -1297,7 +1310,6 @@ static int pci_rocker_init(PCIDevice *dev)
 /* allocate worlds */
 
 r->worlds[ROCKER_WORLD_TYPE_OF_DPA] = of_dpa_world_alloc(r);
-r->world_dflt = r->worlds[ROCKER_WORLD_TYPE_OF_DPA];
 
 for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
 if (!r->worlds[i]) {
@@ -1306,6 +1318,19 @@ static int pci_rocker_init(PCIDevice *dev)
 }
 }
 
+if (!r->world_name) {
+r->world_name = 
g_strdup(world_name(r->worlds[ROCKER_WORLD_TYPE_OF_DPA]));
+}
+
+r->world_dflt = rocker_world_type_by_name(r, r->world_name);
+if (!r->world_dflt) {
+fprintf(stderr,
+"rocker: requested world \"%s\" does not exist\n",
+r->world_name);
+err = -EINVAL;
+goto err_world_type_by_name;
+}
+
 /* set up memory-mapped region at BAR0 */
 
 memory_region_init_io(>mmio, OBJECT(r), _mmio_ops, r,
@@ -1439,6 +1464,7 @@ err_duplicate:
 err_msix_init:
 object_unparent(OBJECT(>msix_bar));
 object_unparent(OBJECT(>mmio));
+err_world_type_by_name:
 err_world_alloc:
 for (i = 0; i < ROCKER_WORLD_TYPE_MAX; i++) {
 if (r->worlds[i]) {
@@ -1510,6 +1536,7 @@ static void rocker_reset(DeviceState *dev)
 
 static Property rocker_properties[] = {
 DEFINE_PROP_STRING("name", Rocker, name),
+DEFINE_PROP_STRING("world", Rocker, world_name),
 DEFINE_PROP_MACADDR("fp_start_macaddr", Rocker,
 fp_start_macaddr),
 DEFINE_PROP_UINT64("switch_id", Rocker,
-- 
2.5.0




[Qemu-devel] [PULL 12/14] filter: Add 'status' property for filter object

2016-03-06 Thread Jason Wang
From: zhanghailiang 

With this property, users can control if this filter is 'on'
or 'off'. The default behavior for filter is 'on'.

For some types of filters, they may need to react to status changing,
So here, we introduced status changing callback/notifier for filter class.

We will skip the disabled ('off') filter when delivering packets in net layer.

Signed-off-by: zhanghailiang 
Cc: Jason Wang 
Cc: Yang Hongyang 
Signed-off-by: Jason Wang 
---
 include/net/filter.h |  4 
 net/filter.c | 41 +
 qemu-options.hx  |  4 +++-
 3 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/include/net/filter.h b/include/net/filter.h
index 5639976..cfb1172 100644
--- a/include/net/filter.h
+++ b/include/net/filter.h
@@ -36,12 +36,15 @@ typedef ssize_t (FilterReceiveIOV)(NetFilterState *nc,
int iovcnt,
NetPacketSent *sent_cb);
 
+typedef void (FilterStatusChanged) (NetFilterState *nf, Error **errp);
+
 typedef struct NetFilterClass {
 ObjectClass parent_class;
 
 /* optional */
 FilterSetup *setup;
 FilterCleanup *cleanup;
+FilterStatusChanged *status_changed;
 /* mandatory */
 FilterReceiveIOV *receive_iov;
 } NetFilterClass;
@@ -55,6 +58,7 @@ struct NetFilterState {
 char *netdev_id;
 NetClientState *netdev;
 NetFilterDirection direction;
+bool on;
 QTAILQ_ENTRY(NetFilterState) next;
 };
 
diff --git a/net/filter.c b/net/filter.c
index 7cdbc6c..a08ef68 100644
--- a/net/filter.c
+++ b/net/filter.c
@@ -17,6 +17,11 @@
 #include "qom/object_interfaces.h"
 #include "qemu/iov.h"
 
+static inline bool qemu_can_skip_netfilter(NetFilterState *nf)
+{
+return !nf->on;
+}
+
 ssize_t qemu_netfilter_receive(NetFilterState *nf,
NetFilterDirection direction,
NetClientState *sender,
@@ -25,6 +30,9 @@ ssize_t qemu_netfilter_receive(NetFilterState *nf,
int iovcnt,
NetPacketSent *sent_cb)
 {
+if (qemu_can_skip_netfilter(nf)) {
+return 0;
+}
 if (nf->direction == direction ||
 nf->direction == NET_FILTER_DIRECTION_ALL) {
 return NETFILTER_GET_CLASS(OBJECT(nf))->receive_iov(
@@ -134,8 +142,38 @@ static void netfilter_set_direction(Object *obj, int 
direction, Error **errp)
 nf->direction = direction;
 }
 
+static char *netfilter_get_status(Object *obj, Error **errp)
+{
+NetFilterState *nf = NETFILTER(obj);
+
+return nf->on ? g_strdup("on") : g_strdup("off");
+}
+
+static void netfilter_set_status(Object *obj, const char *str, Error **errp)
+{
+NetFilterState *nf = NETFILTER(obj);
+NetFilterClass *nfc = NETFILTER_GET_CLASS(obj);
+
+if (strcmp(str, "on") && strcmp(str, "off")) {
+error_setg(errp, "Invalid value for netfilter status, "
+ "should be 'on' or 'off'");
+return;
+}
+if (nf->on == !strcmp(str, "on")) {
+return;
+}
+nf->on = !nf->on;
+if (nfc->status_changed) {
+nfc->status_changed(nf, errp);
+}
+}
+
 static void netfilter_init(Object *obj)
 {
+NetFilterState *nf = NETFILTER(obj);
+
+nf->on = true;
+
 object_property_add_str(obj, "netdev",
 netfilter_get_netdev_id, netfilter_set_netdev_id,
 NULL);
@@ -143,6 +181,9 @@ static void netfilter_init(Object *obj)
  NetFilterDirection_lookup,
  netfilter_get_direction, netfilter_set_direction,
  NULL);
+object_property_add_str(obj, "status",
+netfilter_get_status, netfilter_set_status,
+NULL);
 }
 
 static void netfilter_complete(UserCreatable *uc, Error **errp)
diff --git a/qemu-options.hx b/qemu-options.hx
index 9922cc5..9bf09ed 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3788,11 +3788,13 @@ version by providing the @var{passwordid} parameter. 
This provides
 the ID of a previously created @code{secret} object containing the
 password for decryption.
 
-@item -object 
filter-buffer,id=@var{id},netdev=@var{netdevid},interval=@var{t}[,queue=@var{all|rx|tx}]
+@item -object 
filter-buffer,id=@var{id},netdev=@var{netdevid},interval=@var{t}[,queue=@var{all|rx|tx}][,status=@var{on|off}]
 
 Interval @var{t} can't be 0, this filter batches the packet delivery: all
 packets arriving in a given interval on netdev @var{netdevid} are delayed
 until the end of the interval. Interval is in microseconds.
+@option{status} is optional that indicate whether the netfilter is
+on (enabled) or off (disabled), the default status for netfilter will be 'on'.
 
 queue @var{all|rx|tx} is an option that can be 

[Qemu-devel] [PULL 01/14] net: ne2000: check ring buffer control registers

2016-03-06 Thread Jason Wang
From: Prasad J Pandit 

Ne2000 NIC uses ring buffer of NE2000_MEM_SIZE(49152)
bytes to process network packets. Registers PSTART & PSTOP
define ring buffer size & location. Setting these registers
to invalid values could lead to infinite loop or OOB r/w
access issues. Add check to avoid it.

Reported-by: Yang Hongke 
Tested-by: Yang Hongke 
Signed-off-by: Prasad J Pandit 
Signed-off-by: Jason Wang 
---
 hw/net/ne2000.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/hw/net/ne2000.c b/hw/net/ne2000.c
index e408083..f0feaf9 100644
--- a/hw/net/ne2000.c
+++ b/hw/net/ne2000.c
@@ -155,6 +155,10 @@ static int ne2000_buffer_full(NE2000State *s)
 {
 int avail, index, boundary;
 
+if (s->stop <= s->start) {
+return 1;
+}
+
 index = s->curpag << 8;
 boundary = s->boundary << 8;
 if (index < boundary)
-- 
2.5.0




[Qemu-devel] [PULL 04/14] net: simplify net_init_tap_one logic

2016-03-06 Thread Jason Wang
From: Paolo Bonzini 

net_init_tap_one receives in vhostfdname a fd name from vhostfd= or
vhostfds=, or NULL if there is no vhostfd=/vhostfds=.  It is simpler
to just check vhostfdname, than it is to check for vhostfd= or
vhostfds=.  This also calms down Coverity, which otherwise thinks
that monitor_fd_param could dereference a NULL vhostfdname.

Signed-off-by: Paolo Bonzini 
Signed-off-by: Jason Wang 
---
 net/tap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/tap.c b/net/tap.c
index cfb6831..cd7a7fc 100644
--- a/net/tap.c
+++ b/net/tap.c
@@ -662,7 +662,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, 
NetClientState *peer,
 options.backend_type = VHOST_BACKEND_TYPE_KERNEL;
 options.net_backend = >nc;
 
-if (tap->has_vhostfd || tap->has_vhostfds) {
+if (vhostfdname) {
 vhostfd = monitor_fd_param(cur_mon, vhostfdname, );
 if (vhostfd == -1) {
 error_propagate(errp, err);
@@ -684,7 +684,7 @@ static void net_init_tap_one(const NetdevTapOptions *tap, 
NetClientState *peer,
"vhost-net requested but could not be initialized");
 return;
 }
-} else if (tap->has_vhostfd || tap->has_vhostfds) {
+} else if (vhostfdname) {
 error_setg(errp, "vhostfd= is not valid without vhost");
 }
 }
-- 
2.5.0




[Qemu-devel] [PULL 00/14] Net patches

2016-03-06 Thread Jason Wang
The following changes since commit 1464ad45cd6cdeb0b5c1a54d3d3791396e47e52f:

  Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2016-03-04' into 
staging (2016-03-06 11:53:27 +)

are available in the git repository at:

  https://github.com/jasowang/qemu.git tags/net-pull-request

for you to fetch changes up to a2f2e45c6edbba9e1961056fa77c696208b40c8e:

  net: check packet payload length (2016-03-07 10:15:48 +0800)



- a new netfilter implementation: mirror
- netfilter could be disabled and enabled through qom-set now
- fix netfilter crash when specifiying wrong parameters
- rocker switch now can allow user to specifiy world
- fix OOB access for ne2000


Jason Wang (1):
  net: filter: correctly remove filter from the list during finalization

Jiri Pirko (4):
  rocker: forbid to change world type
  rocker: return -ENOMEM in case of some world alloc fails
  rocker: add name field into WorldOps ale let world specify its name
  rocker: allow user to specify rocker world by property

Paolo Bonzini (1):
  net: simplify net_init_tap_one logic

Prasad J Pandit (2):
  net: ne2000: check ring buffer control registers
  net: check packet payload length

Thomas Huth (1):
  MAINTAINERS: Add entries for include/net/ files

Vincenzo Maffione (1):
  net: netmap: probe netmap interface for virtio-net header

Zhang Chen (2):
  net/filter-mirror:Add filter-mirror
  tests/test-filter-mirror:add filter-mirror unit test

zhanghailiang (2):
  filter: Add 'status' property for filter object
  filter-buffer: Add status_changed callback processing

 MAINTAINERS   |   2 +
 hw/net/ne2000.c   |   4 +
 hw/net/rocker/rocker.c|  38 -
 hw/net/rocker/rocker_fp.c |   5 ++
 hw/net/rocker/rocker_fp.h |   1 +
 hw/net/rocker/rocker_of_dpa.c |   1 +
 hw/net/rocker/rocker_world.c  |   7 +-
 hw/net/rocker/rocker_world.h  |   1 +
 include/net/filter.h  |   4 +
 net/Makefile.objs |   1 +
 net/checksum.c|  10 ++-
 net/filter-buffer.c   |  34 ++--
 net/filter-mirror.c   | 182 ++
 net/filter.c  |  44 +-
 net/netmap.c  |  59 +-
 net/tap.c |   4 +-
 qemu-options.hx   |   9 ++-
 tests/.gitignore  |   1 +
 tests/Makefile|   2 +
 tests/test-filter-mirror.c|  90 +
 vl.c  |   3 +-
 21 files changed, 460 insertions(+), 42 deletions(-)
 create mode 100644 net/filter-mirror.c
 create mode 100644 tests/test-filter-mirror.c



[Qemu-devel] [PULL 03/14] MAINTAINERS: Add entries for include/net/ files

2016-03-06 Thread Jason Wang
From: Thomas Huth 

The include/net/ files correspond to the files in the net/ directory,
thus there should be corresponding entries in the MAINTAINERS file.

Signed-off-by: Thomas Huth 
Signed-off-by: Jason Wang 
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 2f5a338..1fa9753 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1122,6 +1122,7 @@ Network device backends
 M: Jason Wang 
 S: Maintained
 F: net/
+F: include/net/
 T: git git://github.com/jasowang/qemu.git net
 
 Netmap network backend
@@ -1221,6 +1222,7 @@ M: Jan Kiszka 
 S: Maintained
 F: slirp/
 F: net/slirp.c
+F: include/net/slirp.h
 T: git git://git.kiszka.org/qemu.git queues/slirp
 
 Tracing
-- 
2.5.0




Re: [Qemu-devel] [RFC PATCH v1 02/10] exec: Do vmstate unregistration from cpu_exec_exit()

2016-03-06 Thread David Gibson
On Fri, Mar 04, 2016 at 12:24:13PM +0530, Bharata B Rao wrote:
> cpu_exec_init() does vmstate_register and register_savevm for the CPU device.
> These need to be undone from cpu_exec_exit(). These changes are needed to
> support CPU hot removal.
> 
> Signed-off-by: Bharata B Rao 
> ---
>  exec.c | 12 
>  1 file changed, 12 insertions(+)
> 
> diff --git a/exec.c b/exec.c
> index 7c3f747..b8eeb54 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -613,6 +613,8 @@ static void cpu_release_index(CPUState *cpu)
>  
>  void cpu_exec_exit(CPUState *cpu)
>  {
> +CPUClass *cc = CPU_GET_CLASS(cpu);
> +
>  #if defined(CONFIG_USER_ONLY)
>  cpu_list_lock();
>  #endif
> @@ -630,6 +632,16 @@ void cpu_exec_exit(CPUState *cpu)
>  #if defined(CONFIG_USER_ONLY)
>  cpu_list_unlock();
>  #endif
> +
> +if (cc->vmsd != NULL) {
> +vmstate_unregister(NULL, cc->vmsd, cpu);
> +}
> +#if defined(CPU_SAVE_VERSION) && !defined(CONFIG_USER_ONLY)
> +unregister_savevm(NULL, "cpu", cpu->env_ptr);
> +#endif
> +if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
> +vmstate_unregister(NULL, _cpu_common, cpu);
> +}
>  }
>  
>  void cpu_exec_init(CPUState *cpu, Error **errp)

Looking at the current cpu_exec_init() I'm seeing the
vmstate_register() calls which are balacned here, but not a
register_savevm().  Is that due to a change in cpu_exec_init() since
you first wrote this patch?

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [RFC PATCH v1 01/10] exec: Remove cpu from cpus list during cpu_exec_exit()

2016-03-06 Thread David Gibson
On Fri, Mar 04, 2016 at 12:24:12PM +0530, Bharata B Rao wrote:
> CPUState *cpu gets added to the cpus list during cpu_exec_init(). It
> should be removed from cpu_exec_exit().
> 
> cpu_exec_init() is called from generic CPU::instance_finalize and some
> archs like PowerPC call it from CPU unrealizefn. So ensure that we
> dequeue the cpu only once.
> 
> Now -1 value for cpu->cpu_index indicates that we have already dequeued
> the cpu for CONFIG_USER_ONLY case also.
> 
> Signed-off-by: Bharata B Rao 

Reviewed-by: David Gibson 
-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH 1/2] target-ppc: Add helpers for updating a CPU's SDR1 and external HPT

2016-03-06 Thread David Gibson
On Fri, Mar 04, 2016 at 10:59:17AM +0100, Thomas Huth wrote:
> On 04.03.2016 06:35, David Gibson wrote:
> > When a Power cpu with 64-bit hash MMU has it's hash page table (HPT)
> > pointer updated by a write to the SDR1 register we need to update some
> > derived variables.  Likewise, when the cpu is configured for an external
> > HPT (one not in the guest memory space) some derived variables need to be
> > updated.
> > 
> > Currently the logic for this is (partially) duplicated in ppc_store_sdr1()
> > and in spapr_cpu_reset().  In future we're going to need it in some other
> > places, so make some common helpers for this update.
> > 
> > In addition the new ppc_hash64_set_external_hpt() helper also updates
> > SDR1 in KVM - it's not updated by the normal runtime KVM <-> qemu CPU
> > synchronization.  In a sense this belongs logically in the
> > ppc_hash64_set_sdr1() helper, but that is called from
> > kvm_arch_get_registers() so can't itself call cpu_synchronize_state()
> > without infinite recursion.  In practice this doesn't matter because
> > the only other caller is TCG specific.
> > 
> > Currently there aren't situations where updating SDR1 at runtime in KVM
> > matters, but there are going to be in future.
> > 
> > Signed-off-by: David Gibson 
> > ---
> >  hw/ppc/spapr.c  | 13 ++---
> >  target-ppc/kvm.c| 15 +++
> >  target-ppc/kvm_ppc.h|  6 ++
> >  target-ppc/mmu-hash64.c | 42 ++
> >  target-ppc/mmu-hash64.h |  6 ++
> >  target-ppc/mmu_helper.c | 13 ++---
> >  6 files changed, 77 insertions(+), 18 deletions(-)
> > 
> > diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> > index e9d4abf..a88e3af 100644
> > --- a/hw/ppc/spapr.c
> > +++ b/hw/ppc/spapr.c
> > @@ -1196,17 +1196,8 @@ static void spapr_cpu_reset(void *opaque)
> >  
> >  env->spr[SPR_HIOR] = 0;
> >  
> > -env->external_htab = (uint8_t *)spapr->htab;
> > -env->htab_base = -1;
> > -/*
> > - * htab_mask is the mask used to normalize hash value to PTEG index.
> > - * htab_shift is log2 of hash table size.
> > - * We have 8 hpte per group, and each hpte is 16 bytes.
> > - * ie have 128 bytes per hpte entry.
> > - */
> > -env->htab_mask = (1ULL << (spapr->htab_shift - 7)) - 1;
> > -env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab |
> > -(spapr->htab_shift - 18);
> > +ppc_hash64_set_external_hpt(cpu, spapr->htab, spapr->htab_shift,
> > +_fatal);
> >  }
> >  
> >  static void spapr_create_nvram(sPAPRMachineState *spapr)
> > diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
> > index d67c169..99b3231 100644
> > --- a/target-ppc/kvm.c
> > +++ b/target-ppc/kvm.c
> > @@ -2537,3 +2537,18 @@ int kvmppc_enable_hwrng(void)
> >  
> >  return kvmppc_enable_hcall(kvm_state, H_RANDOM);
> >  }
> > +
> > +int kvmppc_update_sdr1(PowerPCCPU *cpu)
> > +{
> > +CPUState *cs = CPU(cpu);
> > +
> > +if (!kvm_enabled()) {
> > +return 0; /* nothing to do */
> > +}
> 
> Since you're updating more than SDR1 below ... Could you maybe add a
> "assert(cs->kvm_vcpu_dirty)" here, just to make sure that nobody
> accidentally ever calls this function without synchronizing the
> registers first?

Hmm.. that's a good idea, but it doesn't work so well with the one below.

> 
> > +/* The normal KVM_PUT_RUNTIME_STATE doesn't include SDR1, which is
> > + * why we need an explicit update for it.  KVM_PUT_RESET_STATE is
> > + * overkill, but this is a pretty rare operation, so it's simpler
> > + * than writing a special purpose updater */
> > +return kvm_arch_put_registers(cs, KVM_PUT_RESET_STATE);
> > +}
> 
> That indeed looks like a big overkill ... what about extracting the
> block from kvm_arch_put_registers() which sets the the "struct
> kvm_sregs" via KVM_SET_SREGS into a separate function, and then call
> that function here instead?
> kvm_arch_put_registers() is IMHO way too big anyway, so separating some
> code into external functions there would improve readability there, too.

Yeah, fair enough.  I'll split that up in a prelim patch for v2.

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


[Qemu-devel] [PATCHv2 1/3] target-ppc: Split out SREGS get/put functions

2016-03-06 Thread David Gibson
Currently the getting and setting of Power MMU registers (sregs) take up
large inline chunks of the kvm_arch_get_registers() and
kvm_arch_put_registers() functions.  Especially since there are two
variants (for Book-E and Book-S CPUs), only one of which will be used in
practice, this is pretty hard to read.

This patch splits these out into helper functions for clarity.  No
functional change is expected.

Signed-off-by: David Gibson 
---
 target-ppc/kvm.c | 421 ++-
 1 file changed, 228 insertions(+), 193 deletions(-)

diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index d67c169..8a762e8 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -867,6 +867,44 @@ static int kvm_put_vpa(CPUState *cs)
 }
 #endif /* TARGET_PPC64 */
 
+static int kvmppc_put_books_sregs(PowerPCCPU *cpu)
+{
+CPUPPCState *env = >env;
+struct kvm_sregs sregs;
+int i;
+
+sregs.pvr = env->spr[SPR_PVR];
+
+sregs.u.s.sdr1 = env->spr[SPR_SDR1];
+
+/* Sync SLB */
+#ifdef TARGET_PPC64
+for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
+sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
+if (env->slb[i].esid & SLB_ESID_V) {
+sregs.u.s.ppc64.slb[i].slbe |= i;
+}
+sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
+}
+#endif
+
+/* Sync SRs */
+for (i = 0; i < 16; i++) {
+sregs.u.s.ppc32.sr[i] = env->sr[i];
+}
+
+/* Sync BATs */
+for (i = 0; i < 8; i++) {
+/* Beware. We have to swap upper and lower bits here */
+sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
+| env->DBAT[1][i];
+sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
+| env->IBAT[1][i];
+}
+
+return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, );
+}
+
 int kvm_arch_put_registers(CPUState *cs, int level)
 {
 PowerPCCPU *cpu = POWERPC_CPU(cs);
@@ -920,39 +958,8 @@ int kvm_arch_put_registers(CPUState *cs, int level)
 }
 
 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
-struct kvm_sregs sregs;
-
-sregs.pvr = env->spr[SPR_PVR];
-
-sregs.u.s.sdr1 = env->spr[SPR_SDR1];
-
-/* Sync SLB */
-#ifdef TARGET_PPC64
-for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
-sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
-if (env->slb[i].esid & SLB_ESID_V) {
-sregs.u.s.ppc64.slb[i].slbe |= i;
-}
-sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
-}
-#endif
-
-/* Sync SRs */
-for (i = 0; i < 16; i++) {
-sregs.u.s.ppc32.sr[i] = env->sr[i];
-}
-
-/* Sync BATs */
-for (i = 0; i < 8; i++) {
-/* Beware. We have to swap upper and lower bits here */
-sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
-| env->DBAT[1][i];
-sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
-| env->IBAT[1][i];
-}
-
-ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, );
-if (ret) {
+ret = kvmppc_put_books_sregs(cpu);
+if (ret < 0) {
 return ret;
 }
 }
@@ -1014,12 +1021,197 @@ static void kvm_sync_excp(CPUPPCState *env, int 
vector, int ivor)
  env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
 }
 
+static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
+{
+CPUPPCState *env = >env;
+struct kvm_sregs sregs;
+int ret;
+
+ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, );
+if (ret < 0) {
+return ret;
+}
+
+if (sregs.u.e.features & KVM_SREGS_E_BASE) {
+env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
+env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
+env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
+env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
+env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
+env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
+env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
+env->spr[SPR_DECR] = sregs.u.e.dec;
+env->spr[SPR_TBL] = sregs.u.e.tb & 0x;
+env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
+env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
+}
+
+if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
+env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
+env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
+env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
+env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
+env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
+}
+
+if (sregs.u.e.features & KVM_SREGS_E_64) {
+env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
+}
+
+if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
+env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
+}
+
+if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
+env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
+kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
+ 

[Qemu-devel] [PATCHv2 3/3] target-ppc: Eliminate kvmppc_kern_htab global

2016-03-06 Thread David Gibson
fa48b43 "target-ppc: Remove hack for ppc_hash64_load_hpte*() with HV KVM"
purports to remove a hack in the handling of hash page tables (HPTs)
managed by KVM instead of qemu.  However, it actually went in the wrong
direction.

That patch requires anything looking for an external HPT (that is one not
managed by the guest itself) to check both env->external_htab (for a qemu
managed HPT) and kvmppc_kern_htab (for a KVM managed HPT).  That's a
problem because kvmppc_kern_htab is local to mmu-hash64.c, but some places
which need to check for an external HPT are outside that, such as
kvm_arch_get_registers().  The latter was subtly broken by the earlier
patch such that gdbstub can no longer access memory.

Basically a KVM managed HPT is much more like a qemu managed HPT than it is
like a guest managed HPT, so the original "hack" was actually on the right
track.

This partially reverts fa48b43, so we again mark a KVM managed external HPT
by putting a special but non-NULL value in env->external_htab.  It then
goes further, using that marker to eliminate the kvmppc_kern_htab global
entirely.  The ppc_hash64_set_external_hpt() helper function is extended
to set that marker if passed a NULL value (if you're setting an external
HPT, but don't have an actual HPT to set, the assumption is that it must
be a KVM managed HPT).

This also has some flow-on changes to the HPT access helpers, required by
the above changes.

Reported-by: Greg Kurz 
Signed-off-by: David Gibson 
Reviewed-by: Thomas Huth 
---
 hw/ppc/spapr.c  |  3 +--
 hw/ppc/spapr_hcall.c| 10 +-
 target-ppc/mmu-hash64.c | 40 ++--
 target-ppc/mmu-hash64.h |  9 +++--
 4 files changed, 27 insertions(+), 35 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 72a018b..43708a2 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1091,7 +1091,7 @@ static void spapr_reallocate_hpt(sPAPRMachineState 
*spapr, int shift,
 }
 
 spapr->htab_shift = shift;
-kvmppc_kern_htab = true;
+spapr->htab = NULL;
 } else {
 /* kernel-side HPT not needed, allocate in userspace instead */
 size_t size = 1ULL << shift;
@@ -1106,7 +1106,6 @@ static void spapr_reallocate_hpt(sPAPRMachineState 
*spapr, int shift,
 
 memset(spapr->htab, 0, size);
 spapr->htab_shift = shift;
-kvmppc_kern_htab = false;
 
 for (i = 0; i < size / HASH_PTE_SIZE_64; i++) {
 DIRTY_HPTE(HPTE(spapr->htab, i));
diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 1733482..b2b1b93 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -122,17 +122,17 @@ static target_ulong h_enter(PowerPCCPU *cpu, 
sPAPRMachineState *spapr,
 break;
 }
 }
-ppc_hash64_stop_access(token);
+ppc_hash64_stop_access(cpu, token);
 if (index == 8) {
 return H_PTEG_FULL;
 }
 } else {
 token = ppc_hash64_start_access(cpu, pte_index);
 if (ppc_hash64_load_hpte0(cpu, token, 0) & HPTE64_V_VALID) {
-ppc_hash64_stop_access(token);
+ppc_hash64_stop_access(cpu, token);
 return H_PTEG_FULL;
 }
-ppc_hash64_stop_access(token);
+ppc_hash64_stop_access(cpu, token);
 }
 
 ppc_hash64_store_hpte(cpu, pte_index + index,
@@ -165,7 +165,7 @@ static RemoveResult remove_hpte(PowerPCCPU *cpu, 
target_ulong ptex,
 token = ppc_hash64_start_access(cpu, ptex);
 v = ppc_hash64_load_hpte0(cpu, token, 0);
 r = ppc_hash64_load_hpte1(cpu, token, 0);
-ppc_hash64_stop_access(token);
+ppc_hash64_stop_access(cpu, token);
 
 if ((v & HPTE64_V_VALID) == 0 ||
 ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) ||
@@ -288,7 +288,7 @@ static target_ulong h_protect(PowerPCCPU *cpu, 
sPAPRMachineState *spapr,
 token = ppc_hash64_start_access(cpu, pte_index);
 v = ppc_hash64_load_hpte0(cpu, token, 0);
 r = ppc_hash64_load_hpte1(cpu, token, 0);
-ppc_hash64_stop_access(token);
+ppc_hash64_stop_access(cpu, token);
 
 if ((v & HPTE64_V_VALID) == 0 ||
 ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) {
diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
index 7b5200b..a9ae0b3 100644
--- a/target-ppc/mmu-hash64.c
+++ b/target-ppc/mmu-hash64.c
@@ -36,10 +36,11 @@
 #endif
 
 /*
- * Used to indicate whether we have allocated htab in the
- * host kernel
+ * Used to indicate that a CPU has it's hash page table (HPT) managed
+ * within the host kernel
  */
-bool kvmppc_kern_htab;
+#define MMU_HASH64_KVM_MANAGED_HPT  ((void *)-1)
+
 /*
  * SLB handling
  */
@@ -283,7 +284,11 @@ void ppc_hash64_set_external_hpt(PowerPCCPU *cpu, void 
*hpt, int shift,
 
 cpu_synchronize_state(CPU(cpu));
 
-env->external_htab = hpt;
+if (hpt) {
+env->external_htab = hpt;
+} else {
+env->external_htab 

[Qemu-devel] [PATCHv2 2/3] target-ppc: Add helpers for updating a CPU's SDR1 and external HPT

2016-03-06 Thread David Gibson
When a Power cpu with 64-bit hash MMU has it's hash page table (HPT)
pointer updated by a write to the SDR1 register we need to update some
derived variables.  Likewise, when the cpu is configured for an external
HPT (one not in the guest memory space) some derived variables need to be
updated.

Currently the logic for this is (partially) duplicated in ppc_store_sdr1()
and in spapr_cpu_reset().  In future we're going to need it in some other
places, so make some common helpers for this update.

In addition the new ppc_hash64_set_external_hpt() helper also updates
SDR1 in KVM - it's not updated by the normal runtime KVM <-> qemu CPU
synchronization.  In a sense this belongs logically in the
ppc_hash64_set_sdr1() helper, but that is called from
kvm_arch_get_registers() so can't itself call cpu_synchronize_state()
without infinite recursion.  In practice this doesn't matter because
the only other caller is TCG specific.

Currently there aren't situations where updating SDR1 at runtime in KVM
matters, but there are going to be in future.

Signed-off-by: David Gibson 
---
 hw/ppc/spapr.c  | 13 ++---
 target-ppc/kvm.c|  2 +-
 target-ppc/kvm_ppc.h|  6 ++
 target-ppc/mmu-hash64.c | 43 +++
 target-ppc/mmu-hash64.h |  6 ++
 target-ppc/mmu_helper.c | 13 ++---
 6 files changed, 64 insertions(+), 19 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 64c4acc..72a018b 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -1196,17 +1196,8 @@ static void spapr_cpu_reset(void *opaque)
 
 env->spr[SPR_HIOR] = 0;
 
-env->external_htab = (uint8_t *)spapr->htab;
-env->htab_base = -1;
-/*
- * htab_mask is the mask used to normalize hash value to PTEG index.
- * htab_shift is log2 of hash table size.
- * We have 8 hpte per group, and each hpte is 16 bytes.
- * ie have 128 bytes per hpte entry.
- */
-env->htab_mask = (1ULL << (spapr->htab_shift - 7)) - 1;
-env->spr[SPR_SDR1] = (target_ulong)(uintptr_t)spapr->htab |
-(spapr->htab_shift - 18);
+ppc_hash64_set_external_hpt(cpu, spapr->htab, spapr->htab_shift,
+_fatal);
 }
 
 static void spapr_create_nvram(sPAPRMachineState *spapr)
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 8a762e8..380dff6 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -867,7 +867,7 @@ static int kvm_put_vpa(CPUState *cs)
 }
 #endif /* TARGET_PPC64 */
 
-static int kvmppc_put_books_sregs(PowerPCCPU *cpu)
+int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 {
 CPUPPCState *env = >env;
 struct kvm_sregs sregs;
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index fd64c44..fc79312 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -55,6 +55,7 @@ void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong 
pte_index,
  target_ulong pte0, target_ulong pte1);
 bool kvmppc_has_cap_fixup_hcalls(void);
 int kvmppc_enable_hwrng(void);
+int kvmppc_put_books_sregs(PowerPCCPU *cpu);
 
 #else
 
@@ -246,6 +247,11 @@ static inline int kvmppc_enable_hwrng(void)
 {
 return -1;
 }
+
+static inline int kvmppc_put_books_sregs(PowerPCCPU *cpu)
+{
+abort();
+}
 #endif
 
 #ifndef CONFIG_KVM
diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c
index 9c58fbf..7b5200b 100644
--- a/target-ppc/mmu-hash64.c
+++ b/target-ppc/mmu-hash64.c
@@ -258,6 +258,49 @@ target_ulong helper_load_slb_vsid(CPUPPCState *env, 
target_ulong rb)
 /*
  * 64-bit hash table MMU handling
  */
+void ppc_hash64_set_sdr1(PowerPCCPU *cpu, target_ulong value,
+ Error **errp)
+{
+CPUPPCState *env = >env;
+target_ulong htabsize = value & SDR_64_HTABSIZE;
+
+env->spr[SPR_SDR1] = value;
+if (htabsize > 28) {
+error_setg(errp,
+   "Invalid HTABSIZE 0x" TARGET_FMT_lx" stored in SDR1",
+   htabsize);
+htabsize = 28;
+}
+env->htab_mask = (1ULL << (htabsize + 18 - 7)) - 1;
+env->htab_base = value & SDR_64_HTABORG;
+}
+
+void ppc_hash64_set_external_hpt(PowerPCCPU *cpu, void *hpt, int shift,
+ Error **errp)
+{
+CPUPPCState *env = >env;
+Error *local_err = NULL;
+
+cpu_synchronize_state(CPU(cpu));
+
+env->external_htab = hpt;
+ppc_hash64_set_sdr1(cpu, (target_ulong)(uintptr_t)hpt | (shift - 18),
+_err);
+if (local_err) {
+error_propagate(errp, local_err);
+return;
+}
+
+/* Not strictly necessary, but makes it clearer that an external
+ * htab is in use when debugging */
+env->htab_base = -1;
+
+if (kvm_enabled()) {
+if (kvmppc_put_books_sregs(cpu) < 0) {
+error_setg(errp, "Unable to update SDR1 in KVM");
+}
+}
+}
 
 static int ppc_hash64_pte_prot(PowerPCCPU *cpu,
ppc_slb_t *slb, ppc_hash_pte64_t pte)
diff --git 

[Qemu-devel] [PATCHv2 0/3] target-ppc: Clean up handling of SDR1 and external HPTs

2016-03-06 Thread David Gibson
These patches cleans up handling of SDR1 (master page table pointer
register for Power) and related cases with an external (i.e. managed
by qemu or KVM, rather than the guest) hash page table (HPT).

I wouldn't push 1/3 or 2/3 on their own after the soft freeze, except
that they simplify 3/3, which fixes a real regression.

Changes since v1:
  * Split out SREGS getters/putters instead of creating a special one just for 
SDR1

David Gibson (3):
  target-ppc: Split out SREGS get/put functions
  target-ppc: Add helpers for updating a CPU's SDR1 and external HPT
  target-ppc: Eliminate kvmppc_kern_htab global

 hw/ppc/spapr.c  |  16 +-
 hw/ppc/spapr_hcall.c|  10 +-
 target-ppc/kvm.c| 421 ++--
 target-ppc/kvm_ppc.h|   6 +
 target-ppc/mmu-hash64.c |  81 +++---
 target-ppc/mmu-hash64.h |  11 +-
 target-ppc/mmu_helper.c |  13 +-
 7 files changed, 315 insertions(+), 243 deletions(-)

-- 
2.5.0




Re: [Qemu-devel] [Qemu-arm] [PATCH] Fix bug: SRS instructions would trap to EL3 in Secure EL1 even if specified mode was not monitor mode.

2016-03-06 Thread Peter Maydell
On 7 March 2016 at 02:04, Sergey Fedorov  wrote:
> On 23.02.2016 01:25, Ralf-Philipp Weinmann wrote:
>>
>> According to the ARMv8 Architecture reference manual [F6.1.203], ALL
>> of the following conditions need to be met for SRS to trap to EL3:
>> * It is executed at Secure PL1.
>> * The specified mode is monitor mode.
>> * EL3 is using AArch64.
>
>
> The code changes in the patch looks good for me. But anyway, you should:
>  (1) tweak the commit message title according to the requirements [1] and
>  (2) add your "Singed-off-by:" line [2]

Ralf-Philipp sent a second version of this email which did
have the signed-off-by line, so this patch is already in
master (as commit ba63cf47a9304113); I did tweak the
commit message as I applied it.

thanks
-- PMM



Re: [Qemu-devel] [PATCH v3 1/4] adb-keys.h: initial commit

2016-03-06 Thread Programmingkid

On Mar 5, 2016, at 7:02 PM, Eric Blake wrote:

> On 03/04/2016 10:15 PM, Programmingkid wrote:
>> 
> 
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a 
>> copy
>> + * of this software and associated documentation files (the "Software"), to 
>> deal
>> + * in the Software without restriction, including without limitation the 
>> rights
>> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
>> + * copies of the Software, and to permit persons to whom the Software is
>> + * furnished to do so, subject to the following conditions:
> 
> Any reason you chose the weaker BSD license instead of the
> project-default of GPLv2+?

Could you provide a file name or a link that has the exact text you wish to see 
in the file?

> Can you provide a URL to the source document that you used for coming up
> with this list, so that we can better double-check your work?  (Do it as
> a comment in this file, if you have one)

Does this look good:

/*
 *  adb-keys.h
 *
 *  Provides an enum of all the Macintosh keycodes.
 *  Note: keys like Power, volume related, and eject are handled at a lower
 *level and are not available to QEMU. That doesn't mean we can't
 *substitute one key for another. The function keys like F1 make a good
 *substitute for these keys. This can be done in the GTK, SDL, or Cocoa
 *code.
 *  Additional information: http://stackoverflow.com/questions/3202629/where-can
 *  -i-find-a-list-of-mac-virtual-key-codes
 */




[Qemu-devel] Google Summer of Code 2016: Make Mac OS 9 boot successfully with all Apple extensions

2016-03-06 Thread Programmingkid
=== TITLE ===

Make all versions of Mac OS 9 boot successfully in QEMU with all Apple 
extensions
 
 '''Summary:''' Short description of the project

Mac OS 9.2 does boot in QEMU without extensions, but certain extensions that 
ship with Mac OS 9.2 need to be disabled or else they will cause booting to 
fail. We need any issues with OpenBIOS and QEMU fixed so that Mac OS 9 will 
boot successfully in QEMU. 
 
 Detailed description of the project.

These are the known extensions that cause Mac OS 9.2 to crash:

• Apple Audio Extension
• Apple Enet
• Multiprocessing folder
• Open Transport aslm modules
• Text Encoding Converter

Without them some applications running in Mac OS 9 are severely limited or 
simply refuse to run.
 
 '''Links:'''

http://wiki.qemu.org/PowerPC
http://www.emaculation.com/forum/viewtopic.php?f=34=7047=3044078243e3da8a27009ae46b7a40db

 
 '''Details:'''

 * Skill level: advanced
 * Language: C, PowerPC assembly
 * Mentor: None currently
 * Suggested by: John Arbuckle programmingk...@gmail.com




Re: [Qemu-devel] [Qemu-arm] [PATCH] Fix bug: SRS instructions would trap to EL3 in Secure EL1 even if specified mode was not monitor mode.

2016-03-06 Thread Sergey Fedorov

On 23.02.2016 01:25, Ralf-Philipp Weinmann wrote:

According to the ARMv8 Architecture reference manual [F6.1.203], ALL
of the following conditions need to be met for SRS to trap to EL3:
* It is executed at Secure PL1.
* The specified mode is monitor mode.
* EL3 is using AArch64.


The code changes in the patch looks good for me. But anyway, you should:
 (1) tweak the commit message title according to the requirements [1] and
 (2) add your "Singed-off-by:" line [2]

Actually, you'd better read the whole page [3] carefully.

[1] 
http://wiki.qemu.org/Contribute/SubmitAPatch#Write_a_meaningful_commit_message
[2] 
http://wiki.qemu.org/Contribute/SubmitAPatch#Patch_emails_must_include_a_Signed-off-by:_line

[3] http://wiki.qemu.org/Contribute/SubmitAPatch

Kind regards,
Sergey


---
  target-arm/translate.c | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/target-arm/translate.c b/target-arm/translate.c
index c29c47f..a7688bb 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -7582,7 +7582,8 @@ static void gen_srs(DisasContext *s,
  bool undef = false;
  
  /* SRS is:

- * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
+ * - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1 and
+ *   mode is monitor mode
   * - UNDEFINED in Hyp mode
   * - UNPREDICTABLE in User or System mode
   * - UNPREDICTABLE if the specified mode is:
@@ -7592,7 +7593,7 @@ static void gen_srs(DisasContext *s,
   * -- Monitor, if we are Non-secure
   * For the UNPREDICTABLE cases we choose to UNDEF.
   */
-if (s->current_el == 1 && !s->ns) {
+if (s->current_el == 1 && !s->ns && mode == ARM_CPU_MODE_MON) {
  gen_exception_insn(s, 4, EXCP_UDEF, syn_uncategorized(), 3);
  return;
  }





Re: [Qemu-devel] [Nbd] [RFC 1/1] nbd (specification): add NBD_CMD_WRITE_ZEROES command

2016-03-06 Thread Denis V. Lunev

On 03/06/2016 01:28 PM, Wouter Verhelst wrote:

Hi all,

On Fri, Mar 04, 2016 at 03:03:26PM +0100, Paolo Bonzini wrote:

NBD-wise, I think the TRIM command is good as it is, and
NBD_CMD_WRITE_ZEROES should be added like Den is doing.

It also makes sense to use trimming to implement NBD_CMD_WRITE_ZEROES,
but it should be explicitly requested by the user.  For this, my
suggestion is that NBD_CMD_WRITE_ZEROES should have an
NBD_FLAG_TRY_TRIM flag in bit 16.  If specified, the backend can use a
zero-writing mechanism that trims, _but_ it must ensure that the bytes
read as zero.  If it cannot ensure that, it must not trim and it
should instead do a full write.  This is similar to the SCSI command
WRITE SAME (when the command payload is all zeroes).  Like Kevin said,
it also happens to map nicely to the QEMU block device layer.

That seems like a sensible approach, yes.

Would one of you who feels strongly about this be willing to write up a
proposed spec for that? I could do it, but since I'm not 100% sure I
understand all the specific requirements, I'm uncomfortable doing so.

If that doesn't raise any obvious issues that I'm aware of, I'd be happy
to add it to the "experimental" section of the proto.md file.

(speaking of which, I notice that the STARTTLS patches got merged into
qemu, so I've moved the description of that to the main body and out of
the "experimental" part of that document)


I am going to send next RFC next Wednesday.
Just waited other opinions not from QEMU side.

Den



[Qemu-devel] [PATCH v2] net.c: Moved large array in nc_sendv_compat from the stack to the heap

2016-03-06 Thread Nikos Filippakis
Allocate large array in nc_sendv_compat on the heap to reduce stack frame size, 
as stated in the BiteSizedTasks wiki page.

Signed-off-by: Nikos Filippakis 
---
 net/net.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/net/net.c b/net/net.c
index aebf753..ac78aba 100644
--- a/net/net.c
+++ b/net/net.c
@@ -710,23 +710,28 @@ ssize_t qemu_send_packet_raw(NetClientState *nc, const 
uint8_t *buf, int size)
 static ssize_t nc_sendv_compat(NetClientState *nc, const struct iovec *iov,
int iovcnt, unsigned flags)
 {
-uint8_t buf[NET_BUFSIZE];
-uint8_t *buffer;
+uint8_t *buffer, *dynbuf = NULL;
 size_t offset;
+ssize_t ret;
 
 if (iovcnt == 1) {
 buffer = iov[0].iov_base;
 offset = iov[0].iov_len;
 } else {
-buffer = buf;
-offset = iov_to_buf(iov, iovcnt, 0, buf, sizeof(buf));
+buffer = dynbuf = g_new(uint8_t, NET_BUFSIZE);
+offset = iov_to_buf(iov, iovcnt, 0, buffer,
+NET_BUFSIZE * sizeof(uint8_t));
 }
 
 if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) {
-return nc->info->receive_raw(nc, buffer, offset);
+ret = nc->info->receive_raw(nc, buffer, offset);
 } else {
-return nc->info->receive(nc, buffer, offset);
+ret = nc->info->receive(nc, buffer, offset);
 }
+
+g_free(dynbuf);
+
+return ret;
 }
 
 ssize_t qemu_deliver_packet_iov(NetClientState *sender,
-- 
1.9.1




Re: [Qemu-devel] [PATCH] net.c: Moved large array in nc_sendv_compat from the stack to the heap

2016-03-06 Thread Nikos Filippakis
Thank you for your comments!

On Sun, Mar 6, 2016 at 9:47 AM, Alex Bennée  wrote:
>
>
> Nikos Filippakis  writes:
>
> > Hello everyone! I am interested in getting to know the codebase a little 
> > better
> > so that I can eventually apply for a GSOC position.
> > This is my first attempt at posting a patch to a mailing list, any feedback
> > is greatly appreciated.
>
> OK first things first this sort of meta comment belongs in the cover
> letter. However for a single patch you may want to put such things below
> the --- in the commit message as that will get stripped when the
> maintainer eventually applies the patch. Otherwise your meta-comments
> will end up in the version log ;-)
>
> You'll see people use the --- area to keep version notes as patches go
> through revisions.
>

I thought that could be considered part of the cover letter, didn't
realize it would end up on the version log. Sorry about that (:

> >
> > Signed-off-by: Nikos Filippakis 
> > ---
> >  net/net.c | 17 -
> >  1 file changed, 12 insertions(+), 5 deletions(-)
> >
> > diff --git a/net/net.c b/net/net.c
> > index aebf753..79e9d7c 100644
> > --- a/net/net.c
> > +++ b/net/net.c
> > @@ -710,23 +710,30 @@ ssize_t qemu_send_packet_raw(NetClientState *nc, 
> > const uint8_t *buf, int size)
> >  static ssize_t nc_sendv_compat(NetClientState *nc, const struct iovec *iov,
> > int iovcnt, unsigned flags)
> >  {
> > -uint8_t buf[NET_BUFSIZE];
> >  uint8_t *buffer;
> >  size_t offset;
> > +ssize_t ret;
>
> With that said your comment needs to explain why you've just made the
> change. I see NET_BUFSIZE is quite large so maybe this should be a
> clean-up across the rest of the code-base, what's so special about this
> function? Have you measured any difference in performance?
>

This method is one of several mentioned on the wiki as having big
stack frames because of such arrays, something
someone new to the project could easily fix, either by moving it to
the heap or reducing the array size. Since further
down there is a call to memcpy with NET_BUFSIZE length, I thought I'd
just move it to the heap.

Nothing special about this method, I'm planning to do the same with
the others, just trying to get some
familiarity with the mailing list.

Besides, I'm not sure if I should put such small changes to different
files in many small commits, or a large one.

> >
> >  if (iovcnt == 1) {
> >  buffer = iov[0].iov_base;
> >  offset = iov[0].iov_len;
> >  } else {
> > -buffer = buf;
> > -offset = iov_to_buf(iov, iovcnt, 0, buf, sizeof(buf));
> > +buffer = g_malloc(NET_BUFSIZE * sizeof(uint8_t));
> > +offset = iov_to_buf(iov, iovcnt, 0, buffer,
> > +NET_BUFSIZE * sizeof(uint8_t));
> >  }
> >
> >  if (flags & QEMU_NET_PACKET_FLAG_RAW && nc->info->receive_raw) {
> > -return nc->info->receive_raw(nc, buffer, offset);
> > +ret = nc->info->receive_raw(nc, buffer, offset);
> >  } else {
> > -return nc->info->receive(nc, buffer, offset);
> > +ret = nc->info->receive(nc, buffer, offset);
> >  }
> > +
> > +if (iovcnt != 1) {
> > +g_free(buffer);
> > +}
>
> This is a short function so you can get away with it but this sort of
> logic can be confusing ("The iovec count was 1 therefor I should have
> allocated a buffer" vs "I have an allocated buffer"). In general you
> should know the various g_* functions tolerate NULLs well so maybe you
> can structure the code differently (skipping the details ;-):
>
> uint8_t *buffer, *dynbuf = NULL;
>
> if (iovcnt == 1)
> {
>   buffer = ...
> } else {
>   buffer = dynbuf = g_malloc(NET_BUFSIZE * sizeof(uint8_t));
>   ...
> }
> ...
>
> g_free(dynbuf)
>

You're right, I didn't quite like the way I did it either. I'm
resubmitting it, hopefully fixing these mistakes.

> > +
> > +return ret;
> >  }
> >
> >  ssize_t qemu_deliver_packet_iov(NetClientState *sender,
>
>
> --
> Alex Bennée



Re: [Qemu-devel] [PATCHv9 0/10] slirp: Adding IPv6 support to Qemu -net user mode

2016-03-06 Thread Samuel Thibault
Hello,

Jan Kiszka, on Fri 04 Mar 2016 16:50:32 +0100, wrote:
> On 2016-03-04 09:41, Thomas Huth wrote:
> > On 22.02.2016 20:28, Samuel Thibault wrote:
> >> Hello,
> >>
> >> This is another respin of IPv6 in Qemu -net user mode.
> >>
> >> These patches add ICMPv6, NDP, make UDP and TCP compatible with IPv6, and 
> >> add
> >> TFTP over IPv6.
> > 
> > *ping*
> > 
> >  Jan, Jason,
> > 
> > could you please have a look at this series? Would be cool to include it
> > for 2.6 so that we'd finally have IPv6 support in Slirp, too.
> > As far as I could see, the patches look fine now - there was just one
> > rather cosmetic issue left in patch 9/10, but it should also be ok in
> > the current shape already, I think, so IMHO no need for a respin.
> 
> As indicated before, we need someone else than me to manage the slirp
> system. I'm out of bandwidth for this task, sorry.

Ok, Jason?

I'm really surprised that a patch that has been reviewed two times (by
me then by Thomas) seems so difficult to get it.  I understand that
it's far from trivial, but I have concerns with how qemu can manage
contributions.

Samuel



Re: [Qemu-devel] [PATCH] linux-user: Check array bounds in errno conversion

2016-03-06 Thread Laurent Vivier


Le 03/03/2016 19:35, Peter Maydell a écrit :
> From: Timothy E Baldwin 
> 
> Check array bounds in host_to_target_errno() and target_to_host_errno().
> 
> Signed-off-by: Timothy Edward Baldwin 
> Message-id: 
> 1441497448-32489-2-git-send-email-t.e.baldwi...@members.leeds.ac.uk
> [PMM: Add a lower-bound check, use braces on if(), tweak commit message]
> Signed-off-by: Peter Maydell 

Reviewed-by: Laurent Vivier 

> ---
> This is a bugfix patch fished out of Timothy's signal-race-fixes
> patch series. We had a previous go-around doing this with unsigned
> integers, but that doesn't work.
> 
>  linux-user/syscall.c | 8 ++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> index 9517531..f9dcdd4 100644
> --- a/linux-user/syscall.c
> +++ b/linux-user/syscall.c
> @@ -617,15 +617,19 @@ static uint16_t 
> host_to_target_errno_table[ERRNO_TABLE_SIZE] = {
>  
>  static inline int host_to_target_errno(int err)
>  {
> -if(host_to_target_errno_table[err])
> +if (err >= 0 && err < ERRNO_TABLE_SIZE &&
> +host_to_target_errno_table[err]) {
>  return host_to_target_errno_table[err];
> +}
>  return err;
>  }
>  
>  static inline int target_to_host_errno(int err)
>  {
> -if (target_to_host_errno_table[err])
> +if (err >= 0 && err < ERRNO_TABLE_SIZE &&
> +target_to_host_errno_table[err]) {
>  return target_to_host_errno_table[err];
> +}
>  return err;
>  }
>  
> 



Re: [Qemu-devel] [PULL v2 00/12] QAPI patches for 2016-03-04

2016-03-06 Thread Peter Maydell
On 5 March 2016 at 16:50, Markus Armbruster  wrote:
> v2: Missing S-o-b supplied
>
> The following changes since commit 3c0f12df65da872d5fbccae469f2cb21ed1c03b7:
>
>   Merge remote-tracking branch 
> 'remotes/pmaydell/tags/pull-target-arm-20160304' into staging (2016-03-04 
> 11:46:32 +)
>
> are available in the git repository at:
>
>   git://repo.or.cz/qemu/armbru.git tags/pull-qapi-2016-03-04
>
> for you to fetch changes up to 48eb62a74fc2d6b0ae9e5f414304a85cfbf33066:
>
>   qapi: Drop useless 'data' member of unions (2016-03-05 10:42:06 +0100)
>
> 
> QAPI patches for 2016-03-04
>

Applied, thanks.

-- PMM



[Qemu-devel] [PATCH] hw/acpi: fix Q35 support for legacy Windows OS

2016-03-06 Thread Marcel Apfelbaum
Legacy Windows operating systems like Windows XP and Windows 2003
require _DIS method to be present for all interrupt links.

PC machines already have a no-op implemented for GSI links, add
it also in Q35.

Signed-off-by: Marcel Apfelbaum 
---

Hi,

I tested this patch with WinXP and Win 2003, but also with Win 10, and Fedora.
This solves a BSOD early in the setup process.

WinXP/2003 can be tested using:
 -device piix3-ide,id=legacyide \
 -drive file=winxp-q35.qcow2,if=none,id=disk -device 
ide-hd,drive=disk,bus=legacyide.0 \
 -drive file=xp_sp3.iso,if=none,id=cdrom -device 
ide-cd,drive=cdrom,bus=legacyide.1 

Please note that you have to use the piix3-ide because Win2003 (and probably XP)
does not have Q35 AHCI drivers inbox.

Thanks,
Marcel

 hw/i386/acpi-build.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c
index 52c9470..4c66568 100644
--- a/hw/i386/acpi-build.c
+++ b/hw/i386/acpi-build.c
@@ -1509,6 +1509,9 @@ static Aml *build_gsi_link_dev(const char *name, uint8_t 
uid, uint8_t gsi)
 
 aml_append(dev, aml_name_decl("_CRS", crs));
 
+method = aml_method("_DIS", 0, AML_NOTSERIALIZED);
+aml_append(dev, method);
+
 method = aml_method("_SRS", 1, AML_NOTSERIALIZED);
 aml_append(dev, method);
 
-- 
2.4.3




[Qemu-devel] [PATCH v2] ui/console: add escape sequence \e[5,6n

2016-03-06 Thread Ren Kimura
Add support of escape sequence "\e[5n" and "\e[6n" to console.
"\e[5n" reports status of console and it always succeed
in virtual console.
"\e[6n" reports now cursor position in console.

Signed-off-by: Ren Kimura 
---
 ui/console.c | 35 +--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/ui/console.c b/ui/console.c
index ae61382..537a509 100644
--- a/ui/console.c
+++ b/ui/console.c
@@ -757,6 +757,25 @@ static void console_clear_xy(QemuConsole *s, int x, int y)
 update_xy(s, x, y);
 }
 
+static void console_respond_str(QemuConsole *s, const char *buf)
+{
+TextCell *c;
+int y1;
+while (*buf) {
+if (s->x >= s->width) {
+s->x = 0;
+console_put_lf(s);
+}
+y1 = (s->y_base + s->y) % s->total_height;
+c = >cells[y1 * s->width + s->x];
+c->ch = *buf;
+c->t_attrib = s->t_attrib;
+update_xy(s, s->x, s->y);
+s->x++;
+buf++;
+}
+}
+
 /* set cursor, checking bounds */
 static void set_cursor(QemuConsole *s, int x, int y)
 {
@@ -782,6 +801,7 @@ static void console_putchar(QemuConsole *s, int ch)
 TextCell *c;
 int y1, i;
 int x, y;
+char response[40];
 
 switch(s->state) {
 case TTY_STATE_NORM:
@@ -956,8 +976,19 @@ static void console_putchar(QemuConsole *s, int ch)
 console_handle_escape(s);
 break;
 case 'n':
-/* report cursor position */
-/* TODO: send ESC[row;colR */
+switch (s->esc_params[0]) {
+case 5:
+/* report console status (always succeed)*/
+console_respond_str(s, "\033[0n");
+break;
+case 6:
+/* report cursor position */
+sprintf(response, "\033[%d;%dR",
+   (s->y_base + s->y) % s->total_height + 1,
+s->x + 1);
+console_respond_str(s, response);
+break;
+}
 break;
 case 's':
 /* save cursor position */
-- 
2.5.0




Re: [Qemu-devel] [Nbd] [RFC 1/1] nbd (specification): add NBD_CMD_WRITE_ZEROES command

2016-03-06 Thread Wouter Verhelst
Hi all,

On Fri, Mar 04, 2016 at 03:03:26PM +0100, Paolo Bonzini wrote:
> NBD-wise, I think the TRIM command is good as it is, and
> NBD_CMD_WRITE_ZEROES should be added like Den is doing.
> 
> It also makes sense to use trimming to implement NBD_CMD_WRITE_ZEROES,
> but it should be explicitly requested by the user.  For this, my
> suggestion is that NBD_CMD_WRITE_ZEROES should have an
> NBD_FLAG_TRY_TRIM flag in bit 16.  If specified, the backend can use a
> zero-writing mechanism that trims, _but_ it must ensure that the bytes
> read as zero.  If it cannot ensure that, it must not trim and it
> should instead do a full write.  This is similar to the SCSI command
> WRITE SAME (when the command payload is all zeroes).  Like Kevin said,
> it also happens to map nicely to the QEMU block device layer.

That seems like a sensible approach, yes.

Would one of you who feels strongly about this be willing to write up a
proposed spec for that? I could do it, but since I'm not 100% sure I
understand all the specific requirements, I'm uncomfortable doing so.

If that doesn't raise any obvious issues that I'm aware of, I'd be happy
to add it to the "experimental" section of the proto.md file.

(speaking of which, I notice that the STARTTLS patches got merged into
qemu, so I've moved the description of that to the main body and out of
the "experimental" part of that document)

-- 
< ron> I mean, the main *practical* problem with C++, is there's like a dozen
   people in the world who think they really understand all of its rules,
   and pretty much all of them are just lying to themselves too.
 -- #debian-devel, OFTC, 2016-02-12



Re: [Qemu-devel] [PATCH] ui/console: add escape sequence \e[5,6n

2016-03-06 Thread Ren Kimura
Oh OK. I'll send version2 of this patch that contains comment about these.

2016-03-06 18:49 GMT+09:00 Peter Maydell :

> On 5 March 2016 at 23:50, Ren Kimura  wrote:
> > This patch add support of escape sequence "\e[5,6n".
> > This implementation similar to that of linux tty driver.
>
> Could you describe what the escape sequences do, please?
> (both in the commit message and in a comment in the code).
>
> thanks
> -- PMM
>


Re: [Qemu-devel] [PATCH] ui/console: add escape sequence \e[5,6n

2016-03-06 Thread Peter Maydell
On 5 March 2016 at 23:50, Ren Kimura  wrote:
> This patch add support of escape sequence "\e[5,6n".
> This implementation similar to that of linux tty driver.

Could you describe what the escape sequences do, please?
(both in the commit message and in a comment in the code).

thanks
-- PMM



Re: [Qemu-devel] [PATCH][Outreachy]

2016-03-06 Thread Alex Bennée

Sarah Khan  writes:

> util/envlist.c:This patch replaces malloc with g_malloc
>
> This replacement was suggested as part of the bite-sized tasks.
>
> Signed-off-by: Sarah Khan 

Thanks for your contribution. A few notes for the next revision.

It's always worth running $SRC/scripts/checkpatch.pl to check for style
issues. While there are bits of the code base that don't conform we tend
to fix up style issues with the code that is being changed as we go.

>
> --
> diff --git a/util/envlist.c b/util/envlist.c
> index e86857e..0324fe2 100644
> --- a/util/envlist.c
> +++ b/util/envlist.c
> @@ -25,7 +25,7 @@ envlist_create(void)
>  {
>   envlist_t *envlist;
>
> - if ((envlist = malloc(sizeof (*envlist))) == NULL)
> + if ((envlist = g_malloc(sizeof (*envlist))) == NULL)
>   return (NULL);

g_malloc has different behaviour from malloc in so far as it never fails
(or rather if it does you program aborts). This means a lot of
boilerplate checking can be simplified.

>
>   QLIST_INIT(>el_entries);
> @@ -48,10 +48,10 @@ envlist_free(envlist_t *envlist)
>   entry = envlist->el_entries.lh_first;
>   QLIST_REMOVE(entry, ev_link);
>
> - free((char *)entry->ev_var);
> - free(entry);
> + g_free((char *)entry->ev_var);
> + g_free(entry);
>   }
> - free(envlist);
> + g_free(envlist);
>  }
>
>  /*
> @@ -155,16 +155,16 @@ envlist_setenv(envlist_t *envlist, const char *env)
>
>   if (entry != NULL) {
>   QLIST_REMOVE(entry, ev_link);
> - free((char *)entry->ev_var);
> - free(entry);
> + g_free((char *)entry->ev_var);
> + g_free(entry);
>   } else {
>   envlist->el_count++;
>   }
>
> - if ((entry = malloc(sizeof (*entry))) == NULL)
> + if ((entry = g_malloc(sizeof (*entry))) == NULL)
>   return (errno);
>   if ((entry->ev_var = strdup(env)) == NULL) {

If you're cleaning up the memory allocation you should probably also use
g_strdup() here for the string allocation (it gets g_free'd later).

> - free(entry);
> + g_free(entry);
>   return (errno);
>   }
>   QLIST_INSERT_HEAD(>el_entries, entry, ev_link);
> @@ -201,8 +201,8 @@ envlist_unsetenv(envlist_t *envlist, const char *env)
>   }
>   if (entry != NULL) {
>   QLIST_REMOVE(entry, ev_link);
> - free((char *)entry->ev_var);
> - free(entry);
> + g_free((char *)entry->ev_var);
> + g_free(entry);
>
>   envlist->el_count--;
>   }
> @@ -225,7 +225,7 @@ envlist_to_environ(const envlist_t *envlist, size_t 
> *count)
>   struct envlist_entry *entry;
>   char **env, **penv;
>
> - penv = env = malloc((envlist->el_count + 1) * sizeof (char *));
> + penv = env = g_malloc((envlist->el_count + 1) * sizeof (char *));
>   if (env == NULL)
>   return (NULL);

Again this checking is now redundant.

> ---
>  util/envlist.c | 22 +++---
>  1 file changed, 11 insertions(+), 11 deletions(-)
>
> diff --git a/util/envlist.c b/util/envlist.c
> index e86857e..0324fe2 100644
> --- a/util/envlist.c
> +++ b/util/envlist.c
> @@ -25,7 +25,7 @@ envlist_create(void)
>  {
>   envlist_t *envlist;
>
> - if ((envlist = malloc(sizeof (*envlist))) == NULL)
> + if ((envlist = g_malloc(sizeof (*envlist))) == NULL)
>   return (NULL);
>
>   QLIST_INIT(>el_entries);
> @@ -48,10 +48,10 @@ envlist_free(envlist_t *envlist)
>   entry = envlist->el_entries.lh_first;
>   QLIST_REMOVE(entry, ev_link);
>
> - free((char *)entry->ev_var);
> - free(entry);
> + g_free((char *)entry->ev_var);
> + g_free(entry);
>   }
> - free(envlist);
> + g_free(envlist);
>  }
>
>  /*
> @@ -155,16 +155,16 @@ envlist_setenv(envlist_t *envlist, const char *env)
>
>   if (entry != NULL) {
>   QLIST_REMOVE(entry, ev_link);
> - free((char *)entry->ev_var);
> - free(entry);
> + g_free((char *)entry->ev_var);
> + g_free(entry);
>   } else {
>   envlist->el_count++;
>   }
>
> - if ((entry = malloc(sizeof (*entry))) == NULL)
> + if ((entry = g_malloc(sizeof (*entry))) == NULL)
>   return (errno);
>   if ((entry->ev_var = strdup(env)) == NULL) {
> - free(entry);
> + g_free(entry);
>   return (errno);
>   }
>   QLIST_INSERT_HEAD(>el_entries, entry, ev_link);
> @@ -201,8 +201,8 @@ envlist_unsetenv(envlist_t *envlist, const char *env)
>   }
>   if (entry != NULL) {
>   QLIST_REMOVE(entry, ev_link);
> - free((char *)entry->ev_var);
> - free(entry);
> + g_free((char *)entry->ev_var);
> +