Re: [Qemu-devel] [PATCH 4/7 V6] Header with various utility functions shared by VMWARE SCSI and network devices

2012-04-18 Thread Michael S. Tsirkin
On Tue, Apr 17, 2012 at 03:32:38PM +0300, Dmitry Fleytman wrote:
 From: Dmitry Fleytman dmi...@daynix.com
 
 Signed-off-by: Dmitry Fleytman dmi...@daynix.com
 Signed-off-by: Yan Vugenfirer y...@daynix.com

I can easily see how different vmware devices would
share some code. However:

 ---
  hw/vmware_utils.h |  126 
 +
  1 files changed, 126 insertions(+), 0 deletions(-)
  create mode 100644 hw/vmware_utils.h
 
 diff --git a/hw/vmware_utils.h b/hw/vmware_utils.h
 new file mode 100644
 index 000..0d261c0
 --- /dev/null
 +++ b/hw/vmware_utils.h
 @@ -0,0 +1,126 @@
 +/*
 + * QEMU VMWARE paravirtual devices - auxiliary code
 + *
 + * Copyright (c) 2012 Ravello Systems LTD (http://ravellosystems.com)
 + *
 + * Developed by Daynix Computing LTD (http://www.daynix.com)
 + *
 + * Authors:
 + * Dmitry Fleytman dmi...@daynix.com
 + * Yan Vugenfirer y...@daynix.com
 + *
 + * This work is licensed under the terms of the GNU GPL, version 2 or later.
 + * See the COPYING file in the top-level directory.
 + *
 + */
 +
 +#ifndef VMWARE_UTILS_H
 +#define VMWARE_UTILS_H
 +
 +#ifndef VMW_SHPRN
 +#define VMW_SHPRN(fmt, ...) do {} while (0)
 +#endif
 +
 +/* Shared memory access functions with byte swap support */
 +static inline void
 +vmw_shmem_read(target_phys_addr_t addr, void *buf, int len)
 +{
 +VMW_SHPRN(SHMEM r: % PRIx64 , len: %d to %p, addr, len, buf);
 +cpu_physical_memory_read(addr, buf, len);
 +}
 +
 +static inline void
 +vmw_shmem_write(target_phys_addr_t addr, void *buf, int len)
 +{
 +VMW_SHPRN(SHMEM w: % PRIx64 , len: %d to %p, addr, len, buf);
 +cpu_physical_memory_write(addr, buf, len);
 +}
 +
 +static inline void
 +vmw_shmem_rw(target_phys_addr_t addr, void *buf, int len, int is_write)
 +{
 +VMW_SHPRN(SHMEM r/w: % PRIx64 , len: %d (to %p), is write: %d,
 +  addr, len, buf, is_write);
 +
 +cpu_physical_memory_rw(addr, buf, len, is_write);
 +}
 +
 +static inline void
 +vmw_shmem_set(target_phys_addr_t addr, uint8 val, int len)
 +{
 +int i;
 +VMW_SHPRN(SHMEM set: % PRIx64 , len: %d (value 0x%X), addr, len, 
 val);
 +
 +for (i = 0; i  len; i++) {
 +cpu_physical_memory_write(addr + i, val, 1);
 +}
 +}
 +
 +static inline uint32_t
 +vmw_shmem_ld8(target_phys_addr_t addr)
 +{
 +uint8_t res = ldub_phys(addr);
 +VMW_SHPRN(SHMEM load8: % PRIx64  (value 0x%X), addr, res);
 +return res;
 +}
 +
 +static inline void
 +vmw_shmem_st8(target_phys_addr_t addr, uint8_t value)
 +{
 +VMW_SHPRN(SHMEM store8: % PRIx64  (value 0x%X), addr, value);
 +stb_phys(addr, value);
 +}
 +
 +static inline uint32_t
 +vmw_shmem_ld16(target_phys_addr_t addr)
 +{
 +uint16_t res = lduw_le_phys(addr);
 +VMW_SHPRN(SHMEM load16: % PRIx64  (value 0x%X), addr, res);
 +return res;
 +}
 +
 +static inline void
 +vmw_shmem_st16(target_phys_addr_t addr, uint16_t value)
 +{
 +VMW_SHPRN(SHMEM store16: % PRIx64  (value 0x%X), addr, value);
 +stw_le_phys(addr, value);
 +}
 +
 +static inline uint32_t
 +vmw_shmem_ld32(target_phys_addr_t addr)
 +{
 +uint32_t res = ldl_le_phys(addr);
 +VMW_SHPRN(SHMEM load32: % PRIx64  (value 0x%X), addr, res);
 +return res;
 +}
 +
 +static inline void
 +vmw_shmem_st32(target_phys_addr_t addr, uint32_t value)
 +{
 +VMW_SHPRN(SHMEM store32: % PRIx64  (value 0x%X), addr, value);
 +stl_le_phys(addr, value);
 +}
 +
 +static inline uint64_t
 +vmw_shmem_ld64(target_phys_addr_t addr)
 +{
 +uint64_t res = ldq_le_phys(addr);
 +VMW_SHPRN(SHMEM load64: % PRIx64  (value % PRIx64 ), addr, res);
 +return res;
 +}
 +
 +static inline void
 +vmw_shmem_st64(target_phys_addr_t addr, uint64_t value)
 +{
 +VMW_SHPRN(SHMEM store64: % PRIx64  (value % PRIx64 ), addr, value);
 +stq_le_phys(addr, value);
 +}
 +

Pls remove these wrappers.  These are just memory stores. Our codebase
is too large as it is without every driver wrapping all standard calls.


 +/* MACROS for simplification of operations on array-style registers */

UPPERCASE ABUSE

 +#define IS_MULTIREG_ADDR(addr, base, cnt, regsize) \
 +(((addr + 1)  (base))  ((addr)  (base) + (cnt) * (regsize)))


Same as range_covers_byte(base, cnt * regsize, addr)?

 +
 +#define MULTIREG_IDX_BY_ADDR(addr, base, regsize)  \
 +(((addr) - (base)) / (regsize))
 +

The above two macros are all that's left. No objection, but nothing says
what they do - want to add minimal documentation?
And please prefix with VMWARE_ or something.

 +#endif
 -- 
 1.7.7.6



Re: [Qemu-devel] [PATCH 0/7 V6] VMXNET3 paravirtual NIC device implementation

2012-04-18 Thread Gerhard Wiesinger
Having already pretested this patch, which I got from Dmitry, I have now
successfully tested it on Fedora 16 and Knoppix 6.7 (retested again successfully).


netio performance on localhost (ok):
Packet size  1k bytes:  33645 KByte/s Tx,  25279 KByte/s Rx.
Packet size  2k bytes:  45884 KByte/s Tx,  24854 KByte/s Rx.
Packet size  4k bytes:  80332 KByte/s Tx,  42068 KByte/s Rx.
Packet size  8k bytes:  117696 KByte/s Tx,  64489 KByte/s Rx.
Packet size 16k bytes:  150418 KByte/s Tx,  100288 KByte/s Rx.
Packet size 32k bytes:  182412 KByte/s Tx,  138779 KByte/s Rx.

Since the hw also passed the WHQL tests, it is IMHO ready for commit :-)

Formal:
Tested-by: Gerhard Wiesinger li...@wiesinger.com

Ciao,
Gerhard

On 17.04.2012 14:32, Dmitry Fleytman wrote:

From: Dmitry Fleytmandmi...@daynix.com

This set of patches implements the VMWare VMXNET3 paravirtual NIC device.
The device supports all of the device features, including offload capabilities,
VLANs, etc.
The device is tested on different OSes:
 Fedora 15
 Ubuntu 10.4
 Centos 6.2
 Windows 2008R2
 Windows 2008 64bit
 Windows 2008 32bit
 Windows 2003 64bit
 Windows 2003 32bit

Changes in V6:
Fixed most of problems pointed out by Michael S. Tsirkin
The only issue still open is creation of shared place
with generic network structures and functions. Currently
all generic network code introduced by VMXNET3 resides in
vmxnet_utils.c/h files. It could be moved to some shared location however
we believe it is a matter of separate refactoring as there are a lot of 
copy-pasted
definitions in almost every device and code cleanup efforts are required in order
to create a truly shared codebase.

  Reported-by: Michael S. Tsirkinm...@redhat.com

Implemented suggestions by Anthony Liguori

  Reported-by: Anthony Liguorialigu...@us.ibm.com

Fixed incorrect checksum calculation for some packets in SW offloads mode

  Reported-by: Gerhard Wiesingerli...@wiesinger.com

Changes in V5:
MSI-X save/load implemented in the device instead of pci bus as
suggested by Michael S. Tsirkin

  Reported-by: Michael S. Tsirkinm...@redhat.com

Patches regrouped as suggested by Paolo Bonzini

  Reported-by: Paolo Bonzinipbonz...@redhat.com

Changes in V4:
Fixed a few problems uncovered by the NETIO test suite
Assertion on failure to initialize MSI/MSI-X replaced with warning
message and fallback to Legacy/MSI respectively

  Reported-by: Gerhard Wiesingerli...@wiesinger.com

Various coding style adjustments and patch split-up as suggested by Anthony
Liguori

  Reported-by: Anthony Liguorialigu...@us.ibm.com

Live migration support added

Changes in V3:
Fixed crash when net device that is used as network fronted has no
virtio HDR support.
Task offloads emulation for cases when net device that is used as
network fronted has no virtio HDR support.

  Reported-by: Gerhard Wiesingerli...@wiesinger.com

Changes in V2:
License text changed according to community suggestions
Standard license header from GPLv2+ - licensed QEMU files used

Dmitry Fleytman (7):
   Adding missing flag VIRTIO_NET_HDR_F_DATA_VALID from Linux kernel
 source tree Reformatting comments according to checkpatch.pl
 requirements
   Adding utility function net_checksum_add_cont() that allows checksum
calculation of scattered data with odd chunk sizes
   Adding utility function iov_net_csum_add() for iovec checksum
 calculation Adding utility function iov_rebuild() for smart
 iovec copy
   Header with various utility functions shared by VMWARE SCSI and
 network devices
   Various utility functions used by VMWARE network devices
   Packet abstraction used by VMWARE network devices
   VMXNET3 paravirtualized device implementation Device vmxnet3
 added.

  Makefile.objs   |1 +
  default-configs/pci.mak |1 +
  hw/pci.h|1 +
  hw/virtio-net.h |   13 +-
  hw/vmware_utils.h   |  126 +++
  hw/vmxnet3.c| 2435 +++
  hw/vmxnet3.h|  762 +++
  hw/vmxnet_debug.h   |  121 +++
  hw/vmxnet_pkt.c |  776 +++
  hw/vmxnet_pkt.h |  311 ++
  hw/vmxnet_utils.c   |  219 +
  hw/vmxnet_utils.h   |  341 +++
  iov.c   |   53 +
  iov.h   |6 +
  net/checksum.c  |   13 +-
  net/checksum.h  |   14 +-
  16 files changed, 5180 insertions(+), 13 deletions(-)
  create mode 100644 hw/vmware_utils.h
  create mode 100644 hw/vmxnet3.c
  create mode 100644 hw/vmxnet3.h
  create mode 100644 hw/vmxnet_debug.h
  create mode 100644 hw/vmxnet_pkt.c
  create mode 100644 hw/vmxnet_pkt.h
  create mode 100644 hw/vmxnet_utils.c
  create mode 100644 hw/vmxnet_utils.h






[Qemu-devel] [Bug 984476] Re: segmentaion error when DMAing

2012-04-18 Thread Stefan Weil
Was it TCP segmentaion Error? Then it is still there.

Thanks for reporting. It will be fixed in latest QEMU.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/984476

Title:
  segmentaion error when DMAing

Status in QEMU:
  New

Bug description:
  When working with QEMU's PCI network card E1000 emulator, I
  accidentally put virtual addresses into the memory mapped registers
  when I should have put physical addresses. Short story is, the address
  was too large for the physical address space so when the network card
  tried to DMA the location it tossed a segmentaion error out to the
  console. That's right--not a segmentation error, but a segmentaion
  error. I just thought I'd let ya'll know about that little typo.

  My qemu -version gives QEMU emulator version 0.15.0, Copyright (c)
  2003-2008 Fabrice Bellard on linux version 2.6.32. I guess it might
  be an older version, dunno if the typo's still there.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/984476/+subscriptions



[Qemu-devel] [Bug 984476] Re: segmentaion error when DMAing

2012-04-18 Thread Stefan Weil
** Changed in: qemu
   Status: New = Confirmed

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/984476

Title:
  segmentaion error when DMAing

Status in QEMU:
  Confirmed

Bug description:
  When working with QEMU's PCI network card E1000 emulator, I
  accidentally put virtual addresses into the memory mapped registers
  when I should have put physical addresses. Short story is, the address
  was too large for the physical address space so when the network card
  tried to DMA the location it tossed a segmentaion error out to the
  console. That's right--not a segmentation error, but a segmentaion
  error. I just thought I'd let ya'll know about that little typo.

  My qemu -version gives QEMU emulator version 0.15.0, Copyright (c)
  2003-2008 Fabrice Bellard on linux version 2.6.32. I guess it might
  be an older version, dunno if the typo's still there.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/984476/+subscriptions



[Qemu-devel] [Bug 984516] Re: should use sdl-config for static build not pkg-config

2012-04-18 Thread Stefan Weil
pkg-config supports --static, and QEMU uses it.

Please try whether

   pkg-config --libs --static sdl

gives the correct flags with your distribution. If not, that
distribution is buggy.

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/984516

Title:
  should use sdl-config for static build not pkg-config

Status in QEMU:
  New

Bug description:
  In the configure script when a user wants to compile a static QEMU and
  enable SDL support (i.e. ./configure --static --enable-sdl):

  pkg-config does not have an option --static-libs. For correct
  results (to find the static archive libSDL.a) you need to use sdl-
  config --static-libs.

  
  This is how I get it to work for me anyway:

  
  diff --git a/configure b/configure
  index 2d62d12..3de4c9b 100755
  --- a/configure
  +++ b/configure
  @@ -1548,7 +1548,7 @@ int main( void ) { return SDL_Init (SDL_INIT_VIDEO); }
   EOF
 sdl_cflags=`$sdlconfig --cflags 2 /dev/null`
 if test $static = yes ; then
  -sdl_libs=`$sdlconfig --static-libs 2/dev/null`
  +sdl_libs=`${SDL_CONFIG-${cross_prefix}sdl-config} --static-libs`
 else
   sdl_libs=`$sdlconfig --libs 2 /dev/null`
 fi

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/984516/+subscriptions



Re: [Qemu-devel] [PATCH v6 2/5] qerror: add five qerror strings

2012-04-18 Thread Orit Wasserman
On 04/17/2012 05:54 PM, Amos Kong wrote:
 Add five new qerror strings, they are about socket:
   QERR_SOCKET_CONNECT_IN_PROGRESS
   QERR_SOCKET_CONNECT_FAILED
   QERR_SOCKET_LISTEN_FAILED
   QERR_SOCKET_BIND_FAILED
   QERR_SOCKET_CREATE_FAILED
 
 Signed-off-by: Amos Kong ak...@redhat.com
 ---
  qerror.c |   20 
  qerror.h |   15 +++
  2 files changed, 35 insertions(+), 0 deletions(-)
 
 diff --git a/qerror.c b/qerror.c
 index 96fbe71..7afe1ac 100644
 --- a/qerror.c
 +++ b/qerror.c
 @@ -304,6 +304,26 @@ static const QErrorStringTable qerror_table[] = {
  .error_fmt = QERR_VNC_SERVER_FAILED,
  .desc  = Could not start VNC server on %(target),
  },
 +{
 +.error_fmt = QERR_SOCKET_CONNECT_IN_PROGRESS,
 +.desc  = Connection cannot be completed immediately,
 +},
 +{
 +.error_fmt = QERR_SOCKET_CONNECT_FAILED,
 +.desc  = Fail to connect socket,
 +},
 +{
 +.error_fmt = QERR_SOCKET_LISTEN_FAILED,
 +.desc  = Fail to listen socket,
 +},
 +{
 +.error_fmt = QERR_SOCKET_BIND_FAILED,
 +.desc  = Fail to bind socket,
 +},
 +{
 +.error_fmt = QERR_SOCKET_CREATE_FAILED,
 +.desc  = Fail to create socket,
 +},
  {}
  };
  
 diff --git a/qerror.h b/qerror.h
 index 5c23c1f..4cbba48 100644
 --- a/qerror.h
 +++ b/qerror.h
 @@ -248,4 +248,19 @@ QError *qobject_to_qerror(const QObject *obj);
  #define QERR_VNC_SERVER_FAILED \
  { 'class': 'VNCServerFailed', 'data': { 'target': %s } }
  
 +#define QERR_SOCKET_CONNECT_IN_PROGRESS \
 +{ 'class': 'SockConnectInprogress', 'data': {} }
 +
 +#define QERR_SOCKET_CONNECT_FAILED \
 +{ 'class': 'SockConnectFailed', 'data': {} }
 +
 +#define QERR_SOCKET_LISTEN_FAILED \
 +{ 'class': 'SockListenFailed', 'data': {} }
 +
 +#define QERR_SOCKET_BIND_FAILED \
 +{ 'class': 'SockBindFailed', 'data': {} }
 +
 +#define QERR_SOCKET_CREATE_FAILED \
 +{ 'class': 'SockCreateFailed', 'data': {} }

For the FAILED errors we will probably need more data; how about adding a
string that can contain the strerror string?

Orit
 +
  #endif /* QERROR_H */
 




Re: [Qemu-devel] [PATCH v6 1/5] sockets: change inet_connect() to support nonblock socket

2012-04-18 Thread Orit Wasserman
On 04/17/2012 05:54 PM, Amos Kong wrote:
 Add a bool argument to inet_connect() to assign if set socket
 to block/nonblock, and delete original argument 'socktype'
 that is unused.
 
 Retry to connect when following errors are got:
   -EINTR
   -EWOULDBLOCK (win32)
 Connect's successful for nonblock socket when following
 errors are got, user should wait for connecting by select():
   -EINPROGRESS
   -WSAEALREADY (win32)
 
 Change nbd, vnc to use new interface.
 
 Signed-off-by: Amos Kong ak...@redhat.com
 ---
  nbd.c  |2 +-
  qemu-sockets.c |   58 
 +++-
  qemu_socket.h  |2 +-
  ui/vnc.c   |2 +-
  4 files changed, 48 insertions(+), 16 deletions(-)
 
 diff --git a/nbd.c b/nbd.c
 index 406e555..b4e68a9 100644
 --- a/nbd.c
 +++ b/nbd.c
 @@ -146,7 +146,7 @@ int tcp_socket_outgoing(const char *address, uint16_t 
 port)
  
  int tcp_socket_outgoing_spec(const char *address_and_port)
  {
 -return inet_connect(address_and_port, SOCK_STREAM);
 +return inet_connect(address_and_port, true);
  }
  
  int tcp_socket_incoming(const char *address, uint16_t port)
 diff --git a/qemu-sockets.c b/qemu-sockets.c
 index 6bcb8e3..e886195 100644
 --- a/qemu-sockets.c
 +++ b/qemu-sockets.c
 @@ -51,6 +51,9 @@ static QemuOptsList dummy_opts = {
  },{
  .name = ipv6,
  .type = QEMU_OPT_BOOL,
 +},{
 +.name = block,
 +.type = QEMU_OPT_BOOL,
  },
  { /* end if list */ }
  },
 @@ -201,7 +204,8 @@ int inet_connect_opts(QemuOpts *opts)
  const char *port;
  char uaddr[INET6_ADDRSTRLEN+1];
  char uport[33];
 -int sock,rc;
 +int sock, rc, err;
 +bool block;
  
  memset(ai,0, sizeof(ai));
  ai.ai_flags = AI_CANONNAME | AI_ADDRCONFIG;
 @@ -210,6 +214,7 @@ int inet_connect_opts(QemuOpts *opts)
  
  addr = qemu_opt_get(opts, host);
  port = qemu_opt_get(opts, port);
 +block = qemu_opt_get_bool(opts, block, 0);
  if (addr == NULL || port == NULL) {
  fprintf(stderr, inet_connect: host and/or port not specified\n);
  return -1;
 @@ -241,21 +246,44 @@ int inet_connect_opts(QemuOpts *opts)
  continue;
  }
  setsockopt(sock,SOL_SOCKET,SO_REUSEADDR,(void*)on,sizeof(on));
 -
 +if (!block) {
 +socket_set_nonblock(sock);
 +}
  /* connect to peer */
 -if (connect(sock,e-ai_addr,e-ai_addrlen)  0) {
 -if (NULL == e-ai_next)
 -fprintf(stderr, %s: connect(%s,%s,%s,%s): %s\n, 
 __FUNCTION__,
 -inet_strfamily(e-ai_family),
 -e-ai_canonname, uaddr, uport, strerror(errno));
 -closesocket(sock);
 -continue;
 +do {
 +err = 0;
 +if (connect(sock, e-ai_addr, e-ai_addrlen)  0) {
 +err = -socket_error();
 +}
 +#ifndef _WIN32
 +} while (err == -EINTR || err == -EWOULDBLOCK);
 +#else
 +} while (err == -EINTR);
 +#endif

We shouldn't retry to connect for a blocking socket, please add a check for 
!block.
According to the MSDN docs, on WIN32 if we get EWOULDBLOCK we should do a select():
http://msdn.microsoft.com/en-us/library/windows/desktop/ms737625(v=vs.85).aspx
so I think we only need to retry for -EINTR.

 +
 +if (err = 0) {
 +goto success;
 +} else if (!block  err == -EINPROGRESS) {
 +goto success;
 +#ifdef _WIN32
 +} else if (!block  err == -WSAEALREADY) {

Also EWOULDBLOCK
This is more of a style comment, as I feel the code doesn't need the goto.

Check for an error path so the rest of the function looks like:

if (err  0) {
if ( block ||
#ifndef __WIN32
 err != -EINPROGRESS ) {
#else 
(err != -EWOULDBLOCK && err != -WSAEALREADY) ) {
#endif
if (NULL == e-ai_next) {
fprintf(stderr, %s: connect(%s,%s,%s,%s): %s\n, __FUNCTION__,
inet_strfamily(e-ai_family),
e-ai_canonname, uaddr, uport, strerror(errno));
}
closesocket(sock);
sock = -1;
}

freeaddrinfo(res);
return sock;
}

 +goto success;
 +#endif
  }
 -freeaddrinfo(res);
 -return sock;
 +
 +if (NULL == e-ai_next) {
 +fprintf(stderr, %s: connect(%s,%s,%s,%s): %s\n, __FUNCTION__,
 +inet_strfamily(e-ai_family),
 +e-ai_canonname, uaddr, uport, strerror(errno));
 +}
 +closesocket(sock);
  }
  freeaddrinfo(res);
  return -1;
 +
 +success:
 +freeaddrinfo(res);
 +return sock;
  }
  
  int inet_dgram_opts(QemuOpts *opts)
 @@ -449,14 +477,18 @@ int inet_listen(const char *str, char *ostr, int olen,
  return sock;
  }
  
 -int inet_connect(const char *str, int socktype)
 +int inet_connect(const char *str, bool block)
  {
  QemuOpts *opts;
  int sock = -1;
  
  opts = 

Re: [Qemu-devel] [PATCH 15/18] qapi: implement support for variable argument list

2012-04-18 Thread Paolo Bonzini
Il 17/04/2012 22:42, Luiz Capitulino ha scritto:
 On Tue, 17 Apr 2012 22:26:55 +0200
 Paolo Bonzini pbonz...@redhat.com wrote:
 
 Il 17/04/2012 21:36, Luiz Capitulino ha scritto:
 +switch(qobject_type(obj)) {
 +case QTYPE_QSTRING:
 +qstring_append(arglist,
 +   qstring_get_str(qobject_to_qstring(obj)));
 +break;

 Does this escape commas correctly?
 
 No, but does it have to? Does QemuOpts accept an option with a comma in it?

Yes, ,, is parsed as ,.

 It seems much easier to use no_gen and qemu_opts_from_qdict...  Then
 cmd_netdev_add can be
 
 netdev_add/del is expected to be a stable interface, so we can't use no_gen.

You can have hmp_netdev_add and the no_gen qmp_netdev_add as front-ends
for the QAPI cmd_netdev_add.  I think it's fair when we have to take
into account backwards-compatibility.  The conversion gives correct
error propagation, so even though QemuOpts still leaks it's a step in
the right direction.

   void cmd_foo(QemuOpts *arglist, Error **errp);
 
 Until now we're treating hmp.c like an external QMP C client, using QemuOpts
 this way will leak qemu internals to hmp.c...

True, but on the other hand it sounds strange to have QAPI clients
encoding options manually and escaping commas.

A KeyValueList (list of string-string associations) could be an
alternative, but I do think that ultimately we want to have a visitor
and remove QemuOpts altogether from net.c.  I can write a proof of
concept in a couple of weeks.  Again, we can proceed in steps.

Paolo



Re: [Qemu-devel] [PATCH v6 3/5] sockets: use error class to pass connect error

2012-04-18 Thread Orit Wasserman
On 04/17/2012 05:54 PM, Amos Kong wrote:
 Add a new argument in inet_connect()/inet_connect_opts()
 to pass back connect error.
 
 Change nbd, vnc to use new interface.
 
 Signed-off-by: Amos Kong ak...@redhat.com
 ---
  nbd.c  |2 +-
  qemu-char.c|2 +-
  qemu-sockets.c |   13 ++---
  qemu_socket.h  |6 --
  ui/vnc.c   |2 +-
  5 files changed, 17 insertions(+), 8 deletions(-)
 
 diff --git a/nbd.c b/nbd.c
 index b4e68a9..bb71f00 100644
 --- a/nbd.c
 +++ b/nbd.c
 @@ -146,7 +146,7 @@ int tcp_socket_outgoing(const char *address, uint16_t 
 port)
  
  int tcp_socket_outgoing_spec(const char *address_and_port)
  {
 -return inet_connect(address_and_port, true);
 +return inet_connect(address_and_port, true, NULL);
  }
  
  int tcp_socket_incoming(const char *address, uint16_t port)
 diff --git a/qemu-char.c b/qemu-char.c
 index 74c60e1..09f990a 100644
 --- a/qemu-char.c
 +++ b/qemu-char.c
 @@ -2444,7 +2444,7 @@ static CharDriverState *qemu_chr_open_socket(QemuOpts 
 *opts)
  if (is_listen) {
  fd = inet_listen_opts(opts, 0);
  } else {
 -fd = inet_connect_opts(opts);
 +fd = inet_connect_opts_err(opts, NULL);
  }
  }
  if (fd  0) {
 diff --git a/qemu-sockets.c b/qemu-sockets.c
 index e886195..2bd87fa 100644
 --- a/qemu-sockets.c
 +++ b/qemu-sockets.c
 @@ -197,7 +197,7 @@ listen:
  return slisten;
  }
  
 -int inet_connect_opts(QemuOpts *opts)
 +int inet_connect_opts(QemuOpts *opts, Error **errp)
  {
  struct addrinfo ai,*res,*e;
  const char *addr;
 @@ -217,6 +217,7 @@ int inet_connect_opts(QemuOpts *opts)
  block = qemu_opt_get_bool(opts, block, 0);
  if (addr == NULL || port == NULL) {
  fprintf(stderr, inet_connect: host and/or port not specified\n);
 +error_set(errp, QERR_SOCKET_CREATE_FAILED);

For more details on the error (see my comment on the previous patch), you can
add the string used in the fprintf.

  return -1;
  }
  
 @@ -229,6 +230,7 @@ int inet_connect_opts(QemuOpts *opts)
  if (0 != (rc = getaddrinfo(addr, port, ai, res))) {
  fprintf(stderr,getaddrinfo(%s,%s): %s\n, addr, port,
  gai_strerror(rc));
 +error_set(errp, QERR_SOCKET_CREATE_FAILED);

same here , you can add gai_strerror(rc) string.

   return -1;
  }
  
 @@ -254,6 +256,7 @@ int inet_connect_opts(QemuOpts *opts)
  err = 0;
  if (connect(sock, e-ai_addr, e-ai_addrlen)  0) {
  err = -socket_error();
 +error_set(errp, QERR_SOCKET_CONNECT_FAILED);

This can cause a leak later, in the nonblocking-socket case, when you call
error_set again.
Why not do the check here and set the correct error?

You can then check later in the function whether a connect is in progress.
Something like (also add the WIN32 case):

if (!block  err == -EINPROGRESS ) {
error_set(errp,QERR_CONNECT_IN_PROGRESS).
} else {
error_set(errp, QERR_SOCKET_CONNECT_FAILED);
}

  }
  #ifndef _WIN32
  } while (err == -EINTR || err == -EWOULDBLOCK);
 @@ -264,9 +267,11 @@ int inet_connect_opts(QemuOpts *opts)
  if (err = 0) {
  goto success;
  } else if (!block  err == -EINPROGRESS) {
 +error_set(errp, QERR_SOCKET_CONNECT_IN_PROGRESS);
  goto success;
  #ifdef _WIN32
  } else if (!block  err == -WSAEALREADY) {
 +error_set(errp, QERR_SOCKET_CONNECT_IN_PROGRESS);
  goto success;
  #endif
  }

This would be removed if you changed the code to:

if (error_is_set(errp)  !error_is_type(QERR_SOCKET_CONNECT_IN_PROGRESS) {
if (NULL == e-ai_next)
fprintf(stderr, %s: connect(%s,%s,%s,%s): %s\n, __FUNCTION__,
inet_strfamily(e-ai_family),
e-ai_canonname, uaddr, uport, strerror(errno));
closesocket(sock);
sock =-1
}

freeaddrinfo(res);
return sock;

Orit

 @@ -477,7 +482,7 @@ int inet_listen(const char *str, char *ostr, int olen,
  return sock;
  }
  
 -int inet_connect(const char *str, bool block)
 +int inet_connect(const char *str, bool block, Error **errp)
  {
  QemuOpts *opts;
  int sock = -1;
 @@ -487,7 +492,9 @@ int inet_connect(const char *str, bool block)
  if (block) {
  qemu_opt_set(opts, block, on);
  }
 -sock = inet_connect_opts(opts);
 +sock = inet_connect_opts(opts, errp);
 +} else {
 +error_set(errp, QERR_SOCKET_CREATE_FAILED);
  }
  qemu_opts_del(opts);
  return sock;
 diff --git a/qemu_socket.h b/qemu_socket.h
 index f73e26d..26998ef 100644
 --- a/qemu_socket.h
 +++ b/qemu_socket.h
 @@ -27,6 +27,8 @@ int inet_aton(const char *cp, struct in_addr *ia);
  #endif /* !_WIN32 */
  
  #include qemu-option.h
 +#include error.h
 +#include qerror.h
  
  /* misc helpers */
  int qemu_socket(int domain, int type, int protocol);
 @@ -40,8 +42,8 @@ 

[Qemu-devel] [PATCH 00/12 v12] introducing a new, dedicated guest memory dump mechanism

2012-04-18 Thread Wen Congyang
Hi, all

'virsh dump' does not work when a host PCI device is used by the guest. We have
discussed this issue here:
http://lists.nongnu.org/archive/html/qemu-devel/2011-10/msg00736.html

The last version is here:
http://lists.nongnu.org/archive/html/qemu-devel/2012-03/msg04743.html

We have determined to introduce a new command dump-guest-memory to dump
guest's memory. The core file's format is elf32 or elf64.

Note:
1. The guest should be x86 or x86_64. The other arch is not supported now.
2. If you use old gdb, gdb may crash. I use gdb-7.3.1, and it does not crash.
3. If the OS is in the second kernel, gdb may not work well, and crash can
   work by specifying '--machdep phys_addr=xxx' in the command line. The
   reason is that the second kernel will update the page table, and we can
   not get the page table for the first kernel.
4. The cpu's state is stored in a QEMU note. You need to modify crash to use
   it to calculate phys_base.
5. If the guest OS is 32 bit and the memory size is larger than 4G, the vmcore
   is elf64 format. You should use the gdb which is built with 
--enable-64-bit-bfd.
6. This patchset is based on the upstream tree, and apply one patch that is 
still
   in Luiz Capitulino's tree, because I use the API qemu_get_fd() in this 
patchset.

Changes from v11 to v12:
1. rebase and resend

Changes from v10 to v11:
1. addressed Luiz's and Hatayama's comments
2. fix a bug about filtering feature

Changes from v9 to v10:
1. fix some bug
2. addressed Luiz's and Hatayama's comments
3. remove cancel and query command

Changes from v8 to v9:
1. remove async support(it will be reimplemented after QAPI async commands 
support
   is finished)
2. fix some typo error

Changes from v7 to v8:
1. addressed Hatayama's comments

Changes from v6 to v7:
1. addressed Jan's comments
2. fix some bugs
3. store cpu's state into the vmcore

Changes from v5 to v6:
1. allow user to dump a fraction of the memory
2. fix some bugs

Changes from v4 to v5:
1. convert the new command dump to QAPI 

Changes from v3 to v4:
1. support it to run asynchronously
2. add API to cancel dumping and query dumping progress
3. add API to control dumping speed
4. auto cancel dumping when the user resumes vm, and the status is failed.

Changes from v2 to v3:
1. address Jan Kiszka's comment

Changes from v1 to v2:
1. fix virt addr in the vmcore.

Wen Congyang (12):
  Add API to create memory mapping list
  Add API to check whether a physical address is I/O address
  implement cpu_get_memory_mapping()
  Add API to check whether paging mode is enabled
  Add API to get memory mapping
  Add API to get memory mapping without do paging
  target-i386: Add API to write elf notes to core file
  target-i386: Add API to write cpu status to core file
  target-i386: add API to get dump info
  make gdb_id() generally avialable and rename it to cpu_index()
  QError: Introduce new error for the dump-guest-memory command
  introduce a new monitor command 'dump-guest-memory' to dump guest's
memory

 Makefile.target   |3 +
 configure |8 +
 cpu-all.h |   67 +++
 cpu-common.h  |2 +
 dump.c|  828 +
 dump.h|   23 +
 elf.h |5 +
 exec.c|9 +
 gdbstub.c |   19 +-
 gdbstub.h |9 +
 hmp-commands.hx   |   28 ++
 hmp.c |   22 +
 hmp.h |1 +
 memory_mapping.c  |  249 +++
 memory_mapping.h  |   68 +++
 qapi-schema.json  |   34 ++
 qerror.c  |4 +
 qerror.h  |3 +
 qmp-commands.hx   |   38 ++
 target-i386/arch_dump.c   |  426 +++
 target-i386/arch_memory_mapping.c |  271 
 21 files changed, 2103 insertions(+), 14 deletions(-)
 create mode 100644 dump.c
 create mode 100644 dump.h
 create mode 100644 memory_mapping.c
 create mode 100644 memory_mapping.h
 create mode 100644 target-i386/arch_dump.c
 create mode 100644 target-i386/arch_memory_mapping.c






Re: [Qemu-devel] [PATCH v6 5/5] use inet_listen()/inet_connect() to support ipv6 migration

2012-04-18 Thread Orit Wasserman
On 04/17/2012 05:54 PM, Amos Kong wrote:
 Use help functions in qemu-socket.c for tcp migration,
 which already support ipv6 addresses.
 
 Currently errp will be set to UNDEFINED_ERROR when migration fails,
 qemu would output migration failed: ..., and current user can
 see a message(An undefined error has occurred) in monitor.
 
 This patch changed tcp_start_outgoing_migration()/inet_connect()
 /inet_connect_opts(), socket error would be passed back,
 then current user can see a meaningful err message in monitor.
 
 Qemu will exit if listening fails, so output socket error
 to qemu stderr.
 
 For IPv6 brackets must be mandatory if you require a port.
 Referencing to RFC5952, the recommended format is:
   [2312::8274]:5200
 
 test status: Successed
 listen side: qemu-kvm  -incoming tcp:[2312::8274]:5200
 client side: qemu-kvm ...
  (qemu) migrate -d tcp:[2312::8274]:5200
 
 Signed-off-by: Amos Kong ak...@redhat.com
 ---
  migration-tcp.c |   74 
 +--
  migration.c |   14 ++
  migration.h |7 +++--
  vl.c|6 
  4 files changed, 35 insertions(+), 66 deletions(-)
 
 diff --git a/migration-tcp.c b/migration-tcp.c
 index 35a5781..1ecfd1e 100644
 --- a/migration-tcp.c
 +++ b/migration-tcp.c
 @@ -79,45 +79,29 @@ static void tcp_wait_for_connect(void *opaque)
  }
  }
  
 -int tcp_start_outgoing_migration(MigrationState *s, const char *host_port)
 +int tcp_start_outgoing_migration(MigrationState *s, const char *host_port,
 + Error **errp)
  {
 -struct sockaddr_in addr;
 -int ret;
 -
 -ret = parse_host_port(addr, host_port);
 -if (ret  0) {
 -return ret;
 -}
 -
  s-get_error = socket_errno;
  s-write = socket_write;
  s-close = tcp_close;
  
 -s-fd = qemu_socket(PF_INET, SOCK_STREAM, 0);
 -if (s-fd == -1) {
 -DPRINTF(Unable to open socket);
 -return -socket_error();
 -}
 -
 -socket_set_nonblock(s-fd);
 -
 -do {
 -ret = connect(s-fd, (struct sockaddr *)addr, sizeof(addr));
 -if (ret == -1) {
 -ret = -socket_error();
 -}
 -if (ret == -EINPROGRESS || ret == -EWOULDBLOCK) {
 -qemu_set_fd_handler2(s-fd, NULL, NULL, tcp_wait_for_connect, s);
 -return 0;
 -}
 -} while (ret == -EINTR);
 +s-fd = inet_connect(host_port, false, errp);
  
 -if (ret  0) {
 +if (error_is_type(*errp, QERR_SOCKET_CONNECT_IN_PROGRESS)) {
 +DPRINTF(connect in progress);
 +qemu_set_fd_handler2(s-fd, NULL, NULL, tcp_wait_for_connect, s);
 +} else if (error_is_type(*errp, QERR_SOCKET_CREATE_FAILED)) {
 +DPRINTF(connect failed\n);
 +return -1;
 +} else if (error_is_type(*errp, QERR_SOCKET_CONNECT_FAILED)) {
  DPRINTF(connect failed\n);
  migrate_fd_error(s);
 -return ret;
 +return -1;
 +} else {
 +migrate_fd_connect(s);
  }
 -migrate_fd_connect(s);
 +
  return 0;
  }
  
 @@ -155,40 +139,18 @@ out2:
  close(s);
  }
  
 -int tcp_start_incoming_migration(const char *host_port)
 +int tcp_start_incoming_migration(const char *host_port, Error **errp)
  {
 -struct sockaddr_in addr;
 -int val;
  int s;
  
 -DPRINTF(Attempting to start an incoming migration\n);
 -
 -if (parse_host_port(addr, host_port)  0) {
 -fprintf(stderr, invalid host/port combination: %s\n, host_port);
 -return -EINVAL;
 -}
 -
 -s = qemu_socket(PF_INET, SOCK_STREAM, 0);
 -if (s == -1) {
 -return -socket_error();
 -}
 -
 -val = 1;
 -setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (const char *)val, sizeof(val));
 +s = inet_listen(host_port, NULL, 256, SOCK_STREAM, 0, errp);
  
 -if (bind(s, (struct sockaddr *)addr, sizeof(addr)) == -1) {
 -goto err;
 -}
 -if (listen(s, 1) == -1) {
 -goto err;
 +if (s  0) {
 +return -1;
  }
  
  qemu_set_fd_handler2(s, NULL, tcp_accept_incoming_migration, NULL,
   (void *)(intptr_t)s);
  
  return 0;
 -
 -err:
 -close(s);
 -return -socket_error();
  }
 diff --git a/migration.c b/migration.c
 index 94f7839..6289bc7 100644
 --- a/migration.c
 +++ b/migration.c
 @@ -60,13 +60,13 @@ static MigrationState *migrate_get_current(void)
  return current_migration;
  }
  
 -int qemu_start_incoming_migration(const char *uri)
 +int qemu_start_incoming_migration(const char *uri, Error **errp)
  {
  const char *p;
  int ret;
  
  if (strstart(uri, tcp:, p))
 -ret = tcp_start_incoming_migration(p);
 +ret = tcp_start_incoming_migration(p, errp);
  #if !defined(WIN32)
  else if (strstart(uri, exec:, p))
  ret =  exec_start_incoming_migration(p);
 @@ -413,7 +413,7 @@ void qmp_migrate(const char *uri, bool has_blk, bool blk,
  s = migrate_init(blk, inc);
  
  if (strstart(uri, 

[Qemu-devel] [PATCH 01/12 v12] Add API to create memory mapping list

2012-04-18 Thread Wen Congyang
The memory mapping list stores virtual address and physical address mapping.
The virtual address and physical address are contiguous in the mapping.
The following patch will use this information to create PT_LOAD in the vmcore.

Signed-off-by: Wen Congyang we...@cn.fujitsu.com
---
 Makefile.target  |1 +
 memory_mapping.c |  166 ++
 memory_mapping.h |   47 +++
 3 files changed, 214 insertions(+), 0 deletions(-)
 create mode 100644 memory_mapping.c
 create mode 100644 memory_mapping.h

diff --git a/Makefile.target b/Makefile.target
index 84951a0..cefb762 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -226,6 +226,7 @@ obj-$(CONFIG_KVM) += kvm.o kvm-all.o
 obj-$(CONFIG_NO_KVM) += kvm-stub.o
 obj-$(CONFIG_VGA) += vga.o
 obj-y += memory.o savevm.o
+obj-y += memory_mapping.o
 LIBS+=-lz
 
 obj-i386-$(CONFIG_KVM) += hyperv.o
diff --git a/memory_mapping.c b/memory_mapping.c
new file mode 100644
index 000..718f271
--- /dev/null
+++ b/memory_mapping.c
@@ -0,0 +1,166 @@
+/*
+ * QEMU memory mapping
+ *
+ * Copyright Fujitsu, Corp. 2011, 2012
+ *
+ * Authors:
+ * Wen Congyang we...@cn.fujitsu.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include cpu.h
+#include cpu-all.h
+#include memory_mapping.h
+
+static void memory_mapping_list_add_mapping_sorted(MemoryMappingList *list,
+   MemoryMapping *mapping)
+{
+MemoryMapping *p;
+
+QTAILQ_FOREACH(p, list-head, next) {
+if (p-phys_addr = mapping-phys_addr) {
+QTAILQ_INSERT_BEFORE(p, mapping, next);
+return;
+}
+}
+QTAILQ_INSERT_TAIL(list-head, mapping, next);
+}
+
+static void create_new_memory_mapping(MemoryMappingList *list,
+  target_phys_addr_t phys_addr,
+  target_phys_addr_t virt_addr,
+  ram_addr_t length)
+{
+MemoryMapping *memory_mapping;
+
+memory_mapping = g_malloc(sizeof(MemoryMapping));
+memory_mapping-phys_addr = phys_addr;
+memory_mapping-virt_addr = virt_addr;
+memory_mapping-length = length;
+list-last_mapping = memory_mapping;
+list-num++;
+memory_mapping_list_add_mapping_sorted(list, memory_mapping);
+}
+
+static inline bool mapping_contiguous(MemoryMapping *map,
+  target_phys_addr_t phys_addr,
+  target_phys_addr_t virt_addr)
+{
+return phys_addr == map-phys_addr + map-length 
+   virt_addr == map-virt_addr + map-length;
+}
+
+/*
+ * [map-phys_addr, map-phys_addr + map-length) and
+ * [phys_addr, phys_addr + length) have intersection?
+ */
+static inline bool mapping_have_same_region(MemoryMapping *map,
+target_phys_addr_t phys_addr,
+ram_addr_t length)
+{
+return !(phys_addr + length  map-phys_addr ||
+ phys_addr = map-phys_addr + map-length);
+}
+
+/*
+ * [map-phys_addr, map-phys_addr + map-length) and
+ * [phys_addr, phys_addr + length) have intersection. The virtual address in 
the
+ * intersection are the same?
+ */
+static inline bool mapping_conflict(MemoryMapping *map,
+target_phys_addr_t phys_addr,
+target_phys_addr_t virt_addr)
+{
+return virt_addr - map-virt_addr != phys_addr - map-phys_addr;
+}
+
+/*
+ * [map-virt_addr, map-virt_addr + map-length) and
+ * [virt_addr, virt_addr + length) have intersection. And the physical address
+ * in the intersection are the same.
+ */
+static inline void mapping_merge(MemoryMapping *map,
+ target_phys_addr_t virt_addr,
+ ram_addr_t length)
+{
+if (virt_addr  map-virt_addr) {
+map-length += map-virt_addr - virt_addr;
+map-virt_addr = virt_addr;
+}
+
+if ((virt_addr + length) 
+(map-virt_addr + map-length)) {
+map-length = virt_addr + length - map-virt_addr;
+}
+}
+
+void memory_mapping_list_add_merge_sorted(MemoryMappingList *list,
+  target_phys_addr_t phys_addr,
+  target_phys_addr_t virt_addr,
+  ram_addr_t length)
+{
+MemoryMapping *memory_mapping, *last_mapping;
+
+if (QTAILQ_EMPTY(list-head)) {
+create_new_memory_mapping(list, phys_addr, virt_addr, length);
+return;
+}
+
+last_mapping = list-last_mapping;
+if (last_mapping) {
+if (mapping_contiguous(last_mapping, phys_addr, virt_addr)) {
+last_mapping-length += length;
+return;
+}
+}
+
+QTAILQ_FOREACH(memory_mapping, list-head, next) {
+if 

[Qemu-devel] [PATCH 02/12 v12] Add API to check whether a physical address is I/O address

2012-04-18 Thread Wen Congyang
This API will be used in the following patch.

Signed-off-by: Wen Congyang we...@cn.fujitsu.com
---
 cpu-common.h |2 ++
 exec.c   |9 +
 2 files changed, 11 insertions(+), 0 deletions(-)

diff --git a/cpu-common.h b/cpu-common.h
index dca5175..fcd50dc 100644
--- a/cpu-common.h
+++ b/cpu-common.h
@@ -71,6 +71,8 @@ void cpu_physical_memory_unmap(void *buffer, 
target_phys_addr_t len,
 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque));
 void cpu_unregister_map_client(void *cookie);
 
+bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr);
+
 /* Coalesced MMIO regions are areas where write operations can be reordered.
  * This usually implies that write operations are side-effect free.  This 
allows
  * batching which can make a major impact on performance when using
diff --git a/exec.c b/exec.c
index 77d6866..fe00b76 100644
--- a/exec.c
+++ b/exec.c
@@ -4662,3 +4662,12 @@ bool virtio_is_big_endian(void)
 #undef env
 
 #endif
+
+bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr)
+{
+MemoryRegionSection *section;
+
+section = phys_page_find(phys_addr  TARGET_PAGE_BITS);
+
+return !is_ram_rom_romd(section);
+}
-- 
1.7.1




[Qemu-devel] [PATCH 03/12 v12] implement cpu_get_memory_mapping()

2012-04-18 Thread Wen Congyang
Walk the CPU's page table and collect all virtual address to physical address
mappings. Then add these mappings to the memory mapping list. If the guest does
not use paging, this does nothing. Note: I/O memory is skipped.

Signed-off-by: Wen Congyang we...@cn.fujitsu.com
---
 Makefile.target   |1 +
 configure |4 +
 cpu-all.h |   11 ++
 target-i386/arch_memory_mapping.c |  266 +
 4 files changed, 282 insertions(+), 0 deletions(-)
 create mode 100644 target-i386/arch_memory_mapping.c

diff --git a/Makefile.target b/Makefile.target
index cefb762..b41d663 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -89,6 +89,7 @@ libobj-y += helper.o
 ifeq ($(TARGET_BASE_ARCH), i386)
 libobj-y += cpu.o
 endif
+libobj-$(CONFIG_HAVE_GET_MEMORY_MAPPING) += arch_memory_mapping.o
 libobj-$(TARGET_SPARC64) += vis_helper.o
 libobj-$(CONFIG_NEED_MMU) += mmu.o
 libobj-$(TARGET_ARM) += neon_helper.o iwmmxt_helper.o
diff --git a/configure b/configure
index 2d62d12..d706200 100755
--- a/configure
+++ b/configure
@@ -3684,6 +3684,10 @@ case $target_arch2 in
   fi
 fi
 esac
+case $target_arch2 in
+  i386|x86_64)
+echo CONFIG_HAVE_GET_MEMORY_MAPPING=y  $config_target_mak
+esac
 if test $target_arch2 = ppc64 -a $fdt = yes; then
   echo CONFIG_PSERIES=y  $config_target_mak
 fi
diff --git a/cpu-all.h b/cpu-all.h
index f7d6867..bb43548 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -22,6 +22,7 @@
 #include qemu-common.h
 #include qemu-tls.h
 #include cpu-common.h
+#include memory_mapping.h
 
 /* some important defines:
  *
@@ -526,4 +527,14 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf);
 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
 uint8_t *buf, int len, int is_write);
 
+#if defined(CONFIG_HAVE_GET_MEMORY_MAPPING)
+int cpu_get_memory_mapping(MemoryMappingList *list, CPUArchState *env);
+#else
+static inline int cpu_get_memory_mapping(MemoryMappingList *list,
+ CPUArchState *env)
+{
+return -1;
+}
+#endif
+
 #endif /* CPU_ALL_H */
diff --git a/target-i386/arch_memory_mapping.c 
b/target-i386/arch_memory_mapping.c
new file mode 100644
index 000..dd64bec
--- /dev/null
+++ b/target-i386/arch_memory_mapping.c
@@ -0,0 +1,266 @@
+/*
+ * i386 memory mapping
+ *
+ * Copyright Fujitsu, Corp. 2011, 2012
+ *
+ * Authors:
+ * Wen Congyang we...@cn.fujitsu.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include cpu.h
+#include cpu-all.h
+
+/* PAE Paging or IA-32e Paging */
+static void walk_pte(MemoryMappingList *list, target_phys_addr_t 
pte_start_addr,
+ int32_t a20_mask, target_ulong start_line_addr)
+{
+target_phys_addr_t pte_addr, start_paddr;
+uint64_t pte;
+target_ulong start_vaddr;
+int i;
+
+for (i = 0; i  512; i++) {
+pte_addr = (pte_start_addr + i * 8)  a20_mask;
+pte = ldq_phys(pte_addr);
+if (!(pte  PG_PRESENT_MASK)) {
+/* not present */
+continue;
+}
+
+start_paddr = (pte  ~0xfff)  ~(0x1ULL  63);
+if (cpu_physical_memory_is_io(start_paddr)) {
+/* I/O region */
+continue;
+}
+
+start_vaddr = start_line_addr | ((i  0x1fff)  12);
+memory_mapping_list_add_merge_sorted(list, start_paddr,
+ start_vaddr, 1  12);
+}
+}
+
+/* 32-bit Paging */
+static void walk_pte2(MemoryMappingList *list,
+  target_phys_addr_t pte_start_addr, int32_t a20_mask,
+  target_ulong start_line_addr)
+{
+target_phys_addr_t pte_addr, start_paddr;
+uint32_t pte;
+target_ulong start_vaddr;
+int i;
+
+for (i = 0; i  1024; i++) {
+pte_addr = (pte_start_addr + i * 4)  a20_mask;
+pte = ldl_phys(pte_addr);
+if (!(pte  PG_PRESENT_MASK)) {
+/* not present */
+continue;
+}
+
+start_paddr = pte  ~0xfff;
+if (cpu_physical_memory_is_io(start_paddr)) {
+/* I/O region */
+continue;
+}
+
+start_vaddr = start_line_addr | ((i  0x3ff)  12);
+memory_mapping_list_add_merge_sorted(list, start_paddr,
+ start_vaddr, 1  12);
+}
+}
+
+/* PAE Paging or IA-32e Paging */
+static void walk_pde(MemoryMappingList *list, target_phys_addr_t 
pde_start_addr,
+ int32_t a20_mask, target_ulong start_line_addr)
+{
+target_phys_addr_t pde_addr, pte_start_addr, start_paddr;
+uint64_t pde;
+target_ulong line_addr, start_vaddr;
+int i;
+
+for (i = 0; i  512; i++) {
+pde_addr = (pde_start_addr + i * 8)  a20_mask;
+pde = ldq_phys(pde_addr);
+if (!(pde  PG_PRESENT_MASK)) {
+/* not present 

[Qemu-devel] [PATCH 04/12 v12] Add API to check whether paging mode is enabled

2012-04-18 Thread Wen Congyang
This API will be used in the following patch.

Signed-off-by: Wen Congyang we...@cn.fujitsu.com
---
 cpu-all.h |6 ++
 target-i386/arch_memory_mapping.c |7 ++-
 2 files changed, 12 insertions(+), 1 deletions(-)

diff --git a/cpu-all.h b/cpu-all.h
index bb43548..eb35415 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -529,12 +529,18 @@ int cpu_memory_rw_debug(CPUArchState *env, target_ulong 
addr,
 
 #if defined(CONFIG_HAVE_GET_MEMORY_MAPPING)
 int cpu_get_memory_mapping(MemoryMappingList *list, CPUArchState *env);
+bool cpu_paging_enabled(CPUArchState *env);
 #else
 static inline int cpu_get_memory_mapping(MemoryMappingList *list,
  CPUArchState *env)
 {
 return -1;
 }
+
+static inline bool cpu_paging_enabled(CPUArchState *env)
+{
+return true;
+}
 #endif
 
 #endif /* CPU_ALL_H */
diff --git a/target-i386/arch_memory_mapping.c 
b/target-i386/arch_memory_mapping.c
index dd64bec..bd50e11 100644
--- a/target-i386/arch_memory_mapping.c
+++ b/target-i386/arch_memory_mapping.c
@@ -233,7 +233,7 @@ static void walk_pml4e(MemoryMappingList *list,
 
 int cpu_get_memory_mapping(MemoryMappingList *list, CPUArchState *env)
 {
-if (!(env-cr[0]  CR0_PG_MASK)) {
+if (!cpu_paging_enabled(env)) {
 /* paging is disabled */
 return 0;
 }
@@ -264,3 +264,8 @@ int cpu_get_memory_mapping(MemoryMappingList *list, 
CPUArchState *env)
 
 return 0;
 }
+
+bool cpu_paging_enabled(CPUArchState *env)
+{
+return env-cr[0]  CR0_PG_MASK;
+}
-- 
1.7.1




[Qemu-devel] [PATCH 05/12 v12] Add API to get memory mapping

2012-04-18 Thread Wen Congyang
Add an API to get all virtual address to physical address mappings.
If the guest doesn't use paging, the virtual address is equal to the physical
address. The virtual address to physical address mapping is for gdb users, and
it does not include memory that is not referenced by the page table. So if
you want to use crash to analyze the vmcore, please do not specify the -p option.
The reason the -p option is not the default: a guest machine in a
catastrophic state can have corrupted memory, which we cannot trust.

Signed-off-by: Wen Congyang we...@cn.fujitsu.com
---
 memory_mapping.c |   47 +++
 memory_mapping.h |   15 +++
 2 files changed, 62 insertions(+), 0 deletions(-)

diff --git a/memory_mapping.c b/memory_mapping.c
index 718f271..627397a 100644
--- a/memory_mapping.c
+++ b/memory_mapping.c
@@ -164,3 +164,50 @@ void memory_mapping_list_init(MemoryMappingList *list)
 list-last_mapping = NULL;
 QTAILQ_INIT(list-head);
 }
+
+#if defined(CONFIG_HAVE_GET_MEMORY_MAPPING)
+
+static CPUArchState *find_paging_enabled_cpu(CPUArchState *start_cpu)
+{
+CPUArchState *env;
+
+for (env = start_cpu; env != NULL; env = env-next_cpu) {
+if (cpu_paging_enabled(env)) {
+return env;
+}
+}
+
+return NULL;
+}
+
+int qemu_get_guest_memory_mapping(MemoryMappingList *list)
+{
+CPUArchState *env, *first_paging_enabled_cpu;
+RAMBlock *block;
+ram_addr_t offset, length;
+int ret;
+
+first_paging_enabled_cpu = find_paging_enabled_cpu(first_cpu);
+if (first_paging_enabled_cpu) {
+for (env = first_paging_enabled_cpu; env != NULL; env = env-next_cpu) 
{
+ret = cpu_get_memory_mapping(list, env);
+if (ret  0) {
+return -1;
+}
+}
+return 0;
+}
+
+/*
+ * If the guest doesn't use paging, the virtual address is equal to 
physical
+ * address.
+ */
+QLIST_FOREACH(block, ram_list.blocks, next) {
+offset = block-offset;
+length = block-length;
+create_new_memory_mapping(list, offset, offset, length);
+}
+
+return 0;
+}
+#endif
diff --git a/memory_mapping.h b/memory_mapping.h
index 836b047..4d44641 100644
--- a/memory_mapping.h
+++ b/memory_mapping.h
@@ -44,4 +44,19 @@ void memory_mapping_list_free(MemoryMappingList *list);
 
 void memory_mapping_list_init(MemoryMappingList *list);
 
+/*
+ * Return value:
+ *0: success
+ *   -1: failed
+ *   -2: unsupported
+ */
+#if defined(CONFIG_HAVE_GET_MEMORY_MAPPING)
+int qemu_get_guest_memory_mapping(MemoryMappingList *list);
+#else
+static inline int qemu_get_guest_memory_mapping(MemoryMappingList *list)
+{
+return -2;
+}
+#endif
+
 #endif
-- 
1.7.1




[Qemu-devel] [PATCH 06/12 v12] Add API to get memory mapping without do paging

2012-04-18 Thread Wen Congyang
crash does not need the virtual address to physical address mapping, and that
mapping does not include memory that is not referenced by the page table.
crash does not use the virtual address, so we can create the mapping for all
physical memory (the virtual address is always 0). This patch provides an API
to do this, and it will be used in the following patch.

Signed-off-by: Wen Congyang we...@cn.fujitsu.com
---
 memory_mapping.c |9 +
 memory_mapping.h |3 +++
 2 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/memory_mapping.c b/memory_mapping.c
index 627397a..adb1595 100644
--- a/memory_mapping.c
+++ b/memory_mapping.c
@@ -211,3 +211,12 @@ int qemu_get_guest_memory_mapping(MemoryMappingList *list)
 return 0;
 }
 #endif
+
+void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list)
+{
+RAMBlock *block;
+
+QLIST_FOREACH(block, ram_list.blocks, next) {
+create_new_memory_mapping(list, block-offset, 0, block-length);
+}
+}
diff --git a/memory_mapping.h b/memory_mapping.h
index 4d44641..a583e44 100644
--- a/memory_mapping.h
+++ b/memory_mapping.h
@@ -59,4 +59,7 @@ static inline int 
qemu_get_guest_memory_mapping(MemoryMappingList *list)
 }
 #endif
 
+/* get guest's memory mapping without do paging(virtual address is 0). */
+void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list);
+
 #endif
-- 
1.7.1




[Qemu-devel] [PATCH 07/12 v12] target-i386: Add API to write elf notes to core file

2012-04-18 Thread Wen Congyang
The core file contains the registers' values. These APIs write the registers
to the core file, and they will be called in the following patch.

Signed-off-by: Wen Congyang we...@cn.fujitsu.com
---
 Makefile.target |1 +
 configure   |4 +
 cpu-all.h   |   23 +
 target-i386/arch_dump.c |  240 +++
 4 files changed, 268 insertions(+), 0 deletions(-)
 create mode 100644 target-i386/arch_dump.c

diff --git a/Makefile.target b/Makefile.target
index b41d663..dc35266 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -228,6 +228,7 @@ obj-$(CONFIG_NO_KVM) += kvm-stub.o
 obj-$(CONFIG_VGA) += vga.o
 obj-y += memory.o savevm.o
 obj-y += memory_mapping.o
+obj-$(CONFIG_HAVE_CORE_DUMP) += arch_dump.o
 LIBS+=-lz
 
 obj-i386-$(CONFIG_KVM) += hyperv.o
diff --git a/configure b/configure
index d706200..5d58d08 100755
--- a/configure
+++ b/configure
@@ -3703,6 +3703,10 @@ if test $target_softmmu = yes ; then
   if test $smartcard_nss = yes ; then
 echo subdir-$target: subdir-libcacard  $config_host_mak
   fi
+  case $target_arch2 in
+i386|x86_64)
+  echo CONFIG_HAVE_CORE_DUMP=y  $config_target_mak
+  esac
 fi
 if test $target_user_only = yes ; then
   echo CONFIG_USER_ONLY=y  $config_target_mak
diff --git a/cpu-all.h b/cpu-all.h
index eb35415..f132ec0 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -543,4 +543,27 @@ static inline bool cpu_paging_enabled(CPUArchState *env)
 }
 #endif
 
+typedef int (*write_core_dump_function)
+(target_phys_addr_t offset, void *buf, size_t size, void *opaque);
+#if defined(CONFIG_HAVE_CORE_DUMP)
+int cpu_write_elf64_note(write_core_dump_function f, CPUArchState *env,
+ int cpuid, target_phys_addr_t *offset, void *opaque);
+int cpu_write_elf32_note(write_core_dump_function f, CPUArchState *env,
+ int cpuid, target_phys_addr_t *offset, void *opaque);
+#else
+static inline int cpu_write_elf64_note(write_core_dump_function f,
+   CPUArchState *env, int cpuid,
+   target_phys_addr_t *offset, void 
*opaque)
+{
+return -1;
+}
+
+static inline int cpu_write_elf32_note(write_core_dump_function f,
+   CPUArchState *env, int cpuid,
+   target_phys_addr_t *offset, void 
*opaque)
+{
+return -1;
+}
+#endif
+
 #endif /* CPU_ALL_H */
diff --git a/target-i386/arch_dump.c b/target-i386/arch_dump.c
new file mode 100644
index 000..285c6e1
--- /dev/null
+++ b/target-i386/arch_dump.c
@@ -0,0 +1,240 @@
+/*
+ * i386 memory mapping
+ *
+ * Copyright Fujitsu, Corp. 2011, 2012
+ *
+ * Authors:
+ * Wen Congyang we...@cn.fujitsu.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include cpu.h
+#include cpu-all.h
+#include elf.h
+
+#ifdef TARGET_X86_64
+typedef struct {
+target_ulong r15, r14, r13, r12, rbp, rbx, r11, r10;
+target_ulong r9, r8, rax, rcx, rdx, rsi, rdi, orig_rax;
+target_ulong rip, cs, eflags;
+target_ulong rsp, ss;
+target_ulong fs_base, gs_base;
+target_ulong ds, es, fs, gs;
+} x86_64_user_regs_struct;
+
+typedef struct {
+char pad1[32];
+uint32_t pid;
+char pad2[76];
+x86_64_user_regs_struct regs;
+char pad3[8];
+} x86_64_elf_prstatus;
+
+static int x86_64_write_elf64_note(write_core_dump_function f,
+   CPUArchState *env, int id,
+   target_phys_addr_t *offset, void *opaque)
+{
+x86_64_user_regs_struct regs;
+Elf64_Nhdr *note;
+char *buf;
+int descsz, note_size, name_size = 5;
+const char *name = CORE;
+int ret;
+
+regs.r15 = env-regs[15];
+regs.r14 = env-regs[14];
+regs.r13 = env-regs[13];
+regs.r12 = env-regs[12];
+regs.r11 = env-regs[11];
+regs.r10 = env-regs[10];
+regs.r9  = env-regs[9];
+regs.r8  = env-regs[8];
+regs.rbp = env-regs[R_EBP];
+regs.rsp = env-regs[R_ESP];
+regs.rdi = env-regs[R_EDI];
+regs.rsi = env-regs[R_ESI];
+regs.rdx = env-regs[R_EDX];
+regs.rcx = env-regs[R_ECX];
+regs.rbx = env-regs[R_EBX];
+regs.rax = env-regs[R_EAX];
+regs.rip = env-eip;
+regs.eflags = env-eflags;
+
+regs.orig_rax = 0; /* FIXME */
+regs.cs = env-segs[R_CS].selector;
+regs.ss = env-segs[R_SS].selector;
+regs.fs_base = env-segs[R_FS].base;
+regs.gs_base = env-segs[R_GS].base;
+regs.ds = env-segs[R_DS].selector;
+regs.es = env-segs[R_ES].selector;
+regs.fs = env-segs[R_FS].selector;
+regs.gs = env-segs[R_GS].selector;
+
+descsz = sizeof(x86_64_elf_prstatus);
+note_size = ((sizeof(Elf64_Nhdr) + 3) / 4 + (name_size + 3) / 4 +
+(descsz + 3) / 4) * 4;
+note = g_malloc(note_size);
+
+memset(note, 0, note_size);
+note-n_namesz = cpu_to_le32(name_size);
+

[Qemu-devel] [PATCH 08/12 v12] target-i386: Add API to write cpu status to core file

2012-04-18 Thread Wen Congyang
The core file has the registers' values, but it does not include all of them.
Store the CPU status in a QEMU note so that the user can get more information
from the vmcore. If you change QEMUCPUState, please increment QEMUCPUSTATE_VERSION.

Signed-off-by: Wen Congyang we...@cn.fujitsu.com
---
 cpu-all.h   |   20 ++
 target-i386/arch_dump.c |  152 +++
 2 files changed, 172 insertions(+), 0 deletions(-)

diff --git a/cpu-all.h b/cpu-all.h
index f132ec0..33e9354 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -550,6 +550,10 @@ int cpu_write_elf64_note(write_core_dump_function f, 
CPUArchState *env,
  int cpuid, target_phys_addr_t *offset, void *opaque);
 int cpu_write_elf32_note(write_core_dump_function f, CPUArchState *env,
  int cpuid, target_phys_addr_t *offset, void *opaque);
+int cpu_write_elf64_qemunote(write_core_dump_function f, CPUArchState *env,
+ target_phys_addr_t *offset, void *opaque);
+int cpu_write_elf32_qemunote(write_core_dump_function f, CPUArchState *env,
+ target_phys_addr_t *offset, void *opaque);
 #else
 static inline int cpu_write_elf64_note(write_core_dump_function f,
CPUArchState *env, int cpuid,
@@ -564,6 +568,22 @@ static inline int 
cpu_write_elf32_note(write_core_dump_function f,
 {
 return -1;
 }
+
+static inline int cpu_write_elf64_qemunote(write_core_dump_function f,
+   CPUArchState *env,
+   target_phys_addr_t *offset,
+   void *opaque);
+{
+return -1;
+}
+
+static inline int cpu_write_elf32_qemunote(write_core_dump_function f,
+   CPUArchState *env,
+   target_phys_addr_t *offset,
+   void *opaque)
+{
+return -1;
+}
 #endif
 
 #endif /* CPU_ALL_H */
diff --git a/target-i386/arch_dump.c b/target-i386/arch_dump.c
index 285c6e1..0f61dae 100644
--- a/target-i386/arch_dump.c
+++ b/target-i386/arch_dump.c
@@ -238,3 +238,155 @@ int cpu_write_elf32_note(write_core_dump_function f, 
CPUArchState *env,
 
 return 0;
 }
+
+/*
+ * please count up QEMUCPUSTATE_VERSION if you have changed definition of
+ * QEMUCPUState, and modify the tools using this information accordingly.
+ */
+#define QEMUCPUSTATE_VERSION (1)
+
+struct QEMUCPUSegment {
+uint32_t selector;
+uint32_t limit;
+uint32_t flags;
+uint32_t pad;
+uint64_t base;
+};
+
+typedef struct QEMUCPUSegment QEMUCPUSegment;
+
+struct QEMUCPUState {
+uint32_t version;
+uint32_t size;
+uint64_t rax, rbx, rcx, rdx, rsi, rdi, rsp, rbp;
+uint64_t r8, r9, r10, r11, r12, r13, r14, r15;
+uint64_t rip, rflags;
+QEMUCPUSegment cs, ds, es, fs, gs, ss;
+QEMUCPUSegment ldt, tr, gdt, idt;
+uint64_t cr[5];
+};
+
+typedef struct QEMUCPUState QEMUCPUState;
+
+static void copy_segment(QEMUCPUSegment *d, SegmentCache *s)
+{
+d-pad = 0;
+d-selector = s-selector;
+d-limit = s-limit;
+d-flags = s-flags;
+d-base = s-base;
+}
+
+static void qemu_get_cpustate(QEMUCPUState *s, CPUArchState *env)
+{
+memset(s, 0, sizeof(QEMUCPUState));
+
+s-version = QEMUCPUSTATE_VERSION;
+s-size = sizeof(QEMUCPUState);
+
+s-rax = env-regs[R_EAX];
+s-rbx = env-regs[R_EBX];
+s-rcx = env-regs[R_ECX];
+s-rdx = env-regs[R_EDX];
+s-rsi = env-regs[R_ESI];
+s-rdi = env-regs[R_EDI];
+s-rsp = env-regs[R_ESP];
+s-rbp = env-regs[R_EBP];
+#ifdef TARGET_X86_64
+s-r8  = env-regs[8];
+s-r9  = env-regs[9];
+s-r10 = env-regs[10];
+s-r11 = env-regs[11];
+s-r12 = env-regs[12];
+s-r13 = env-regs[13];
+s-r14 = env-regs[14];
+s-r15 = env-regs[15];
+#endif
+s-rip = env-eip;
+s-rflags = env-eflags;
+
+copy_segment(s-cs, env-segs[R_CS]);
+copy_segment(s-ds, env-segs[R_DS]);
+copy_segment(s-es, env-segs[R_ES]);
+copy_segment(s-fs, env-segs[R_FS]);
+copy_segment(s-gs, env-segs[R_GS]);
+copy_segment(s-ss, env-segs[R_SS]);
+copy_segment(s-ldt, env-ldt);
+copy_segment(s-tr, env-tr);
+copy_segment(s-gdt, env-gdt);
+copy_segment(s-idt, env-idt);
+
+s-cr[0] = env-cr[0];
+s-cr[1] = env-cr[1];
+s-cr[2] = env-cr[2];
+s-cr[3] = env-cr[3];
+s-cr[4] = env-cr[4];
+}
+
+static inline int cpu_write_qemu_note(write_core_dump_function f,
+  CPUArchState *env,
+  target_phys_addr_t *offset,
+  void *opaque,
+  int type)
+{
+QEMUCPUState state;
+Elf64_Nhdr *note64;
+Elf32_Nhdr *note32;
+void *note;
+char *buf;
+int descsz, note_size, name_size = 5, note_head_size;
+const char *name = QEMU;
+int ret;

[Qemu-devel] [PATCH 09/12 v12] target-i386: add API to get dump info

2012-04-18 Thread Wen Congyang
Dump info contains: endianness, class and architecture. The next
patch will use this information to create the vmcore. Note: on an
x86 box, the class is ELFCLASS64 if the memory is larger than 4G.

Signed-off-by: Wen Congyang we...@cn.fujitsu.com
---
 cpu-all.h   |7 +++
 dump.h  |   23 +++
 target-i386/arch_dump.c |   34 ++
 3 files changed, 64 insertions(+), 0 deletions(-)
 create mode 100644 dump.h

diff --git a/cpu-all.h b/cpu-all.h
index 33e9354..64676f7 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -23,6 +23,7 @@
 #include qemu-tls.h
 #include cpu-common.h
 #include memory_mapping.h
+#include dump.h
 
 /* some important defines:
  *
@@ -554,6 +555,7 @@ int cpu_write_elf64_qemunote(write_core_dump_function f, 
CPUArchState *env,
  target_phys_addr_t *offset, void *opaque);
 int cpu_write_elf32_qemunote(write_core_dump_function f, CPUArchState *env,
  target_phys_addr_t *offset, void *opaque);
+int cpu_get_dump_info(ArchDumpInfo *info);
 #else
 static inline int cpu_write_elf64_note(write_core_dump_function f,
CPUArchState *env, int cpuid,
@@ -584,6 +586,11 @@ static inline int 
cpu_write_elf32_qemunote(write_core_dump_function f,
 {
 return -1;
 }
+
+static inline int cpu_get_dump_info(ArchDumpInfo *info)
+{
+return -1;
+}
 #endif
 
 #endif /* CPU_ALL_H */
diff --git a/dump.h b/dump.h
new file mode 100644
index 000..28340cf
--- /dev/null
+++ b/dump.h
@@ -0,0 +1,23 @@
+/*
+ * QEMU dump
+ *
+ * Copyright Fujitsu, Corp. 2011, 2012
+ *
+ * Authors:
+ * Wen Congyang we...@cn.fujitsu.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef DUMP_H
+#define DUMP_H
+
+typedef struct ArchDumpInfo {
+int d_machine;  /* Architecture */
+int d_endian;   /* ELFDATA2LSB or ELFDATA2MSB */
+int d_class;/* ELFCLASS32 or ELFCLASS64 */
+} ArchDumpInfo;
+
+#endif
diff --git a/target-i386/arch_dump.c b/target-i386/arch_dump.c
index 0f61dae..1a75dea 100644
--- a/target-i386/arch_dump.c
+++ b/target-i386/arch_dump.c
@@ -13,6 +13,7 @@
 
 #include cpu.h
 #include cpu-all.h
+#include dump.h
 #include elf.h
 
 #ifdef TARGET_X86_64
@@ -390,3 +391,36 @@ int cpu_write_elf32_qemunote(write_core_dump_function f, 
CPUArchState *env,
 {
 return cpu_write_qemu_note(f, env, offset, opaque, 0);
 }
+
+int cpu_get_dump_info(ArchDumpInfo *info)
+{
+bool lma = false;
+RAMBlock *block;
+
+#ifdef TARGET_X86_64
+lma = !!(first_cpu-hflags  HF_LMA_MASK);
+#endif
+
+if (lma) {
+info-d_machine = EM_X86_64;
+} else {
+info-d_machine = EM_386;
+}
+info-d_endian = ELFDATA2LSB;
+
+if (lma) {
+info-d_class = ELFCLASS64;
+} else {
+info-d_class = ELFCLASS32;
+
+QLIST_FOREACH(block, ram_list.blocks, next) {
+if (block-offset + block-length  UINT_MAX) {
+/* The memory size is greater than 4G */
+info-d_class = ELFCLASS64;
+break;
+}
+}
+}
+
+return 0;
+}
-- 
1.7.1




[Qemu-devel] [PATCH 10/12 v12] make gdb_id() generally avialable and rename it to cpu_index()

2012-04-18 Thread Wen Congyang
The following patch also needs this API, so make it generally available.
The function will no longer be used only by gdbstub.c, so the name gdb_id()
is not suitable; rename it to cpu_index().

Signed-off-by: Wen Congyang we...@cn.fujitsu.com
---
 gdbstub.c |   19 +--
 gdbstub.h |9 +
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/gdbstub.c b/gdbstub.c
index 6a7e2c4..423ffec 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -1938,21 +1938,12 @@ static void gdb_set_cpu_pc(GDBState *s, target_ulong pc)
 #endif
 }
 
-static inline int gdb_id(CPUArchState *env)
-{
-#if defined(CONFIG_USER_ONLY)  defined(CONFIG_USE_NPTL)
-return env-host_tid;
-#else
-return env-cpu_index + 1;
-#endif
-}
-
 static CPUArchState *find_cpu(uint32_t thread_id)
 {
 CPUArchState *env;
 
 for (env = first_cpu; env != NULL; env = env-next_cpu) {
-if (gdb_id(env) == thread_id) {
+if (cpu_index(env) == thread_id) {
 return env;
 }
 }
@@ -1980,7 +1971,7 @@ static int gdb_handle_packet(GDBState *s, const char 
*line_buf)
 case '?':
 /* TODO: Make this return the correct value for user-mode.  */
 snprintf(buf, sizeof(buf), T%02xthread:%02x;, GDB_SIGNAL_TRAP,
- gdb_id(s-c_cpu));
+ cpu_index(s-c_cpu));
 put_packet(s, buf);
 /* Remove all the breakpoints when this query is issued,
  * because gdb is doing and initial connect and the state
@@ -2275,7 +2266,7 @@ static int gdb_handle_packet(GDBState *s, const char 
*line_buf)
 } else if (strcmp(p,sThreadInfo) == 0) {
 report_cpuinfo:
 if (s-query_cpu) {
-snprintf(buf, sizeof(buf), m%x, gdb_id(s-query_cpu));
+snprintf(buf, sizeof(buf), m%x, cpu_index(s-query_cpu));
 put_packet(s, buf);
 s-query_cpu = s-query_cpu-next_cpu;
 } else
@@ -2423,7 +2414,7 @@ static void gdb_vm_state_change(void *opaque, int 
running, RunState state)
 }
 snprintf(buf, sizeof(buf),
  T%02xthread:%02x;%swatch: TARGET_FMT_lx ;,
- GDB_SIGNAL_TRAP, gdb_id(env), type,
+ GDB_SIGNAL_TRAP, cpu_index(env), type,
  env-watchpoint_hit-vaddr);
 env-watchpoint_hit = NULL;
 goto send_packet;
@@ -2456,7 +2447,7 @@ static void gdb_vm_state_change(void *opaque, int 
running, RunState state)
 ret = GDB_SIGNAL_UNKNOWN;
 break;
 }
-snprintf(buf, sizeof(buf), T%02xthread:%02x;, ret, gdb_id(env));
+snprintf(buf, sizeof(buf), T%02xthread:%02x;, ret, cpu_index(env));
 
 send_packet:
 put_packet(s, buf);
diff --git a/gdbstub.h b/gdbstub.h
index b44e275..668de66 100644
--- a/gdbstub.h
+++ b/gdbstub.h
@@ -30,6 +30,15 @@ void gdb_register_coprocessor(CPUArchState *env,
   gdb_reg_cb get_reg, gdb_reg_cb set_reg,
   int num_regs, const char *xml, int g_pos);
 
+static inline int cpu_index(CPUArchState *env)
+{
+#if defined(CONFIG_USER_ONLY)  defined(CONFIG_USE_NPTL)
+return env-host_tid;
+#else
+return env-cpu_index + 1;
+#endif
+}
+
 #endif
 
 #ifdef CONFIG_USER_ONLY
-- 
1.7.1




[Qemu-devel] [PATCH 11/12 v12] QError: Introduce new error for the dump-guest-memory command

2012-04-18 Thread Wen Congyang
The new error is QERR_PIPE_OR_SOCKET_FD, which is going to be
used by the QAPI dump-guest-memory command.

Signed-off-by: Wen Congyang we...@cn.fujitsu.com
---
 qerror.c |4 
 qerror.h |3 +++
 2 files changed, 7 insertions(+), 0 deletions(-)

diff --git a/qerror.c b/qerror.c
index 96fbe71..117c42f 100644
--- a/qerror.c
+++ b/qerror.c
@@ -304,6 +304,10 @@ static const QErrorStringTable qerror_table[] = {
 .error_fmt = QERR_VNC_SERVER_FAILED,
 .desc  = Could not start VNC server on %(target),
 },
+{
+.error_fmt = QERR_PIPE_OR_SOCKET_FD,
+.desc  = lseek() failed: the fd is associated with a pipe, 
socket,
+},
 {}
 };
 
diff --git a/qerror.h b/qerror.h
index 5c23c1f..e28cdad 100644
--- a/qerror.h
+++ b/qerror.h
@@ -248,4 +248,7 @@ QError *qobject_to_qerror(const QObject *obj);
 #define QERR_VNC_SERVER_FAILED \
 { 'class': 'VNCServerFailed', 'data': { 'target': %s } }
 
+#define QERR_PIPE_OR_SOCKET_FD \
+{ 'class': 'PipeOrSocketFD', 'data': {} }
+
 #endif /* QERROR_H */
-- 
1.7.1




Re: [Qemu-devel] xen build failure

2012-04-18 Thread Alexander Graf

On 17.04.2012, at 18:53, Stefano Stabellini wrote:

 On Tue, 17 Apr 2012, Alon Levy wrote:
 I have xen-devel-4.1.2-14.fc17.x86_64 and am getting the following build
 error:
 
  CCx86_64-softmmu/xen-all.o
 /home/alon/src/spice_upstream/qemu/xen-all.c: In function 
 ‘xen_hvm_inject_msi’:
 /home/alon/src/spice_upstream/qemu/xen-all.c:132:5: error: implicit 
 declaration of function ‘xc_hvm_inject_msi’ 
 [-Werror=implicit-function-declaration]
 /home/alon/src/spice_upstream/qemu/xen-all.c:132:5: error: nested extern 
 declaration of ‘xc_hvm_inject_msi’ [-Werror=nested-externs]
 
 commit f1dbf015dfb0aa7f66f710a1f1bc58b662951de2 seems to be missing a
 capability / version check.
 
 Thanks for the report!
 I tested it without Xen and with Xen 4.2 but not with Xen 4.1 :-/

So that means that our buildbot coverage is missing Xen 4.1 :). Could you guys 
possibly provide a machine with Xen 4.1 that'd be running buildbot so we see at 
least the compile time issues? :)


Alex




[Qemu-devel] [PATCH 12/12 v12] introduce a new monitor command 'dump-guest-memory' to dump guest's memory

2012-04-18 Thread Wen Congyang
The command's usage:
   dump [-p] protocol [begin] [length]
The supported protocol can be file or fd:
1. file: the protocol starts with file:, and the following string is
   the file's path.
2. fd: the protocol starts with fd:, and the following string is the
   fd's name.

Note:
  1. If you want to use gdb to process the core, please specify -p option.
 The reason why the -p option is not default is:
   a. guest machine in a catastrophic state can have corrupted memory,
  which we cannot trust.
   b. The guest machine can be in real-mode even if paging is enabled.
  For example: the guest machine uses ACPI to sleep, and the ACPI sleep
  state goes into real-mode.
  2. This command doesn't support an fd that is associated with a pipe,
 socket, or FIFO (lseek will fail with such an fd).
  3. If you don't want to dump all guest's memory, please specify the start
 physical address and the length.

Signed-off-by: Wen Congyang we...@cn.fujitsu.com
---
 Makefile.target  |2 +-
 dump.c   |  828 ++
 elf.h|5 +
 hmp-commands.hx  |   28 ++
 hmp.c|   22 ++
 hmp.h|1 +
 memory_mapping.c |   27 ++
 memory_mapping.h |3 +
 qapi-schema.json |   34 +++
 qmp-commands.hx  |   38 +++
 10 files changed, 987 insertions(+), 1 deletions(-)
 create mode 100644 dump.c

diff --git a/Makefile.target b/Makefile.target
index dc35266..e810b52 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -228,7 +228,7 @@ obj-$(CONFIG_NO_KVM) += kvm-stub.o
 obj-$(CONFIG_VGA) += vga.o
 obj-y += memory.o savevm.o
 obj-y += memory_mapping.o
-obj-$(CONFIG_HAVE_CORE_DUMP) += arch_dump.o
+obj-$(CONFIG_HAVE_CORE_DUMP) += arch_dump.o dump.o
 LIBS+=-lz
 
 obj-i386-$(CONFIG_KVM) += hyperv.o
diff --git a/dump.c b/dump.c
new file mode 100644
index 000..87fb0dd
--- /dev/null
+++ b/dump.c
@@ -0,0 +1,828 @@
+/*
+ * QEMU dump
+ *
+ * Copyright Fujitsu, Corp. 2011, 2012
+ *
+ * Authors:
+ * Wen Congyang we...@cn.fujitsu.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include qemu-common.h
+#include unistd.h
+#include elf.h
+#include sys/procfs.h
+#include glib.h
+#include cpu.h
+#include cpu-all.h
+#include targphys.h
+#include monitor.h
+#include kvm.h
+#include dump.h
+#include sysemu.h
+#include bswap.h
+#include memory_mapping.h
+#include error.h
+#include qmp-commands.h
+#include gdbstub.h
+
+static uint16_t cpu_convert_to_target16(uint16_t val, int endian)
+{
+if (endian == ELFDATA2LSB) {
+val = cpu_to_le16(val);
+} else {
+val = cpu_to_be16(val);
+}
+
+return val;
+}
+
+static uint32_t cpu_convert_to_target32(uint32_t val, int endian)
+{
+if (endian == ELFDATA2LSB) {
+val = cpu_to_le32(val);
+} else {
+val = cpu_to_be32(val);
+}
+
+return val;
+}
+
+static uint64_t cpu_convert_to_target64(uint64_t val, int endian)
+{
+if (endian == ELFDATA2LSB) {
+val = cpu_to_le64(val);
+} else {
+val = cpu_to_be64(val);
+}
+
+return val;
+}
+
+typedef struct DumpState {
+ArchDumpInfo dump_info;
+MemoryMappingList list;
+uint16_t phdr_num;
+uint32_t sh_info;
+bool have_section;
+bool resume;
+target_phys_addr_t memory_offset;
+int fd;
+
+RAMBlock *block;
+ram_addr_t start;
+bool has_filter;
+int64_t begin;
+int64_t length;
+Error **errp;
+} DumpState;
+
+static int dump_cleanup(DumpState *s)
+{
+int ret = 0;
+
+memory_mapping_list_free(s-list);
+if (s-fd != -1) {
+close(s-fd);
+}
+if (s-resume) {
+vm_start();
+}
+
+return ret;
+}
+
+static void dump_error(DumpState *s, const char *reason)
+{
+dump_cleanup(s);
+}
+
+static int fd_write_vmcore(target_phys_addr_t offset, void *buf, size_t size,
+   void *opaque)
+{
+DumpState *s = opaque;
+int fd = s-fd;
+off_t ret;
+size_t writen_size;
+
+while (1) {
+ret = lseek(fd, offset, SEEK_SET);
+if (ret  0) {
+if (errno == ESPIPE) {
+error_set(s-errp, QERR_PIPE_OR_SOCKET_FD);
+return -1;
+}
+
+if (errno != EINTR  errno != EAGAIN) {
+return -1;
+}
+continue;
+}
+break;
+}
+
+/* The fd may be passed from user, and it can be non-blocked */
+while (size) {
+writen_size = qemu_write_full(fd, buf, size);
+if (writen_size != size  errno != EAGAIN) {
+return -1;
+}
+
+buf += writen_size;
+size -= writen_size;
+}
+
+return 0;
+}
+
+static int write_elf64_header(DumpState *s)
+{
+Elf64_Ehdr elf_header;
+int ret;
+int endian = s-dump_info.d_endian;
+
+memset(elf_header, 0, sizeof(Elf64_Ehdr));
+memcpy(elf_header, ELFMAG, 

Re: [Qemu-devel] [PATCH] block: Fix spelling in comment (ineffcient - inefficient)

2012-04-18 Thread Kevin Wolf
Am 17.04.2012 19:41, schrieb Stefan Weil:
 Signed-off-by: Stefan Weil s...@weilnetz.de
 ---
  block/cow.c |2 +-
  1 files changed, 1 insertions(+), 1 deletions(-)
 
 diff --git a/block/cow.c b/block/cow.c
 index 8d3c9f8..a5a00eb 100644
 --- a/block/cow.c
 +++ b/block/cow.c
 @@ -103,7 +103,7 @@ static int cow_open(BlockDriverState *bs, int flags)
  }
  
  /*
 - * XXX(hch): right now these functions are extremely ineffcient.
 + * XXX(hch): right now these functions are extremely inefficient.
   * We should just read the whole bitmap we'll need in one go instead.
   */
  static inline int cow_set_bit(BlockDriverState *bs, int64_t bitnum)

Thanks, applied to the block branch.

Kevin



[Qemu-devel] [PATCH 1/2 v8] add-cow file format

2012-04-18 Thread Dong Xu Wang
From: Dong Xu Wang wdon...@linux.vnet.ibm.com

Provide a new file format: add-cow. The usage can be found in add-cow.txt of
this patch.

CC: Marcelo Tosatti mtosa...@redhat.com
CC: Kevin Wolf kw...@redhat.com
CC: Stefan Hajnoczi stefa...@linux.vnet.ibm.com
Signed-off-by: Dong Xu Wang wdon...@linux.vnet.ibm.com
---
 Makefile.objs  |1 +
 block.c|2 +-
 block.h|1 +
 block/add-cow-cache.c  |  197 
 block/add-cow.c|  332 
 block/add-cow.h|   63 +
 block_int.h|1 +
 docs/specs/add-cow.txt |   68 ++
 8 files changed, 664 insertions(+), 1 deletions(-)
 create mode 100644 block/add-cow-cache.c
 create mode 100644 block/add-cow.c
 create mode 100644 block/add-cow.h
 create mode 100644 docs/specs/add-cow.txt

diff --git a/Makefile.objs b/Makefile.objs
index 5c3bcda..c32c627 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -52,6 +52,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o 
dmg.o bochs.o vpc.o vv
 block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o 
qcow2-cache.o
 block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-nested-y += qed-check.o
+block-nested-y += add-cow.o add-cow-cache.o
 block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
 block-nested-y += stream.o
 block-nested-$(CONFIG_WIN32) += raw-win32.o
diff --git a/block.c b/block.c
index c0c90f0..abada9f 100644
--- a/block.c
+++ b/block.c
@@ -194,7 +194,7 @@ static void bdrv_io_limits_intercept(BlockDriverState *bs,
 }
 
 /* check if the path starts with protocol: */
-static int path_has_protocol(const char *path)
+int path_has_protocol(const char *path)
 {
 #ifdef _WIN32
 if (is_windows_drive(path) ||
diff --git a/block.h b/block.h
index f163e54..f74c79e 100644
--- a/block.h
+++ b/block.h
@@ -319,6 +319,7 @@ char *bdrv_snapshot_dump(char *buf, int buf_size, 
QEMUSnapshotInfo *sn);
 
 char *get_human_readable_size(char *buf, int buf_size, int64_t size);
 int path_is_absolute(const char *path);
+int path_has_protocol(const char *path);
 void path_combine(char *dest, int dest_size,
   const char *base_path,
   const char *filename);
diff --git a/block/add-cow-cache.c b/block/add-cow-cache.c
new file mode 100644
index 000..2ea0ac4
--- /dev/null
+++ b/block/add-cow-cache.c
@@ -0,0 +1,197 @@
+/*
+ * Cache For QEMU ADD-COW Disk Format
+ *
+ * Copyright IBM, Corp. 2012
+ *
+ * Authors:
+ *  Dong Xu Wang wdon...@linux.vnet.ibm.com
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include block_int.h
+#include qemu-common.h
+#include add-cow.h
+
+AddCowCache *add_cow_cache_create(BlockDriverState *bs, int num_tables)
+{
+BDRVAddCowState *s = bs-opaque;
+AddCowCache *c;
+int i;
+
+c = g_malloc0(sizeof(*c));
+c-size = num_tables;
+c-entries = g_malloc0(sizeof(*c-entries) * num_tables);
+
+for (i = 0; i  c-size; i++) {
+c-entries[i].table = qemu_blockalign(bs, 8 * s-cluster_size);
+c-entries[i].offset = -1;
+}
+
+return c;
+}
+
+void add_cow_cache_destroy(BlockDriverState *bs, AddCowCache *c)
+{
+int i;
+
+for (i = 0; i  c-size; i++) {
+qemu_vfree(c-entries[i].table);
+}
+
+g_free(c-entries);
+g_free(c);
+}
+
+static int add_cow_cache_find_entry_to_replace(AddCowCache *c)
+{
+int i;
+int min_count = INT_MAX;
+int min_index = -1;
+
+
+for (i = 0; i  c-size; i++) {
+if (c-entries[i].cache_hits  min_count) {
+min_index = i;
+min_count = c-entries[i].cache_hits;
+}
+
+c-entries[i].cache_hits /= 2;
+}
+
+return min_index;
+}
+
+static int add_cow_cache_entry_flush(BlockDriverState *bs,
+AddCowCache *c, int i)
+{
+BDRVAddCowState *s = bs-opaque;
+int ret = 0, j;
+
+if (!c-entries[i].dirty || (-1 == c-entries[i].offset)) {
+return 0;
+}
+
+for (j = 0; j  8; j++) {
+if (c-entries[i].bitmap  (1  j)) {
+ret = bdrv_pwrite(s-image_hd,
+c-entries[i].offset * BDRV_SECTOR_SIZE + s-cluster_size * j,
+c-entries[i].table + s-cluster_size * j,
+s-cluster_size);
+}
+if (ret  0) {
+return ret;
+}
+}
+ret = bdrv_flush(s-image_hd);
+if (ret  0) {
+return ret;
+}
+
+ret = bdrv_pwrite(bs-file,
+sizeof(AddCowHeader) + (c-entries[i].offset  10),
+c-entries[i].bitmap,
+1);
+if (ret  0) {
+return ret;
+}
+ret = bdrv_flush(bs-file);
+if (ret  0) {
+return ret;
+}
+
+c-entries[i].dirty = false;
+return 0;
+}
+
+void add_cow_cache_entry_mark_dirty(AddCowCache *c, 

[Qemu-devel] [PATCH 2/2 v8] add-cow: support snapshot_blkdev

2012-04-18 Thread Dong Xu Wang
From: Dong Xu Wang wdon...@linux.vnet.ibm.com

The raw file format does not support the snapshot_blkdev command; we can use
add-cow to do this.

CC: Marcelo Tosatti mtosa...@redhat.com
CC: Kevin Wolf kw...@redhat.com
CC: Stefan Hajnoczi stefa...@linux.vnet.ibm.com
Signed-off-by: Dong Xu Wang wdon...@linux.vnet.ibm.com
---
 blockdev.c  |   31 +++
 docs/live-block-ops.txt |   10 +-
 2 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/blockdev.c b/blockdev.c
index 0c2440e..0332b49 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -780,15 +780,38 @@ void qmp_transaction(BlockdevActionList *dev_list, Error 
**errp)
 
 /* create new image w/backing file */
 if (mode != NEW_IMAGE_MODE_EXISTING) {
-ret = bdrv_img_create(new_image_file, format,
+if (strcmp(format, add-cow)) {
+ret = bdrv_img_create(new_image_file, format,
   states-old_bs-filename,
   states-old_bs-drv-format_name,
   NULL, -1, flags);
-if (ret) {
-error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
-goto delete_and_fail;
+} else {
+char image_file[1024];
+char option[1024];
+uint64_t size;
+
+bdrv_get_geometry(states-old_bs, size);
+size *= BDRV_SECTOR_SIZE;
+
+sprintf(image_file, %s.raw, new_image_file);
+
+ret = bdrv_img_create(image_file, raw, NULL,
+  NULL, NULL, size, flags);
+if (ret) {
+error_set(errp, QERR_UNDEFINED_ERROR);
+return;
+}
+sprintf(option, image_file=%s.raw, new_image_file);
+ret = bdrv_img_create(new_image_file, format,
+  states-old_bs-filename,
+  states-old_bs-drv-format_name,
+  option, -1, flags);
 }
 }
+if (ret) {
+error_set(errp, QERR_OPEN_FILE_FAILED, new_image_file);
+goto delete_and_fail;
+}
 
 /* We will manually add the backing_hd field to the bs later */
 states-new_bs = bdrv_new();
diff --git a/docs/live-block-ops.txt b/docs/live-block-ops.txt
index a257087..c97344b 100644
--- a/docs/live-block-ops.txt
+++ b/docs/live-block-ops.txt
@@ -2,7 +2,8 @@ LIVE BLOCK OPERATIONS
 =
 
 High level description of live block operations. Note these are not
-supported for use with the raw format at the moment.
+supported for use with the raw format at the moment, but we can use
+add-cow as metadata to support raw format.
 
 Snapshot live merge
 ===
@@ -56,3 +57,10 @@ into that image. Example:
 (qemu) block_stream ide0-hd0
 
 
+
+Raw is not supported, but we can use add-cow in the 1st step:
+
+(qemu) snapshot_blkdev ide0-hd0 /new-path/disk.img add-cow
+
+It will create a raw file named disk.img.raw, with the same virtual size of
+ide0-hd0 first, and then create disk.img.
-- 
1.7.5.4




Re: [Qemu-devel] [PATCH 2/3] qtest: enable qtest for most targets

2012-04-18 Thread Peter Maydell
On 17 April 2012 22:33, Anthony Liguori aligu...@us.ibm.com wrote:
 Kernel loading is a hack.  I'll go out on a limb and say that most non-x86
 boards are doing it completely wrong.  Messing around with CPU state has no
 business in machine init.  It creates horrible dependencies about RAM
 initialization order and problems for reset/live migration.

 The kernel should be presented as a virtual device (an emulated flash or
 whatever) and there should be firmware that loads the kernel appropriately.
 Then we wouldn't need changes like this in the first place.

Yeah, I tend to agree that that would be a cleaner approach, but so far
it hasn't been a requirement. Feel free to submit patches to fix kernel
loading for these boards if you want to be able to rely on this behaviour :-)

-- PMM



[Qemu-devel] [RFC PATCH 0/2] Standard SD host controller model

2012-04-18 Thread Igor Mitsyanko
The first patch introduces a standard SD host controller model. It combines
my previous patch, sent a while ago, with a recent SDHCI patch by
Peter A. G. Crosthwaite.
The second patch introduces an Exynos4210-specific SDHCI built on top of the
standard SDHCI model.

Igor Mitsyanko (2):
  hw: introduce standard SD host controller
  target-arm: introduce Exynos4210 SD host controller model

 Makefile.objs   |1 +
 Makefile.target |1 +
 default-configs/arm-softmmu.mak |1 +
 hw/exynos4210.c |   20 +
 hw/exynos4210_sdhci.c   |  438 ++
 hw/sdhci.c  | 1265 +++
 hw/sdhci.h  |  307 ++
 7 files changed, 2033 insertions(+), 0 deletions(-)
 create mode 100644 hw/exynos4210_sdhci.c
 create mode 100644 hw/sdhci.c
 create mode 100644 hw/sdhci.h

-- 
1.7.4.1




[Qemu-devel] [RFC PATCH 2/2] target-arm: introduce Exynos4210 SD host controller model

2012-04-18 Thread Igor Mitsyanko
Exynos4210 SD/MMC host controller is based on the SD Association standard host
controller ver. 2.00

Signed-off-by: Igor Mitsyanko i.mitsya...@samsung.com
---
 Makefile.target   |1 +
 hw/exynos4210.c   |   20 +++
 hw/exynos4210_sdhci.c |  438 +
 3 files changed, 459 insertions(+), 0 deletions(-)
 create mode 100644 hw/exynos4210_sdhci.c

diff --git a/Makefile.target b/Makefile.target
index 84951a0..7cd58a1 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -373,6 +373,7 @@ obj-arm-y += realview_gic.o realview.o arm_sysctl.o 
arm11mpcore.o a9mpcore.o
 obj-arm-y += exynos4210_gic.o exynos4210_combiner.o exynos4210.o
 obj-arm-y += exynos4_boards.o exynos4210_uart.o exynos4210_pwm.o
 obj-arm-y += exynos4210_pmu.o exynos4210_mct.o exynos4210_fimd.o
+obj-arm-y += exynos4210_sdhci.o
 obj-arm-y += arm_l2x0.o
 obj-arm-y += arm_mptimer.o a15mpcore.o
 obj-arm-y += armv7m.o armv7m_nvic.o stellaris.o pl022.o stellaris_enet.o
diff --git a/hw/exynos4210.c b/hw/exynos4210.c
index afc4bdc..4f9d91b 100644
--- a/hw/exynos4210.c
+++ b/hw/exynos4210.c
@@ -56,6 +56,12 @@
 #define EXYNOS4210_EXT_COMBINER_BASE_ADDR   0x1044
 #define EXYNOS4210_INT_COMBINER_BASE_ADDR   0x10448000
 
+/* SD/MMC host controllers SFR base addresses */
+#define EXYNOS4210_SDHC0_BASE_ADDR  0x1251
+#define EXYNOS4210_SDHC1_BASE_ADDR  0x1252
+#define EXYNOS4210_SDHC2_BASE_ADDR  0x1253
+#define EXYNOS4210_SDHC3_BASE_ADDR  0x1254
+
 /* PMU SFR base address */
 #define EXYNOS4210_PMU_BASE_ADDR0x1002
 
@@ -289,6 +295,20 @@ Exynos4210State *exynos4210_init(MemoryRegion *system_mem,
EXYNOS4210_UART3_FIFO_SIZE, 3, NULL,
   s-irq_table[exynos4210_get_irq(EXYNOS4210_UART_INT_GRP, 
3)]);
 
+/*** SD/MMC host controllers ***/
+
+sysbus_create_simple(exynos4210.sdhci, EXYNOS4210_SDHC0_BASE_ADDR,
+s-irq_table[exynos4210_get_irq(29, 0)]);
+
+sysbus_create_simple(exynos4210.sdhci, EXYNOS4210_SDHC1_BASE_ADDR,
+s-irq_table[exynos4210_get_irq(29, 1)]);
+
+sysbus_create_simple(exynos4210.sdhci, EXYNOS4210_SDHC2_BASE_ADDR,
+s-irq_table[exynos4210_get_irq(29, 2)]);
+
+sysbus_create_simple(exynos4210.sdhci, EXYNOS4210_SDHC3_BASE_ADDR,
+s-irq_table[exynos4210_get_irq(29, 3)]);
+
 /*** Display controller (FIMD) ***/
 sysbus_create_varargs(exynos4210.fimd, EXYNOS4210_FIMD0_BASE_ADDR,
 s-irq_table[exynos4210_get_irq(11, 0)],
diff --git a/hw/exynos4210_sdhci.c b/hw/exynos4210_sdhci.c
new file mode 100644
index 000..cb63279
--- /dev/null
+++ b/hw/exynos4210_sdhci.c
@@ -0,0 +1,438 @@
+/*
+ * Samsung Exynos4210 SD/MMC host controller model
+ *
+ * Copyright (c) 2012 Samsung Electronics Co., Ltd.
+ * Mitsyanko Igor i.mitsya...@samsung.com
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ * See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see http://www.gnu.org/licenses/.
+ */
+
+#include sdhci.h
+
+#define EXYNOS4_SDHC_CAPABILITIES0x05E80080
+#define EXYNOS4_SDHC_MAX_BUFSZ   512
+
+#define EXYNOS4_SDHC_DEBUG   0
+
+#if EXYNOS4_SDHC_DEBUG == 0
+#define DPRINT_L1(fmt, args...)   do { } while (0)
+#define DPRINT_L2(fmt, args...)   do { } while (0)
+#define ERRPRINT(fmt, args...)do { } while (0)
+#elif EXYNOS4_SDHC_DEBUG == 1
+#define DPRINT_L1(fmt, args...)   \
+do {fprintf(stderr, QEMU SDHC: fmt, ## args); } while (0)
+#define DPRINT_L2(fmt, args...)   do { } while (0)
+#define ERRPRINT(fmt, args...)\
+do {fprintf(stderr, QEMU SDHC ERROR: fmt, ## args); } while (0)
+#else
+#define DPRINT_L1(fmt, args...)   \
+do {fprintf(stderr, QEMU SDHC: fmt, ## args); } while (0)
+#define DPRINT_L2(fmt, args...)   \
+do {fprintf(stderr, QEMU SDHC: fmt, ## args); } while (0)
+#define ERRPRINT(fmt, args...)\
+do {fprintf(stderr, QEMU SDHC ERROR: fmt, ## args); } while (0)
+#endif
+
+
+#define TYPE_EXYNOS4_SDHCexynos4210.sdhci
+#define EXYNOS4_SDHCI(obj)   \
+ OBJECT_CHECK(Exynos4SDHCIState, (obj), TYPE_EXYNOS4_SDHC)
+
+/* ADMA Error Status Register */
+#define EXYNOS4_SDHC_FINAL_BLOCK (1  10)
+#define EXYNOS4_SDHC_CONTINUE_REQ(1  9)
+#define EXYNOS4_SDHC_IRQ_STAT(1  8)
+/* Control register 2 */
+#define EXYNOS4_SDHC_CONTROL20x80
+#define 

Re: [Qemu-devel] [PATCH 12/12 v12] introduce a new monitor command 'dump-guest-memory' to dump guest's memory

2012-04-18 Thread Daniel P. Berrange
On Wed, Apr 18, 2012 at 03:30:04PM +0800, Wen Congyang wrote:
 The command's usage:
dump [-p] protocol [begin] [length]
 The supported protocol can be file or fd:
 1. file: the protocol starts with file:, and the following string is
the file's path.
 2. fd: the protocol starts with fd:, and the following string is the
fd's name.
 
 Note:
   1. If you want to use gdb to process the core, please specify -p option.
  The reason why the -p option is not default is:
a. guest machine in a catastrophic state can have corrupted memory,
   which we cannot trust.

What is the behaviour of this command if we set '-p', and the guest
is corrupt ?

If '-p' is not safe when the guest is in a corrupted state, then I'd
argue that '-p' is not safe *ever*, since a malicious guest could
setup bad page maps at any time it likes and we've no way of knowing
this from the host.

b. The guest machine can be in read-mode even if paging is enabled.
   For example: the guest machine uses ACPI to sleep, and ACPI sleep
   state goes in real-mode.
   2. This command doesn't support the fd that is is associated with a pipe,
  socket, or FIFO(lseek will fail with such fd).

How hard would it be to lift that restriction ? When libvirt does save to
file, or core dump these days, we tend to pass a pipe FD to QEMU, which
is connected to libvirt's I/O helper process. The reason for this is that
it lets us turn on O_DIRECT for the dumped data, which has proved to be
quite an important feature for oVirt. So I'd rather like to keep that
ability with the new dump command.

Regards,
Daniel
-- 
|: http://berrange.com  -o-http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org  -o- http://virt-manager.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org   -o-   http://live.gnome.org/gtk-vnc :|



[Qemu-devel] [PATCH] qxl-render: fix broken vnc+spice since commit f934493

2012-04-18 Thread Alon Levy
Notify any listeners such as vnc that the displaysurface has been
changed, otherwise they will segfault when first accessing the freed old
displaysurface data.

Signed-off-by: Alon Levy al...@redhat.com
---
 hw/qxl-render.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/qxl-render.c b/hw/qxl-render.c
index 04eca5c..ee9ea6d 100644
--- a/hw/qxl-render.c
+++ b/hw/qxl-render.c
@@ -126,6 +126,7 @@ static void qxl_render_update_area_unlocked(PCIQXLDevice 
*qxl)
 qemu_resize_displaysurface(vga-ds,
 qxl-guest_primary.surface.width,
 qxl-guest_primary.surface.height);
+dpy_resize(vga-ds);
 }
 }
 for (i = 0; i  qxl-num_dirty_rects; i++) {
-- 
1.7.10




[Qemu-devel] [PATCH v2] qxl-render: fix broken vnc+spice since commit f934493

2012-04-18 Thread Alon Levy
Notify any listeners such as vnc that the displaysurface has been
changed, otherwise they will segfault when first accessing the freed old
displaysurface data.

Signed-off-by: Alon Levy al...@redhat.com
---
 hw/qxl-render.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/qxl-render.c b/hw/qxl-render.c
index 04eca5c..0331c23 100644
--- a/hw/qxl-render.c
+++ b/hw/qxl-render.c
@@ -127,6 +127,7 @@ static void qxl_render_update_area_unlocked(PCIQXLDevice 
*qxl)
 qxl-guest_primary.surface.width,
 qxl-guest_primary.surface.height);
 }
+dpy_resize(vga-ds);
 }
 for (i = 0; i  qxl-num_dirty_rects; i++) {
 if (qemu_spice_rect_is_empty(qxl-dirty+i)) {
-- 
1.7.10




Re: [Qemu-devel] [PATCH 12/12 v12] introduce a new monitor command 'dump-guest-memory' to dump guest's memory

2012-04-18 Thread Wen Congyang
At 04/18/2012 05:23 PM, Daniel P. Berrange Wrote:
 On Wed, Apr 18, 2012 at 03:30:04PM +0800, Wen Congyang wrote:
 The command's usage:
dump [-p] protocol [begin] [length]
 The supported protocol can be file or fd:
 1. file: the protocol starts with file:, and the following string is
the file's path.
 2. fd: the protocol starts with fd:, and the following string is the
fd's name.

 Note:
   1. If you want to use gdb to process the core, please specify -p option.
  The reason why the -p option is not default is:
a. guest machine in a catastrophic state can have corrupted memory,
   which we cannot trust.
 
 What is the behaviour of this command if we set '-p', and the guest
 is corrupt ?
 
 If '-p' is not safe when the guest is in a corrupted state, then I'd
 argue that '-p' is not safe *ever*, since a malicious guest could
 setup bad page maps at any time it likes and we've no way of knowing
 this from the host.

Yes, '-p' is not safe, and the default value is false.
Someone may use gdb to deal with the vmcore, so '-p' is useful for them.

 
b. The guest machine can be in read-mode even if paging is enabled.
   For example: the guest machine uses ACPI to sleep, and ACPI sleep
   state goes in real-mode.
   2. This command doesn't support the fd that is is associated with a pipe,
  socket, or FIFO(lseek will fail with such fd).
 
 How hard would it be to lift that restriction ? When libvirt does save to
 file, or core dump these days, we tend to pass a pipe FD to QEMU, which
 is connected to libvirt's I/O helper process. The reason for this is that
 it lets us turn on O_DIRECT for the dumped data, which has proved to be
 quite an important feature for oVirt. So I'd rather like to keep that
 ability with the new dump command.

The reason is that we will use lseek(fd, ...). If you pass a pipe FD
to qemu, lseek() will fail. I do not know the note size before we
write it to the core, so I use lseek()...

Thanks
Wen Congyang

 
 Regards,
 Daniel




Re: [Qemu-devel] [PATCH 12/12 v12] introduce a new monitor command 'dump-guest-memory' to dump guest's memory

2012-04-18 Thread Wen Congyang
At 04/18/2012 05:23 PM, Daniel P. Berrange Wrote:
 On Wed, Apr 18, 2012 at 03:30:04PM +0800, Wen Congyang wrote:
 The command's usage:
dump [-p] protocol [begin] [length]
 The supported protocol can be file or fd:
 1. file: the protocol starts with file:, and the following string is
the file's path.
 2. fd: the protocol starts with fd:, and the following string is the
fd's name.

 Note:
   1. If you want to use gdb to process the core, please specify -p option.
  The reason why the -p option is not default is:
a. guest machine in a catastrophic state can have corrupted memory,
   which we cannot trust.
 
 What is the behaviour of this command if we set '-p', and the guest
 is corrupt ?
 
 If '-p' is not safe when the guest is in a corrupted state, then I'd
 argue that '-p' is not safe *ever*, since a malicious guest could
 setup bad page maps at any time it likes and we've no way of knowing
 this from the host.
 
b. The guest machine can be in read-mode even if paging is enabled.
   For example: the guest machine uses ACPI to sleep, and ACPI sleep
   state goes in real-mode.
   2. This command doesn't support the fd that is is associated with a pipe,
  socket, or FIFO(lseek will fail with such fd).
 
 How hard would it be to lift that restriction ? When libvirt does save to
 file, or core dump these days, we tend to pass a pipe FD to QEMU, which
 is connected to libvirt's I/O helper process. The reason for this is that
 it lets us turn on O_DIRECT for the dumped data, which has proved to be
 quite an important feature for oVirt. So I'd rather like to keep that
 ability with the new dump command.

Here is the libvirt's code in doCoreDump():

/* Core dumps usually imply last-ditch analysis efforts are
 * desired, so we intentionally do not unlink even if a file was
 * created.  */
if ((fd = qemuOpenFile(driver, path,
   O_CREAT | O_TRUNC | O_WRONLY | directFlag,
   NULL, NULL))  0)


In my test, the fd returns from qemuOpenFile is not a pipe FD. So
O_DIRECT still exists.
Am I missing something?

Thanks
Wen Congyang
 
 Regards,
 Daniel




Re: [Qemu-devel] [PATCH 12/12 v12] introduce a new monitor command 'dump-guest-memory' to dump guest's memory

2012-04-18 Thread Daniel P. Berrange
On Wed, Apr 18, 2012 at 05:44:36PM +0800, Wen Congyang wrote:
 At 04/18/2012 05:23 PM, Daniel P. Berrange Wrote:
  On Wed, Apr 18, 2012 at 03:30:04PM +0800, Wen Congyang wrote:
  The command's usage:
 dump [-p] protocol [begin] [length]
  The supported protocol can be file or fd:
  1. file: the protocol starts with file:, and the following string is
 the file's path.
  2. fd: the protocol starts with fd:, and the following string is the
 fd's name.
 
  Note:
1. If you want to use gdb to process the core, please specify -p option.
   The reason why the -p option is not default is:
 a. guest machine in a catastrophic state can have corrupted memory,
which we cannot trust.
  
  What is the behaviour of this command if we set '-p', and the guest
  is corrupt ?
  
  If '-p' is not safe when the guest is in a corrupted state, then I'd
  argue that '-p' is not safe *ever*, since a malicious guest could
  setup bad page maps at any time it likes and we've no way of knowing
  this from the host.
  
 b. The guest machine can be in read-mode even if paging is enabled.
For example: the guest machine uses ACPI to sleep, and ACPI sleep
state goes in real-mode.
2. This command doesn't support the fd that is is associated with a pipe,
   socket, or FIFO(lseek will fail with such fd).
  
  How hard would it be to lift that restriction ? When libvirt does save to
  file, or core dump these days, we tend to pass a pipe FD to QEMU, which
  is connected to libvirt's I/O helper process. The reason for this is that
  it lets us turn on O_DIRECT for the dumped data, which has proved to be
  quite an important feature for oVirt. So I'd rather like to keep that
  ability with the new dump command.
 
 Here is the libvirt's code in doCoreDump():
 
 /* Core dumps usually imply last-ditch analysis efforts are
  * desired, so we intentionally do not unlink even if a file was
  * created.  */
 if ((fd = qemuOpenFile(driver, path,
O_CREAT | O_TRUNC | O_WRONLY | directFlag,
NULL, NULL))  0)
 
 
 In my test, the fd returns from qemuOpenFile is not a pipe FD. So
 O_DIRECT still exists.
 Do I miss something?

If we turned on O_DIRECT on the 'fd' that we pass to QEMU, then QEMU
needs to be carefully written to use aligned memory buffers and do
writes in the correct size. It almost certainly does not do this
correctly.

Thus libvirt uses an I/O helper process where the I/O helper has the
FD for the actual file and QEMU just gets a pipe FD. Thus only the
I/O helper needs to be careful about O_DIRECT alignment issues.


Daniel
-- 
|: http://berrange.com  -o-http://www.flickr.com/photos/dberrange/ :|
|: http://libvirt.org  -o- http://virt-manager.org :|
|: http://autobuild.org   -o- http://search.cpan.org/~danberr/ :|
|: http://entangle-photo.org   -o-   http://live.gnome.org/gtk-vnc :|



Re: [Qemu-devel] [PATCH RFC 2/6] target-i386: add cpu-model property to x86_cpu

2012-04-18 Thread Andreas Färber
Am 17.04.2012 01:36, schrieb Igor Mammedov:
 From: Igor Mammedov niall...@gmail.com
 
 Signed-off-by: Igor Mammedov niall...@gmail.com
 ---
  cpu-defs.h   |2 +-
  hw/pc.c  |   10 --
  target-i386/cpu.c|   33 +
  target-i386/helper.c |   12 
  4 files changed, 42 insertions(+), 15 deletions(-)
 
 diff --git a/cpu-defs.h b/cpu-defs.h
 index 3268968..8f1caaf 100644
 --- a/cpu-defs.h
 +++ b/cpu-defs.h
 @@ -221,7 +221,7 @@ typedef struct CPUWatchpoint {
  struct QemuCond *halt_cond; \
  int thread_kicked;  \
  struct qemu_work_item *queued_work_first, *queued_work_last;\
 -const char *cpu_model_str;  \
 +char *cpu_model_str;  \

Breaks alignment of \.

  struct KVMState *kvm_state; \
  struct kvm_run *kvm_run;\
  int kvm_fd; \
 diff --git a/hw/pc.c b/hw/pc.c
 index d2c122e..7f0de99 100644
 --- a/hw/pc.c
 +++ b/hw/pc.c
 @@ -931,7 +931,6 @@ static CPUX86State *pc_new_cpu(const char *cpu_model)
  
  env = cpu_init(cpu_model);
  if (!env) {
 -fprintf(stderr, Unable to find x86 CPU definition\n);
  exit(1);
  }
  if ((env-cpuid_features  CPUID_APIC) || smp_cpus  1) {
 @@ -950,15 +949,6 @@ void pc_cpus_init(const char *cpu_model)
  {
  int i;
  
 -/* init CPUs */
 -if (cpu_model == NULL) {
 -#ifdef TARGET_X86_64
 -cpu_model = qemu64;
 -#else
 -cpu_model = qemu32;
 -#endif
 -}
 -
  for(i = 0; i  smp_cpus; i++) {
  pc_new_cpu(cpu_model);
  }
 diff --git a/target-i386/cpu.c b/target-i386/cpu.c
 index e12c851..30ae0c2 100644
 --- a/target-i386/cpu.c
 +++ b/target-i386/cpu.c
 @@ -29,6 +29,8 @@
  
  #include hyperv.h
  
 +#include qerror.h
 +
  /* feature flags taken from Intel Processor Identification and the CPUID
   * Instruction and AMD's CPUID Specification.  In cases of disagreement
   * between feature naming conventions, aliases may be added.
 @@ -1468,6 +1470,27 @@ static void mce_init(X86CPU *cpu)
  }
  }
  
 +static char *x86_get_cpu_model(Object *obj, Error **errp)
 +{
 +X86CPU *cpu = X86_CPU(obj);
 +CPUX86State *env = cpu-env;
 +return g_strdup(env-cpu_model_str);
 +}
 +
 +static void x86_set_cpu_model(Object *obj, const char *value, Error **errp)
 +{
 +X86CPU *cpu = X86_CPU(obj);
 +CPUX86State *env = cpu-env;
 +
 +g_free((gpointer)env-cpu_model_str);
 +env-cpu_model_str = g_strdup(value);
 +
 +if (cpu_x86_register(env, env-cpu_model_str)  0) {
 +fprintf(stderr, Unable to find x86 CPU definition\n);
 +error_set(errp, QERR_INVALID_PARAMETER_COMBINATION);
 +}
 +}

This will cease to work when we model CPU definitions as subclasses - in
that case we cannot change the type after instantiating it. No one
objected to that aspect of the RFC so far, so I was planning to post
that as part 3 of my series.

We might inspect the model string (typename plus read-only) - question is
whether that is useful (cf. CPU feature flag modelling).

Andreas

 +
  static void x86_cpu_initfn(Object *obj)
  {
  X86CPU *cpu = X86_CPU(obj);
 @@ -1475,6 +1498,16 @@ static void x86_cpu_initfn(Object *obj)
  
  cpu_exec_init(env);
  env-cpuid_apic_id = env-cpu_index;
 +
 +object_property_add_str(obj, cpu-model,
 +x86_get_cpu_model, x86_set_cpu_model, NULL);
 +
 +#ifdef TARGET_X86_64
 +object_property_set_str(OBJECT(cpu), qemu64, cpu-model, NULL);
 +#else
 +object_property_set_str(OBJECT(cpu), qemu32, cpu-model, NULL);
 +#endif
 +
  mce_init(cpu);
  }
  
 diff --git a/target-i386/helper.c b/target-i386/helper.c
 index d92d3d4..df33d83 100644
 --- a/target-i386/helper.c
 +++ b/target-i386/helper.c
 @@ -1161,11 +1161,11 @@ CPUX86State *cpu_x86_init(const char *cpu_model)
  {
  X86CPU *cpu;
  CPUX86State *env;
 +Error *errp = NULL;
  static int inited;
  
  cpu = X86_CPU(object_new(TYPE_X86_CPU));
  env = cpu-env;
 -env-cpu_model_str = cpu_model;
  
  /* init various static tables used in TCG mode */
  if (tcg_enabled()  !inited) {
 @@ -1176,9 +1176,13 @@ CPUX86State *cpu_x86_init(const char *cpu_model)
  cpu_set_debug_excp_handler(breakpoint_handler);
  #endif
  }
 -if (cpu_x86_register(env, cpu_model)  0) {
 -object_delete(OBJECT(cpu));
 -return NULL;
 +
 +if (cpu_model) {
 +object_property_set_str(OBJECT(cpu), cpu_model, cpu-model, errp);
 +if (errp) {
 +object_delete(OBJECT(cpu));
 +return NULL;
 +}
  }
  
  qemu_init_vcpu(env);

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer 

Re: [Qemu-devel] [PATCH v2] qxl-render: fix broken vnc+spice since commit f934493

2012-04-18 Thread Gerd Hoffmann
On 04/18/12 11:27, Alon Levy wrote:
 Notify any listeners such as vnc that the displaysurface has been
 changed, otherwise they will segfault when first accessing the freed old
 displaysurface data.

Patch added to spice patch queue.

thanks,
  Gerd



[Qemu-devel] [PATCH 2/3] qxl: set default values of vram*_size_mb to -1

2012-04-18 Thread Gerd Hoffmann
From: Alon Levy al...@redhat.com

The addition of those values caused a regression where not specifying
any value for the vram bar size would result in a 4096 _byte_ surface
area. This is ok for the windows driver but causes the X driver to be
unusable. Also, it's a regression. This patch returns the default
behavior of having a 64 megabyte vram BAR.

Signed-off-by: Alon Levy al...@redhat.com
Signed-off-by: Gerd Hoffmann kra...@redhat.com
---
 hw/qxl.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/hw/qxl.c b/hw/qxl.c
index 47a162e..db2318e 100644
--- a/hw/qxl.c
+++ b/hw/qxl.c
@@ -1959,8 +1959,8 @@ static Property qxl_properties[] = {
 DEFINE_PROP_UINT32(guestdebug, PCIQXLDevice, guestdebug, 0),
 DEFINE_PROP_UINT32(cmdlog, PCIQXLDevice, cmdlog, 0),
 DEFINE_PROP_UINT32(ram_size_mb,  PCIQXLDevice, ram_size_mb, -1),
-DEFINE_PROP_UINT32(vram_size_mb, PCIQXLDevice, vram32_size_mb, 0),
-DEFINE_PROP_UINT32(vram64_size_mb, PCIQXLDevice, vram_size_mb, 0),
+DEFINE_PROP_UINT32(vram_size_mb, PCIQXLDevice, vram32_size_mb, -1),
+DEFINE_PROP_UINT32(vram64_size_mb, PCIQXLDevice, vram_size_mb, -1),
 DEFINE_PROP_END_OF_LIST(),
 };
 
-- 
1.7.1




[Qemu-devel] [PATCH 3/3] qxl-render: fix broken vnc+spice since commit f934493

2012-04-18 Thread Gerd Hoffmann
From: Alon Levy al...@redhat.com

Notify any listeners such as vnc that the displaysurface has been
changed, otherwise they will segfault when first accessing the freed old
displaysurface data.

Signed-off-by: Alon Levy al...@redhat.com
Signed-off-by: Gerd Hoffmann kra...@redhat.com
---
 hw/qxl-render.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/hw/qxl-render.c b/hw/qxl-render.c
index 28ab182..f7f1bfd 100644
--- a/hw/qxl-render.c
+++ b/hw/qxl-render.c
@@ -127,6 +127,7 @@ static void qxl_render_update_area_unlocked(PCIQXLDevice 
*qxl)
 qxl-guest_primary.surface.width,
 qxl-guest_primary.surface.height);
 }
+dpy_resize(vga-ds);
 }
 for (i = 0; i  qxl-num_dirty_rects; i++) {
 if (qemu_spice_rect_is_empty(qxl-dirty+i)) {
-- 
1.7.1




[Qemu-devel] [PULL 0/3] spice patch queue

2012-04-18 Thread Gerd Hoffmann
  Hi,

Three little fixes accumulated over the last few weeks.

please pull,
  Gerd

The following changes since commit 158fd3ce98afd21f2e2639600f6414ea703a9121:

  qemu-timer.c: Remove 250us timeouts (2012-04-16 12:56:48 -0500)

are available in the git repository at:
  git://anongit.freedesktop.org/spice/qemu spice.v52

Alon Levy (3):
  trace-events: remove unused qxl_vga_ioport_while_not_in_vga_mode
  qxl: set default values of vram*_size_mb to -1
  qxl-render: fix broken vnc+spice since commit f934493

 hw/qxl-render.c |1 +
 hw/qxl.c|4 ++--
 trace-events|1 -
 3 files changed, 3 insertions(+), 3 deletions(-)



Re: [Qemu-devel] [PATCH 0/3] switch to seavgabios

2012-04-18 Thread Gerd Hoffmann
  Hi,

 Second, the display panning via vesa pmi was broken in qemu for three
 years(!) and nobody noticed.  The linux kernel's vesafb can use the vesa
 pmi, it is disabled by default though due to bioses tending to be buggy.
  I'm not aware of other users.

 = Is this actually used by anyone?  Seems not ...
 
 It's used by me, when i feel nostalgic and want to watch old DOS stuff.

Pointer?
I'd like to have a test case which breaks with the new vgabios.

thanks,
  Gerd



[Qemu-devel] [PATCH] vhost-net: adjust vhost_net.[c|h] - vhost-net.[c|h]

2012-04-18 Thread zwu . kernel
From: Zhi Yong Wu wu...@linux.vnet.ibm.com

Keep the file naming style consistent with other files

Signed-off-by: Zhi Yong Wu wu...@linux.vnet.ibm.com
---
 Makefile.target |2 +-
 hw/vhost-net.c  |  250 +++
 hw/vhost-net.h  |   20 +
 hw/vhost_net.c  |  250 ---
 hw/vhost_net.h  |   20 -
 hw/virtio-net.c |2 +-
 net/tap.c   |2 +-
 7 files changed, 273 insertions(+), 273 deletions(-)
 create mode 100644 hw/vhost-net.c
 create mode 100644 hw/vhost-net.h
 delete mode 100644 hw/vhost_net.c
 delete mode 100644 hw/vhost_net.h

diff --git a/Makefile.target b/Makefile.target
index 84951a0..4bccdb1 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -219,7 +219,7 @@ obj-y = arch_init.o cpus.o monitor.o machine.o gdbstub.o 
balloon.o ioport.o
 obj-$(CONFIG_NO_PCI) += pci-stub.o
 obj-$(CONFIG_VIRTIO) += virtio.o virtio-blk.o virtio-balloon.o virtio-net.o 
virtio-serial-bus.o
 obj-$(CONFIG_VIRTIO) += virtio-scsi.o
-obj-y += vhost_net.o
+obj-y += vhost-net.o
 obj-$(CONFIG_VHOST_NET) += vhost.o
 obj-$(CONFIG_REALLY_VIRTFS) += 9pfs/virtio-9p-device.o
 obj-$(CONFIG_KVM) += kvm.o kvm-all.o
diff --git a/hw/vhost-net.c b/hw/vhost-net.c
new file mode 100644
index 000..48937d2
--- /dev/null
+++ b/hw/vhost-net.c
@@ -0,0 +1,250 @@
+/*
+ * vhost-net support
+ *
+ * Copyright Red Hat, Inc. 2010
+ *
+ * Authors:
+ *  Michael S. Tsirkin m...@redhat.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include net.h
+#include net/tap.h
+
+#include virtio-net.h
+#include vhost-net.h
+#include qemu-error.h
+
+#include config.h
+
+#ifdef CONFIG_VHOST_NET
+#include linux/vhost.h
+#include sys/socket.h
+#include linux/kvm.h
+#include fcntl.h
+#include sys/ioctl.h
+#include linux/virtio_ring.h
+#include netpacket/packet.h
+#include net/ethernet.h
+#include net/if.h
+#include netinet/in.h
+
+#include stdio.h
+
+#include vhost.h
+
+struct vhost_net {
+struct vhost_dev dev;
+struct vhost_virtqueue vqs[2];
+int backend;
+VLANClientState *vc;
+};
+
+unsigned vhost_net_get_features(struct vhost_net *net, unsigned features)
+{
+/* Clear features not supported by host kernel. */
+if (!(net-dev.features  (1  VIRTIO_F_NOTIFY_ON_EMPTY))) {
+features = ~(1  VIRTIO_F_NOTIFY_ON_EMPTY);
+}
+if (!(net-dev.features  (1  VIRTIO_RING_F_INDIRECT_DESC))) {
+features = ~(1  VIRTIO_RING_F_INDIRECT_DESC);
+}
+if (!(net-dev.features  (1  VIRTIO_RING_F_EVENT_IDX))) {
+features = ~(1  VIRTIO_RING_F_EVENT_IDX);
+}
+if (!(net-dev.features  (1  VIRTIO_NET_F_MRG_RXBUF))) {
+features = ~(1  VIRTIO_NET_F_MRG_RXBUF);
+}
+return features;
+}
+
+void vhost_net_ack_features(struct vhost_net *net, unsigned features)
+{
+net-dev.acked_features = net-dev.backend_features;
+if (features  (1  VIRTIO_F_NOTIFY_ON_EMPTY)) {
+net-dev.acked_features |= (1  VIRTIO_F_NOTIFY_ON_EMPTY);
+}
+if (features  (1  VIRTIO_RING_F_INDIRECT_DESC)) {
+net-dev.acked_features |= (1  VIRTIO_RING_F_INDIRECT_DESC);
+}
+if (features  (1  VIRTIO_RING_F_EVENT_IDX)) {
+net-dev.acked_features |= (1  VIRTIO_RING_F_EVENT_IDX);
+}
+if (features  (1  VIRTIO_NET_F_MRG_RXBUF)) {
+net-dev.acked_features |= (1  VIRTIO_NET_F_MRG_RXBUF);
+}
+}
+
+static int vhost_net_get_fd(VLANClientState *backend)
+{
+switch (backend-info-type) {
+case NET_CLIENT_TYPE_TAP:
+return tap_get_fd(backend);
+default:
+fprintf(stderr, vhost-net requires tap backend\n);
+return -EBADFD;
+}
+}
+
+struct vhost_net *vhost_net_init(VLANClientState *backend, int devfd,
+ bool force)
+{
+int r;
+struct vhost_net *net = g_malloc(sizeof *net);
+if (!backend) {
+fprintf(stderr, vhost-net requires backend to be setup\n);
+goto fail;
+}
+r = vhost_net_get_fd(backend);
+if (r  0) {
+goto fail;
+}
+net-vc = backend;
+net-dev.backend_features = tap_has_vnet_hdr(backend) ? 0 :
+(1  VHOST_NET_F_VIRTIO_NET_HDR);
+net-backend = r;
+
+r = vhost_dev_init(net-dev, devfd, force);
+if (r  0) {
+goto fail;
+}
+if (!tap_has_vnet_hdr_len(backend,
+  sizeof(struct virtio_net_hdr_mrg_rxbuf))) {
+net-dev.features = ~(1  VIRTIO_NET_F_MRG_RXBUF);
+}
+if (~net-dev.features  net-dev.backend_features) {
+fprintf(stderr, vhost lacks feature mask % PRIu64  for backend\n,
+(uint64_t)(~net-dev.features  net-dev.backend_features));
+vhost_dev_cleanup(net-dev);
+goto fail;
+}
+
+/* Set sane init value. Override when guest 

Re: [Qemu-devel] [PATCH v6 1/5] sockets: change inet_connect() to support nonblock socket

2012-04-18 Thread Amos Kong

On 18/04/12 14:52, Orit Wasserman wrote:

On 04/17/2012 05:54 PM, Amos Kong wrote:

Add a bool argument to inet_connect() to assign if set socket
to block/nonblock, and delete original argument 'socktype'
that is unused.

Retry to connect when following errors are got:
   -EINTR
   -EWOULDBLOCK (win32)
Connect's successful for nonblock socket when following
errors are got, user should wait for connecting by select():
   -EINPROGRESS
   -WSAEALREADY (win32)

Change nbd, vnc to use new interface.

Signed-off-by: Amos Kongak...@redhat.com
---
  nbd.c  |2 +-
  qemu-sockets.c |   58 +++-
  qemu_socket.h  |2 +-
  ui/vnc.c   |2 +-
  4 files changed, 48 insertions(+), 16 deletions(-)

diff --git a/nbd.c b/nbd.c
index 406e555..b4e68a9 100644
--- a/nbd.c
+++ b/nbd.c
@@ -146,7 +146,7 @@ int tcp_socket_outgoing(const char *address, uint16_t port)

  int tcp_socket_outgoing_spec(const char *address_and_port)
  {
-return inet_connect(address_and_port, SOCK_STREAM);
+return inet_connect(address_and_port, true);
  }

  int tcp_socket_incoming(const char *address, uint16_t port)
diff --git a/qemu-sockets.c b/qemu-sockets.c
index 6bcb8e3..e886195 100644
--- a/qemu-sockets.c
+++ b/qemu-sockets.c
@@ -51,6 +51,9 @@ static QemuOptsList dummy_opts = {
  },{
  .name = ipv6,
  .type = QEMU_OPT_BOOL,
+},{
+.name = block,
+.type = QEMU_OPT_BOOL,
  },
  { /* end if list */ }
  },
@@ -201,7 +204,8 @@ int inet_connect_opts(QemuOpts *opts)
  const char *port;
  char uaddr[INET6_ADDRSTRLEN+1];
  char uport[33];
-int sock,rc;
+int sock, rc, err;
+bool block;

  memset(ai,0, sizeof(ai));
  ai.ai_flags = AI_CANONNAME | AI_ADDRCONFIG;
@@ -210,6 +214,7 @@ int inet_connect_opts(QemuOpts *opts)

  addr = qemu_opt_get(opts, host);
  port = qemu_opt_get(opts, port);
+block = qemu_opt_get_bool(opts, block, 0);
  if (addr == NULL || port == NULL) {
  fprintf(stderr, inet_connect: host and/or port not specified\n);
  return -1;
@@ -241,21 +246,44 @@ int inet_connect_opts(QemuOpts *opts)
  continue;
  }
  setsockopt(sock,SOL_SOCKET,SO_REUSEADDR,(void*)on,sizeof(on));
-
+if (!block) {
+socket_set_nonblock(sock);
+}
  /* connect to peer */
-if (connect(sock,e-ai_addr,e-ai_addrlen)  0) {
-if (NULL == e-ai_next)
-fprintf(stderr, %s: connect(%s,%s,%s,%s): %s\n, __FUNCTION__,
-inet_strfamily(e-ai_family),
-e-ai_canonname, uaddr, uport, strerror(errno));
-closesocket(sock);
-continue;
+do {
+err = 0;
+if (connect(sock, e-ai_addr, e-ai_addrlen)  0) {
+err = -socket_error();
+}
+#ifndef _WIN32
+} while (err == -EINTR || err == -EWOULDBLOCK);
+#else
+} while (err == -EINTR);
+#endif


We shouldn't retry to connect for a blocking socket, please add a check for 
!block.
According to MSDN docs, in WIN32 if we get EWOULDBLOCK, we should do select
http://msdn.microsoft.com/en-us/library/windows/desktop/ms737625(v=vs.85).aspx
so I think we only need to retry for -EINTR.


Hi Orit, thanks for your review.

In 
http://msdn.microsoft.com/en-us/library/windows/desktop/ms740668%28v=vs.85%29.aspx,

for 'EWOULDBLOCK': the operation should be retried later.

However, both methods work.  I would not retry for EWOULDBLOCK
(posix & win32).



+
+if (err= 0) {
+goto success;
+} else if (!block  err == -EINPROGRESS) {
+goto success;
+#ifdef _WIN32
+} else if (!block  err == -WSAEALREADY) {


Also EWOULDBLOCK
This is more of a style comment, as I feel the code doesn't need the goto.


Ok.


Check for an error path so the rest of the function looks like:

if (err  0) {
if ( block ||
#ifndef __WIN32
 err != -EINPROGRESS ) {
#else
(err != -EWOULDBLOCK  err != -WASALREADY) ) {
#endif



EINPROGRESS and EWOULDBLOCK are for posix and win32,

  if (err   0) {
if (block || (err != -EINPROGRESS  err != -EWOULDBLOCK
  #ifndef __WIN32
 )) {
  #else
  err != -WASALREADY)) {
  #endif



if (NULL == e-ai_next) {
 fprintf(stderr, %s: connect(%s,%s,%s,%s): %s\n, __FUNCTION__,
 inet_strfamily(e-ai_family),
 e-ai_canonname, uaddr, uport, strerror(errno));
 }
 closesocket(sock);
sock = -1;
}

freeaddrinfo(res);
return sock;
}


...

--
Amos.



[Qemu-devel] [PATCH 1/3] trace-events: remove unused qxl_vga_ioport_while_not_in_vga_mode

2012-04-18 Thread Gerd Hoffmann
From: Alon Levy al...@redhat.com

The resulting stp file fails to load because of an unresolvable probe.

Signed-off-by: Alon Levy al...@redhat.com
Signed-off-by: Gerd Hoffmann kra...@redhat.com
---
 trace-events |1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/trace-events b/trace-events
index a5f276d..0e25d20 100644
--- a/trace-events
+++ b/trace-events
@@ -781,7 +781,6 @@ qxl_spice_reset_memslots(int qid) %d
 qxl_spice_update_area(int qid, uint32_t surface_id, uint32_t left, uint32_t 
right, uint32_t top, uint32_t bottom) %d sid=%d [%d,%d,%d,%d]
 qxl_spice_update_area_rest(int qid, uint32_t num_dirty_rects, uint32_t 
clear_dirty_region) %d #d=%d clear=%d
 qxl_surfaces_dirty(int qid, int surface, int offset, int size) %d surface=%d 
offset=%d size=%d
-qxl_vga_ioport_while_not_in_vga_mode(int qid) %d (int qid, reset to VGA mode 
because of VGA io)
 
 # hw/qxl-render.c
 qxl_render_blit_guest_primary_initialized(void) 
-- 
1.7.1




[Qemu-devel] [PATCH] qxl: don't assert on guest create_guest_primary

2012-04-18 Thread Alon Levy
initiate the implicit destroy ourselves.

Signed-off-by: Alon Levy al...@redhat.com
---
 hw/qxl.c |   10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/hw/qxl.c b/hw/qxl.c
index 6776a7f..1ab3348 100644
--- a/hw/qxl.c
+++ b/hw/qxl.c
@@ -1163,7 +1163,15 @@ static void qxl_create_guest_primary(PCIQXLDevice *qxl, 
int loadvm,
 QXLDevSurfaceCreate surface;
 QXLSurfaceCreate *sc = qxl-guest_primary.surface;
 
-assert(qxl-mode != QXL_MODE_NATIVE);
+if (qxl-mode == QXL_MODE_NATIVE) {
+/*
+ * allow a create without a destroy. This could be used
+ * later for an atomic change primary but right now just
+ * destroy the primary for the guest. Note that this uses
+ * the ability to have multiple concurrent async commands.
+ */
+qxl_destroy_primary(qxl, async);
+}
 qxl_exit_vga_mode(qxl);
 
 surface.format = le32_to_cpu(sc-format);
-- 
1.7.10




Re: [Qemu-devel] [PATCH 0/3] switch to seavgabios

2012-04-18 Thread Michael Tokarev
On 18.04.2012 14:31, Gerd Hoffmann wrote:
   Hi,
 
 Second, the display panning via vesa pmi was broken in qemu for three
 years(!) and nobody noticed.  The linux kernel's vesafb can use the vesa
 pmi, it is disabled by default though due to bioses tending to be buggy.
  I'm not aware of other users.

 = Is this actually used by anyone?  Seems not ...

 It's used by me, when i feel nostalgic and want to watch old DOS stuff.
 
 Pointer?
 I'd like to have a test case which breaks with the new vgabios.

We talked with malc briefly on irc yesterday, and this is what
he gave me:

http://cvs.savannah.gnu.org/viewvc/vgabios/vbe.c?root=vgabiosr1=1.47r2=1.48

this is not the test case but the missing support he's referring to.

It appears the patch implements just 2 functions which both just do
int10, so it should be easy to implement in seabios, but my knowledge
isn't sufficient to do that (it needs some asm coding).

Thanks,

/mjt



Re: [Qemu-devel] xen build failure

2012-04-18 Thread Stefan Hajnoczi
On Wed, Apr 18, 2012 at 09:26:44AM +0200, Alexander Graf wrote:
 
 On 17.04.2012, at 18:53, Stefano Stabellini wrote:
 
  On Tue, 17 Apr 2012, Alon Levy wrote:
  I have xen-devel-4.1.2-14.fc17.x86_64 and am getting the following build
  error:
  
   CCx86_64-softmmu/xen-all.o
  /home/alon/src/spice_upstream/qemu/xen-all.c: In function 
  ‘xen_hvm_inject_msi’:
  /home/alon/src/spice_upstream/qemu/xen-all.c:132:5: error: implicit 
  declaration of function ‘xc_hvm_inject_msi’ 
  [-Werror=implicit-function-declaration]
  /home/alon/src/spice_upstream/qemu/xen-all.c:132:5: error: nested extern 
  declaration of ‘xc_hvm_inject_msi’ [-Werror=nested-externs]
  
  commit f1dbf015dfb0aa7f66f710a1f1bc58b662951de2 seems to be missing a
  capability / version check.
  
  Thanks for the report!
  I tested it without Xen and with Xen 4.2 but not with Xen 4.1 :-/
 
 So that means that our buildbot coverage is missing Xen 4.1 :). Could you 
 guys possibly provide a machine with Xen 4.1 that'd be running buildbot so we 
 see at least the compile time issues? :)

It would be good to have a buildslave for each Xen version.  Here is more info
on how to volunteer a buildslave:

http://wiki.qemu.org/ContinuousIntegration

Stefan




Re: [Qemu-devel] [PATCH v6 2/5] qerror: add five qerror strings

2012-04-18 Thread Paolo Bonzini
Il 18/04/2012 08:51, Orit Wasserman ha scritto:
 On 04/17/2012 05:54 PM, Amos Kong wrote:
 Add five new qerror strings, they are about socket:
   QERR_SOCKET_CONNECT_IN_PROGRESS
   QERR_SOCKET_CONNECT_FAILED
   QERR_SOCKET_LISTEN_FAILED
   QERR_SOCKET_BIND_FAILED
   QERR_SOCKET_CREATE_FAILED

 Signed-off-by: Amos Kong ak...@redhat.com
 ---
  qerror.c |   20 
  qerror.h |   15 +++
  2 files changed, 35 insertions(+), 0 deletions(-)

 diff --git a/qerror.c b/qerror.c
 index 96fbe71..7afe1ac 100644
 --- a/qerror.c
 +++ b/qerror.c
 @@ -304,6 +304,26 @@ static const QErrorStringTable qerror_table[] = {
  .error_fmt = QERR_VNC_SERVER_FAILED,
  .desc  = Could not start VNC server on %(target),
  },
 +{
 +.error_fmt = QERR_SOCKET_CONNECT_IN_PROGRESS,
 +.desc  = Connection cannot be completed immediately,
 +},
 +{
 +.error_fmt = QERR_SOCKET_CONNECT_FAILED,
 +.desc  = Fail to connect socket,
 +},
 +{
 +.error_fmt = QERR_SOCKET_LISTEN_FAILED,
 +.desc  = Fail to listen socket,
 +},
 +{
 +.error_fmt = QERR_SOCKET_BIND_FAILED,
 +.desc  = Fail to bind socket,
 +},
 +{
 +.error_fmt = QERR_SOCKET_CREATE_FAILED,
 +.desc  = Fail to create socket,
 +},
  {}
  };
  
 diff --git a/qerror.h b/qerror.h
 index 5c23c1f..4cbba48 100644
 --- a/qerror.h
 +++ b/qerror.h
 @@ -248,4 +248,19 @@ QError *qobject_to_qerror(const QObject *obj);
  #define QERR_VNC_SERVER_FAILED \
  { 'class': 'VNCServerFailed', 'data': { 'target': %s } }
  
 +#define QERR_SOCKET_CONNECT_IN_PROGRESS \
 +{ 'class': 'SockConnectInprogress', 'data': {} }
 +
 +#define QERR_SOCKET_CONNECT_FAILED \
 +{ 'class': 'SockConnectFailed', 'data': {} }
 +
 +#define QERR_SOCKET_LISTEN_FAILED \
 +{ 'class': 'SockListenFailed', 'data': {} }
 +
 +#define QERR_SOCKET_BIND_FAILED \
 +{ 'class': 'SockBindFailed', 'data': {} }
 +
 +#define QERR_SOCKET_CREATE_FAILED \
 +{ 'class': 'SockCreateFailed', 'data': {} }
 
 For the FAILED error we will probably need more data , how about adding a 
 string 
 that can contain the strerror string ?

This was nack-ed in the past, and the numeric errno values are not
portable.  However, as a follow up we could add a QAPI-specific enum
(QEMU_ERRNO_EACCES, QEMU_ERRNO_ECONNREFUSED, etc.) and convert errno
values to it.




Re: [Qemu-devel] [RFC PATCH 1/3] iommu: Introduce iommu_group

2012-04-18 Thread David Gibson
On Mon, Apr 02, 2012 at 03:14:40PM -0600, Alex Williamson wrote:
 IOMMUs often do not have visibility of individual devices in the
 system.  Due to IOMMU design, bus topology, or device quirks, we
 can often only identify groups of devices.  Examples include
 Intel VT-d  AMD-Vi which often have function level visibility
 compared to POWER partitionable endpoints which have bridge level
 granularity.

That's a significant oversimplification of the situation on POWER,
although it doesn't really matter in this context.  On older (i.e. pre
PCI-E) hardware, PEs have either host bridge (i.e. domain)
granularity, or (IIUC) in some cases p2p bridge granularity, using
special p2p bridges, since that's the only real way to do iommu
differentiation without the PCI-E requestor IDs.  This isn't as coarse
as it seems in practice, because the hardware is usually built with a
bridge per physical PCI slot.

On newer PCI-E hardware, the PE granularity is basically a firmware
decision, and can go down to function level.  I believe pHyp puts the
granularity at the bridge level.  Our non-virtualized Linux firmware
currently does put it at the function level, but Ben is thinking about
changing that to bridge level: again, because of the hardware design
that isn't as coarse as it seems, and at this level we can hardware
guarantee isolation to a degree that's not possible at the function
level.

  PCIe-to-PCI bridges also often cloud the IOMMU
  visibility as it cannot distinguish devices behind the bridge.
 Devices can also sometimes hurt themselves by initiating DMA using
 the wrong source ID on a multifunction PCI device.
 
 IOMMU groups are meant to help solve these problems and hopefully
 become the working unit of the IOMMU API.

So far, so simple.  No objections here.  I am trying to work out what
the real difference in approach is in this series from either your or
my earlier isolation group series.  AFAICT it's just that this
approach is explicitly only about IOMMU identity, ignoring (here) any
other factors which might affect isolation.  Or am I missing
something?

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson



Re: [Qemu-devel] [PATCH v6 2/5] qerror: add five qerror strings

2012-04-18 Thread Amos Kong

On 18/04/12 19:25, Paolo Bonzini wrote:

Il 18/04/2012 08:51, Orit Wasserman ha scritto:

On 04/17/2012 05:54 PM, Amos Kong wrote:

Add five new qerror strings, they are about socket:
   QERR_SOCKET_CONNECT_IN_PROGRESS
   QERR_SOCKET_CONNECT_FAILED
   QERR_SOCKET_LISTEN_FAILED
   QERR_SOCKET_BIND_FAILED
   QERR_SOCKET_CREATE_FAILED

Signed-off-by: Amos Kongak...@redhat.com
---
  qerror.c |   20 
  qerror.h |   15 +++
  2 files changed, 35 insertions(+), 0 deletions(-)

diff --git a/qerror.c b/qerror.c
index 96fbe71..7afe1ac 100644
--- a/qerror.c
+++ b/qerror.c
@@ -304,6 +304,26 @@ static const QErrorStringTable qerror_table[] = {
  .error_fmt = QERR_VNC_SERVER_FAILED,
  .desc  = Could not start VNC server on %(target),
  },
+{
+.error_fmt = QERR_SOCKET_CONNECT_IN_PROGRESS,
+.desc  = Connection cannot be completed immediately,
+},
+{
+.error_fmt = QERR_SOCKET_CONNECT_FAILED,
+.desc  = Fail to connect socket,
+},
+{
+.error_fmt = QERR_SOCKET_LISTEN_FAILED,
+.desc  = Fail to listen socket,
+},
+{
+.error_fmt = QERR_SOCKET_BIND_FAILED,
+.desc  = Fail to bind socket,
+},
+{
+.error_fmt = QERR_SOCKET_CREATE_FAILED,
+.desc  = Fail to create socket,
+},
  {}
  };

diff --git a/qerror.h b/qerror.h
index 5c23c1f..4cbba48 100644
--- a/qerror.h
+++ b/qerror.h
@@ -248,4 +248,19 @@ QError *qobject_to_qerror(const QObject *obj);
  #define QERR_VNC_SERVER_FAILED \
  { 'class': 'VNCServerFailed', 'data': { 'target': %s } }

+#define QERR_SOCKET_CONNECT_IN_PROGRESS \
+{ 'class': 'SockConnectInprogress', 'data': {} }
+
+#define QERR_SOCKET_CONNECT_FAILED \
+{ 'class': 'SockConnectFailed', 'data': {} }
+
+#define QERR_SOCKET_LISTEN_FAILED \
+{ 'class': 'SockListenFailed', 'data': {} }
+
+#define QERR_SOCKET_BIND_FAILED \
+{ 'class': 'SockBindFailed', 'data': {} }
+
+#define QERR_SOCKET_CREATE_FAILED \
+{ 'class': 'SockCreateFailed', 'data': {} }


For the FAILED error we will probably need more data , how about adding a string
that can contain the strerror string ?


We should provide monitor users with a more meaningful error note,
and output the strerror string to qemu's stderr,
so I think the current patch is ok.



This was nack-ed in the past, and the numeric errno values are not
portable.  However, as a follow up we could add a QAPI-specific enum
(QEMU_ERRNO_EACCES, QEMU_ERRNO_ECONNREFUSED, etc.) and convert errno
values to it.


--
Amos.



Re: [Qemu-devel] [PATCH v6 2/5] qerror: add five qerror strings

2012-04-18 Thread Orit Wasserman
On 04/18/2012 02:25 PM, Paolo Bonzini wrote:
 Il 18/04/2012 08:51, Orit Wasserman ha scritto:
 On 04/17/2012 05:54 PM, Amos Kong wrote:
 Add five new qerror strings, they are about socket:
   QERR_SOCKET_CONNECT_IN_PROGRESS
   QERR_SOCKET_CONNECT_FAILED
   QERR_SOCKET_LISTEN_FAILED
   QERR_SOCKET_BIND_FAILED
   QERR_SOCKET_CREATE_FAILED

 Signed-off-by: Amos Kong ak...@redhat.com
 ---
  qerror.c |   20 
  qerror.h |   15 +++
  2 files changed, 35 insertions(+), 0 deletions(-)

 diff --git a/qerror.c b/qerror.c
 index 96fbe71..7afe1ac 100644
 --- a/qerror.c
 +++ b/qerror.c
 @@ -304,6 +304,26 @@ static const QErrorStringTable qerror_table[] = {
  .error_fmt = QERR_VNC_SERVER_FAILED,
  .desc  = Could not start VNC server on %(target),
  },
 +{
 +.error_fmt = QERR_SOCKET_CONNECT_IN_PROGRESS,
 +.desc  = Connection cannot be completed immediately,
 +},
 +{
 +.error_fmt = QERR_SOCKET_CONNECT_FAILED,
 +.desc  = Fail to connect socket,
 +},
 +{
 +.error_fmt = QERR_SOCKET_LISTEN_FAILED,
 +.desc  = Fail to listen socket,
 +},
 +{
 +.error_fmt = QERR_SOCKET_BIND_FAILED,
 +.desc  = Fail to bind socket,
 +},
 +{
 +.error_fmt = QERR_SOCKET_CREATE_FAILED,
 +.desc  = Fail to create socket,
 +},
  {}
  };
  
 diff --git a/qerror.h b/qerror.h
 index 5c23c1f..4cbba48 100644
 --- a/qerror.h
 +++ b/qerror.h
 @@ -248,4 +248,19 @@ QError *qobject_to_qerror(const QObject *obj);
  #define QERR_VNC_SERVER_FAILED \
  { 'class': 'VNCServerFailed', 'data': { 'target': %s } }
  
 +#define QERR_SOCKET_CONNECT_IN_PROGRESS \
 +{ 'class': 'SockConnectInprogress', 'data': {} }
 +
 +#define QERR_SOCKET_CONNECT_FAILED \
 +{ 'class': 'SockConnectFailed', 'data': {} }
 +
 +#define QERR_SOCKET_LISTEN_FAILED \
 +{ 'class': 'SockListenFailed', 'data': {} }
 +
 +#define QERR_SOCKET_BIND_FAILED \
 +{ 'class': 'SockBindFailed', 'data': {} }
 +
 +#define QERR_SOCKET_CREATE_FAILED \
 +{ 'class': 'SockCreateFailed', 'data': {} }

 For the FAILED error we will probably need more data , how about adding a 
 string 
 that can contain the strerror string ?
 
 This was nack-ed in the past, and the numeric errno values are not
 portable.  However, as a follow up we could add a QAPI-specific enum
 (QEMU_ERRNO_EACCES, QEMU_ERRNO_ECONNREFUSED, etc.) and convert errno
 values to it.
 

I agree, it can be handled later. Migration error messages are very cryptic and 
we need to make them
more user friendly in general.
 





Re: [Qemu-devel] [RFC PATCH 0/2] Standard SD host controller model

2012-04-18 Thread Peter Maydell
On 18 April 2012 09:43, Igor Mitsyanko i.mitsya...@samsung.com wrote:
 First patch introduces standard SD host controller model. This is accumulated
 version of my previous patch I sent a while ago and a recent SDHCI patch by
 Peter A. G. Crosthwaite.
 Second patch introduces Exynos4210-specific SDHCI built on top of standard 
 SDHCI
 model.

I'd still like you and Peter Crosthwaite (and Vincent Palatin if he's around)
to agree on one version of hw/sdhci.c that you're both happy with. I'm not
going to spend time on detailed code review while we still have two
competing patchsets.

thanks
-- PMM



Re: [Qemu-devel] [RFC PATCH 2/3] iommu: Create basic group infrastructure and update AMD-Vi Intel VT-d

2012-04-18 Thread David Gibson
On Mon, Apr 02, 2012 at 03:14:46PM -0600, Alex Williamson wrote:
 IOMMU groups define the minimum granularity of the IOMMU.  We therefore
 create groups using a dma_dev which is the effective requestor ID for
 the entire group.  Additional devices can be added to groups based on
 system topology, IOMMU limitations, or device quirks.
 
 This implementation also includes a simple idr database for creating
 a flat address space of group IDs across multiple IOMMUs.  Updates
 included for Intel VT-d, using example iommu callbacks for adding and
 removing devices, as well as AMD-Vi, which tracks devices on its own.
 We should be able to better integrate the iommu_group within existing
 AMD-Vi structs or provide a private data location within the iommu_group
 where we can eventually reduce redundancy.
 
 Signed-off-by: Alex Williamson alex.william...@redhat.com

Looks reasonable as far as it goes.  This still lacks an obvious means
for doing group assignment of devices on busses subordinate to devices
that are on iommu managed busses.  Which as we discussed earlier is a
bit of a can of worms, but necessary.

 ---
 
  drivers/iommu/amd_iommu.c   |   50 ++-
  drivers/iommu/intel-iommu.c |   76 +
  drivers/iommu/iommu.c   |  198 
 ++-
  include/linux/iommu.h   |   23 +
  4 files changed, 267 insertions(+), 80 deletions(-)
 
 diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
 index f75e060..876db28 100644
 --- a/drivers/iommu/amd_iommu.c
 +++ b/drivers/iommu/amd_iommu.c
 @@ -256,9 +256,10 @@ static bool check_device(struct device *dev)
  
  static int iommu_init_device(struct device *dev)
  {
 - struct pci_dev *pdev = to_pci_dev(dev);
 + struct pci_dev *dma_pdev, *pdev = to_pci_dev(dev);
   struct iommu_dev_data *dev_data;
   u16 alias;
 + int ret;
  
   if (dev-archdata.iommu)
   return 0;
 @@ -279,6 +280,26 @@ static int iommu_init_device(struct device *dev)
   return -ENOTSUPP;
   }
   dev_data-alias_data = alias_data;
 +
 + dma_pdev = pci_get_bus_and_slot(alias  8, alias  0xff);
 + } else
 + dma_pdev = pdev;
 +
 + /* dma_pdev = iommu_pci_quirk(dma_pdev) */

Presumably an actual implementation of the quirk is coming?  It might
be an idea for it to take both the individual and representative
devices, in case that information is useful.

 + if (!dma_pdev-dev.iommu_group) {
 + struct iommu_group *group;
 +
 + group = iommu_group_alloc(dma_pdev-dev);
 + if (IS_ERR(group))
 + return PTR_ERR(group);
 + }
 +
 + ret = iommu_group_add_device(dma_pdev-dev.iommu_group, dev);
 + if (ret) {
 + if (iommu_group_empty(dma_pdev-dev.iommu_group))
 + iommu_group_free(dma_pdev-dev.iommu_group);
 + return ret;

It might be nice to have generic helpers that do this kind of lifetime
handling of the groups, but that's a detail.

   }
  
   if (pci_iommuv2_capable(pdev)) {
 @@ -309,6 +330,12 @@ static void iommu_ignore_device(struct device *dev)
  
  static void iommu_uninit_device(struct device *dev)
  {
 + struct iommu_group *group = dev-iommu_group;
 +
 + iommu_group_remove_device(dev);
 + if (iommu_group_empty(group))
 + iommu_group_free(group);
 +
   /*
* Nothing to do here - we keep dev_data around for unplugged devices
* and reuse it when the device is re-plugged - not doing so would
 @@ -3191,26 +3218,6 @@ static int amd_iommu_domain_has_cap(struct 
 iommu_domain *domain,
   return 0;
  }
  
 -static int amd_iommu_device_group(struct device *dev, unsigned int *groupid)
 -{
 - struct iommu_dev_data *dev_data = dev-archdata.iommu;
 - struct pci_dev *pdev = to_pci_dev(dev);
 - u16 devid;
 -
 - if (!dev_data)
 - return -ENODEV;
 -
 - if (pdev-is_virtfn || !iommu_group_mf)
 - devid = dev_data-devid;
 - else
 - devid = calc_devid(pdev-bus-number,
 -PCI_DEVFN(PCI_SLOT(pdev-devfn), 0));
 -
 - *groupid = amd_iommu_alias_table[devid];
 -
 - return 0;
 -}
 -
  static struct iommu_ops amd_iommu_ops = {
   .domain_init = amd_iommu_domain_init,
   .domain_destroy = amd_iommu_domain_destroy,
 @@ -3220,7 +3227,6 @@ static struct iommu_ops amd_iommu_ops = {
   .unmap = amd_iommu_unmap,
   .iova_to_phys = amd_iommu_iova_to_phys,
   .domain_has_cap = amd_iommu_domain_has_cap,
 - .device_group = amd_iommu_device_group,
   .pgsize_bitmap  = AMD_IOMMU_PGSIZES,
  };
  
 diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
 index c9c6053..41ab7d0 100644
 --- a/drivers/iommu/intel-iommu.c
 +++ b/drivers/iommu/intel-iommu.c
 @@ -4075,54 +4075,59 @@ static int intel_iommu_domain_has_cap(struct 
 iommu_domain *domain,
   return 0;
  }
  
 

Re: [Qemu-devel] [PATCH 2/3] qtest: enable qtest for most targets

2012-04-18 Thread Anthony Liguori

On 04/18/2012 03:36 AM, Peter Maydell wrote:

On 17 April 2012 22:33, Anthony Liguorialigu...@us.ibm.com  wrote:

Kernel loading is a hack.  I'll go out on a limb and say that most non-x86
boards are doing it completely wrong.  Messing around with CPU state has no
business in machine init.  It creates horrible dependencies about RAM
initialization order and problems for reset/live migration.

The kernel should be presented as a virtual device (an emulated flash or
whatever) and there should be firmware that loads the kernel appropriately.
Then we wouldn't need changes like this in the first place.


Yeah, I tend to agree that that would be a cleaner approach, but so far
it hasn't been a requirement. Feel free to submit patches to fix kernel
loading for these boards if you want to be able to rely on this behaviour :-)


Well hopefully now we can agree that what Blue had to do with qtest isn't so 
bad :-)

Regards,

Anthony Liguori



-- PMM






Re: [Qemu-devel] [RFC PATCH 0/2] Standard SD host controller model

2012-04-18 Thread Igor Mitsyanko
Yes, I've sent it as RFC for exactly that reason, to discuss it with 
Peter and anyone else

who wants to participate, see if he'll agree or not.

On 04/18/2012 04:04 PM, Peter Maydell wrote:

On 18 April 2012 09:43, Igor Mitsyankoi.mitsya...@samsung.com  wrote:

First patch introduces standard SD host controller model. This is accumulated
version of my previous patch I sent a while ago and a recent SDHCI patch by
Peter A. G. Crosthwaite.
Second patch introduces Exynos4210-specific SDHCI built on top of standard SDHCI
model.

I'd still like you and Peter Crosthwaite (and Vincent Palatin if he's around)
to agree on one version of hw/sdhci.c that you're both happy with. I'm not
going to spend time on detailed code review while we still have two
competing patchsets.

thanks
-- PMM



--
Mitsyanko Igor
ASWG, Moscow RD center, Samsung Electronics
email: i.mitsya...@samsung.com




Re: [Qemu-devel] [PATCH 2/3] qtest: enable qtest for most targets

2012-04-18 Thread Andreas Färber
Am 18.04.2012 14:09, schrieb Anthony Liguori:
 On 04/18/2012 03:36 AM, Peter Maydell wrote:
 On 17 April 2012 22:33, Anthony Liguorialigu...@us.ibm.com  wrote:
 Kernel loading is a hack.  I'll go out on a limb and say that most
 non-x86
 boards are doing it completely wrong.  Messing around with CPU state
 has no
 business in machine init.  It creates horrible dependencies about RAM
 initialization order and problems for reset/live migration.

 The kernel should be presented as a virtual device (an emulated flash or
 whatever) and there should be firmware that loads the kernel
 appropriately.
 Then we wouldn't need changes like this in the first place.

 Yeah, I tend to agree that that would be a cleaner approach, but so far
 it hasn't been a requirement. Feel free to submit patches to fix kernel
 loading for these boards if you want to be able to rely on this
 behaviour :-)
 
 Well hopefully now we can agree that what Blue had to do with qtest
 isn't so bad :-)

What I find bad here is that this is a touch-all patch that's touching
multiple targets and lots of ARM boards at once (and that I'm not seeing
what 3/3 this might be good for). I'd suggest to break it down, then I
can integrate most of the target-* pieces into my pending QOM'ification
series with which this collides in cpu_*_init(). In the area of ARM we
don't strictly need to make all boards qtest'able at once, so Peter
could defer the decision to Calxeda and Samsung or some new ARM
bootloader framework if he sees the need, as maintainer.

Andreas

-- 
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg, Germany
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer; HRB 16746 AG Nürnberg



[Qemu-devel] [PATCH] qemu-io: Add command line switch for cache mode

2012-04-18 Thread Kevin Wolf
To be used as in 'qemu-io -t writeback test.img'

Signed-off-by: Kevin Wolf kw...@redhat.com
---
 qemu-io.c |   10 +-
 1 files changed, 9 insertions(+), 1 deletions(-)

diff --git a/qemu-io.c b/qemu-io.c
index e6fcd77..3095a22 100644
--- a/qemu-io.c
+++ b/qemu-io.c
@@ -1784,6 +1784,7 @@ static void usage(const char *name)
   -g, --growable   allow file to grow (only applies to protocols)\n
   -m, --misalign   misalign allocations for O_DIRECT\n
   -k, --native-aio use kernel AIO implementation (on Linux only)\n
+  -t, --cache=MODE use the given cache mode for the image\n
   -T, --trace FILE enable trace events listed in the given file\n
   -h, --help   display this help and exit\n
   -V, --versionoutput version information and exit\n
@@ -1796,7 +1797,7 @@ int main(int argc, char **argv)
 {
 int readonly = 0;
 int growable = 0;
-const char *sopt = hVc:rsnmgkT:;
+const char *sopt = hVc:rsnmgkt:T:;
 const struct option lopt[] = {
 { help, 0, NULL, 'h' },
 { version, 0, NULL, 'V' },
@@ -1808,6 +1809,7 @@ int main(int argc, char **argv)
 { misalign, 0, NULL, 'm' },
 { growable, 0, NULL, 'g' },
 { native-aio, 0, NULL, 'k' },
+{ cache, 1, NULL, 't' },
 { trace, 1, NULL, 'T' },
 { NULL, 0, NULL, 0 }
 };
@@ -1840,6 +1842,12 @@ int main(int argc, char **argv)
 case 'k':
 flags |= BDRV_O_NATIVE_AIO;
 break;
+case 't':
+if (bdrv_parse_cache_flags(optarg, flags)  0) {
+error_report(Invalid cache option: %s, optarg);
+exit(1);
+}
+break;
 case 'T':
 if (!trace_backend_init(optarg, NULL)) {
 exit(1); /* error message will have been printed */
-- 
1.7.6.5




[Qemu-devel] SD card subsystem synchronous I/O

2012-04-18 Thread Stefan Hajnoczi
Recently there have been new SD card emulation patches so I want to
raise the issue of synchronous I/O while there is focus on the SD
subsystem.  Maybe some of the people who are improving the SD
subsystem will be able to help.

sd_blk_read() and sd_blk_write() use the synchronous block I/O
functions to read/write data on behalf of the guest.  Device emulation
runs in the vcpu thread with the QEMU global mutex held, and therefore
both the guest vcpu and QEMU's own monitor and VNC server are
unresponsive while bdrv_read()/bdrv_write() is blocked.

This makes bdrv_read()/bdrv_write() in device emulation code a
performance problem - the guest becomes unresponsive and laggy under
heavy I/O.  In extreme cases, like image files on NFS with a network
connectivity issue, it can affect the reliability of QEMU as a whole
because the monitor and VNC are unavailable until the I/O operation
completes.

Device emulation should use the bdrv_aio_readv()/bdrv_aio_writev()
functions so that control can return to the guest.  When the I/O
operation completes a callback function is invoked and the device
emulation can signal completion to the guest - usually by setting bits
in hardware registers and raising an interrupt.  The result is good
responsiveness and the monitor/VNC remain available even under heavy
I/O.

The challenge is how to convert hw/sd.c and possibly update emulated
SD controllers.  We need to stop assuming that a read/write operation
can be performed instantly and need to use a
bdrv_aio_readv()/bdrv_aio_writev() callback function to complete the
I/O.

Since I am not familiar with the SD specification or the hw/sd.c code
very well I want to check:

* Is anyone willing to convert the SD subsystem?

* Will it be possible to convert just hw/sd.c without affecting
emulated SD controllers?
  * If we're going to need to fix all controllers in addition to
hw/sd.c, then adding more controllers grows the problem.

Stefan



Re: [Qemu-devel] [PATCH v2 0/2] ide: convert pio code path to asynchronous I/O

2012-04-18 Thread Richard Davies
Stefan Hajnoczi wrote:
  Chris and Richard: Please test this to confirm that it fixes the hang you
  reported.
...
 Ping?

We never explicitly said, but yes v2 does fix the hang for us, like v1 did.

We are certainly +1 for this going into qemu 1.1.

Thanks,

Richard.



[Qemu-devel] [PATCH 00/12 v13] introducing a new, dedicated guest memory dump mechanism

2012-04-18 Thread Wen Congyang
Hi, all

'virsh dump' can not work when host pci device is used by guest. We have
discussed this issue here:
http://lists.nongnu.org/archive/html/qemu-devel/2011-10/msg00736.html

The last version is here:
http://lists.nongnu.org/archive/html/qemu-devel/2012-04/msg02440.html

We have determined to introduce a new command dump-guest-memory to dump
guest's memory. The core file's format is elf32 or elf64.

Note:
1. The guest should be x86 or x86_64. The other arch is not supported now.
2. If you use old gdb, gdb may crash. I use gdb-7.3.1, and it does not crash.
3. If the OS is in the second kernel, gdb may not work well, and crash can
   work by specifying '--machdep phys_addr=xxx' in the command line. The
   reason is that the second kernel will update the page table, and we can
   not get the page table for the first kernel.
4. The cpu's state is stored in QEMU note. You need to modify crash to use
   it to calculate phys_base.
5. If the guest OS is 32 bit and the memory size is larger than 4G, the vmcore
   is elf64 format. You should use the gdb which is built with 
--enable-64-bit-bfd.

Changes from v12 to v13:
1. Support the fd that is associated with a pipe, socket, or FIFO

Changes from v11 to v12:
1. rebase and resend

Changes from v10 to v11:
1. addressed Luiz's and Hatayama's comment
2. fix a bug about filtering feature

Changes from v9 to v10:
1. fix some bug
2. addressed Luiz's and Hatayama's comment
3. remove cancel and query command

Changes from v8 to v9:
1. remove async support(it will be reimplemented after QAPI async commands 
support
   is finished)
2. fix some typo error

Changes from v7 to v8:
1. addressed Hatayama's comments

Changes from v6 to v7:
1. addressed Jan's comments
2. fix some bugs
3. store cpu's state into the vmcore

Changes from v5 to v6:
1. allow user to dump a fraction of the memory
2. fix some bugs

Changes from v4 to v5:
1. convert the new command dump to QAPI 

Changes from v3 to v4:
1. support it to run asynchronously
2. add API to cancel dumping and query dumping progress
3. add API to control dumping speed
4. auto cancel dumping when the user resumes vm, and the status is failed.

Changes from v2 to v3:
1. address Jan Kiszka's comment

Changes from v1 to v2:
1. fix virt addr in the vmcore.

Wen Congyang (12):
  Add API to create memory mapping list
  Add API to check whether a physical address is I/O address
  implement cpu_get_memory_mapping()
  Add API to check whether paging mode is enabled
  Add API to get memory mapping
  Add API to get memory mapping without do paging
  target-i386: Add API to write elf notes to core file
  target-i386: Add API to write cpu status to core file
  target-i386: add API to get dump info
  target-i386: Add API to get note's size
  make gdb_id() generally available and rename it to cpu_index()
  introduce a new monitor command 'dump-guest-memory' to dump guest's
memory

 Makefile.target   |3 +
 configure |8 +
 cpu-all.h |   70 +++
 cpu-common.h  |2 +
 dump.c|  872 +
 dump.h|   23 +
 elf.h |5 +
 exec.c|9 +
 gdbstub.c |   19 +-
 gdbstub.h |9 +
 hmp-commands.hx   |   28 ++
 hmp.c |   22 +
 hmp.h |1 +
 memory_mapping.c  |  249 +++
 memory_mapping.h  |   68 +++
 qapi-schema.json  |   33 ++
 qmp-commands.hx   |   38 ++
 target-i386/arch_dump.c   |  449 +++
 target-i386/arch_memory_mapping.c |  271 
 19 files changed, 2165 insertions(+), 14 deletions(-)
 create mode 100644 dump.c
 create mode 100644 dump.h
 create mode 100644 memory_mapping.c
 create mode 100644 memory_mapping.h
 create mode 100644 target-i386/arch_dump.c
 create mode 100644 target-i386/arch_memory_mapping.c







Re: [Qemu-devel] [PATCH RFC 2/6] target-i386: add cpu-model property to x86_cpu

2012-04-18 Thread Igor Mammedov

On 04/18/2012 12:03 PM, Andreas Färber wrote:

Am 17.04.2012 01:36, schrieb Igor Mammedov:

From: Igor Mammedovniall...@gmail.com

Signed-off-by: Igor Mammedovniall...@gmail.com
---
  cpu-defs.h   |2 +-
  hw/pc.c  |   10 --
  target-i386/cpu.c|   33 +
  target-i386/helper.c |   12 
  4 files changed, 42 insertions(+), 15 deletions(-)

diff --git a/cpu-defs.h b/cpu-defs.h
index 3268968..8f1caaf 100644
--- a/cpu-defs.h
+++ b/cpu-defs.h
@@ -221,7 +221,7 @@ typedef struct CPUWatchpoint {
  struct QemuCond *halt_cond; \
  int thread_kicked;  \
  struct qemu_work_item *queued_work_first, *queued_work_last;\
-const char *cpu_model_str;  \
+char *cpu_model_str;  \


Breaks alignment of \.


  struct KVMState *kvm_state; \
  struct kvm_run *kvm_run;\
  int kvm_fd; \
diff --git a/hw/pc.c b/hw/pc.c
index d2c122e..7f0de99 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -931,7 +931,6 @@ static CPUX86State *pc_new_cpu(const char *cpu_model)

  env = cpu_init(cpu_model);
  if (!env) {
-fprintf(stderr, Unable to find x86 CPU definition\n);
  exit(1);
  }
  if ((env-cpuid_features  CPUID_APIC) || smp_cpus  1) {
@@ -950,15 +949,6 @@ void pc_cpus_init(const char *cpu_model)
  {
  int i;

-/* init CPUs */
-if (cpu_model == NULL) {
-#ifdef TARGET_X86_64
-cpu_model = qemu64;
-#else
-cpu_model = qemu32;
-#endif
-}
-
  for(i = 0; i  smp_cpus; i++) {
  pc_new_cpu(cpu_model);
  }
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index e12c851..30ae0c2 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -29,6 +29,8 @@

  #include hyperv.h

+#include qerror.h
+
  /* feature flags taken from Intel Processor Identification and the CPUID
   * Instruction and AMD's CPUID Specification.  In cases of disagreement
   * between feature naming conventions, aliases may be added.
@@ -1468,6 +1470,27 @@ static void mce_init(X86CPU *cpu)
  }
  }

+static char *x86_get_cpu_model(Object *obj, Error **errp)
+{
+X86CPU *cpu = X86_CPU(obj);
+CPUX86State *env =cpu-env;
+return g_strdup(env-cpu_model_str);
+}
+
+static void x86_set_cpu_model(Object *obj, const char *value, Error **errp)
+{
+X86CPU *cpu = X86_CPU(obj);
+CPUX86State *env =cpu-env;
+
+g_free((gpointer)env-cpu_model_str);
+env-cpu_model_str = g_strdup(value);
+
+if (cpu_x86_register(env, env-cpu_model_str)  0) {
+fprintf(stderr, Unable to find x86 CPU definition\n);
+error_set(errp, QERR_INVALID_PARAMETER_COMBINATION);
+}
+}


This will cease to work when we model CPU definitions as subclasses - in
that case we cannot change the type after instantiating it. No one
objected to that aspect of the RFC so far, so I was planning to post
that as part 3 of my series.

A subclass will model only a fixed set of features, but we might wish to override
(add/remove) features provided by it. Now we can do it by adding +/-FEATURE
to the cpu_model string. So we need to somehow parse the feature list from the
cpu_model string we've got from the command line.



We might inspect the model string (typename plus read-only - question is
whether that is useful (cf. CPU feature flag modelling).

Andreas


+
  static void x86_cpu_initfn(Object *obj)
  {
  X86CPU *cpu = X86_CPU(obj);
@@ -1475,6 +1498,16 @@ static void x86_cpu_initfn(Object *obj)

  cpu_exec_init(env);
  env-cpuid_apic_id = env-cpu_index;
+
+object_property_add_str(obj, cpu-model,
+x86_get_cpu_model, x86_set_cpu_model, NULL);
+
+#ifdef TARGET_X86_64
+object_property_set_str(OBJECT(cpu), qemu64, cpu-model, NULL);
+#else
+object_property_set_str(OBJECT(cpu), qemu32, cpu-model, NULL);
+#endif
+
  mce_init(cpu);
  }

diff --git a/target-i386/helper.c b/target-i386/helper.c
index d92d3d4..df33d83 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -1161,11 +1161,11 @@ CPUX86State *cpu_x86_init(const char *cpu_model)
  {
  X86CPU *cpu;
  CPUX86State *env;
+Error *errp = NULL;
  static int inited;

  cpu = X86_CPU(object_new(TYPE_X86_CPU));
  env =cpu-env;
-env-cpu_model_str = cpu_model;

  /* init various static tables used in TCG mode */
  if (tcg_enabled()  !inited) {
@@ -1176,9 +1176,13 @@ CPUX86State *cpu_x86_init(const char *cpu_model)
  cpu_set_debug_excp_handler(breakpoint_handler);
  #endif
  }
-if (cpu_x86_register(env, cpu_model)  0) {
-object_delete(OBJECT(cpu));
-return NULL;
+
+if (cpu_model) {
+object_property_set_str(OBJECT(cpu), cpu_model, cpu-model,errp);
+if (errp) {
+

[Qemu-devel] [PATCH 01/12 v13] Add API to create memory mapping list

2012-04-18 Thread Wen Congyang
The memory mapping list stores virtual address and physical address mapping.
The virtual address and physical address are contiguous in the mapping.
The following patch will use this information to create PT_LOAD in the vmcore.

Signed-off-by: Wen Congyang we...@cn.fujitsu.com
---
 Makefile.target  |1 +
 memory_mapping.c |  166 ++
 memory_mapping.h |   47 +++
 3 files changed, 214 insertions(+), 0 deletions(-)
 create mode 100644 memory_mapping.c
 create mode 100644 memory_mapping.h

diff --git a/Makefile.target b/Makefile.target
index 84951a0..cefb762 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -226,6 +226,7 @@ obj-$(CONFIG_KVM) += kvm.o kvm-all.o
 obj-$(CONFIG_NO_KVM) += kvm-stub.o
 obj-$(CONFIG_VGA) += vga.o
 obj-y += memory.o savevm.o
+obj-y += memory_mapping.o
 LIBS+=-lz
 
 obj-i386-$(CONFIG_KVM) += hyperv.o
diff --git a/memory_mapping.c b/memory_mapping.c
new file mode 100644
index 000..718f271
--- /dev/null
+++ b/memory_mapping.c
@@ -0,0 +1,166 @@
+/*
+ * QEMU memory mapping
+ *
+ * Copyright Fujitsu, Corp. 2011, 2012
+ *
+ * Authors:
+ * Wen Congyang we...@cn.fujitsu.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include cpu.h
+#include cpu-all.h
+#include memory_mapping.h
+
+static void memory_mapping_list_add_mapping_sorted(MemoryMappingList *list,
+   MemoryMapping *mapping)
+{
+MemoryMapping *p;
+
+QTAILQ_FOREACH(p, list-head, next) {
+if (p-phys_addr = mapping-phys_addr) {
+QTAILQ_INSERT_BEFORE(p, mapping, next);
+return;
+}
+}
+QTAILQ_INSERT_TAIL(list-head, mapping, next);
+}
+
+static void create_new_memory_mapping(MemoryMappingList *list,
+  target_phys_addr_t phys_addr,
+  target_phys_addr_t virt_addr,
+  ram_addr_t length)
+{
+MemoryMapping *memory_mapping;
+
+memory_mapping = g_malloc(sizeof(MemoryMapping));
+memory_mapping-phys_addr = phys_addr;
+memory_mapping-virt_addr = virt_addr;
+memory_mapping-length = length;
+list-last_mapping = memory_mapping;
+list-num++;
+memory_mapping_list_add_mapping_sorted(list, memory_mapping);
+}
+
+static inline bool mapping_contiguous(MemoryMapping *map,
+  target_phys_addr_t phys_addr,
+  target_phys_addr_t virt_addr)
+{
+return phys_addr == map-phys_addr + map-length 
+   virt_addr == map-virt_addr + map-length;
+}
+
+/*
+ * [map-phys_addr, map-phys_addr + map-length) and
+ * [phys_addr, phys_addr + length) have intersection?
+ */
+static inline bool mapping_have_same_region(MemoryMapping *map,
+target_phys_addr_t phys_addr,
+ram_addr_t length)
+{
+return !(phys_addr + length  map-phys_addr ||
+ phys_addr = map-phys_addr + map-length);
+}
+
+/*
+ * [map-phys_addr, map-phys_addr + map-length) and
+ * [phys_addr, phys_addr + length) have intersection. The virtual address in 
the
+ * intersection are the same?
+ */
+static inline bool mapping_conflict(MemoryMapping *map,
+target_phys_addr_t phys_addr,
+target_phys_addr_t virt_addr)
+{
+return virt_addr - map-virt_addr != phys_addr - map-phys_addr;
+}
+
+/*
+ * [map-virt_addr, map-virt_addr + map-length) and
+ * [virt_addr, virt_addr + length) have intersection. And the physical address
+ * in the intersection are the same.
+ */
+static inline void mapping_merge(MemoryMapping *map,
+ target_phys_addr_t virt_addr,
+ ram_addr_t length)
+{
+if (virt_addr  map-virt_addr) {
+map-length += map-virt_addr - virt_addr;
+map-virt_addr = virt_addr;
+}
+
+if ((virt_addr + length) 
+(map-virt_addr + map-length)) {
+map-length = virt_addr + length - map-virt_addr;
+}
+}
+
+void memory_mapping_list_add_merge_sorted(MemoryMappingList *list,
+  target_phys_addr_t phys_addr,
+  target_phys_addr_t virt_addr,
+  ram_addr_t length)
+{
+MemoryMapping *memory_mapping, *last_mapping;
+
+if (QTAILQ_EMPTY(list-head)) {
+create_new_memory_mapping(list, phys_addr, virt_addr, length);
+return;
+}
+
+last_mapping = list-last_mapping;
+if (last_mapping) {
+if (mapping_contiguous(last_mapping, phys_addr, virt_addr)) {
+last_mapping-length += length;
+return;
+}
+}
+
+QTAILQ_FOREACH(memory_mapping, list-head, next) {
+if 

[Qemu-devel] [PATCH 02/12 v13] Add API to check whether a physical address is I/O address

2012-04-18 Thread Wen Congyang
This API will be used in the following patch.

Signed-off-by: Wen Congyang we...@cn.fujitsu.com
---
 cpu-common.h |2 ++
 exec.c   |9 +
 2 files changed, 11 insertions(+), 0 deletions(-)

diff --git a/cpu-common.h b/cpu-common.h
index dca5175..fcd50dc 100644
--- a/cpu-common.h
+++ b/cpu-common.h
@@ -71,6 +71,8 @@ void cpu_physical_memory_unmap(void *buffer, 
target_phys_addr_t len,
 void *cpu_register_map_client(void *opaque, void (*callback)(void *opaque));
 void cpu_unregister_map_client(void *cookie);
 
+bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr);
+
 /* Coalesced MMIO regions are areas where write operations can be reordered.
  * This usually implies that write operations are side-effect free.  This 
allows
  * batching which can make a major impact on performance when using
diff --git a/exec.c b/exec.c
index 77d6866..fe00b76 100644
--- a/exec.c
+++ b/exec.c
@@ -4662,3 +4662,12 @@ bool virtio_is_big_endian(void)
 #undef env
 
 #endif
+
+bool cpu_physical_memory_is_io(target_phys_addr_t phys_addr)
+{
+MemoryRegionSection *section;
+
+section = phys_page_find(phys_addr  TARGET_PAGE_BITS);
+
+return !is_ram_rom_romd(section);
+}
-- 
1.7.1




[Qemu-devel] [PATCH 03/12 v13] implement cpu_get_memory_mapping()

2012-04-18 Thread Wen Congyang
Walk cpu's page table and collect all virtual address and physical address 
mapping.
Then, add these mapping into memory mapping list. If the guest does not use 
paging,
it will do nothing. Note: the I/O memory will be skipped.

Signed-off-by: Wen Congyang we...@cn.fujitsu.com
---
 Makefile.target   |1 +
 configure |4 +
 cpu-all.h |   11 ++
 target-i386/arch_memory_mapping.c |  266 +
 4 files changed, 282 insertions(+), 0 deletions(-)
 create mode 100644 target-i386/arch_memory_mapping.c

diff --git a/Makefile.target b/Makefile.target
index cefb762..b41d663 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -89,6 +89,7 @@ libobj-y += helper.o
 ifeq ($(TARGET_BASE_ARCH), i386)
 libobj-y += cpu.o
 endif
+libobj-$(CONFIG_HAVE_GET_MEMORY_MAPPING) += arch_memory_mapping.o
 libobj-$(TARGET_SPARC64) += vis_helper.o
 libobj-$(CONFIG_NEED_MMU) += mmu.o
 libobj-$(TARGET_ARM) += neon_helper.o iwmmxt_helper.o
diff --git a/configure b/configure
index 2d62d12..d706200 100755
--- a/configure
+++ b/configure
@@ -3684,6 +3684,10 @@ case $target_arch2 in
   fi
 fi
 esac
+case $target_arch2 in
+  i386|x86_64)
+echo CONFIG_HAVE_GET_MEMORY_MAPPING=y  $config_target_mak
+esac
 if test $target_arch2 = ppc64 -a $fdt = yes; then
   echo CONFIG_PSERIES=y  $config_target_mak
 fi
diff --git a/cpu-all.h b/cpu-all.h
index f7d6867..bb43548 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -22,6 +22,7 @@
 #include qemu-common.h
 #include qemu-tls.h
 #include cpu-common.h
+#include memory_mapping.h
 
 /* some important defines:
  *
@@ -526,4 +527,14 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf);
 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
 uint8_t *buf, int len, int is_write);
 
+#if defined(CONFIG_HAVE_GET_MEMORY_MAPPING)
+int cpu_get_memory_mapping(MemoryMappingList *list, CPUArchState *env);
+#else
+static inline int cpu_get_memory_mapping(MemoryMappingList *list,
+ CPUArchState *env)
+{
+return -1;
+}
+#endif
+
 #endif /* CPU_ALL_H */
diff --git a/target-i386/arch_memory_mapping.c 
b/target-i386/arch_memory_mapping.c
new file mode 100644
index 000..dd64bec
--- /dev/null
+++ b/target-i386/arch_memory_mapping.c
@@ -0,0 +1,266 @@
+/*
+ * i386 memory mapping
+ *
+ * Copyright Fujitsu, Corp. 2011, 2012
+ *
+ * Authors:
+ * Wen Congyang we...@cn.fujitsu.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include cpu.h
+#include cpu-all.h
+
+/* PAE Paging or IA-32e Paging */
+static void walk_pte(MemoryMappingList *list, target_phys_addr_t 
pte_start_addr,
+ int32_t a20_mask, target_ulong start_line_addr)
+{
+target_phys_addr_t pte_addr, start_paddr;
+uint64_t pte;
+target_ulong start_vaddr;
+int i;
+
+for (i = 0; i  512; i++) {
+pte_addr = (pte_start_addr + i * 8)  a20_mask;
+pte = ldq_phys(pte_addr);
+if (!(pte  PG_PRESENT_MASK)) {
+/* not present */
+continue;
+}
+
+start_paddr = (pte  ~0xfff)  ~(0x1ULL  63);
+if (cpu_physical_memory_is_io(start_paddr)) {
+/* I/O region */
+continue;
+}
+
+start_vaddr = start_line_addr | ((i  0x1fff)  12);
+memory_mapping_list_add_merge_sorted(list, start_paddr,
+ start_vaddr, 1  12);
+}
+}
+
+/* 32-bit Paging */
+static void walk_pte2(MemoryMappingList *list,
+  target_phys_addr_t pte_start_addr, int32_t a20_mask,
+  target_ulong start_line_addr)
+{
+target_phys_addr_t pte_addr, start_paddr;
+uint32_t pte;
+target_ulong start_vaddr;
+int i;
+
+for (i = 0; i  1024; i++) {
+pte_addr = (pte_start_addr + i * 4)  a20_mask;
+pte = ldl_phys(pte_addr);
+if (!(pte  PG_PRESENT_MASK)) {
+/* not present */
+continue;
+}
+
+start_paddr = pte  ~0xfff;
+if (cpu_physical_memory_is_io(start_paddr)) {
+/* I/O region */
+continue;
+}
+
+start_vaddr = start_line_addr | ((i  0x3ff)  12);
+memory_mapping_list_add_merge_sorted(list, start_paddr,
+ start_vaddr, 1  12);
+}
+}
+
+/* PAE Paging or IA-32e Paging */
+static void walk_pde(MemoryMappingList *list, target_phys_addr_t 
pde_start_addr,
+ int32_t a20_mask, target_ulong start_line_addr)
+{
+target_phys_addr_t pde_addr, pte_start_addr, start_paddr;
+uint64_t pde;
+target_ulong line_addr, start_vaddr;
+int i;
+
+for (i = 0; i  512; i++) {
+pde_addr = (pde_start_addr + i * 8)  a20_mask;
+pde = ldq_phys(pde_addr);
+if (!(pde  PG_PRESENT_MASK)) {
+/* not present 

[Qemu-devel] [PATCH 04/12 v13] Add API to check whether paging mode is enabled

2012-04-18 Thread Wen Congyang
This API will be used in the following patch.

Signed-off-by: Wen Congyang we...@cn.fujitsu.com
---
 cpu-all.h |6 ++
 target-i386/arch_memory_mapping.c |7 ++-
 2 files changed, 12 insertions(+), 1 deletions(-)

diff --git a/cpu-all.h b/cpu-all.h
index bb43548..eb35415 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -529,12 +529,18 @@ int cpu_memory_rw_debug(CPUArchState *env, target_ulong 
addr,
 
 #if defined(CONFIG_HAVE_GET_MEMORY_MAPPING)
 int cpu_get_memory_mapping(MemoryMappingList *list, CPUArchState *env);
+bool cpu_paging_enabled(CPUArchState *env);
 #else
 static inline int cpu_get_memory_mapping(MemoryMappingList *list,
  CPUArchState *env)
 {
 return -1;
 }
+
+static inline bool cpu_paging_enabled(CPUArchState *env)
+{
+return true;
+}
 #endif
 
 #endif /* CPU_ALL_H */
diff --git a/target-i386/arch_memory_mapping.c 
b/target-i386/arch_memory_mapping.c
index dd64bec..bd50e11 100644
--- a/target-i386/arch_memory_mapping.c
+++ b/target-i386/arch_memory_mapping.c
@@ -233,7 +233,7 @@ static void walk_pml4e(MemoryMappingList *list,
 
 int cpu_get_memory_mapping(MemoryMappingList *list, CPUArchState *env)
 {
-if (!(env-cr[0]  CR0_PG_MASK)) {
+if (!cpu_paging_enabled(env)) {
 /* paging is disabled */
 return 0;
 }
@@ -264,3 +264,8 @@ int cpu_get_memory_mapping(MemoryMappingList *list, 
CPUArchState *env)
 
 return 0;
 }
+
+bool cpu_paging_enabled(CPUArchState *env)
+{
+return env-cr[0]  CR0_PG_MASK;
+}
-- 
1.7.1




[Qemu-devel] [PATCH 05/12 v13] Add API to get memory mapping

2012-04-18 Thread Wen Congyang
Add API to get all virtual address and physical address mapping.
If the guest doesn't use paging, the virtual address is equal to the physical
address. The virtual address and physical address mapping is for gdb's user, and
it does not include the memory that is not referenced by the page table. So if
you want to use crash to analyze the vmcore, please do not specify -p option.
The reason why the -p option is not enabled by default: a guest machine in a
catastrophic state can have corrupted memory, which we cannot trust.

Signed-off-by: Wen Congyang we...@cn.fujitsu.com
---
 memory_mapping.c |   47 +++
 memory_mapping.h |   15 +++
 2 files changed, 62 insertions(+), 0 deletions(-)

diff --git a/memory_mapping.c b/memory_mapping.c
index 718f271..627397a 100644
--- a/memory_mapping.c
+++ b/memory_mapping.c
@@ -164,3 +164,50 @@ void memory_mapping_list_init(MemoryMappingList *list)
 list-last_mapping = NULL;
 QTAILQ_INIT(list-head);
 }
+
+#if defined(CONFIG_HAVE_GET_MEMORY_MAPPING)
+
+static CPUArchState *find_paging_enabled_cpu(CPUArchState *start_cpu)
+{
+CPUArchState *env;
+
+for (env = start_cpu; env != NULL; env = env-next_cpu) {
+if (cpu_paging_enabled(env)) {
+return env;
+}
+}
+
+return NULL;
+}
+
+int qemu_get_guest_memory_mapping(MemoryMappingList *list)
+{
+CPUArchState *env, *first_paging_enabled_cpu;
+RAMBlock *block;
+ram_addr_t offset, length;
+int ret;
+
+first_paging_enabled_cpu = find_paging_enabled_cpu(first_cpu);
+if (first_paging_enabled_cpu) {
+for (env = first_paging_enabled_cpu; env != NULL; env = env-next_cpu) 
{
+ret = cpu_get_memory_mapping(list, env);
+if (ret  0) {
+return -1;
+}
+}
+return 0;
+}
+
+/*
+ * If the guest doesn't use paging, the virtual address is equal to 
physical
+ * address.
+ */
+QLIST_FOREACH(block, ram_list.blocks, next) {
+offset = block-offset;
+length = block-length;
+create_new_memory_mapping(list, offset, offset, length);
+}
+
+return 0;
+}
+#endif
diff --git a/memory_mapping.h b/memory_mapping.h
index 836b047..4d44641 100644
--- a/memory_mapping.h
+++ b/memory_mapping.h
@@ -44,4 +44,19 @@ void memory_mapping_list_free(MemoryMappingList *list);
 
 void memory_mapping_list_init(MemoryMappingList *list);
 
+/*
+ * Return value:
+ *0: success
+ *   -1: failed
+ *   -2: unsupported
+ */
+#if defined(CONFIG_HAVE_GET_MEMORY_MAPPING)
+int qemu_get_guest_memory_mapping(MemoryMappingList *list);
+#else
+static inline int qemu_get_guest_memory_mapping(MemoryMappingList *list)
+{
+return -2;
+}
+#endif
+
 #endif
-- 
1.7.1




[Qemu-devel] [PATCH 06/12 v13] Add API to get memory mapping without do paging

2012-04-18 Thread Wen Congyang
crash does not need the virtual address and physical address mapping, and the
mapping does not include the memory that is not referenced by the page table.
crash does not use the virtual address, so we can create the mapping for all
physical memory (virtual address is always 0). This patch provides an API to do
this thing, and it will be used in the following patch.

Signed-off-by: Wen Congyang we...@cn.fujitsu.com
---
 memory_mapping.c |9 +
 memory_mapping.h |3 +++
 2 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/memory_mapping.c b/memory_mapping.c
index 627397a..adb1595 100644
--- a/memory_mapping.c
+++ b/memory_mapping.c
@@ -211,3 +211,12 @@ int qemu_get_guest_memory_mapping(MemoryMappingList *list)
 return 0;
 }
 #endif
+
+void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list)
+{
+RAMBlock *block;
+
+QLIST_FOREACH(block, ram_list.blocks, next) {
+create_new_memory_mapping(list, block-offset, 0, block-length);
+}
+}
diff --git a/memory_mapping.h b/memory_mapping.h
index 4d44641..a583e44 100644
--- a/memory_mapping.h
+++ b/memory_mapping.h
@@ -59,4 +59,7 @@ static inline int 
qemu_get_guest_memory_mapping(MemoryMappingList *list)
 }
 #endif
 
+/* get guest's memory mapping without do paging(virtual address is 0). */
+void qemu_get_guest_simple_memory_mapping(MemoryMappingList *list);
+
 #endif
-- 
1.7.1




Re: [Qemu-devel] [PATCH 15/18] qapi: implement support for variable argument list

2012-04-18 Thread Luiz Capitulino
On Wed, 18 Apr 2012 08:57:19 +0200
Paolo Bonzini pbonz...@redhat.com wrote:

 Il 17/04/2012 22:42, Luiz Capitulino ha scritto:
  On Tue, 17 Apr 2012 22:26:55 +0200
  Paolo Bonzini pbonz...@redhat.com wrote:
  
  Il 17/04/2012 21:36, Luiz Capitulino ha scritto:
  +switch(qobject_type(obj)) {
  +case QTYPE_QSTRING:
  +qstring_append(arglist,
  +   qstring_get_str(qobject_to_qstring(obj)));
  +break;
 
  Does this escape commas correctly?
  
  No, but does it have to? Does QemuOpts accept an option with a comma in it?
 
 Yes, ,, is parsed as ,.

The current code doesn't escape either... Either it's because the user is
expected to do it him/herself or we don't have any option that accepts a comma.

  It seems much easier to use no_gen and qemu_opts_from_qdict...  Then
  cmd_netdev_add can be
  
  netdev_add/del is expected to be a stable interface, so we can't use no_gen.
 
 You can have hmp_netdev_add and the no_gen qmp_netdev_add as front-ends
 for the QAPI cmd_netdev_add.  I think it's fair when we have to take
 into account backwards-compatibility.  The conversion gives correct
 error propagation, so even though QemuOpts still leaks it's a step in
 the right direction.

I thought Anthony had plans to replace QemuOpts with something else,
I think it was qcfg, but I might be wrong. Anthony?



[Qemu-devel] [PATCH 07/12 v13] target-i386: Add API to write elf notes to core file

2012-04-18 Thread Wen Congyang
The core file contains register's value. These APIs write registers to
core file, and they will be called in the following patch.

Signed-off-by: Wen Congyang we...@cn.fujitsu.com
---
 Makefile.target |1 +
 configure   |4 +
 cpu-all.h   |   22 +
 target-i386/arch_dump.c |  233 +++
 4 files changed, 260 insertions(+), 0 deletions(-)
 create mode 100644 target-i386/arch_dump.c

diff --git a/Makefile.target b/Makefile.target
index b41d663..dc35266 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -228,6 +228,7 @@ obj-$(CONFIG_NO_KVM) += kvm-stub.o
 obj-$(CONFIG_VGA) += vga.o
 obj-y += memory.o savevm.o
 obj-y += memory_mapping.o
+obj-$(CONFIG_HAVE_CORE_DUMP) += arch_dump.o
 LIBS+=-lz
 
 obj-i386-$(CONFIG_KVM) += hyperv.o
diff --git a/configure b/configure
index d706200..5d58d08 100755
--- a/configure
+++ b/configure
@@ -3703,6 +3703,10 @@ if test $target_softmmu = yes ; then
   if test $smartcard_nss = yes ; then
 echo subdir-$target: subdir-libcacard  $config_host_mak
   fi
+  case $target_arch2 in
+i386|x86_64)
+  echo CONFIG_HAVE_CORE_DUMP=y  $config_target_mak
+  esac
 fi
 if test $target_user_only = yes ; then
   echo CONFIG_USER_ONLY=y  $config_target_mak
diff --git a/cpu-all.h b/cpu-all.h
index eb35415..f23b578 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -543,4 +543,26 @@ static inline bool cpu_paging_enabled(CPUArchState *env)
 }
 #endif
 
+typedef int (*write_core_dump_function)(void *buf, size_t size, void *opaque);
+#if defined(CONFIG_HAVE_CORE_DUMP)
+int cpu_write_elf64_note(write_core_dump_function f, CPUArchState *env,
+ int cpuid, void *opaque);
+int cpu_write_elf32_note(write_core_dump_function f, CPUArchState *env,
+ int cpuid, void *opaque);
+#else
+static inline int cpu_write_elf64_note(write_core_dump_function f,
+   CPUArchState *env, int cpuid,
+   void *opaque)
+{
+return -1;
+}
+
+static inline int cpu_write_elf32_note(write_core_dump_function f,
+   CPUArchState *env, int cpuid,
+   void *opaque)
+{
+return -1;
+}
+#endif
+
 #endif /* CPU_ALL_H */
diff --git a/target-i386/arch_dump.c b/target-i386/arch_dump.c
new file mode 100644
index 000..d55c2ce
--- /dev/null
+++ b/target-i386/arch_dump.c
@@ -0,0 +1,233 @@
+/*
+ * i386 memory mapping
+ *
+ * Copyright Fujitsu, Corp. 2011, 2012
+ *
+ * Authors:
+ * Wen Congyang we...@cn.fujitsu.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include cpu.h
+#include cpu-all.h
+#include elf.h
+
+#ifdef TARGET_X86_64
+typedef struct {
+target_ulong r15, r14, r13, r12, rbp, rbx, r11, r10;
+target_ulong r9, r8, rax, rcx, rdx, rsi, rdi, orig_rax;
+target_ulong rip, cs, eflags;
+target_ulong rsp, ss;
+target_ulong fs_base, gs_base;
+target_ulong ds, es, fs, gs;
+} x86_64_user_regs_struct;
+
+typedef struct {
+char pad1[32];
+uint32_t pid;
+char pad2[76];
+x86_64_user_regs_struct regs;
+char pad3[8];
+} x86_64_elf_prstatus;
+
+static int x86_64_write_elf64_note(write_core_dump_function f,
+   CPUArchState *env, int id,
+   void *opaque)
+{
+x86_64_user_regs_struct regs;
+Elf64_Nhdr *note;
+char *buf;
+int descsz, note_size, name_size = 5;
+const char *name = CORE;
+int ret;
+
+regs.r15 = env-regs[15];
+regs.r14 = env-regs[14];
+regs.r13 = env-regs[13];
+regs.r12 = env-regs[12];
+regs.r11 = env-regs[11];
+regs.r10 = env-regs[10];
+regs.r9  = env-regs[9];
+regs.r8  = env-regs[8];
+regs.rbp = env-regs[R_EBP];
+regs.rsp = env-regs[R_ESP];
+regs.rdi = env-regs[R_EDI];
+regs.rsi = env-regs[R_ESI];
+regs.rdx = env-regs[R_EDX];
+regs.rcx = env-regs[R_ECX];
+regs.rbx = env-regs[R_EBX];
+regs.rax = env-regs[R_EAX];
+regs.rip = env-eip;
+regs.eflags = env-eflags;
+
+regs.orig_rax = 0; /* FIXME */
+regs.cs = env-segs[R_CS].selector;
+regs.ss = env-segs[R_SS].selector;
+regs.fs_base = env-segs[R_FS].base;
+regs.gs_base = env-segs[R_GS].base;
+regs.ds = env-segs[R_DS].selector;
+regs.es = env-segs[R_ES].selector;
+regs.fs = env-segs[R_FS].selector;
+regs.gs = env-segs[R_GS].selector;
+
+descsz = sizeof(x86_64_elf_prstatus);
+note_size = ((sizeof(Elf64_Nhdr) + 3) / 4 + (name_size + 3) / 4 +
+(descsz + 3) / 4) * 4;
+note = g_malloc(note_size);
+
+memset(note, 0, note_size);
+note-n_namesz = cpu_to_le32(name_size);
+note-n_descsz = cpu_to_le32(descsz);
+note-n_type = cpu_to_le32(NT_PRSTATUS);
+buf = (char *)note;
+buf += ((sizeof(Elf64_Nhdr) + 3) / 4) * 4;
+memcpy(buf, name, 

[Qemu-devel] [PATCH 08/12 v13] target-i386: Add API to write cpu status to core file

2012-04-18 Thread Wen Congyang
The core file contains register values, but it does not include all of them.
Store the cpu status in a QEMU note so that the user can get more information
from the vmcore. If you change QEMUCPUState, please increment QEMUCPUSTATE_VERSION.

Signed-off-by: Wen Congyang we...@cn.fujitsu.com
---
 cpu-all.h   |   18 ++
 target-i386/arch_dump.c |  149 +++
 2 files changed, 167 insertions(+), 0 deletions(-)

diff --git a/cpu-all.h b/cpu-all.h
index f23b578..0ea1f7f 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -549,6 +549,10 @@ int cpu_write_elf64_note(write_core_dump_function f, 
CPUArchState *env,
  int cpuid, void *opaque);
 int cpu_write_elf32_note(write_core_dump_function f, CPUArchState *env,
  int cpuid, void *opaque);
+int cpu_write_elf64_qemunote(write_core_dump_function f, CPUArchState *env,
+ void *opaque);
+int cpu_write_elf32_qemunote(write_core_dump_function f, CPUArchState *env,
+ void *opaque);
 #else
 static inline int cpu_write_elf64_note(write_core_dump_function f,
CPUArchState *env, int cpuid,
@@ -563,6 +567,20 @@ static inline int 
cpu_write_elf32_note(write_core_dump_function f,
 {
 return -1;
 }
+
+static inline int cpu_write_elf64_qemunote(write_core_dump_function f,
+   CPUArchState *env,
+   void *opaque);
+{
+return -1;
+}
+
+static inline int cpu_write_elf32_qemunote(write_core_dump_function f,
+   CPUArchState *env,
+   void *opaque)
+{
+return -1;
+}
 #endif
 
 #endif /* CPU_ALL_H */
diff --git a/target-i386/arch_dump.c b/target-i386/arch_dump.c
index d55c2ce..ddbe20c 100644
--- a/target-i386/arch_dump.c
+++ b/target-i386/arch_dump.c
@@ -231,3 +231,152 @@ int cpu_write_elf32_note(write_core_dump_function f, 
CPUArchState *env,
 
 return 0;
 }
+
+/*
+ * please count up QEMUCPUSTATE_VERSION if you have changed definition of
+ * QEMUCPUState, and modify the tools using this information accordingly.
+ */
+#define QEMUCPUSTATE_VERSION (1)
+
+struct QEMUCPUSegment {
+uint32_t selector;
+uint32_t limit;
+uint32_t flags;
+uint32_t pad;
+uint64_t base;
+};
+
+typedef struct QEMUCPUSegment QEMUCPUSegment;
+
+struct QEMUCPUState {
+uint32_t version;
+uint32_t size;
+uint64_t rax, rbx, rcx, rdx, rsi, rdi, rsp, rbp;
+uint64_t r8, r9, r10, r11, r12, r13, r14, r15;
+uint64_t rip, rflags;
+QEMUCPUSegment cs, ds, es, fs, gs, ss;
+QEMUCPUSegment ldt, tr, gdt, idt;
+uint64_t cr[5];
+};
+
+typedef struct QEMUCPUState QEMUCPUState;
+
+static void copy_segment(QEMUCPUSegment *d, SegmentCache *s)
+{
+d-pad = 0;
+d-selector = s-selector;
+d-limit = s-limit;
+d-flags = s-flags;
+d-base = s-base;
+}
+
+static void qemu_get_cpustate(QEMUCPUState *s, CPUArchState *env)
+{
+memset(s, 0, sizeof(QEMUCPUState));
+
+s-version = QEMUCPUSTATE_VERSION;
+s-size = sizeof(QEMUCPUState);
+
+s-rax = env-regs[R_EAX];
+s-rbx = env-regs[R_EBX];
+s-rcx = env-regs[R_ECX];
+s-rdx = env-regs[R_EDX];
+s-rsi = env-regs[R_ESI];
+s-rdi = env-regs[R_EDI];
+s-rsp = env-regs[R_ESP];
+s-rbp = env-regs[R_EBP];
+#ifdef TARGET_X86_64
+s-r8  = env-regs[8];
+s-r9  = env-regs[9];
+s-r10 = env-regs[10];
+s-r11 = env-regs[11];
+s-r12 = env-regs[12];
+s-r13 = env-regs[13];
+s-r14 = env-regs[14];
+s-r15 = env-regs[15];
+#endif
+s-rip = env-eip;
+s-rflags = env-eflags;
+
+copy_segment(s-cs, env-segs[R_CS]);
+copy_segment(s-ds, env-segs[R_DS]);
+copy_segment(s-es, env-segs[R_ES]);
+copy_segment(s-fs, env-segs[R_FS]);
+copy_segment(s-gs, env-segs[R_GS]);
+copy_segment(s-ss, env-segs[R_SS]);
+copy_segment(s-ldt, env-ldt);
+copy_segment(s-tr, env-tr);
+copy_segment(s-gdt, env-gdt);
+copy_segment(s-idt, env-idt);
+
+s-cr[0] = env-cr[0];
+s-cr[1] = env-cr[1];
+s-cr[2] = env-cr[2];
+s-cr[3] = env-cr[3];
+s-cr[4] = env-cr[4];
+}
+
+static inline int cpu_write_qemu_note(write_core_dump_function f,
+  CPUArchState *env,
+  void *opaque,
+  int type)
+{
+QEMUCPUState state;
+Elf64_Nhdr *note64;
+Elf32_Nhdr *note32;
+void *note;
+char *buf;
+int descsz, note_size, name_size = 5, note_head_size;
+const char *name = QEMU;
+int ret;
+
+qemu_get_cpustate(state, env);
+
+descsz = sizeof(state);
+if (type == 0) {
+note_head_size = sizeof(Elf32_Nhdr);
+} else {
+note_head_size = sizeof(Elf64_Nhdr);
+}
+note_size = ((note_head_size + 3) / 4 + (name_size + 3) / 4 +
+(descsz + 3) / 4) * 4;
+note = 

[Qemu-devel] [PATCH 09/12 v13] target-i386: add API to get dump info

2012-04-18 Thread Wen Congyang
Dump info contains: endian, class and architecture. The next
patch will use this information to create the vmcore. Note: on
an x86 box, the class is ELFCLASS64 if the memory is larger than 4G.

Signed-off-by: Wen Congyang we...@cn.fujitsu.com
---
 cpu-all.h   |7 +++
 dump.h  |   23 +++
 target-i386/arch_dump.c |   34 ++
 3 files changed, 64 insertions(+), 0 deletions(-)
 create mode 100644 dump.h

diff --git a/cpu-all.h b/cpu-all.h
index 0ea1f7f..1a1a06d 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -23,6 +23,7 @@
 #include qemu-tls.h
 #include cpu-common.h
 #include memory_mapping.h
+#include dump.h
 
 /* some important defines:
  *
@@ -553,6 +554,7 @@ int cpu_write_elf64_qemunote(write_core_dump_function f, 
CPUArchState *env,
  void *opaque);
 int cpu_write_elf32_qemunote(write_core_dump_function f, CPUArchState *env,
  void *opaque);
+int cpu_get_dump_info(ArchDumpInfo *info);
 #else
 static inline int cpu_write_elf64_note(write_core_dump_function f,
CPUArchState *env, int cpuid,
@@ -581,6 +583,11 @@ static inline int 
cpu_write_elf32_qemunote(write_core_dump_function f,
 {
 return -1;
 }
+
+static inline int cpu_get_dump_info(ArchDumpInfo *info)
+{
+return -1;
+}
 #endif
 
 #endif /* CPU_ALL_H */
diff --git a/dump.h b/dump.h
new file mode 100644
index 000..28340cf
--- /dev/null
+++ b/dump.h
@@ -0,0 +1,23 @@
+/*
+ * QEMU dump
+ *
+ * Copyright Fujitsu, Corp. 2011, 2012
+ *
+ * Authors:
+ * Wen Congyang we...@cn.fujitsu.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef DUMP_H
+#define DUMP_H
+
+typedef struct ArchDumpInfo {
+int d_machine;  /* Architecture */
+int d_endian;   /* ELFDATA2LSB or ELFDATA2MSB */
+int d_class;/* ELFCLASS32 or ELFCLASS64 */
+} ArchDumpInfo;
+
+#endif
diff --git a/target-i386/arch_dump.c b/target-i386/arch_dump.c
index ddbe20c..e378579 100644
--- a/target-i386/arch_dump.c
+++ b/target-i386/arch_dump.c
@@ -13,6 +13,7 @@
 
 #include cpu.h
 #include cpu-all.h
+#include dump.h
 #include elf.h
 
 #ifdef TARGET_X86_64
@@ -380,3 +381,36 @@ int cpu_write_elf32_qemunote(write_core_dump_function f, 
CPUArchState *env,
 {
 return cpu_write_qemu_note(f, env, opaque, 0);
 }
+
+int cpu_get_dump_info(ArchDumpInfo *info)
+{
+bool lma = false;
+RAMBlock *block;
+
+#ifdef TARGET_X86_64
+lma = !!(first_cpu-hflags  HF_LMA_MASK);
+#endif
+
+if (lma) {
+info-d_machine = EM_X86_64;
+} else {
+info-d_machine = EM_386;
+}
+info-d_endian = ELFDATA2LSB;
+
+if (lma) {
+info-d_class = ELFCLASS64;
+} else {
+info-d_class = ELFCLASS32;
+
+QLIST_FOREACH(block, ram_list.blocks, next) {
+if (block-offset + block-length  UINT_MAX) {
+/* The memory size is greater than 4G */
+info-d_class = ELFCLASS64;
+break;
+}
+}
+}
+
+return 0;
+}
-- 
1.7.1



[Qemu-devel] [PATCH 10/12 v13] target-i386: Add API to get note's size

2012-04-18 Thread Wen Congyang
We should know where the notes and memory are stored before writing
them to the vmcore. If we know this, we can avoid using lseek() when
creating the vmcore.

Signed-off-by: Wen Congyang we...@cn.fujitsu.com
---
 cpu-all.h   |6 ++
 target-i386/arch_dump.c |   33 +
 2 files changed, 39 insertions(+), 0 deletions(-)

diff --git a/cpu-all.h b/cpu-all.h
index 1a1a06d..5c9838d 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -555,6 +555,7 @@ int cpu_write_elf64_qemunote(write_core_dump_function f, 
CPUArchState *env,
 int cpu_write_elf32_qemunote(write_core_dump_function f, CPUArchState *env,
  void *opaque);
 int cpu_get_dump_info(ArchDumpInfo *info);
+size_t cpu_get_note_size(int class, int machine, int nr_cpus);
 #else
 static inline int cpu_write_elf64_note(write_core_dump_function f,
CPUArchState *env, int cpuid,
@@ -588,6 +589,11 @@ static inline int cpu_get_dump_info(ArchDumpInfo *info)
 {
 return -1;
 }
+
+static inline int cpu_get_note_size(int class, int machine, int nr_cpus)
+{
+return -1;
+}
 #endif
 
 #endif /* CPU_ALL_H */
diff --git a/target-i386/arch_dump.c b/target-i386/arch_dump.c
index e378579..135d855 100644
--- a/target-i386/arch_dump.c
+++ b/target-i386/arch_dump.c
@@ -414,3 +414,36 @@ int cpu_get_dump_info(ArchDumpInfo *info)
 
 return 0;
 }
+
+size_t cpu_get_note_size(int class, int machine, int nr_cpus)
+{
+int name_size = 5; /* CORE or QEMU */
+size_t elf_note_size = 0;
+size_t qemu_note_size = 0;
+int elf_desc_size = 0;
+int qemu_desc_size = 0;
+int note_head_size;
+
+if (class == ELFCLASS32) {
+note_head_size = sizeof(Elf32_Nhdr);
+} else {
+note_head_size = sizeof(Elf64_Nhdr);
+}
+
+if (machine == EM_386) {
+elf_desc_size = sizeof(x86_elf_prstatus);
+}
+#ifdef TARGET_X86_64
+else {
+elf_desc_size = sizeof(x86_64_elf_prstatus);
+}
+#endif
+qemu_desc_size = sizeof(QEMUCPUState);
+
+elf_note_size = ((note_head_size + 3) / 4 + (name_size + 3) / 4 +
+ (elf_desc_size + 3) / 4) * 4;
+qemu_note_size = ((note_head_size + 3) / 4 + (name_size + 3) / 4 +
+  (qemu_desc_size + 3) / 4) * 4;
+
+return (elf_note_size + qemu_note_size) * nr_cpus;
+}
-- 
1.7.1




[Qemu-devel] [PATCH 11/12 v13] make gdb_id() generally available and rename it to cpu_index()

2012-04-18 Thread Wen Congyang
The following patch also needs this API, so make it generally available.
The function gdb_id() will no longer be used only in gdbstub.c, so its name
is no longer suitable; rename it to cpu_index().

Signed-off-by: Wen Congyang we...@cn.fujitsu.com
---
 gdbstub.c |   19 +--
 gdbstub.h |9 +
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/gdbstub.c b/gdbstub.c
index 6a7e2c4..423ffec 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -1938,21 +1938,12 @@ static void gdb_set_cpu_pc(GDBState *s, target_ulong pc)
 #endif
 }
 
-static inline int gdb_id(CPUArchState *env)
-{
-#if defined(CONFIG_USER_ONLY)  defined(CONFIG_USE_NPTL)
-return env-host_tid;
-#else
-return env-cpu_index + 1;
-#endif
-}
-
 static CPUArchState *find_cpu(uint32_t thread_id)
 {
 CPUArchState *env;
 
 for (env = first_cpu; env != NULL; env = env-next_cpu) {
-if (gdb_id(env) == thread_id) {
+if (cpu_index(env) == thread_id) {
 return env;
 }
 }
@@ -1980,7 +1971,7 @@ static int gdb_handle_packet(GDBState *s, const char 
*line_buf)
 case '?':
 /* TODO: Make this return the correct value for user-mode.  */
 snprintf(buf, sizeof(buf), T%02xthread:%02x;, GDB_SIGNAL_TRAP,
- gdb_id(s-c_cpu));
+ cpu_index(s-c_cpu));
 put_packet(s, buf);
 /* Remove all the breakpoints when this query is issued,
  * because gdb is doing and initial connect and the state
@@ -2275,7 +2266,7 @@ static int gdb_handle_packet(GDBState *s, const char 
*line_buf)
 } else if (strcmp(p,sThreadInfo) == 0) {
 report_cpuinfo:
 if (s-query_cpu) {
-snprintf(buf, sizeof(buf), m%x, gdb_id(s-query_cpu));
+snprintf(buf, sizeof(buf), m%x, cpu_index(s-query_cpu));
 put_packet(s, buf);
 s-query_cpu = s-query_cpu-next_cpu;
 } else
@@ -2423,7 +2414,7 @@ static void gdb_vm_state_change(void *opaque, int 
running, RunState state)
 }
 snprintf(buf, sizeof(buf),
  T%02xthread:%02x;%swatch: TARGET_FMT_lx ;,
- GDB_SIGNAL_TRAP, gdb_id(env), type,
+ GDB_SIGNAL_TRAP, cpu_index(env), type,
  env-watchpoint_hit-vaddr);
 env-watchpoint_hit = NULL;
 goto send_packet;
@@ -2456,7 +2447,7 @@ static void gdb_vm_state_change(void *opaque, int 
running, RunState state)
 ret = GDB_SIGNAL_UNKNOWN;
 break;
 }
-snprintf(buf, sizeof(buf), T%02xthread:%02x;, ret, gdb_id(env));
+snprintf(buf, sizeof(buf), T%02xthread:%02x;, ret, cpu_index(env));
 
 send_packet:
 put_packet(s, buf);
diff --git a/gdbstub.h b/gdbstub.h
index b44e275..668de66 100644
--- a/gdbstub.h
+++ b/gdbstub.h
@@ -30,6 +30,15 @@ void gdb_register_coprocessor(CPUArchState *env,
   gdb_reg_cb get_reg, gdb_reg_cb set_reg,
   int num_regs, const char *xml, int g_pos);
 
+static inline int cpu_index(CPUArchState *env)
+{
+#if defined(CONFIG_USER_ONLY)  defined(CONFIG_USE_NPTL)
+return env-host_tid;
+#else
+return env-cpu_index + 1;
+#endif
+}
+
 #endif
 
 #ifdef CONFIG_USER_ONLY
-- 
1.7.1




[Qemu-devel] [PATCH 12/12 v13] introduce a new monitor command 'dump-guest-memory' to dump guest's memory

2012-04-18 Thread Wen Congyang
The command's usage:
   dump [-p] protocol [begin] [length]
The supported protocol can be file or fd:
1. file: the protocol starts with file:, and the following string is
   the file's path.
2. fd: the protocol starts with fd:, and the following string is the
   fd's name.

Note:
  1. If you want to use gdb to process the core, please specify -p option.
 The reason why the -p option is not default is:
   a. guest machine in a catastrophic state can have corrupted memory,
  which we cannot trust.
   b. The guest machine can be in real-mode even if paging is enabled.
  For example: the guest machine uses ACPI to sleep, and ACPI sleep
  state goes in real-mode.
  2. If you don't want to dump all guest's memory, please specify the start
 physical address and the length.

Signed-off-by: Wen Congyang we...@cn.fujitsu.com
---
 Makefile.target  |2 +-
 dump.c   |  872 ++
 elf.h|5 +
 hmp-commands.hx  |   28 ++
 hmp.c|   22 ++
 hmp.h|1 +
 memory_mapping.c |   27 ++
 memory_mapping.h |3 +
 qapi-schema.json |   33 ++
 qmp-commands.hx  |   38 +++
 10 files changed, 1030 insertions(+), 1 deletions(-)
 create mode 100644 dump.c

diff --git a/Makefile.target b/Makefile.target
index dc35266..e810b52 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -228,7 +228,7 @@ obj-$(CONFIG_NO_KVM) += kvm-stub.o
 obj-$(CONFIG_VGA) += vga.o
 obj-y += memory.o savevm.o
 obj-y += memory_mapping.o
-obj-$(CONFIG_HAVE_CORE_DUMP) += arch_dump.o
+obj-$(CONFIG_HAVE_CORE_DUMP) += arch_dump.o dump.o
 LIBS+=-lz
 
 obj-i386-$(CONFIG_KVM) += hyperv.o
diff --git a/dump.c b/dump.c
new file mode 100644
index 000..635a0e5
--- /dev/null
+++ b/dump.c
@@ -0,0 +1,872 @@
+/*
+ * QEMU dump
+ *
+ * Copyright Fujitsu, Corp. 2011, 2012
+ *
+ * Authors:
+ * Wen Congyang we...@cn.fujitsu.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ */
+
+#include qemu-common.h
+#include unistd.h
+#include elf.h
+#include sys/procfs.h
+#include glib.h
+#include cpu.h
+#include cpu-all.h
+#include targphys.h
+#include monitor.h
+#include kvm.h
+#include dump.h
+#include sysemu.h
+#include bswap.h
+#include memory_mapping.h
+#include error.h
+#include qmp-commands.h
+#include gdbstub.h
+
+static uint16_t cpu_convert_to_target16(uint16_t val, int endian)
+{
+if (endian == ELFDATA2LSB) {
+val = cpu_to_le16(val);
+} else {
+val = cpu_to_be16(val);
+}
+
+return val;
+}
+
+static uint32_t cpu_convert_to_target32(uint32_t val, int endian)
+{
+if (endian == ELFDATA2LSB) {
+val = cpu_to_le32(val);
+} else {
+val = cpu_to_be32(val);
+}
+
+return val;
+}
+
+static uint64_t cpu_convert_to_target64(uint64_t val, int endian)
+{
+if (endian == ELFDATA2LSB) {
+val = cpu_to_le64(val);
+} else {
+val = cpu_to_be64(val);
+}
+
+return val;
+}
+
+typedef struct DumpState {
+ArchDumpInfo dump_info;
+MemoryMappingList list;
+uint16_t phdr_num;
+uint32_t sh_info;
+bool have_section;
+bool resume;
+size_t note_size;
+target_phys_addr_t memory_offset;
+int fd;
+
+RAMBlock *block;
+ram_addr_t start;
+bool has_filter;
+int64_t begin;
+int64_t length;
+Error **errp;
+} DumpState;
+
+static int dump_cleanup(DumpState *s)
+{
+int ret = 0;
+
+memory_mapping_list_free(s-list);
+if (s-fd != -1) {
+close(s-fd);
+}
+if (s-resume) {
+vm_start();
+}
+
+return ret;
+}
+
+static void dump_error(DumpState *s, const char *reason)
+{
+dump_cleanup(s);
+}
+
+static int fd_write_vmcore(void *buf, size_t size, void *opaque)
+{
+DumpState *s = opaque;
+int fd = s-fd;
+size_t writen_size;
+
+/* The fd may be passed from user, and it can be non-blocked */
+while (size) {
+writen_size = qemu_write_full(fd, buf, size);
+if (writen_size != size  errno != EAGAIN) {
+return -1;
+}
+
+buf += writen_size;
+size -= writen_size;
+}
+
+return 0;
+}
+
+static int write_elf64_header(DumpState *s)
+{
+Elf64_Ehdr elf_header;
+int ret;
+int endian = s-dump_info.d_endian;
+
+memset(elf_header, 0, sizeof(Elf64_Ehdr));
+memcpy(elf_header, ELFMAG, SELFMAG);
+elf_header.e_ident[EI_CLASS] = ELFCLASS64;
+elf_header.e_ident[EI_DATA] = s-dump_info.d_endian;
+elf_header.e_ident[EI_VERSION] = EV_CURRENT;
+elf_header.e_type = cpu_convert_to_target16(ET_CORE, endian);
+elf_header.e_machine = cpu_convert_to_target16(s-dump_info.d_machine,
+   endian);
+elf_header.e_version = cpu_convert_to_target32(EV_CURRENT, endian);
+elf_header.e_ehsize = cpu_convert_to_target16(sizeof(elf_header), endian);
+elf_header.e_phoff = 

Re: [Qemu-devel] [PATCH 00/12 v13] introducing a new, dedicated guest memory dump mechanism

2012-04-18 Thread Wen Congyang
At 04/18/2012 08:49 PM, Wen Congyang Wrote:
 Hi, all
 
 'virsh dump' can not work when host pci device is used by guest. We have
 discussed this issue here:
 http://lists.nongnu.org/archive/html/qemu-devel/2011-10/msg00736.html
 
 The last version is here:
 http://lists.nongnu.org/archive/html/qemu-devel/2012-04/msg02440.html
 
 We have determined to introduce a new command dump-guest-memory to dump
 guest's memory. The core file's format is elf32 or elf64.
 
 Note:
 1. The guest should be x86 or x86_64. The other arch is not supported now.
 2. If you use old gdb, gdb may crash. I use gdb-7.3.1, and it does not crash.
 3. If the OS is in the second kernel, gdb may not work well, and crash can
work by specifying '--machdep phys_addr=xxx' in the command line. The
reason is that the second kernel will update the page table, and we can
not get the page table for the first kernel.
 4. The cpu's state is stored in QEMU note. You need to modify crash to use
it to calculate phys_base.
 5. If the guest OS is 32 bit and the memory size is larger than 4G, the vmcore
is elf64 format. You should use the gdb which is built with 
 --enable-64-bit-bfd.
 
 Changes from v12 to v13:
 1. Support the fd that is associated with a pipe, socket, or FIFO

Hi, Luiz,

The master logic is not changed. I only update 4 patches:
patch7 and patch8: remove the offset argument from the API
patch11: a new patch, with this patch, I can get the note's size, so lseek()
 is unnecessary.
patch12: remove the offset argument from the some static functions
 split some functions
 add a function to write elf load
 adjust writing order to avoid using lseek()

I have tested this patchset, and it works fine for me.

 
 Changes from v11 to v12:
 1. rebase and resend
 
 Changes from v10 to v11:
 1. addressed Luiz's and Hatayama's comments
 2. fix a bug about filtering feature
 
 Changes from v9 to v10:
 1. fix some bug
 2. addressed Luiz's and Hatayama's comments
 3. remove cancel and query command
 
 Changes from v8 to v9:
 1. remove async support(it will be reimplemented after QAPI async commands 
 support
is finished)
 2. fix some typo error
 
 Changes from v7 to v8:
 1. addressed Hatayama's comments
 
 Changes from v6 to v7:
 1. addressed Jan's comments
 2. fix some bugs
 3. store cpu's state into the vmcore
 
 Changes from v5 to v6:
 1. allow user to dump a fraction of the memory
 2. fix some bugs
 
 Changes from v4 to v5:
 1. convert the new command dump to QAPI 
 
 Changes from v3 to v4:
 1. support it to run asynchronously
 2. add API to cancel dumping and query dumping progress
 3. add API to control dumping speed
 4. auto cancel dumping when the user resumes vm, and the status is failed.
 
 Changes from v2 to v3:
 1. address Jan Kiszka's comment
 
 Changes from v1 to v2:
 1. fix virt addr in the vmcore.
 
 Wen Congyang (12):
   Add API to create memory mapping list
   Add API to check whether a physical address is I/O address
   implement cpu_get_memory_mapping()
   Add API to check whether paging mode is enabled
   Add API to get memory mapping
   Add API to get memory mapping without do paging
   target-i386: Add API to write elf notes to core file
   target-i386: Add API to write cpu status to core file
   target-i386: add API to get dump info
   target-i386: Add API to get note's size
   make gdb_id() generally available and rename it to cpu_index()
   introduce a new monitor command 'dump-guest-memory' to dump guest's
 memory
 
  Makefile.target   |3 +
  configure |8 +
  cpu-all.h |   70 +++
  cpu-common.h  |2 +
  dump.c|  872 
 +
  dump.h|   23 +
  elf.h |5 +
  exec.c|9 +
  gdbstub.c |   19 +-
  gdbstub.h |9 +
  hmp-commands.hx   |   28 ++
  hmp.c |   22 +
  hmp.h |1 +
  memory_mapping.c  |  249 +++
  memory_mapping.h  |   68 +++
  qapi-schema.json  |   33 ++
  qmp-commands.hx   |   38 ++
  target-i386/arch_dump.c   |  449 +++
  target-i386/arch_memory_mapping.c |  271 
  19 files changed, 2165 insertions(+), 14 deletions(-)
  create mode 100644 dump.c
  create mode 100644 dump.h
  create mode 100644 memory_mapping.c
  create mode 100644 memory_mapping.h
  create mode 100644 target-i386/arch_dump.c
  create mode 100644 target-i386/arch_memory_mapping.c
 
 
 
 
 
 




Re: [Qemu-devel] block-stream/drive-mirror and default bandwidth

2012-04-18 Thread Stefan Hajnoczi
On Tue, Apr 17, 2012 at 1:14 PM, Eric Blake ebl...@redhat.com wrote:
 On 04/17/2012 03:20 AM, Stefan Hajnoczi wrote:
 I think it's cleanest to support block-job-set-speed even when no job
 is running.  The speed will be used as the default value when a job is
 started.  This poses the question of what happens if the job does not
 do throttling or cannot support the value for some reason - does
 creation fail until block-job-set-speed is set to 0 or a valid value
 again, or do we allow it but silently perform no throttling?

 I'd prefer failure for any request for an out-of-range speed, and I like
 the idea of always letting the user set the speed, even when a job is
 not already running.  Am I correct that only one job can run at a time,
 and therefore, the speed can be a property associated with the
 BlockDevice as a whole, rather than only a property associated with each
 individual job?

Yes, the current design is 1 block job per device.

Stefan



Re: [Qemu-devel] [PATCH 1/2] [trivial] Generic elimination of auto-generated files

2012-04-18 Thread Stefan Hajnoczi
On Sat, Apr 14, 2012 at 12:19:03AM +0200, Lluís Vilanova wrote:
 - rm -f $(GENERATED_HEADERS)
 - rm -f $(GENERATED_SOURCES)
 + rm -f $(foreach f,$(GENERATED_HEADERS),$(f) $(f)-timestamp */$(f) 
 */$(f)-timestamp)
 + rm -f $(foreach f,$(GENERATED_SOURCES),$(f) $(f)-timestamp */$(f) 
 */$(f)-timestamp)

Why */$(f) */$(f)-timestamp?

Stefan



Re: [Qemu-devel] [PATCH 0/3] switch to seavgabios

2012-04-18 Thread Gerd Hoffmann
  Hi,

[ adding seabios list to Cc:, topic is the missing vesa 2.0 protected
  mode interface in seavgabios ]

 Pointer?
 I'd like to have a test case which breaks with the new vgabios.
 
 We talked with malc briefly on irc yesterday, and this is what
 he gave me:
 
 http://cvs.savannah.gnu.org/viewvc/vgabios/vbe.c?root=vgabiosr1=1.47r2=1.48
 
 this is not the test case but the missing support he's referring to.
 
 It appears the patch implements just 2 functions which both just does
 int10,

It isn't that simple.  Just invoking int10 from protected mode isn't
guaranteed to have the desired effect.  It certainly wouldn't work for
linux vesafb panning.  It might work for dos extenders, they might have
the idt entry for int10 and other bios interrupts setup accordingly to
do a real-mode - bios call - protected mode transition to simplify
porting dos code to the 32bit extender.  But even for that use case it
is IMHO pointless as the reason to have a 32bit interface is to avoid
the expensive real mode switch in the first place ...

The code has been changed later on, for good reasons, see
http://git.qemu.org/?p=vgabios.git;a=commitdiff;h=72c270d8091fb0f09e8291cc0e7154b075b921a9

 so should be easy to implement in seabios,

seavgabios has no 32bit code at all at the moment.  vesa pmi didn't seem
to be important enough to change it.

seabios is a 16/32bit hybrid with some code being compiled twice for
both modes; dunno how reusable the seabios infrastructure is.  Unlike
seabios seavgabios has no fixed load address, which makes things a bit
more complicated when it comes to global variables I think.

At least for the bochs interface this wouldn't be a showstopper though.
Instead of using global variables to figure stuff like the active video
mode we can just read the bits we need from the bochs device registers.
 Costs some extra vmexits of course, and we wouldn't have code sharing
for 16+32bit paths.

cheers,
  Gerd



Re: [Qemu-devel] [PATCH 2/2] [trivial] Simpler handling of tracetool-generated files in makefiles

2012-04-18 Thread Stefan Hajnoczi
On Sat, Apr 14, 2012 at 12:19:08AM +0200, Lluís Vilanova wrote:
 Adds 'tracetool-gen' to generate files with tracetool into a temporary file, 
 and
 'tracetool-ci' to commit the generation from the temporary file into the
 actual destination file if there were any changes in the produced file.
 
 Signed-off-by: Lluís Vilanova vilan...@ac.upc.edu
 ---
  Makefile.objs |   19 +--
  rules.mak |   17 +
  2 files changed, 26 insertions(+), 10 deletions(-)
 
 diff --git a/Makefile.objs b/Makefile.objs
 index 6d6f24d..b98e905 100644
 --- a/Makefile.objs
 +++ b/Makefile.objs
 @@ -373,18 +373,17 @@ libdis-$(CONFIG_LM32_DIS) += lm32-dis.o
  # trace
  
  ifeq ($(TRACE_BACKEND),dtrace)
 -trace.h: trace.h-timestamp trace-dtrace.h
 -else
 -trace.h: trace.h-timestamp
 +TRACE_H_EXTRA_DEPS=trace-dtrace.h
  endif
 +trace.h: trace.h-timestamp $(TRACE_H_EXTRA_DEPS)

I like this.

  trace.h-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak
 - $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/tracetool.py 
 --format=h --backend=$(TRACE_BACKEND)  $  $@,  GEN   trace.h)
 - @cmp -s $@ trace.h || cp $@ trace.h
 + $(call tracetool-gen,h,$(TRACE_BACKEND))
 + $(call tracetool-ci)

Here I don't think it's worth introducing an abstraction.  While there
is a pattern I think the abstraction actually hides what is going on
rather than being useful.  The macros are hiding output generation, I
find that especially troubling because you can't really tell what is
going to happen.  It's clearer to leave these statements open coded.

Stefan



[Qemu-devel] [PATCH 0/3] block: more fixes to job cancellation

2012-04-18 Thread Paolo Bonzini
Kevin noted how the part of stream_run() that is after the for loop
runs with s->common.busy = false. At least bdrv_change_backing_file()
can yield, possibly other functions, too.

The race window is really small but it's there.  The patch takes a more
flexible implementation of block_job_cancel_sync from the mirroring
patches so that the race can be fixed easily.

Paolo Bonzini (3):
  block: allow interrupting a co_sleep_ns
  block: wait for job callback in block_job_cancel_sync
  block: mark streaming job busy at the end

 block.c|   41 +++--
 block/stream.c |8 
 block_int.h|   17 -
 qemu-coroutine-sleep.c |3 ++-
 4 files changed, 57 insertions(+), 12 deletions(-)

-- 
1.7.9.3




[Qemu-devel] [PATCH 3/3] block: mark streaming job busy at the end

2012-04-18 Thread Paolo Bonzini
Kevin noted how the part of stream_run() that is after the for loop runs with
s->common.busy = false. At least bdrv_change_backing_file() can yield,
possibly other functions, too.

The race window is really small but it's there.  Thanks to the new,
more flexible implementation of block_job_cancel_sync we can fix it
easily.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 block/stream.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/block/stream.c b/block/stream.c
index d38f30a..ea3566d 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -149,6 +149,7 @@ retry:
 co_sleep_ns(rt_clock, 0);
 }
 
+s-common.busy = true;
 if (!base) {
 bdrv_disable_copy_on_read(bs);
 }
-- 
1.7.9.3




[Qemu-devel] [PATCH 1/3] block: allow interrupting a co_sleep_ns

2012-04-18 Thread Paolo Bonzini
In the next patch we want to reenter the coroutine from
block_job_cancel_sync and cancel the timer.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 qemu-coroutine-sleep.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/qemu-coroutine-sleep.c b/qemu-coroutine-sleep.c
index fd65274..d7083ee 100644
--- a/qemu-coroutine-sleep.c
+++ b/qemu-coroutine-sleep.c
@@ -23,7 +23,6 @@ static void co_sleep_cb(void *opaque)
 {
 CoSleepCB *sleep_cb = opaque;
 
-qemu_free_timer(sleep_cb-ts);
 qemu_coroutine_enter(sleep_cb-co, NULL);
 }
 
@@ -35,4 +34,6 @@ void coroutine_fn co_sleep_ns(QEMUClock *clock, int64_t ns)
 sleep_cb.ts = qemu_new_timer(clock, SCALE_NS, co_sleep_cb, sleep_cb);
 qemu_mod_timer(sleep_cb.ts, qemu_get_clock_ns(clock) + ns);
 qemu_coroutine_yield();
+qemu_del_timer(sleep_cb.ts);
+qemu_free_timer(sleep_cb.ts);
 }
-- 
1.7.9.3





[Qemu-devel] [PATCH 2/3] block: wait for job callback in block_job_cancel_sync

2012-04-18 Thread Paolo Bonzini
The limitation on not having I/O after cancellation cannot really be
held.  Even streaming has a very small race window where you could
cancel a job and have it report completion.  If this window is hit,
bdrv_change_backing_file() will yield and possibly cause accesses to
dangling pointers etc.

So, let's just assume that we cannot know exactly what will happen
after the coroutine has set busy to false.  We can set a very lax
condition:

- if we cancel the job, the coroutine won't set it to false again
(and hence will not call co_sleep_ns again).

- block_job_cancel_sync will wait for the coroutine to exit, which
pretty much ensures no race.

Instead, we put a very strict condition on what to do while busy =
false.  We track the coroutine that executes the job and reenter it
(thus cancelling a wait for example) before block_job_cancel restarts.
Thus you cannot really do anything but co_sleep_ns at that time.

This patch also drains the I/O *before* canceling the job, so that
block_job_cancel is quite sure to find the coroutine in quiescent
state (busy = false).  For mirroring, this means that the job will
complete itself with a consistent view of the disk.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 block.c|   41 +++--
 block/stream.c |7 +++
 block_int.h|   17 -
 3 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/block.c b/block.c
index 9c7d896..48f4414 100644
--- a/block.c
+++ b/block.c
@@ -4217,7 +4217,15 @@ int block_job_set_speed(BlockJob *job, int64_t value)
 
 void block_job_cancel(BlockJob *job)
 {
+/* Complete all guest I/O before cancelling the job, so that if the
+ * job chooses to complete itself it will do so with a consistent
+ * view of the disk.
+ */
+bdrv_drain_all();
 job->cancelled = true;
+if (job->co && !job->busy) {
+qemu_coroutine_enter(job->co, NULL);
+}
 }
 
 bool block_job_is_cancelled(BlockJob *job)
@@ -4225,13 +4233,42 @@ bool block_job_is_cancelled(BlockJob *job)
 return job->cancelled;
 }
 
-void block_job_cancel_sync(BlockJob *job)
+struct BlockCancelData {
+BlockJob *job;
+BlockDriverCompletionFunc *cb;
+void *opaque;
+bool cancelled;
+int ret;
+};
+
+static void block_job_cancel_cb(void *opaque, int ret)
+{
+struct BlockCancelData *data = opaque;
+
+data->cancelled = block_job_is_cancelled(data->job);
+data->ret = ret;
+data->cb(data->opaque, ret);
+}
+
+int block_job_cancel_sync(BlockJob *job)
 {
+struct BlockCancelData data;
 BlockDriverState *bs = job->bs;
 
 assert(bs->job == job);
+
+/* Set up our own callback to store the result and chain to
+ * the original callback.
+ */
+data.job = job;
+data.cb = job->cb;
+data.opaque = job->opaque;
+data.ret = -EINPROGRESS;
+job->cb = block_job_cancel_cb;
+job->opaque = &data;
 block_job_cancel(job);
-while (bs->job != NULL && bs->job->busy) {
+while (data.ret == -EINPROGRESS) {
 qemu_aio_wait();
 }
+return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
 }
diff --git a/block/stream.c b/block/stream.c
index 0116450..d38f30a 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -191,7 +191,6 @@ int stream_start(BlockDriverState *bs, BlockDriverState 
*base,
  void *opaque)
 {
 StreamBlockJob *s;
-Coroutine *co;
 
 s = block_job_create(stream_job_type, bs, cb, opaque);
 if (!s) {
@@ -203,8 +202,8 @@ int stream_start(BlockDriverState *bs, BlockDriverState 
*base,
 pstrcpy(s->backing_file_id, sizeof(s->backing_file_id), base_id);
 }
 
-co = qemu_coroutine_create(stream_run);
-trace_stream_start(bs, base, s, co, opaque);
-qemu_coroutine_enter(co, s);
+s->common.co = qemu_coroutine_create(stream_run);
+trace_stream_start(bs, base, s, s->common.co, opaque);
+qemu_coroutine_enter(s->common.co, s);
 return 0;
 }
diff --git a/block_int.h b/block_int.h
index 58e3eea..0554cd8 100644
--- a/block_int.h
+++ b/block_int.h
@@ -94,6 +94,12 @@ struct BlockJob {
 BlockDriverState *bs;
 
 /**
+ * The coroutine that executes the job.  If not NULL, it is
+ * reentered when busy is false and the job is cancelled.
+ */
+Coroutine *co;
+
+/**
  * Set to true if the job should cancel itself.  The flag must
  * always be tested just before toggling the busy flag from false
  * to true.  After a job has detected that the cancelled flag is
@@ -104,10 +110,8 @@ struct BlockJob {
 
 /**
  * Set to false by the job while it is in a quiescent state, where
- * no I/O is pending and cancellation can be processed without
- * issuing new I/O.  The busy flag must be set to false when the
- * job goes to sleep on any condition that is not detected by
- * #qemu_aio_wait, such as a timer.
+ * no I/O is pending and the job goes to sleep on any condition
+ * that is not detected by #qemu_aio_wait, such as 

Re: [Qemu-devel] [PATCH] qemu-io: Add command line switch for cache mode

2012-04-18 Thread Paolo Bonzini
Il 18/04/2012 14:35, Kevin Wolf ha scritto:
 To be used as in 'qemu-io -t writeback test.img'
 
 Signed-off-by: Kevin Wolf kw...@redhat.com
 ---
  qemu-io.c |   10 +-
  1 files changed, 9 insertions(+), 1 deletions(-)
 
 diff --git a/qemu-io.c b/qemu-io.c
 index e6fcd77..3095a22 100644
 --- a/qemu-io.c
 +++ b/qemu-io.c
 @@ -1784,6 +1784,7 @@ static void usage(const char *name)
-g, --growable   allow file to grow (only applies to protocols)\n
-m, --misalign   misalign allocations for O_DIRECT\n
-k, --native-aio use kernel AIO implementation (on Linux only)\n
 +  -t, --cache=MODE use the given cache mode for the image\n
-T, --trace FILE enable trace events listed in the given file\n
-h, --help   display this help and exit\n
-V, --versionoutput version information and exit\n
 @@ -1796,7 +1797,7 @@ int main(int argc, char **argv)
  {
  int readonly = 0;
  int growable = 0;
 -const char *sopt = "hVc:rsnmgkT:";
 +const char *sopt = "hVc:rsnmgkt:T:";
  const struct option lopt[] = {
  { help, 0, NULL, 'h' },
  { version, 0, NULL, 'V' },
 @@ -1808,6 +1809,7 @@ int main(int argc, char **argv)
  { misalign, 0, NULL, 'm' },
  { growable, 0, NULL, 'g' },
  { native-aio, 0, NULL, 'k' },
 +{ cache, 1, NULL, 't' },
  { trace, 1, NULL, 'T' },
  { NULL, 0, NULL, 0 }
  };
 @@ -1840,6 +1842,12 @@ int main(int argc, char **argv)
  case 'k':
  flags |= BDRV_O_NATIVE_AIO;
  break;
 +case 't':
 +if (bdrv_parse_cache_flags(optarg, &flags) < 0) {
 +error_report("Invalid cache option: %s", optarg);
 +exit(1);
 +}
 +break;
  case 'T':
  if (!trace_backend_init(optarg, NULL)) {
  exit(1); /* error message will have been printed */

Reviewed-by: Paolo Bonzini pbonz...@redhat.com





Re: [Qemu-devel] [PATCH v2 0/2] ide: convert pio code path to asynchronous I/O

2012-04-18 Thread Paolo Bonzini
Il 29/03/2012 11:31, Stefan Hajnoczi ha scritto:
 IDE PIO mode is currently implemented using synchronous I/O functions.  
 There's
 no need to do this because the IDE interface is actually designed with polling
 and interrupts in mind - we can do asynchronous I/O and let the guest know 
 when
 the operation has completed.  The benefit of asynchronous I/O is that the 
 guest
 can continue executing code and is more responsive.
 
 The second aim of this conversion is to avoid calling bdrv_read()/bdrv_write()
 since they do not work with I/O throttling.  This means guests should now boot
 IDE drives successfully when I/O throttling is enabled.
 
 Note that ATAPI is not converted yet and still uses bdrv_read() in two
 locations.  A future patch will have to convert ATAPI so CD-ROMs also do
 asynchronous I/O.
 
 I have tested both Windows 7 Home Premium and Red Hat Enterprise Linux 6.0
 guests with these patches.  In Windows, use the device manager to disable DMA
 on the IDE channels.  Under recent Linux kernels, use the libata.dma=0 kernel
 parameter.
 
 Chris and Richard: Please test this to confirm that it fixes the hang you
 reported.
 
 v2:
  * Keep aiocb and cancel request on reset [mjt]
 
 Stefan Hajnoczi (2):
   ide: convert ide_sector_read() to asynchronous I/O
   ide: convert ide_sector_write() to asynchronous I/O
 
  hw/ide/core.c |  137 +---
  hw/ide/internal.h |3 +
  2 files changed, 100 insertions(+), 40 deletions(-)
 

Reviewed-by: Paolo Bonzini pbonz...@redhat.com



Re: [Qemu-devel] [PATCH 15/18] qapi: implement support for variable argument list

2012-04-18 Thread Anthony Liguori

On 04/17/2012 03:26 PM, Paolo Bonzini wrote:

Il 17/04/2012 21:36, Luiz Capitulino ha scritto:

+switch(qobject_type(obj)) {
+case QTYPE_QSTRING:
+qstring_append(arglist,
+   qstring_get_str(qobject_to_qstring(obj)));
+break;


Does this escape commas correctly?

It seems much easier to use no_gen and qemu_opts_from_qdict...  Then
cmd_netdev_add can be

   void cmd_foo(QemuOpts *arglist, Error **errp);

and later on we could even replace the QemuOpts with a visitor for full
QAPI-ness...


Yeah, I think that's a reasonable suggestion.

Regards,

Anthony Liguori



Paolo






Re: [Qemu-devel] [PATCH 15/18] qapi: implement support for variable argument list

2012-04-18 Thread Paolo Bonzini
Il 18/04/2012 14:51, Luiz Capitulino ha scritto:
   It seems much easier to use no_gen and qemu_opts_from_qdict...  Then
   cmd_netdev_add can be
   
   netdev_add/del is expected to be a stable interface, so we can't use 
   no_gen.
  
  You can have hmp_netdev_add and the no_gen qmp_netdev_add as front-ends
  for the QAPI cmd_netdev_add.  I think it's fair when we have to take
  into account backwards-compatibility.  The conversion gives correct
  error propagation, so even though QemuOpts still leaks it's a step in
  the right direction.
 I thought Anthony had plans to replace QemuOpts with something else,
 I think it was qcfg, but I might be wrong. Anthony?

As far as I understood, QCFG is really the code name for a QemuOpts
visitor. :)

The idea is that instead of this:

static int net_init_netdev(QemuOpts *opts, void *dummy)
{
return net_client_init(opts);
}

...

if (qemu_opts_foreach(qemu_find_opts(netdev), net_init_netdev,
NULL, 1) == -1)
return -1;


you automatically generate functions like these:

static int netdev_cb(QemuOpts *opts, void *cb)
{
Error *err = NULL;
NetdevOpts *o = NULL;
int ret;

QapiDeallocVisitor *md = qapi_dealloc_visitor_new();
QemuOptsVisitor *iv = qemu_opts_visitor_new(opts);
Visitor *v;

v = qemu_opts_get_visitor(iv);
visit_type_NetdevOpts(v, (void **) &o, NULL, &err);
qemu_opts_visitor_cleanup(iv);
if (err) {
qerror_report_err(err);
ret = 1;
} else {
int (*p_cb)(NetdevOpts *) = cb;
ret = p_cb(opts);
}

v = qapi_dealloc_get_visitor(ov);
visit_type_NetdevOpts(v, (void **) o, NULL, errp);
qapi_dealloc_visitor_cleanup(ov);
}

int netdev_foreach(int (*cb)(NetdevOpts *))
{
return qemu_opts_foreach(qemu_find_opts(netdev), netdev_cb,
 cb, 1);
}

and just do:

netdev_foreach(net_client_init);

There was more stuff in QCFG, including extensions to QemuOpts to
represent an arbitrary QObject, but the above is pretty much it and is
really what we need at the moment.

Paolo



Re: [Qemu-devel] [PATCH 1/2] [trivial] Generic elimination of auto-generated files

2012-04-18 Thread Lluís Vilanova
Stefan Hajnoczi writes:

 On Sat, Apr 14, 2012 at 12:19:03AM +0200, Lluís Vilanova wrote:
 -rm -f $(GENERATED_HEADERS)
 -rm -f $(GENERATED_SOURCES)
 +rm -f $(foreach f,$(GENERATED_HEADERS),$(f) $(f)-timestamp */$(f) 
 */$(f)-timestamp)
 +rm -f $(foreach f,$(GENERATED_SOURCES),$(f) $(f)-timestamp */$(f) 
 */$(f)-timestamp)

 Why */$(f) */$(f)-timestamp?

Some of the files are generated in immediate subdirectories (e.g.,
libuser/trace.c-timestamp).

I could instead use the output of a $(shell find ...) call, so that it will
always find the right victims no matter where they are.


Lluis

-- 
 And it's much the same thing with knowledge, for whenever you learn
 something new, the whole world becomes that much richer.
 -- The Princess of Pure Reason, as told by Norton Juster in The Phantom
 Tollbooth



Re: [Qemu-devel] [PATCH v2 0/2] ide: convert pio code path to asynchronous I/O

2012-04-18 Thread Zhi Yong Wu
On Thu, Mar 29, 2012 at 5:31 PM, Stefan Hajnoczi
stefa...@linux.vnet.ibm.com wrote:
 IDE PIO mode is currently implemented using synchronous I/O functions.  
 There's
 no need to do this because the IDE interface is actually designed with polling
 and interrupts in mind - we can do asynchronous I/O and let the guest know 
 when
 the operation has completed.  The benefit of asynchronous I/O is that the 
 guest
 can continue executing code and is more responsive.

 The second aim of this conversion is to avoid calling bdrv_read()/bdrv_write()
 since they do not work with I/O throttling.  This means guests should now boot
 IDE drives successfully when I/O throttling is enabled.

 Note that ATAPI is not converted yet and still uses bdrv_read() in two
 locations.  A future patch will have to convert ATAPI so CD-ROMs also do
 asynchronous I/O.

 I have tested both Windows 7 Home Premium and Red Hat Enterprise Linux 6.0
 guests with these patches.  In Windows, use the device manager to disable DMA
 on the IDE channels.  Under recent Linux kernels, use the libata.dma=0 kernel
 parameter.

 Chris and Richard: Please test this to confirm that it fixes the hang you
 reported.

 v2:
  * Keep aiocb and cancel request on reset [mjt]

 Stefan Hajnoczi (2):
  ide: convert ide_sector_read() to asynchronous I/O
  ide: convert ide_sector_write() to asynchronous I/O

  hw/ide/core.c     |  137 +---
  hw/ide/internal.h |    3 +
  2 files changed, 100 insertions(+), 40 deletions(-)

 --
 1.7.9.1


Zhi Yong Wu wu...@linux.vnet.ibm.com


-- 
Regards,

Zhi Yong Wu



Re: [Qemu-devel] [PATCH v2 0/2] ide: convert pio code path to asynchronous I/O

2012-04-18 Thread Zhi Yong Wu
On Thu, Mar 29, 2012 at 5:31 PM, Stefan Hajnoczi
stefa...@linux.vnet.ibm.com wrote:
 IDE PIO mode is currently implemented using synchronous I/O functions.  
 There's
 no need to do this because the IDE interface is actually designed with polling
 and interrupts in mind - we can do asynchronous I/O and let the guest know 
 when
 the operation has completed.  The benefit of asynchronous I/O is that the 
 guest
 can continue executing code and is more responsive.

 The second aim of this conversion is to avoid calling bdrv_read()/bdrv_write()
 since they do not work with I/O throttling.  This means guests should now boot
 IDE drives successfully when I/O throttling is enabled.

 Note that ATAPI is not converted yet and still uses bdrv_read() in two
 locations.  A future patch will have to convert ATAPI so CD-ROMs also do
 asynchronous I/O.

 I have tested both Windows 7 Home Premium and Red Hat Enterprise Linux 6.0
 guests with these patches.  In Windows, use the device manager to disable DMA
 on the IDE channels.  Under recent Linux kernels, use the libata.dma=0 kernel
 parameter.

 Chris and Richard: Please test this to confirm that it fixes the hang you
 reported.

 v2:
  * Keep aiocb and cancel request on reset [mjt]

 Stefan Hajnoczi (2):
  ide: convert ide_sector_read() to asynchronous I/O
  ide: convert ide_sector_write() to asynchronous I/O

  hw/ide/core.c     |  137 +---
  hw/ide/internal.h |    3 +
  2 files changed, 100 insertions(+), 40 deletions(-)

 --
 1.7.9.1



Reviewed-by: Zhi Yong Wu wu...@linux.vnet.ibm.com

-- 
Regards,

Zhi Yong Wu



Re: [Qemu-devel] [PATCH 2/2] [trivial] Simpler handling of tracetool-generated files in makefiles

2012-04-18 Thread Lluís Vilanova
Stefan Hajnoczi writes:

 On Sat, Apr 14, 2012 at 12:19:08AM +0200, Lluís Vilanova wrote:
 trace.h-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak
 -$(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/tracetool.py 
 --format=h --backend=$(TRACE_BACKEND)  $  $@,  GEN   trace.h)
 -@cmp -s $@ trace.h || cp $@ trace.h
 +$(call tracetool-gen,h,$(TRACE_BACKEND))
 +$(call tracetool-ci)

 Here I don't think it's worth introducing an abstraction.  While there
 is a pattern I think the abstraction actually hides what is going on
 rather than being useful.  The macros are hiding output generation, I
 find that especially troubling because you can't really tell what is
 going to happen.  It's clearer to leave these statements open coded.

I just thought it was excessively verbose. Would it work for you if I made it
more explicit?

 $(call tracetool-gen,$,$@,h,$(TRACE_BACKEND))
 $(call tracetool-ci,$@)


The main points bugging me were:

* use of quiet-command plus explicit quiet compilation string ( GEN
  whatever).
* use of $(PYTHON) $(SRC_PATH)/scripts/tracetool.py.


If not, I'll just drop it and instead simply replace calls to tracetool with
$(TRACETOOL).


Lluis

-- 
 And it's much the same thing with knowledge, for whenever you learn
 something new, the whole world becomes that much richer.
 -- The Princess of Pure Reason, as told by Norton Juster in The Phantom
 Tollbooth



Re: [Qemu-devel] xen build failure

2012-04-18 Thread Stefano Stabellini
On Wed, 18 Apr 2012, Stefan Hajnoczi wrote:
 On Wed, Apr 18, 2012 at 09:26:44AM +0200, Alexander Graf wrote:
  
  On 17.04.2012, at 18:53, Stefano Stabellini wrote:
  
   On Tue, 17 Apr 2012, Alon Levy wrote:
   I have xen-devel-4.1.2-14.fc17.x86_64 and am getting the following build
   error:
   
CCx86_64-softmmu/xen-all.o
   /home/alon/src/spice_upstream/qemu/xen-all.c: In function 
   ‘xen_hvm_inject_msi’:
   /home/alon/src/spice_upstream/qemu/xen-all.c:132:5: error: implicit 
   declaration of function ‘xc_hvm_inject_msi’ 
   [-Werror=implicit-function-declaration]
   /home/alon/src/spice_upstream/qemu/xen-all.c:132:5: error: nested extern 
   declaration of ‘xc_hvm_inject_msi’ [-Werror=nested-externs]
   
   commit f1dbf015dfb0aa7f66f710a1f1bc58b662951de2 seems to be missing a
   capability / version check.
   
   Thanks for the report!
   I tested it without Xen and with Xen 4.2 but not with Xen 4.1 :-/
  
  So that means that our buildbot coverage is missing Xen 4.1 :). Could you 
  guys possibly provide a machine with Xen 4.1 that'd be running buildbot so 
  we see at least the compile time issues? :)
 
 It would be good to have buildslave for Xen versions.  Here is more info
 on how to volunteer a buildslave:
 
 http://wiki.qemu.org/ContinuousIntegration

I think it is a very good idea, I'll try to set one up.
Can I pass an arbitrary set of options to configure?
Otherwise I'll set up one VM per Xen version we want to test, I guess at
least three: 4.0, 4.1 and 4.2.

[Qemu-devel] [PATCH 5/9] tracetool: Add support for the 'simple' backend

2012-04-18 Thread Stefan Hajnoczi
From: Lluís Vilanova vilan...@ac.upc.edu

Signed-off-by: Lluís Vilanova vilan...@ac.upc.edu
Signed-off-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com
---
 scripts/tracetool/backend/simple.py |   55 +++
 1 file changed, 55 insertions(+)
 create mode 100644 scripts/tracetool/backend/simple.py

diff --git a/scripts/tracetool/backend/simple.py 
b/scripts/tracetool/backend/simple.py
new file mode 100644
index 000..fbb5717
--- /dev/null
+++ b/scripts/tracetool/backend/simple.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+
+Simple built-in backend.
+
+
+__author__ = Lluís Vilanova vilan...@ac.upc.edu
+__copyright__  = Copyright 2012, Lluís Vilanova vilan...@ac.upc.edu
+__license__= GPL version 2 or (at your option) any later version
+
+__maintainer__ = Stefan Hajnoczi
+__email__  = stefa...@linux.vnet.ibm.com
+
+
+from tracetool import out
+
+
+def c(events):
+out('#include trace.h',
+'',
+'TraceEvent trace_list[] = {')
+
+for e in events:
+out('{.tp_name = %(name)s, .state=0},',
+name = e.name,
+)
+
+out('};')
+
+def h(events):
+out('#include trace/simple.h',
+'')
+
+for num, e in enumerate(events):
+if len(e.args):
+argstr = e.args.names()
+arg_prefix = ', (uint64_t)(uintptr_t)'
+cast_args = arg_prefix + arg_prefix.join(argstr)
+simple_args = (str(num) + cast_args)
+else:
+simple_args = str(num)
+
+out('static inline void trace_%(name)s(%(args)s)',
+'{',
+'trace%(argc)d(%(trace_args)s);',
+'}',
+name = e.name,
+args = e.args,
+argc = len(e.args),
+trace_args = simple_args,
+)
+
+out('#define NR_TRACE_EVENTS %d' % len(events))
+out('extern TraceEvent trace_list[NR_TRACE_EVENTS];')
-- 
1.7.9.5




[Qemu-devel] [PATCH 9/9] tracetool: handle DTrace keywords 'in', 'next', 'self'

2012-04-18 Thread Stefan Hajnoczi
Language keywords cannot be used as argument names.  The DTrace backend
appends an underscore to the argument name in order to make the argument
name legal.

This patch adds 'in', 'next', and 'self' keywords to dtrace.py.

Also drop the unnecessary argument name lstrip() call.  The
Arguments.build() method already ensures there is no space around
argument names.  Furthermore it is misleading to do the lstrip() *after*
checking against keywords because the keyword check would not match if
spaces were in the name.

Signed-off-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com
Reviewed-by: Alon Levy al...@redhat.com
Reviewed-by: Lluís Vilanova vilan...@ac.upc.edu
---
 scripts/tracetool/backend/dtrace.py |8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/tracetool/backend/dtrace.py 
b/scripts/tracetool/backend/dtrace.py
index cebbd57..9cab75c 100644
--- a/scripts/tracetool/backend/dtrace.py
+++ b/scripts/tracetool/backend/dtrace.py
@@ -86,10 +86,10 @@ def stap(events):
 i = 1
 if len(e.args) > 0:
 for name in e.args.names():
-# 'limit' is a reserved keyword
-if name == 'limit':
-name = '_limit'
-out('  %s = $arg%d;' % (name.lstrip(), i))
+# Append underscore to reserved keywords
+if name in ('limit', 'in', 'next', 'self'):
+name += '_'
+out('  %s = $arg%d;' % (name, i))
 i += 1
 
 out('}')
-- 
1.7.9.5




[Qemu-devel] [PATCH 8/9] tracetool: Add MAINTAINERS info

2012-04-18 Thread Stefan Hajnoczi
From: Lluís Vilanova vilan...@ac.upc.edu

Update the MAINTAINERS file to reflect the new Python tracetool code.

[Commit description written by Stefan Hajnoczi]

Signed-off-by: Lluís Vilanova vilan...@ac.upc.edu
Signed-off-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com
---
 MAINTAINERS |2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 922945c..cce37e7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -553,6 +553,8 @@ Tracing
 M: Stefan Hajnoczi stefa...@linux.vnet.ibm.com
 S: Maintained
 F: trace/
+F: scripts/tracetool.py
+F: scripts/tracetool/
 F: docs/tracing.txt
 T: git://github.com/stefanha/qemu.git tracing
 
-- 
1.7.9.5




[Qemu-devel] [PATCH 6/9] tracetool: Add support for the 'ust' backend

2012-04-18 Thread Stefan Hajnoczi
From: Lluís Vilanova vilan...@ac.upc.edu

Signed-off-by: Lluís Vilanova vilan...@ac.upc.edu
Signed-off-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com
---
 scripts/tracetool/backend/ust.py |   90 ++
 1 file changed, 90 insertions(+)
 create mode 100644 scripts/tracetool/backend/ust.py

diff --git a/scripts/tracetool/backend/ust.py b/scripts/tracetool/backend/ust.py
new file mode 100644
index 000..31a2ff0
--- /dev/null
+++ b/scripts/tracetool/backend/ust.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+
+LTTng User Space Tracing backend.
+
+
+__author__ = Lluís Vilanova vilan...@ac.upc.edu
+__copyright__  = Copyright 2012, Lluís Vilanova vilan...@ac.upc.edu
+__license__= GPL version 2 or (at your option) any later version
+
+__maintainer__ = Stefan Hajnoczi
+__email__  = stefa...@linux.vnet.ibm.com
+
+
+from tracetool import out
+
+
+def c(events):
+out('#include ust/marker.h',
+'#undef mutex_lock',
+'#undef mutex_unlock',
+'#undef inline',
+'#undef wmb',
+'#include trace.h')
+
+for e in events:
+argnames = ", ".join(e.args.names())
+if len(e.args) > 0:
+argnames = ', ' + argnames
+
+out('DEFINE_TRACE(ust_%(name)s);',
+'',
+'static void ust_%(name)s_probe(%(args)s)',
+'{',
+'trace_mark(ust, %(name)s, %(fmt)s%(argnames)s);',
+'}',
+name = e.name,
+args = e.args,
+fmt = e.fmt,
+argnames = argnames,
+)
+
+else:
+out('DEFINE_TRACE(ust_%(name)s);',
+'',
+'static void ust_%(name)s_probe(%(args)s)',
+'{',
+'trace_mark(ust, %(name)s, UST_MARKER_NOARGS);',
+'}',
+name = e.name,
+args = e.args,
+)
+
+# register probes
+out('',
+'static void __attribute__((constructor)) trace_init(void)',
+'{')
+
+for e in events:
+out('register_trace_ust_%(name)s(ust_%(name)s_probe);',
+name = e.name,
+)
+
+out('}')
+
+
+def h(events):
+out('#include ust/tracepoint.h',
+'#undef mutex_lock',
+'#undef mutex_unlock',
+'#undef inline',
+'#undef wmb')
+
+for e in events:
+if len(e.args)  0:
+out('DECLARE_TRACE(ust_%(name)s, TP_PROTO(%(args)s), 
TP_ARGS(%(argnames)s));',
+'#define trace_%(name)s trace_ust_%(name)s',
+name = e.name,
+args = e.args,
+argnames = , .join(e.args.names()),
+)
+
+else:
+out('_DECLARE_TRACEPOINT_NOARGS(ust_%(name)s);',
+'#define trace_%(name)s trace_ust_%(name)s',
+name = e.name,
+)
+
+out()
-- 
1.7.9.5




[Qemu-devel] [PATCH 2/9] tracetool: Add module for the 'c' format

2012-04-18 Thread Stefan Hajnoczi
From: Lluís Vilanova vilan...@ac.upc.edu

Signed-off-by: Lluís Vilanova vilan...@ac.upc.edu
Signed-off-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com
---
 scripts/tracetool/format/c.py |   20 
 1 file changed, 20 insertions(+)
 create mode 100644 scripts/tracetool/format/c.py

diff --git a/scripts/tracetool/format/c.py b/scripts/tracetool/format/c.py
new file mode 100644
index 000..3ae
--- /dev/null
+++ b/scripts/tracetool/format/c.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+
+Generate .c file.
+
+
+__author__ = Lluís Vilanova vilan...@ac.upc.edu
+__copyright__  = Copyright 2012, Lluís Vilanova vilan...@ac.upc.edu
+__license__= GPL version 2 or (at your option) any later version
+
+__maintainer__ = Stefan Hajnoczi
+__email__  = stefa...@linux.vnet.ibm.com
+
+
+from tracetool import out
+
+
+def begin(events):
+out('/* This file is autogenerated by tracetool, do not edit. */')
-- 
1.7.9.5




[Qemu-devel] [PATCH 1/9] tracetool: Rewrite infrastructure as python modules

2012-04-18 Thread Stefan Hajnoczi
From: Lluís Vilanova vilan...@ac.upc.edu

The tracetool script is written in shell and has hit several portability
problems due to shell quirks or external tools across host platforms.
Additionally the amount of string processing and lack of real data
structures makes it tough to implement code generator backends for
tracers that are more complex.

This patch replaces the shell version of tracetool with a Python
version.  The new tracetool design is:

  scripts/tracetool.py - top-level script
  scripts/tracetool/backend/ - tracer backends live here (simple, ust)
  scripts/tracetool/format/  - output formats live here (.c, .h)

There is common code for trace-events definition parsing so that
backends can focus on generating code rather than parsing input.

Support for all existing backends (nop, stderr, simple, ust,
and dtrace) is added back in follow-up patches.

[Commit description written by Stefan Hajnoczi]

Signed-off-by: Lluís Vilanova vilan...@ac.upc.edu
Signed-off-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com
---
 Makefile.objs |6 +-
 Makefile.target   |   13 +-
 configure |4 +-
 scripts/tracetool |  666 -
 scripts/tracetool.py  |  108 ++
 scripts/tracetool/__init__.py |  262 +
 scripts/tracetool/backend/__init__.py |  111 ++
 scripts/tracetool/format/__init__.py  |   99 +
 8 files changed, 592 insertions(+), 677 deletions(-)
 delete mode 100755 scripts/tracetool
 create mode 100755 scripts/tracetool.py
 create mode 100644 scripts/tracetool/__init__.py
 create mode 100644 scripts/tracetool/backend/__init__.py
 create mode 100644 scripts/tracetool/format/__init__.py

diff --git a/Makefile.objs b/Makefile.objs
index 5c3bcda..6d6f24d 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -378,12 +378,12 @@ else
 trace.h: trace.h-timestamp
 endif
 trace.h-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak
-   $(call quiet-command,sh $(SRC_PATH)/scripts/tracetool 
--$(TRACE_BACKEND) -h  $  $@,  GEN   trace.h)
+   $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/tracetool.py 
--format=h --backend=$(TRACE_BACKEND)  $  $@,  GEN   trace.h)
@cmp -s $@ trace.h || cp $@ trace.h
 
 trace.c: trace.c-timestamp
 trace.c-timestamp: $(SRC_PATH)/trace-events $(BUILD_DIR)/config-host.mak
-   $(call quiet-command,sh $(SRC_PATH)/scripts/tracetool 
--$(TRACE_BACKEND) -c  $  $@,  GEN   trace.c)
+   $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/tracetool.py 
--format=c --backend=$(TRACE_BACKEND)  $  $@,  GEN   trace.c)
@cmp -s $@ trace.c || cp $@ trace.c
 
 trace.o: trace.c $(GENERATED_HEADERS)
@@ -396,7 +396,7 @@ trace-dtrace.h: trace-dtrace.dtrace
 # rule file. So we use '.dtrace' instead
 trace-dtrace.dtrace: trace-dtrace.dtrace-timestamp
 trace-dtrace.dtrace-timestamp: $(SRC_PATH)/trace-events 
$(BUILD_DIR)/config-host.mak
-   $(call quiet-command,sh $(SRC_PATH)/scripts/tracetool 
--$(TRACE_BACKEND) -d  $  $@,  GEN   trace-dtrace.dtrace)
+   $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/tracetool.py 
--format=d --backend=$(TRACE_BACKEND)  $  $@,  GEN   trace-dtrace.dtrace)
@cmp -s $@ trace-dtrace.dtrace || cp $@ trace-dtrace.dtrace
 
 trace-dtrace.o: trace-dtrace.dtrace $(GENERATED_HEADERS)
diff --git a/Makefile.target b/Makefile.target
index 84951a0..a0540cd 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -59,12 +59,13 @@ TARGET_TYPE=system
 endif
 
 $(QEMU_PROG).stp: $(SRC_PATH)/trace-events
-   $(call quiet-command,sh $(SRC_PATH)/scripts/tracetool \
-   --$(TRACE_BACKEND) \
-   --binary $(bindir)/$(QEMU_PROG) \
-   --target-arch $(TARGET_ARCH) \
-   --target-type $(TARGET_TYPE) \
-   --stap  $(SRC_PATH)/trace-events  $(QEMU_PROG).stp,  GEN   
$(QEMU_PROG).stp)
+   $(call quiet-command,$(PYTHON) $(SRC_PATH)/scripts/tracetool.py \
+   --format=stap \
+   --backend=$(TRACE_BACKEND) \
+   --binary=$(bindir)/$(QEMU_PROG) \
+   --target-arch=$(TARGET_ARCH) \
+   --target-type=$(TARGET_TYPE) \
+$(SRC_PATH)/trace-events  $(QEMU_PROG).stp,  GEN   
$(QEMU_PROG).stp)
 else
 stap:
 endif
diff --git a/configure b/configure
index 2d62d12..03b49f6 100755
--- a/configure
+++ b/configure
@@ -1097,7 +1097,7 @@ echo   --disable-docs   disable documentation 
build
 echo   --disable-vhost-net  disable vhost-net acceleration support
 echo   --enable-vhost-net   enable vhost-net acceleration support
 echo   --enable-trace-backend=B Set trace backend
-echoAvailable backends: 
$($source_path/scripts/tracetool --list-backends)
+echoAvailable backends: $($python 
$source_path/scripts/tracetool.py --list-backends)
 echo   --with-trace-file=NAME   Full PATH,NAME of file to store 

[Qemu-devel] [PATCH 7/9] tracetool: Add support for the 'dtrace' backend

2012-04-18 Thread Stefan Hajnoczi
From: Lluís Vilanova vilan...@ac.upc.edu

Signed-off-by: Lluís Vilanova vilan...@ac.upc.edu
Signed-off-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com
---
 scripts/tracetool.py|   32 +++-
 scripts/tracetool/__init__.py   |   11 +++-
 scripts/tracetool/backend/dtrace.py |   97 +++
 scripts/tracetool/format/d.py   |   20 
 scripts/tracetool/format/stap.py|   20 
 5 files changed, 178 insertions(+), 2 deletions(-)
 create mode 100644 scripts/tracetool/backend/dtrace.py
 create mode 100644 scripts/tracetool/format/d.py
 create mode 100644 scripts/tracetool/format/stap.py

diff --git a/scripts/tracetool.py b/scripts/tracetool.py
index fe2ea34..cacfd99 100755
--- a/scripts/tracetool.py
+++ b/scripts/tracetool.py
@@ -44,6 +44,11 @@ Options:
 --help   This help message.
 --list-backends  Print list of available backends.
 --check-backend  Check if the given backend is valid.
+--binary path  Full path to QEMU binary.
+--target-type type QEMU emulator target type ('system' or 'user').
+--target-arch arch QEMU emulator target arch.
+--probe-prefix prefix  Prefix for dtrace probe names
+ (default: qemu-target-type-target-arch).\
  % {
 script : _SCRIPT,
 backends : backend_descr,
@@ -71,6 +76,10 @@ def main(args):
 check_backend = False
 arg_backend = 
 arg_format = 
+binary = None
+target_type = None
+target_arch = None
+probe_prefix = None
 for opt, arg in opts:
 if opt == --help:
 error_opt()
@@ -87,6 +96,15 @@ def main(args):
 elif opt == --check-backend:
 check_backend = True
 
+elif opt == --binary:
+binary = arg
+elif opt == '--target-type':
+target_type = arg
+elif opt == '--target-arch':
+target_arch = arg
+elif opt == '--probe-prefix':
+probe_prefix = arg
+
 else:
 error_opt(unhandled option: %s % opt)
 
@@ -99,8 +117,20 @@ def main(args):
 else:
 sys.exit(1)
 
+if arg_format == stap:
+if binary is None:
+error_opt(--binary is required for SystemTAP tapset generator)
+if probe_prefix is None and target_type is None:
+error_opt(--target-type is required for SystemTAP tapset 
generator)
+if probe_prefix is None and target_arch is None:
+error_opt(--target-arch is required for SystemTAP tapset 
generator)
+
+if probe_prefix is None:
+probe_prefix = ".".join([ "qemu", target_type, target_arch ])
+
 try:
-tracetool.generate(sys.stdin, arg_format, arg_backend)
+tracetool.generate(sys.stdin, arg_format, arg_backend,
+   binary = binary, probe_prefix = probe_prefix)
 except tracetool.TracetoolError as e:
 error_opt(str(e))
 
diff --git a/scripts/tracetool/__init__.py b/scripts/tracetool/__init__.py
index 1719bb4..74fe21b 100644
--- a/scripts/tracetool/__init__.py
+++ b/scripts/tracetool/__init__.py
@@ -212,7 +212,8 @@ def try_import(mod_name, attr_name = None, attr_default = 
None):
 return False, None
 
 
-def generate(fevents, format, backend):
+def generate(fevents, format, backend,
+ binary = None, probe_prefix = None):
 Generate the output for the given (format, backend) pair.
 
 Parameters
@@ -223,6 +224,10 @@ def generate(fevents, format, backend):
 Output format name.
 backend : str
 Output backend name.
+binary : str or None
+See tracetool.backend.dtrace.BINARY.
+probe_prefix : str or None
+See tracetool.backend.dtrace.PROBEPREFIX.
 
 # fix strange python error (UnboundLocalError tracetool)
 import tracetool
@@ -245,6 +250,10 @@ def generate(fevents, format, backend):
 raise TracetoolError(backend '%s' not compatible with format '%s' %
  (backend, format))
 
+import tracetool.backend.dtrace
+tracetool.backend.dtrace.BINARY = binary
+tracetool.backend.dtrace.PROBEPREFIX = probe_prefix
+
 events = _read_events(fevents)
 
 if backend == nop:
diff --git a/scripts/tracetool/backend/dtrace.py 
b/scripts/tracetool/backend/dtrace.py
new file mode 100644
index 000..cebbd57
--- /dev/null
+++ b/scripts/tracetool/backend/dtrace.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+
+DTrace/SystemTAP backend.
+
+
+__author__ = Lluís Vilanova vilan...@ac.upc.edu
+__copyright__  = Copyright 2012, Lluís Vilanova vilan...@ac.upc.edu
+__license__= GPL version 2 or (at your option) any later version
+
+__maintainer__ = Stefan Hajnoczi
+__email__  = stefa...@linux.vnet.ibm.com
+
+
+from tracetool import out
+
+
+PROBEPREFIX = None
+
+def _probeprefix():
+if PROBEPREFIX is None:
+raise ValueError(you must set 

  1   2   3   >