Re: [Qemu-devel] [PATCH 6/6] qmp: add balloon-get-memory-stats event

2012-02-09 Thread Adam Litke
 out any stale values that might
 - * have been set by a more featureful guest kernel.
 - */
 -reset_stats(dev);
 +}
 
 +static void virtio_balloon_info(void *opaque, BalloonInfo *info)
 +{
 +VirtIOBalloon *dev = opaque;
  info-actual = ram_size - ((uint64_t) dev-actual 
 VIRTIO_BALLOON_PFN_SHIFT);
  }
 @@ -236,18 +262,18 @@ VirtIODevice *virtio_balloon_init(DeviceState *dev)
  s-vdev.get_features = virtio_balloon_get_features;
 
  ret = qemu_add_balloon_handler(virtio_balloon_to_target,
 -   virtio_balloon_info, s);
 +   virtio_balloon_info,
 +   virtio_balloon_stats, s);
  if (ret  0) {
  virtio_cleanup(s-vdev);
  return NULL;
  }
 
 +s-stats_requested = false;
  s-ivq = virtio_add_queue(s-vdev, 128, virtio_balloon_handle_output);
  s-dvq = virtio_add_queue(s-vdev, 128, virtio_balloon_handle_output);
  s-svq = virtio_add_queue(s-vdev, 128, virtio_balloon_receive_stats);
 
 -reset_stats(s);
 -
  s-qdev = dev;
  register_savevm(dev, virtio-balloon, -1, 1,
  virtio_balloon_save, virtio_balloon_load, s);
 diff --git a/monitor.c b/monitor.c
 index aadbdcb..868f2a0 100644
 --- a/monitor.c
 +++ b/monitor.c
 @@ -484,6 +484,8 @@ void monitor_protocol_event(MonitorEvent event, QObject 
 *data)
  break;
  case QEVENT_BLOCK_JOB_CANCELLED:
  event_name = BLOCK_JOB_CANCELLED;
 +case QEVENT_BALLOON_STATS:
 +event_name = BALLOON_MEMORY_STATS;
  break;
  default:
  abort();
 diff --git a/monitor.h b/monitor.h
 index b72ea07..76e26c7 100644
 --- a/monitor.h
 +++ b/monitor.h
 @@ -38,6 +38,7 @@ typedef enum MonitorEvent {
  QEVENT_SPICE_DISCONNECTED,
  QEVENT_BLOCK_JOB_COMPLETED,
  QEVENT_BLOCK_JOB_CANCELLED,
 +QEVENT_BALLOON_STATS,
  QEVENT_MAX,
  } MonitorEvent;
 
 diff --git a/qapi-schema.json b/qapi-schema.json
 index 24a42e3..c4d8d0c 100644
 --- a/qapi-schema.json
 +++ b/qapi-schema.json
 @@ -1563,3 +1563,24 @@
  { 'command': 'qom-list-types',
'data': { '*implements': 'str', '*abstract': 'bool' },
'returns': [ 'ObjectTypeInfo' ] }
 +
 +##
 +# @balloon-get-memory-stats
 +#
 +# Ask the guest's balloon driver for guest memory statistics.
 +#
 +# This command will only get the process started and will return immediately.
 +# The BALLOON_MEMORY_STATS event will be emitted when the statistics
 +# information is returned by the guest.
 +#
 +# Returns: nothing on success
 +#  If the balloon driver is enabled but not functional because the 
 KVM
 +#  kernel module cannot support it, KvmMissingCap
 +#  If no balloon device is present, DeviceNotActive
 +#
 +# Notes: There's no guarantees the guest will ever respond, thus the
 +#BALLOON_MEMORY_STATS event may never be emitted.
 +#
 +# Since: 1.1
 +##
 +{ 'command': 'balloon-get-memory-stats' }
 diff --git a/qmp-commands.hx b/qmp-commands.hx
 index b5e2ab8..52c1fc3 100644
 --- a/qmp-commands.hx
 +++ b/qmp-commands.hx
 @@ -2047,3 +2047,8 @@ EQMP
  .args_type  = implements:s?,abstract:b?,
  .mhandler.cmd_new = qmp_marshal_input_qom_list_types,
  },
 +{
 +.name   = balloon-get-memory-stats,
 +.args_type  = ,
 +.mhandler.cmd_new = qmp_marshal_input_balloon_get_memory_stats,
 +},
 -- 
 1.7.9.111.gf3fb0.dirty
 

-- 
Adam Litke a...@us.ibm.com
IBM Linux Technology Center




Re: [Qemu-devel] [RFC 0/5]: QMP: add balloon-get-memory-stats command

2012-01-19 Thread Adam Litke
On Thu, Jan 19, 2012 at 01:56:26PM -0200, Luiz Capitulino wrote:
 Long ago, commit 625a5be added the guest provided memory statistics to
 the query-balloon command. Unfortunately, it also introduced a severe
 bug: query-balloon would hang if the guest didn't respond. This, in turn,
 would also cause a hang in libvirt.
 
 Because of that, we decided to disable the guest memory stats feature
 (commit 11724ff).
 
 As we decided to let commands implement ad-hoc async mechanisms until we
 get a proper way to do it, I decided to try to re-enable that feature.
 
 My idea is to have a command and an event. The command gets the process
 started by sending a request to guest and returns. Later, when the guest
 makes the memory stats info available, it's sent to the client by means
 of an QMP event (please, take a look at patch 05/05 for full details).
 
 I'm not sure if that approach is good for libvirt though, so it would be
 very helpful to get their input (Eric, I'm CC'ing you here, but feel free
 to route this to someone else).
 
 Another interesting point is that, there's another way of doing this and
 it's using qemu-ga instead. That's, qemu-ga could read that information
 from proc and return it. This is easier  simpler, as it doesn't involve
 guest communication. We also could return a lot more information if needed.
 The only disadvantage I can see is the dependency on qemu-ga...
 
  QMP/qmp-events.txt  |   28 
  balloon.c   |   47 +--
  balloon.h   |7 ---
  hmp.c   |   25 +
  hw/virtio-balloon.c |   39 +++
  monitor.c   |3 +++
  monitor.h   |1 +
  qapi-schema.json|   42 ++
  qmp-commands.hx |6 ++
  9 files changed, 129 insertions(+), 69 deletions(-)
 

The patch series looks good.  Thanks for making the improvements.  Once it is
upstream I can help out with the libvirt pieces.  I tested it with a Fedora-15
VM and it worked out of the box :)  I just have one small nit.  Due to the way
that the virtio stats queue works, the guest emits a stats event with bogus
values when the balloon driver initializes (which gives the host control of the
channel).  Your patches emit this initial event (which contains undefined
values).  In my old code, we kept a boolean flag in the ballon device to record
if stats have been requested and only if that was set would we raise the event.
Without this, the guest can spam the host with an unlimited number of bogus
events.

Tested-by: Adam Litke a...@us.ibm.com

-- 
Adam Litke a...@us.ibm.com
IBM Linux Technology Center




Re: [Qemu-devel] [PATCH] build: Cleanup qga make output

2011-12-13 Thread Adam Litke
On Tue, Dec 13, 2011 at 10:23:18AM -0200, Luiz Capitulino wrote:
 On Mon, 12 Dec 2011 17:38:36 -0600
 Michael Roth mdr...@linux.vnet.ibm.com wrote:
 
  On 12/12/2011 05:03 PM, Anthony Liguori wrote:
   On 12/07/2011 10:33 AM, Adam Litke wrote:
   Currently the make variable qapi-dir refers to the qapi-generated
   directory in
   absolute terms. This causes the harmless but ugly make output below. By
   changing this variable to the relative path the output conforms to the
   norm and
   the build works fine.
  
   Before patch:
   CC /home/aglitke/src/qemu/qapi-generated/qga-qapi-types.o
   CC /home/aglitke/src/qemu/qapi-generated/qga-qapi-visit.o
   CC /home/aglitke/src/qemu/qapi-generated/qga-qmp-marshal.o
   After patch:
   CC qapi-generated/qga-qapi-types.o
   CC qapi-generated/qga-qapi-visit.o
   CC qapi-generated/qga-qmp-marshal.o
  
   This was supposedly to fix a build issue that I was never able to
   reproduce. I think Luiz could reproduce it though. Luiz, could you try
   out Adam's patch and confirm it breaks for you?
 
 It doesn't :( I also tried to reproduce again with -rc4 (which was the
 version I was getting it) but it doesn't happen anymore.
 
  I think that was Stefano:
  
  http://lists.gnu.org/archive/html/qemu-devel/2011-11/msg02752.html
  
  Stefano had a patch that fixed a build breakage he saw with dirty 
  directories by setting:
  
  qapi-dir := $(SRC_DIR)/qapi-generated
  
  That patch ended up breaking the build for others, however, and was 
  reverted.
  
  This patch was an improvement on the original, but we'd all agreed that 
  it wasn't necessary since we don't support working around issues related 
  to dirty directories.

I am lacking some context here.  I am confused how a dirty directory could
change the output in this way.

 What kind of issue? I'm 100% certain that my tree was clean, the only
 thing I did not do was to clone it again.
 
 We have had races when building the qapi in the past, maybe that's the
 problem and commit 9b12940 makes it less likely.
 
 We could revert it and wait for the problem to happen again...

I would be fine with doing whatever is least likely to cause anyone else
problems.  I wouldn't want to break the actual build just to make the output
look nice.

 
  
  So if it's ugly, we can safely drop it.
  
  
   Regards,
  
   Anthony Liguori
  
  
   Signed-off-by: Adam Litkea...@us.ibm.com
   ---
   Makefile | 2 +-
   1 files changed, 1 insertions(+), 1 deletions(-)
  
   diff --git a/Makefile b/Makefile
   index 301c75e..7c93739 100644
   --- a/Makefile
   +++ b/Makefile
   @@ -168,7 +168,7 @@ check-qjson: check-qjson.o $(qobject-obj-y)
   $(tools-obj-y)
   test-coroutine: test-coroutine.o qemu-timer-common.o async.o
   $(coroutine-obj-y) $(tools-obj-y)
  
   $(qapi-obj-y): $(GENERATED_HEADERS)
   -qapi-dir := $(BUILD_DIR)/qapi-generated
   +qapi-dir := qapi-generated
   test-visitor.o test-qmp-commands.o qemu-ga$(EXESUF): QEMU_CFLAGS += -I
   $(qapi-dir)
   qemu-ga$(EXESUF): LIBS = $(LIBS_QGA)
  
  
  
  
 

-- 
Adam Litke a...@us.ibm.com
IBM Linux Technology Center




Re: [Qemu-devel] [libvirt] virDomainBlockJobAbort and block_job_cancel

2011-12-08 Thread Adam Litke
On Wed, Dec 07, 2011 at 04:01:58PM -0700, Eric Blake wrote:
 On 12/07/2011 03:35 PM, Adam Litke wrote:
  Stefan's qemu tree has a block_job_cancel command that always acts
  asynchronously.  In order to provide the synchronous behavior in libvirt 
  (when
  flags is 0), I need to wait for the block job to go away.  I see two 
  options:
  
  1) Use the event:
  To implement this I would create an internal event callback function and
  register it to receive the block job events.  When the cancel event comes 
  in, I
  can exit the qemu job context.  One problem I see with this is that events 
  are
  only available when using the json monitor mode.
 
 I like this idea.  We have internally handled events before, and limited
 it to just JSON if that made life easier: for example, virDomainReboot
 on qemu is rejected if you only have the HMP monitor, and if you have
 the JSON monitor, the implementation internally handles the event to
 change the domain state.
 
 Can we reliably detect whether qemu is new enough to provide the event,
 and if qemu was older and did not provide the event, do we reliably know
 that abort was blocking in that case?

I think we can say that qemu will operate in one of two modes:
a) Blocking abort AND event is not emitted
b) Non-blocking abort AND event is emitted

The difficulty is in detecting which case the current qemu supports.  I don't
believe there is a way to query qemu for a list of currently-supported events.
Therefore, we'd have to use version numbers.  If we do this, how do we avoid
breaking users of qemu git versions that fall between official qemu releases?

 It's okay to make things work that used to fail, but it is a regression
 to make blocking job cancel fail where it used to work, so rejecting
 blocking job cancel with HMP monitor is not a good idea.  If we can
 guarantee that all qemu new enough to have async cancel also support the
 event, while older qemu was always blocking, and that we can always use
 the JSON monitor to newer qemu, then we're set - merely ensure that we
 use only the JSON monitor and the event.  But if we can't make the
 guarantees, and insist on supporting newer qemu via HMP monitor, then we
 may need a hybrid combination of options 1 and 2, or for less code
 maintenance, just option 2.

Is there a deprecation plan for HMP with newer qemu versions?  I really hate the
idea of needing two implementations for this: one polling and one event-based.

  2) Poll the qemu monitor
  To do it this way, I would write a function that repeatedly calls
  virDomainGetBlockJobInfo() against the disk in question.  Once the job
  disappears from the list I can return with confidence that the job is gone.
  This is obviously sub-optimal because I need to poll and sleep.
 
 We've done this before, for both HMP and JSON - see
 qemuMigrationWaitForCompletion.  I agree that an event is nicer than
 polling, but we may be locked into this.
 
  
  3) Ask Stefan to provide a synchronous mode for the qemu monitor command
  This one is the nicest from a libvirt perspective, but I doubt qemu wants 
  to add
  such an interface given that it basically has broken semantics as far as 
  qemu is
  concerned.
 
 Or even:
 
 4) Ask Stefan to make the HMP monitor command synchronous, but only
 expose the JSON command as asynchronous.  After all, it is only HMP
 where we can't wait for an event.

Stefan, how 'bout it?

  
  If this is all too nasty, we could probably just change the behavior of
  blockJobAbort and make it always synchronous with a 'cancelled' event.
 
 No, we can't change the behavior without breaking back-compat of
 existing clients of job cancellation.

-- 
Adam Litke a...@us.ibm.com
IBM Linux Technology Center




[Qemu-devel] [PATCH] build: Cleanup qga make output

2011-12-07 Thread Adam Litke
Currently the make variable qapi-dir refers to the qapi-generated directory in
absolute terms.  This causes the harmless but ugly make output below.  By
changing this variable to the relative path the output conforms to the norm and
the build works fine.

Before patch:
  CC/home/aglitke/src/qemu/qapi-generated/qga-qapi-types.o
  CC/home/aglitke/src/qemu/qapi-generated/qga-qapi-visit.o
  CC/home/aglitke/src/qemu/qapi-generated/qga-qmp-marshal.o
After patch:
  CCqapi-generated/qga-qapi-types.o
  CCqapi-generated/qga-qapi-visit.o
  CCqapi-generated/qga-qmp-marshal.o

Signed-off-by: Adam Litke a...@us.ibm.com
---
 Makefile |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/Makefile b/Makefile
index 301c75e..7c93739 100644
--- a/Makefile
+++ b/Makefile
@@ -168,7 +168,7 @@ check-qjson: check-qjson.o $(qobject-obj-y) $(tools-obj-y)
 test-coroutine: test-coroutine.o qemu-timer-common.o async.o 
$(coroutine-obj-y) $(tools-obj-y)
 
 $(qapi-obj-y): $(GENERATED_HEADERS)
-qapi-dir := $(BUILD_DIR)/qapi-generated
+qapi-dir := qapi-generated
 test-visitor.o test-qmp-commands.o qemu-ga$(EXESUF): QEMU_CFLAGS += -I 
$(qapi-dir)
 qemu-ga$(EXESUF): LIBS = $(LIBS_QGA)
 
-- 
1.7.5.rc1




Re: [Qemu-devel] [libvirt] virDomainBlockJobAbort and block_job_cancel

2011-12-07 Thread Adam Litke
On Thu, Nov 24, 2011 at 09:21:42AM +, Stefan Hajnoczi wrote:
 On Thu, Nov 24, 2011 at 5:31 AM, Daniel Veillard veill...@redhat.com wrote:
  On Wed, Nov 23, 2011 at 09:04:50AM -0700, Eric Blake wrote:
  On 11/23/2011 07:48 AM, Stefan Hajnoczi wrote:
   This means that virDomainBlockJobAbort() returns to the client without
   a guarantee that the job has completed.  If the client enumerates jobs
   it may still see a job that has not finished cancelling.  The client
   must register a handler for the BLOCK_JOB_CANCELLED event if it wants
   to know when the job really goes away.  The BLOCK_JOB_CANCELLED event
   has the same fields as the BLOCK_JOB_COMPLETED event, except it lacks
   the optional error message field.
  
   The impact on clients is that they need to add a BLOCK_JOB_CANCELLED
   handler if they really want to wait.  Most clients today (not many
   exist) will be fine without waiting for cancellation.
  
   Any objections or thoughts on this?
 
  virDomainBlockJobAbort() thankfully has an 'unsigned int flags'
  argument.  For backwards-compatibility, I suggest we use it:
 
  calling virDomainBlockJobAbort(,0) maintains old blocking behavior, and
  we document that blocking until things abort may render the rest of
  interactions with the domain unresponsive.
 
  The new virDomainBlockJobAbort(,VIR_DOMAIN_BLOCK_JOB_ABORT_ASYNC) would
  then implement your new proposed semantics of returning immediately once
  the cancellation has been requested, even if it hasn't been acted on yet.
 
  Maybe you could convince me to swap the flags: have 0 change semantics
  to non-blocking, and a new flag to request blocking, where callers that
  care have to try the flag, and if the flag is unsupported then they know
  they are talking to older libvirtd where the behavior is blocking by
  default, but that's a bit riskier.
 
   Agreed, I would rather not change the current call semantic,
  but an ASYNC flag would be a really good addition. We can document
  the risk of not using it in the function description and suggest
  new applications use ASYNC flag.
 
 Yep, that's a nice suggestion and solves the problem.

I am almost ready to post the code that makes the above change, but before I do,
I need to ask a question about implementing synchronous behavior.

Stefan's qemu tree has a block_job_cancel command that always acts
asynchronously.  In order to provide the synchronous behavior in libvirt (when
flags is 0), I need to wait for the block job to go away.  I see two options:

1) Use the event:
To implement this I would create an internal event callback function and
register it to receive the block job events.  When the cancel event comes in, I
can exit the qemu job context.  One problem I see with this is that events are
only available when using the json monitor mode.

2) Poll the qemu monitor
To do it this way, I would write a function that repeatedly calls
virDomainGetBlockJobInfo() against the disk in question.  Once the job
disappears from the list I can return with confidence that the job is gone.
This is obviously sub-optimal because I need to poll and sleep.

3) Ask Stefan to provide a synchronous mode for the qemu monitor command
This one is the nicest from a libvirt perspective, but I doubt qemu wants to add
such an interface given that it basically has broken semantics as far as qemu is
concerned.

If this is all too nasty, we could probably just change the behavior of
blockJobAbort and make it always synchronous with a 'cancelled' event.

Thoughts?

-- 
Adam Litke a...@us.ibm.com
IBM Linux Technology Center




Re: [Qemu-devel] wiki summary

2011-11-18 Thread Adam Litke
 if
  there's so be any pretense of security, since we have access to
  everything at the end of the day. Additionally, VMware has been
  successfully leveraging guest file access, automatic upgrades of guest
  tools, and exec functionality for quite some time now.
  
  That's not to say we don't need to examine the implications closely, but
  there's precedence.
 
 
 1 - We have had such functionality in the ovirt-guest-agent and removed it 
 becuase of security (BTW it's very easy to add it back)
 
 2 - it's not about trusting qemu, it's about trusting who ever use such an 
 API, meaning: that eventually there is a management system with lots of users 
 and permissions that allow to use this api, so the exposure is much much 
 bigger than just to qemu itself. keep in mind that I qemu only supply the 
 APIs, i find it hard to believe that it will acually do some upgrade logic on 
 it's own. 

The security problems are addressable (via auditing, guest and host side
controls, etc).  And as far as upgrade goes, making the agent a part of qemu
will actually help.  The monitor will have two APIs: one to check if a guest
agent as installed and query capabilities/version (already present), and another
to present a guest-tools ISO to the guest for installation/upgrade.  With these
two host-side APIs in place, it will be possible to provide a trivial
guest-tools-upgrader that could be run when the guest tools iso is updated on
the host.

 
  
  - I understand the motivation of being able to do everything on the
  guest
  
(exe) but we need to keep in mind it's various guest OSs, and it
means that there should be a script for every OS type. to me the
option of having a well defined interface is much more appealing
  
  Agreed, and we should strive for that. But rarely is an interface
  designed so well that it never needs to change, and however well-defined
  it may be, it will grow with time and that growth entails deploying new
  guest code.
 
 Hence my concern above, about having to pass every new API through qemu  
 libvirt will slow down features drastically.

I am sure your sentiment is shared by non-oVirt users who would now need to
implement low-level guest agent features in an unrelated software stack.  I
think we need a separation of responsibility.  Low-level general purpose agent
functionality should be built into a hypervisor (qemu) API which should be
consumable by higher level management systems in a natural way.


-- 
Adam Litke a...@us.ibm.com
IBM Linux Technology Center




Re: [Qemu-devel] converging around a single guest agent

2011-11-17 Thread Adam Litke
 in as before.
  
  Obviously, this type of approach could be made easier by providing a well
  designed exec API that returns command exit codes and (optionally) command
  output.  We could also formalize the install of additional components into
  some sort of plugin interface.  These are all relatively easy problems to
  solve.
  
  If we go in this direction, we would have a simple, general-purpose agent
  with low-level primitives that everyone can use.  We would also be able to
  easily extend the agent based on the needs of individual deployments (not
  the least of which is an oVirt environment).  If certain plugins become
  popular enough, they can always be promoted to first-order API calls in
  future versions of the API.
  
  What are your thoughts on this approach?
  
  -- Adam Litke a...@us.ibm.com IBM Linux Technology Center
  
  ___ Arch mailing list
  a...@ovirt.org http://lists.ovirt.org/mailman/listinfo/arch
  
 

-- 
Adam Litke a...@us.ibm.com
IBM Linux Technology Center




Re: [Qemu-devel] converging around a single guest agent

2011-11-16 Thread Adam Litke
I have been following this thread pretty closely and the one sentence summary of
the current argument is: ovirt-guest-agent is already featureful and tested, so
let's drop qemu-ga and have everyone adopt ovirt-guest-agent.  Unfortunately,
this track strays completely away from the stated goal of convergence.  I have
at least two examples of why the greater KVM community can never adopt
ovirt-guest-agent as-is.  To address this, I would like to counter with an
example on how qemu-ga can enable the deployment of ovirt-guest-agent features
and satisfy the needs of the whole community at the same time.

1) Scope:  The ovirt-guest-agent contains functionality that is incredibly
useful within the context of oVirt.  Single Sign-on is very handy but KVM users
outside the scope of oVirt will not want this extra complexity in their agent.
For simplicity they will probably just write something small that does what they
need (and we have failed to provide a ubiquitous KVM agent).

1) Deployment complexity: The more complex the guest agent is, the more often it
will need to be updated (bug/security fixes, distro compatibility, new
features).  Rolling out guest agent updates does not scale well in large
environments (especially when the guest and host administrators are not the same
person).

For these reasons (and many others), I support having an agent with very basic
primitives that can be orchestrated by the host to provide needed functionality.
This agent would present a low-level, stable, extensible API that everyone can
use.  Today qemu-ga supports the following verbs: sync ping info shutdown
file-open file-close file-read file-write file-seek file-flush fsfreeze-status
fsfreeze-freeze fsfreeze-thaw.  If we add a generic execute mechanism, then the
agent can provide everything needed by oVirt to deploy SSO.

Let's assume that we have already agreed on some sort of security policy for the
write-file and exec primitives.  Consensus is possible on this issue but I
don't want to get bogged down with that here.

With the above primitives, SSO could be deployed automatically to a guest with
the following sequence of commands:

file-open exec-dir/sso-package.bin w
file-write fh buf
file-close fh
file-open exec-dir/sso-package.bin x
file-exec fh args
file-close fh

At this point, the package is installed.  It can contain whatever existing logic
exists in the ovirt-guest-agent today.  To perform a user login, we'll assume
that sso-package.bin contains an executable 'sso/do-user-sso':

file-open exec-dir/sso/do-user-sso x
exec fh args
file-close fh

At this point the user would be logged in as before.

Obviously, this type of approach could be made easier by providing a well
designed exec API that returns command exit codes and (optionally) command
output.  We could also formalize the install of additional components into some
sort of plugin interface.  These are all relatively easy problems to solve.

If we go in this direction, we would have a simple, general-purpose agent with
low-level primitives that everyone can use.  We would also be able to easily
extend the agent based on the needs of individual deployments (not the least of
which is an oVirt environment).  If certain plugins become popular enough, they
can always be promoted to first-order API calls in future versions of the API.

What are your thoughts on this approach?

-- 
Adam Litke a...@us.ibm.com
IBM Linux Technology Center




Re: [Qemu-devel] [PATCH 0/4] Image Streaming API

2011-08-30 Thread Adam Litke
On Tue, Aug 30, 2011 at 10:28:09AM +0100, Stefan Hajnoczi wrote:
 On Tue, Aug 30, 2011 at 4:19 AM, Zhi Yong Wu zwu.ker...@gmail.com wrote:
  On Tue, Aug 23, 2011 at 8:58 PM, Stefan Hajnoczi
  stefa...@linux.vnet.ibm.com wrote:
  These patches put in place the image streaming QMP/HMP commands and
  documentation.  Image streaming itself is not implemented by this patch 
  series
  but the HMP/QMP commands that libvirt uses are implemented to return
  NotSupported.
 
  The Image Streaming API can be used to copy the contents of a backing file 
  into
  the image file while the guest is running.  The API is described on the 
  wiki:
  http://wiki.qemu.org/Features/LiveBlockMigration/ImageStreamingAPI
 
  If query-block-jobs returns one percent value, it will be more
  readable and convenient for users.
 
 QMP is not a human interface.  Although a percentage value is good for
 progress bars, the absolute value is also interesting in terms of how
 much work is being done.  By providing an end value and a current
 value the management tool can display either or both.

I agree.  In QMP, we should report the best, most granular information we have
and allow management tools to present it as they see fit.  For example, a
management tool may wish to present streaming progress as a spinner (rather than
a progress bar).  In that case you would want to see the numbers updated
whenever progress has been made.



Re: [Qemu-devel] [PATCH 4/4] qmp: add query-block-jobs

2011-08-23 Thread Adam Litke
On Tue, 2011-08-23 at 13:58 +0100, Stefan Hajnoczi wrote:
 diff --git a/blockdev.c b/blockdev.c
 index 036b7eb..e9098f6 100644
 --- a/blockdev.c
 +++ b/blockdev.c
 @@ -835,3 +835,13 @@ int do_block_job_cancel(Monitor *mon, const QDict 
 *params, QObject **ret_data)
  qerror_report(QERR_DEVICE_NOT_ACTIVE, device);
  return -1;
  }
 +
 +void monitor_print_block_jobs(Monitor *mon, const QObject *data)
 +{
 +monitor_printf(mon, No active jobs\n);
 +}
 +
 +void do_info_block_jobs(Monitor *mon, QObject **ret_data)
 +{
 +*ret_data = QOBJECT(qdict_new());
 +}

This should return an empty qlist, not a qdict.

-- 
Thanks,
Adam




Re: [Qemu-devel] [PATCH 1/4] qmp: add block_stream command

2011-08-23 Thread Adam Litke
Under libvirt, I get the following error when trying to start a block
stream:

qerror: bad call in function 'do_block_stream':
qerror: - error format '{ 'class': 'NotSupported', 'data': {} }' not found
qerror: call at blockdev.c:808
2011-08-23 11:30:09.974: shutting down

Is this patch missing a part of the qerror definition?

On Tue, 2011-08-23 at 13:58 +0100, Stefan Hajnoczi wrote:
 This patch introduces the block_stream HMP/QMP command.  It is currently
 unimplemented and returns the 'NotSupported' error.
 
 block_stream
 
 
 Copy data from a backing file into a block device.
 
 The block streaming operation is performed in the background until the
 entire backing file has been copied.  This command returns immediately
 once streaming has started.  The status of ongoing block streaming
 operations can be checked with query-block-jobs.  The operation can be
 stopped before it has completed using the block_job_cancel command.
 
 If a base file is specified then sectors are not copied from that base
 file and its backing chain.  When streaming completes the image file
 will have the base file as its backing file.  This can be used to stream
 a subset of the backing file chain instead of flattening the entire
 image.
 
 On successful completion the image file is updated to drop the backing
 file.
 
 Arguments:
 
 - device: device name (json-string)
 - base:   common backing file (json-string, optional)
 
 Errors:
 
 DeviceInUse:streaming is already active on this device
 DeviceNotFound: device name is invalid
 NotSupported:   image streaming is not supported by this device
 
 Events:
 
 On completion the BLOCK_JOB_COMPLETED event is raised with the following
 fields:
 
 - type: job type (stream for image streaming, json-string)
 - device:   device name (json-string)
 - end:  maximum progress value (json-int)
 - position: current progress value (json-int)
 - speed:rate limit, bytes per second (json-int)
 - error:error message (json-string, only on error)
 
 The completion event is raised both on success and on failure.  On
 success position is equal to end.  On failure position and end can be
 used to indicate at which point the operation failed.
 
 On failure the error field contains a human-readable error message.
 There are no semantics other than that streaming has failed and clients
 should not try to interpret the error string.
 
 Examples:
 
 - { execute: block_stream, arguments: { device: virtio0 } }
 - { return:  {} }
 
 Signed-off-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com
 ---
  blockdev.c  |   18 +++
  blockdev.h  |1 +
  hmp-commands.hx |   14 +++
  monitor.c   |3 ++
  monitor.h   |1 +
  qerror.h|3 ++
  qmp-commands.hx |   65 
 +++
  7 files changed, 105 insertions(+), 0 deletions(-)
 
 diff --git a/blockdev.c b/blockdev.c
 index d272659..208bfc9 100644
 --- a/blockdev.c
 +++ b/blockdev.c
 @@ -790,3 +790,21 @@ int do_block_resize(Monitor *mon, const QDict *qdict, 
 QObject **ret_data)
 
  return 0;
  }
 +
 +int do_block_stream(Monitor *mon, const QDict *params, QObject **ret_data)
 +{
 +const char *device = qdict_get_str(params, device);
 +BlockDriverState *bs;
 +
 +bs = bdrv_find(device);
 +if (!bs) {
 +qerror_report(QERR_DEVICE_NOT_FOUND, device);
 +return -1;
 +}
 +
 +/* This command is not yet implemented.  The device not found check above
 + * is done so that error ordering will not change when fully implemented.
 + */
 +qerror_report(QERR_NOT_SUPPORTED);
 +return -1;
 +}
 diff --git a/blockdev.h b/blockdev.h
 index 3587786..ad98d37 100644
 --- a/blockdev.h
 +++ b/blockdev.h
 @@ -65,5 +65,6 @@ int do_change_block(Monitor *mon, const char *device,
  int do_drive_del(Monitor *mon, const QDict *qdict, QObject **ret_data);
  int do_snapshot_blkdev(Monitor *mon, const QDict *qdict, QObject **ret_data);
  int do_block_resize(Monitor *mon, const QDict *qdict, QObject **ret_data);
 +int do_block_stream(Monitor *mon, const QDict *qdict, QObject **ret_data);
 
  #endif
 diff --git a/hmp-commands.hx b/hmp-commands.hx
 index 0ccfb28..2a16fd9 100644
 --- a/hmp-commands.hx
 +++ b/hmp-commands.hx
 @@ -70,6 +70,20 @@ but should be used with extreme caution.  Note that this 
 command only
  resizes image files, it can not resize block devices like LVM volumes.
  ETEXI
 
 +{
 +.name   = block_stream,
 +.args_type  = device:B,base:s?,
 +.params = device [base],
 +.help   = copy data from a backing file into a block device,
 +.user_print = monitor_user_noop,
 +.mhandler.cmd_new = do_block_stream,
 +},
 +
 +STEXI
 +@item block_stream
 +@findex block_stream
 +Copy data from a backing file into a block device.
 +ETEXI
 
  {
  .name   = eject,
 diff --git a/monitor.c b/monitor.c
 index ada51d0..dc55fca 100644
 --- a/monitor.c
 

Re: [Qemu-devel] [libvirt] [PATCH] qemu: Get memory balloon info correctly for text monitor

2011-08-15 Thread Adam Litke


On 08/15/2011 11:50 AM, Daniel P. Berrange wrote:
 On Mon, Aug 15, 2011 at 11:27:43AM -0500, Adam Litke wrote:
 On 08/15/2011 08:23 AM, Osier Yang wrote:
 于 2011年08月15日 21:58, Osier Yang 写道:
 * src/qemu/qemu_monitor_text.c: BALLOON_PREFIX was defined as
 balloon: actual=, which cause actual= is stripped early before
 the real parsing. This patch changes BALLOON_PREFIX into balloon: ,
 and modifies related functions, also renames
 qemuMonitorParseExtraBalloonInfo to qemuMonitorParseBalloonInfo,
 as after the changing, it parses all the info returned by info balloon.

 Forgot to mention the problem, e.g. virsh dommemstat $domain returns empty
 result.

 That is because qemu has disabled stats reporting and so the extra
 fields are not present in the info balloon response.
 
 I'd completely forgotten about this problem. We really should try to
 get this fixed  renabled in QEMU sometime in the not too distant
 future.

I agree.  The problem is that qemu lacks a proper interface for
asynchronous commands so an unresponsive guest could freeze the monitor.
 QMP is undergoing a significant overhaul as a result of the new QAPI
framework.  It is my understanding that this new framework will provide
a robust async interface, allowing us to re-enable balloon stats.

-- 
Adam Litke
IBM Linux Technology Center



Re: [Qemu-devel] live block copy/stream/snapshot discussion

2011-07-12 Thread Adam Litke


On 07/12/2011 10:45 AM, Stefan Hajnoczi wrote:
 On Tue, Jul 12, 2011 at 9:06 AM, Kevin Wolf kw...@redhat.com wrote:
 Am 11.07.2011 18:32, schrieb Marcelo Tosatti:
 On Mon, Jul 11, 2011 at 03:47:15PM +0100, Stefan Hajnoczi wrote:
 Kevin, Marcelo,
 I'd like to reach agreement on the QMP/HMP APIs for live block copy
 and image streaming.  Libvirt has acked the image streaming APIs that
 Adam proposed and I think they are a good fit for the feature.  I have
 described that API below for your review (it's exactly what the QED
 Image Streaming patches provide).

 Marcelo: Are you happy with this API for live block copy?  Also please
 take a look at the switch command that I am proposing.

 Image streaming API
 ===

 For leaf images with copy-on-read semantics, the stream commands allow the 
 user
 to populate local blocks by manually streaming them from the backing image.
 Once all blocks have been streamed, the dependency on the original backing
 image can be removed.  Therefore, stream commands can be used to implement
 post-copy live block migration and rapid deployment.

 The block_stream command can be used to stream a single cluster, to
 start streaming the entire device, and to cancel an active stream.  It
 is easiest to allow the block_stream command to manage streaming for the
 entire device but a managent tool could use single cluster mode to
 throttle the I/O rate.

 As discussed earlier, having the management send requests for each
 single cluster doesn't make any sense at all. It wouldn't only throttle
 the I/O rate but bring it down to a level that makes it unusable. What
 you really want is to allow the management to give us a range (offset +
 length) that qemu should stream.
 
 I feel that an iteration interface is problematic whether the
 management tool or QEMU decide what to stream.  Let's have just the
 background streaming operation.
 
 The problem with byte ranges is two-fold.  The management tool doesn't
 know which regions of the image are allocated so it may do a lot of
 nop calls to already-allocated regions with no intelligence as to
 where the next sensible offset for streaming is.  Secondly, because
 the progress and performance of image streaming depend largely on
 whether or not clusters are allocated (it is very fast when a cluster
 is already allocated and we have no work to do), offsets are bad
 indicators of progress to the user.  I think it's best not to expose
 these details to the management tool at all.
 
 The only reason for the iteration interface was to punt I/O throttling
 to the management tool.  I think it would be easier to just throttle
 inside the streaming function.
 
 Kevin: Are you happy with dropping the iteration interface?
 Adam: Is there a libvirt requirement for iteration or could we support
 background copy only?

There is no hard requirement for iteration in libvirt.  However, I think
there is a requirement that we report some sort of progress to an end
user.  These operations can easily take many minutes (even hours) and
such a long-running operation needs to report progress.  I think the
current information returned by 'query-block-stream' is appropriate for
this purpose and should definitely be maintained.

-- 
Adam Litke
IBM Linux Technology Center



[Qemu-devel] Re: [RFC][PATCH v7 04/16] virtagent: bi-directional RPC handling logic

2011-03-07 Thread Adam Litke
On Mon, 2011-03-07 at 14:10 -0600, Michael Roth wrote:
 This implements the state machine/logic used to manage
 send/receive/execute phases of RPCs we send or receive. It does so using
 a set of abstract methods we implement with the application and
 transport level code which will follow.
 
 Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
 ---
  virtagent-manager.c |  326 
 +++
  virtagent-manager.h |  130 
  2 files changed, 456 insertions(+), 0 deletions(-)
  create mode 100644 virtagent-manager.c
  create mode 100644 virtagent-manager.h
 
 diff --git a/virtagent-manager.c b/virtagent-manager.c
 new file mode 100644
 index 000..51d26a3
 --- /dev/null
 +++ b/virtagent-manager.c
 @@ -0,0 +1,326 @@
 +/*
 + * virtagent - job queue management
 + *
 + * Copyright IBM Corp. 2011
 + *
 + * Authors:
 + *  Michael Roth  mdr...@linux.vnet.ibm.com
 + *
 + * This work is licensed under the terms of the GNU GPL, version 2 or later.
 + * See the COPYING file in the top-level directory.
 + *
 + */
 +
 +#include virtagent-common.h
 +
 +typedef struct VAServerJob {
 +char tag[64];
 +void *opaque;
 +VAServerJobOps ops;
 +QTAILQ_ENTRY(VAServerJob) next;
 +enum {
 +VA_SERVER_JOB_STATE_NEW = 0,
 +VA_SERVER_JOB_STATE_BUSY,
 +VA_SERVER_JOB_STATE_EXECUTED,
 +VA_SERVER_JOB_STATE_SENT,
 +VA_SERVER_JOB_STATE_DONE,
 +} state;
 +} VAServerJob;
 +
 +typedef struct VAClientJob {
 +char tag[64];
 +void *opaque;
 +void *resp_opaque;
 +VAClientJobOps ops;
 +QTAILQ_ENTRY(VAClientJob) next;
 +enum {
 +VA_CLIENT_JOB_STATE_NEW = 0,
 +VA_CLIENT_JOB_STATE_BUSY,
 +VA_CLIENT_JOB_STATE_SENT,
 +VA_CLIENT_JOB_STATE_READ,
 +VA_CLIENT_JOB_STATE_DONE,
 +} state;
 +} VAClientJob;
 +
 +#define SEND_COUNT_MAX 1
 +#define EXECUTE_COUNT_MAX 4

It's not immediately clear what the difference between SEND_COUNT_MAX
and EXECUTE_COUNT_MAX is.  Some comments would help.  Also, will the
code work if these numbers are changed?  If not, a note about what
someone needs to look at when changing these would seem appropriate.

 +
 +struct VAManager {
 +int send_count; /* sends in flight */
 +int execute_count; /* number of jobs currently executing */
 +QTAILQ_HEAD(, VAServerJob) server_jobs;
 +QTAILQ_HEAD(, VAClientJob) client_jobs;
 +};
 +
 +/* server job operations/helpers */
 +
 +static VAServerJob *va_server_job_by_tag(VAManager *m, const char *tag)
 +{
 +VAServerJob *j;
 +QTAILQ_FOREACH(j, m-server_jobs, next) {
 +if (strcmp(j-tag, tag) == 0) {
 +return j;
 +}
 +}
 +return NULL;
 +}
 +
 +int va_server_job_add(VAManager *m, const char *tag, void *opaque,
 +  VAServerJobOps ops)
 +{
 +VAServerJob *j = qemu_mallocz(sizeof(VAServerJob));
 +TRACE(called);

Qemu has a good tracing infrastructure.  If this is trace point is
useful enough to keep around, it should try to use that.  If it's not
that important, I'd remove it entirely.  I believe this has been flagged
in an earlier RFC too.

 +j-state = VA_SERVER_JOB_STATE_NEW;
 +j-ops = ops;
 +j-opaque = opaque;
 +memset(j-tag, 0, 64);
 +pstrcpy(j-tag, 63, tag);

Magic numbers.  Should use something like #define TAG_LEN 64

 +QTAILQ_INSERT_TAIL(m-server_jobs, j, next);
 +va_kick(m);
 +return 0;
 +}
 +
 +static void va_server_job_execute(VAServerJob *j)
 +{
 +TRACE(called);
 +j-state = VA_SERVER_JOB_STATE_BUSY;
 +j-ops.execute(j-opaque, j-tag);
 +}
 +
 +/* TODO: need a way to pass information back */
 +void va_server_job_execute_done(VAManager *m, const char *tag)
 +{
 +VAServerJob *j = va_server_job_by_tag(m, tag);
 +TRACE(called);
 +if (!j) {
 +LOG(server job with tag \%s\ not found, tag);
 +return;
 +}
 +j-state = VA_SERVER_JOB_STATE_EXECUTED;
 +va_kick(m);
 +}
 +
 +static void va_server_job_send(VAServerJob *j)
 +{
 +TRACE(called);
 +j-state = VA_SERVER_JOB_STATE_BUSY;
 +j-ops.send(j-opaque, j-tag);
 +}
 +
 +void va_server_job_send_done(VAManager *m, const char *tag)
 +{
 +VAServerJob *j = va_server_job_by_tag(m, tag);
 +TRACE(called);
 +if (!j) {
 +LOG(server job with tag \%s\ not found, tag);
 +return;
 +}
 +j-state = VA_SERVER_JOB_STATE_SENT;
 +m-send_count--;
 +va_kick(m);
 +}
 +
 +static void va_server_job_callback(VAServerJob *j)
 +{
 +TRACE(called);
 +j-state = VA_SERVER_JOB_STATE_BUSY;
 +if (j-ops.callback) {
 +j-ops.callback(j-opaque, j-tag);
 +}
 +j-state = VA_SERVER_JOB_STATE_DONE;
 +}
 +
 +void va_server_job_cancel(VAManager *m, const char *tag)
 +{
 +VAServerJob *j = va_server_job_by_tag(m, tag);
 +TRACE(called);
 +if (!j) {
 +LOG(server job with tag \%s\ not found, tag);
 +return;
 +}
 +/* TODO: need to 

[Qemu-devel] Re: [RFC][PATCH v7 06/16] virtagent: transport definitions

2011-03-07 Thread Adam Litke
On Mon, 2011-03-07 at 14:10 -0600, Michael Roth wrote:
 +#define VA_LINE_LEN_MAX 1024
 +static void va_rpc_parse_hdr(VAHTState *s)
 +{
 +int i, line_pos = 0;
 +bool first_line = true;
 +char line_buf[VA_LINE_LEN_MAX];
 +
 +TRACE(called);
 +
 +for (i = 0; i  VA_HDR_LEN_MAX; ++i) {
 +if (s-hdr[i] == 0) {
 +/* end of header */
 +return;
 +}
 +if (s-hdr[i] != '\n') {
 +/* read line */
 +line_buf[line_pos++] = s-hdr[i];
 +} else {
 +/* process line */
 +if (first_line) {
 +if (strncmp(line_buf, POST, 4) == 0) {
 +s-http_type = VA_HTTP_TYPE_REQUEST;
 +} else if (strncmp(line_buf, HTTP, 4) == 0) {
 +s-http_type = VA_HTTP_TYPE_RESPONSE;
 +} else {
 +s-http_type = VA_HTTP_TYPE_UNKNOWN;
 +}
 +first_line = false;
 +}
 +if (strncmp(line_buf, Content-Length: , 16) == 0) {
 +s-content_len = atoi(line_buf[16]);
 +}
 +if (strncmp(line_buf, X-Virtagent-Client-Tag: , 24) == 0) {
 +memcpy(s-hdr_client_tag, line_buf[24], MIN(line_pos-25, 
 64));
 +//pstrcpy(s-hdr_client_tag, 64, line_buf[24]);


Remove this commented code.


 +TRACE(\nTAG%s\n, s-hdr_client_tag);
 +}
 +line_pos = 0;
 +memset(line_buf, 0, VA_LINE_LEN_MAX);
 +}
 +}
 +}


-- 
Thanks,
Adam




[Qemu-devel] Re: [PATCH 1/2] Add virtagent file system freeze/thaw

2011-02-01 Thread Adam Litke
On Tue, 2011-02-01 at 11:58 +0100, jes.soren...@redhat.com wrote:
 +/*
 + * va_fsfreeze(): Walk list of mounted file systems in the guest, and
 + *   freeze the ones which are real local file systems.
 + * rpc return values: Number of file systems frozen, -1 on error.
 + */
 +static xmlrpc_value *va_fsfreeze(xmlrpc_env *env,
 + xmlrpc_value *params,
 + void *user_data)
 +{
 +xmlrpc_int32 ret = 0, i = 0;
 +xmlrpc_value *result;
 +struct direntry *entry;
 +int fd;
 +SLOG(va_fsfreeze());
 +
 +if (fsfreeze_status == FREEZE_FROZEN) {
 +ret = 0;
 +goto out;
 +}
 +
 +ret = build_mount_list();
 +if (ret  0) {
 +goto out;
 +}
 +
 +fsfreeze_status = FREEZE_INPROGRESS;
 +
 +entry = mount_list;
 +while(entry) {
 +fd = qemu_open(entry-dirname, O_RDONLY);
 +if (fd == -1) {
 +ret = errno;
 +goto error;
 +}
 +ret = ioctl(fd, FIFREEZE);
 +if (ret  0  ret != EOPNOTSUPP) {
 +goto error;
 +}

Here we silently ignore filesystems that do not support the FIFREEZE
ioctl.  Do we need to have a more complex return value so that we can
communicate which mount points could not be frozen?  Otherwise, an
unsuspecting host could retrieve a corrupted snapshot of that
filesystem, right?

 +
 +close(fd);
 +entry = entry-next;
 +i++;
 +}
 +
 +fsfreeze_status = FREEZE_FROZEN;
 +ret = i;
 +out:
 +result = xmlrpc_build_value(env, i, ret);
 +return result;
 +error:
 +if (i  0) {
 +fsfreeze_status = FREEZE_ERROR;
 +}
 +goto out;
 +}

-- 
Thanks,
Adam




Re: spice vdagent protocol, was Re: [Qemu-devel] KVM call minutes for Jan 11

2011-01-13 Thread Adam Litke
On Tue, 2011-01-11 at 20:33 +0200, Alon Levy wrote:
  Spice guest agent:
  - virt agent, matahari, spice agent...what is in spice agent?
  - spice char device
- mouse, copy 'n paste, screen resolution change
  - could be generic (at least input and copy/paste)
- send protocol details of what is being sent
  - need to look at how difficult it is to split it out from spice
(how to split out in qemu vs. libspice)
  - goal to converge on common framework
  - more discussion on char device vs. protocol
- eg. mouse_set breaks if mouse channel is part pv and part spice specific
  - Alon will send link to protocol and try to propose new interfaces
 
 http://spice-space.org/page/Whiteboard/AgentProtocol
 
 That's the corrent documentation. Notably the clipboard is a todo there, I'll
 try to get that filled in. I'll continue this discussion on a separate thread 
 later.

Thanks for sending this out Alon.  The use cases you have outlined are
very similar to the ones we have for virtagent.  The main differences I
can see so far are the the data encoding strategy and the spice agent's
method for delivery of events (mouse, etc).

Virtagent implements an RPC interface and uses xmlrpc-c to encode data
in XML.  This approach seems more general-purpose, extensible and easier
to manage over the long term than relying on a custom binary
representation.

As for event delivery, virtagent does not yet have an interface to allow
external programs to subscribe to events.  I am sure this can be done in
a generic way that is backwards-compatible with the current spice
architecture.  Such an interface should allow arbitrary programs to
subscribe to events but have no dependencies on those programs.  I am
not sure if something like D-Bus would be appropriate for Linux guests.
We'd need to consider Windows too.  

I see no reason why the core qemu use cases (shutdown, exec, copyfile)
and the spice use cases (mouse, copy-paste, graphics reconfiguration)
cannot (and should not) be satisfied by a single agent.  Going forward,
I think we'd need to agree on the wire protocol and guest-side event
subscription.

Thoughts?

-- 
Thanks,
Adam




Re: spice vdagent protocol, was Re: [Qemu-devel] KVM call minutes for Jan 11

2011-01-13 Thread Adam Litke
On Thu, 2011-01-13 at 21:18 +0200, Alon Levy wrote:
 On Thu, Jan 13, 2011 at 09:01:34AM -0600, Adam Litke wrote:
  On Tue, 2011-01-11 at 20:33 +0200, Alon Levy wrote:
Spice guest agent:
- virt agent, matahari, spice agent...what is in spice agent?
- spice char device
  - mouse, copy 'n paste, screen resolution change
- could be generic (at least input and copy/paste)
  - send protocol details of what is being sent
- need to look at how difficult it is to split it out from spice
  (how to split out in qemu vs. libspice)
- goal to converge on common framework
- more discussion on char device vs. protocol
  - eg. mouse_set breaks if mouse channel is part pv and part spice 
specific
- Alon will send link to protocol and try to propose new interfaces
   
   http://spice-space.org/page/Whiteboard/AgentProtocol
   
   That's the corrent documentation. Notably the clipboard is a todo there, 
   I'll
   try to get that filled in. I'll continue this discussion on a separate 
   thread later.
  
  Thanks for sending this out Alon.  The use cases you have outlined are
  very similar to the ones we have for virtagent.  The main differences I
  can see so far are the the data encoding strategy and the spice agent's
  method for delivery of events (mouse, etc).
  
 Let me start by saying I'm not yet familiar with virtagent, I need to read
 up on it (really didn't expect to try to merge with it at this point, not
 that that isn't a good discussion).
 
  Virtagent implements an RPC interface and uses xmlrpc-c to encode data
  in XML.  This approach seems more general-purpose, extensible and easier
  to manage over the long term than relying on a custom binary
  representation.
  
 
 Agree with the benefits, but there are other alternatives like I outlined
 in the thread that Gerd started, like a binary representation generated from
 declarative description. Totally agree with using a single definition to
 generate marshalling / demarshalling code. Not saying we can't work with
 xmlrpc for qemu-guest but for qemu-client I think it is too wasteful.
 
  As for event delivery, virtagent does not yet have an interface to allow
  external programs to subscribe to events.  I am sure this can be done in
  a generic way that is backwards-compatible with the current spice
  architecture.  Such an interface should allow arbitrary programs to
  subscribe to events but have no dependencies on those programs.  I am
  not sure if something like D-Bus would be appropriate for Linux guests.
  We'd need to consider Windows too.  
  
 
 Really not sure what you mean by events here, maybe I missed something.

I was trying to get at the method by which a generic guest agent could
forward things like mouse coordinates to Spice.  Unfortunately, I sent
this out before I noticed the other thread which covered this topic
well.

  I see no reason why the core qemu use cases (shutdown, exec, copyfile)
  and the spice use cases (mouse, copy-paste, graphics reconfiguration)
  cannot (and should not) be satisfied by a single agent.  Going forward,
  I think we'd need to agree on the wire protocol and guest-side event
  subscription.
 
 At first glance, i'd say:
  * development - how would we develop a shared agent? two forks and a upstream
   that no one cares about? shared repository? seems easier to me to use
   a shared protocol and separate agents. Also better bug wise (bug in our
   code doesn't bring down your agent) to have separate agents. Plugins are
   not any better bug wise (segfault is a segfault). Also languages become a
   problem (our agents are in C++/C atm win/lin). So in summary I think it
   is better to have a shared qemu code (this has to be a single exe) and
   protocol perhaps.

I think it is critical that we have a single shared agent.  The agent
must be part of the default install of most future guests for any
host-guest API to be useful.  I will defer to discussions in the main
thread regarding the structure of the daemon(s).

  * security - we don't need our agent to be able to execute or to copy files
   or to shutdown, so we'd need some mechanism to turn those off. There is
   another different agent (going over virtio-serial too) used for RHEV
   which probably does need these kind of features, so maybe need to involve
   them in this discussion too. (RHEV isn't open source yet but aims to be
   AFAIK).

One idea that has been discussed is to restrict agent actions to
programs/scripts that exist in an virtagent.d/ directory (similar to how
udev rules are configured).  Virtagent could install a set of scripts
in /lib/virtagent/actions.d/ which can be overridden (or disabled) by
the user in /etc/virtagent/actions.d/.  If the 'shutdown' action is
missing, then the agent will not be able to perform shutdowns.

-- 
Thanks,
Adam




Re: [Qemu-devel] [Qestion] What status of memory stats feature

2010-12-15 Thread Adam Litke
On Wed, 2010-12-15 at 15:39 -0200, Luiz Capitulino wrote:
 On Wed, 15 Dec 2010 16:20:05 +0900
 Ken'ichi Ohmichi oomi...@mxs.nes.nec.co.jp wrote:
 
  
  Hi,
  
  I tried to get the memory stats by using virDomainMemoryStats() of libvirt,
  but it could not do it because of the following patch:
  
  [PATCH 03/23] disable guest-provided stats on info balloon command
2010/10/01 from Luiz Capitulino
http://www.mail-archive.com/qemu-devel@nongnu.org/msg43024.html 
  
  According to the related thread, the above patch avoids hanging user
  monitor, and people hope the memory stats feature will be able in the
  future. So I'd like to know the status of this feature.
  
  Is there the patch for enabling the feature ?
  If the patch exists, I'd like to try it.
 
 It doesn't, afaik.

It depends on what you are looking to do.  If you just want to play
around and aren't concerned about lockups, You could undo the above
patch to re-enable the interface (although I cannot guarantee that even
this will work).  

  What is the essential problem ?
 
 There are two essential problems here.
 
 The first one is that QMP lacks true asynchronous command support (it does
 have some code for it, but it's incomplete).

I was never completely clear on the problems with the QMP async support.

 The second problem is that we must not make an existing synchronous command
 asynchronous, because this breaks the ABI.

The memory stats interface has always advertised itself as an async
command so this one shouldn't matter.  Whether those semantics actually
ever worked is another issue.

  Does some infinite loop happen ?
 
 No, the guest doesn't respond.
 
  Unfortunately, I cannot understand the cause of hanging user monitor.
 
 The balloon command needs guest cooperation (ie. it talks to the guest),
 if the guest doesn't respond the command will never return.

Isn't that only a problem for the user monitor?  For QMP you might just
never get a response to the stats request.  IIRC, the QMP monitor is
never 'suspended' in the same way that the user monitor is so I wouldn't
expect it to be susceptible to the same lockup issues.

 We don't have a precise ETA for async support, but if it depends only on
 the new error framework work, then we could have it for 0.15. If it depends
 on the full monitor redesign work, then I guess we'll two releases.

I can't provide a definitive answer to this unfortunately since I
haven't looked in awhile (nor am I privy to the details of the full
monitor redesign proposal.

-- 
Thanks,
Adam




[Qemu-devel] Re: [RFC][PATCH v5 07/21] virtagent: add va.getfile RPC

2010-12-09 Thread Adam Litke
On Wed, 2010-12-08 at 20:19 +0100, Jes Sorensen wrote:
 On 12/07/10 17:00, Adam Litke wrote:
  Hi Jes, you raise some good points and pitfalls with the current getfile
  approach.  I've been thinking about an alternative and am wondering what
  you (and others) think...
  
  First off, I think we should switch to a copyfile() API that allows us
  to avoid presenting the file contents to the user.  Neither the human
  monitor nor the control monitor are designed to be file pagers.  Let the
  user decide how to consume the data once it has been transferred.  Now
  we don't need to care if the file is binary or text.
  
  The virtagent RPC protocol is bi-directional and supports asynchronous
  events.  We can use these to implement a better copyfile RPC that can
  transfer larger files without wasting memory.  The host issues a
  copyfile(guest-path, host-path) RPC.  The immediate result of this
  call will indicate whether the guest is able to initiate the transfer.
  The guest will generate a series of events (offset, size, payload)
  until the entire contents has been transferred.  The host and guest
  could negotiate the chunk size if necessary.  Once the transfer is
  complete, the guest sends a final event to indicate this (file-size,
  0).
  
  This interface could be integrated into the monitor with a pair of
  commands (va_copyfile and info va_copyfile), the former used to initiate
  transfers and the latter to check on the status.
  
  Thoughts on this?
 
 Hi Adam,
 
 This sounds a lot safer than the current approach. Intuitively I would
 think it should be the host controlling the copy, but otherwise it
 sounds good. Or is there a reason why the guest should control it?

Actually, a host-controlled interface would be both simpler to implement
(on both ends) and would concentrate control in the host (which is what
we probably want anyway).  

 I think it is vital that we do it in a way where a copy cannot blow
 QEMU's memory consumption out of the water, but the approach you suggest
 seems to take care of that.
 
 Cheers,
 Jes
 

-- 
Thanks,
Adam




[Qemu-devel] Re: [RFC][PATCH v5 07/21] virtagent: add va.getfile RPC

2010-12-07 Thread Adam Litke
Hi Jes, you raise some good points and pitfalls with the current getfile
approach.  I've been thinking about an alternative and am wondering what
you (and others) think...

First off, I think we should switch to a copyfile() API that allows us
to avoid presenting the file contents to the user.  Neither the human
monitor nor the control monitor are designed to be file pagers.  Let the
user decide how to consume the data once it has been transferred.  Now
we don't need to care if the file is binary or text.

The virtagent RPC protocol is bi-directional and supports asynchronous
events.  We can use these to implement a better copyfile RPC that can
transfer larger files without wasting memory.  The host issues a
copyfile(guest-path, host-path) RPC.  The immediate result of this
call will indicate whether the guest is able to initiate the transfer.
The guest will generate a series of events (offset, size, payload)
until the entire contents has been transferred.  The host and guest
could negotiate the chunk size if necessary.  Once the transfer is
complete, the guest sends a final event to indicate this (file-size,
0).

This interface could be integrated into the monitor with a pair of
commands (va_copyfile and info va_copyfile), the former used to initiate
transfers and the latter to check on the status.

Thoughts on this?

On Tue, 2010-12-07 at 15:18 +0100, Jes Sorensen wrote:
 On 12/03/10 19:03, Michael Roth wrote:
  Add RPC to retrieve a guest file. This interface is intended
  for smaller reads like peeking at logs and /proc and such.
 
 I think you need to redesign your approach here. see below.
 
 In 06/21 you had:
 
 +#define VA_GETFILE_MAX 1  30
 
  +while ((ret = read(fd, buf, VA_FILEBUF_LEN))  0) {
  +file_contents = qemu_realloc(file_contents, count + 
  VA_FILEBUF_LEN);
  +memcpy(file_contents + count, buf, ret);
 
 UH OH!
 
 realloc will do a malloc and a memcpy of the data, this is going to turn
 into a really nasty malloc memcpy loop if someone tries to transfer a
 large file using this method. You could end up with almost 4GB of
 parallel allocations for a guest that might have been configured as a
 1GB guest. This would allow the guest to effectively blow the expected
 memory consumption out of the water. It's not exactly going to be fast
 either :(
 
 Maybe use a tmp file, and write data out to that as you receive it to
 avoid the malloc ballooning.
 
 Jes

-- 
Thanks,
Adam




[Qemu-devel] Re: [RFC][PATCH v5 03/21] virtagent: common code for managing client/server rpc jobs

2010-12-06 Thread Adam Litke
On Fri, 2010-12-03 at 12:03 -0600, Michael Roth wrote:
 +/* create new client job and then put it on the queue. this can be
 + * called externally from virtagent. Since there can only be one virtagent
 + * instance we access state via an object-scoped global rather than pass
 + * it around.
 + *
 + * if this is successful virtagent will handle cleanup of req_xml after
 + * making the appropriate callbacks, otherwise callee should handle it
 + */

Explain please. Do you mean caller should handle it? Are you trying to
say that this function, when successful, steals the reference to
req_xml?

 +int va_client_job_add(xmlrpc_mem_block *req_xml, VAClientCallback *cb,
 +  MonitorCompletion *mon_cb, void *mon_data)
 +{
 +int ret;
 +VAClientJob *client_job;
 +TRACE(called);
 +
 +client_job = va_client_job_new(req_xml, cb, mon_cb, mon_data);
 +if (client_job == NULL) {
 +return -EINVAL;
 +}
 +
 +ret = va_push_client_job(client_job);
 +if (ret != 0) {
 +LOG(error adding client to queue: %s, strerror(ret));
 +qemu_free(client_job);
 +return ret;
 +}
 +
 +return 0;
 +}

-- 
Thanks,
Adam




[Qemu-devel] Re: [RFC][PATCH v5 03/21] virtagent: common code for managing client/server rpc jobs

2010-12-06 Thread Adam Litke
On Fri, 2010-12-03 at 12:03 -0600, Michael Roth wrote: 
 +/* create new client job and then put it on the queue. this can be
 + * called externally from virtagent. Since there can only be one virtagent
 + * instance we access state via an object-scoped global rather than pass
 + * it around.
 + *
 + * if this is successful virtagent will handle cleanup of req_xml after
 + * making the appropriate callbacks, otherwise callee should handle it
 + */

Explain please. Do you mean caller should handle it? Are you trying to
say that this function, when successful, steals the reference to
req_xml?

 +int va_client_job_add(xmlrpc_mem_block *req_xml, VAClientCallback *cb,
 +  MonitorCompletion *mon_cb, void *mon_data)
 +{
 +int ret;
 +VAClientJob *client_job;
 +TRACE(called);
 +
 +client_job = va_client_job_new(req_xml, cb, mon_cb, mon_data);
 +if (client_job == NULL) {
 +return -EINVAL;
 +}
 +
 +ret = va_push_client_job(client_job);
 +if (ret != 0) {
 +LOG(error adding client to queue: %s, strerror(ret));
 +qemu_free(client_job);
 +return ret;
 +}
 +
 +return 0;
 +}
 +
 +/* create new server job and then put it on the queue in wait state
 + * this should only be called from within our read handler callback
 + */

Since this function is only 4 lines and has only one valid call site.
perhaps its better to fold it directly into the read handler callback.

 +static int va_server_job_add(xmlrpc_mem_block *resp_xml)
 +{
 +VAServerJob *server_job;
 +TRACE(called);
 +
 +server_job = va_server_job_new(resp_xml);
 +assert(server_job != NULL);
 +va_push_server_job(server_job);
 +return 0;
 +}

-- 
Thanks,
Adam





[Qemu-devel] Re: [RFC][PATCH v5 07/21] virtagent: add va.getfile RPC

2010-12-06 Thread Adam Litke
On Fri, 2010-12-03 at 12:03 -0600, Michael Roth wrote:
 Add RPC to retrieve a guest file. This interface is intended
 for smaller reads like peeking at logs and /proc and such.
 
 Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
 ---
  virtagent-server.c |   59 
 
  1 files changed, 59 insertions(+), 0 deletions(-)
 
 diff --git a/virtagent-server.c b/virtagent-server.c
 index 36fbae2..a430b58 100644
 --- a/virtagent-server.c
 +++ b/virtagent-server.c
 @@ -26,12 +26,71 @@ static bool va_enable_syslog = false; /* enable 
 syslog'ing of RPCs */
  syslog(LOG_INFO, virtagent, %s, msg_buf); \
  } while(0)
 
 +/* RPC functions common to guest/host daemons */
 +
 +/* va_getfile(): return file contents
 + * rpc return values:
 + *   - base64-encoded file contents
 + */
 +static xmlrpc_value *va_getfile(xmlrpc_env *env,
 +xmlrpc_value *param,
 +void *user_data)
 +{
 +const char *path;
 +char *file_contents = NULL;
 +char buf[VA_FILEBUF_LEN];
 +int fd, ret, count = 0;
 +xmlrpc_value *result = NULL;
 +
 +/* parse argument array */
 +xmlrpc_decompose_value(env, param, (s), path);
 +if (env-fault_occurred) {
 +return NULL;
 +}
 +
 +SLOG(va_getfile(), path:%s, path);

I would log all calls (even those that would fail
xmlrpc_decompose_value(). You might catch someone trying to do something
tricky.

 +
 +fd = open(path, O_RDONLY);
 +if (fd == -1) {
 +LOG(open failed: %s, strerror(errno));
 +xmlrpc_faultf(env, open failed: %s, strerror(errno));
 +return NULL;
 +}
 +
 +while ((ret = read(fd, buf, VA_FILEBUF_LEN))  0) {
 +file_contents = qemu_realloc(file_contents, count + VA_FILEBUF_LEN);
 +memcpy(file_contents + count, buf, ret);
 +count += ret;
 +if (count  VA_GETFILE_MAX) {
 +xmlrpc_faultf(env, max file size (%d bytes) exceeded,
 +  VA_GETFILE_MAX);
 +goto EXIT_CLOSE_BAD;
 +}
 +}
 +if (ret == -1) {
 +LOG(read failed: %s, strerror(errno));
 +xmlrpc_faultf(env, read failed: %s, strerror(errno));
 +goto EXIT_CLOSE_BAD;
 +}
 +
 +result = xmlrpc_build_value(env, 6, file_contents, count);
 +
 +EXIT_CLOSE_BAD:
 +if (file_contents) {
 +qemu_free(file_contents);
 +}
 +close(fd);
 +return result;
 +}
 +
  typedef struct RPCFunction {
  xmlrpc_value *(*func)(xmlrpc_env *env, xmlrpc_value *param, void 
 *unused);
  const char *func_name;
  } RPCFunction;
 
  static RPCFunction guest_functions[] = {
 +{ .func = va_getfile,
 +  .func_name = va.getfile },
  { NULL, NULL }
  };
  static RPCFunction host_functions[] = {

-- 
Thanks,
Adam




[Qemu-devel] Re: [RFC][PATCH v5 08/21] virtagent: add agent_viewfile qmp/hmp command

2010-12-06 Thread Adam Litke
On Fri, 2010-12-03 at 12:03 -0600, Michael Roth wrote:
 Utilize the getfile RPC to provide a means to view text files in the
 guest. Getfile can handle binary files as well but we don't advertise
 that here due to the special handling requiring to store it and provide
 it back to the user (base64 encoding it for instance). Hence the
 otherwise confusing viewfile as opposed to getfile.

What happens to the monitor if you use this to view a binary file?
Retrieving binary files progmatically using the QMP interface is a valid
use case right?  If so, I don't think it makes sense to introduce
confusion by renaming the rpc function from getfile to viewfile when we
are just exposing the getfile interface.

-- 
Thanks,
Adam




[Qemu-devel] Re: [RFC][PATCH v5 04/21] virtagent: transport definitions and job callbacks

2010-12-06 Thread Adam Litke
On Fri, 2010-12-03 at 12:03 -0600, Michael Roth wrote:
 +static void va_http_send_handler(void *opaque)
 +{
 +VAHTState *s = va_state-send_state;
 +enum va_http_status http_status;
 +int fd = va_state-fd;
 +int ret;
 +
 +TRACE(called, fd: %d, fd);
 +
 +switch (s-state) {

Why is there a VA_SEND_START state when it always falls through to
VA_SEND_HDR?  Is there any difference between these?

 +case VA_SEND_START:
 +s-state = VA_SEND_HDR;
 +case VA_SEND_HDR:
 +do {
 +ret = write(fd, s-hdr + s-hdr_pos, s-hdr_len - s-hdr_pos);
 +if (ret = 0) {
 +break;
 +}
 +s-hdr_pos += ret;
 +} while (s-hdr_pos  s-hdr_len);
 +if (ret == -1) {
 +if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) {
 +return;
 +} else {
 +LOG(error writing header: %s, strerror(errno));
 +goto out_bad;
 +}
 +} else if (ret == 0) {
 +LOG(connected closed unexpectedly);
 +goto out_bad;
 +} else {
 +s-state = VA_SEND_BODY;
 +}
 +case VA_SEND_BODY:
 +do {
 +ret = write(fd, s-content + s-content_pos,
 +s-content_len - s-content_pos);
 +if (ret = 0) {
 +break;
 +}
 +s-content_pos += ret;
 +} while (s-content_pos  s-content_len);
 +if (ret == -1) {
 +if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) {
 +return;
 +} else {
 +LOG(error writing content: %s, strerror(errno));
 +goto out_bad;
 +}
 +} else if (ret == 0) {
 +LOG(connected closed unexpectedly);
 +goto out_bad;
 +} else {
 +http_status = VA_HTTP_STATUS_OK;
 +goto out;
 +}
 +default:
 +LOG(unknown state);
 +goto out_bad;
 +}
 +
 +out_bad:
 +http_status = VA_HTTP_STATUS_ERROR;
 +out:
 +s-send_cb(http_status, s-content, s-content_len);
 +qemu_set_fd_handler(fd, va_http_read_handler, NULL, NULL);
 +}

-- 
Thanks,
Adam




[Qemu-devel] Re: [RFC][PATCH v5 09/21] virtagent: add va.getdmesg RPC

2010-12-06 Thread Adam Litke
On Fri, 2010-12-03 at 12:03 -0600, Michael Roth wrote:
 +/* va_getdmesg(): return dmesg output
 + * rpc return values:
 + *   - dmesg output as a string
 + */
 +static xmlrpc_value *va_getdmesg(xmlrpc_env *env,
 +  xmlrpc_value *param,
 +  void *user_data)
 +{
 +char *dmesg_buf = NULL, cmd[256];
 +int ret;
 +xmlrpc_value *result = NULL;
 +FILE *pipe;
 +
 +SLOG(va_getdmesg());
 +
 +dmesg_buf = qemu_mallocz(VA_DMESG_LEN + 2048);
 +sprintf(cmd, dmesg -s %d, VA_DMESG_LEN);
 +
 +pipe = popen(cmd, r);
 +if (pipe == NULL) {
 +LOG(popen failed: %s, strerror(errno));
 +xmlrpc_faultf(env, popen failed: %s, strerror(errno));
 +goto EXIT_NOCLOSE;
 +}
 +
 +ret = fread(dmesg_buf, sizeof(char), VA_DMESG_LEN, pipe);
 +if (!ferror(pipe)) {
 +dmesg_buf[ret] = '\0';
 +TRACE(dmesg:\n%s, dmesg_buf);
 +result = xmlrpc_build_value(env, s, dmesg_buf);
 +} else {
 +LOG(fread failed);
 +xmlrpc_faultf(env, popen failed: %s, strerror(errno));
 +}
 +
 +pclose(pipe);
 +EXIT_NOCLOSE:

I think goto labels are supposed to be lower-case.

-- 
Thanks,
Adam




[Qemu-devel] Re: [RFC][PATCH v5 19/21] virtagent: add virtagent guest daemon

2010-12-06 Thread Adam Litke
On Fri, 2010-12-03 at 12:03 -0600, Michael Roth wrote:
 +static void usage(const char *cmd)
 +{
 +printf(
 +Usage: %s -c channel_opts\n
 +QEMU virtagent guest agent\n
 +\n
 +  -c, --channel channel options of the form:\n
 +method:addr:port[:channel_id]\n
 +  -v, --verbose display extra debugging information\n
 +  -h, --helpdisplay this help and exit\n
 +\n
 +  channels are used to establish a data connection between 2 end-points 
 in\n
 +  the host or the guest (connection method specified by method).\n
 +  The positional parameters for channels are:\n
 +\n
 +  method: one of unix-connect, unix-listen, tcp-connect, 
 tcp-listen,\n
 +virtserial-open\n

The usage information doesn't agree with the parameters the program
accepts (virtserial-open - virtio-serial)

 +  addr:   path of unix socket or virtserial port, or IP of host, to\n
 +connect/bind to\n
 +  port:   port to bind/connect to, or '-' if addr is a path\n
 +\n
 +Report bugs to mdr...@linux.vnet.ibm.com\n
 +, cmd);
 +}

-- 
Thanks,
Adam




[Qemu-devel] Re: [RFC][RESEND][PATCH v1 02/15] virtproxy: qemu-vp, standalone daemon skeleton

2010-11-05 Thread Adam Litke
On Thu, 2010-11-04 at 08:57 -0500, Michael Roth wrote:
  This resembles vl.c's main_loop_wait() but I can see why you might want
  your own.  There is opportunity for sharing the select logic and ioh
  callbacks but I think that could be addressed later.
 
 
 Yup these are all basically copy/pastes from vl.c. It feels a bit dirty, 
 but I modeled things after the other qemu tools like qemu-nbd/qemu-io, 
 which don't link against vl.c (and adding a target for tools to do so 
 looks like it'd be a bit hairy since vl.c touches basically everything).

 It might still make sense to share things like structs...but the ones 
 I'm stealing here are specific to reproducing the main_loop_wait logic. 
 So I guess the real question is whether main_loop_wait and friends make 
 sense to expose as a utility function of some sort, and virtproxy seems 
 to be the only use case so far.

You make a fair point about following precedent, but the thought of
dual-maintaining code into the future is not that appealing.  I guess we
could benefit from other voices on this topic.

-- 
Thanks,
Adam




[Qemu-devel] Re: [RFC][RESEND][PATCH v1 13/15] virtproxy: add read handler for proxied connections

2010-11-03 Thread Adam Litke
On Wed, 2010-11-03 at 10:28 -0500, Michael Roth wrote:
 reads data from client/server connections as they become readable, then
 sends the data over the channel
 
 Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
 ---
  virtproxy.c |   80 
 +++
  1 files changed, 80 insertions(+), 0 deletions(-)
 
 diff --git a/virtproxy.c b/virtproxy.c
 index 86a8e5b..f3f7f46 100644
 --- a/virtproxy.c
 +++ b/virtproxy.c
 @@ -200,6 +200,86 @@ static VPIForward *get_iforward(const VPDriver *drv, 
 const char *service_id)
  return NULL;
  }
 
 +/* read handler for proxied connections */
 +static void vp_conn_read(void *opaque)
 +{
 +VPConn *conn = opaque;
 +VPDriver *drv = conn-drv;
 +VPPacket pkt;
 +char buf[VP_CONN_DATA_LEN];
 +int fd, count, ret;
 +bool client;
 +
 +TRACE(called with opaque: %p, drv: %p, opaque, drv);
 +
 +if (conn-state != VP_STATE_CONNECTED) {
 +LOG(invalid connection state);
 +return;
 +}
 +
 +if (conn-type != VP_CONN_CLIENT  conn-type != VP_CONN_SERVER) {
 +LOG(invalid connection type);
 +return;
 +}

When either of these happen (or some of the other errors below), would
it make sense to close and clean up this connection since you're now in
an invalid state?

 +/* TODO: all fields should be explicitly set so we shouldn't
 + * need to memset. this might hurt if we beef up VPPacket size
 + */
 +memset(pkt, 0, sizeof(VPPacket));
 +pkt.magic = VP_MAGIC;
 +
 +if (conn-type == VP_CONN_CLIENT) {
 +client = true;
 +fd = conn-client_fd;
 +} else {
 +client = false;
 +fd = conn-server_fd;
 +}
 +
 +count = read(fd, buf, VP_CONN_DATA_LEN);
 +if (count == -1) {
 +LOG(read() failed: %s, strerror(errno));
 +return;
 +} else if (count == 0) {
 +/* connection closed, tell remote end to clean up */
 +TRACE(connection closed);
 +pkt.type = VP_PKT_CONTROL;
 +pkt.payload.msg.type = VP_CONTROL_CLOSE;
 +if (client) {
 +/* we're closing the client, have remote close the server conn */
 +TRACE(closing connection for client fd %d, conn-client_fd);
 +pkt.payload.msg.args.close.client_fd = -1;
 +pkt.payload.msg.args.close.server_fd = conn-server_fd;
 +} else {
 +TRACE(closing connection for server fd %d, conn-server_fd);
 +pkt.payload.msg.args.close.server_fd = -1;
 +pkt.payload.msg.args.close.client_fd = conn-client_fd;;
 +}
 +/* clean up things on our end */
 +closesocket(fd);
 +vp_set_fd_handler(fd, NULL, NULL, NULL);
 +QLIST_REMOVE(conn, next);
 +qemu_free(conn);
 +} else {
 +TRACE(data read);
 +pkt.type = client ? VP_PKT_CLIENT : VP_PKT_SERVER;
 +pkt.payload.proxied.client_fd = conn-client_fd;
 +pkt.payload.proxied.server_fd = conn-server_fd;
 +memcpy(pkt.payload.proxied.data, buf, count);
 +pkt.payload.proxied.bytes = count;
 +}
 +
 +ret = vp_send_all(drv-channel_fd, (void *)pkt, sizeof(VPPacket));
 +if (ret == -1) {
 +LOG(error sending data over channel);
 +return;
 +}
 +if (ret != sizeof(VPPacket)) {
 +TRACE(buffer full?);
 +return;
 +}
 +}
 +
  /* accept handler for communication channel
   *
   * accept()s connection to communication channel (for sockets), and sets

-- 
Thanks,
Adam




[Qemu-devel] Re: [RFC][RESEND][PATCH v1 11/15] virtproxy: add vp_handle_packet()

2010-11-03 Thread Adam Litke
Description please.

On Wed, 2010-11-03 at 10:28 -0500, Michael Roth wrote:
 Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
 ---
  virtproxy.c |   23 +++
  1 files changed, 23 insertions(+), 0 deletions(-)
 
 diff --git a/virtproxy.c b/virtproxy.c
 index 4f56aba..5ec4e77 100644
 --- a/virtproxy.c
 +++ b/virtproxy.c
 @@ -431,6 +431,29 @@ static int vp_handle_data_packet(void *drv, const 
 VPPacket *pkt)
  return 0;
  }
 
 +static inline int vp_handle_packet(VPDriver *drv, const VPPacket *pkt)
 +{
 +int ret;
 +
 +TRACE(called with drv: %p, drv);
 +
 +if (pkt-magic != VP_MAGIC) {
 +LOG(invalid packet magic field);
 +return -1;
 +}
 +
 +if (pkt-type == VP_PKT_CONTROL) {
 +ret = vp_handle_control_packet(drv, pkt);
 +} else if (pkt-type == VP_PKT_CLIENT || pkt-type == VP_PKT_SERVER) {
 +ret = vp_handle_data_packet(drv, pkt);
 +} else {
 +LOG(invalid packet type);
 +return -1;
 +}
 +
 +return ret;
 +}
 +
  /* read handler for communication channel
   *
   * de-multiplexes data coming in over the channel. for control messages

-- 
Thanks,
Adam




[Qemu-devel] Re: [RFC][RESEND][PATCH v1 09/15] virtproxy: add handler for data packets

2010-11-03 Thread Adam Litke
On Wed, 2010-11-03 at 10:28 -0500, Michael Roth wrote:
 Process VPPackets coming in from channel and send them to the
 appropriate server/client connections.
 
 Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
 ---
  virtproxy.c |   42 ++
  1 files changed, 42 insertions(+), 0 deletions(-)
 
 diff --git a/virtproxy.c b/virtproxy.c
 index 6c3611b..57ab2b0 100644
 --- a/virtproxy.c
 +++ b/virtproxy.c
 @@ -235,6 +235,48 @@ static void vp_channel_accept(void *opaque)
  vp_set_fd_handler(drv-listen_fd, NULL, NULL, NULL);
  }
 
 +/* handle data packets
 + *
 + * process VPPackets containing data and send them to the corresponding
 + * FDs
 + */
 +static int vp_handle_data_packet(void *drv, const VPPacket *pkt)
 +{
 +int fd, ret;
 +
 +TRACE(called with drv: %p, drv);
 +
 +if (pkt-type == VP_PKT_CLIENT) {
 +TRACE(recieved client packet, client fd: %d, server fd: %d,
 +  pkt-payload.proxied.client_fd, 
 pkt-payload.proxied.server_fd);
 +fd = pkt-payload.proxied.server_fd;
 +} else if (pkt-type == VP_PKT_SERVER) {
 +TRACE(recieved server packet, client fd: %d, server fd: %d,
 +  pkt-payload.proxied.client_fd, 
 pkt-payload.proxied.server_fd);
 +fd = pkt-payload.proxied.client_fd;
 +} else {
 +TRACE(unknown packet type);
 +return -1;
 +}
 +
 +/* TODO: proxied in non-blocking mode can causes us to spin here
 + * for slow servers/clients. need to use write()'s and maintain
 + * a per-conn write queue that we clear out before sending any
 + * more data to the fd
 + */

Hmm, so a guest can cause a denial of service in the host qemu process?
Are you working on adding the write queues?

 +ret = vp_send_all(fd, (void *)pkt-payload.proxied.data,
 +pkt-payload.proxied.bytes);
 +if (ret == -1) {
 +LOG(error sending data over channel);
 +return -1;
 +} else if (ret != pkt-payload.proxied.bytes) {
 +TRACE(buffer full?);

This can happen?  Does this bring the whole connection down?  The whole
virtproxy instance?

-- 
Thanks,
Adam




[Qemu-devel] Re: [RFC][RESEND][PATCH v1 12/15] virtproxy: interfaces to set/remove VPIForwards

2010-11-03 Thread Adam Litke
Description please.

On Wed, 2010-11-03 at 10:28 -0500, Michael Roth wrote:
 Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
 ---
  virtproxy.c |   59 
 +++
  virtproxy.h |2 ++
  2 files changed, 61 insertions(+), 0 deletions(-)
 
 diff --git a/virtproxy.c b/virtproxy.c
 index 5ec4e77..86a8e5b 100644
 --- a/virtproxy.c
 +++ b/virtproxy.c
 @@ -658,3 +658,62 @@ int vp_set_oforward(VPDriver *drv, int fd, const char 
 *service_id)
 
  return 0;
  }
 +
 +/* add/modify a service_id - net/unix socket mapping
 + *
 + * service_id is a user-defined id for the service. this is what the
 + * remote end will use to proxy connections to a specific service on
 + * our end.
 + *
 + * if port is NULL, addr is the address of the net socket the
 + * service is running on. otherwise, addr is the path to the unix socket
 + * the service is running on.
 + *
 + * if port AND addr are NULL, find and remove the current iforward
 + * for this service_id if it exists.
 + *
 + * ipv6 is a bool denoting whether or not to use ipv6
 + */
 +int vp_set_iforward(VPDriver *drv, const char *service_id, const char *addr,
 +const char *port, bool ipv6)
 +{
 +VPIForward *f = get_iforward(drv, service_id);
 +
 +if (addr == NULL  port == NULL) {
 +if (f != NULL) {
 +qemu_opts_del(f-socket_opts);
 +QLIST_REMOVE(f, next);
 +qemu_free(f);
 +}
 +return 0;
 +}
 +
 +if (f == NULL) {
 +f = qemu_mallocz(sizeof(VPIForward));
 +f-drv = drv;
 +strncpy(f-service_id, service_id, VP_SERVICE_ID_LEN);
 +QLIST_INSERT_HEAD(drv-iforwards, f, next);
 +} else {
 +qemu_opts_del(f-socket_opts);
 +}
 +
 +/* stick socket-related options in a QemuOpts so we can
 + * utilize qemu socket utility functions directly
 + */
 +f-socket_opts = qemu_opts_create(vp_socket_opts, NULL, 0);
 +if (port == NULL) {
 +/* no port given, assume unix path */
 +qemu_opt_set(f-socket_opts, path, addr);
 +} else {
 +qemu_opt_set(f-socket_opts, host, addr);
 +qemu_opt_set(f-socket_opts, port, port);
 +}
 +
 +if (ipv6) {
 +qemu_opt_set(f-socket_opts, ipv6, on);
 +} else {
 +qemu_opt_set(f-socket_opts, ipv4, on);
 +}
 +
 +return 0;
 +}
 diff --git a/virtproxy.h b/virtproxy.h
 index 39d5d40..d2522b3 100644
 --- a/virtproxy.h
 +++ b/virtproxy.h
 @@ -34,5 +34,7 @@ int vp_set_fd_handler(int fd,
  /* virtproxy interface */
  VPDriver *vp_new(int fd, bool listen);
  int vp_set_oforward(VPDriver *drv, int fd, const char *service_id);
 +int vp_set_iforward(VPDriver *drv, const char *service_id, const char *addr,
 +const char *port, bool ipv6);
 
  #endif /* VIRTPROXY_H */

-- 
Thanks,
Adam




[Qemu-devel] Re: [RFC][RESEND][PATCH v1 04/15] virtproxy: list look-up functions conns/oforwards/iforwards

2010-11-03 Thread Adam Litke
You should describe your changes a little bit more on the top here.
Looks good otherwise.

On Wed, 2010-11-03 at 10:27 -0500, Michael Roth wrote:
 Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
 ---
  virtproxy.c |   44 
  1 files changed, 44 insertions(+), 0 deletions(-)
 
 diff --git a/virtproxy.c b/virtproxy.c
 index 2f8996c..fa17722 100644
 --- a/virtproxy.c
 +++ b/virtproxy.c
 @@ -149,3 +149,47 @@ static QemuOptsList vp_socket_opts = {
  { /* end if list */ }
  },
  };
 +
 +/* get VPConn by fd, client denotes whether to look for client or server */
 +static VPConn *get_conn(const VPDriver *drv, int fd, bool client)
 +{
 +VPConn *c = NULL;
 +int cur_fd;
 +
 +QLIST_FOREACH(c, drv-conns, next) {
 +cur_fd = client ? c-client_fd : c-server_fd;
 +if (cur_fd == fd) {
 +return c;
 +}
 +}
 +
 +return NULL;
 +}
 +
 +/* get VPOForward by service_id */
 +static VPOForward *get_oforward(const VPDriver *drv, const char *service_id)
 +{
 +VPOForward *f = NULL;
 +
 +QLIST_FOREACH(f, drv-oforwards, next) {
 +if (strncmp(f-service_id, service_id, VP_SERVICE_ID_LEN) == 0) {
 +return f;
 +}
 +}
 +
 +return NULL;
 +}
 +
 +/* get VPIForward by service_id */
 +static VPIForward *get_iforward(const VPDriver *drv, const char *service_id)
 +{
 +VPIForward *f = NULL;
 +
 +QLIST_FOREACH(f, drv-iforwards, next) {
 +if (strncmp(f-service_id, service_id, VP_SERVICE_ID_LEN) == 0) {
 +return f;
 +}
 +}
 +
 +return NULL;
 +}

-- 
Thanks,
Adam




[Qemu-devel] Re: [RFC][RESEND][PATCH v1 00/15] virtproxy: host/guest communication layer

2010-11-03 Thread Adam Litke
You've got a lot of objects interacting with one another in virtproxy.
I think it would help other reviewers if you could describe the
relationships between the entities such as: VPDriver, VPConn, VPChannel,
VPIForward, VPOForward, etc.  This patch series is really wiring a lot
of things together.  Your documentation could end up as a nice ASCII art
diagram and associated commentary at the top of virtproxy.c.

-- 
Thanks,
Adam




[Qemu-devel] Re: [RFC][RESEND][PATCH v1 03/15] virtproxy: add debug functions for virtproxy core

2010-11-03 Thread Adam Litke
Alas, this code exists in several other places too... I guess you can't
be responsible for cleaning up all of qemu :)

On Wed, 2010-11-03 at 10:27 -0500, Michael Roth wrote:
 Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
 ---
  virtproxy.c |   17 +
  1 files changed, 17 insertions(+), 0 deletions(-)
 
 diff --git a/virtproxy.c b/virtproxy.c
 index f30b859..2f8996c 100644
 --- a/virtproxy.c
 +++ b/virtproxy.c
 @@ -13,6 +13,23 @@
 
  #include virtproxy.h
 
 +#define DEBUG_VP
 +
 +#ifdef DEBUG_VP
 +#define TRACE(msg, ...) do { \
 +fprintf(stderr, %s:%s():L%d:  msg \n, \
 +__FILE__, __FUNCTION__, __LINE__, ## __VA_ARGS__); \
 +} while(0)
 +#else
 +#define TRACE(msg, ...) \
 +do { } while (0)
 +#endif
 +
 +#define LOG(msg, ...) do { \
 +fprintf(stderr, %s:%s():  msg \n, \
 +__FILE__, __FUNCTION__, ## __VA_ARGS__); \
 +} while(0)

-- 
Thanks,
Adam




[Qemu-devel] Re: [RFC][RESEND][PATCH v1 06/15] virtproxy: add read handler for communication channel

2010-11-03 Thread Adam Litke
On Wed, 2010-11-03 at 10:28 -0500, Michael Roth wrote:
 Handle data coming in over the channel as VPPackets: Process control
 messages and forward data from remote client/server connections to the
 appropriate server/client FD on our end.
 
 Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
 ---
  virtproxy.c |   83 
 +++
  1 files changed, 83 insertions(+), 0 deletions(-)
 
 diff --git a/virtproxy.c b/virtproxy.c
 index 20532c2..c9c3022 100644
 --- a/virtproxy.c
 +++ b/virtproxy.c
 @@ -33,6 +33,7 @@
  #define VP_SERVICE_ID_LEN 32/* max length of service id string */
  #define VP_PKT_DATA_LEN 1024/* max proxied bytes per VPPacket */
  #define VP_CONN_DATA_LEN 1024   /* max bytes conns can send at a time */
 +#define VP_CHAN_DATA_LEN 4096   /* max bytes channel can send at a time */
  #define VP_MAGIC 0x1F374059
 
  /* listening fd, one for each service we're forwarding to remote end */
 @@ -150,6 +151,8 @@ static QemuOptsList vp_socket_opts = {
  },
  };
 
 +static void vp_channel_read(void *opaque);

Try to get rid of these forward declarations.  If you really need them,
consider adding a virtproxy-internal.h.


 @@ -230,3 +233,83 @@ static void vp_channel_accept(void *opaque)
  /* dont accept anymore connections until channel_fd is closed */
  vp_set_fd_handler(drv-listen_fd, NULL, NULL, NULL);
  }
 +
 +/* read handler for communication channel
 + *
 + * de-multiplexes data coming in over the channel. for control messages
 + * we process them here, for data destined for a service or client we
 + * send it to the appropriate FD.
 + */
 +static void vp_channel_read(void *opaque)
 +{
 +VPDriver *drv = opaque;
 +VPPacket pkt;
 +int count, ret, buf_offset;
 +char buf[VP_CHAN_DATA_LEN];
 +char *pkt_ptr, *buf_ptr;
 +
 +TRACE(called with opaque: %p, drv);
 +
 +count = read(drv-channel_fd, buf, sizeof(buf));
 +
 +if (count == -1) {
 +LOG(read() failed: %s, strerror(errno));
 +return;
 +} else if (count == 0) {
 +/* TODO: channel closed, this probably shouldn't happen for 
 guest-side
 + * serial/virtio-serial connections, but need to confirm and consider
 + * what should happen in this case. as it stands this virtproxy 
 instance
 + * is basically defunct at this point, same goes for client 
 instances
 + * of virtproxy where the remote end has hung-up.
 + */
 +LOG(channel connection closed);
 +vp_set_fd_handler(drv-channel_fd, NULL, NULL, drv);
 +drv-channel_fd = -1;
 +if (drv-listen_fd) {
 +vp_set_fd_handler(drv-listen_fd, vp_channel_accept, NULL, drv);
 +}
 +/* TODO: should close/remove/delete all existing VPConns here */

Looks like you have a little work TODO here still.  Perhaps you could
add some reset/init functions that would make handling this easier.  The
ability to reset state at both the channel and VPConn levels should give
you the ability to handle errors more gracefully in other places where
you currently just log them.

 +}
 +
 +if (drv-buflen + count = sizeof(VPPacket)) {
 +TRACE(initial packet, drv-buflen: %d, drv-buflen);
 +pkt_ptr = (char *)pkt;
 +memcpy(pkt_ptr, drv-buf, drv-buflen);

Can drv-buflen ever be  sizeof(VPPacket) ?  You might consider adding
an assert(drv-buflen  sizeof(VPPacket)) unless it's mathematically
impossible.

 +pkt_ptr += drv-buflen;
 +memcpy(pkt_ptr, buf, sizeof(VPPacket) - drv-buflen);
 +/* handle first packet */
 +ret = vp_handle_packet(drv, pkt);
 +if (ret != 0) {
 +LOG(error handling packet);
 +}
 +/* handle the rest of the buffer */
 +buf_offset = sizeof(VPPacket) - drv-buflen;
 +drv-buflen = 0;
 +buf_ptr = buf + buf_offset;
 +count -= buf_offset;
 +while (count  0) {
 +if (count = sizeof(VPPacket)) {
 +/* handle full packet */
 +TRACE(additional packet, drv-buflen: %d, drv-buflen);
 +memcpy((void *)pkt, buf_ptr, sizeof(VPPacket));
 +ret = vp_handle_packet(drv, pkt);
 +if (ret != 0) {
 +LOG(error handling packet);
 +}
 +count -= sizeof(VPPacket);
 +buf_ptr += sizeof(VPPacket);
 +} else {
 +/* buffer the remainder */
 +TRACE(buffering packet);
 +memcpy(drv-buf, buf_ptr, count);
 +drv-buflen = count;
 +break;
 +}
 +}
 +} else {
 +/* haven't got a full VPPacket yet, buffer for later */
 +buf_ptr = drv-buf + drv-buflen;
 +memcpy(buf_ptr, buf, count);
 +drv-buflen += count;

With this algorithm you can hold some data hostage indefinitely.  You
either need to send out smaller packets of 

[Qemu-devel] Re: [RFC][RESEND][PATCH v1 02/15] virtproxy: qemu-vp, standalone daemon skeleton

2010-11-03 Thread Adam Litke
On Wed, 2010-11-03 at 10:27 -0500, Michael Roth wrote:
 +/* mirror qemu I/O-related code for standalone daemon */
 +typedef struct IOHandlerRecord {
 +int fd;
 +IOCanReadHandler *fd_read_poll;
 +IOHandler *fd_read;
 +IOHandler *fd_write;
 +int deleted;
 +void *opaque;
 +/* temporary data */
 +struct pollfd *ufd;
 +QLIST_ENTRY(IOHandlerRecord) next;
 +} IOHandlerRecord;

As you say, this is exactly the same structure as defined in vl.c.  If
you copy and paste code for a new feature you should be looking for a
way to share it instead.  Why not create a separate patch that defines
this structure in vl.h which can then be included by vl.c and virtproxy
code.

 +static QLIST_HEAD(, IOHandlerRecord) io_handlers =
 +QLIST_HEAD_INITIALIZER(io_handlers);
 +
 +int vp_set_fd_handler2(int fd,
 + IOCanReadHandler *fd_read_poll,
 + IOHandler *fd_read,
 + IOHandler *fd_write,
 + void *opaque)
 +{
 +IOHandlerRecord *ioh;
 +
 +if (!fd_read  !fd_write) {
 +QLIST_FOREACH(ioh, io_handlers, next) {
 +if (ioh-fd == fd) {
 +ioh-deleted = 1;
 +break;
 +}
 +}
 +} else {
 +QLIST_FOREACH(ioh, io_handlers, next) {
 +if (ioh-fd == fd)
 +goto found;
 +}
 +ioh = qemu_mallocz(sizeof(IOHandlerRecord));
 +QLIST_INSERT_HEAD(io_handlers, ioh, next);
 +found:
 +ioh-fd = fd;
 +ioh-fd_read_poll = fd_read_poll;
 +ioh-fd_read = fd_read;
 +ioh-fd_write = fd_write;
 +ioh-opaque = opaque;
 +ioh-deleted = 0;
 +}
 +return 0;
 +}

Copied from vl.c -- export it instead.

 +int vp_set_fd_handler(int fd,
 +IOHandler *fd_read,
 +IOHandler *fd_write,
 +void *opaque)
 +{
 +return vp_set_fd_handler2(fd, NULL, fd_read, fd_write, opaque);
 +}
 +
 +int vp_send_all(int fd, const void *buf, int len1)
 +{
 +int ret, len;
 +
 +len = len1;
 +while (len  0) {
 +ret = write(fd, buf, len);
 +if (ret  0) {
 +if (errno != EINTR  errno != EAGAIN) {
 +warn(write() failed);
 +return -1;
 +}
 +} else if (ret == 0) {
 +break;
 +} else {
 +buf += ret;
 +len -= ret;
 +}
 +}
 +return len1 - len;
 +}

Copied from qemu-char.c -- Share please.

 +static void main_loop_wait(int nonblocking)
 +{
 +IOHandlerRecord *ioh;
 +fd_set rfds, wfds, xfds;
 +int ret, nfds;
 +struct timeval tv;
 +int timeout = 1000;
 +
 +if (nonblocking) {
 +timeout = 0;
 +}
 +
 +/* poll any events */
 +nfds = -1;
 +FD_ZERO(rfds);
 +FD_ZERO(wfds);
 +FD_ZERO(xfds);
 +QLIST_FOREACH(ioh, io_handlers, next) {
 +if (ioh-deleted)
 +continue;
 +if (ioh-fd_read 
 +(!ioh-fd_read_poll ||
 + ioh-fd_read_poll(ioh-opaque) != 0)) {
 +FD_SET(ioh-fd, rfds);
 +if (ioh-fd  nfds)
 +nfds = ioh-fd;
 +}
 +if (ioh-fd_write) {
 +FD_SET(ioh-fd, wfds);
 +if (ioh-fd  nfds)
 +nfds = ioh-fd;
 +}
 +}
 +
 +tv.tv_sec = timeout / 1000;
 +tv.tv_usec = (timeout % 1000) * 1000;
 +
 +ret = select(nfds + 1, rfds, wfds, xfds, tv);
 +
 +if (ret  0) {
 +IOHandlerRecord *pioh;
 +
 +QLIST_FOREACH_SAFE(ioh, io_handlers, next, pioh) {
 +if (ioh-deleted) {
 +QLIST_REMOVE(ioh, next);
 +qemu_free(ioh);
 +continue;
 +}
 +if (ioh-fd_read  FD_ISSET(ioh-fd, rfds)) {
 +ioh-fd_read(ioh-opaque);
 +}
 +if (ioh-fd_write  FD_ISSET(ioh-fd, wfds)) {
 +ioh-fd_write(ioh-opaque);
 +}
 +}
 +}
 +

This resembles vl.c's main_loop_wait() but I can see why you might want
your own.  There is opportunity for sharing the select logic and ioh
callbacks but I think that could be addressed later.

-- 
Thanks,
Adam




[Qemu-devel] Re: [RFC][RESEND][PATCH v1 01/15] virtproxy: base data structures and constants

2010-11-03 Thread Adam Litke
On Wed, 2010-11-03 at 10:27 -0500, Michael Roth wrote:
 +static QemuOptsList vp_socket_opts = {
 +.name = vp_socket_opts,
 +.head = QTAILQ_HEAD_INITIALIZER(vp_socket_opts.head),
 +.desc = {
 +{
 +.name = path,
 +.type = QEMU_OPT_STRING,
 +},{
 +.name = host,
 +.type = QEMU_OPT_STRING,
 +},{
 +.name = port,
 +.type = QEMU_OPT_STRING,
 +},{
 +.name = ipv4,
 +.type = QEMU_OPT_BOOL,
 +},{
 +.name = ipv6,
 +.type = QEMU_OPT_BOOL,
 +},
 +{ /* end if list */ }

end of list

 +/* wrappers for s/vp/qemu/ functions we need */
 +int vp_send_all(int fd, const void *buf, int len1);
 +int vp_set_fd_handler2(int fd,
 + IOCanReadHandler *fd_read_poll,
 + IOHandler *fd_read,
 + IOHandler *fd_write,
 + void *opaque);
 +int vp_set_fd_handler(int fd,
 +IOHandler *fd_read,
 +IOHandler *fd_write,
 +void *opaque);

Where are the corresponding definitions for these functions?  The
declarations should appear in that patch.

-- 
Thanks,
Adam




[Qemu-devel] Re: [RFC][RESEND][PATCH v1 07/15] virtproxy: add vp_new() VPDriver constructor

2010-11-03 Thread Adam Litke
Be more descriptive here please.

On Wed, 2010-11-03 at 10:28 -0500, Michael Roth wrote:
 Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
 ---
  virtproxy.c |   23 +++
  virtproxy.h |3 +++
  2 files changed, 26 insertions(+), 0 deletions(-)
 
 diff --git a/virtproxy.c b/virtproxy.c
 index c9c3022..cc0ac9a 100644
 --- a/virtproxy.c
 +++ b/virtproxy.c
 @@ -313,3 +313,26 @@ static void vp_channel_read(void *opaque)
  drv-buflen += count;
  }
  }
 +
 +/* create/init VPDriver object */
 +VPDriver *vp_new(int fd, bool listen)
 +{
 +VPDriver *drv = NULL;
 +
 +drv = qemu_mallocz(sizeof(VPDriver));
 +drv-listen_fd = -1;
 +drv-channel_fd = -1;
 +QLIST_INIT(drv-oforwards);
 +QLIST_INIT(drv-conns);
 +
 +if (listen) {
 +/* provided FD is to be listened on for channel connection */
 +drv-listen_fd = fd;
 +vp_set_fd_handler(drv-listen_fd, vp_channel_accept, NULL, drv);
 +} else {
 +drv-channel_fd = fd;
 +vp_set_fd_handler(drv-channel_fd, vp_channel_read, NULL, drv);
 +}
 +
 +return drv;
 +}
 diff --git a/virtproxy.h b/virtproxy.h
 index 0203421..3df1691 100644
 --- a/virtproxy.h
 +++ b/virtproxy.h
 @@ -31,4 +31,7 @@ int vp_set_fd_handler(int fd,
  IOHandler *fd_write,
  void *opaque);
 
 +/* virtproxy interface */
 +VPDriver *vp_new(int fd, bool listen);
 +
  #endif /* VIRTPROXY_H */

-- 
Thanks,
Adam




[Qemu-devel] Re: [RFC][RESEND][PATCH v1 05/15] virtproxy: add accept handler for communication channel

2010-11-03 Thread Adam Litke
On Wed, 2010-11-03 at 10:28 -0500, Michael Roth wrote:
 This accept()'s connections to the socket we told virt-proxy to listen
 for the channel connection on and sets the appropriate read handler for
 the resulting FD.
 
 Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
 ---
  virtproxy.c |   37 +
  1 files changed, 37 insertions(+), 0 deletions(-)
 
 diff --git a/virtproxy.c b/virtproxy.c
 index fa17722..20532c2 100644
 --- a/virtproxy.c
 +++ b/virtproxy.c
 @@ -166,6 +166,8 @@ static VPConn *get_conn(const VPDriver *drv, int fd, bool 
 client)
  return NULL;
  }
 
 +static void vp_channel_accept(void *opaque);
 +

Why the forward declaration?  Can you move the function to a different
place in the file to avoid this?


 +/* accept handler for communication channel
 + *
 + * accept()s connection to communication channel (for sockets), and sets
 + * up the read handler for resulting FD.
 + */
 +static void vp_channel_accept(void *opaque)
 +{
 +VPDriver *drv = opaque;
 +struct sockaddr_in saddr;
 +struct sockaddr *addr;
 +socklen_t len;
 +int fd;
 +
 +TRACE(called with opaque: %p, drv);
 +
 +for(;;) {
 +len = sizeof(saddr);
 +addr = (struct sockaddr *)saddr;
 +fd = qemu_accept(drv-listen_fd, addr, len);
 +
 +if (fd  0  errno != EINTR) {
 +TRACE(accept() failed);
 +return;
 +} else if (fd = 0) {
 +TRACE(accepted connection);
 +break;
 +}
 +}
 +
 +drv-channel_fd = fd;
 +vp_set_fd_handler(drv-channel_fd, vp_channel_read, NULL, drv);
 +/* dont accept anymore connections until channel_fd is closed */
 +vp_set_fd_handler(drv-listen_fd, NULL, NULL, NULL);
 +}

-- 
Thanks,
Adam




[Qemu-devel] Re: [RFC][RESEND][PATCH v1 09/15] virtproxy: add handler for data packets

2010-11-03 Thread Adam Litke
On Wed, 2010-11-03 at 10:28 -0500, Michael Roth wrote: 
 Process VPPackets coming in from channel and send them to the
 appropriate server/client connections.
 
 Signed-off-by: Michael Roth mdr...@linux.vnet.ibm.com
 ---
  virtproxy.c |   42 ++
  1 files changed, 42 insertions(+), 0 deletions(-)
 
 diff --git a/virtproxy.c b/virtproxy.c
 index 6c3611b..57ab2b0 100644
 --- a/virtproxy.c
 +++ b/virtproxy.c
 @@ -235,6 +235,48 @@ static void vp_channel_accept(void *opaque)
  vp_set_fd_handler(drv-listen_fd, NULL, NULL, NULL);
  }
 
 +/* handle data packets
 + *
 + * process VPPackets containing data and send them to the corresponding
 + * FDs
 + */
 +static int vp_handle_data_packet(void *drv, const VPPacket *pkt)
 +{
 +int fd, ret;
 +
 +TRACE(called with drv: %p, drv);
 +
 +if (pkt-type == VP_PKT_CLIENT) {
 +TRACE(recieved client packet, client fd: %d, server fd: %d,
 +  pkt-payload.proxied.client_fd, 
 pkt-payload.proxied.server_fd);
 +fd = pkt-payload.proxied.server_fd;
 +} else if (pkt-type == VP_PKT_SERVER) {
 +TRACE(recieved server packet, client fd: %d, server fd: %d,
 +  pkt-payload.proxied.client_fd, 
 pkt-payload.proxied.server_fd);
 +fd = pkt-payload.proxied.client_fd;
 +} else {
 +TRACE(unknown packet type);
 +return -1;
 +}
 +
 +/* TODO: proxied in non-blocking mode can causes us to spin here
 + * for slow servers/clients. need to use write()'s and maintain
 + * a per-conn write queue that we clear out before sending any
 + * more data to the fd
 + */

Hmm, so a guest can cause a denial of service in the host qemu process?
Are you working on adding the write queues?

 +ret = vp_send_all(fd, (void *)pkt-payload.proxied.data,
 +pkt-payload.proxied.bytes);
 +if (ret == -1) {
 +LOG(error sending data over channel);
 +return -1;
 +} else if (ret != pkt-payload.proxied.bytes) {
 +TRACE(buffer full?);

This can happen?  

 +return -1;
 +}
 +
 +return 0;
 +}
 +
  /* read handler for communication channel
   *
   * de-multiplexes data coming in over the channel. for control messages

-- 
Thanks,
Adam





Re: [Qemu-devel] [PATCH] Disable virtio-balloon memory stats interface

2010-09-14 Thread Adam Litke
On Tue, 2010-09-14 at 12:46 -0300, Luiz Capitulino wrote:
 On Tue, 14 Sep 2010 12:42:00 -0300
 Eduardo Habkost ehabk...@redhat.com wrote:
 
  I keep my suggestion: if all we need is to change the way info balloon
  behaves, then we can simply change the info balloon behavior, intead
  of changing the guest-visible machine model.
 
 Adam, what problems do you see with this solution?

I have been sufficiently convinced that the feature bits won't work
nicely in the face of migration. 

 We need to resolve this asap for 0.13.

Let's go ahead with Eduardo's patch then.

-- 
Thanks,
Adam




Re: [Qemu-devel] [PATCH] Disable virtio-balloon memory stats interface

2010-09-14 Thread Adam Litke
Ok, I can see how this will work better for the migration case.

Acked-by: Adam Litke a...@us.ibm.com

On Tue, 2010-09-14 at 11:09 -0300, Eduardo Habkost wrote:
 This field is guest-visible, won't this cause problems on migration?
 
 Isn't it better to disable it on the info balloon side, so the guest
 knows that the host may start requesting stat info eventually? I suggest
 doing this:
 
 ---
 diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
 index 1e4dfdd..92e9057 100644
 --- a/hw/virtio-balloon.c
 +++ b/hw/virtio-balloon.c
 @@ -30,6 +30,10 @@
  #include sys/mman.h
  #endif
 
 +/* Disable guest-provided stats by now (bz#623903, bz#626544) */
 +#define ENABLE_GUEST_STATS   0
 +
 +
  typedef struct VirtIOBalloon
  {
  VirtIODevice vdev;
 @@ -84,12 +88,14 @@ static QObject *get_stats_qobject(VirtIOBalloon *dev)
VIRTIO_BALLOON_PFN_SHIFT);
 
  stat_put(dict, actual, actual);
 +#if ENABLE_GUEST_STATS
  stat_put(dict, mem_swapped_in, dev-stats[VIRTIO_BALLOON_S_SWAP_IN]);
  stat_put(dict, mem_swapped_out, dev-stats[VIRTIO_BALLOON_S_SWAP_OUT]);
  stat_put(dict, major_page_faults, dev-stats[VIRTIO_BALLOON_S_MAJFLT]);
  stat_put(dict, minor_page_faults, dev-stats[VIRTIO_BALLOON_S_MINFLT]);
  stat_put(dict, free_mem, dev-stats[VIRTIO_BALLOON_S_MEMFREE]);
  stat_put(dict, total_mem, dev-stats[VIRTIO_BALLOON_S_MEMTOT]);
 +#endif
 
  return QOBJECT(dict);
  }
 @@ -215,7 +221,7 @@ static void virtio_balloon_to_target(void *opaque, 
 ram_addr_t target,
  }
  dev-stats_callback = cb;
  dev-stats_opaque_callback_data = cb_data; 
 -if (dev-vdev.guest_features  (1  VIRTIO_BALLOON_F_STATS_VQ)) {
 +if (ENABLE_GUEST_STATS  (dev-vdev.guest_features  (1  
 VIRTIO_BALLOON_F_STATS_VQ))) {
  virtqueue_push(dev-svq, dev-stats_vq_elem, 
 dev-stats_vq_offset);
  virtio_notify(dev-vdev, dev-svq);
  } else {
 

-- 
Thanks,
Adam




[Qemu-devel] [PATCH] [For 0.13] Disable virtio-balloon memory stats interface

2010-09-08 Thread Adam Litke
The addition of memory stats reporting to the virtio balloon causes
the 'info balloon' command to become asynchronous.  This is a regression
because in some cases it can hang the user monitor.

To fix this regression, the virtio balloon memory stats feature is being
disabled in qemu-0.13.

Signed-off-by: Adam Litke a...@us.ibm.com
---
 hw/virtio-balloon.c |   12 +++-
 1 files changed, 11 insertions(+), 1 deletions(-)

diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index 9fe3886..269bc22 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -190,7 +190,17 @@ static void virtio_balloon_set_config(VirtIODevice *vdev,
 
 static uint32_t virtio_balloon_get_features(VirtIODevice *vdev, uint32_t f)
 {
-f |= (1  VIRTIO_BALLOON_F_STATS_VQ);
+/*
+ * The addition of memory stats reporting to the virtio balloon causes
+ * the 'info balloon' command to become asynchronous.  This is a regression
+ * because in some cases it can hang the user monitor.
+ *
+ * To fix this regression, the virtio balloon memory stats feature is being
+ * disabled in qemu-0.13.
+ *
+ * -aglitke
+ */
+/* f |= (1  VIRTIO_BALLOON_F_STATS_VQ); */
 return f;
 }
 
-- 
1.6.1.3




[Qemu-devel] [PATCH] Disable virtio-balloon memory stats interface

2010-08-30 Thread Adam Litke
Just got back from vacation and saw this thread.  I agree with Anthony
that the best thing to do is disable the memory stats interface for
0.13.  We need to fix the underlying problems in qemu with respect to
asynchronous commands first, then we can look at re-enabling the
feature.

The virtio feature negotiation mechanism allows the feature to be
disabled by commenting out a single line of code.  This late in the 0.13
release cycle, this is the safest option.  We can refactor/remove code
in the development tree as needed.

On Mon, 2010-08-30 at 17:01 +0200, Markus Armbruster wrote:
 Anyone care to submit a patch?

Sure...

[PATCH] Disable virtio-balloon memory stats interface

The addition of memory stats reporting to the virtio balloon causes
the 'info balloon' command to become asynchronous.  This is a regression
because management tools that consume this command were not designed to
handle lost or delayed responses.

To fix this regression, the virtio balloon memory stats feature is being
disabled in qemu-0.13.

Signed-off-by: Adam Litke a...@us.ibm.com

diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index 9fe3886..2d80382 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -190,7 +190,18 @@ static void virtio_balloon_set_config(VirtIODevice *vdev,
 
 static uint32_t virtio_balloon_get_features(VirtIODevice *vdev, uint32_t f)
 {
-f |= (1  VIRTIO_BALLOON_F_STATS_VQ);
+/*
+ * The addition of memory stats reporting to the virtio balloon causes
+ * the 'info balloon' command to become asynchronous.  This is a regression
+ * because management tools that consume this command were not designed to
+ * handle lost or delayed responses.
+ *
+ * To fix this regression, the virtio balloon memory stats feature is being
+ * disabled in qemu-0.13.
+ *
+ * -aglitke
+ */
+/* f |= (1  VIRTIO_BALLOON_F_STATS_VQ); */
 return f;
 }
 


-- 
Thanks,
Adam




[Qemu-devel] [PATCH] balloon: Fix overflow when reporting actual memory size

2010-03-25 Thread Adam Litke

Beginning with its introduction, the virtio balloon has had an overflow error
that causes 'info balloon' to misreport the actual memory size when the balloon
itself becomes larger than 4G.  Use a cast when converting dev-actual from
pages to kB to prevent overflows.

Before:
(qemu) info balloon
balloon: actual=5120
(qemu) balloon 1025
(qemu) info balloon
balloon: actual=1025
(qemu) balloon 1024
(qemu) info balloon
balloon: actual=5120

After:
(qemu) info balloon
balloon: actual=5120
(qemu) balloon 1025
(qemu) info balloon
balloon: actual=1025
(qemu) balloon 1024
(qemu) info balloon
balloon: actual=1024

Signed-off-by: Adam Litke a...@us.ibm.com
---
hw/virtio-balloon.c |3 ++-
1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index 086d9d1..6eedab1 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -78,7 +78,8 @@ static void stat_put(QDict *dict, const char *label, uint64_t 
val)
static QObject *get_stats_qobject(VirtIOBalloon *dev)
{
QDict *dict = qdict_new();
-uint32_t actual = ram_size - (dev-actual  VIRTIO_BALLOON_PFN_SHIFT);
+uint64_t actual = ram_size - ((uint64_t) dev-actual 
+  VIRTIO_BALLOON_PFN_SHIFT);

stat_put(dict, actual, actual);
stat_put(dict, mem_swapped_in, dev-stats[VIRTIO_BALLOON_S_SWAP_IN]);
--
1.6.3.3






Re: [Qemu-devel] Ideas wiki for GSoC 2010

2010-03-12 Thread Adam Litke
Hi Luiz.  Around the time when I introduced the new Asynchronous monitor
command API we had talked about converting all commands to use this new
API so that we can cut down on duplicate code paths and confusing code.
I would like to propose this as a GSoC project idea.  Do you think it
should stand as its own project or should we merge it into your Convert
Monitor commands to the QObject API project?

-- 
Thanks,
Adam





Re: [Qemu-devel] Ideas wiki for GSoC 2010

2010-03-12 Thread Adam Litke
On Fri, 2010-03-12 at 12:22 -0300, Luiz Capitulino wrote:
 On Fri, 12 Mar 2010 09:11:31 -0600
 Adam Litke a...@us.ibm.com wrote:
 
  Hi Luiz.  Around the time when I introduced the new Asynchronous monitor
  command API we had talked about converting all commands to use this new
  API so that we can cut down on duplicate code paths and confusing code.
  I would like to propose this as a GSoC project idea.  Do you think it
  should stand as its own project or should we merge it into your Convert
  Monitor commands to the QObject API project?
 
  I think it's a project by itself, but I wonder if it's too easy/short
 for GSoC. An experienced programmer can do the conversion plus testing
 in a day or two. There are probably a number of cleanups and adaptions
 that can take more, but still seems too short.

So given the relatively small scope of this additional work, maybe it
should be an additional stretch goal to be added to your project.
Once the student(s) have gone through the trouble to familiarize
themselves with the monitor code, they would be well-positioned to
complete this extra bit.  How difficult do you imagine it will be to
convert the remaining commands over to QObject?

-- 
Thanks,
Adam





[Qemu-devel] Re: virtio: Add memory statistics reporting to the balloon driver (V8)

2010-03-09 Thread Adam Litke
On Tue, 2010-03-09 at 11:22 -0300, Luiz Capitulino wrote:
 On Tue, 09 Mar 2010 14:51:31 +0100
 Juan Quintela quint...@redhat.com wrote:
 
  Any recompilation/etc would break migration.  I have tried to understand
  what happened with monitor async commands, and my head exploded in
  indirections.
 
  The Monitor needs lots of cleanups to make things more obvious.
 
  Is there any written explanation of what are we trying to do here?
 
  Only the commit log 940cc30.
 
  Basically, an asynchronous handler has a completion function which is
 called when the handler completes.
 
  If we're in the user Monitor, it's suspended until the completion
 function is called. In QMP, the handler returns immediately and we
 _should_ be emitting an event when we have the answer.
 
  The current code doesn't do that, which seems to be a new issue.

With current git, I cannot get QMP to recognize any commands.  Unless
this is a known issue, I will look into it further to see what has
caused it.

-- 
Thanks,
Adam





[Qemu-devel] [PATCH] balloon: Do not save VM state wrt asynchronous virtio operations

2010-03-09 Thread Adam Litke
When working with the VM state (for loadvm/savevm and migration), it is not
valid to load and store pointers since the validity of those pointers cannot be
assured in the new qemu address space.  Therefore, virtio_balloon_save() and
virtio_balloon_load() must not handle the stats-related fields in struct
VirtIOBalloon.

If a memory stats request is in-flight at the time of a migration or savevm,
the request will not complete and should be resubmitted once migration or
loadvm completes.  Note that this extremely small race window can only be
triggered using QMP so it is not possible to hang the user monitor.

Signed-off-by: Adam Litke a...@us.ibm.com

diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index 086d9d1..6d12024 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -261,10 +261,6 @@ static void virtio_balloon_save(QEMUFile *f, void *opaque)
 
 qemu_put_be32(f, s-num_pages);
 qemu_put_be32(f, s-actual);
-qemu_put_buffer(f, (uint8_t *)s-stats_vq_elem, sizeof(VirtQueueElement));
-qemu_put_buffer(f, (uint8_t *)s-stats_vq_offset, sizeof(size_t));
-qemu_put_buffer(f, (uint8_t *)s-stats_callback, 
sizeof(MonitorCompletion));
-qemu_put_buffer(f, (uint8_t *)s-stats_opaque_callback_data, 
sizeof(void));
 }
 
 static int virtio_balloon_load(QEMUFile *f, void *opaque, int version_id)
@@ -278,11 +274,6 @@ static int virtio_balloon_load(QEMUFile *f, void *opaque, 
int version_id)
 
 s-num_pages = qemu_get_be32(f);
 s-actual = qemu_get_be32(f);
-qemu_get_buffer(f, (uint8_t *)s-stats_vq_elem, sizeof(VirtQueueElement));
-qemu_get_buffer(f, (uint8_t *)s-stats_vq_offset, sizeof(size_t));
-qemu_get_buffer(f, (uint8_t *)s-stats_callback, 
sizeof(MonitorCompletion));
-qemu_get_buffer(f, (uint8_t *)s-stats_opaque_callback_data, 
sizeof(void));
-
 return 0;
 }
 


-- 
Thanks,
Adam





Re: [Qemu-devel] [PATCH] Fix hanging user monitor when using balloon command

2010-02-26 Thread Adam Litke
On Fri, 2010-02-26 at 17:26 -0300, Luiz Capitulino wrote:
  This patch fixes both. One question, though:
 
  @@ -2332,6 +2331,7 @@ static int do_balloon(Monitor *mon, const QDict 
  *params,
   return -1;
   }
   
  +cb(opaque, NULL);
   return 0;
   }
 
  Can't (or shouldn't) this be called by common code upon handler
 completion?

The explicit cb() call is only needed for synchronous commands that want
to use this API.  For asynchronous commands, the top-half (ie.
do_info_balloon) returns right away and the callback is called when the
asynchronous command completes (in hw/virtio-balloon.c).  Since
do_balloon() is completely finished when it returns, it has to manually
call cb().

If we decide to merge all commands into this new API in the future, then
we could have common code call the cb() for all synchronous commands.
For now, I just implemented do_balloon() this way as a proof of concept
to show that synchronous commands can use the API. 

-- 
Thanks,
Adam





Re: [Qemu-devel] [PATCH] Fix hanging user monitor when using balloon command

2010-02-22 Thread Adam Litke
On Fri, 2010-02-19 at 15:47 -0600, Anthony Liguori wrote:
 On 02/12/2010 02:55 PM, Adam Litke wrote:
  Arghh... Adding missing S-O-B
 
  Hi Anthony.  I wonder if there was a problem when importing my async
  command handler patchset.  Since the 'balloon' command completes
  immediately, it must call the completion callback before returning.
  That call was missing but is added by the patch below.
 
  Signed-off-by: Adam Litkea...@us.ibm.com
 
 
 Applied.  Thanks.

This patch application failed.  My patch adds a cb() call in
do_balloon(), but the change in git has added the cb() call to
do_info_balloon().  That is causing qemu segfaults.  Applying the
following should correct the damage.  Thanks.

Fix for commit: 5c366a8a3d7ac71beda8499caa815cb3ea95eb58

The cb() call is needed in do_balloon(), not do_info_balloon().

Signed-off-by: Adam Litke a...@us.ibm.com

diff --git a/monitor.c b/monitor.c
index b1a6edc..c7d2117 100644
--- a/monitor.c
+++ b/monitor.c
@@ -2309,7 +2309,6 @@ static int do_info_balloon(Monitor *mon, 
MonitorCompletion cb, void *opaque)
 return -1;
 }
 
-cb(opaque, NULL);
 return 0;
 }
 
@@ -2332,6 +2331,7 @@ static int do_balloon(Monitor *mon, const QDict *params,
 return -1;
 }
 
+cb(opaque, NULL);
 return 0;
 }
 



-- 
Thanks,
Adam





[Qemu-devel] [PATCH] Fix hanging user monitor when using balloon command

2010-02-12 Thread Adam Litke
Hi Anthony.  I wonder if there was a problem when importing my async
command handler patchset.  Since the 'balloon' command completes
immediately, it must call the completion callback before returning.
That call was missing but is added by the patch below.

diff --git a/monitor.c b/monitor.c
index ae125b8..f94794d 100644
--- a/monitor.c
+++ b/monitor.c
@@ -2258,6 +2258,7 @@ static int do_balloon(Monitor *mon, const QDict *params,
 return -1;
 }
 
+cb(opaque, NULL);
 return 0;
 }

-- 
Thanks,
Adam





[Qemu-devel] [PATCH] Fix hanging user monitor when using balloon command

2010-02-12 Thread Adam Litke
Arghh... Adding missing S-O-B

Hi Anthony.  I wonder if there was a problem when importing my async
command handler patchset.  Since the 'balloon' command completes
immediately, it must call the completion callback before returning.
That call was missing but is added by the patch below.

Signed-off-by: Adam Litke a...@us.ibm.com

diff --git a/monitor.c b/monitor.c
index ae125b8..f94794d 100644
--- a/monitor.c
+++ b/monitor.c
@@ -2258,6 +2258,7 @@ static int do_balloon(Monitor *mon, const QDict *params,
 return -1;
 }
 
+cb(opaque, NULL);
 return 0;
 }

-- 
Thanks,
Adam






[Qemu-devel] virtio: Add memory statistics reporting to the balloon driver (V8)

2010-01-26 Thread Adam Litke
The changes in V8 of this patch are related to the monitor infrastructure.  No
changes to the virtio interface core have been made since V4.  This is intended
to apply on top of my API for asynchronous monitor commands patch.

Changes since V7:
 - Ported to the asynchronous monitor API

Changes since V6:
 - Integrated with virtio qdev feature bit changes
   (specifically: Use VirtIODevice 'guest_features' to check if memory stats
   is a negotiated feature)
 - Track which monitor initiated the most recent stats request.  Now it does the
   Right Thing(tm) with multiple monitors making parallel requests.

Changes since V5:
 - Asynchronous query-balloon mode for QMP
 - Add timeout to prevent hanging the user monitor in synchronous mode

Changes since V4:
 - Virtio spec updated: 
http://ozlabs.org/~rusty/virtio-spec/virtio-spec-0.8.2.pdf
 - Guest-side Linux implementation applied by Rusty
 - Start using the QObject infrastructure
 - All endian conversions done in the host
 - Report stats that reference a quantity of memory in bytes

Changes since V3:
 - Increase stat field size to 64 bits
 - Report all sizes in kb (not pages)
 - Drop anon_pages stat

Changes since V2:
 - Use a virtqueue for communication instead of the device config space

Changes since V1:
 - In the monitor, print all stats on one line with less abbreviated names
 - Coding style changes

When using ballooning to manage overcommitted memory on a host, a system for
guests to communicate their memory usage to the host can provide information
that will minimize the impact of ballooning on the guests.  The current method
employs a daemon running in each guest that communicates memory statistics to a
host daemon at a specified time interval.  The host daemon aggregates this
information and inflates and/or deflates balloons according to the level of
host memory pressure.  This approach is effective but overly complex since a
daemon must be installed inside each guest and coordinated to communicate with
the host.  A simpler approach is to collect memory statistics in the virtio
balloon driver and communicate them directly to the hypervisor.

Signed-off-by: Adam Litke a...@us.ibm.com
To: Anthony Liguori aligu...@us.ibm.com
Cc: Avi Kivity a...@redhat.com
Cc: Luiz Capitulino lcapitul...@redhat.com
Cc: qemu-devel@nongnu.org

diff --git a/balloon.h b/balloon.h
index 60b4a5d..c3a1ad3 100644
--- a/balloon.h
+++ b/balloon.h
@@ -16,12 +16,13 @@
 
 #include cpu-defs.h
 
-typedef ram_addr_t (QEMUBalloonEvent)(void *opaque, ram_addr_t target);
+typedef void (QEMUBalloonEvent)(void *opaque, ram_addr_t target,
+MonitorCompletion cb, void *cb_data);
 
 void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque);
 
-void qemu_balloon(ram_addr_t target);
+int qemu_balloon(ram_addr_t target, MonitorCompletion cb, void *opaque);
 
-ram_addr_t qemu_balloon_status(void);
+int qemu_balloon_status(MonitorCompletion cb, void *opaque);
 
 #endif
diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index e17880f..086d9d1 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -16,9 +16,13 @@
 #include pc.h
 #include sysemu.h
 #include cpu.h
+#include monitor.h
 #include balloon.h
 #include virtio-balloon.h
 #include kvm.h
+#include qlist.h
+#include qint.h
+#include qstring.h
 
 #if defined(__linux__)
 #include sys/mman.h
@@ -27,9 +31,14 @@
 typedef struct VirtIOBalloon
 {
 VirtIODevice vdev;
-VirtQueue *ivq, *dvq;
+VirtQueue *ivq, *dvq, *svq;
 uint32_t num_pages;
 uint32_t actual;
+uint64_t stats[VIRTIO_BALLOON_S_NR];
+VirtQueueElement stats_vq_elem;
+size_t stats_vq_offset;
+MonitorCompletion *stats_callback;
+void *stats_opaque_callback_data;
 } VirtIOBalloon;
 
 static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev)
@@ -46,6 +55,42 @@ static void balloon_page(void *addr, int deflate)
 #endif
 }
 
+/*
+ * reset_stats - Mark all items in the stats array as unset
+ *
+ * This function needs to be called at device intialization and before
+ * before updating to a set of newly-generated stats.  This will ensure that no
+ * stale values stick around in case the guest reports a subset of the 
supported
+ * statistics.
+ */
+static inline void reset_stats(VirtIOBalloon *dev)
+{
+int i;
+for (i = 0; i  VIRTIO_BALLOON_S_NR; dev-stats[i++] = -1);
+}
+
+static void stat_put(QDict *dict, const char *label, uint64_t val)
+{
+if (val != -1)
+qdict_put(dict, label, qint_from_int(val));
+}
+
+static QObject *get_stats_qobject(VirtIOBalloon *dev)
+{
+QDict *dict = qdict_new();
+uint32_t actual = ram_size - (dev-actual  VIRTIO_BALLOON_PFN_SHIFT);
+
+stat_put(dict, actual, actual);
+stat_put(dict, mem_swapped_in, dev-stats[VIRTIO_BALLOON_S_SWAP_IN]);
+stat_put(dict, mem_swapped_out, dev-stats[VIRTIO_BALLOON_S_SWAP_OUT]);
+stat_put(dict, major_page_faults, dev-stats[VIRTIO_BALLOON_S_MAJFLT]);
+stat_put(dict, minor_page_faults, dev-stats

[Qemu-devel] Re: [RFC] New API for asynchronous monitor commands

2010-01-25 Thread Adam Litke
On Mon, 2010-01-25 at 11:08 -0200, Luiz Capitulino wrote:
  @@ -85,11 +91,19 @@ typedef struct mon_cmd_t {
   union {
   void (*info)(Monitor *mon);
   void (*info_new)(Monitor *mon, QObject **ret_data);
  +int  (*info_async)(Monitor *mon, QMPCompletion *cb, void *opaque);
   void (*cmd)(Monitor *mon, const QDict *qdict);
   void (*cmd_new)(Monitor *mon, const QDict *params, QObject 
  **ret_data);
  +int  (*cmd_async)(Monitor *mon, const QDict *params,
  +  QMPCompletion *cb, void *opaque);
   } mhandler;
  +int async;
   } mon_cmd_t;
 
  Is 'async' really needed, can't use 'info_async' or 'cmd_async'?

Yes.  Otherwise the code cannot tell the difference between a monitor
command that uses cmd_new and a one using the cmd_async.  They both pass
the monitor_handler_ported() test.  Unless there is some underhanded way
of determining which union type is in use for mhandler, we are stuck
with the extra variable -- that is, unless we port all cmd_new cmds to
the new async API :)

  +static void do_async_info_handler(Monitor *mon, const mon_cmd_t *cmd);
  +static void do_async_cmd_handler(Monitor *mon, const mon_cmd_t *cmd,
  + const QDict *params);
  +
 
  Isn't it possible to avoid this forward declarations?

Sure, but I found the code more readable when I could define the
handlers near monitor_call_handler().  However, I dislike forward
declarations as much as the next guy.  I'll make it go away.

   /* file descriptors passed via SCM_RIGHTS */
   typedef struct mon_fd_t mon_fd_t;
   struct mon_fd_t {
  @@ -255,6 +269,11 @@ static inline int monitor_handler_ported(const 
  mon_cmd_t *cmd)
   return cmd-user_print != NULL;
   }
   
  +static inline bool monitor_handler_is_async(const mon_cmd_t *cmd)
  +{
  +return cmd-async != 0;
  +}
  +
   static inline int monitor_has_error(const Monitor *mon)
   {
   return mon-error != NULL;
  @@ -453,6 +472,23 @@ static void do_commit(Monitor *mon, const QDict *qdict)
   }
   }
   
  +static void user_monitor_complete(void *opaque, QObject *ret_data)
  +{
  +UserQMPCompletionData *data = (UserQMPCompletionData *)opaque; 
  +
  +if (ret_data) {
  +data-user_print(data-mon, ret_data);
  +}
  +monitor_resume(data-mon);
  +qemu_free(data);
  +}
 
  MonitorCompletionData?

Sure.  I like this name too.

  +
  +static void qmp_monitor_complete(void *opaque, QObject *ret_data)
  +{
  +Monitor *mon = (Monitor *)opaque;
  +monitor_protocol_emitter(mon, ret_data);
  +}
 
  You should free ret_data as well with:
 
 qobject_decref(ret_data);

Hmm.  The way I saw this working was like so:

do_async_cmd_handler()
 cmd-mhandler.cmd_async()
 dispatch_async_cmd()
...
command_completion_event()
 QObject *ret_data = qobject_from_jsonf('foo': 'bar');
 QMPCompletion(opaque, ret_data);
 qobject_decref(ret_data);

In other words, the qobject ret_data is created by the caller of the
QMPCompletion callback.  Therefore, it seemed natural to let that
routine clean up the qobject rather than letting the callback consume
it.  I realize that this patch makes it impossible to infer the above
explanation since an example async command implementation was not
provided.  Since you designed the qobject interfaces, you have the best
idea on how it should work.  Does the above make sense?

  Also, you can pass 'opaque' directly to monitor_protocol_emitter().

Ok.

 
  +
   static void do_info(Monitor *mon, const QDict *qdict, QObject **ret_data)
   {
   const mon_cmd_t *cmd;
  @@ -476,7 +512,15 @@ static void do_info(Monitor *mon, const QDict *qdict, 
  QObject **ret_data)
   goto help;
   }
   
  -if (monitor_handler_ported(cmd)) {
  +if (monitor_handler_is_async(cmd)) {
  +do_async_info_handler(mon, cmd);
  +/*
  + * Indicate that this command is asynchronous and will not return 
  any
  + * date (not even empty).  Instead, the data will be returned via a
  + * completion callback.
 
  s/date/data

Bah, my #1 typo.

 
  + */
  +*ret_data = qobject_from_jsonf({ 'async': 'return' });
 
  This is obviously only used internally, right? Sure it's _impossible_
 to conflict with handlers' returned keys?
 
  Anyway, I think it's a good idea to have a standard for internal
 keys. Maybe something like: __mon_.

Good idea.  I'll make this change.

  +} else if (monitor_handler_ported(cmd)) {
   cmd-mhandler.info_new(mon, ret_data);
   
   if (!monitor_ctrl_mode(mon)) {
  @@ -3612,6 +3656,11 @@ static void monitor_print_error(Monitor *mon)
   mon-error = NULL;
   }
   
  +static int is_async_return(const QObject *data)
  +{
  +return data  qdict_haskey(qobject_to_qdict(data), async);
  +}
  +
   static void monitor_call_handler(Monitor *mon, const mon_cmd_t *cmd,
const QDict *params)
   {

[Qemu-devel] [PATCH] New API for asynchronous monitor commands (V2)

2010-01-25 Thread Adam Litke
Changes since V1:
 - Miscellaneous code cleanups (Thanks Luiz)

Qemu has a number of commands that can operate asynchronously (savevm, migrate,
etc) and it will be getting more.  For these commands, the user monitor needs
to be suspended, but QMP monitors could continue to to accept other commands.
This patch introduces a new command API that isolates the details of handling
different monitor types from the actual command execution.

A monitor command can use this API by implementing the mhandler.cmd_async
handler (or info_async if appropriate).  This function is responsible for
submitting the command and does not return any data although it may raise
errors.  When the command completes, the QMPCompletion callback should be
invoked with its opaque data and the command result.

The process for submitting and completing an asynchronous command is different
for QMP and user monitors.  A user monitor must be suspended at submit time and
resumed at completion time.  The user_print() function must be passed to the
QMPCompletion callback so the result can be displayed properly.  QMP monitors
are simpler.  No submit time setup is required.  When the command completes,
monitor_protocol_emitter() writes the result in JSON format.

This API can also be used to implement synchronous commands.  In this case, the
cmd_async handler should immediately call the QMPCompletion callback.  It is my
hope that this new interface will work for all commands, leading to a
drastically simplified monitor.c once all commands are ported.

Thanks to Anthony for helping me out with the initial design.

Signed-off-by: Adam Litke a...@us.ibm.com
To: Anthony Liguori anth...@codemonkey.ws
cc: Luiz Capitulino lcapitul...@redhat.com
Cc: Avi Kivity a...@redhat.com
Cc: qemu-devel@nongnu.org

diff --git a/monitor.c b/monitor.c
index cadf422..58cd02c 100644
--- a/monitor.c
+++ b/monitor.c
@@ -76,6 +76,12 @@
  *
  */
 
+typedef struct MonitorCompletionData MonitorCompletionData;
+struct MonitorCompletionData {
+Monitor *mon;
+void (*user_print)(Monitor *mon, const QObject *data);
+};
+
 typedef struct mon_cmd_t {
 const char *name;
 const char *args_type;
@@ -85,9 +91,13 @@ typedef struct mon_cmd_t {
 union {
 void (*info)(Monitor *mon);
 void (*info_new)(Monitor *mon, QObject **ret_data);
+int  (*info_async)(Monitor *mon, MonitorCompletion *cb, void *opaque);
 void (*cmd)(Monitor *mon, const QDict *qdict);
 void (*cmd_new)(Monitor *mon, const QDict *params, QObject **ret_data);
+int  (*cmd_async)(Monitor *mon, const QDict *params,
+  MonitorCompletion *cb, void *opaque);
 } mhandler;
+int async;
 } mon_cmd_t;
 
 /* file descriptors passed via SCM_RIGHTS */
@@ -255,6 +265,11 @@ static inline int monitor_handler_ported(const mon_cmd_t 
*cmd)
 return cmd-user_print != NULL;
 }
 
+static inline bool monitor_handler_is_async(const mon_cmd_t *cmd)
+{
+return cmd-async != 0;
+}
+
 static inline int monitor_has_error(const Monitor *mon)
 {
 return mon-error != NULL;
@@ -453,6 +468,65 @@ static void do_commit(Monitor *mon, const QDict *qdict)
 }
 }
 
+static void user_monitor_complete(void *opaque, QObject *ret_data)
+{
+MonitorCompletionData *data = (MonitorCompletionData *)opaque; 
+
+if (ret_data) {
+data-user_print(data-mon, ret_data);
+}
+monitor_resume(data-mon);
+qemu_free(data);
+}
+
+static void qmp_monitor_complete(void *opaque, QObject *ret_data)
+{
+monitor_protocol_emitter(opaque, ret_data);
+}
+
+static void qmp_async_cmd_handler(Monitor *mon, const mon_cmd_t *cmd,
+  const QDict *params)
+{
+cmd-mhandler.cmd_async(mon, params, qmp_monitor_complete, mon);
+}
+
+static void qmp_async_info_handler(Monitor *mon, const mon_cmd_t *cmd)
+{
+cmd-mhandler.info_async(mon, qmp_monitor_complete, mon);
+}
+
+static void user_async_cmd_handler(Monitor *mon, const mon_cmd_t *cmd,
+   const QDict *params)
+{
+int ret;
+
+MonitorCompletionData *cb_data = qemu_malloc(sizeof(*cb_data));
+cb_data-mon = mon;
+cb_data-user_print = cmd-user_print;
+monitor_suspend(mon);
+ret = cmd-mhandler.cmd_async(mon, params,
+  user_monitor_complete, cb_data);
+if (ret  0) {
+monitor_resume(mon);
+qemu_free(cb_data);
+}
+}
+
+static void user_async_info_handler(Monitor *mon, const mon_cmd_t *cmd)
+{
+int ret;
+
+MonitorCompletionData *cb_data = qemu_malloc(sizeof(*cb_data));
+cb_data-mon = mon;
+cb_data-user_print = cmd-user_print;
+monitor_suspend(mon);
+ret = cmd-mhandler.info_async(mon, user_monitor_complete, cb_data);
+if (ret  0) {
+monitor_resume(mon);
+qemu_free(cb_data);
+}
+}
+
 static void do_info(Monitor *mon, const QDict *qdict, QObject **ret_data)
 {
 const mon_cmd_t *cmd;
@@ -476,7 +550,19 @@ static void do_info

[Qemu-devel] [RFC] New API for asynchronous monitor commands

2010-01-22 Thread Adam Litke
Qemu has a number of commands that can operate asynchronously (savevm, migrate,
etc) and it will be getting more.  For these commands, the user monitor needs
to be suspended, but QMP monitors could continue to to accept other commands.
This patch introduces a new command API that isolates the details of handling
different monitor types from the actual command execution.

A monitor command can use this API by implementing the mhandler.cmd_async
handler (or info_async if appropriate).  This function is responsible for
submitting the command and does not return any data although it may raise
errors.  When the command completes, the QMPCompletion callback should be
invoked with its opaque data and the command result.

The process for submitting and completing an asynchronous command is different
for QMP and user monitors.  A user monitor must be suspended at submit time and
resumed at completion time.  The user_print() function must be passed to the
QMPCompletion callback so the result can be displayed properly.  QMP monitors
are simpler.  No submit time setup is required.  When the command completes,
monitor_protocol_emitter() writes the result in JSON format.

This API can also be used to implement synchronous commands.  In this case, the
cmd_async handler should immediately call the QMPCompletion callback.  It is my
hope that this new interface will work for all commands, leading to a
drastically simplified monitor.c once all commands are ported.

Thanks to Anthony for helping me out with the initial design.

Signed-off-by: Adam Litke a...@us.ibm.com
To: Anthony Liguori anth...@codemonkey.ws
cc: Luiz Capitulino lcapitul...@redhat.com
Cc: qemu-devel@nongnu.org

diff --git a/monitor.c b/monitor.c
index cadf422..c0d0fa9 100644
--- a/monitor.c
+++ b/monitor.c
@@ -76,6 +76,12 @@
  *
  */
 
+typedef struct UserQMPCompletionData UserQMPCompletionData;
+struct UserQMPCompletionData {
+Monitor *mon;
+void (*user_print)(Monitor *mon, const QObject *data);
+};
+
 typedef struct mon_cmd_t {
 const char *name;
 const char *args_type;
@@ -85,11 +91,19 @@ typedef struct mon_cmd_t {
 union {
 void (*info)(Monitor *mon);
 void (*info_new)(Monitor *mon, QObject **ret_data);
+int  (*info_async)(Monitor *mon, QMPCompletion *cb, void *opaque);
 void (*cmd)(Monitor *mon, const QDict *qdict);
 void (*cmd_new)(Monitor *mon, const QDict *params, QObject **ret_data);
+int  (*cmd_async)(Monitor *mon, const QDict *params,
+  QMPCompletion *cb, void *opaque);
 } mhandler;
+int async;
 } mon_cmd_t;
 
+static void do_async_info_handler(Monitor *mon, const mon_cmd_t *cmd);
+static void do_async_cmd_handler(Monitor *mon, const mon_cmd_t *cmd,
+ const QDict *params);
+
 /* file descriptors passed via SCM_RIGHTS */
 typedef struct mon_fd_t mon_fd_t;
 struct mon_fd_t {
@@ -255,6 +269,11 @@ static inline int monitor_handler_ported(const mon_cmd_t 
*cmd)
 return cmd-user_print != NULL;
 }
 
+static inline bool monitor_handler_is_async(const mon_cmd_t *cmd)
+{
+return cmd-async != 0;
+}
+
 static inline int monitor_has_error(const Monitor *mon)
 {
 return mon-error != NULL;
@@ -453,6 +472,23 @@ static void do_commit(Monitor *mon, const QDict *qdict)
 }
 }
 
+static void user_monitor_complete(void *opaque, QObject *ret_data)
+{
+UserQMPCompletionData *data = (UserQMPCompletionData *)opaque; 
+
+if (ret_data) {
+data-user_print(data-mon, ret_data);
+}
+monitor_resume(data-mon);
+qemu_free(data);
+}
+
+static void qmp_monitor_complete(void *opaque, QObject *ret_data)
+{
+Monitor *mon = (Monitor *)opaque;
+monitor_protocol_emitter(mon, ret_data);
+}
+
 static void do_info(Monitor *mon, const QDict *qdict, QObject **ret_data)
 {
 const mon_cmd_t *cmd;
@@ -476,7 +512,15 @@ static void do_info(Monitor *mon, const QDict *qdict, 
QObject **ret_data)
 goto help;
 }
 
-if (monitor_handler_ported(cmd)) {
+if (monitor_handler_is_async(cmd)) {
+do_async_info_handler(mon, cmd);
+/*
+ * Indicate that this command is asynchronous and will not return any
+ * date (not even empty).  Instead, the data will be returned via a
+ * completion callback.
+ */
+*ret_data = qobject_from_jsonf({ 'async': 'return' });
+} else if (monitor_handler_ported(cmd)) {
 cmd-mhandler.info_new(mon, ret_data);
 
 if (!monitor_ctrl_mode(mon)) {
@@ -3612,6 +3656,11 @@ static void monitor_print_error(Monitor *mon)
 mon-error = NULL;
 }
 
+static int is_async_return(const QObject *data)
+{
+return data  qdict_haskey(qobject_to_qdict(data), async);
+}
+
 static void monitor_call_handler(Monitor *mon, const mon_cmd_t *cmd,
  const QDict *params)
 {
@@ -3619,7 +3668,15 @@ static void monitor_call_handler(Monitor *mon, const 
mon_cmd_t *cmd,
 
 cmd-mhandler.cmd_new(mon

[Qemu-devel] Re: [PATCH] QMP: Fix asynchronous events delivery

2010-01-20 Thread Adam Litke
On Wed, 2010-01-20 at 10:37 -0200, Luiz Capitulino wrote:
 Commit f039a563f200beee80cc10fd70b21ea396979dab introduces
 a regression as monitor_protocol_event() will return in
 the first user Monitor it finds in the QLIST_FOREACH()
 loop.
 
 The right thing to do is to only delivery an asynchronous
 event if the 'mon' is a QMP Monitor.
 
 The aforementioned commit was an early version, if it was
 applied to stable (it should) this one has to be applied
 there too.
 
 Signed-off-by: Luiz Capitulino lcapitul...@redhat.com

Acked-by: Adam Litke a...@us.ibm.com


-- 
Thanks,
Adam





[Qemu-devel] Re: virtio: Add memory statistics reporting to the balloon driver (V7)

2010-01-18 Thread Adam Litke
On Mon, 2010-01-18 at 12:12 -0200, Luiz Capitulino wrote:
 On Fri, 15 Jan 2010 13:54:29 -0600
 Adam Litke a...@us.ibm.com wrote:
 
  This version improves support for multiple monitors and has been ported up 
  to
  HEAD as of 01/14.
 
  Overall review on the Monitor related changes seems ok, but I'm not sure
 how I should enable it.

You must run a Linux guest with the virtio balloon enabled (-balloon
virtio) and the guest kernel must support the stats API.  Rusty Russell
has accepted my patch into his tree and will be pushing it upstream.  In
the meantime, you could build a 2.6.33-rc3 kernel with the following
patch applied...

commit b9bb81c2f7ce82d636d47089d6aa279d9be704f2
Author: li...@us.ibm.com agli...@kernel.beaverton.ibm.com
Date:   Fri Jan 8 14:17:45 2010 -0800

balloon memory stats patch

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 9dd5880..f95be86 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -28,7 +28,7 @@
 struct virtio_balloon
 {
struct virtio_device *vdev;
-   struct virtqueue *inflate_vq, *deflate_vq;
+   struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
 
/* Where the ballooning thread waits for config to change. */
wait_queue_head_t config_change;
@@ -49,6 +49,10 @@ struct virtio_balloon
/* The array of pfns we tell the Host about. */
unsigned int num_pfns;
u32 pfns[256];
+
+   /* Memory statistics */
+   int need_stats_update;
+   struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
 };
 
 static struct virtio_device_id id_table[] = {
@@ -154,6 +158,72 @@ static void leak_balloon(struct virtio_balloon *vb, size_t 
num)
}
 }
 
+static inline void update_stat(struct virtio_balloon *vb, int idx,
+  u16 tag, u64 val)
+{
+   BUG_ON(idx = VIRTIO_BALLOON_S_NR);
+   vb-stats[idx].tag = tag;
+   vb-stats[idx].val = val;
+}
+
+#define pages_to_bytes(x) ((u64)(x)  PAGE_SHIFT)
+
+static void update_balloon_stats(struct virtio_balloon *vb)
+{
+   unsigned long events[NR_VM_EVENT_ITEMS];
+   struct sysinfo i;
+   int idx = 0;
+
+   all_vm_events(events);
+   si_meminfo(i);
+
+   update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN,
+   pages_to_bytes(events[PSWPIN]));
+   update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT,
+   pages_to_bytes(events[PSWPOUT]));
+   update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
+   update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
+   update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE,
+   pages_to_bytes(i.freeram));
+   update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT,
+   pages_to_bytes(i.totalram));
+}
+
+/*
+ * While most virtqueues communicate guest-initiated requests to the 
hypervisor,
+ * the stats queue operates in reverse.  The driver initializes the virtqueue
+ * with a single buffer.  From that point forward, all conversations consist of
+ * a hypervisor request (a call to this function) which directs us to refill
+ * the virtqueue with a fresh stats buffer.  Since stats collection can sleep,
+ * we notify our kthread which does the actual work via stats_handle_request().
+ */
+static void stats_request(struct virtqueue *vq)
+{
+   struct virtio_balloon *vb;
+   unsigned int len;
+
+   vb = vq-vq_ops-get_buf(vq, len);
+   if (!vb)
+   return;
+   vb-need_stats_update = 1;
+   wake_up(vb-config_change);
+}
+
+static void stats_handle_request(struct virtio_balloon *vb)
+{
+   struct virtqueue *vq;
+   struct scatterlist sg;
+
+   vb-need_stats_update = 0;
+   update_balloon_stats(vb);
+
+   vq = vb-stats_vq;
+   sg_init_one(sg, vb-stats, sizeof(vb-stats));
+   if (vq-vq_ops-add_buf(vq, sg, 1, 0, vb)  0)
+   BUG();
+   vq-vq_ops-kick(vq);
+}
+
 static void virtballoon_changed(struct virtio_device *vdev)
 {
struct virtio_balloon *vb = vdev-priv;
@@ -190,8 +260,11 @@ static int balloon(void *_vballoon)
try_to_freeze();
wait_event_interruptible(vb-config_change,
 (diff = towards_target(vb)) != 0
+|| vb-need_stats_update
 || kthread_should_stop()
 || freezing(current));
+   if (vb-need_stats_update)
+   stats_handle_request(vb);
if (diff  0)
fill_balloon(vb, diff);
else if (diff  0)
@@ -204,10 +277,10 @@ static int balloon(void *_vballoon)
 static int virtballoon_probe(struct virtio_device *vdev)
 {
struct virtio_balloon *vb;
-   struct virtqueue *vqs[2];
-   vq_callback_t *callbacks[] = { balloon_ack

[Qemu-devel] Re: [PATCH] QMP: Save default control monitor for emitting async events

2010-01-15 Thread Adam Litke
On Fri, 2010-01-15 at 11:38 -0200, Luiz Capitulino wrote:
 On Thu, 14 Jan 2010 15:20:10 -0600
 Adam Litke a...@us.ibm.com wrote:
 
  When using a control/QMP monitor in tandem with a regular monitor, 
  asynchronous
  messages can get lost depending on the order of the QEMU program arguments.
  QEMU events issued by monitor_protocol_event() always go to cur_mon.  If the
  user monitor was specified on the command line first (or it has ,default), 
  the
  message will be directed to the user monitor (not the QMP monitor).
 
  I think we have two problems here:
 
 1. Async messages are only delivered for the default Monitor, so if the QMP
 Monitor is not the default one it won't get them (not a bug if well
 documented, but it's annoying)
 
 2. On a multiple QMP Monitor setup, only one of them will get async messages
 
  This patch fixes 1. but the best fix would be to solve both problems,
 as QMP Monitors have to be equally capable IMO.
 
  There's an array with all Monitors IIRC, maybe we could loop through it
 and delivery the message to each QMP ones of them?

Sure.  This was the other way I was considering.  I'll spin up a patch
and we can see how it looks.


-- 
Thanks,
Adam





[Qemu-devel] [PATCH] QMP: Emit asynchronous events on all QMP monitors

2010-01-15 Thread Adam Litke
When using a control/QMP monitor in tandem with a regular monitor, asynchronous
messages can get lost depending on the order of the QEMU program arguments.
QEMU events issued by monitor_protocol_event() always go to cur_mon.  If the
user monitor was specified on the command line first (or it has ,default), the
message will be directed to the user monitor (not the QMP monitor).
Additionally, only one QMP session is currently able to receive async messages.

To avoid this confusion, scan through the list of monitors and emit the message
on each QMP monitor.

Signed-off-by: Adam Litke a...@us.ibm.com

diff --git a/monitor.c b/monitor.c
index 134ed15..06c8bf0 100644
--- a/monitor.c
+++ b/monitor.c
@@ -334,13 +334,10 @@ void monitor_protocol_event(MonitorEvent event, QObject 
*data)
 {
 QDict *qmp;
 const char *event_name;
-Monitor *mon = cur_mon;
+Monitor *mon;
 
 assert(event  QEVENT_MAX);
 
-if (!monitor_ctrl_mode(mon))
-return;
-
 switch (event) {
 case QEVENT_DEBUG:
 event_name = DEBUG;
@@ -373,7 +370,12 @@ void monitor_protocol_event(MonitorEvent event, QObject 
*data)
 qdict_put_obj(qmp, data, data);
 }
 
-monitor_json_emitter(mon, QOBJECT(qmp));
+QLIST_FOREACH(mon, mon_list, entry) {
+if (!monitor_ctrl_mode(mon))
+return;
+
+monitor_json_emitter(mon, QOBJECT(qmp));
+}
 QDECREF(qmp);
 }
 


-- 
Thanks,
Adam





[Qemu-devel] Re: [PATCH][RESPIN] QMP: Emit asynchronous events on all QMP monitors

2010-01-15 Thread Adam Litke
On Fri, 2010-01-15 at 13:00 -0200, Luiz Capitulino wrote:
  The function will return on the first !QMP Monitor, the
 right QLIST_FOREACH() body is:
 
 if (monitor_ctrl_mode(mon)) {
  monitor_json_emitter(mon, QOBJECT(qmp));
 }
 
  I'll ACK the respin.

Ah right, of course.  Thanks and here it is.

When using a control/QMP monitor in tandem with a regular monitor, asynchronous
messages can get lost depending on the order of the QEMU program arguments.
QEMU events issued by monitor_protocol_event() always go to cur_mon.  If the
user monitor was specified on the command line first (or it has ,default), the
message will be directed to the user monitor (not the QMP monitor).
Additionally, only one QMP session is currently able to receive async messages.

To avoid this confusion, scan through the list of monitors and emit the message
on each QMP monitor.

Signed-off-by: Adam Litke a...@us.ibm.com
Acked-by: Luiz Capitulino lcapitul...@redhat.com

diff --git a/monitor.c b/monitor.c
index 134ed15..6a2c7fb 100644
--- a/monitor.c
+++ b/monitor.c
@@ -334,13 +334,10 @@ void monitor_protocol_event(MonitorEvent event, QObject 
*data)
 {
 QDict *qmp;
 const char *event_name;
-Monitor *mon = cur_mon;
+Monitor *mon;
 
 assert(event  QEVENT_MAX);
 
-if (!monitor_ctrl_mode(mon))
-return;
-
 switch (event) {
 case QEVENT_DEBUG:
 event_name = DEBUG;
@@ -373,7 +370,11 @@ void monitor_protocol_event(MonitorEvent event, QObject 
*data)
 qdict_put_obj(qmp, data, data);
 }
 
-monitor_json_emitter(mon, QOBJECT(qmp));
+QLIST_FOREACH(mon, mon_list, entry) {
+if (monitor_ctrl_mode(mon)) {
+monitor_json_emitter(mon, QOBJECT(qmp));
+}
+}
 QDECREF(qmp);
 }
 


-- 
Thanks,
Adam





[Qemu-devel] virtio: Add memory statistics reporting to the balloon driver (V7)

2010-01-15 Thread Adam Litke
This version improves support for multiple monitors and has been ported up to
HEAD as of 01/14.

Changes since V6:
 - Integrated with virtio qdev feature bit changes
   (specifically: Use VirtIODevice 'guest_features' to check if memory stats
   is a negotiated feature)
 - Track which monitor initiated the most recent stats request.  Now it does the
   Right Thing(tm) with multiple monitors making parallel requests.

Changes since V5:
 - Asynchronous query-balloon mode for QMP
 - Add timeout to prevent hanging the user monitor in synchronous mode

Changes since V4:
 - Virtio spec updated: 
http://ozlabs.org/~rusty/virtio-spec/virtio-spec-0.8.2.pdf
 - Guest-side Linux implementation applied by Rusty
 - Start using the QObject infrastructure
 - All endian conversions done in the host
 - Report stats that reference a quantity of memory in bytes

Changes since V3:
 - Increase stat field size to 64 bits
 - Report all sizes in kb (not pages)
 - Drop anon_pages stat

Changes since V2:
 - Use a virtqueue for communication instead of the device config space

Changes since V1:
 - In the monitor, print all stats on one line with less abbreviated names
 - Coding style changes

When using ballooning to manage overcommitted memory on a host, a system for
guests to communicate their memory usage to the host can provide information
that will minimize the impact of ballooning on the guests.  The current method
employs a daemon running in each guest that communicates memory statistics to a
host daemon at a specified time interval.  The host daemon aggregates this
information and inflates and/or deflates balloons according to the level of
host memory pressure.  This approach is effective but overly complex since a
daemon must be installed inside each guest and coordinated to communicate with
the host.  A simpler approach is to collect memory statistics in the virtio
balloon driver and communicate them directly to the hypervisor.

Signed-off-by: Adam Litke a...@us.ibm.com
To: Anthony Liguori aligu...@us.ibm.com
Cc: Avi Kivity a...@redhat.com
Cc: Luiz Capitulino lcapitul...@redhat.com
Cc: qemu-devel@nongnu.org

diff --git a/balloon.h b/balloon.h
index 60b4a5d..684ea9e 100644
--- a/balloon.h
+++ b/balloon.h
@@ -16,12 +16,15 @@
 
 #include cpu-defs.h
 
-typedef ram_addr_t (QEMUBalloonEvent)(void *opaque, ram_addr_t target);
+/* Timeout for synchronous stats requests (in seconds) */
+#define QEMU_BALLOON_SYNC_TIMEOUT 5
+
+typedef void (QEMUBalloonEvent)(void *opaque, ram_addr_t target, Monitor *mon);
 
 void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque);
 
-void qemu_balloon(ram_addr_t target);
+int qemu_balloon(ram_addr_t target);
 
-ram_addr_t qemu_balloon_status(void);
+int qemu_balloon_status(Monitor *mon);
 
 #endif
diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index e17880f..b68bad7 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -19,6 +19,10 @@
 #include balloon.h
 #include virtio-balloon.h
 #include kvm.h
+#include monitor.h
+#include qlist.h
+#include qint.h
+#include qstring.h
 
 #if defined(__linux__)
 #include sys/mman.h
@@ -27,9 +31,16 @@
 typedef struct VirtIOBalloon
 {
 VirtIODevice vdev;
-VirtQueue *ivq, *dvq;
+VirtQueue *ivq, *dvq, *svq;
 uint32_t num_pages;
 uint32_t actual;
+uint64_t stats[VIRTIO_BALLOON_S_NR];
+VirtQueueElement stats_vq_elem;
+size_t stats_vq_offset;
+QEMUTimer *stats_timer;
+uint64_t stats_updated;
+Monitor *stats_mon;
+bool stats_requested;
 } VirtIOBalloon;
 
 static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev)
@@ -46,6 +57,50 @@ static void balloon_page(void *addr, int deflate)
 #endif
 }
 
+/*
+ * reset_stats - Mark all items in the stats array as unset
+ *
+ * This function needs to be called at device intialization and before
+ * before updating to a set of newly-generated stats.  This will ensure that no
+ * stale values stick around in case the guest reports a subset of the 
supported
+ * statistics.
+ */
+static inline void reset_stats(VirtIOBalloon *dev)
+{
+int i;
+for (i = 0; i  VIRTIO_BALLOON_S_NR; dev-stats[i++] = -1);
+dev-stats_updated = qemu_get_clock(host_clock);
+}
+
+static void stat_put(QDict *dict, const char *label, uint64_t val)
+{
+if (val != -1)
+qdict_put(dict, label, qint_from_int(val));
+}
+
+static QObject *get_stats_qobject(VirtIOBalloon *dev)
+{
+QDict *dict = qdict_new();
+uint32_t actual = ram_size - (dev-actual  VIRTIO_BALLOON_PFN_SHIFT);
+uint64_t age;
+
+stat_put(dict, actual, actual);
+stat_put(dict, mem_swapped_in, dev-stats[VIRTIO_BALLOON_S_SWAP_IN]);
+stat_put(dict, mem_swapped_out, dev-stats[VIRTIO_BALLOON_S_SWAP_OUT]);
+stat_put(dict, major_page_faults, dev-stats[VIRTIO_BALLOON_S_MAJFLT]);
+stat_put(dict, minor_page_faults, dev-stats[VIRTIO_BALLOON_S_MINFLT]);
+stat_put(dict, free_mem, dev-stats[VIRTIO_BALLOON_S_MEMFREE]);
+stat_put(dict, total_mem, dev-stats[VIRTIO_BALLOON_S_MEMTOT

[Qemu-devel] [PATCH] QMP: Save default control monitor for emitting async events

2010-01-14 Thread Adam Litke
When using a control/QMP monitor in tandem with a regular monitor, asynchronous
messages can get lost depending on the order of the QEMU program arguments.
QEMU events issued by monitor_protocol_event() always go to cur_mon.  If the
user monitor was specified on the command line first (or it has ,default), the
message will be directed to the user monitor (not the QMP monitor).

One solution is to save the default QMP session in another monitor pointer (ala
cur_mon) and always direct asynchronous events to that monitor...

Signed-off-by: Adam Litke a...@us.ibm.com

diff --git a/monitor.c b/monitor.c
index 134ed15..794f6ba 100644
--- a/monitor.c
+++ b/monitor.c
@@ -128,6 +128,7 @@ static const mon_cmd_t mon_cmds[];
 static const mon_cmd_t info_cmds[];
 
 Monitor *cur_mon = NULL;
+Monitor *cur_mon_control = NULL;
 
 static void monitor_command_cb(Monitor *mon, const char *cmdline,
void *opaque);
@@ -334,11 +335,11 @@ void monitor_protocol_event(MonitorEvent event, QObject 
*data)
 {
 QDict *qmp;
 const char *event_name;
-Monitor *mon = cur_mon;
+Monitor *mon = cur_mon_control;
 
 assert(event  QEVENT_MAX);
 
-if (!monitor_ctrl_mode(mon))
+if (!mon)
 return;
 
 switch (event) {
@@ -4283,6 +4284,9 @@ void monitor_init(CharDriverState *chr, int flags)
 QLIST_INSERT_HEAD(mon_list, mon, entry);
 if (!cur_mon || (flags  MONITOR_IS_DEFAULT))
 cur_mon = mon;
+if (!cur_mon_control || (flags  MONITOR_IS_DEFAULT))
+if (flags  MONITOR_USE_CONTROL)
+cur_mon_control = mon;
 }
 
 static void bdrv_password_cb(Monitor *mon, const char *password, void *opaque)
diff --git a/monitor.h b/monitor.h
index 556239a..a343589 100644
--- a/monitor.h
+++ b/monitor.h
@@ -7,6 +7,7 @@
 #include block.h
 
 extern Monitor *cur_mon;
+extern Monitor *cur_mon_control;
 
 /* flags for monitor_init */
 #define MONITOR_IS_DEFAULT0x01
diff --git a/qemu-tool.c b/qemu-tool.c
index 18b48af..cfe03d6 100644
--- a/qemu-tool.c
+++ b/qemu-tool.c
@@ -34,6 +34,7 @@ void qemu_service_io(void)
 }
 
 Monitor *cur_mon;
+Monitor *cur_mon_control;
 
 void monitor_printf(Monitor *mon, const char *fmt, ...)
 {


-- 
Thanks,
Adam





[Qemu-devel] Re: virtio: Add memory statistics reporting to the balloon driver (V6)

2010-01-13 Thread Adam Litke
On Wed, 2010-01-13 at 16:04 -0200, Luiz Capitulino wrote:
  I've tried to apply this patch to play with it, but turns out it conflicts
 with recent changes in hw/virtio-balloon.

Ahh, I will continue my never-ending quest to stay current :)

  Some comments on the QMP side of the patch follows.

Thanks for your review!

snip

  +/*
  + * complete_stats_request - Clean up and report statistics.
  + */
  +static void complete_stats_request(VirtIOBalloon *vb)
  +{
  +QObject *stats = get_stats_qobject(vb);
  +
  +if (vb-stats_request_mode == QEMU_BALLOON_MODE_SYNC) {
  +qemu_del_timer(vb-stats_timer);
  +monitor_print_balloon(cur_mon, stats);
  +monitor_resume(cur_mon);
  +} else if (vb-stats_request_mode == QEMU_BALLOON_MODE_ASYNC) {
  +monitor_protocol_event(QEVENT_BALLOON, stats);
  +}
  +
  +vb-stats_request_mode = QEMU_BALLOON_MODE_NONE;
  +}
 
  In the previous thread Anthony raised some issues about the 'cur_mon'
 usage that made me concerned, because some important code rely on
 it (read async events).
 
  As far as I could check, 'cur_mon' is guaranteed to be the default
 Monitor which is fine if you're aware of it when putting QEMU to run,
 but I'm afraid that testing your patch with two Monitors (user and qmp)
 is not going to work.
 
  Maybe not a big deal, but would be good to be aware of potential
 issues.

I talked to Anthony and will try to fix this up.  I just need to start
passing the monitor pointer around as well.

snip
  * 
  +if (monitor_ctrl_mode(mon))
  +mode = QEMU_BALLOON_MODE_ASYNC;
  +else
  +
  +mode = monitor_ctrl_mode(mon) ?
  +QEMU_BALLOON_MODE_ASYNC : 
  QEMU_BALLOON_MODE_SYNC;
 
  I think what you want is:
 
 } else {
  mode = QEMU_BALLOON_MODE_SYNC;
 }

Bah... Left over gunk. 

snip

  -void qemu_balloon(ram_addr_t target)
  +int qemu_balloon(ram_addr_t target)
   {
  -if (qemu_balloon_event)
  -qemu_balloon_event(qemu_balloon_event_opaque, target);
  +if (qemu_balloon_event) {
  +qemu_balloon_event(qemu_balloon_event_opaque, target,
  +   QEMU_BALLOON_MODE_SYNC);
  +return 1;
  +} else {
  +return 0;
  +}
   }
 
  This is used by do_balloon() right? Which is also used by QMP,
 shouldn't it also handle async vs. sync?

qemu_balloon always acts synchronously.  It does not wait on the guest
to do anything and it does not return data.

-- 
Thanks,
Adam





[Qemu-devel] virtio: Add memory statistics reporting to the balloon driver (V6)

2010-01-11 Thread Adam Litke
After some good discussion, V6 of this patch integrates well with the new QMP
support.  When the monitor is in QMP mode, the query-balloon command triggers a
stats refresh request to the guest.  This request is asynchronous.  If the
guest does not respond then nothing further happens.  When stats are updated, a
BALLOON monitor event is raised and the data element will contain the memory
statistics.

For the user monitor, a timer has been added to prevent monitor hangs with
unresponsive guests.  When the timer fires, the most recently collected stats
are returned along with an additional entry 'age' which indicates the number of
host_clock milliseconds that have passed since the stats were collected.

This method for dealing with asynchronous commands may prove useful for other
existing or future commands.  In that case, we may want to consider
incorporating this into the actual monitor API.

Changes since V5:
 - Asynchronous query-balloon mode for QMP
 - Add timeout to prevent hanging the user monitor in synchronous mode

Changes since V4:
 - Virtio spec updated: 
http://ozlabs.org/~rusty/virtio-spec/virtio-spec-0.8.2.pdf
 - Guest-side Linux implementation applied by Rusty
 - Start using the QObject infrastructure
 - All endian conversions done in the host
 - Report stats that reference a quantity of memory in bytes

Changes since V3:
 - Increase stat field size to 64 bits
 - Report all sizes in kb (not pages)
 - Drop anon_pages stat

Changes since V2:
 - Use a virtqueue for communication instead of the device config space

Changes since V1:
 - In the monitor, print all stats on one line with less abbreviated names
 - Coding style changes

When using ballooning to manage overcommitted memory on a host, a system for
guests to communicate their memory usage to the host can provide information
that will minimize the impact of ballooning on the guests.  The current method
employs a daemon running in each guest that communicates memory statistics to a
host daemon at a specified time interval.  The host daemon aggregates this
information and inflates and/or deflates balloons according to the level of
host memory pressure.  This approach is effective but overly complex since a
daemon must be installed inside each guest and coordinated to communicate with
the host.  A simpler approach is to collect memory statistics in the virtio
balloon driver and communicate them directly to the hypervisor.

Signed-off-by: Adam Litke a...@us.ibm.com
To: Anthony Liguori aligu...@us.ibm.com
Cc: Avi Kivity a...@redhat.com
Cc: Luiz Capitulino lcapitul...@redhat.com
Cc: Jamie Lokier ja...@shareable.org
Cc: qemu-devel@nongnu.org

diff --git a/balloon.h b/balloon.h
index 60b4a5d..7e29028 100644
--- a/balloon.h
+++ b/balloon.h
@@ -16,12 +16,22 @@
 
 #include cpu-defs.h
 
-typedef ram_addr_t (QEMUBalloonEvent)(void *opaque, ram_addr_t target);
+/* Timeout for synchronous stats requests (in seconds) */
+#define QEMU_BALLOON_SYNC_TIMEOUT 5
+
+typedef enum {
+QEMU_BALLOON_MODE_NONE = 0,  /* No stats request is active */
+QEMU_BALLOON_MODE_SYNC = 1,  /* Synchronous stats request */
+QEMU_BALLOON_MODE_ASYNC = 2, /* Asynchronous stats request */
+} balloon_mode_t;
+
+typedef void (QEMUBalloonEvent)(void *opaque, ram_addr_t target,
+balloon_mode_t mode);
 
 void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque);
 
-void qemu_balloon(ram_addr_t target);
+int qemu_balloon(ram_addr_t target);
 
-ram_addr_t qemu_balloon_status(void);
+int qemu_balloon_status(balloon_mode_t mode);
 
 #endif
diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index cfd3b41..bf67f4d 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -19,6 +19,10 @@
 #include balloon.h
 #include virtio-balloon.h
 #include kvm.h
+#include monitor.h
+#include qlist.h
+#include qint.h
+#include qstring.h
 
 #if defined(__linux__)
 #include sys/mman.h
@@ -27,9 +31,15 @@
 typedef struct VirtIOBalloon
 {
 VirtIODevice vdev;
-VirtQueue *ivq, *dvq;
+VirtQueue *ivq, *dvq, *svq;
 uint32_t num_pages;
 uint32_t actual;
+uint64_t stats[VIRTIO_BALLOON_S_NR];
+VirtQueueElement stats_vq_elem;
+size_t stats_vq_offset;
+balloon_mode_t stats_request_mode;
+QEMUTimer *stats_timer;
+uint64_t stats_updated;
 } VirtIOBalloon;
 
 static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev)
@@ -46,6 +56,50 @@ static void balloon_page(void *addr, int deflate)
 #endif
 }
 
+/*
+ * reset_stats - Mark all items in the stats array as unset
+ *
+ * This function needs to be called at device intialization and before
+ * before updating to a set of newly-generated stats.  This will ensure that no
+ * stale values stick around in case the guest reports a subset of the 
supported
+ * statistics.
+ */
+static inline void reset_stats(VirtIOBalloon *dev)
+{
+int i;
+for (i = 0; i  VIRTIO_BALLOON_S_NR; dev-stats[i++] = -1);
+dev-stats_updated = qemu_get_clock(host_clock);
+}
+
+static void stat_put

[Qemu-devel] Re: [RFD] virtio: Add memory statistics reporting to the balloon driver

2010-01-07 Thread Adam Litke
Thanks for the ideas and feedback.  I will play around with #4 and see
if I can hack up a prototype.

On Thu, 2010-01-07 at 09:22 -0600, Anthony Liguori wrote:
 On 01/07/2010 09:18 AM, Avi Kivity wrote:
  On 01/07/2010 05:12 PM, Anthony Liguori wrote:
 
  3) Make qemu request balloon stats regularly (maybe every 10 seconds) 
  and display the latest stats with info balloon.  This avoids the 
  problem in #2 but it means that qemu determines the poll rate instead 
  of a management tool.
 
  4) Make info-balloon a proper asynchronous command.  We need new 
  infrastructure to allow a qmp handler to take a callback that can be 
  used to delay the completion of the command.  This addresses all of 
  the above problems but it introduces a new one.  Command completion 
  now depends on the guest.  This potentially could trip up a naive 
  management tool that doesn't realize that the info-balloon command 
  may never complete.
 
  I'm on the fence between 3 and 4 myself.
 
 
  Can I tip you over to #4?  #3 means we have no idea when the stats 
  were generated.  With #4, we can't be sure, but it usually be close to 
  when the command returns.
 
  The command should include a timeout so a broken guest won't hang a 
  management tool thread.
 
 Generally, timeouts are evil but if we did something like, wait 10 
 seconds and if we don't hear a response from the guest, return the last 
 data set, I think I would be okay with that.  It means we may be 
 reporting stale data, but at the same time, the data is coming from a 
 guest so it can't be considered authoritative anyway.
 
 Regards,
 
 Anthony Liguori


-- 
Thanks,
Adam





Re: [Qemu-devel] Re: [RFD] virtio: Add memory statistics reporting to the balloon driver

2010-01-07 Thread Adam Litke
On Thu, 2010-01-07 at 15:49 +, Daniel P. Berrange wrote:
 On Thu, Jan 07, 2010 at 09:12:10AM -0600, Anthony Liguori wrote:
  On 01/05/2010 11:08 AM, Adam Litke wrote:
  This patch has been discussed (and ACKed) in the past, but has not yet
  been committed due to the congestion surrounding the 0.12 release and
  other conflicting changes.  I would like to rekindle the discussion so
  that I can make the necessary changes to get this merged.
  
  This patch is ported to 0.12.1 and has one issue that I am aware of:
  
  It still calls monitor_suspend() and monitor_resume() which I understand
  is no longer allowed.  Does anyone have any ideas on how to enable async
  monitor commands?
  
  +static void request_stats(VirtIOBalloon *vb)
  +{
  +vb-stats_requested = 1;
  +reset_stats(vb);
  +monitor_suspend(cur_mon);
 
  
  This bit is not going to work reliably anymore.  Really, it never worked 
  reliably but this is exacerbated with -qmp since a QMP session will 
  never be the cur_mon.
  
  We have a couple of options:
  
  1) Introduce a query-balloon command that returns immediately, and 
  triggers a request for the guest to update the balloon stats.  When the 
  guest does update the balloon stats, trigger an asynchronous message.  
  Asynchronous messages are ignored by the human monitor so they would 
  never be displayed there which is unfortunate.
  
  2) Make info balloon show the last valid stats and have it request new 
  stats.  Stats will always be delayed but it avoids async messages.
  
  3) Make qemu request balloon stats regularly (maybe every 10 seconds) 
  and display the latest stats with info balloon.  This avoids the problem 
  in #2 but it means that qemu determines the poll rate instead of a 
  management tool.
 
 I don't like the idea of the anything which has hardcoded polling because
 it just wastes CPU cycles for the 99% of the time when no one is going to
 care about these stats.
 
  4) Make info-balloon a proper asynchronous command.  We need new 
  infrastructure to allow a qmp handler to take a callback that can be 
  used to delay the completion of the command.  This addresses all of the 
  above problems but it introduces a new one.  Command completion now 
  depends on the guest.  This potentially could trip up a naive management 
  tool that doesn't realize that the info-balloon command may never complete.
 
 I think 'info-balloon' should be synchronous and without side-effects.
 ie return the current stats that QEMU has. We could then add a separate
 'refresh-balloon' command + async event notification when that completes.
 The tool that is requiring the stats could thus refresh as often as it
 likes.

This would work well for the QMP case, but what about for a traditional
monitor?  We could include a sequence number or timestamp in the memory
stats results so the user could tell that they were updated.  This
doesn't seem very user friendly though.

-- 
Thanks,
Adam





Re: [Qemu-devel] Planning for 0.13

2010-01-06 Thread Adam Litke
On Tue, 2010-01-05 at 06:43 -0600, Anthony Liguori wrote:
 Hi,
 
 I hope everyone had a happy new year!  Now that we've finished the 0.12 
 release and most of us have had a nice break, I think it's time to start 
 planning for the next release.
 
 0.12 felt a bit rushed to me.  I'd like to take a bit more time with 
 0.13 and try to complete features a bit more than we did in 0.12.  So I 
 propose that we target 0.13 as a 6-month cycle and target a June 1st 
 release.  Based on the -rc process for 0.12, I would like to have a two 
 week -rc cycle for 0.13 but this time, absolutely no non-bug fixes past 
 freeze.  We got sloppy in 0.12 with this and we had some ugly 
 regressions in the final release.
 
 Here are the features that I'm aware of for 0.13.  Please add anything 
 you're working on and I'll try to centralize this somewhere.

Port the -mem-path and -mem-prealloc options from qemu-kvm.
virtio-balloon enhancements for -mem-path enabled VMs.

-- 
Thanks,
Adam





[Qemu-devel] [RFD] virtio: Add memory statistics reporting to the balloon driver

2010-01-05 Thread Adam Litke
This patch has been discussed (and ACKed) in the past, but has not yet
been committed due to the congestion surrounding the 0.12 release and
other conflicting changes.  I would like to rekindle the discussion so
that I can make the necessary changes to get this merged.

This patch is ported to 0.12.1 and has one issue that I am aware of:

It still calls monitor_suspend() and monitor_resume() which I understand
is no longer allowed.  Does anyone have any ideas on how to enable async
monitor commands?

diff --git a/balloon.h b/balloon.h
index 60b4a5d..dc851d3 100644
--- a/balloon.h
+++ b/balloon.h
@@ -16,12 +16,12 @@
 
 #include cpu-defs.h
 
-typedef ram_addr_t (QEMUBalloonEvent)(void *opaque, ram_addr_t target);
+typedef QObject *(QEMUBalloonEvent)(void *opaque, ram_addr_t target);
 
 void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque);
 
-void qemu_balloon(ram_addr_t target);
+int qemu_balloon(ram_addr_t target);
 
-ram_addr_t qemu_balloon_status(void);
+QObject *qemu_balloon_status(void);
 
 #endif
diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index cfd3b41..03b9e2e 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -19,6 +19,10 @@
 #include balloon.h
 #include virtio-balloon.h
 #include kvm.h
+#include monitor.h
+#include qlist.h
+#include qint.h
+#include qstring.h
 
 #if defined(__linux__)
 #include sys/mman.h
@@ -27,9 +31,13 @@
 typedef struct VirtIOBalloon
 {
 VirtIODevice vdev;
-VirtQueue *ivq, *dvq;
+VirtQueue *ivq, *dvq, *svq;
 uint32_t num_pages;
 uint32_t actual;
+uint64_t stats[VIRTIO_BALLOON_S_NR];
+VirtQueueElement stats_vq_elem;
+size_t stats_vq_offset;
+uint8_t stats_requested;
 } VirtIOBalloon;
 
 static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev)
@@ -46,6 +54,33 @@ static void balloon_page(void *addr, int deflate)
 #endif
 }
 
+static inline void reset_stats(VirtIOBalloon *dev)
+{
+int i;
+for (i = 0; i  VIRTIO_BALLOON_S_NR; dev-stats[i++] = -1);
+}
+
+static void stat_put(QDict *dict, const char *label, uint64_t val)
+{
+if (val != -1)
+qdict_put(dict, label, qint_from_int(val));
+}
+
+static QObject *get_stats_qobject(VirtIOBalloon *dev)
+{
+QDict *dict = qdict_new();
+uint32_t actual = ram_size - (dev-actual  VIRTIO_BALLOON_PFN_SHIFT);
+
+stat_put(dict, actual, actual);
+stat_put(dict, mem_swapped_in, dev-stats[VIRTIO_BALLOON_S_SWAP_IN]);
+stat_put(dict, mem_swapped_out, dev-stats[VIRTIO_BALLOON_S_SWAP_OUT]);
+stat_put(dict, major_page_faults, dev-stats[VIRTIO_BALLOON_S_MAJFLT]);
+stat_put(dict, minor_page_faults, dev-stats[VIRTIO_BALLOON_S_MINFLT]);
+stat_put(dict, free_mem, dev-stats[VIRTIO_BALLOON_S_MEMFREE]);
+stat_put(dict, total_mem, dev-stats[VIRTIO_BALLOON_S_MEMTOT]);
+return QOBJECT(dict);
+}
+
 /* FIXME: once we do a virtio refactoring, this will get subsumed into common
  * code */
 static size_t memcpy_from_iovector(void *data, size_t offset, size_t size,
@@ -104,6 +139,36 @@ static void virtio_balloon_handle_output(VirtIODevice 
*vdev, VirtQueue *vq)
 }
 }
 
+static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
+{
+VirtIOBalloon *s = DO_UPCAST(VirtIOBalloon, vdev, vdev);
+VirtQueueElement *elem = s-stats_vq_elem;
+VirtIOBalloonStat stat;
+size_t offset = 0;
+
+if (!virtqueue_pop(vq, elem))
+return;
+
+while (memcpy_from_iovector(stat, offset, sizeof(stat), elem-out_sg,
+elem-out_num) == sizeof(stat)) {
+uint16_t tag = tswap16(stat.tag);
+uint64_t val = tswap64(stat.val);
+
+offset += sizeof(stat);
+if (tag  VIRTIO_BALLOON_S_NR)
+s-stats[tag] = val;
+}
+s-stats_vq_offset = offset;
+
+if (s-stats_requested) {
+QObject *stats = get_stats_qobject(s);
+monitor_print_balloon(cur_mon, stats);
+qobject_decref(stats);
+monitor_resume(cur_mon);
+s-stats_requested = 0;
+}
+}
+
 static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
 {
 VirtIOBalloon *dev = to_virtio_balloon(vdev);
@@ -126,12 +191,22 @@ static void virtio_balloon_set_config(VirtIODevice *vdev,
 
 static uint32_t virtio_balloon_get_features(VirtIODevice *vdev)
 {
-return 0;
+return 1  VIRTIO_BALLOON_F_STATS_VQ;
+}
+
+static void request_stats(VirtIOBalloon *vb)
+{
+vb-stats_requested = 1;
+reset_stats(vb);
+monitor_suspend(cur_mon);
+virtqueue_push(vb-svq, vb-stats_vq_elem, vb-stats_vq_offset);
+virtio_notify(vb-vdev, vb-svq);
 }
 
-static ram_addr_t virtio_balloon_to_target(void *opaque, ram_addr_t target)
+static QObject *virtio_balloon_to_target(void *opaque, ram_addr_t target)
 {
 VirtIOBalloon *dev = opaque;
+QObject *ret = NULL;
 
 if (target  ram_size)
 target = ram_size;
@@ -139,9 +214,15 @@ static ram_addr_t virtio_balloon_to_target(void *opaque, 
ram_addr_t target)
 if (target) {
 dev-num_pages 

[Qemu-devel] [PATCH][For stable-0.12] virtio: Add memory statistics reporting to the balloon driver (V5)

2009-12-09 Thread Adam Litke
Reverted the vmstate changes since that has not made it into stable-0.12 yet.

This iteration addresses all of the comments from the last round.  Thanks to
everyone for their careful reviews and helpful comments.  The most significant
change in this version is my use of the QObject API, so a concentrated review
in that area would be most appreciated.  I am hoping to target 0.12.0 with this
patch.  Please let me know if that remains a possibility.  Thanks.

Changes since V4:
 - Virtio spec updated: 
http://ozlabs.org/~rusty/virtio-spec/virtio-spec-0.8.2.pdf
 - Guest-side Linux implementation applied by Rusty
 - Start using the QObject infrastructure
 - All endian conversions done in the host
 - Report stats that reference a quantity of memory in bytes

Changes since V3:
 - Increase stat field size to 64 bits
 - Report all sizes in kb (not pages)
 - Drop anon_pages stat

Changes since V2:
 - Use a virtqueue for communication instead of the device config space

Changes since V1:
 - In the monitor, print all stats on one line with less abbreviated names
 - Coding style changes

When using ballooning to manage overcommitted memory on a host, a system for
guests to communicate their memory usage to the host can provide information
that will minimize the impact of ballooning on the guests.  The current method
employs a daemon running in each guest that communicates memory statistics to a
host daemon at a specified time interval.  The host daemon aggregates this
information and inflates and/or deflates balloons according to the level of
host memory pressure.  This approach is effective but overly complex since a
daemon must be installed inside each guest and coordinated to communicate with
the host.  A simpler approach is to collect memory statistics in the virtio
balloon driver and communicate them directly to the hypervisor.

This patch implements the qemu side of the communication channel.  I will post
the kernel driver modifications in-reply to this message.

Signed-off-by: Adam Litke a...@us.ibm.com
Cc: Anthony Liguori aligu...@us.ibm.com
Cc: Avi Kivity a...@redhat.com
Cc: Rusty Russell ru...@rustcorp.com.au
Cc: qemu-devel@nongnu.org


diff --git a/balloon.h b/balloon.h
index 60b4a5d..23bbffe 100644
--- a/balloon.h
+++ b/balloon.h
@@ -16,12 +16,12 @@
 
 #include cpu-defs.h
 
-typedef ram_addr_t (QEMUBalloonEvent)(void *opaque, ram_addr_t target);
+typedef QObject *(QEMUBalloonEvent)(void *opaque, ram_addr_t target);
 
 void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque);
 
 void qemu_balloon(ram_addr_t target);
 
-ram_addr_t qemu_balloon_status(void);
+QObject *qemu_balloon_status(void);
 
 #endif
diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index cfd3b41..f3bc138 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -19,6 +19,10 @@
 #include balloon.h
 #include virtio-balloon.h
 #include kvm.h
+#include monitor.h
+#include qlist.h
+#include qint.h
+#include qstring.h
 
 #if defined(__linux__)
 #include sys/mman.h
@@ -27,9 +31,13 @@
 typedef struct VirtIOBalloon
 {
 VirtIODevice vdev;
-VirtQueue *ivq, *dvq;
+VirtQueue *ivq, *dvq, *svq;
 uint32_t num_pages;
 uint32_t actual;
+uint64_t stats[VIRTIO_BALLOON_S_NR];
+VirtQueueElement stats_vq_elem;
+size_t stats_vq_offset;
+uint8_t stats_requested;
 } VirtIOBalloon;
 
 static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev)
@@ -46,6 +54,35 @@ static void balloon_page(void *addr, int deflate)
 #endif
 }
 
+static inline void reset_stats(VirtIOBalloon *dev)
+{
+int i;
+for (i = 0; i  VIRTIO_BALLOON_S_NR; dev-stats[i++] = -1);
+}
+
+static void stat_put(QList *list, const char *label, uint64_t val)
+{
+if (val != -1) {
+qlist_append(list, qstring_from_str(label));
+qlist_append(list, qint_from_int(val));
+}
+}
+
+static QObject *get_stats_qobject(VirtIOBalloon *dev)
+{
+QList *list = qlist_new();
+uint32_t actual = ram_size - (dev-actual  VIRTIO_BALLOON_PFN_SHIFT);
+
+stat_put(list, actual, (int)actual  20);
+stat_put(list, mem_swapped_in, dev-stats[VIRTIO_BALLOON_S_SWAP_IN]);
+stat_put(list, mem_swapped_out, dev-stats[VIRTIO_BALLOON_S_SWAP_OUT]);
+stat_put(list, major_page_faults, dev-stats[VIRTIO_BALLOON_S_MAJFLT]);
+stat_put(list, minor_page_faults, dev-stats[VIRTIO_BALLOON_S_MINFLT]);
+stat_put(list, free_mem, dev-stats[VIRTIO_BALLOON_S_MEMFREE]);
+stat_put(list, total_mem, dev-stats[VIRTIO_BALLOON_S_MEMTOT]);
+return QOBJECT(list);
+}
+
 /* FIXME: once we do a virtio refactoring, this will get subsumed into common
  * code */
 static size_t memcpy_from_iovector(void *data, size_t offset, size_t size,
@@ -104,6 +141,36 @@ static void virtio_balloon_handle_output(VirtIODevice 
*vdev, VirtQueue *vq)
 }
 }
 
+static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
+{
+VirtIOBalloon *s = DO_UPCAST(VirtIOBalloon, vdev, vdev);
+VirtQueueElement *elem = s-stats_vq_elem;
+VirtIOBalloonStat

[Qemu-devel] [FOR 0.12] [PATCH] Updated: virtio: Add memory statistics reporting to the balloon driver (V5)

2009-12-03 Thread Adam Litke
Updated to fix a compile problem with my vmstate conversion...

This iteration addresses all of the comments from the last round.  Thanks to
everyone for their careful reviews and helpful comments.  The most significant
change in this version is my use of the QObject API, so a concentrated review
in that area would be most appreciated.  I am hoping to target 0.12.0 with this
patch.  Please let me know if that remains a possibility.  Thanks.

Changes since V4:
 - Virtio spec updated: 
http://ozlabs.org/~rusty/virtio-spec/virtio-spec-0.8.2.pdf
 - Guest-side Linux implementation applied by Rusty
 - Start using the QObject infrastructure
 - All endian conversions done in the host
 - Report stats that reference a quantity of memory in bytes

Changes since V3:
 - Increase stat field size to 64 bits
 - Report all sizes in kb (not pages)
 - Drop anon_pages stat

Changes since V2:
 - Use a virtqueue for communication instead of the device config space

Changes since V1:
 - In the monitor, print all stats on one line with less abbreviated names
 - Coding style changes

When using ballooning to manage overcommitted memory on a host, a system for
guests to communicate their memory usage to the host can provide information
that will minimize the impact of ballooning on the guests.  The current method
employs a daemon running in each guest that communicates memory statistics to a
host daemon at a specified time interval.  The host daemon aggregates this
information and inflates and/or deflates balloons according to the level of
host memory pressure.  This approach is effective but overly complex since a
daemon must be installed inside each guest and coordinated to communicate with
the host.  A simpler approach is to collect memory statistics in the virtio
balloon driver and communicate them directly to the hypervisor.

This patch implements the qemu side of the communication channel.  I will post
the kernel driver modifications in-reply to this message.

Signed-off-by: Adam Litke a...@us.ibm.com
Cc: Anthony Liguori aligu...@us.ibm.com
Cc: Avi Kivity a...@redhat.com
Cc: Rusty Russell ru...@rustcorp.com.au
Cc: qemu-devel@nongnu.org

diff --git a/balloon.h b/balloon.h
index 60b4a5d..23bbffe 100644
--- a/balloon.h
+++ b/balloon.h
@@ -16,12 +16,12 @@
 
 #include cpu-defs.h
 
-typedef ram_addr_t (QEMUBalloonEvent)(void *opaque, ram_addr_t target);
+typedef QObject *(QEMUBalloonEvent)(void *opaque, ram_addr_t target);
 
 void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque);
 
 void qemu_balloon(ram_addr_t target);
 
-ram_addr_t qemu_balloon_status(void);
+QObject *qemu_balloon_status(void);
 
 #endif
diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index f461c32..47da6ab 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -19,6 +19,10 @@
 #include balloon.h
 #include virtio-balloon.h
 #include kvm.h
+#include monitor.h
+#include qlist.h
+#include qint.h
+#include qstring.h
 
 #if defined(__linux__)
 #include sys/mman.h
@@ -27,9 +31,13 @@
 typedef struct VirtIOBalloon
 {
 VirtIODevice vdev;
-VirtQueue *ivq, *dvq;
+VirtQueue *ivq, *dvq, *svq;
 uint32_t num_pages;
 uint32_t actual;
+uint64_t stats[VIRTIO_BALLOON_S_NR];
+VirtQueueElement stats_vq_elem;
+size_t stats_vq_offset;
+uint8_t stats_requested;
 } VirtIOBalloon;
 
 static void balloon_page(void *addr, int deflate)
@@ -41,6 +49,35 @@ static void balloon_page(void *addr, int deflate)
 #endif
 }
 
+static inline void reset_stats(VirtIOBalloon *dev)
+{
+int i;
+for (i = 0; i  VIRTIO_BALLOON_S_NR; dev-stats[i++] = -1);
+}
+
+static void stat_put(QList *list, const char *label, uint64_t val)
+{
+if (val != -1) {
+qlist_append(list, qstring_from_str(label));
+qlist_append(list, qint_from_int(val));
+}
+}
+
+static QObject *get_stats_qobject(VirtIOBalloon *dev)
+{
+QList *list = qlist_new();
+uint32_t actual = ram_size - (dev-actual  VIRTIO_BALLOON_PFN_SHIFT);
+
+stat_put(list, actual, (int)actual  20);
+stat_put(list, mem_swapped_in, dev-stats[VIRTIO_BALLOON_S_SWAP_IN]);
+stat_put(list, mem_swapped_out, dev-stats[VIRTIO_BALLOON_S_SWAP_OUT]);
+stat_put(list, major_page_faults, dev-stats[VIRTIO_BALLOON_S_MAJFLT]);
+stat_put(list, minor_page_faults, dev-stats[VIRTIO_BALLOON_S_MINFLT]);
+stat_put(list, free_mem, dev-stats[VIRTIO_BALLOON_S_MEMFREE]);
+stat_put(list, total_mem, dev-stats[VIRTIO_BALLOON_S_MEMTOT]);
+return QOBJECT(list);
+}
+
 /* FIXME: once we do a virtio refactoring, this will get subsumed into common
  * code */
 static size_t memcpy_from_iovector(void *data, size_t offset, size_t size,
@@ -99,6 +136,36 @@ static void virtio_balloon_handle_output(VirtIODevice 
*vdev, VirtQueue *vq)
 }
 }
 
+static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
+{
+VirtIOBalloon *s = DO_UPCAST(VirtIOBalloon, vdev, vdev);
+VirtQueueElement *elem = s-stats_vq_elem;
+VirtIOBalloonStat stat;
+size_t offset = 0

[Qemu-devel] virtio: Add memory statistics reporting to the balloon driver (V5)

2009-12-01 Thread Adam Litke
This iteration addresses all of the comments from the last round.  Thanks to
everyone for their careful reviews and helpful comments.  The most significant
change in this version is my use of the QObject API, so a concentrated review
in that area would be most appreciated.  I am hoping to target 0.12.0 with this
patch.  Please let me know if that remains a possibility.  Thanks.

Changes since V4:
 - Virtio spec updated: 
http://ozlabs.org/~rusty/virtio-spec/virtio-spec-0.8.2.pdf
 - Guest-side Linux implementation applied by Rusty
 - Start using the QObject infrastructure
 - All endian conversions done in the host
 - Report stats that reference a quantity of memory in bytes

Changes since V3:
 - Increase stat field size to 64 bits
 - Report all sizes in kb (not pages)
 - Drop anon_pages stat

Changes since V2:
 - Use a virtqueue for communication instead of the device config space

Changes since V1:
 - In the monitor, print all stats on one line with less abbreviated names
 - Coding style changes

When using ballooning to manage overcommitted memory on a host, a system for
guests to communicate their memory usage to the host can provide information
that will minimize the impact of ballooning on the guests.  The current method
employs a daemon running in each guest that communicates memory statistics to a
host daemon at a specified time interval.  The host daemon aggregates this
information and inflates and/or deflates balloons according to the level of
host memory pressure.  This approach is effective but overly complex since a
daemon must be installed inside each guest and coordinated to communicate with
the host.  A simpler approach is to collect memory statistics in the virtio
balloon driver and communicate them directly to the hypervisor.

This patch implements the qemu side of the communication channel.  I will post
the kernel driver modifications in-reply to this message.

Signed-off-by: Adam Litke a...@us.ibm.com
Cc: Anthony Liguori aligu...@us.ibm.com
Cc: Avi Kivity a...@redhat.com
Cc: Rusty Russell ru...@rustcorp.com.au
Cc: qemu-devel@nongnu.org

diff --git a/balloon.h b/balloon.h
index 60b4a5d..23bbffe 100644
--- a/balloon.h
+++ b/balloon.h
@@ -16,12 +16,12 @@
 
 #include cpu-defs.h
 
-typedef ram_addr_t (QEMUBalloonEvent)(void *opaque, ram_addr_t target);
+typedef QObject *(QEMUBalloonEvent)(void *opaque, ram_addr_t target);
 
 void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque);
 
 void qemu_balloon(ram_addr_t target);
 
-ram_addr_t qemu_balloon_status(void);
+QObject *qemu_balloon_status(void);
 
 #endif
diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index cfd3b41..3a67c39 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -19,6 +19,10 @@
 #include balloon.h
 #include virtio-balloon.h
 #include kvm.h
+#include monitor.h
+#include qlist.h
+#include qint.h
+#include qstring.h
 
 #if defined(__linux__)
 #include sys/mman.h
@@ -27,9 +31,13 @@
 typedef struct VirtIOBalloon
 {
 VirtIODevice vdev;
-VirtQueue *ivq, *dvq;
+VirtQueue *ivq, *dvq, *svq;
 uint32_t num_pages;
 uint32_t actual;
+uint64_t stats[VIRTIO_BALLOON_S_NR];
+VirtQueueElement stats_vq_elem;
+size_t stats_vq_offset;
+uint8_t stats_requested;
 } VirtIOBalloon;
 
 static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev)
@@ -46,6 +54,35 @@ static void balloon_page(void *addr, int deflate)
 #endif
 }
 
+static inline void reset_stats(VirtIOBalloon *dev)
+{
+int i;
+for (i = 0; i  VIRTIO_BALLOON_S_NR; dev-stats[i++] = -1);
+}
+
+static void stat_put(QList *list, const char *label, uint64_t val)
+{
+if (val != -1) {
+qlist_append(list, qstring_from_str(label));
+qlist_append(list, qint_from_int(val));
+}
+}
+
+static QObject *get_stats_qobject(VirtIOBalloon *dev)
+{
+QList *list = qlist_new();
+uint32_t actual = ram_size - (dev-actual  VIRTIO_BALLOON_PFN_SHIFT);
+
+stat_put(list, actual, (int)actual  20);
+stat_put(list, mem_swapped_in, dev-stats[VIRTIO_BALLOON_S_SWAP_IN]);
+stat_put(list, mem_swapped_out, dev-stats[VIRTIO_BALLOON_S_SWAP_OUT]);
+stat_put(list, major_page_faults, dev-stats[VIRTIO_BALLOON_S_MAJFLT]);
+stat_put(list, minor_page_faults, dev-stats[VIRTIO_BALLOON_S_MINFLT]);
+stat_put(list, free_mem, dev-stats[VIRTIO_BALLOON_S_MEMFREE]);
+stat_put(list, total_mem, dev-stats[VIRTIO_BALLOON_S_MEMTOT]);
+return QOBJECT(list);
+}
+
 /* FIXME: once we do a virtio refactoring, this will get subsumed into common
  * code */
 static size_t memcpy_from_iovector(void *data, size_t offset, size_t size,
@@ -104,6 +141,36 @@ static void virtio_balloon_handle_output(VirtIODevice 
*vdev, VirtQueue *vq)
 }
 }
 
+static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
+{
+VirtIOBalloon *s = to_virtio_balloon(vdev);
+VirtQueueElement *elem = s-stats_vq_elem;
+VirtIOBalloonStat stat;
+size_t offset = 0;
+
+if (!virtqueue_pop(vq, elem))
+return

[Qemu-devel] virtio: Report new guest memory statistics pertinent to memory ballooning (V4)

2009-11-19 Thread Adam Litke
Avi and Anthony,
If you agree that I've addressed all outstanding issues, please consider this
patch for inclusion.  Thanks.

Changes since V3:
 - Increase stat field size to 64 bits
 - Report all sizes in kb (not pages)
 - Drop anon_pages stat

Changes since V2:
 - Use a virtqueue for communication instead of the device config space

Changes since V1:
 - In the monitor, print all stats on one line with less abbreviated names
 - Coding style changes

When using ballooning to manage overcommitted memory on a host, a system for
guests to communicate their memory usage to the host can provide information
that will minimize the impact of ballooning on the guests.  The current method
employs a daemon running in each guest that communicates memory statistics to a
host daemon at a specified time interval.  The host daemon aggregates this
information and inflates and/or deflates balloons according to the level of
host memory pressure.  This approach is effective but overly complex since a
daemon must be installed inside each guest and coordinated to communicate with
the host.  A simpler approach is to collect memory statistics in the virtio
balloon driver and communicate them directly to the hypervisor.

This patch implements the qemu side of the communication channel.  I will post
the kernel driver modifications in-reply to this message.

Signed-off-by: Adam Litke a...@us.ibm.com
Cc: Anthony Liguori aligu...@us.ibm.com
Cc: Avi Kivity a...@redhat.com
Cc: qemu-devel@nongnu.org

diff --git a/balloon.h b/balloon.h
index 60b4a5d..def4c56 100644
--- a/balloon.h
+++ b/balloon.h
@@ -16,12 +16,12 @@
 
 #include cpu-defs.h
 
-typedef ram_addr_t (QEMUBalloonEvent)(void *opaque, ram_addr_t target);
+typedef int (QEMUBalloonEvent)(void *opaque, ram_addr_t target);
 
 void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque);
 
 void qemu_balloon(ram_addr_t target);
 
-ram_addr_t qemu_balloon_status(void);
+int qemu_balloon_status(void);
 
 #endif
diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index cfd3b41..b90e1f2 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -19,6 +19,7 @@
 #include balloon.h
 #include virtio-balloon.h
 #include kvm.h
+#include monitor.h
 
 #if defined(__linux__)
 #include sys/mman.h
@@ -27,9 +28,13 @@
 typedef struct VirtIOBalloon
 {
 VirtIODevice vdev;
-VirtQueue *ivq, *dvq;
+VirtQueue *ivq, *dvq, *svq;
 uint32_t num_pages;
 uint32_t actual;
+uint64_t stats[VIRTIO_BALLOON_S_NR];
+VirtQueueElement stats_vq_elem;
+size_t stats_vq_offset;
+uint8_t stats_requested;
 } VirtIOBalloon;
 
 static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev)
@@ -46,6 +51,33 @@ static void balloon_page(void *addr, int deflate)
 #endif
 }
 
+static inline void reset_stats(VirtIOBalloon *dev)
+{
+int i;
+for (i = 0; i  VIRTIO_BALLOON_S_NR; dev-stats[i++] = -1);
+}
+
+static inline void print_stat(uint64_t val, const char *label)
+{
+if (val != -1) {
+monitor_printf(cur_mon, ,%s=%lld, label, (unsigned long long)val);
+}
+}
+
+static void virtio_balloon_print_stats(VirtIOBalloon *dev)
+{
+uint32_t actual = ram_size - (dev-actual  VIRTIO_BALLOON_PFN_SHIFT);
+
+monitor_printf(cur_mon, balloon: actual=%d, (int)(actual  20));
+print_stat(dev-stats[VIRTIO_BALLOON_S_SWAP_IN], mem_swapped_in);
+print_stat(dev-stats[VIRTIO_BALLOON_S_SWAP_OUT], mem_swapped_out);
+print_stat(dev-stats[VIRTIO_BALLOON_S_MAJFLT], major_page_faults);
+print_stat(dev-stats[VIRTIO_BALLOON_S_MINFLT], minor_page_faults);
+print_stat(dev-stats[VIRTIO_BALLOON_S_MEMFREE], free_mem);
+print_stat(dev-stats[VIRTIO_BALLOON_S_MEMTOT], total_mem);
+monitor_printf(cur_mon, \n);
+}
+
 /* FIXME: once we do a virtio refactoring, this will get subsumed into common
  * code */
 static size_t memcpy_from_iovector(void *data, size_t offset, size_t size,
@@ -104,6 +136,31 @@ static void virtio_balloon_handle_output(VirtIODevice 
*vdev, VirtQueue *vq)
 }
 }
 
+static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
+{
+VirtIOBalloon *s = to_virtio_balloon(vdev);
+VirtQueueElement *elem = s-stats_vq_elem;
+VirtIOBalloonStat stat;
+size_t offset = 0;
+
+if (!virtqueue_pop(vq, elem))
+return;
+
+while (memcpy_from_iovector(stat, offset, sizeof(stat), elem-out_sg,
+elem-out_num) == sizeof(stat)) {
+offset += sizeof(stat);
+if (stat.tag  VIRTIO_BALLOON_S_NR)
+s-stats[stat.tag] = stat.val;
+}
+s-stats_vq_offset = offset;
+
+if (s-stats_requested) {
+virtio_balloon_print_stats(s);
+monitor_resume(cur_mon);
+s-stats_requested = 0;
+}
+}
+
 static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
 {
 VirtIOBalloon *dev = to_virtio_balloon(vdev);
@@ -126,10 +183,19 @@ static void virtio_balloon_set_config(VirtIODevice *vdev,
 
 static uint32_t virtio_balloon_get_features

[Qemu-devel] virtio: Add memory statistics reporting to the balloon driver (V3)

2009-11-19 Thread Adam Litke
Rusty and Anthony,
If I've addressed all outstanding issues, please consider this patch for
inclusion.  Thanks.

Changes since V2:
 - Increase stat field size to 64 bits
 - Report all sizes in kb (not pages)
 - Drop anon_pages stat and fix endianness conversion

Changes since V1:
 - Use a virtqueue instead of the device config space

When using ballooning to manage overcommitted memory on a host, a system for
guests to communicate their memory usage to the host can provide information
that will minimize the impact of ballooning on the guests.  The current method
employs a daemon running in each guest that communicates memory statistics to a
host daemon at a specified time interval.  The host daemon aggregates this
information and inflates and/or deflates balloons according to the level of
host memory pressure.  This approach is effective but overly complex since a
daemon must be installed inside each guest and coordinated to communicate with
the host.  A simpler approach is to collect memory statistics in the virtio
balloon driver and communicate them directly to the hypervisor.

This patch enables the guest-side support by adding stats collection and
reporting to the virtio balloon driver.

Signed-off-by: Adam Litke a...@us.ibm.com
Cc: Rusty Russell ru...@rustcorp.com.au
Cc: Anthony Liguori anth...@codemonkey.ws
Cc: virtualizat...@lists.linux-foundation.org
Cc: linux-ker...@vger.kernel.org

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 200c22f..ebc9d39 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -29,7 +29,7 @@
 struct virtio_balloon
 {
struct virtio_device *vdev;
-   struct virtqueue *inflate_vq, *deflate_vq;
+   struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
 
/* Where the ballooning thread waits for config to change. */
wait_queue_head_t config_change;
@@ -50,6 +50,9 @@ struct virtio_balloon
/* The array of pfns we tell the Host about. */
unsigned int num_pfns;
u32 pfns[256];
+
+   /* Memory statistics */
+   struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
 };
 
 static struct virtio_device_id id_table[] = {
@@ -155,6 +158,57 @@ static void leak_balloon(struct virtio_balloon *vb, size_t 
num)
}
 }
 
+static inline void update_stat(struct virtio_balloon *vb, int idx,
+   __le16 tag, __le64 val)
+{
+   BUG_ON(idx = VIRTIO_BALLOON_S_NR);
+   vb-stats[idx].tag = cpu_to_le16(tag);
+   vb-stats[idx].val = cpu_to_le64(val);
+}
+
+#define K(x) ((x)  (PAGE_SHIFT - 10))
+static void update_balloon_stats(struct virtio_balloon *vb)
+{
+   unsigned long events[NR_VM_EVENT_ITEMS];
+   struct sysinfo i;
+   int idx = 0;
+
+   all_vm_events(events);
+   si_meminfo(i);
+
+   update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, K(events[PSWPIN]));
+   update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT, K(events[PSWPOUT]));
+   update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
+   update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
+   update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, K(i.freeram));
+   update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT, K(i.totalram));
+}
+
+/*
+ * While most virtqueues communicate guest-initiated requests to the 
hypervisor,
+ * the stats queue operates in reverse.  The driver initializes the virtqueue
+ * with a single buffer.  From that point forward, all conversations consist of
+ * a hypervisor request (a call to this function) which directs us to refill
+ * the virtqueue with a fresh stats buffer.
+ */
+static void stats_ack(struct virtqueue *vq)
+{
+   struct virtio_balloon *vb;
+   unsigned int len;
+   struct scatterlist sg;
+
+   vb = vq-vq_ops-get_buf(vq, len);
+   if (!vb)
+   return;
+
+   update_balloon_stats(vb);
+
+   sg_init_one(sg, vb-stats, sizeof(vb-stats));
+   if (vq-vq_ops-add_buf(vq, sg, 1, 0, vb)  0)
+   BUG();
+   vq-vq_ops-kick(vq);
+}
+
 static void virtballoon_changed(struct virtio_device *vdev)
 {
struct virtio_balloon *vb = vdev-priv;
@@ -205,10 +259,10 @@ static int balloon(void *_vballoon)
 static int virtballoon_probe(struct virtio_device *vdev)
 {
struct virtio_balloon *vb;
-   struct virtqueue *vqs[2];
-   vq_callback_t *callbacks[] = { balloon_ack, balloon_ack };
-   const char *names[] = { inflate, deflate };
-   int err;
+   struct virtqueue *vqs[3];
+   vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_ack };
+   const char *names[] = { inflate, deflate, stats };
+   int err, nvqs;
 
vdev-priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
if (!vb) {
@@ -221,13 +275,28 @@ static int virtballoon_probe(struct virtio_device *vdev)
init_waitqueue_head(vb-config_change);
vb-vdev = vdev;
 
-   /* We expect two virtqueues. */
-   err

[Qemu-devel] Re: virtio: Report new guest memory statistics pertinent to memory ballooning (V4)

2009-11-19 Thread Adam Litke
On Thu, 2009-11-19 at 17:19 +0200, Avi Kivity wrote:
 On 11/19/2009 05:06 PM, Adam Litke wrote:
  Avi and Anthony,
  If you agree that I've addressed all outstanding issues, please consider 
  this
  patch for inclusion.  Thanks.
 
 
 
 I'd like to see this (and all other virtio-ABI-modifying patches) first 
 go into the virtio pci spec, then propagated to guest and host.

Where can I find information on the procedure for updating the virtio
pci spec?

  Changes since V3:
- Increase stat field size to 64 bits
- Report all sizes in kb (not pages)
 
 
 Why not bytes?  It's the most natural unit.

The precision for most of these stats (except major and minor faults)
is 4kb (at least for Linux guests on the platforms I can think of).  I
chose kb units to avoid wasting bits.  I suppose the 64bit fields are
Bigger than we could ever possibly need (tm).  Others have suggested
bytes as well so I will be happy to make the change.

  -static ram_addr_t virtio_balloon_to_target(void *opaque, ram_addr_t target)
  +static void request_stats(VirtIOBalloon *vb)
  +{
  +vb-stats_requested = 1;
  +reset_stats(vb);
  +monitor_suspend(cur_mon);
 
 
 You allow the guest to kill a monitor here.

Is there a better way to handle asynchronous communication with the
guest?

  +virtqueue_push(vb-svq,vb-stats_vq_elem, vb-stats_vq_offset);
  +virtio_notify(vb-vdev, vb-svq);
  +}
  +
 
 
 
 
  +typedef struct VirtIOBalloonStat {
  +uint16_t tag;
  +uint64_t val;
  +} VirtIOBalloonStat;
 
 
 Alignment here depends on word size.  This needs to be padded to be 
 aligned the same way on 32 and 64 bit hosts and guests.

ok.  I assume that means the structure size must be a multiple of 64
bits in size?

-- 
Thanks,
Adam





[Qemu-devel] Re: virtio: Add memory statistics reporting to the balloon driver (V3)

2009-11-19 Thread Adam Litke
On Thu, 2009-11-19 at 17:22 +0200, Avi Kivity wrote:
 On 11/19/2009 05:19 PM, Adam Litke wrote:
  Rusty and Anthony,
  If I've addressed all outstanding issues, please consider this patch for
  inclusion.  Thanks.
 
  +struct virtio_balloon_stat
  +{
  +   __le16 tag;
  +   __le64 val;
  +};
  +
 
 
 You're not doing endian conversion in the host?

No.  I was following by example.  For the virtio_balloon, the existing
code is careful so that the guest always writes data in little endian.

-- 
Thanks,
Adam





[Qemu-devel] Re: virtio: Add memory statistics reporting to the balloon driver (V3)

2009-11-19 Thread Adam Litke
On Thu, 2009-11-19 at 18:13 +0200, Avi Kivity wrote:
 On 11/19/2009 05:58 PM, Adam Litke wrote:
  On Thu, 2009-11-19 at 17:22 +0200, Avi Kivity wrote:
 
  On 11/19/2009 05:19 PM, Adam Litke wrote:
   
  Rusty and Anthony,
  If I've addressed all outstanding issues, please consider this patch for
  inclusion.  Thanks.
 
  +struct virtio_balloon_stat
  +{
  + __le16 tag;
  + __le64 val;
  +};
  +
 
 
  You're not doing endian conversion in the host?
   
  No.  I was following by example.  For the virtio_balloon, the existing
  code is careful so that the guest always writes data in little endian.
 
 
 I don't follow.  If the guest is careful to write little-endian, surely 
 the host must be equally careful to read little-endian?

That is true and, by my reading of the existing qemu virtio-balloon
device code, isn't virtio_balloon_set_config() on a big endian host
already broken?


-- 
Thanks,
Adam





[Qemu-devel] virtio: Report new guest memory statistics pertinent to memory ballooning (V3)

2009-11-17 Thread Adam Litke
virtio: Report new guest memory statistics pertinent to memory ballooning (V3)

Changes since V2:
 - Use a virtqueue for communication instead of the device config space

Changes since V1:
 - In the monitor, print all stats on one line with less abbreviated names
 - Coding style changes

When using ballooning to manage overcommitted memory on a host, a system for
guests to communicate their memory usage to the host can provide information
that will minimize the impact of ballooning on the guests.  The current method
employs a daemon running in each guest that communicates memory statistics to a
host daemon at a specified time interval.  The host daemon aggregates this
information and inflates and/or deflates balloons according to the level of
host memory pressure.  This approach is effective but overly complex since a
daemon must be installed inside each guest and coordinated to communicate with
the host.  A simpler approach is to collect memory statistics in the virtio
balloon driver and communicate them directly to the hypervisor.

This patch implements the qemu side of the communication channel.  I will post
the kernel driver modifications in-reply to this message.

Signed-off-by: Adam Litke a...@us.ibm.com
Cc: Anthony Liguori aligu...@us.ibm.com
Cc: Avi Kivity a...@redhat.com
Cc: qemu-devel@nongnu.org

diff --git a/balloon.h b/balloon.h
index 60b4a5d..def4c56 100644
--- a/balloon.h
+++ b/balloon.h
@@ -16,12 +16,12 @@
 
 #include cpu-defs.h
 
-typedef ram_addr_t (QEMUBalloonEvent)(void *opaque, ram_addr_t target);
+typedef int (QEMUBalloonEvent)(void *opaque, ram_addr_t target);
 
 void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque);
 
 void qemu_balloon(ram_addr_t target);
 
-ram_addr_t qemu_balloon_status(void);
+int qemu_balloon_status(void);
 
 #endif
diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index cfd3b41..81dd1f3 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -19,6 +19,7 @@
 #include balloon.h
 #include virtio-balloon.h
 #include kvm.h
+#include monitor.h
 
 #if defined(__linux__)
 #include sys/mman.h
@@ -27,9 +28,13 @@
 typedef struct VirtIOBalloon
 {
 VirtIODevice vdev;
-VirtQueue *ivq, *dvq;
+VirtQueue *ivq, *dvq, *svq;
 uint32_t num_pages;
 uint32_t actual;
+uint32_t stats[VIRTIO_BALLOON_S_NR];
+VirtQueueElement stats_vq_elem;
+size_t stats_vq_offset;
+uint8_t stats_requested;
 } VirtIOBalloon;
 
 static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev)
@@ -46,6 +51,34 @@ static void balloon_page(void *addr, int deflate)
 #endif
 }
 
+static inline void reset_stats(VirtIOBalloon *dev)
+{
+int i;
+for (i = 0; i  VIRTIO_BALLOON_S_NR; dev-stats[i++] = -1);
+}
+
+static inline void print_stat(uint32_t val, const char *label)
+{
+if (val != -1) {
+monitor_printf(cur_mon, ,%s=%u, label, val);
+}
+}
+
+static void virtio_balloon_print_stats(VirtIOBalloon *dev)
+{
+uint32_t actual = ram_size - (dev-actual  VIRTIO_BALLOON_PFN_SHIFT);
+
+monitor_printf(cur_mon, balloon: actual=%d, (int)(actual  20));
+print_stat(dev-stats[VIRTIO_BALLOON_S_SWAP_IN], pages_swapped_in);
+print_stat(dev-stats[VIRTIO_BALLOON_S_SWAP_OUT], pages_swapped_out);
+print_stat(dev-stats[VIRTIO_BALLOON_S_ANON], anon_pages);
+print_stat(dev-stats[VIRTIO_BALLOON_S_MAJFLT], major_page_faults);
+print_stat(dev-stats[VIRTIO_BALLOON_S_MINFLT], minor_page_faults);
+print_stat(dev-stats[VIRTIO_BALLOON_S_MEMFREE], free_memory);
+print_stat(dev-stats[VIRTIO_BALLOON_S_MEMTOT], total_memory);
+monitor_printf(cur_mon, \n);
+}
+
 /* FIXME: once we do a virtio refactoring, this will get subsumed into common
  * code */
 static size_t memcpy_from_iovector(void *data, size_t offset, size_t size,
@@ -104,6 +137,31 @@ static void virtio_balloon_handle_output(VirtIODevice 
*vdev, VirtQueue *vq)
 }
 }
 
+static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)
+{
+VirtIOBalloon *s = to_virtio_balloon(vdev);
+VirtQueueElement *elem = s-stats_vq_elem;
+VirtIOBalloonStat stat;
+size_t offset = 0;
+
+if (!virtqueue_pop(vq, elem))
+return;
+
+while (memcpy_from_iovector(stat, offset, sizeof(stat), elem-out_sg,
+elem-out_num) == sizeof(stat)) {
+offset += sizeof(stat);
+if (stat.tag  VIRTIO_BALLOON_S_NR)
+s-stats[stat.tag] = stat.val;
+}
+s-stats_vq_offset = offset;
+
+if (s-stats_requested) {
+virtio_balloon_print_stats(s);
+monitor_resume(cur_mon);
+s-stats_requested = 0;
+}
+}
+
 static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data)
 {
 VirtIOBalloon *dev = to_virtio_balloon(vdev);
@@ -126,10 +184,19 @@ static void virtio_balloon_set_config(VirtIODevice *vdev,
 
 static uint32_t virtio_balloon_get_features(VirtIODevice *vdev)
 {
-return 0;
+return 1  VIRTIO_BALLOON_F_STATS_VQ;
 }
 
-static ram_addr_t

[Qemu-devel] virtio: Add memory statistics reporting to the balloon driver (V2)

2009-11-17 Thread Adam Litke
virtio: Add memory statistics reporting to the balloon driver (V2)

Changes since V1:
 - Use a virtqueue instead of the device config space

When using ballooning to manage overcommitted memory on a host, a system for
guests to communicate their memory usage to the host can provide information
that will minimize the impact of ballooning on the guests.  The current method
employs a daemon running in each guest that communicates memory statistics to a
host daemon at a specified time interval.  The host daemon aggregates this
information and inflates and/or deflates balloons according to the level of
host memory pressure.  This approach is effective but overly complex since a
daemon must be installed inside each guest and coordinated to communicate with
the host.  A simpler approach is to collect memory statistics in the virtio
balloon driver and communicate them directly to the hypervisor.

This patch enables the guest-side support by adding stats collection and
reporting to the virtio balloon driver.

Signed-off-by: Adam Litke a...@us.ibm.com
Cc: Rusty Russell ru...@rustcorp.com.au
Cc: Anthony Liguori anth...@codemonkey.ws
Cc: virtualizat...@lists.linux-foundation.org
Cc: linux-ker...@vger.kernel.org

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 200c22f..59b9533 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -29,7 +29,7 @@
 struct virtio_balloon
 {
struct virtio_device *vdev;
-   struct virtqueue *inflate_vq, *deflate_vq;
+   struct virtqueue *inflate_vq, *deflate_vq, *stats_vq;
 
/* Where the ballooning thread waits for config to change. */
wait_queue_head_t config_change;
@@ -50,6 +50,9 @@ struct virtio_balloon
/* The array of pfns we tell the Host about. */
unsigned int num_pfns;
u32 pfns[256];
+
+   /* Memory statistics */
+   struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR];
 };
 
 static struct virtio_device_id id_table[] = {
@@ -155,6 +158,60 @@ static void leak_balloon(struct virtio_balloon *vb, size_t 
num)
}
 }
 
+static inline void update_stat(struct virtio_balloon *vb, int idx,
+   unsigned int tag, unsigned long val)
+{
+   BUG_ON(idx = VIRTIO_BALLOON_S_NR);
+   vb-stats[idx].tag = tag;
+   vb-stats[idx].val = cpu_to_le32(val);
+}
+
+#define K(x) ((x)  (PAGE_SHIFT - 10))
+static void update_balloon_stats(struct virtio_balloon *vb)
+{
+   unsigned long events[NR_VM_EVENT_ITEMS];
+   struct sysinfo i;
+   unsigned long anon_pages;
+   int idx = 0;
+
+   all_vm_events(events);
+   si_meminfo(i);
+   anon_pages = K(global_page_state(NR_ANON_PAGES));
+
+   update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_IN, events[PSWPIN]);
+   update_stat(vb, idx++, VIRTIO_BALLOON_S_SWAP_OUT, events[PSWPOUT]);
+   update_stat(vb, idx++, VIRTIO_BALLOON_S_ANON, anon_pages);
+   update_stat(vb, idx++, VIRTIO_BALLOON_S_MAJFLT, events[PGMAJFAULT]);
+   update_stat(vb, idx++, VIRTIO_BALLOON_S_MINFLT, events[PGFAULT]);
+   update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMFREE, K(i.freeram));
+   update_stat(vb, idx++, VIRTIO_BALLOON_S_MEMTOT, K(i.totalram));
+}
+
+/*
+ * While most virtqueues communicate guest-initiated requests to the 
hypervisor,
+ * the stats queue operates in reverse.  The driver initializes the virtqueue
+ * with a single buffer.  From that point forward, all conversations consist of
+ * a hypervisor request (a call to this function) which directs us to refill
+ * the virtqueue with a fresh stats buffer.
+ */
+static void stats_ack(struct virtqueue *vq)
+{
+   struct virtio_balloon *vb;
+   unsigned int len;
+   struct scatterlist sg;
+
+   vb = vq-vq_ops-get_buf(vq, len);
+   if (!vb)
+   return;
+
+   update_balloon_stats(vb);
+
+   sg_init_one(sg, vb-stats, sizeof(vb-stats));
+   if (vq-vq_ops-add_buf(vq, sg, 1, 0, vb)  0)
+   BUG();
+   vq-vq_ops-kick(vq);
+}
+
 static void virtballoon_changed(struct virtio_device *vdev)
 {
struct virtio_balloon *vb = vdev-priv;
@@ -205,10 +262,10 @@ static int balloon(void *_vballoon)
 static int virtballoon_probe(struct virtio_device *vdev)
 {
struct virtio_balloon *vb;
-   struct virtqueue *vqs[2];
-   vq_callback_t *callbacks[] = { balloon_ack, balloon_ack };
-   const char *names[] = { inflate, deflate };
-   int err;
+   struct virtqueue *vqs[3];
+   vq_callback_t *callbacks[] = { balloon_ack, balloon_ack, stats_ack };
+   const char *names[] = { inflate, deflate, stats };
+   int err, nvqs;
 
vdev-priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL);
if (!vb) {
@@ -221,13 +278,28 @@ static int virtballoon_probe(struct virtio_device *vdev)
init_waitqueue_head(vb-config_change);
vb-vdev = vdev;
 
-   /* We expect two virtqueues. */
-   err = vdev-config-find_vqs(vdev, 2, vqs

[Qemu-devel] Re: virtio: Add memory statistics reporting to the balloon driver

2009-11-11 Thread Adam Litke
On Wed, 2009-11-11 at 13:13 +1030, Rusty Russell wrote:
  It's not laziness, it's consistency.  How is actual different than free 
  memory or any other stat?
 
 Because it's a COLLECTION of stats.  For example, swap in should be  swap
 out.  Now, the current Linux implementation of all_vm_events() is non-atomic
 anyway, so maybe we can just document this as best-effort.  I'm saying that
 if it *is* a problem, I think we need a vq.

I can't see why we would care about the atomicity of the collection of
statistics.  Best-effort is good enough.  Any variance within the stats
will be overshadowed by the latency of the host-side management daemon.

 But it raises the question: what stats are generally useful cross-OS?  Should
 we be supplying numbers like unused (free) instantly discardable (ie.
 clean), discardable to disk (ie. file-backed), discardable to swap
 (ie. swap-backed) and unswappable instead?

While I see the virtue in presenting abstracted memory stats that seem
more digestible in a virtualization context, I think we should keep the
raw stats.  This concentrates the complexity in the host-side management
daemon, and allows the host daemon to make better decisions (ie. by
reacting to trends in individual statistics).  Different OSes (or
different versions of the same OS), may also have different sets of
statistics that will provide the answers that a management daemon needs.


-- 
Thanks,
Adam





[Qemu-devel] [RFC] virtio: Report new guest memory statistics pertinent to memory ballooning (V2)

2009-11-09 Thread Adam Litke
Changes since V1:
- In the monitor, print all stats on one line with less abbreviated names
- Coding style changes

When using ballooning to manage overcommitted memory on a host, a system for
guests to communicate their memory usage to the host can provide information
that will minimize the impact of ballooning on the guests.  The current method
employs a daemon running in each guest that communicates memory statistics to a
host daemon at a specified time interval.  The host daemon aggregates this
information and inflates and/or deflates balloons according to the level of
host memory pressure.  This approach is effective but overly complex since a
daemon must be installed inside each guest and coordinated to communicate with
the host.  A simpler approach is to collect memory statistics in the virtio
balloon driver and communicate them to the host via the device config space.

This patch implements the qemu side of the communication channel.  I will post
the kernel driver modifications in-reply to this message.

Signed-off-by: Adam Litke a...@us.ibm.com
Cc: Anthony Liguori aligu...@us.ibm.com
Cc: Avi Kivity a...@redhat.com

diff --git a/balloon.h b/balloon.h
index 60b4a5d..4008d1b 100644
--- a/balloon.h
+++ b/balloon.h
@@ -14,14 +14,21 @@
 #ifndef _QEMU_BALLOON_H
 #define _QEMU_BALLOON_H
 
+#include hw/virtio-balloon.h
 #include cpu-defs.h
 
-typedef ram_addr_t (QEMUBalloonEvent)(void *opaque, ram_addr_t target);
+typedef struct QEMUBalloonState {
+ram_addr_t actual;
+virtio_balloon_stats stats;
+} QEMUBalloonState;
+
+typedef void (QEMUBalloonEvent)(void *opaque, ram_addr_t target,
+  QEMUBalloonState *state);
 
 void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque);
 
 void qemu_balloon(ram_addr_t target);
 
-ram_addr_t qemu_balloon_status(void);
+int qemu_balloon_status(QEMUBalloonState *s);
 
 #endif
diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index cfd3b41..3d74e7f 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -30,6 +30,7 @@ typedef struct VirtIOBalloon
 VirtQueue *ivq, *dvq;
 uint32_t num_pages;
 uint32_t actual;
+virtio_balloon_stats stats;
 } VirtIOBalloon;
 
 static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev)
@@ -111,8 +112,15 @@ static void virtio_balloon_get_config(VirtIODevice *vdev, 
uint8_t *config_data)
 
 config.num_pages = cpu_to_le32(dev-num_pages);
 config.actual = cpu_to_le32(dev-actual);
-
-memcpy(config_data, config, 8);
+config.stats.pswapin = cpu_to_le32(dev-stats.pswapin);
+config.stats.pswapout = cpu_to_le32(dev-stats.pswapout);
+config.stats.panon = cpu_to_le32(dev-stats.panon);
+config.stats.pgmajfault = cpu_to_le32(dev-stats.pgmajfault);
+config.stats.pgminfault = cpu_to_le32(dev-stats.pgminfault);
+config.stats.memfree = cpu_to_le32(dev-stats.memfree);
+config.stats.memtot = cpu_to_le32(dev-stats.memtot);
+
+memcpy(config_data, config, sizeof(config));
 }
 
 static void virtio_balloon_set_config(VirtIODevice *vdev,
@@ -120,16 +128,39 @@ static void virtio_balloon_set_config(VirtIODevice *vdev,
 {
 VirtIOBalloon *dev = to_virtio_balloon(vdev);
 struct virtio_balloon_config config;
-memcpy(config, config_data, 8);
+memcpy(config, config_data, sizeof(config));
 dev-actual = config.actual;
+dev-stats.pswapin = config.stats.pswapin;
+dev-stats.pswapout = config.stats.pswapout;
+dev-stats.panon = config.stats.panon;
+dev-stats.pgmajfault = config.stats.pgmajfault;
+dev-stats.pgminfault = config.stats.pgminfault;
+dev-stats.memfree = config.stats.memfree;
+dev-stats.memtot = config.stats.memtot;
 }
 
 static uint32_t virtio_balloon_get_features(VirtIODevice *vdev)
 {
-return 0;
+uint32_t features = 0;
+
+features |= (1  VIRTIO_BALLOON_F_RPT_SWAP_IN)  |
+(1  VIRTIO_BALLOON_F_RPT_SWAP_OUT) |
+(1  VIRTIO_BALLOON_F_RPT_ANON) |
+(1  VIRTIO_BALLOON_F_RPT_MAJFLT)   |
+(1  VIRTIO_BALLOON_F_RPT_MINFLT)   |
+(1  VIRTIO_BALLOON_F_RPT_MEMFREE)  |
+(1  VIRTIO_BALLOON_F_RPT_MEMTOT);
+   
+return features;
+}
+
+static inline int has_feature(VirtIOBalloon *dev, int feature)
+{
+   return (dev-vdev.features  1feature);
 }
 
-static ram_addr_t virtio_balloon_to_target(void *opaque, ram_addr_t target)
+static void virtio_balloon_to_target(void *opaque, ram_addr_t target,
+ QEMUBalloonState *s)
 {
 VirtIOBalloon *dev = opaque;
 
@@ -141,7 +172,23 @@ static ram_addr_t virtio_balloon_to_target(void *opaque, 
ram_addr_t target)
 virtio_notify_config(dev-vdev);
 }
 
-return ram_size - (dev-actual  VIRTIO_BALLOON_PFN_SHIFT);
+if (s) {
+s-actual = ram_size - (dev-actual  VIRTIO_BALLOON_PFN_SHIFT);
+s-stats.pswapin = has_feature(dev, VIRTIO_BALLOON_F_RPT_SWAP_OUT

[Qemu-devel] virtio: Add memory statistics reporting to the balloon driver

2009-11-09 Thread Adam Litke
When using ballooning to manage overcommitted memory on a host, a system for
guests to communicate their memory usage to the host can provide information
that will minimize the impact of ballooning on the guests.  The current method
employs a daemon running in each guest that communicates memory statistics to a
host daemon at a specified time interval.  The host daemon aggregates this
information and inflates and/or deflates balloons according to the level of
host memory pressure.  This approach is effective but overly complex since a
daemon must be installed inside each guest and coordinated to communicate with
the host.  A simpler approach is to collect memory statistics in the virtio
balloon driver and communicate them to the host via the device config space.

This patch enables the guest-side support by adding stats collection and
reporting to the virtio balloon driver.

Comments?

Signed-off-by: Adam Litke a...@us.ibm.com
Cc: Rusty Russell ru...@rustcorp.com.au
Cc: Anthony Liguori anth...@codemonkey.ws
Cc: Avi Kivity a...@redhat.com
Cc: virtualizat...@lists.linux-foundation.org
Cc: linux-ker...@vger.kernel.org

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 200c22f..0c9a9a1 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -180,6 +180,41 @@ static void update_balloon_size(struct virtio_balloon *vb)
  actual, sizeof(actual));
 }
 
+static inline void update_stat(struct virtio_device *vdev, int feature,
+   unsigned long value, unsigned offset)
+{
+   __le32 __v = cpu_to_le32(value);
+   if (virtio_has_feature(vdev, feature))
+   vdev-config-set(vdev, offset, __v, sizeof(__v));
+}
+
+#define K(x) ((x)  (PAGE_SHIFT - 10))
+static void update_balloon_stats(struct virtio_balloon *vb)
+{
+   unsigned long events[NR_VM_EVENT_ITEMS];
+   struct sysinfo i;
+   unsigned off = offsetof(struct virtio_balloon_config, stats);
+
+   all_vm_events(events);
+
+   update_stat(vb-vdev, VIRTIO_BALLOON_F_RPT_SWAP_IN, events[PSWPIN],
+   off + offsetof(struct virtio_balloon_stats, pswapin));
+   update_stat(vb-vdev, VIRTIO_BALLOON_F_RPT_SWAP_OUT, events[PSWPOUT],
+   off + offsetof(struct virtio_balloon_stats, pswapout));
+   update_stat(vb-vdev, VIRTIO_BALLOON_F_RPT_MAJFLT, events[PGMAJFAULT],
+   off + offsetof(struct virtio_balloon_stats, pgmajfault));
+   update_stat(vb-vdev, VIRTIO_BALLOON_F_RPT_MINFLT, events[PGFAULT],
+   off + offsetof(struct virtio_balloon_stats, pgminfault));
+   update_stat(vb-vdev, VIRTIO_BALLOON_F_RPT_ANON,
+   K(global_page_state(NR_ANON_PAGES)),
+   off + offsetof(struct virtio_balloon_stats, panon));
+   si_meminfo(i);
+   update_stat(vb-vdev, VIRTIO_BALLOON_F_RPT_MEMFREE, K(i.freeram),
+   off + offsetof(struct virtio_balloon_stats, memfree));
+   update_stat(vb-vdev, VIRTIO_BALLOON_F_RPT_MEMTOT, K(i.totalram),
+   off + offsetof(struct virtio_balloon_stats, memtot));
+}
+
 static int balloon(void *_vballoon)
 {
struct virtio_balloon *vb = _vballoon;
@@ -189,15 +224,17 @@ static int balloon(void *_vballoon)
s64 diff;
 
try_to_freeze();
-   wait_event_interruptible(vb-config_change,
+   wait_event_interruptible_timeout(vb-config_change,
 (diff = towards_target(vb)) != 0
 || kthread_should_stop()
-|| freezing(current));
+|| freezing(current),
+VIRTIO_BALLOON_TIMEOUT);
if (diff  0)
fill_balloon(vb, diff);
else if (diff  0)
leak_balloon(vb, -diff);
update_balloon_size(vb);
+   update_balloon_stats(vb);
}
return 0;
 }
@@ -265,7 +302,12 @@ static void virtballoon_remove(struct virtio_device *vdev)
kfree(vb);
 }
 
-static unsigned int features[] = { VIRTIO_BALLOON_F_MUST_TELL_HOST };
+static unsigned int features[] = {
+   VIRTIO_BALLOON_F_MUST_TELL_HOST, VIRTIO_BALLOON_F_RPT_SWAP_IN,
+   VIRTIO_BALLOON_F_RPT_SWAP_OUT, VIRTIO_BALLOON_F_RPT_ANON,
+   VIRTIO_BALLOON_F_RPT_MAJFLT, VIRTIO_BALLOON_F_RPT_MINFLT,
+   VIRTIO_BALLOON_F_RPT_MEMFREE, VIRTIO_BALLOON_F_RPT_MEMTOT,
+};
 
 static struct virtio_driver virtio_balloon = {
.feature_table = features,
diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h
index 09d7300..0bff4b8 100644
--- a/include/linux/virtio_balloon.h
+++ b/include/linux/virtio_balloon.h
@@ -6,15 +6,39 @@
 
 /* The feature bitmap for virtio balloon */
 #define VIRTIO_BALLOON_F_MUST_TELL_HOST0 /* Tell before reclaiming

Re: [Qemu-devel] [RFC] virtio: Report new guest memory statistics pertinent to memory ballooning (V2)

2009-11-09 Thread Adam Litke
On Mon, 2009-11-09 at 19:00 +, Jamie Lokier wrote:
 Adam Litke wrote:
  +s-stats.pswapin = has_feature(dev, VIRTIO_BALLOON_F_RPT_SWAP_OUT) 
  ?
  +   dev-stats.pswapin : -1;
 
 (etc.)
 
 Why not simply have the guest fill in the unused fields with -1, and
 say that's how no meaningful value is represented in the ABI?
 
 All guests have to know about all those fields anyway, for the
 structure layout.  Is there any benefit to specifying feature bits in
 advance over simply storing -1 there?

This holds true for fields that exist but might not be supported by a
guest, but what about the case where we add a new field?  In that case,
qemu and the guest may disagree on the structure layout.

-- 
Thanks,
Adam





Re: [Qemu-devel] [RFC] virtio: Report new guest memory statistics pertinent to memory ballooning (V2)

2009-11-09 Thread Adam Litke
On Mon, 2009-11-09 at 19:01 +, Jamie Lokier wrote:
 These days, would it make more sense to emit a QJSON object?
 
 In this case the JSON object is quite human readable too.

I'm not very particular on the output format since it's main consumer is
likely another program.  Is XML output via the monitor a new precedent
in qemu?  I was unable to find any other monitor command that emits
XML. 

-- 
Thanks,
Adam





[Qemu-devel] [RFC] virtio: Report new guest memory statistics pertinent to memory ballooning

2009-11-05 Thread Adam Litke
[RFC] virtio: Report new guest memory statistics pertinent to memory 
ballooning

When using ballooning to manage overcommitted memory on a host, a system for
guests to communicate their memory usage to the host can provide information
that will minimize the impact of ballooning on the guests.  The current 
method
employs a daemon running in each guest that communicates memory statistics 
to a
host daemon at a specified time interval.  The host daemon aggregates this
information and inflates and/or deflates balloons according to the level of
host memory pressure.  This approach is effective but overly complex since a
daemon must be installed inside each guest and coordinated to communicate 
with
the host.  A simpler approach is to collect memory statistics in the virtio
balloon driver and communicate them to the host via the device config space.

This patch implements the qemu side of the communication channel.  I will 
post
the kernel driver modifications in-reply to this message.

I'd like to pose a few questions concerning my implementation:

 * Is there a better way to use feature codes than using one per variable?

 * This patch causes the 'info balloon' monitor command to generate output 
like
   the following:

(qemu) info balloon
balloon: actual=1024 MB
balloon: pswapin=0 pages
balloon: pswapout=0 pages
balloon: panon=3928 KB
balloon: pgmajfault=0
balloon: pgminfault=247914
balloon: memfree=987032 KB
balloon: memtot=1020812 KB

   Is this agreeable?

Thank you for your comments...

Signed-off-by: Adam Litke a...@us.ibm.com

diff --git a/balloon.h b/balloon.h
index 60b4a5d..3f67021 100644
--- a/balloon.h
+++ b/balloon.h
@@ -14,14 +14,21 @@
 #ifndef _QEMU_BALLOON_H
 #define _QEMU_BALLOON_H
 
+#include hw/virtio-balloon.h
 #include cpu-defs.h
 
-typedef ram_addr_t (QEMUBalloonEvent)(void *opaque, ram_addr_t target);
+struct QEMUBalloonState {
+ram_addr_t actual;
+struct virtio_balloon_stats stats;
+};
+
+typedef void (QEMUBalloonEvent)(void *opaque, ram_addr_t target,
+  struct QEMUBalloonState *state);
 
 void qemu_add_balloon_handler(QEMUBalloonEvent *func, void *opaque);
 
 void qemu_balloon(ram_addr_t target);
 
-ram_addr_t qemu_balloon_status(void);
+int qemu_balloon_status(struct QEMUBalloonState *s);
 
 #endif
diff --git a/hw/virtio-balloon.c b/hw/virtio-balloon.c
index cfd3b41..1dc8345 100644
--- a/hw/virtio-balloon.c
+++ b/hw/virtio-balloon.c
@@ -30,6 +30,7 @@ typedef struct VirtIOBalloon
 VirtQueue *ivq, *dvq;
 uint32_t num_pages;
 uint32_t actual;
+struct virtio_balloon_stats stats;
 } VirtIOBalloon;
 
 static VirtIOBalloon *to_virtio_balloon(VirtIODevice *vdev)
@@ -111,8 +112,15 @@ static void virtio_balloon_get_config(VirtIODevice *vdev, 
uint8_t *config_data)
 
 config.num_pages = cpu_to_le32(dev-num_pages);
 config.actual = cpu_to_le32(dev-actual);
-
-memcpy(config_data, config, 8);
+config.stats.pswapin = cpu_to_le32(dev-stats.pswapin);
+config.stats.pswapout = cpu_to_le32(dev-stats.pswapout);
+config.stats.panon = cpu_to_le32(dev-stats.panon);
+config.stats.pgmajfault = cpu_to_le32(dev-stats.pgmajfault);
+config.stats.pgminfault = cpu_to_le32(dev-stats.pgminfault);
+config.stats.memfree = cpu_to_le32(dev-stats.memfree);
+config.stats.memtot = cpu_to_le32(dev-stats.memtot);
+
+memcpy(config_data, config, sizeof(config));
 }
 
 static void virtio_balloon_set_config(VirtIODevice *vdev,
@@ -120,16 +128,39 @@ static void virtio_balloon_set_config(VirtIODevice *vdev,
 {
 VirtIOBalloon *dev = to_virtio_balloon(vdev);
 struct virtio_balloon_config config;
-memcpy(config, config_data, 8);
+memcpy(config, config_data, sizeof(config));
 dev-actual = config.actual;
+dev-stats.pswapin = config.stats.pswapin;
+dev-stats.pswapout = config.stats.pswapout;
+dev-stats.panon = config.stats.panon;
+dev-stats.pgmajfault = config.stats.pgmajfault;
+dev-stats.pgminfault = config.stats.pgminfault;
+dev-stats.memfree = config.stats.memfree;
+dev-stats.memtot = config.stats.memtot;
 }
 
 static uint32_t virtio_balloon_get_features(VirtIODevice *vdev)
 {
-return 0;
+uint32_t features = 0;
+
+features |= (1  VIRTIO_BALLOON_F_RPT_SWAP_IN)  |
+(1  VIRTIO_BALLOON_F_RPT_SWAP_OUT) |
+(1  VIRTIO_BALLOON_F_RPT_ANON) |
+(1  VIRTIO_BALLOON_F_RPT_MAJFLT)   |
+(1  VIRTIO_BALLOON_F_RPT_MINFLT)   |
+(1  VIRTIO_BALLOON_F_RPT_MEMFREE)  |
+(1  VIRTIO_BALLOON_F_RPT_MEMTOT);
+   
+return features;
+}
+
+static inline int has_feature(VirtIOBalloon *dev, int feature)
+{
+   return (dev-vdev.features  1feature);
 }
 
-static ram_addr_t virtio_balloon_to_target(void

[Qemu-devel] virtio: Add memory statistics reporting to the balloon driver

2009-11-05 Thread Adam Litke
Here are the corresponding changes to the Linux virtio driver...

virtio: Add memory statistics reporting to the balloon driver

When using ballooning to manage overcommitted memory on a host, a system for
guests to communicate their memory usage to the host can provide information
that will minimize the impact of ballooning on the guests.  The current 
method
employs a daemon running in each guest that communicates memory statistics 
to a
host daemon at a specified time interval.  The host daemon aggregates this
information and inflates and/or deflates balloons according to the level of
host memory pressure.  This approach is effective but overly complex since a
daemon must be installed inside each guest and coordinated to communicate 
with
the host.  A simpler approach is to collect memory statistics in the virtio
balloon driver and communicate them to the host via the device config space.

This patch enables the guest-side support by adding stats collection and
reporting to the virtio balloon driver.

Signed-off-by: Adam Litke a...@us.ibm.com

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index 3a43ebf..1029363 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c
@@ -135,6 +135,7 @@ static int virtio_dev_probe(struct device *_d)
set_bit(i, dev-features);
 
dev-config-finalize_features(dev);
+   printk(virtio_dev_probe: final features = %lx\n, dev-features[0]);
 
err = drv-probe(dev);
if (err)
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 200c22f..77cb953 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -180,6 +180,45 @@ static void update_balloon_size(struct virtio_balloon *vb)
  actual, sizeof(actual));
 }
 
+static inline void update_stat(struct virtio_device *vdev, int feature,
+   unsigned long value, unsigned offset)
+{
+   if (virtio_has_feature(vdev, feature)) {
+   vdev-config-set(vdev, offset, value, sizeof(value));
+   printk(update_stat: Set field %i to %lu\n,
+   offset / 4, value);
+   } else
+   printk(Feature %i not supported, leaving field %i alone\n,
+   feature, offset / 4);
+}
+
+#define K(x) ((x)  (PAGE_SHIFT - 10))
+static void update_balloon_stats(struct virtio_balloon *vb)
+{
+   unsigned long events[NR_VM_EVENT_ITEMS];
+   struct sysinfo i;
+   unsigned off = offsetof(struct virtio_balloon_config, stats);
+
+   all_vm_events(events);
+
+   update_stat(vb-vdev, VIRTIO_BALLOON_F_RPT_SWAP_IN, events[PSWPIN],
+   off + offsetof(struct virtio_balloon_stats, pswapin));
+   update_stat(vb-vdev, VIRTIO_BALLOON_F_RPT_SWAP_OUT, events[PSWPOUT],
+   off + offsetof(struct virtio_balloon_stats, pswapout));
+   update_stat(vb-vdev, VIRTIO_BALLOON_F_RPT_MAJFLT, events[PGMAJFAULT],
+   off + offsetof(struct virtio_balloon_stats, pgmajfault));
+   update_stat(vb-vdev, VIRTIO_BALLOON_F_RPT_MINFLT, events[PGFAULT],
+   off + offsetof(struct virtio_balloon_stats, pgminfault));
+   update_stat(vb-vdev, VIRTIO_BALLOON_F_RPT_ANON,
+   K(global_page_state(NR_ANON_PAGES)),
+   off + offsetof(struct virtio_balloon_stats, panon));
+   si_meminfo(i);
+   update_stat(vb-vdev, VIRTIO_BALLOON_F_RPT_MEMFREE, K(i.freeram),
+   off + offsetof(struct virtio_balloon_stats, memfree));
+   update_stat(vb-vdev, VIRTIO_BALLOON_F_RPT_MEMTOT, K(i.totalram),
+   off + offsetof(struct virtio_balloon_stats, memtot));
+}
+
 static int balloon(void *_vballoon)
 {
struct virtio_balloon *vb = _vballoon;
@@ -189,15 +228,18 @@ static int balloon(void *_vballoon)
s64 diff;
 
try_to_freeze();
-   wait_event_interruptible(vb-config_change,
+   wait_event_interruptible_timeout(vb-config_change,
 (diff = towards_target(vb)) != 0
 || kthread_should_stop()
-|| freezing(current));
+|| freezing(current),
+VIRTIO_BALLOON_TIMEOUT);
+   printk(Awake!\n);
if (diff  0)
fill_balloon(vb, diff);
else if (diff  0)
leak_balloon(vb, -diff);
update_balloon_size(vb);
+   update_balloon_stats(vb);
}
return 0;
 }
@@ -237,6 +279,7 @@ static int virtballoon_probe(struct virtio_device *vdev)
 
vb-tell_host_first
= virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST);
+   dev_printk(KERN_INFO, vb