date:20131008

[Qemu-devel] [PATCH v2] Add interface to traverse the qmp command list by QmpCommand

2013-10-08 Thread Mark Wu

In the original code, qmp_get_command_list is used to construct
a list of all command names. To get the information of all qga
commands, the caller traverses the name list and searches for each
command's info by name, so the cost is O(n^2) in the number of commands.

This patch adds an interface to traverse the qmp command list by
QmpCommand to replace qmp_get_command_list. It can decrease the
complexity from O(n^2) to O(n).

Signed-off-by: Mark Wu wu...@linux.vnet.ibm.com
---
Changes:
v2:
1. Keep the signature of qmp_command_is_enabled (per Eric and Michael)
2. Remove the unnecessary pointer castings (per Eric)

 include/qapi/qmp/dispatch.h |  5 ++--
 qapi/qmp-registry.c | 27 +++---
 qga/commands.c  | 38 ++---
 qga/main.c  | 68 +
 4 files changed, 48 insertions(+), 90 deletions(-)

diff --git a/include/qapi/qmp/dispatch.h b/include/qapi/qmp/dispatch.h
index 1ce11f5..b6eb49e 100644
--- a/include/qapi/qmp/dispatch.h
+++ b/include/qapi/qmp/dispatch.h
@@ -47,9 +47,10 @@ QmpCommand *qmp_find_command(const char *name);
 QObject *qmp_dispatch(QObject *request);
 void qmp_disable_command(const char *name);
 void qmp_enable_command(const char *name);
-bool qmp_command_is_enabled(const char *name);
-char **qmp_get_command_list(void);
+bool qmp_command_is_enabled(const QmpCommand *cmd);
 QObject *qmp_build_error_object(Error *errp);
+typedef void (*qmp_cmd_callback_fn)(QmpCommand *cmd, void *opaque);
+void qmp_for_each_command(qmp_cmd_callback_fn fn, void *opaque);
 
 #endif
 
diff --git a/qapi/qmp-registry.c b/qapi/qmp-registry.c
index 28bbbe8..3fcf10e 100644
--- a/qapi/qmp-registry.c
+++ b/qapi/qmp-registry.c
@@ -66,35 +66,16 @@ void qmp_enable_command(const char *name)
 qmp_toggle_command(name, true);
 }
 
-bool qmp_command_is_enabled(const char *name)
+bool qmp_command_is_enabled(const QmpCommand *cmd)
 {
-QmpCommand *cmd;
-
-QTAILQ_FOREACH(cmd, qmp_commands, node) {
-if (strcmp(cmd-name, name) == 0) {
-return cmd-enabled;
-}
-}
-
-return false;
+return cmd-enabled;
 }
 
-char **qmp_get_command_list(void)
+void qmp_for_each_command(qmp_cmd_callback_fn fn, void *opaque)
 {
 QmpCommand *cmd;
-int count = 1;
-char **list_head, **list;
-
-QTAILQ_FOREACH(cmd, qmp_commands, node) {
-count++;
-}
-
-list_head = list = g_malloc0(count * sizeof(char *));
 
 QTAILQ_FOREACH(cmd, qmp_commands, node) {
-*list = g_strdup(cmd-name);
-list++;
+fn(cmd, opaque);
 }
-
-return list_head;
 }
diff --git a/qga/commands.c b/qga/commands.c
index 528b082..063b22b 100644
--- a/qga/commands.c
+++ b/qga/commands.c
@@ -45,35 +45,27 @@ void qmp_guest_ping(Error **err)
 slog(guest-ping called);
 }
 
-struct GuestAgentInfo *qmp_guest_info(Error **err)
+static void qmp_command_info(QmpCommand *cmd, void *opaque)
 {
-GuestAgentInfo *info = g_malloc0(sizeof(GuestAgentInfo));
+GuestAgentInfo *info = opaque;
 GuestAgentCommandInfo *cmd_info;
 GuestAgentCommandInfoList *cmd_info_list;
-char **cmd_list_head, **cmd_list;
-
-info-version = g_strdup(QEMU_VERSION);
-
-cmd_list_head = cmd_list = qmp_get_command_list();
-if (*cmd_list_head == NULL) {
-goto out;
-}
 
-while (*cmd_list) {
-cmd_info = g_malloc0(sizeof(GuestAgentCommandInfo));
-cmd_info-name = g_strdup(*cmd_list);
-cmd_info-enabled = qmp_command_is_enabled(cmd_info-name);
+cmd_info = g_malloc0(sizeof(GuestAgentCommandInfo));
+cmd_info-name = g_strdup(cmd-name);
+cmd_info-enabled = qmp_command_is_enabled(cmd);
 
-cmd_info_list = g_malloc0(sizeof(GuestAgentCommandInfoList));
-cmd_info_list-value = cmd_info;
-cmd_info_list-next = info-supported_commands;
-info-supported_commands = cmd_info_list;
+cmd_info_list = g_malloc0(sizeof(GuestAgentCommandInfoList));
+cmd_info_list-value = cmd_info;
+cmd_info_list-next = info-supported_commands;
+info-supported_commands = cmd_info_list;
+}
 
-g_free(*cmd_list);
-cmd_list++;
-}
+struct GuestAgentInfo *qmp_guest_info(Error **err)
+{
+GuestAgentInfo *info = g_malloc0(sizeof(GuestAgentInfo));
 
-out:
-g_free(cmd_list_head);
+info-version = g_strdup(QEMU_VERSION);
+qmp_for_each_command(qmp_command_info, info);
 return info;
 }
diff --git a/qga/main.c b/qga/main.c
index 6c746c8..ff2ee03 100644
--- a/qga/main.c
+++ b/qga/main.c
@@ -347,48 +347,34 @@ static gint ga_strcmp(gconstpointer str1, gconstpointer 
str2)
 }
 
 /* disable commands that aren't safe for fsfreeze */
-static void ga_disable_non_whitelisted(void)
+static void ga_disable_non_whitelisted(QmpCommand *cmd, void *opaque)
 {
-char **list_head, **list;
 bool whitelisted;
 int i;
 
-list_head = list = qmp_get_command_list();
-while (*list != NULL) {
-whitelisted = false;
-

[Qemu-devel] [PATCH v4] Extend qemu-ga's 'guest-info' command to expose flag 'success-response'

2013-10-08 Thread Mark Wu

Now we have several qemu-ga commands not returning response on success.
It has been documented in qga/qapi-schema.json already. This patch exposes
the 'success-response' flag by extending 'guest-info' command. With this
change, the clients can handle the command response more flexibly.

Signed-off-by: Mark Wu wu...@linux.vnet.ibm.com
---
Changes:
v4: 
Add signature of qmp_has_success_response per Michael.
v3: 
1. treat cmd-options as a bitmask instead of single option (per Eric) 
2. rebase on the patch "Add interface to traverse the qmp command list
by QmpCommand" to avoid the O(n^2) problem (per Eric and Michael)
v2: 
add the notation 'since 1.7' to the option 'success-response'
(per Eric Blake's comments)

 include/qapi/qmp/dispatch.h | 1 +
 qapi/qmp-registry.c | 5 +
 qga/commands.c  | 1 +
 qga/qapi-schema.json| 5 -
 4 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/include/qapi/qmp/dispatch.h b/include/qapi/qmp/dispatch.h
index b6eb49e..cebf6aa 100644
--- a/include/qapi/qmp/dispatch.h
+++ b/include/qapi/qmp/dispatch.h
@@ -48,6 +48,7 @@ QObject *qmp_dispatch(QObject *request);
 void qmp_disable_command(const char *name);
 void qmp_enable_command(const char *name);
 bool qmp_command_is_enabled(const QmpCommand *cmd);
+bool qmp_has_success_response(const QmpCommand *cmd);
 QObject *qmp_build_error_object(Error *errp);
 typedef void (*qmp_cmd_callback_fn)(QmpCommand *cmd, void *opaque);
 void qmp_for_each_command(qmp_cmd_callback_fn fn, void *opaque);
diff --git a/qapi/qmp-registry.c b/qapi/qmp-registry.c
index 3fcf10e..c75c2e8 100644
--- a/qapi/qmp-registry.c
+++ b/qapi/qmp-registry.c
@@ -71,6 +71,11 @@ bool qmp_command_is_enabled(const QmpCommand *cmd)
 return cmd-enabled;
 }
 
+bool qmp_has_success_response(const QmpCommand *cmd)
+{
+   return !(cmd-options  QCO_NO_SUCCESS_RESP);
+}
+
 void qmp_for_each_command(qmp_cmd_callback_fn fn, void *opaque)
 {
 QmpCommand *cmd;
diff --git a/qga/commands.c b/qga/commands.c
index 063b22b..7f089ba 100644
--- a/qga/commands.c
+++ b/qga/commands.c
@@ -54,6 +54,7 @@ static void qmp_command_info(QmpCommand *cmd, void *opaque)
 cmd_info = g_malloc0(sizeof(GuestAgentCommandInfo));
 cmd_info-name = g_strdup(cmd-name);
 cmd_info-enabled = qmp_command_is_enabled(cmd);
+cmd_info-success_response = qmp_has_success_response(cmd);
 
 cmd_info_list = g_malloc0(sizeof(GuestAgentCommandInfoList));
 cmd_info_list-value = cmd_info;
diff --git a/qga/qapi-schema.json b/qga/qapi-schema.json
index 7155b7a..245f968 100644
--- a/qga/qapi-schema.json
+++ b/qga/qapi-schema.json
@@ -141,10 +141,13 @@
 #
 # @enabled: whether command is currently enabled by guest admin
 #
+# @success-response: whether command returns a response on success
+#(since 1.7)
+#
 # Since 1.1.0
 ##
 { 'type': 'GuestAgentCommandInfo',
-  'data': { 'name': 'str', 'enabled': 'bool' } }
+  'data': { 'name': 'str', 'enabled': 'bool', 'success-response': 'bool' } }
 
 ##
 # @GuestAgentInfo
-- 
1.8.3.1

Re: [Qemu-devel] [PATCHv3 00/20] block: logical block provisioning enhancements

2013-10-08 Thread Stefan Hajnoczi

On Mon, Oct 7, 2013 at 11:42 AM, Paolo Bonzini pbonz...@redhat.com wrote:
 Il 07/10/2013 10:42, Stefan Hajnoczi ha scritto:
 Could you make bdrv_co_write_zeroes() always use UNMAP, if possible, and
 avoid adding the new BDRV_REQ_MAY_UNMAP flag?  While reading the first
 few patches in this series I wondered why there is a need to expose
 flags at all...

 Sometimes it is useful to distinguish between zeroing at the image
 format level from discarding at the device level, but I don't think we
 make use of that yet.  I'd prefer to keep the interface simple for now
 and add flags later, if necessary.

 Or maybe I just missed something ;)

 The flag is needed to implement the right semantics for the SCSI WRITE
 SAME command, which are:

 - if the UNMAP bit is off, always write the sectors (that's
 bdrv_aio_write_zeroes without BDRV_REQ_MAY_UNMAP if the payload is zero,
 otherwise it's emulated with bdrv_aio_writev)

 - if the target can discard and write the specified payload, you can
 discard, else you must write the sectors with the correct payload
 (that's bdrv_aio_write_zeroes with BDRV_REQ_MAY_UNMAP).

 Contrast this with the UNMAP command, which does not make any guarantee
 on the content of the sectors after the command is completed (a few
 months ago we agreed that, even if you have discard_zeroes=true in the
 target, it is fine for UNMAP to do nothing).

Okay, then let's keep the patches to expose the flag.

Stefan

Re: [Qemu-devel] [PATCH v5 0/4] timers thread-safe stuff

2013-10-08 Thread Stefan Hajnoczi

On Mon, Oct 07, 2013 at 02:24:26PM +0200, Paolo Bonzini wrote:
 Stefan, will you pick this up next week or shall I?
 
 I have patches for thread-safe icount almost ready to post, and I am not
 sure through whom they are going to go.

Please include it in your pull request.

Kevin is merging block patches this week, the queue is fairly full so I
imagine he has plenty of other things to review.

Stefan

Re: [Qemu-devel] [patch 0/2] force -mem-path RAM allocation

2013-10-08 Thread Markus Armbruster

We have

-mem-path FILE  provide backing storage for guest RAM
-mem-prealloc   preallocate guest memory (use with -mem-path)

PATCH 2/2 adds

-mem-path-force fail if unable to allocate RAM as specified by -mem-path

Looks like it's time to consolidate the options related to guest memory
into a single, QemuOpts-style -memory NAME=VALUE,...  What do you guys
think?

Re: [Qemu-devel] [PATCHv3 00/20] block: logical block provisioning enhancements

2013-10-08 Thread Peter Lieven


On 08.10.2013 09:02, Stefan Hajnoczi wrote:

On Mon, Oct 7, 2013 at 11:42 AM, Paolo Bonzini pbonz...@redhat.com wrote:

Il 07/10/2013 10:42, Stefan Hajnoczi ha scritto:

Could you make bdrv_co_write_zeroes() always use UNMAP, if possible, and
avoid adding the new BDRV_REQ_MAY_UNMAP flag?  While reading the first
few patches in this series I wondered why there is a need to expose
flags at all...

Sometimes it is useful to distinguish between zeroing at the image
format level from discarding at the device level, but I don't think we
make use of that yet.  I'd prefer to keep the interface simple for now
and add flags later, if necessary.

Or maybe I just missed something ;)

The flag is needed to implement the right semantics for the SCSI WRITE
SAME command, which are:

- if the UNMAP bit is off, always write the sectors (that's
bdrv_aio_write_zeroes without BDRV_REQ_MAY_UNMAP if the payload is zero,
otherwise it's emulated with bdrv_aio_writev)

- if the target can discard and write the specified payload, you can
discard, else you must write the sectors with the correct payload
(that's bdrv_aio_write_zeroes with BDRV_REQ_MAY_UNMAP).

Contrast this with the UNMAP command, which does not make any guarantee
on the content of the sectors after the command is completed (a few
months ago we agreed that, even if you have discard_zeroes=true in the
target, it is fine for UNMAP to do nothing).

Okay, then let's keep the patches to expose the flag.

Okay, then I can keep those.

Can you give a short hint whether my approach with bdrv_make_empty is what
you want? I would like to not change the parameters, so use BDRV_REQ_MAY_UNMAP
unconditionally.

/*
 * Make every sector of @bs read back as zeroes.
 *
 * If the driver provides its own bdrv_make_empty callback, that callback
 * does all the work.  Otherwise the image is walked from sector 0 up to
 * bdrv_getlength(bs) / BDRV_SECTOR_SIZE: ranges that
 * bdrv_get_block_status() already reports as BDRV_BLOCK_ZERO are skipped,
 * and everything else is overwritten with bdrv_write_zeroes() using
 * BDRV_REQ_MAY_UNMAP.
 *
 * Returns 0 once the whole range has been processed, or the negative
 * value returned by the helper that failed.
 *
 * NOTE(review): a negative result from bdrv_getlength() currently makes
 * the loop exit immediately with 0 -- confirm whether that should be
 * reported as an error instead.
 */
int bdrv_make_empty(BlockDriverState *bs)
{
    int64_t target_size = bdrv_getlength(bs) / BDRV_SECTOR_SIZE;
    int64_t ret, nb_sectors, sector_num = 0;
    int n;

    if (bs->drv->bdrv_make_empty) {
        return bs->drv->bdrv_make_empty(bs);
    }

    for (;;) {
        nb_sectors = target_size - sector_num;
        if (nb_sectors <= 0) {
            return 0;
        }
        /* The helpers take an int sector count, so clamp each step. */
        if (nb_sectors > INT_MAX) {
            nb_sectors = INT_MAX;
        }
        ret = bdrv_get_block_status(bs, sector_num, nb_sectors, &n);
        if (ret < 0) {
            /*
             * Bail out before testing status bits: a negative value may
             * have the BDRV_BLOCK_ZERO bit set by accident, and n is not
             * known to be valid on failure.
             */
            error_report("error getting block status at sector %" PRId64 ": %s",
                         sector_num, strerror(-ret));
            return ret;
        }
        if (ret & BDRV_BLOCK_ZERO) {
            /* Already reads as zeroes; nothing to write. */
            sector_num += n;
            continue;
        }
        ret = bdrv_write_zeroes(bs, sector_num, n, BDRV_REQ_MAY_UNMAP);
        if (ret < 0) {
            error_report("error writing zeroes at sector %" PRId64 ": %s",
                         sector_num, strerror(-ret));
            return ret;
        }
        sector_num += n;
    }
}

Re: [Qemu-devel] [patch 0/2] force -mem-path RAM allocation

2013-10-08 Thread Paolo Bonzini

Il 08/10/2013 09:32, Markus Armbruster ha scritto:
 We have
 
 -mem-path FILE  provide backing storage for guest RAM
 -mem-prealloc   preallocate guest memory (use with -mem-path)
 
 PATCH 2/2 adds
 
 -mem-path-forcefail if unable to allocate RAM as specified by 
 -mem-path
 
 Looks like it's time to consolidate the options related to guest memory
 into a single, QemuOpts-style -memory NAME=VALUE,...  What do you guys
 think?

Yes, we can use -numa memory (or -numa mem) that Wanlong Gao is
adding.  We can add path=, preallocate= and force= options there.

Paolo

Re: [Qemu-devel] [patch 1/2] qemu: mempath: prefault pages manually

2013-10-08 Thread Paolo Bonzini

Il 08/10/2013 02:41, Marcelo Tosatti ha scritto:
 +/* unblock SIGBUS */
 +pthread_sigmask(SIG_BLOCK, NULL, oldset);
 +sigemptyset(set);
 +sigaddset(set, SIGBUS);
 +pthread_sigmask(SIG_UNBLOCK, set, NULL);

Please instead modify qemu-thread-posix.c to unblock all per-thread
signals (SIGBUS, SIGSEGV, SIGILL, SIGFPE and SIGSYS).  There is no need
to keep those blocked.

Paolo

Re: [Qemu-devel] [PATCH v4 2/7] qmp: add internal sync mode common to mirror_start

2013-10-08 Thread Fam Zheng

On Mon, 09/30 08:49, Eric Blake wrote:
 On 09/30/2013 06:02 AM, Fam Zheng wrote:
  This adds a new sync mode common which only copies data that is above
  the common ancestor of source and target. In general, this could be useful
  in cases like:
  
  base_bs --- common_ancestor --- foo --- bar ---source
\
 \--- target
  
  Where data in foo, bar and source will be copied to target, once such
  common backing_hd sharing is introduced. For now, we could use a special
  case: If target is the ancestor of source, like,
  
  base_bs --- target --- foo --- bar ---source
  
  The data in foo, bar and source will be copied to target, like
  drive-commit, and when they are synced, the source bs replaces target
  bs. This is specifically useful for block commit of active layer.
  
  This mode is not available (-ENOTSUP) from QMP interface, it is only
  used internally by block commit code.
  
 
  +++ b/qapi-schema.json
  @@ -1363,7 +1363,7 @@
   # Since: 1.3
   ##
   { 'enum': 'MirrorSyncMode',
  -  'data': ['top', 'full', 'none'] }
  +  'data': ['top', 'full', 'none', 'common'] }
 
 Is it worth documenting the mode, in order to include a '(since 1.7)'
 notation, as well as a mention that this mode is not supported via QMP
 but only exists so that the code generator will support the mode needed
 internally?  Is there any way to refactor things so that you don't have
 to munge the QAPI just to provide this internal-only mode?
 

As described in commit message, this mode could be useful once blockdev-add has
device referencing (backing_hd sharing). For now, even with the same backing
file, they don't share BDS, so it's not working as expected and should be
disabled.

So do you think it OK to document as not implemented for now, and wait for
backing_hd sharing to enable it?

Thanks,

Fam

Re: [Qemu-devel] [PATCH v4 4/7] mirror: Add commit_job_type to perform commit with mirror code

2013-10-08 Thread Fam Zheng

On Tue, 10/01 11:13, Eric Blake wrote:
 On 09/30/2013 06:02 AM, Fam Zheng wrote:
  Commit active layer will be implemented in block/mirror.c, prepare a new
  job type to let it have a right type name for the user.
  
  Signed-off-by: Fam Zheng f...@redhat.com
  ---
   block/mirror.c| 12 +++-
   blockdev.c|  2 +-
   include/block/block_int.h |  2 ++
   3 files changed, 14 insertions(+), 2 deletions(-)
  
  diff --git a/block/mirror.c b/block/mirror.c
  index af6851f..20dcfb6 100644
  --- a/block/mirror.c
  +++ b/block/mirror.c
  @@ -532,10 +532,19 @@ static const BlockJobType mirror_job_type = {
   .complete  = mirror_complete,
   };
   
  +static const BlockJobType commit_job_type = {
  +.instance_size = sizeof(MirrorBlockJob),
  +.job_type  = commit,
 
 I still wonder if we should complete the conversion over to a QAPI enum
 type for all valid job types prior to hard-coding open strings through
 yet more of the code base.  As long as we don't have introspection done
 yet, we can still make the switch, and in the long run, having an enum
 of valid job types seems like it will be better for maintenance, all
 with no change to what is sent over the wire in QMP.
 
  @@ -390,6 +391,7 @@ void mirror_start(BlockDriverState *bs, 
  BlockDriverState *target,
 int64_t speed, int64_t granularity, int64_t buf_size,
 MirrorSyncMode mode, BlockdevOnError on_source_error,
 BlockdevOnError on_target_error,
  +  bool commit_job,
 
 If we DO create a QAPI enum for job type, then this parameter would be
 the enum type, rather than a bool.
 

Good point, I'll work on a QAPI enum and base this on it.

Fam

Re: [Qemu-devel] [PATCH qom-next 0/2] qdev-monitor: Reference counting follow-ups

2013-10-08 Thread Igor Mammedov

On Mon,  7 Oct 2013 18:43:59 +0200
Andreas Färber afaer...@suse.de wrote:

 Hello,
 
 I have queued bug fixes by Igor and Stefan for device_add on qom-next and
 am rearranging the following changes of mine on top.
 
 1) Further naming cleanups, now rebased on the bugfixes for easier 
 backporting.
 2) Inlining of qdev_init(), so that we always have unparent+unref pairs.
 
 If there's no objections, planning to include this in a pull tonight or 
 tomorrow.
 
 Regards,
 Andreas
 
 Cc: Igor Mammedov imamm...@redhat.com
 Cc: Stefan Hajnoczi stefa...@redhat.com
 Cc: Paolo Bonzini pbonz...@redhat.com
 Cc: Anthony Liguori anth...@codemonkey.ws
 
 Andreas Färber (2):
   qdev-monitor: Avoid qdev as variable name
   qdev-monitor: Inline qdev_init() for device_add
 
  qdev-monitor.c | 37 +
  1 file changed, 21 insertions(+), 16 deletions(-)
 

Reviewed-By: Igor Mammedov imamm...@redhat.com

[Qemu-devel] savevm/loadvm

2013-10-08 Thread Alexey Kardashevskiy

Hi!

I need the community help with savevm/loadvm.

I run QEMU like this:

./qemu-system-ppc64 \
 -drive file=virtimg/fc19_16GB.qcow2 \
 -nodefaults \
 -m 2048 \
 -machine pseries \
 -nographic \
 -vga none \
 -enable-kvm


The disk image is an 16GB qcow2 image.

Now I start the guest and do savevm 1 and loadvm 1 from the qemu
console. Everything works. Then I exit qemu, make sure that the snapshot is
there and run QEMU as above plus -loadvm 1. It fails with:

qemu-system-ppc64: qcow2: Loading snapshots with different disk size is not
implemented
qemu-system-ppc64: Error -95 while activating snapshot '2' on 'scsi0-hd0'

The check is added by commit 90b277593df873d3a2480f002e2eb5fe1f8e5277
qcow2: Save disk size in snapshot header.

As I could not fully grasp the idea behind the patch, I looked a bit deeper.
This is the check:

int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
{
[...]
if (sn-disk_size != bs-total_sectors * BDRV_SECTOR_SIZE) {
error_report(qcow2: Loading snapshots with different disk 
size is not implemented);
ret = -ENOTSUP;
goto fail;
}


My understanding of the patch was that the disk_size should remain 16GB
(0x4..) as it uses bs->total_sectors and never changes it. And
bs->growable is 0 for a qcow2 image because it is not really growable. At
least the total_sectors value from the qcow2 file header does not change
between QEMU starts.

However, qcow2_save_vmstate() sets bs->growable to 1 for a short time
(commit 178e08a58f40dd5aef2ce774fe0850f5d0e56918 from 2009) and this
triggers a branch in bdrv_co_do_writev() which changes bs->total_sectors.
So when QEMU writes snapshots to the file, the disk_size field of a
snapshot has a bigger value (for example 0x4.007b.8180).

And the check above fails. It does not fail if loadvm is done
_in_the_same_run_ after savevm because QEMU operates with the updated
bs->total_sectors.

What would the proper fix be? Or is it not a bug at all and I should be
using something else for -loadvm? Thanks.



-- 
Alexey

[Qemu-devel] [PATCH] scsi: Allocate SCSITargetReq r-buf dynamically

2013-10-08 Thread Asias He

r->buf is hardcoded to 2056 bytes, which is (256 + 1) * 8, allowing 256 LUNs
at most. If more than 256 LUNs are specified by the user, we have a buffer
overflow in scsi_target_emulate_report_luns.

To fix, we allocate the buffer dynamically.

Signed-off-by: Asias He as...@redhat.com
---
 hw/scsi/scsi-bus.c | 44 +---
 include/hw/scsi/scsi.h |  2 ++
 2 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index 4d36841..d950e6f 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -11,6 +11,8 @@ static char *scsibus_get_dev_path(DeviceState *dev);
 static char *scsibus_get_fw_dev_path(DeviceState *dev);
 static int scsi_req_parse(SCSICommand *cmd, SCSIDevice *dev, uint8_t *buf);
 static void scsi_req_dequeue(SCSIRequest *req);
+static uint8_t *scsi_target_alloc_buf(SCSIRequest *req, size_t len);
+static void scsi_target_free_buf(SCSIRequest *req);
 
 static Property scsi_props[] = {
 DEFINE_PROP_UINT32(channel, SCSIDevice, channel, 0),
@@ -317,7 +319,8 @@ typedef struct SCSITargetReq SCSITargetReq;
 struct SCSITargetReq {
 SCSIRequest req;
 int len;
-uint8_t buf[2056];
+uint8_t *buf;
+int buf_len;
 };
 
 static void store_lun(uint8_t *outbuf, int lun)
@@ -361,14 +364,12 @@ static bool scsi_target_emulate_report_luns(SCSITargetReq 
*r)
 if (!found_lun0) {
 n += 8;
 }
-len = MIN(n + 8, r-req.cmd.xfer  ~7);
-if (len  sizeof(r-buf)) {
-/* TODO:  256 LUNs? */
-return false;
-}
 
+scsi_target_alloc_buf(r-req, n + 8);
+
+len = MIN(n + 8, r-req.cmd.xfer  ~7);
 memset(r-buf, 0, len);
-stl_be_p(r-buf, n);
+stl_be_p(r-buf[0], n);
 i = found_lun0 ? 8 : 16;
 QTAILQ_FOREACH(kid, r-req.bus-qbus.children, sibling) {
 DeviceState *qdev = kid-child;
@@ -387,6 +388,9 @@ static bool scsi_target_emulate_report_luns(SCSITargetReq 
*r)
 static bool scsi_target_emulate_inquiry(SCSITargetReq *r)
 {
 assert(r-req.dev-lun != r-req.lun);
+
+scsi_target_alloc_buf(r-req, SCSI_INQUIRY_LEN);
+
 if (r-req.cmd.buf[1]  0x2) {
 /* Command support data - optional, not implemented */
 return false;
@@ -411,7 +415,7 @@ static bool scsi_target_emulate_inquiry(SCSITargetReq *r)
 return false;
 }
 /* done with EVPD */
-assert(r-len  sizeof(r-buf));
+assert(r-len  r-buf_len);
 r-len = MIN(r-req.cmd.xfer, r-len);
 return true;
 }
@@ -455,8 +459,8 @@ static int32_t scsi_target_send_command(SCSIRequest *req, 
uint8_t *buf)
 }
 break;
 case REQUEST_SENSE:
-r-len = scsi_device_get_sense(r-req.dev, r-buf,
-   MIN(req-cmd.xfer, sizeof r-buf),
+scsi_target_alloc_buf(r-req, SCSI_SENSE_LEN);
+r-len = scsi_device_get_sense(r-req.dev, r-buf, r-buf_len,
(req-cmd.buf[1]  1) == 0);
 if (r-req.dev-sense_is_ua) {
 scsi_device_unit_attention_reported(req-dev);
@@ -501,11 +505,29 @@ static uint8_t *scsi_target_get_buf(SCSIRequest *req)
 return r-buf;
 }
 
+static uint8_t *scsi_target_alloc_buf(SCSIRequest *req, size_t len)
+{
+SCSITargetReq *r = DO_UPCAST(SCSITargetReq, req, req);
+
+r-buf = g_malloc(len);
+r-buf_len = len;
+
+return r-buf;
+}
+
+static void scsi_target_free_buf(SCSIRequest *req)
+{
+SCSITargetReq *r = DO_UPCAST(SCSITargetReq, req, req);
+
+g_free(r-buf);
+}
+
 static const struct SCSIReqOps reqops_target_command = {
 .size = sizeof(SCSITargetReq),
 .send_command = scsi_target_send_command,
 .read_data= scsi_target_read_data,
 .get_buf  = scsi_target_get_buf,
+.free_req = scsi_target_free_buf,
 };
 
 
@@ -1365,7 +1387,7 @@ int scsi_build_sense(uint8_t *in_buf, int in_len,
 buf[7] = 10;
 buf[12] = sense.asc;
 buf[13] = sense.ascq;
-return MIN(len, 18);
+return MIN(len, SCSI_SENSE_LEN);
 } else {
 /* Return descriptor format sense buffer */
 buf[0] = 0x72;
diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
index 1b66510..76f6ac2 100644
--- a/include/hw/scsi/scsi.h
+++ b/include/hw/scsi/scsi.h
@@ -9,6 +9,8 @@
 #define MAX_SCSI_DEVS  255
 
 #define SCSI_CMD_BUF_SIZE 16
+#define SCSI_SENSE_LEN  18
+#define SCSI_INQUIRY_LEN36
 
 typedef struct SCSIBus SCSIBus;
 typedef struct SCSIBusInfo SCSIBusInfo;
-- 
1.8.3.1

[Qemu-devel] [PATCH 2/8] timers: add timer_mod_anticipate and timer_mod_anticipate_ns

2013-10-08 Thread Paolo Bonzini

These let a user anticipate the deadline of a timer, atomically with
other sites that call the function.  This helps avoid complicated
lock hierarchies.  It is useful whenever the timer does work based on
the current value of the clock (rather than doing something periodically
on every tick).

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 include/qemu/timer.h | 26 ++
 qemu-timer.c | 29 +
 2 files changed, 55 insertions(+)

diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index b58903b..f215b0b 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -539,6 +539,19 @@ void timer_del(QEMUTimer *ts);
 void timer_mod_ns(QEMUTimer *ts, int64_t expire_time);
 
 /**
+ * timer_mod_anticipate_ns:
+ * @ts: the timer
+ * @expire_time: the expiry time in nanoseconds
+ *
+ * Modify a timer to expire at @expire_time or the current time,
+ * whichever comes earlier.
+ *
+ * This function is thread-safe but the timer and its timer list must not be
+ * freed while this function is running.
+ */
+void timer_mod_anticipate_ns(QEMUTimer *ts, int64_t expire_time);
+
+/**
  * timer_mod:
  * @ts: the timer
  * @expire_time: the expire time in the units associated with the timer
@@ -552,6 +565,19 @@ void timer_mod_ns(QEMUTimer *ts, int64_t expire_time);
 void timer_mod(QEMUTimer *ts, int64_t expire_timer);
 
 /**
+ * timer_mod_anticipate:
+ * @ts: the timer
+ * @expire_time: the expiry time in nanoseconds
+ *
+ * Modify a timer to expire at @expire_time or the current time, whichever
+ * comes earlier, taking into account the scale associated with the timer.
+ *
+ * This function is thread-safe but the timer and its timer list must not be
+ * freed while this function is running.
+ */
+void timer_mod_anticipate(QEMUTimer *ts, int64_t expire_time);
+
+/**
  * timer_pending:
  * @ts: the timer
  *
diff --git a/qemu-timer.c b/qemu-timer.c
index 95fc6eb..202e9a2 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -393,11 +393,40 @@ void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
 }
 }
 
+/* modify the current timer so that it will be fired when current_time
+   = expire_time or the current deadline, whichever comes earlier.
+   The corresponding callback will be called. */
+void timer_mod_anticipate_ns(QEMUTimer *ts, int64_t expire_time)
+{
+QEMUTimerList *timer_list = ts-timer_list;
+bool rearm;
+
+qemu_mutex_lock(timer_list-active_timers_lock);
+if (ts-expire_time == -1 || ts-expire_time  expire_time) {
+if (ts-expire_time != -1) {
+timer_del_locked(timer_list, ts);
+}
+rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
+} else {
+rearm = false;
+}
+qemu_mutex_unlock(timer_list-active_timers_lock);
+
+if (rearm) {
+timerlist_rearm(timer_list);
+}
+}
+
 void timer_mod(QEMUTimer *ts, int64_t expire_time)
 {
 timer_mod_ns(ts, expire_time * ts-scale);
 }
 
+void timer_mod_anticipate(QEMUTimer *ts, int64_t expire_time)
+{
+timer_mod_anticipate_ns(ts, expire_time * ts-scale);
+}
+
 bool timer_pending(QEMUTimer *ts)
 {
 return ts-expire_time = 0;
-- 
1.8.3.1

[Qemu-devel] [PATCH 0/8] Make icount thread-safe

2013-10-08 Thread Paolo Bonzini

This series moves the icount state under the same seqlock as the normal
vm_clock implementation.

It is not yet 100% thread-safe, because the CPU list should be moved
under RCU protection (due to the call to !all_cpu_threads_idle()
in qemu_clock_warp).  However it is a substantial step forward, the
only uncovered case being CPU hotplug.

Please review.

Paolo

Paolo Bonzini (8):
  timers: extract timer_mod_ns_locked and timerlist_rearm
  timers: add timer_mod_anticipate and timer_mod_anticipate_ns
  timers: use cpu_get_icount() directly
  timers: reorganize icount_warp_rt
  timers: prepare the code for future races in calling qemu_clock_warp
  timers: introduce cpu_get_clock_locked
  timers: document (future) locking rules for icount
  timers: make icount thread-safe

 cpus.c | 110 -
 include/qemu/timer.h   |  26 +
 qemu-timer.c   |  74 +++--
 4 files changed, 163 insertions(+), 47 deletions(-)
 create mode 100644 include/qemu/seqlock.h

-- 
1.8.3.1

[Qemu-devel] [PATCH 4/8] timers: reorganize icount_warp_rt

2013-10-08 Thread Paolo Bonzini

To prepare for future code changes, move the increment of qemu_icount_bias
outside the if statement.

Also, hoist outside the if the check for timers that expired due to the
warping.  The check is redundant when !runstate_is_running(), but
doing it this way helps because the code that increments qemu_icount_bias
will be a critical section.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 cpus.c | 18 ++
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/cpus.c b/cpus.c
index f87ff6f..9f450ad 100644
--- a/cpus.c
+++ b/cpus.c
@@ -279,10 +279,10 @@ static void icount_warp_rt(void *opaque)
 
 if (runstate_is_running()) {
 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-int64_t warp_delta = clock - vm_clock_warp_start;
-if (use_icount == 1) {
-qemu_icount_bias += warp_delta;
-} else {
+int64_t warp_delta;
+
+warp_delta = clock - vm_clock_warp_start;
+if (use_icount == 2) {
 /*
  * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
  * far ahead of real time.
@@ -290,13 +290,15 @@ static void icount_warp_rt(void *opaque)
 int64_t cur_time = cpu_get_clock();
 int64_t cur_icount = cpu_get_icount();
 int64_t delta = cur_time - cur_icount;
-qemu_icount_bias += MIN(warp_delta, delta);
-}
-if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
-qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+warp_delta = MIN(warp_delta, delta);
 }
+qemu_icount_bias += warp_delta;
 }
 vm_clock_warp_start = -1;
+
+if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
+qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
+}
 }
 
 void qtest_clock_warp(int64_t dest)
-- 
1.8.3.1

[Qemu-devel] [PATCH 1/8] timers: extract timer_mod_ns_locked and timerlist_rearm

2013-10-08 Thread Paolo Bonzini

These will be reused in timer_mod_anticipate functions.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 qemu-timer.c | 51 ---
 1 file changed, 32 insertions(+), 19 deletions(-)

diff --git a/qemu-timer.c b/qemu-timer.c
index 6b62e88..95fc6eb 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -338,6 +338,34 @@ static void timer_del_locked(QEMUTimerList *timer_list, 
QEMUTimer *ts)
 }
 }
 
+static bool timer_mod_ns_locked(QEMUTimerList *timer_list,
+QEMUTimer *ts, int64_t expire_time)
+{
+QEMUTimer **pt, *t;
+
+/* add the timer in the sorted list */
+pt = timer_list-active_timers;
+for (;;) {
+t = *pt;
+if (!timer_expired_ns(t, expire_time)) {
+break;
+}
+pt = t-next;
+}
+ts-expire_time = MAX(expire_time, 0);
+ts-next = *pt;
+*pt = ts;
+
+return pt == timer_list-active_timers;
+}
+
+static void timerlist_rearm(QEMUTimerList *timer_list)
+{
+/* Interrupt execution to force deadline recalculation.  */
+qemu_clock_warp(timer_list-clock-type);
+timerlist_notify(timer_list);
+}
+
 /* stop a timer, but do not dealloc it */
 void timer_del(QEMUTimer *ts)
 {
@@ -353,30 +381,15 @@ void timer_del(QEMUTimer *ts)
 void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
 {
 QEMUTimerList *timer_list = ts-timer_list;
-QEMUTimer **pt, *t;
+bool rearm;
 
 qemu_mutex_lock(timer_list-active_timers_lock);
 timer_del_locked(timer_list, ts);
-
-/* add the timer in the sorted list */
-pt = timer_list-active_timers;
-for(;;) {
-t = *pt;
-if (!timer_expired_ns(t, expire_time)) {
-break;
-}
-pt = t-next;
-}
-ts-expire_time = MAX(expire_time, 0);
-ts-next = *pt;
-*pt = ts;
+rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
 qemu_mutex_unlock(timer_list-active_timers_lock);
 
-/* Rearm if necessary  */
-if (pt == timer_list-active_timers) {
-/* Interrupt execution to force deadline recalculation.  */
-qemu_clock_warp(timer_list-clock-type);
-timerlist_notify(timer_list);
+if (rearm) {
+timerlist_rearm(timer_list);
 }
 }
 
-- 
1.8.3.1

[Qemu-devel] [PATCH 3/8] timers: use cpu_get_icount() directly

2013-10-08 Thread Paolo Bonzini

This will help later when we will have to place these calls in
a critical section, and thus call a version of cpu_get_icount()
that does not take the lock.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 cpus.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/cpus.c b/cpus.c
index 870a832..f87ff6f 100644
--- a/cpus.c
+++ b/cpus.c
@@ -224,12 +224,15 @@ static void icount_adjust(void)
 int64_t cur_icount;
 int64_t delta;
 static int64_t last_delta;
+
 /* If the VM is not running, then do nothing.  */
 if (!runstate_is_running()) {
 return;
 }
+
 cur_time = cpu_get_clock();
-cur_icount = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+cur_icount = cpu_get_icount();
+
 delta = cur_icount - cur_time;
 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  
*/
 if (delta  0
@@ -285,7 +288,7 @@ static void icount_warp_rt(void *opaque)
  * far ahead of real time.
  */
 int64_t cur_time = cpu_get_clock();
-int64_t cur_icount = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
+int64_t cur_icount = cpu_get_icount();
 int64_t delta = cur_time - cur_icount;
 qemu_icount_bias += MIN(warp_delta, delta);
 }
-- 
1.8.3.1

[Qemu-devel] [PATCH 8/8] timers: make icount thread-safe

2013-10-08 Thread Paolo Bonzini

This lets threads other than the I/O thread use vm_clock even in -icount mode.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 cpus.c | 42 ++
 1 file changed, 34 insertions(+), 8 deletions(-)

diff --git a/cpus.c b/cpus.c
index bc675a4..1e5cba4 100644
--- a/cpus.c
+++ b/cpus.c
@@ -133,7 +133,7 @@ typedef struct TimersState {
 static TimersState timers_state;
 
 /* Return the virtual CPU time, based on the instruction counter.  */
-int64_t cpu_get_icount(void)
+static int64_t cpu_get_icount_locked(void)
 {
 int64_t icount;
 CPUState *cpu = current_cpu;
@@ -149,6 +149,19 @@ int64_t cpu_get_icount(void)
 return qemu_icount_bias + (icount  icount_time_shift);
 }
 
+int64_t cpu_get_icount(void)
+{
+int64_t icount;
+unsigned start;
+
+do {
+start = seqlock_read_begin(timers_state.clock_seqlock);
+icount = cpu_get_icount_locked();
+} while (seqlock_read_retry(timers_state.clock_seqlock, start));
+
+return icount;
+}
+
 /* return the host CPU cycle counter and handle stop/restart */
 /* cpu_ticks is safely if holding BQL */
 int64_t cpu_get_ticks(void)
@@ -246,8 +259,9 @@ static void icount_adjust(void)
 return;
 }
 
-cur_time = cpu_get_clock();
-cur_icount = cpu_get_icount();
+seqlock_write_lock(timers_state.clock_seqlock);
+cur_time = cpu_get_clock_locked();
+cur_icount = cpu_get_icount_locked();
 
 delta = cur_icount - cur_time;
 /* FIXME: This is a very crude algorithm, somewhat prone to oscillation.  
*/
@@ -265,6 +279,7 @@ static void icount_adjust(void)
 }
 last_delta = delta;
 qemu_icount_bias = cur_icount - (qemu_icount  icount_time_shift);
+seqlock_write_unlock(timers_state.clock_seqlock);
 }
 
 static void icount_adjust_rt(void *opaque)
@@ -289,10 +304,14 @@ static int64_t qemu_icount_round(int64_t count)
 
 static void icount_warp_rt(void *opaque)
 {
-if (vm_clock_warp_start == -1) {
+/* The icount_warp_timer is rescheduled soon after vm_clock_warp_start
+ * changes from -1 to another value, so the race here is okay.
+ */
+if (atomic_read(vm_clock_warp_start) == -1) {
 return;
 }
 
+seqlock_write_lock(timers_state.clock_seqlock);
 if (runstate_is_running()) {
 int64_t clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
 int64_t warp_delta;
@@ -303,14 +322,15 @@ static void icount_warp_rt(void *opaque)
  * In adaptive mode, do not let QEMU_CLOCK_VIRTUAL run too
  * far ahead of real time.
  */
-int64_t cur_time = cpu_get_clock();
-int64_t cur_icount = cpu_get_icount();
+int64_t cur_time = cpu_get_clock_locked();
+int64_t cur_icount = cpu_get_icount_locked();
 int64_t delta = cur_time - cur_icount;
 warp_delta = MIN(warp_delta, delta);
 }
 qemu_icount_bias += warp_delta;
 }
 vm_clock_warp_start = -1;
+seqlock_write_unlock(timers_state.clock_seqlock);
 
 if (qemu_clock_expired(QEMU_CLOCK_VIRTUAL)) {
 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
@@ -324,7 +344,10 @@ void qtest_clock_warp(int64_t dest)
 while (clock  dest) {
 int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
 int64_t warp = MIN(dest - clock, deadline);
+seqlock_write_lock(timers_state.clock_seqlock);
 qemu_icount_bias += warp;
+seqlock_write_unlock(timers_state.clock_seqlock);
+
 qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
 clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
 }
@@ -391,9 +415,11 @@ void qemu_clock_warp(QEMUClockType type)
  * you will not be sending network packets continuously instead of
  * every 100ms.
  */
+seqlock_write_lock(timers_state.clock_seqlock);
 if (vm_clock_warp_start == -1 || vm_clock_warp_start  clock) {
 vm_clock_warp_start = clock;
 }
+seqlock_write_unlock(timers_state.clock_seqlock);
 timer_mod_anticipate(icount_warp_timer, clock + deadline);
 } else if (deadline == 0) {
 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
-- 
1.8.3.1

[Qemu-devel] [PATCH 5/8] timers: prepare the code for future races in calling qemu_clock_warp

2013-10-08 Thread Paolo Bonzini

Computing the deadline of all vm_clocks is somewhat expensive and calls
out to qemu-timer.c; two reasons not to do it in the seqlock's write-side
critical section.  This however opens the door for races in setting and
reading vm_clock_warp_start.

To plug them, we need to cover the case where a new deadline slips in
between the call to qemu_clock_deadline_ns_all and the actual modification
of the icount_warp_timer.  Restrict changes to vm_clock_warp_start and
the icount_warp_timer's expiration time, to only move them back (which
would simply cause an early wakeup).

If a vm_clock timer is cancelled while CPUs are idle, this might cause the
icount_warp_timer to fire unnecessarily.  This is not a problem: after it
fires, the timer becomes inactive and the next call to timer_mod_anticipate
will be precise.

In addition to this, we must deactivate the icount_warp_timer _before_
checking whether CPUs are idle.  This way, if the last CPU becomes idle
during the call to timer_del we will still set up the icount_warp_timer.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 cpus.c | 19 +--
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/cpus.c b/cpus.c
index 9f450ad..08eaf23 100644
--- a/cpus.c
+++ b/cpus.c
@@ -319,6 +319,7 @@ void qtest_clock_warp(int64_t dest)
 
 void qemu_clock_warp(QEMUClockType type)
 {
+int64_t clock;
 int64_t deadline;
 
 /*
@@ -338,7 +339,7 @@ void qemu_clock_warp(QEMUClockType type)
  * the earliest QEMU_CLOCK_VIRTUAL timer.
  */
 icount_warp_rt(NULL);
-if (!all_cpu_threads_idle() || !qemu_clock_has_timers(QEMU_CLOCK_VIRTUAL)) 
{
-timer_del(icount_warp_timer);
+timer_del(icount_warp_timer);
+if (!all_cpu_threads_idle()) {
 return;
 }
@@ -348,17 +349,11 @@ void qemu_clock_warp(QEMUClockType type)
return;
 }
 
-vm_clock_warp_start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
 /* We want to use the earliest deadline from ALL vm_clocks */
+clock = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
 deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL);
-
-/* Maintain prior (possibly buggy) behaviour where if no deadline
- * was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
- * INT32_MAX nanoseconds ahead, we still use INT32_MAX
- * nanoseconds.
- */
-if ((deadline  0) || (deadline  INT32_MAX)) {
-deadline = INT32_MAX;
+if (deadline  0) {
+return;
 }
 
 if (deadline  0) {
@@ -379,7 +375,10 @@ void qemu_clock_warp(QEMUClockType type)
  * you will not be sending network packets continuously instead of
  * every 100ms.
  */
-timer_mod(icount_warp_timer, vm_clock_warp_start + deadline);
+if (vm_clock_warp_start == -1 || vm_clock_warp_start  clock) {
+vm_clock_warp_start = clock;
+}
+timer_mod_anticipate(icount_warp_timer, clock + deadline);
 } else if (deadline == 0) {
 qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
 }
-- 
1.8.3.1

[Qemu-devel] [PATCH 6/8] timers: introduce cpu_get_clock_locked

2013-10-08 Thread Paolo Bonzini

This fixes a deadlock in cpu_disable_ticks.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
Should be squashed in Ping Fan's patches.

 cpus.c | 21 +++--
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/cpus.c b/cpus.c
index 08eaf23..01acce2 100644
--- a/cpus.c
+++ b/cpus.c
@@ -166,6 +166,20 @@ int64_t cpu_get_ticks(void)
 }
 }
 
+static int64_t cpu_get_clock_locked(void)
+{
+int64_t ti;
+
+if (!timers_state.cpu_ticks_enabled) {
+ti = timers_state.cpu_clock_offset;
+} else {
+ti = get_clock();
+ti += timers_state.cpu_clock_offset;
+}
+
+return ti;
+}
+
 /* return the host CPU monotonic timer and handle stop/restart */
 int64_t cpu_get_clock(void)
 {
@@ -174,12 +188,7 @@ int64_t cpu_get_clock(void)
 
 do {
 start = seqlock_read_begin(timers_state.clock_seqlock);
-if (!timers_state.cpu_ticks_enabled) {
-ti = timers_state.cpu_clock_offset;
-} else {
-ti = get_clock();
-ti += timers_state.cpu_clock_offset;
-}
+ti = cpu_get_clock_locked();
 } while (seqlock_read_retry(timers_state.clock_seqlock, start));
 
 return ti;
@@ -220,7 +233,7 @@ void cpu_disable_ticks(void)
 seqlock_write_lock(timers_state.clock_seqlock);
 if (timers_state.cpu_ticks_enabled) {
 timers_state.cpu_ticks_offset = cpu_get_ticks();
-timers_state.cpu_clock_offset = cpu_get_clock();
+timers_state.cpu_clock_offset = cpu_get_clock_locked();
 timers_state.cpu_ticks_enabled = 0;
 }
 seqlock_write_unlock(timers_state.clock_seqlock);
-- 
1.8.3.1

[Qemu-devel] [PATCH 7/8] timers: document (future) locking rules for icount

2013-10-08 Thread Paolo Bonzini

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 cpus.c | 15 +++
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/cpus.c b/cpus.c
index 01acce2..bc675a4 100644
--- a/cpus.c
+++ b/cpus.c
@@ -98,17 +98,22 @@ static bool all_cpu_threads_idle(void)
 /***/
 /* guest cycle counter */
 
+/* Protected by TimersState seqlock */
+
+/* Compensate for varying guest execution speed.  */
+static int64_t qemu_icount_bias;
+static int64_t vm_clock_warp_start;
 /* Conversion factor from emulated instructions to virtual clock ticks.  */
 static int icount_time_shift;
 /* Arbitrarily pick 1MIPS as the minimum allowable speed.  */
 #define MAX_ICOUNT_SHIFT 10
-/* Compensate for varying guest execution speed.  */
-static int64_t qemu_icount_bias;
+
+/* Only written by TCG thread */
+static int64_t qemu_icount;
+
 static QEMUTimer *icount_rt_timer;
 static QEMUTimer *icount_vm_timer;
 static QEMUTimer *icount_warp_timer;
-static int64_t vm_clock_warp_start;
-static int64_t qemu_icount;
 
 typedef struct TimersState {
 int64_t cpu_ticks_prev;
@@ -232,6 +237,8 @@ static void icount_adjust(void)
 int64_t cur_time;
 int64_t cur_icount;
 int64_t delta;
+
+/* Protected by TimersState mutex.  */
 static int64_t last_delta;
 
 /* If the VM is not running, then do nothing.  */
-- 
1.8.3.1

Re: [Qemu-devel] [PATCHv3 00/20] block: logical block provisioning enhancements

2013-10-08 Thread Stefan Hajnoczi

On Tue, Oct 8, 2013 at 10:01 AM, Peter Lieven p...@kamp.de wrote:
 On 08.10.2013 09:02, Stefan Hajnoczi wrote:

 On Mon, Oct 7, 2013 at 11:42 AM, Paolo Bonzini pbonz...@redhat.com
 wrote:

 Il 07/10/2013 10:42, Stefan Hajnoczi ha scritto:

 Could you make bdrv_co_write_zeroes() always use UNMAP, if possible, and
 avoid adding the new BDRV_REQ_MAY_UNMAP flag?  While reading the first
 few patches in this series I wondered why there is a need to expose
 flags at all...

 Sometimes it is useful to distinguish zeroing at the image
 format level from discarding at the device level, but I don't think we
 make use of that yet.  I'd prefer to keep the interface simple for now
 and add flags later, if necessary.

 Or maybe I just missed something ;)

 The flag is needed to implement the right semantics for the SCSI WRITE
 SAME command, which are:

 - if the UNMAP bit is off, always write the sectors (that's
 bdrv_aio_write_zeroes without BDRV_REQ_MAY_UNMAP if the payload is zero,
 otherwise it's emulated with bdrv_aio_writev)

 - if the target can discard and write the specified payload, you can
 discard, else you must write the sectors with the correct payload
 (that's bdrv_aio_write_zeroes with BDRV_REQ_MAY_UNMAP).

 Contrast this with the UNMAP command, which does not make any guarantee
 on the content of the sectors after the command is completed (a few
 months ago we agreed that, even if you have discard_zeroes=true in the
 target, it is fine for UNMAP to do nothing).

 Okay, then let's keep the patches to expose the flag.

 Okay, then I can keep those.

 Can you give a short hint if my approach with bdrv_make_empty is what
 you want? I would like to not change the parameters, so use
 BDRV_REQ_MAY_UNMAP
 unconditionally.

 int bdrv_make_empty(BlockDriverState *bs)

The semantics of bdrv_make_empty() today are: deallocate all data in
the top layer of the image file.  If there is a backing file, reads
will fall back to the backing file.

The semantics that you want are zeroing the entire disk image
(efficiently, when possible).

A flags argument is needed to support both sets of semantics.  If
you don't like that, then I suggest creating a new function called
bdrv_make_zero().

Stefan

Re: [Qemu-devel] savevm/loadvm

2013-10-08 Thread Paolo Bonzini

Il 08/10/2013 10:40, Alexey Kardashevskiy ha scritto:
 However qcow2_save_vmstate() sets bs-growable to 1 for a short time
 (commit 178e08a58f40dd5aef2ce774fe0850f5d0e56918 from 2009) and this
 triggers a branch in bdrv_co_do_writev() which changes bs-total_sectors.
 So when QEMU writes snapshots to the file, the disk_size field of a
 snapshot has bigger value (for example 0x4.007b.8180).

I think you need to modify qcow2_save_vmstate to save and restore
bs-total_sectors.  Can you test that and if so post the patch?

Paolo

Re: [Qemu-devel] [PATCH 1/8] timers: extract timer_mod_ns_locked and timerlist_rearm

2013-10-08 Thread Alex Bligh


On 8 Oct 2013, at 09:47, Paolo Bonzini wrote:

 These will be reused in timer_mod_anticipate functions.
 
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
Signed-off-by: Alex Bligh a...@alex.org.uk
 ---
 qemu-timer.c | 51 ---
 1 file changed, 32 insertions(+), 19 deletions(-)
 
 diff --git a/qemu-timer.c b/qemu-timer.c
 index 6b62e88..95fc6eb 100644
 --- a/qemu-timer.c
 +++ b/qemu-timer.c
 @@ -338,6 +338,34 @@ static void timer_del_locked(QEMUTimerList *timer_list, 
 QEMUTimer *ts)
 }
 }
 
 +static bool timer_mod_ns_locked(QEMUTimerList *timer_list,
 +QEMUTimer *ts, int64_t expire_time)
 +{
 +QEMUTimer **pt, *t;
 +
 +/* add the timer in the sorted list */
 +pt = timer_list-active_timers;
 +for (;;) {
 +t = *pt;
 +if (!timer_expired_ns(t, expire_time)) {
 +break;
 +}
 +pt = t-next;
 +}
 +ts-expire_time = MAX(expire_time, 0);
 +ts-next = *pt;
 +*pt = ts;
 +
 +return pt == timer_list-active_timers;
 +}
 +
 +static void timerlist_rearm(QEMUTimerList *timer_list)
 +{
 +/* Interrupt execution to force deadline recalculation.  */
 +qemu_clock_warp(timer_list-clock-type);
 +timerlist_notify(timer_list);
 +}
 +
 /* stop a timer, but do not dealloc it */
 void timer_del(QEMUTimer *ts)
 {
 @@ -353,30 +381,15 @@ void timer_del(QEMUTimer *ts)
 void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
 {
 QEMUTimerList *timer_list = ts-timer_list;
 -QEMUTimer **pt, *t;
 +bool rearm;
 
 qemu_mutex_lock(timer_list-active_timers_lock);
 timer_del_locked(timer_list, ts);
 -
 -/* add the timer in the sorted list */
 -pt = timer_list-active_timers;
 -for(;;) {
 -t = *pt;
 -if (!timer_expired_ns(t, expire_time)) {
 -break;
 -}
 -pt = t-next;
 -}
 -ts-expire_time = MAX(expire_time, 0);
 -ts-next = *pt;
 -*pt = ts;
 +rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
 qemu_mutex_unlock(timer_list-active_timers_lock);
 
 -/* Rearm if necessary  */
 -if (pt == timer_list-active_timers) {
 -/* Interrupt execution to force deadline recalculation.  */
 -qemu_clock_warp(timer_list-clock-type);
 -timerlist_notify(timer_list);
 +if (rearm) {
 +timerlist_rearm(timer_list);
 }
 }
 
 -- 
 1.8.3.1
 
 
 
 

-- 
Alex Bligh

Re: [Qemu-devel] KVM Guest keymap issue

2013-10-08 Thread Matej Mailing

Hi,

The strange thing is that all other keys and combinations work; only
ccaron, Ccaron, scaron, Scaron, zcaron and Zcaron don't. In
our language there are many words containing those characters, and I
really need to have them working.

When I look at the sl keymap file, the codes it contains differ from what
showkey --ascii reports, even for all the other characters I type, yet
those characters work (except the ones mentioned above).

Now I am totally confused about how the ones that do work can work at all ...

Thanks for any enlightenments in advance :)
Matej

2013/9/26 Matej Mailing mail...@tam.si:
I am still pretty lost here, even after reading your link, which shed
light on many things.

Every suggestion and idea is very welcome!
Thanks,
Matej

2013/9/24 Markus Armbruster arm...@redhat.com:
Not specific to KVM, adding qemu-devel.

Matej Mailing mail...@tam.si writes:

Dear list,

I have a problem with a Windows XP guest that I connect to via VNC and
that is using the sl keymap (option -k sl).

The guest is Windows XP and the problematic characters are s, c and z
with caron... when I type them via VNC, they are not printed at all in
virtual system... I have checked the file /usr/share/kvm/keymaps/sl
and it seems that it contains different codes than I get when doing
showkey --ascii on the host machine (running Ubuntu 12.04). I have
tried to change the KVM's keymap file 'sl' with the codes I get from
showkey, but they are still not printed in virtual system to which I
am connected via VNC...

I am totally lost with this issue, thanks for your time and ideas.

Required reading for anyone struggling with virtual keyboards:

https://www.berrange.com/posts/2010/07/04/more-than-you-or-i-ever-wanted-to-know-about-virtual-keyboard-handling/

Re: [Qemu-devel] [PATCHv3 00/20] block: logical block provisioning enhancements

2013-10-08 Thread Peter Lieven


On 08.10.2013 10:59, Stefan Hajnoczi wrote:

On Tue, Oct 8, 2013 at 10:01 AM, Peter Lieven p...@kamp.de wrote:

On 08.10.2013 09:02, Stefan Hajnoczi wrote:

On Mon, Oct 7, 2013 at 11:42 AM, Paolo Bonzini pbonz...@redhat.com
wrote:

Il 07/10/2013 10:42, Stefan Hajnoczi ha scritto:

Could you make bdrv_co_write_zeroes() always use UNMAP, if possible, and
avoid adding the new BDRV_REQ_MAY_UNMAP flag?  While reading the first
few patches in this series I wondered why there is a need to expose
flags at all...

Sometimes it is useful to distinguish zeroing at the image
format level from discarding at the device level, but I don't think we
make use of that yet.  I'd prefer to keep the interface simple for now
and add flags later, if necessary.

Or maybe I just missed something ;)

The flag is needed to implement the right semantics for the SCSI WRITE
SAME command, which are:

- if the UNMAP bit is off, always write the sectors (that's
bdrv_aio_write_zeroes without BDRV_REQ_MAY_UNMAP if the payload is zero,
otherwise it's emulated with bdrv_aio_writev)

- if the target can discard and write the specified payload, you can
discard, else you must write the sectors with the correct payload
(that's bdrv_aio_write_zeroes with BDRV_REQ_MAY_UNMAP).

Contrast this with the UNMAP command, which does not make any guarantee
on the content of the sectors after the command is completed (a few
months ago we agreed that, even if you have discard_zeroes=true in the
target, it is fine for UNMAP to do nothing).

Okay, then let's keep the patches to expose the flag.

Okay, then I can keep those.

Can you give a short hint if my approach with bdrv_make_empty is what
you want? I would like to not change the parameters, so use
BDRV_REQ_MAY_UNMAP
unconditionally.

int bdrv_make_empty(BlockDriverState *bs)

The semantics of bdrv_make_empty() today are: deallocate all data in
the top layer of the image file.  If there is a backing file, reads
will fall back to the backing file.

The semantics that you want are zeroing the entire disk image
(efficiently, when possible).

A flags argument is needed to support both sets of semantics.  If
you don't like that, then I suggest creating a new function called
bdrv_make_zero().

Ok, that is what I would like to do. In this case I only have to rename
bdrv_zeroize to bdrv_make_zero. Ok ?

Peter

Re: [Qemu-devel] [PATCH 2/8] timers: add timer_mod_anticipate and timer_mod_anticipate_ns

2013-10-08 Thread Alex Bligh

Paolo,

On 8 Oct 2013, at 09:47, Paolo Bonzini wrote:
 
 --- a/qemu-timer.c
 +++ b/qemu-timer.c
 @@ -393,11 +393,40 @@ void timer_mod_ns(QEMUTimer *ts, int64_t expire_time)
 }
 }
 
 +/* modify the current timer so that it will be fired when current_time
 +   >= expire_time or the current deadline, whichever comes earlier.
 +   The corresponding callback will be called. */
 +void timer_mod_anticipate_ns(QEMUTimer *ts, int64_t expire_time)
 +{
 +QEMUTimerList *timer_list = ts-timer_list;
 +bool rearm;
 +
 +qemu_mutex_lock(timer_list-active_timers_lock);
 +if (ts-expire_time == -1 || ts-expire_time  expire_time) {

So if we want to alter it ...

 +if (ts-expire_time != -1) {
 +timer_del_locked(timer_list, ts);
 +}

What's this bit for? Surely you've calculated whether you are
shortening the expiry time (above), so all you need do now is
modify it. Why delete it? timer_mod_ns doesn't make this
check?

Otherwise looks OK.

 +rearm = timer_mod_ns_locked(timer_list, ts, expire_time);
 +} else {
 +rearm = false;
 +}
 +qemu_mutex_unlock(timer_list-active_timers_lock);
 +
 +if (rearm) {
 +timerlist_rearm(timer_list);
 +}
 +}
 +
 void timer_mod(QEMUTimer *ts, int64_t expire_time)
 {
 timer_mod_ns(ts, expire_time * ts-scale);
 }
 
 +void timer_mod_anticipate(QEMUTimer *ts, int64_t expire_time)
 +{
 +timer_mod_anticipate_ns(ts, expire_time * ts-scale);
 +}
 +
 bool timer_pending(QEMUTimer *ts)
 {
 return ts-expire_time = 0;
 -- 
 1.8.3.1
 
 
 
 

-- 
Alex Bligh

1 2 3 >

1 - 100 of 248 matches

Mail list logo