date:20110905

[Qemu-devel] buildbot failure in qemu on s390-next_x86_64_debian_6_0

2011-09-05 Thread qemu

The Buildbot has detected a new failure on builder s390-next_x86_64_debian_6_0 
while building qemu.
Full details are available at:
 
http://buildbot.b1-systems.de/qemu/builders/s390-next_x86_64_debian_6_0/builds/21

Buildbot URL: http://buildbot.b1-systems.de/qemu/

Buildslave for this Build: yuzuki

Build Reason: The Nightly scheduler named 'nightly_s390-next' triggered this 
build
Build Source Stamp: [branch s390-next] HEAD
Blamelist: 

BUILD FAILED: failed git

sincerely,
 -The Buildbot

[Qemu-devel] buildbot failure in qemu on s390-next_i386_debian_6_0

2011-09-05 Thread qemu

The Buildbot has detected a new failure on builder s390-next_i386_debian_6_0 
while building qemu.
Full details are available at:
 http://buildbot.b1-systems.de/qemu/builders/s390-next_i386_debian_6_0/builds/21

Buildbot URL: http://buildbot.b1-systems.de/qemu/

Buildslave for this Build: yuzuki

Build Reason: The Nightly scheduler named 'nightly_s390-next' triggered this 
build
Build Source Stamp: [branch s390-next] HEAD
Blamelist: 

BUILD FAILED: failed git

sincerely,
 -The Buildbot

Re: [Qemu-devel] [PATCH 7/9] openpic: avoid a warning from clang analyzer

2011-09-05 Thread Paolo Bonzini


On 09/04/2011 05:52 PM, Blue Swirl wrote:

Avoid this warning by clang analyzer by defining a default case:
/src/qemu/hw/openpic.c:477:5: warning: Undefined or garbage value
returned to caller
 return retval;

Signed-off-by: Blue Swirlblauwir...@gmail.com
---
  hw/openpic.c |1 +
  1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/hw/openpic.c b/hw/openpic.c
index 26c96e2..4b883ac 100644
--- a/hw/openpic.c
+++ b/hw/openpic.c
@@ -469,6 +469,7 @@ static inline uint32_t read_IRQreg (openpic_t
*opp, int n_IRQ, uint32_t reg)
  case IRQ_IPVP:
  retval = opp-src[n_IRQ].ipvp;
  break;
+default:
  case IRQ_IDE:
  retval = opp-src[n_IRQ].ide;
  break;


Looks wrong, perhaps it should return 0?

Paolo

Re: [Qemu-devel] [PATCH v6 3/4] block: add block timer and block throttling algorithm

2011-09-05 Thread Zhi Yong Wu

On Thu, Sep 01, 2011 at 02:36:41PM +0100, Stefan Hajnoczi wrote:
Date: Thu, 1 Sep 2011 14:36:41 +0100
Message-ID: 
CAJSP0QU8GqWsA6jaYAfs0=hvabqayfrpszrnctgp6geggpt...@mail.gmail.com
From: Stefan Hajnoczi stefa...@gmail.com
To: Zhi Yong Wu wu...@linux.vnet.ibm.com
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: quoted-printable
X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.6 (newer, 2)
X-Received-From: 209.85.216.45
Cc: kw...@redhat.com, aligu...@us.ibm.com, stefa...@linux.vnet.ibm.com,
 k...@vger.kernel.org, mtosa...@redhat.com, qemu-devel@nongnu.org,
 p...@us.ibm.com, zwu.ker...@gmail.com, ry...@us.ibm.com
Subject: Re: [Qemu-devel] [PATCH v6 3/4] block: add block timer and block
 throttling algorithm
X-BeenThere: qemu-devel@nongnu.org
X-Mailman-Version: 2.1.14
Precedence: list
List-Id: qemu-devel.nongnu.org
List-Unsubscribe: https://lists.nongnu.org/mailman/options/qemu-devel,
 mailto:qemu-devel-requ...@nongnu.org?subject=unsubscribe
List-Archive: /archive/html/qemu-devel
List-Post: mailto:qemu-devel@nongnu.org
List-Help: mailto:qemu-devel-requ...@nongnu.org?subject=help
List-Subscribe: https://lists.nongnu.org/mailman/listinfo/qemu-devel,
 mailto:qemu-devel-requ...@nongnu.org?subject=subscribe
X-Mailman-Copy: yes
Errors-To: qemu-devel-bounces+wuzhy=linux.vnet.ibm@nongnu.org
Sender: qemu-devel-bounces+wuzhy=linux.vnet.ibm@nongnu.org
X-Brightmail-Tracker: AA==
X-Xagent-From: stefa...@gmail.com
X-Xagent-To: wu...@linux.vnet.ibm.com
X-Xagent-Gateway: bldgate.vnet.ibm.com (XAGENTU8 at BLDGATE)

On Thu, Sep 1, 2011 at 12:44 PM, Zhi Yong Wu wu...@linux.vnet.ibm.com wrote:
 Note:
     1.) When bps/iops limits are specified to a small value such as 511 
 bytes/s, this VM will hang up. We are considering how to handle this senario.
     2.) When dd command is issued in guest, if its option bs is set to a 
 large value such as bs=1024K, the result speed will slightly bigger than 
 the limits.

 For these problems, if you have nice thought, pls let us know.:)

 Signed-off-by: Zhi Yong Wu wu...@linux.vnet.ibm.com
 ---
  block.c     |  290 
 +--
  block.h     |    5 +
  block_int.h |    9 ++
  3 files changed, 296 insertions(+), 8 deletions(-)

 diff --git a/block.c b/block.c
 index 17ee3df..680f1e7 100644
 --- a/block.c
 +++ b/block.c
 @@ -30,6 +30,9 @@
  #include qemu-objects.h
  #include qemu-coroutine.h

 +#include qemu-timer.h
 +#include block/blk-queue.h
 +
  #ifdef CONFIG_BSD
  #include sys/types.h
  #include sys/stat.h
 @@ -72,6 +75,13 @@ static int coroutine_fn 
 bdrv_co_writev_em(BlockDriverState *bs,
                                          QEMUIOVector *iov);
  static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);

 +static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
 +        bool is_write, double elapsed_time, uint64_t *wait);
 +static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
 +        double elapsed_time, uint64_t *wait);
 +static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
 +        bool is_write, uint64_t *wait);
 +
  static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
     QTAILQ_HEAD_INITIALIZER(bdrv_states);

 @@ -104,6 +114,64 @@ int is_windows_drive(const char *filename)
  }
  #endif

 +/* throttling disk I/O limits */
 +void bdrv_io_limits_disable(BlockDriverState *bs)
 +{
 +    bs-io_limits_enabled = false;
 +
 +    if (bs-block_queue) {
 +        qemu_block_queue_flush(bs-block_queue);
 +        qemu_del_block_queue(bs-block_queue);
 +        bs-block_queue = NULL;
 +    }
 +
 +    if (bs-block_timer) {
 +        qemu_del_timer(bs-block_timer);
 +        qemu_free_timer(bs-block_timer);
 +        bs-block_timer = NULL;
 +    }
 +
 +    bs-slice_start[BLOCK_IO_LIMIT_READ]  = 0;
 +    bs-slice_start[BLOCK_IO_LIMIT_WRITE] = 0;
 +
 +    bs-slice_end[BLOCK_IO_LIMIT_READ]    = 0;
 +    bs-slice_end[BLOCK_IO_LIMIT_WRITE]   = 0;
 +}
 +
 +static void bdrv_block_timer(void *opaque)
 +{
 +    BlockDriverState *bs = opaque;
 +    BlockQueue *queue = bs-block_queue;
 +
 +    qemu_block_queue_flush(queue);
 +}
 +
 +void bdrv_io_limits_enable(BlockDriverState *bs)
 +{
 +    bs-block_queue    = qemu_new_block_queue();
 +    bs-block_timer    = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
 +
 +    bs-slice_start[BLOCK_IO_LIMIT_READ]  = qemu_get_clock_ns(vm_clock);
 +    bs-slice_start[BLOCK_IO_LIMIT_WRITE] =
 +           bs-slice_start[BLOCK_IO_LIMIT_READ];
 +
 +    bs-slice_end[BLOCK_IO_LIMIT_READ]    =
 +           bs-slice_start[BLOCK_IO_LIMIT_READ] + BLOCK_IO_SLICE_TIME;
 +    bs-slice_end[BLOCK_IO_LIMIT_WRITE]   =
 +           bs-slice_end[BLOCK_IO_LIMIT_READ];
 +}
 +
 +bool bdrv_io_limits_enabled(BlockDriverState *bs)
 +{
 +    BlockIOLimit *io_limits = bs-io_limits;
 +    return io_limits-bps[BLOCK_IO_LIMIT_READ]
 +         || io_limits-bps[BLOCK_IO_LIMIT_WRITE]
 +         || io_limits-bps[BLOCK_IO_LIMIT_TOTAL]
 +

Re: [Qemu-devel] [PATCH 1/4] Probe for libcheck by default.

2011-09-05 Thread Markus Armbruster

Gerd Hoffmann kra...@redhat.com writes:

 On 09/01/11 21:37, Anthony Liguori wrote:
 On 09/01/2011 10:42 AM, Gerd Hoffmann wrote:
 Probe for libcheck and build checks (if found) by default.
 Can be explicitly disabled using --disable-check-utests.

 Signed-off-by: Gerd Hoffmannkra...@redhat.com
 ---
 configure | 2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

 I think we should convert the check tests to gtest, and then have make
 check use gtester and gtester-report generate a single report of all of
 the test cases.

 I wouldn't object, that is *way* beyond the scope of this little patch
 series though.

One step at a time.

 We could then have build bot run make check and post the output.

 Running make check in buildbot is indeed the motivation to do this ;)

 I don't
 want to end up with a bunch of non gtest unit tests...

 This patch series doesn't add any.

Yep.  If you want to standardize on gtest, you get to convert or remove
the existing tests.  Until then, we use what we have.

 It just adds some build system
 glue so make check runs the existing stuff.

Running self-tests shouldn't be made any harder than that.

Re: [Qemu-devel] [PATCH] virtio: Make memory barriers be memory barriers

2011-09-05 Thread Paolo Bonzini


On 09/04/2011 11:16 AM, Michael S. Tsirkin wrote:

  I mean argue for a richer set of barriers, with per-arch minimal
  implementations instead of the large but portable hammer of
  sync_synchronize, if you will.


That's what I'm saying really. On x86 the richer set of barriers
need not insert code at all for both wmb and rmb macros. All we
might need is an 'optimization barrier'- e.g. linux does
  __asm__ __volatile__(: : :memory)
ppc needs something like sync_synchronize there.


No, rmb and wmb need to generate code.  You are right that in some 
places there will be some extra barriers.


If you want a richer set of barriers, that must be something like 
{rr,rw,wr,ww}_mb{_acq,_rel,} (again not counting the Alpha).  On x86, 
then, all the rr/rw/ww barriers will be compiler barriers because the 
hardware already enforces ordering.  The other three map to 
lfence/sfence/mfence:


   barrier assembly  why?
   -
   wr_mb_acq   lfenceprevents the read from moving up - acquire
   wr_mb_rel   sfenceprevents the write from moving down - release
   wr_mb   mfence(full barrier)

But if you stick to rmb/wmb/mb, then the correct definition of rmb is 
the least strict barrier that provides all three of rr_mb(), 
rw_mb_rel() and wr_mb_acq().  This is, as expected, an lfence. 
Similarly, wmb must provide all three of ww_mb(), wr_mb_rel() and 
rw_mb_acq(), and this is an sfence.


So the right place to put an #ifdef is not wmb(), but the _uses_ of 
wmb() where you know you need a barrier that is less strict.  That's why 
I say David patch is correct; on top of that you may change the 
particular uses of wmb() in virtio.c to compiler barriers, for example 
when you only care about ordering writes after writes.


Likewise, there may even be places in which you could #ifdef out a full 
memory barrier.  For example, if you only care about ordering writes 
with respect to reads, x86 hardware is already providing that and you 
could omit the mb().


I think in general it is premature optimization, though.

Regarding specific examples in virtio where lfence and sfence could be 
used, there may be one when using event signaling.  In the backend you 
write first the index of your response, then you check whether to 
generate an event.  (I think) the following requirements hold:


* if you read the event-index too early, you might skip an event and 
deadlock.  So you need at least a read barrier.


* you can write the response-index after reading the event-index, as 
long as you write it before waking up the guest.


So, in that case an x86 lfence should be enough, though again without 
more consideration I would use a full barrier just to be sure.


Paolo

[Qemu-devel] buildbot failure in qemu on monitor_x86_64_debian_6_0

2011-09-05 Thread qemu

The Buildbot has detected a new failure on builder monitor_x86_64_debian_6_0 
while building qemu.
Full details are available at:
 http://buildbot.b1-systems.de/qemu/builders/monitor_x86_64_debian_6_0/builds/21

Buildbot URL: http://buildbot.b1-systems.de/qemu/

Buildslave for this Build: yuzuki

Build Reason: The Nightly scheduler named 'nightly_monitor' triggered this build
Build Source Stamp: [branch queue/monitor] HEAD
Blamelist: 

BUILD FAILED: failed git

sincerely,
 -The Buildbot

[Qemu-devel] buildbot failure in qemu on monitor_i386_debian_6_0

2011-09-05 Thread qemu

The Buildbot has detected a new failure on builder monitor_i386_debian_6_0 
while building qemu.
Full details are available at:
 http://buildbot.b1-systems.de/qemu/builders/monitor_i386_debian_6_0/builds/21

Buildbot URL: http://buildbot.b1-systems.de/qemu/

Buildslave for this Build: yuzuki

Build Reason: The Nightly scheduler named 'nightly_monitor' triggered this build
Build Source Stamp: [branch queue/monitor] HEAD
Blamelist: 

BUILD FAILED: failed git

sincerely,
 -The Buildbot

Re: [Qemu-devel] [PATCH] monitor: Protect outbuf from concurrent access

2011-09-05 Thread Gerd Hoffmann


On 09/02/11 17:31, Paolo Bonzini wrote:

On 09/02/2011 05:18 PM, Gerd Hoffmann wrote:



Can you just use a bottom half to defer this work to the I/O thread?
Bottom half scheduling has to be signal safe which means it will also be
thread safe.


Not that straight forward as I would have to pass arguments to the
bottom half.


Can you add a variant of qemu_bh_new that accepts a sizeof for the new
bottom half? Then the bottom half itself can be passed as the opaque and
used for the arguments.


That wouldn't help.  I would have to create some kind of job queue which 
is then processed by the bottom half.


cheers,
  Gerd

Re: [Qemu-devel] [PATCH 01/10] Add stub functions for PCI device models to do PCI DMA

2011-09-05 Thread Michael S. Tsirkin

On Fri, Sep 02, 2011 at 02:38:25PM +1000, David Gibson wrote:
  I'd prefer the stubs to be inline. Not just as an optimization:
  it also makes it easier to grok what goes on in the common
  no-iommu case.
 
 To elaborate on my earlier mail.  The problem with making them inlines
 is that the cpu_physical_*() functions then appear in pci.h, which is
 used in pci.c amongst other places that are included in
 libhw32/libhw64, where those functions are poisoned.

Hmm, how are they poisoned?
I thought almost all devices currently use cpu_physical_*()?
For example, e1000 uses cpu_physical_memory_write and
it seems to get included in libhw32/libhw64 without issues.


 -- 
 David Gibson  | I'll have my music baroque, and my code
 david AT gibson.dropbear.id.au| minimalist, thank you.  NOT _the_ 
 _other_
   | _way_ _around_!
 http://www.ozlabs.org/~dgibson

Re: [Qemu-devel] [PATCH 0/4] add make check

2011-09-05 Thread Markus Armbruster

Gerd Hoffmann kra...@redhat.com writes:

   Hi,

 This patch series intends to make unit testing easier.  It adds a new
 make check target which can be used to run all unit tests which are
 currently in the tree.  It also enables the unit tests by default, so
 you don't have to re-run configure with a special switch.

Reviewed-by: Markus Armbruster arm...@redhat.com

One test fails, but Luiz has a fix in his tree.

Re: [Qemu-devel] [PATCH] virtio: Make memory barriers be memory barriers

2011-09-05 Thread Michael S. Tsirkin

On Mon, Sep 05, 2011 at 09:41:19AM +0200, Paolo Bonzini wrote:
 On 09/04/2011 11:16 AM, Michael S. Tsirkin wrote:
   I mean argue for a richer set of barriers, with per-arch minimal
   implementations instead of the large but portable hammer of
   sync_synchronize, if you will.
 
 That's what I'm saying really. On x86 the richer set of barriers
 need not insert code at all for both wmb and rmb macros. All we
 might need is an 'optimization barrier'- e.g. linux does
   __asm__ __volatile__(: : :memory)
 ppc needs something like sync_synchronize there.
 
 No, rmb and wmb need to generate code.

If they do we'll have to surround each their use with
ifndef x86 as you suggest later. Which is just messy.
Maybe we should rename the barriers to smp_rmb/smp_wmb/smp_mb -
this is what linux guests do.

 You are right that in some
 places there will be some extra barriers.
 
 If you want a richer set of barriers, that must be something like
 {rr,rw,wr,ww}_mb{_acq,_rel,} (again not counting the Alpha).  On
 x86, then, all the rr/rw/ww barriers will be compiler barriers
 because the hardware already enforces ordering.  The other three map
 to lfence/sfence/mfence:
 
barrier assembly  why?
-
wr_mb_acq   lfenceprevents the read from moving up - acquire
wr_mb_rel   sfenceprevents the write from moving down - release
wr_mb   mfence(full barrier)
 
 But if you stick to rmb/wmb/mb, then the correct definition of rmb
 is the least strict barrier that provides all three of rr_mb(),
 rw_mb_rel() and wr_mb_acq().  This is, as expected, an lfence.
 Similarly, wmb must provide all three of ww_mb(), wr_mb_rel() and
 rw_mb_acq(), and this is an sfence.
 
 So the right place to put an #ifdef is not wmb(), but the _uses_
 of wmb() where you know you need a barrier that is less strict.
 That's why I say David patch is correct; on top of that you may
 change the particular uses of wmb() in virtio.c to compiler
 barriers, for example when you only care about ordering writes after
 writes.
 Likewise, there may even be places in which you could #ifdef out a
 full memory barrier.  For example, if you only care about ordering
 writes with respect to reads, x86 hardware is already providing that
 and you could omit the mb().
 I think in general it is premature optimization, though.
 
 Regarding specific examples in virtio where lfence and sfence could
 be used, there may be one when using event signaling.  In the
 backend you write first the index of your response, then you check
 whether to generate an event.  (I think) the following requirements
 hold:
 
 * if you read the event-index too early, you might skip an event and
 deadlock.  So you need at least a read barrier.
 * you can write the response-index after reading the event-index, as
 long as you write it before waking up the guest.
 
 So, in that case an x86 lfence should be enough, though again
 without more consideration I would use a full barrier just to be
 sure.
 
 Paolo

lfence in this place will not affect the relative order of read versus
write.

-- 
MST

[Qemu-devel] [PATCH] qemu_vmalloc: align properly for transparent hugepages and KVM

2011-09-05 Thread Avi Kivity

To make good use of transparent hugepages, KVM requires that guest-physical
and host-virtual addresses share the low 21 bits (as opposed to just the low
12 bits normally required).

Adjust qemu_vmalloc() to honor that requirement.  Ignore it for small regions
to avoid fragmentation.

Signed-off-by: Avi Kivity a...@redhat.com
---
 oslib-posix.c |   14 +-
 1 files changed, 13 insertions(+), 1 deletions(-)

diff --git a/oslib-posix.c b/oslib-posix.c
index 196099c..a304fb0 100644
--- a/oslib-posix.c
+++ b/oslib-posix.c
@@ -35,6 +35,13 @@
 extern int daemon(int, int);
 #endif
 
+#if defined(__linux__)  defined(__x86_64__)
+   /* Use 2MB alignment so transparent hugepages can be used by KVM */
+#  define QEMU_VMALLOC_ALIGN (512 * 4096)
+#else
+#  define QEMU_VMALLOC_ALIGN getpagesize()
+#endif
+
 #include config-host.h
 #include sysemu.h
 #include trace.h
@@ -80,7 +87,12 @@ int qemu_daemon(int nochdir, int noclose)
 void *qemu_vmalloc(size_t size)
 {
 void *ptr;
-ptr = qemu_memalign(getpagesize(), size);
+size_t align = QEMU_VMALLOC_ALIGN;
+
+if (size  align) {
+align = getpagesize();
+}
+ptr = qemu_memalign(align, size);
 trace_qemu_vmalloc(size, ptr);
 return ptr;
 }
-- 
1.7.6.1

Re: [Qemu-devel] [PATCH v3 0/8] Add various VMDK subformats support

2011-09-05 Thread Stefan Hajnoczi

On Fri, Aug 12, 2011 at 11:19:26PM +0800, Fam Zheng wrote:
 Changes:
 02/06: Free extents on fail in vmdk_open.
 
 Added:
 07/08: VMDK: bugfix, open Haiku vmdk image
 08/08: VMDK: bugfix, opening vSphere 4 exported image
 
 Fam Zheng (8):
   VMDK: enable twoGbMaxExtentFlat
   VMDK: add twoGbMaxExtentSparse support
   VMDK: separate vmdk_read_extent/vmdk_write_extent
   VMDK: Opening compressed extent.
   VMDK: read/write compressed extent
   VMDK: creating streamOptimized subformat
   VMDK: bugfix, open Haiku vmdk image
   VMDK: bugfix, opening vSphere 4 exported image
 
  block/vmdk.c |  346 
 +-
  1 files changed, 270 insertions(+), 76 deletions(-)

Reviewed-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com

Re: [Qemu-devel] [PATCH 2/9] qemu-io: remove unnecessary assignment

2011-09-05 Thread Kevin Wolf

Am 04.09.2011 17:47, schrieb Blue Swirl:
 Remove an unnecessary assignment, spotted by clang analyzer:
 /src/qemu/qemu-io.c:995:9: warning: Value stored to 'offset' is never read
 offset += reqs[i].qiov-size;
 
 Signed-off-by: Blue Swirl blauwir...@gmail.com

Acked-by: Kevin Wolf kw...@redhat.com

Re: [Qemu-devel] [PATCH] pci: add standard bridge device

2011-09-05 Thread Markus Armbruster

Michael S. Tsirkin m...@redhat.com writes:

 On Mon, Jul 04, 2011 at 12:43:58PM +0300, Michael S. Tsirkin wrote:
 This adds support for a standard pci to pci bridge,
 enabling support for more than 32 PCI devices in the system.
 To use, specify the device id as a 'bus' option.
 Example:
  -device pci-bridge,id=bridge1 \
  -netdev user,id=u \
  -device ne2k_pci,id=net2,bus=bridge1,netdev=u

 BTW, I just noticed that -device ne2k_pci,? does
 not list the bus option. Any idea why?

Looking...  qdev_device_help() shows only device properties, not bus
properties.  I'd call that a bug.

Re: [Qemu-devel] [PATCH v6 2/4] block: add the block queue support

2011-09-05 Thread Zhi Yong Wu

On Thu, Sep 01, 2011 at 02:02:41PM +0100, Stefan Hajnoczi wrote:
 01 Sep 2011 06:02:41 -0700 (PDT)
Received: by 10.220.200.77 with HTTP; Thu, 1 Sep 2011 06:02:41 -0700 (PDT)
In-Reply-To: 1314877456-19521-3-git-send-email-wu...@linux.vnet.ibm.com
References: 1314877456-19521-1-git-send-email-wu...@linux.vnet.ibm.com
 1314877456-19521-3-git-send-email-wu...@linux.vnet.ibm.com
Date: Thu, 1 Sep 2011 14:02:41 +0100
Message-ID: 
CAJSP0QXxRTb74w1kkpwAVP2BM=9g7q3k6_dcbevkhrec21m...@mail.gmail.com
From: Stefan Hajnoczi stefa...@gmail.com
To: Zhi Yong Wu wu...@linux.vnet.ibm.com
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: quoted-printable
X-detected-operating-system: by eggs.gnu.org: GNU/Linux 2.6 (newer, 2)
X-Received-From: 209.85.220.173
Cc: kw...@redhat.com, aligu...@us.ibm.com, stefa...@linux.vnet.ibm.com,
 k...@vger.kernel.org, mtosa...@redhat.com, qemu-devel@nongnu.org,
 p...@us.ibm.com, zwu.ker...@gmail.com, ry...@us.ibm.com
Subject: Re: [Qemu-devel] [PATCH v6 2/4] block: add the block queue support
X-BeenThere: qemu-devel@nongnu.org
X-Mailman-Version: 2.1.14
Precedence: list
List-Id: qemu-devel.nongnu.org
List-Unsubscribe: https://lists.nongnu.org/mailman/options/qemu-devel,
 mailto:qemu-devel-requ...@nongnu.org?subject=unsubscribe
List-Archive: /archive/html/qemu-devel
List-Post: mailto:qemu-devel@nongnu.org
List-Help: mailto:qemu-devel-requ...@nongnu.org?subject=help
List-Subscribe: https://lists.nongnu.org/mailman/listinfo/qemu-devel,
 mailto:qemu-devel-requ...@nongnu.org?subject=subscribe
X-Mailman-Copy: yes
Errors-To: qemu-devel-bounces+wuzhy=linux.vnet.ibm@nongnu.org
Sender: qemu-devel-bounces+wuzhy=linux.vnet.ibm@nongnu.org
X-Brightmail-Tracker: AA==
X-Xagent-From: stefa...@gmail.com
X-Xagent-To: wu...@linux.vnet.ibm.com
X-Xagent-Gateway: vmsdvma.vnet.ibm.com (XAGENTU4 at VMSDVMA)

On Thu, Sep 1, 2011 at 12:44 PM, Zhi Yong Wu wu...@linux.vnet.ibm.com wrote:
 +struct BlockIORequest {
 +    QTAILQ_ENTRY(BlockIORequest) entry;
 +    BlockDriverState *bs;
 +    BlockRequestHandler *handler;
 +    int64_t sector_num;
 +    QEMUIOVector *qiov;
 +    int nb_sectors;
 +    BlockDriverCompletionFunc *cb;
 +    void *opaque;
 +    BlockDriverAIOCB *acb;
 +};
 +
 +struct BlockQueue {
 +    QTAILQ_HEAD(requests, BlockIORequest) requests;
 +    BlockIORequest *request;
 +    bool flushing;
 +};
 +
 +struct BlockQueueAIOCB {
 +    BlockDriverAIOCB common;
 +    BlockDriverCompletionFunc *real_cb;
 +    BlockDriverAIOCB *real_acb;
 +    void *opaque;
 +    BlockIORequest *request;
 +};

Now it's pretty easy to merge BlockIORequest and BlockQueueAIOCB into
one struct.  That way we don't need to duplicate fields or link
structures together:
Must we merge them? I think that it will cause the logic is not clear than 
current way. It is weird that some BlockQueueAIOCB are linked to block queue 
although it reduce the LOC to some extent.
Moreover, those block-queue functions need to be rewritten.


typedef struct BlockQueueAIOCB {
BlockDriverAIOCB common;
QTAILQ_ENTRY(BlockQueueAIOCB) entry;  /* held requests */
BlockRequestHandler *handler; /* dispatch function */
BlockDriverAIOCB *real_acb;   /* dispatched aiocb */

/* Request parameters */
int64_t sector_num;
QEMUIOVector *qiov;
int nb_sectors;
} BlockQueueAIOCB;

This struct is kept for the lifetime of a request (both during
queueing and after dispatch).
ditto.

 +
 +static void qemu_block_queue_dequeue(BlockQueue *queue, BlockIORequest 
 *request)
 +{
 +    BlockIORequest *req;
 +
 +    while (!QTAILQ_EMPTY(queue-requests)) {
 +        req = QTAILQ_FIRST(queue-requests);
 +        if (req == request) {
 +            QTAILQ_REMOVE(queue-requests, req, entry);
 +            break;
 +        }
 +    }
 +}
 +
 +static void qemu_block_queue_cancel(BlockDriverAIOCB *acb)
 +{
 +    BlockQueueAIOCB *blkacb = container_of(acb, BlockQueueAIOCB, common);
 +    if (blkacb-real_acb) {
 +        bdrv_aio_cancel(blkacb-real_acb);
 +    } else {
 +        assert(blkacb-common.bs-block_queue);
 +        qemu_block_queue_dequeue(blkacb-common.bs-block_queue,
 +                                 blkacb-request);

Can we use QTAILQ_REMOVE() and eliminate qemu_block_queue_dequeue()?
why need to replace dequeue function with QTAILQ_REMOVE()?
If the aiocb exists and is not dispatched (real_acb != NULL) then it
must be queued.
Can you explain clearlier?

 +    }
 +
 +    qemu_aio_release(blkacb);
 +}
 +
 +static AIOPool block_queue_pool = {
 +    .aiocb_size         = sizeof(struct BlockQueueAIOCB),
 +    .cancel             = qemu_block_queue_cancel,
 +};
 +
 +static void qemu_block_queue_callback(void *opaque, int ret)
 +{
 +    BlockQueueAIOCB *acb = opaque;
 +
 +    if (acb-real_cb) {
 +        acb-real_cb(acb-opaque, ret);
 +    }
 +
 +    qemu_aio_release(acb);
 +}
 +
 +BlockQueue *qemu_new_block_queue(void)
 +{
 +    BlockQueue *queue;
 +
 +    queue =

Re: [Qemu-devel] [PATCH] ehci: avoid string arguments in trace events

2011-09-05 Thread Gerd Hoffmann


On 09/03/11 17:22, Stefan Hajnoczi wrote:

String arguments are not supported by all trace backends.  This patch
replaces existing string arguments in hw/usb-ehci.c either with
individual trace events that remain human-friendly or by printing raw
addresses when there is no alternative or downside to that.


Printing raw addresses *is* a downside.


States and usbsts bits remain human-friendly since it is hard to
remember all of them.  MMIO addresses are printed raw because they would
create many individual trace events and the addresses are usually easy
to remember when debugging.


I find it hard to rememeber them.  There is a reason why the code to 
print the names for the mmio addresses is there in the first place. I 
don't want to loose that.


Can't we just fix the backends instead?  Replacing debug fprintf with 
trace points isn't going to work if tracing can't handle strings.


cheers,
  Gerd

Re: [Qemu-devel] [PATCH] pc: Clean up PIC-to-APIC IRQ path

2011-09-05 Thread Avi Kivity


On 09/05/2011 11:38 AM, Edgar E. Iglesias wrote:


  We shouldn't really use the term IRQ as it's confusing.  I like the term
  pin better because that describes what we're really talking about.

  qemu_irq is designed oddly today because is represents something that is
  intrinsically state (whether a pin is high or low) with an edge
  notification with the assumption that the state is held somewhere else
  (which is usually true).

I don't agree. That's not what qemu_irq represents.
It represents a wire, a mechanism to drive changes through logic paths
between state. It is intrinsically stateless.

It may be the case that it is missused in some places, or that it isn't
always the best thing to use to represent what ever you need to represent,
so that you want to complement with other mechanisms.
But universally replacing it with a stateful alternative seems wrong to me.



I agree that qemu_irq is inherently stateless.  But I do think there 
should be a way for the sink to query the line level.  Whether it is 
implemented as a cache of the last qemu_set() level, or with callbacks 
that query the underlying state is not important, but we can't just rely 
on edge triggers.


(real hardware can query a line at any time, yes?)

--
error compiling committee.c: too many arguments to function

Re: [Qemu-devel] [PATCH] pc: Clean up PIC-to-APIC IRQ path

2011-09-05 Thread Peter Maydell

On 5 September 2011 09:51, Avi Kivity a...@redhat.com wrote:
 On 09/05/2011 11:38 AM, Edgar E. Iglesias wrote:
 I don't agree. That's not what qemu_irq represents.
 It represents a wire, a mechanism to drive changes through logic paths
 between state. It is intrinsically stateless.

 It may be the case that it is missused in some places, or that it isn't
 always the best thing to use to represent what ever you need to represent,
 so that you want to complement with other mechanisms.
 But universally replacing it with a stateful alternative seems wrong to
 me.

 I agree that qemu_irq is inherently stateless.  But I do think there should
 be a way for the sink to query the line level.  Whether it is implemented as
 a cache of the last qemu_set() level, or with callbacks that query the
 underlying state is not important, but we can't just rely on edge triggers.

 (real hardware can query a line at any time, yes?)

There are often significant constraints (eg can only query at a clock
edge, or not guaranteed to be valid until some other signal has been
high for at least this time), which often means hardware will latch
the input value internally. Obviously QEMU doesn't have to care about
things to that level of detail. The reason we don't care is that we're
really restricting ourselves to providing a programmer-visible approximation
to the device behaviour. So I think the right question to ask about qemu_irq
semantics is not does this act like hardware? but what is the right
interface between two components to let us conveniently implement the
behaviour we need? This probably means that sometimes we want a line
with state-querying and sometimes not.

Ideally our device/object model ought to (a) let us model either option
easily (b) let us easily upgrade a no state querying line to a state
querying line by having the latter's interface be a superset of the
former's.

[IMHO the most important reason not to call it a qemu_irq is that
'irq' is a description of what the signal is used for, not of its
behaviour. 'gpio' is a bit more neutral.]

-- PMM

Re: [Qemu-devel] [Spice-devel] [PATCH] server: don't call reds_stream_free from worker thread context

2011-09-05 Thread Gerd Hoffmann


  Hi,


Hi,
RED_WORKER_MESSAGE_DISPLAY_DISCONNECT is not the only place that
triggers red_disconnect_channel (and as a result,
reds_stream_free(dispatcher-stream)). red_disconnect_channel is called
also when there is an error upon receive/send and also when timeouts
related to the client occur (e.g., in flush_display_commands).


Ok.


We probably better make the dispatcher bi-directional, i.e., not only
push messages to the worker, but also listen.


That sounds like a non-trivial thing.

What does the master branch here btw?  I had a brief look and saw that 
the code looks quite different here (probably due to the multiclient work).


cheers,
  Gerd

[Qemu-devel] qemu segfaults at start

2011-09-05 Thread octane indice

Hello

I tried to use qemu on x86-32 in order to emulate x86-32bits.
I did a:
wget http://wiki.qemu.org/download/qemu-0.15.0.tar.gz
tar zxvf qemu-0.15.0.tar.gz
cd qemu-0.15.0
./configure --enable-system --target-list=i386-softmmu
make
sudo make install

then:
qemu disk.img
Segmentation fault

I just have a window that popup then disappear. I tried several options and it 
allways do a segfault.

I tried with the 0.14.1 version, and same results.

What information do you need in order to help me?

Thanks

Envoyé avec Inmano, ma messagerie renversante et gratuite : 
http://www.inmano.com

[Qemu-devel] [PATCH 0/5] Only one call output register needed for 64 bit hosts

2011-09-05 Thread Stefan Weil

The number of registers needed for the return value of TCG opcode
INDEX_op_call is calculated in function tcg_gen_callN (nb_rets).

It can be 0 or 1, for 32 bit hosts also 2 (return 64 bit value in
two 32 bit registers).

Some TCG implementations reserve 2 registers although only 1 is used.
The following patches fix this.

[PATCH 1/5] tcg/i386: Only one call output register needed for 64 bit hosts
[PATCH 2/5] tcg/ia64: Only one call output register needed for 64 bit hosts
[PATCH 3/5] tcg/s390: Only one call output register needed for 64 bit hosts
[PATCH 4/5] tcg/sparc: Only one call output register needed for 64 bit hosts
[PATCH 5/5] tcg/ppc64: Only one call output register needed for 64 bit hosts

[Qemu-devel] [PATCH 4/5] tcg/sparc: Only one call output register needed for 64 bit hosts

2011-09-05 Thread Stefan Weil

The second register is only needed for 32 bit hosts.

Cc: Blue Swirl blauwir...@gmail.com
Signed-off-by: Stefan Weil w...@mail.berlios.de
---
 tcg/sparc/tcg-target.c |6 --
 1 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index ac76e11..fc3fd7f 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -84,9 +84,11 @@ static const int tcg_target_call_iarg_regs[6] = {
 TCG_REG_O5,
 };
 
-static const int tcg_target_call_oarg_regs[2] = {
+static const int tcg_target_call_oarg_regs[] = {
 TCG_REG_O0,
-TCG_REG_O1,
+#if TCG_TARGET_REG_BITS == 32
+TCG_REG_O1
+#endif
 };
 
 static inline int check_fit_tl(tcg_target_long val, unsigned int bits)
-- 
1.7.0.4

[Qemu-devel] [PATCH 2/5] tcg/ia64: Only one call output register needed for 64 bit hosts

2011-09-05 Thread Stefan Weil

The second register is never used for ia64 hosts.

Cc: Aurelien Jarno aurel...@aurel32.net
Signed-off-by: Stefan Weil w...@mail.berlios.de
---
 tcg/ia64/tcg-target.c |5 ++---
 1 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index 9db205d..3803ab6 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -172,9 +172,8 @@ static const int tcg_target_call_iarg_regs[8] = {
 TCG_REG_R63,
 };
 
-static const int tcg_target_call_oarg_regs[2] = {
-TCG_REG_R8,
-TCG_REG_R9
+static const int tcg_target_call_oarg_regs[] = {
+TCG_REG_R8
 };
 
 /* maximum number of register used for input function arguments */
-- 
1.7.0.4

[Qemu-devel] [PATCH 5/5] tcg/ppc64: Only one call output register needed for 64 bit hosts

2011-09-05 Thread Stefan Weil

The second register is only needed for 32 bit hosts.

Cc: Vassili Karpov av1...@comtv.ru
Signed-off-by: Stefan Weil w...@mail.berlios.de
---
 tcg/ppc64/tcg-target.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index d831684..bef7aac 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -130,7 +130,7 @@ static const int tcg_target_call_iarg_regs[] = {
 TCG_REG_R10
 };
 
-static const int tcg_target_call_oarg_regs[2] = {
+static const int tcg_target_call_oarg_regs[] = {
 TCG_REG_R3
 };
 
-- 
1.7.0.4

[Qemu-devel] [PATCH 3/5] tcg/s390: Only one call output register needed for 64 bit hosts

2011-09-05 Thread Stefan Weil

The second register is only needed for 32 bit hosts.

Cc: Alexander Graf ag...@suse.de
Signed-off-by: Stefan Weil w...@mail.berlios.de
---
 tcg/s390/tcg-target.c |4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 2fc5646..b58df71 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -252,7 +252,9 @@ static const int tcg_target_call_iarg_regs[] = {
 
 static const int tcg_target_call_oarg_regs[] = {
 TCG_REG_R2,
-TCG_REG_R3,
+#if TCG_TARGET_REG_BITS == 32
+TCG_REG_R3
+#endif
 };
 
 #define S390_CC_EQ  8
-- 
1.7.0.4

[Qemu-devel] [PATCH 1/5] tcg/i386: Only one call output register needed for 64 bit hosts

2011-09-05 Thread Stefan Weil

The second register is only needed for 32 bit hosts.

Signed-off-by: Stefan Weil w...@mail.berlios.de
---
 tcg/i386/tcg-target.c |4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 7529677..281f87d 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -76,9 +76,11 @@ static const int tcg_target_call_iarg_regs[] = {
 #endif
 };
 
-static const int tcg_target_call_oarg_regs[2] = {
+static const int tcg_target_call_oarg_regs[] = {
 TCG_REG_EAX,
+#if TCG_TARGET_REG_BITS == 32
 TCG_REG_EDX
+#endif
 };
 
 static uint8_t *tb_ret_addr;
-- 
1.7.0.4

Re: [Qemu-devel] [PATCH] pc: Clean up PIC-to-APIC IRQ path

2011-09-05 Thread Avi Kivity


On 09/05/2011 12:02 PM, Peter Maydell wrote:

  I agree that qemu_irq is inherently stateless.  But I do think there should
  be a way for the sink to query the line level.  Whether it is implemented as
  a cache of the last qemu_set() level, or with callbacks that query the
  underlying state is not important, but we can't just rely on edge triggers.

  (real hardware can query a line at any time, yes?)

There are often significant constraints (eg can only query at a clock
edge, or not guaranteed to be valid until some other signal has been
high for at least this time), which often means hardware will latch
the input value internally. Obviously QEMU doesn't have to care about
things to that level of detail. The reason we don't care is that we're
really restricting ourselves to providing a programmer-visible approximation
to the device behaviour. So I think the right question to ask about qemu_irq
semantics is not does this act like hardware? but what is the right
interface between two components to let us conveniently implement the
behaviour we need?


Agree.


This probably means that sometimes we want a line
with state-querying and sometimes not.


Right, and either can be implemented in terms of the other.  The 
question is which requires less boilerplate.




Ideally our device/object model ought to (a) let us model either option
easily (b) let us easily upgrade a no state querying line to a state
querying line by having the latter's interface be a superset of the
former's.

[IMHO the most important reason not to call it a qemu_irq is that
'irq' is a description of what the signal is used for, not of its
behaviour. 'gpio' is a bit more neutral.]


Or 'line'.

--
error compiling committee.c: too many arguments to function

Re: [Qemu-devel] [PATCH] virtio: Make memory barriers be memory barriers

2011-09-05 Thread Michael S. Tsirkin

On Mon, Sep 05, 2011 at 02:43:16PM +1000, David Gibson wrote:
 On Sun, Sep 04, 2011 at 12:16:43PM +0300, Michael S. Tsirkin wrote:
  On Sun, Sep 04, 2011 at 12:46:35AM +1000, David Gibson wrote:
   On Fri, Sep 02, 2011 at 06:45:50PM +0300, Michael S. Tsirkin wrote:
On Thu, Sep 01, 2011 at 04:31:09PM -0400, Paolo Bonzini wrote:
Why not limit the change to ppc then?
  
   Because the bug is masked by the x86 memory model, but it is still
   there even there conceptually. It is not really true that x86 does
   not need memory barriers, though it doesn't in this case:
  
   http://bartoszmilewski.wordpress.com/2008/11/05/who-ordered-memory-fences-on-an-x86/
  
   Paolo
  
  Right.
  To summarize, on x86 we probably want wmb and rmb to be compiler
  barrier only. Only mb might in theory need to be an mfence.
 
 No, wmb needs to be sfence and rmb needs to be lfence.  GCC does
 not provide those, so they should become __sync_synchronize() too,
 or you should use inline assembly.
 
  But there might be reasons why that is not an issue either
  if we look closely enough.
 
 Since the ring buffers are not using locked instructions (no xchg
 or cmpxchg) the barriers simply must be there, even on x86.  Whether
 it works in practice is not interesting, only the formal model is
 interesting.
 
 Paolo

Well, can you describe an issue in virtio that lfence/sfence help solve
in terms of a memory model please?
Pls note that guest uses smp_ variants for barriers.
   
   Ok, so, I'm having a bit of trouble with the fact that I'm having to
   argue the case that things the protocol requiress to be memory
   barriers actually *be* memory barriers on all platforms.
   
   I mean argue for a richer set of barriers, with per-arch minimal
   implementations instead of the large but portable hammer of
   sync_synchronize, if you will.
  
  That's what I'm saying really. On x86 the richer set of barriers
  need not insert code at all for both wmb and rmb macros. All we
  might need is an 'optimization barrier'- e.g. linux does
   __asm__ __volatile__(: : :memory)
  ppc needs something like sync_synchronize there.
 
 But you're approaching this the wrong way around - correctness should
 come first.  That is, we should first ensure that there is a
 sufficient memory barrier to satisfy the protocol.  Then, *if* there
 is a measurable performance improvement and *if* we can show that a
 weaker barrier is sufficient on a given platform, then we can whittle
 it down to a lighter barrier.

You are only looking at ppc. But on x86 this code ships in
production. So changes should be made in a way to reduce
a potential for regressions, balancing risk versus potential benefit.
I'm trying to point out a way to do this.

   But just leaving them out on x86!?
   Seriously, wtf?  Do you enjoy having software that works chiefly by
   accident?
  
  I'm surprised at the controversy too. People seem to argue that
  x86 cpu does not reorder stores and that we need an sfence between
  stores to prevent the guest from seeing them out of order, at
  the same time.
 
 I don't know the x86 storage model well enough to definitively say
 that the barrier is not necessary there - nor to say that it is
 necessary.  All I know is that the x86 model is quite strongly
 ordered, and I assume that is why the lack of barrier has not caused
 an observed problem on x86.

Please review Documentation/memory-barriers.txt as one reference.
then look at how SMP barriers are implemented at various systems.
In particular, note how it says 'Mandatory barriers should not be used
to control SMP effects'.

 Again, correctness first.  sync_synchronize should be a sufficient
 barrier for wmb() on all platforms.  If you really don't want it, the
 onus is on you

Just for fun, I did a quick hack replacing all barriers with mb()
in the userspace virtio test. This is on x386.

Before:
[mst@tuck virtio]$ sudo time ./virtio_test 
spurious wakeus: 0x1da
24.53user 14.63system 0:41.91elapsed 93%CPU (0avgtext+0avgdata
464maxresident)k
0inputs+0outputs (0major+154minor)pagefaults 0swaps

After:
[mst@tuck virtio]$ sudo time ./virtio_test 
spurious wakeus: 0x218
33.97user 6.22system 0:42.10elapsed 95%CPU (0avgtext+0avgdata
464maxresident)k
0inputs+0outputs (0major+154minor)pagefaults 0swaps

So user time went up significantly, as expected. Surprisingly the kernel
side started working more efficiently - surprising since
kernel was not changed - with net effect close to evening out.

So a risk of performance regressions from unnecessary fencing
seems to be non-zero, assuming that time doesn't lie.
This might be worth investigating, but I'm out of time right now.


 to show that (a) it's safe to do so and
 (b) it's actually worth it.

Worth what? I'm asking you to minimuse disruption to other platforms
while you fix ppc.

-- 
MST

Re: [Qemu-devel] [PATCH] pc: Clean up PIC-to-APIC IRQ path

2011-09-05 Thread Edgar E. Iglesias

On Mon, Sep 05, 2011 at 11:51:01AM +0300, Avi Kivity wrote:
 On 09/05/2011 11:38 AM, Edgar E. Iglesias wrote:
  
We shouldn't really use the term IRQ as it's confusing.  I like the term
pin better because that describes what we're really talking about.
  
qemu_irq is designed oddly today because is represents something that is
intrinsically state (whether a pin is high or low) with an edge
notification with the assumption that the state is held somewhere else
(which is usually true).
 
  I don't agree. That's not what qemu_irq represents.
  It represents a wire, a mechanism to drive changes through logic paths
  between state. It is intrinsically stateless.
 
  It may be the case that it is missused in some places, or that it isn't
  always the best thing to use to represent what ever you need to represent,
  so that you want to complement with other mechanisms.
  But universally replacing it with a stateful alternative seems wrong to me.
 
 
 I agree that qemu_irq is inherently stateless.  But I do think there 
 should be a way for the sink to query the line level.  Whether it is 
 implemented as a cache of the last qemu_set() level, or with callbacks 
 that query the underlying state is not important, but we can't just rely 
 on edge triggers.

I think it is important. Because you need to at least be able to mark the
places were there is state. The cache at the sink sounds more right to me.
An IRQ line, can at the same time have multiple states through its
path between source device and the final sink. Every device that needs to
model state should implement/mark it or maybe connect the the generic caching
sink version and devices that manipulate the level but dont have state,
shouldn't.

 (real hardware can query a line at any time, yes?)

IMO, the query is just an upside-down way of thinking of it.

What happens is, you change some state, and the state drives changes through
a logic path towards new state that picks up the updated value etc etc.
Quering is like going (for bus accesses): OK, here's my VGA framebuffer
address, lets do a get back through the bus and see what the CPU wants to
write to this location?

According to the query view, you would add state to the memory regions so
that an MMIO device only gets a xxx_access call, and does a query back to
the core to figure out what's going on. Possible? I'm sure it is. Correct?
who knows. But IMO, a very upside-down way of thinking of it.

Cheers

Re: [Qemu-devel] [PATCH] pc: Clean up PIC-to-APIC IRQ path

2011-09-05 Thread Avi Kivity


On 09/05/2011 12:22 PM, Edgar E. Iglesias wrote:

  (real hardware can query a line at any time, yes?)

IMO, the query is just an upside-down way of thinking of it.

What happens is, you change some state, and the state drives changes through
a logic path towards new state that picks up the updated value etc etc.
Quering is like going (for bus accesses): OK, here's my VGA framebuffer
address, lets do a get back through the bus and see what the CPU wants to
write to this location?

According to the query view, you would add state to the memory regions so
that an MMIO device only gets a xxx_access call, and does a query back to
the core to figure out what's going on. Possible? I'm sure it is. Correct?
who knows.


There's no difference really.  Consider reading the 'data' parameter a 
query.


With qemu_irq, there is a difference, because the line level is valid at 
all times, not just when the edge takes place (for memory, the data 
lines are only valid during the transaction).



But IMO, a very upside-down way of thinking of it.



Query is needed when a line is masked internally, or when a device is 
hot-plugged.


We can work around masking by caching the level in the device even 
though the line is masked, and querying the cache when the line is 
unmasked, but that is artificial, and requires live-migrating the cache 
(which isn't true state).  We can't work around the hotplug case.


--
error compiling committee.c: too many arguments to function

Re: [Qemu-devel] [PATCH] [SPARC] Gdbstub: Fix back-trace on SPARC32

2011-09-05 Thread Fabien Chouteau

On 03/09/2011 11:25, Blue Swirl wrote:
 On Thu, Sep 1, 2011 at 2:17 PM, Fabien Chouteau chout...@adacore.com wrote:
 Gdb expects all registers windows to be flushed in ram, which is not the case
 in Qemu. Therefore the back-trace generation doesn't work. This patch adds a
 function to handle reads/writes in stack frames as if windows were flushed.

 Signed-off-by: Fabien Chouteau chout...@adacore.com
 ---
  gdbstub.c |   10 --
  target-sparc/cpu.h|7 
  target-sparc/helper.c |   85 
 +
  3 files changed, 99 insertions(+), 3 deletions(-)

 diff --git a/gdbstub.c b/gdbstub.c
 index 3b87c27..85d5ad7 100644
 --- a/gdbstub.c
 +++ b/gdbstub.c
 @@ -41,6 +41,9 @@
  #include qemu_socket.h
  #include kvm.h

 +#ifndef TARGET_CPU_MEMORY_RW_DEBUG
 +#define TARGET_CPU_MEMORY_RW_DEBUG cpu_memory_rw_debug

 These days, inline functions are preferred over macros.


This is to allow target-specific implementation of the function.

 +#endif

  enum {
 GDB_SIGNAL_0 = 0,
 @@ -2013,7 +2016,7 @@ static int gdb_handle_packet(GDBState *s, const char 
 *line_buf)
 if (*p == ',')
 p++;
 len = strtoull(p, NULL, 16);
 -if (cpu_memory_rw_debug(s-g_cpu, addr, mem_buf, len, 0) != 0) {
 +if (TARGET_CPU_MEMORY_RW_DEBUG(s-g_cpu, addr, mem_buf, len, 0) != 
 0) {

 cpu_memory_rw_debug() could remain unwrapped with a generic function
 like cpu_gdb_sync_memory() which gdbstub should explicitly call.

 Maybe the lazy condition codes etc. could be handled in similar way,
 cpu_gdb_sync_registers().


Excuse me, I don't understand here.

 put_packet (s, E14);
 } else {
 memtohex(buf, mem_buf, len);
 @@ -2028,10 +2031,11 @@ static int gdb_handle_packet(GDBState *s, const char 
 *line_buf)
 if (*p == ':')
 p++;
 hextomem(mem_buf, p, len);
 -if (cpu_memory_rw_debug(s-g_cpu, addr, mem_buf, len, 1) != 0)
 +if (TARGET_CPU_MEMORY_RW_DEBUG(s-g_cpu, addr, mem_buf, len, 1) != 
 0) {
 put_packet(s, E14);
 -else
 +} else {
 put_packet(s, OK);
 +}
 break;
 case 'p':
 /* Older gdb are really dumb, and don't use 'g' if 'p' is avaialable.
 diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h
 index 8654f26..3f76eaf 100644
 --- a/target-sparc/cpu.h
 +++ b/target-sparc/cpu.h
 @@ -495,6 +495,13 @@ int cpu_sparc_handle_mmu_fault(CPUSPARCState *env1, 
 target_ulong address, int rw
  target_ulong mmu_probe(CPUSPARCState *env, target_ulong address, int 
 mmulev);
  void dump_mmu(FILE *f, fprintf_function cpu_fprintf, CPUState *env);

 +#if !defined(TARGET_SPARC64)
 +int sparc_cpu_memory_rw_debug(CPUState *env, target_ulong addr,
 +  uint8_t *buf, int len, int is_write);
 +#define TARGET_CPU_MEMORY_RW_DEBUG sparc_cpu_memory_rw_debug
 +#endif
 +
 +
  /* translate.c */
  void gen_intermediate_code_init(CPUSPARCState *env);

 diff --git a/target-sparc/helper.c b/target-sparc/helper.c
 index 1fe1f07..2cf4e8b 100644
 --- a/target-sparc/helper.c
 +++ b/target-sparc/helper.c
 @@ -358,6 +358,91 @@ void dump_mmu(FILE *f, fprintf_function cpu_fprintf, 
 CPUState *env)
 }
  }

 +
 +/* Gdb expects all registers windows to be flushed in ram. This function 
 handles
 + * reads/writes in stack frames as if windows were flushed. We assume that 
 the
 + * sparc ABI is followed.
 + */

 We can't assume that, it depends on what we are executing (BIOS, OS,
 even application).

Well, maybe the statement is too strong. The ABI is required to get a valid
result. Gdb cannot build back-traces if the ABI is not followed anyway.

 On Sparc64 there are two ABIs (32 bit and 64 bit
 with offset of -2047), though calling flushw instruction could handle
 that.

This solution is for SPARC32 only.

 If the flush happens to trigger a fault, we're in big trouble.


That's why it's safer/easier to use this hackish read/write in the registers.

 Overall, I think this is too hackish. Maybe this is a bug in GDB
 instead, information from backtrace is not reliable if ABI is not
 known.


It's not a bug in Gdb. To build back-traces you have to read stack frames. To
read stack frames, register windows must be flushed. In Qemu we can avoid
flushing with this little trick.

Regards,

-- 
Fabien Chouteau

Re: [Qemu-devel] [PATCH] pci: add standard bridge device

2011-09-05 Thread Michael S. Tsirkin

On Mon, Sep 05, 2011 at 10:17:01AM +0200, Markus Armbruster wrote:
 Michael S. Tsirkin m...@redhat.com writes:
 
  On Mon, Jul 04, 2011 at 12:43:58PM +0300, Michael S. Tsirkin wrote:
  This adds support for a standard pci to pci bridge,
  enabling support for more than 32 PCI devices in the system.
  To use, specify the device id as a 'bus' option.
  Example:
 -device pci-bridge,id=bridge1 \
 -netdev user,id=u \
 -device ne2k_pci,id=net2,bus=bridge1,netdev=u
 
  BTW, I just noticed that -device ne2k_pci,? does
  not list the bus option. Any idea why?
 
 Looking...  qdev_device_help() shows only device properties, not bus
 properties.  I'd call that a bug.

Hmm, but is bus a bus property?
Care fixing all these?

-- 
MST

Re: [Qemu-devel] [PATCH] virtio: Make memory barriers be memory barriers

2011-09-05 Thread Paolo Bonzini

  No, rmb and wmb need to generate code.
 
 If they do we'll have to surround each their use with
 ifndef x86 as you suggest later. Which is just messy.

[1 hour later]

I see what you mean now.  You assume there are no accesses to
write-combining memory (of course) or non-temporal load/stores
(because they are accessed only with assembly), so you can
make rmb/wmb no-ops on x86.  I was confused by the kernel
(and liburcu's) choice to use lfence/sfence for rmb/wmb.

Then it's indeed better to move the wmb() defines to
qemu-barrier.h, where they can be made processor-dependent.
S390, it seems, also does not need rmb/wmb.

Paolo

Re: [Qemu-devel] [PATCH 2/2] main: switch qemu_set_fd_handler to g_io_add_watch

2011-09-05 Thread Avi Kivity


On 09/04/2011 05:03 PM, Avi Kivity wrote:

On 08/22/2011 04:12 PM, Anthony Liguori wrote:

This patch changes qemu_set_fd_handler to be implemented in terms of
g_io_add_watch().  The semantics are a bit different so some glue is 
required.


qemu_set_fd_handler2 is much harder to convert because of its use of 
polling.


The glib main loop has the major of advantage of having a proven 
thread safe
architecture.  By using the glib main loop instead of our own, it 
will allow us

to eventually introduce multiple I/O threads.

I'm pretty sure that this will work on Win32, but I would appreciate 
some help
testing.  I think the semantics of g_io_channel_unix_new() are really 
just tied

to the notion of a unix fd and not necessarily unix itself.


'git bisect' fingered this as responsible for breaking 
qcow2+cache=unsafe.  I think there's an off-by-one here and the guilty 
patch is the one that switches the main loop, but that's just a guess.


The symptoms are that a guest that is restarted (new qemu process) 
after install doesn't make it through grub - some image data didn't 
make it do disk.  With qcow2 and cache=unsafe that can easily happen 
through exit notifiers not being run and the entire qcow2 metadata 
being thrown out the window.  Running with raw+cache=unsafe works.




Upstream appears to work for me... strange.

--
error compiling committee.c: too many arguments to function

Re: [Qemu-devel] [PATCH] pci: add standard bridge device

2011-09-05 Thread Gerd Hoffmann


  Hi,


Looking...  qdev_device_help() shows only device properties, not bus
properties.  I'd call that a bug.


Hmm, but is bus a bus property?


It isn't.  bus= is handled by qdev core (id= too).  addr= actually is a 
(pci) bus property.


cheers,
  Gerd

Re: [Qemu-devel] [PATCH] qemu_vmalloc: align properly for transparent hugepages and KVM

2011-09-05 Thread Jan Kiszka

On 2011-09-05 10:07, Avi Kivity wrote:
 To make good use of transparent hugepages, KVM requires that guest-physical
 and host-virtual addresses share the low 21 bits (as opposed to just the low
 12 bits normally required).
 
 Adjust qemu_vmalloc() to honor that requirement.  Ignore it for small regions
 to avoid fragmentation.
 
 Signed-off-by: Avi Kivity a...@redhat.com
 ---
  oslib-posix.c |   14 +-
  1 files changed, 13 insertions(+), 1 deletions(-)
 
 diff --git a/oslib-posix.c b/oslib-posix.c
 index 196099c..a304fb0 100644
 --- a/oslib-posix.c
 +++ b/oslib-posix.c
 @@ -35,6 +35,13 @@
  extern int daemon(int, int);
  #endif
  
 +#if defined(__linux__)  defined(__x86_64__)
 +   /* Use 2MB alignment so transparent hugepages can be used by KVM */

Aren't transparent hugepages also available in TCG mode? Then just
remove by KVM from subject and comment.

Jan

 +#  define QEMU_VMALLOC_ALIGN (512 * 4096)
 +#else
 +#  define QEMU_VMALLOC_ALIGN getpagesize()
 +#endif
 +
  #include config-host.h
  #include sysemu.h
  #include trace.h
 @@ -80,7 +87,12 @@ int qemu_daemon(int nochdir, int noclose)
  void *qemu_vmalloc(size_t size)
  {
  void *ptr;
 -ptr = qemu_memalign(getpagesize(), size);
 +size_t align = QEMU_VMALLOC_ALIGN;
 +
 +if (size  align) {
 +align = getpagesize();
 +}
 +ptr = qemu_memalign(align, size);
  trace_qemu_vmalloc(size, ptr);
  return ptr;
  }

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

Re: [Qemu-devel] [PATCH] qemu_vmalloc: align properly for transparent hugepages and KVM

2011-09-05 Thread Avi Kivity


On 09/05/2011 01:10 PM, Jan Kiszka wrote:

On 2011-09-05 10:07, Avi Kivity wrote:
  To make good use of transparent hugepages, KVM requires that guest-physical
  and host-virtual addresses share the low 21 bits (as opposed to just the low
  12 bits normally required).

  Adjust qemu_vmalloc() to honor that requirement.  Ignore it for small regions
  to avoid fragmentation.

  Signed-off-by: Avi Kivitya...@redhat.com
  ---
   oslib-posix.c |   14 +-
   1 files changed, 13 insertions(+), 1 deletions(-)

  diff --git a/oslib-posix.c b/oslib-posix.c
  index 196099c..a304fb0 100644
  --- a/oslib-posix.c
  +++ b/oslib-posix.c
  @@ -35,6 +35,13 @@
   extern int daemon(int, int);
   #endif

  +#if defined(__linux__)  defined(__x86_64__)
  +   /* Use 2MB alignment so transparent hugepages can be used by KVM */

Aren't transparent hugepages also available in TCG mode? Then just
remove by KVM from subject and comment.


They are, but they don't require the special alignment.

--
error compiling committee.c: too many arguments to function

Re: [Qemu-devel] [PATCH] pc: Clean up PIC-to-APIC IRQ path

2011-09-05 Thread Edgar E. Iglesias

On Sun, Sep 04, 2011 at 08:57:31AM -0500, Anthony Liguori wrote:
 On 09/04/2011 08:49 AM, Jan Kiszka wrote:
  On 2011-09-04 15:41, Anthony Liguori wrote:
  On 09/04/2011 08:36 AM, Jan Kiszka wrote:
  On 2011-09-04 15:32, Anthony Liguori wrote:
  I prefer to not think of IRQs as special things.  They're just single
  bits of information that flow through the device model.  Having a higher
  level representation that understands something like paths seems wrong
  to me.
 
  I'd prefer to treat things like device assignment as a hack.  You just
  need code that can walk the device tree to figure out the path (which is
  not generic code, it's very machine specific).  Then you tell the kernel
  the resolution of the path and are otherwise completely oblivious in
  userspace.
 
  See it as you like, but we need the support, not only for device
  assigment. And I do not see any gain it hacking this instead of
  designing it.
 
  You can design a hack but it's still a hack.
 
  Device state belongs in devices.  Trying to extract device state
  (interrupt routing paths) and externalizing it is by definition a hack.
 
  Having some sort of global interrupt routing table is just going to add
  a layer of complexity for very little obvious gain.
 
  It's not yet decided how the problem is solved. A global interrupt
  matrix is just one proposal, another option is to extend the pin model
  in a way that supports routing change notifiers and backward polling.
 
 If that's all you need, then you really just want notification on socket 
 changes.  Backwards polling can be achieved by just adding state to the 
 Pin (which I full heartedly support).

I'm not a fan of this backwards thing, but seeing the options available,
it might be the simplest way forward.

I agree that the global interrupt manager sounds like overkill...

Cheers

Re: [Qemu-devel] [PATCH] pc: Clean up PIC-to-APIC IRQ path

2011-09-05 Thread Edgar E. Iglesias

On Mon, Sep 05, 2011 at 12:28:50PM +0300, Avi Kivity wrote:

... 
 Query is needed when a line is masked internally, or when a device is 
 hot-plugged.
 
 We can work around masking by caching the level in the device even 
 though the line is masked, and querying the cache when the line is 
 unmasked, but that is artificial, and requires live-migrating the cache 
 (which isn't true state).  We can't work around the hotplug case.

Yup, hotplug messes everything up :) At least if you want to plug into
arbitrary places in the tree.

Cheers

Re: [Qemu-devel] [Spice-devel] [PATCH] server: don't call reds_stream_free from worker thread context

2011-09-05 Thread Alon Levy

On Mon, Sep 05, 2011 at 11:02:43AM +0200, Gerd Hoffmann wrote:
   Hi,
 
 Hi,
 RED_WORKER_MESSAGE_DISPLAY_DISCONNECT is not the only place that
 triggers red_disconnect_channel (and as a result,
 reds_stream_free(dispatcher-stream)). red_disconnect_channel is called
 also when there is an error upon receive/send and also when timeouts
 related to the client occur (e.g., in flush_display_commands).
 
 Ok.
 
 We probably better make the dispatcher bi-directional, i.e., not only
 push messages to the worker, but also listen.
 
 That sounds like a non-trivial thing.
 
 What does the master branch here btw?  I had a brief look and saw
 that the code looks quite different here (probably due to the
 multiclient work).
 

I verified it still calls reds_stream_free from the worker thread, only
now the call itself is done in red_channel.c (via red_channel_disconnect
or something like that), which is called from red_worker.c

 cheers,
   Gerd
 ___
 Spice-devel mailing list
 spice-de...@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/spice-devel

Re: [Qemu-devel] [PATCH v4 00/32] target-xtensa: new target architecture

2011-09-05 Thread Edgar E. Iglesias

On Sun, Sep 04, 2011 at 06:35:10PM +, Blue Swirl wrote:
 On Thu, Sep 1, 2011 at 8:45 PM, Max Filippov jcmvb...@gmail.com wrote:
  This series adds support for Tensilica Xtensa target.
  Port status: Linux for DC232B works in the qemu.
   Not implemented xtensa options: MAC16, floating point coprocessor,
   boolean option, cache option, debug option.
 
 I had minor comments to a few patches, otherwise this looks nice to me.
 
 Does anyone object to merging this?

I just had a very quick look and it Looks good to me too. Would be awesome
if Max could provide something to test with in binary form. Maybe we could
put it on the wiki's download page.

Cheers

Re: [Qemu-devel] [PATCH] pci: add standard bridge device

2011-09-05 Thread Michael S. Tsirkin

On Mon, Sep 05, 2011 at 11:53:11AM +0200, Gerd Hoffmann wrote:
   Hi,
 
 Looking...  qdev_device_help() shows only device properties, not bus
 properties.  I'd call that a bug.
 
 Hmm, but is bus a bus property?
 
 It isn't.  bus= is handled by qdev core (id= too).  addr= actually
 is a (pci) bus property.
 
 cheers,
   Gerd

At the moment the help text is wrong:

-device driver[,prop[=value][,...]]
add device (based on driver)
prop=value,... sets driver properties
use -device ? to print all possible drivers
use -device driver,? to print all possible properties

We also need documentation for properties and devices,
but that's a separate problem.


-- 
MST

Re: [Qemu-devel] Where to log xen_platform_log data

2011-09-05 Thread Stefano Stabellini

On Sat, 3 Sep 2011, Stefan Hajnoczi wrote:
 Hi Steven,
 The Xen platform PCI device has a logging feature that is currently
 implemented using trace_xen_platform_log(s-log_buffer).  String
 arguments may not be supported by all trace backends so they should be
 avoided.  For example, the simple trace backend logs 8-byte arguments
 and therefore cannot log strings - it will simply log the char *
 pointer value, not the actual log message.  Here is what
 docs/tracing.txt says:
 
Pointers (including char *) cannot be dereferenced easily (or at all) in
some trace backends.  If pointers are used, ensure they are meaningful by
themselves and do not assume the data they point to will be traced.  Do
not pass in string arguments.
 
 Is there a better place to send the log output?

Ideally they would go to syslog: the idea was to write a syslog trace
backend but it hasn't been done yet.

It seems to me that having the ability to trace a string would be very
useful; we could implement a fallback tracing function to those backends
that don't support char* logging natively.

Re: [Qemu-devel] [PATCH v3] rbd: fix leak in qemu_rbd_open failure paths

2011-09-05 Thread Kevin Wolf

Am 04.09.2011 18:19, schrieb Sage Weil:
 Fix leak of s-snap in failure path.  Simplify error paths for the whole
 function.
 
 Reported-by: Stefan Hajnoczi stefa...@gmail.com
 Signed-off-by: Sage Weil s...@newdream.net

This depends on [PATCH v2] rbd: allow client id to be specified in
config string, which doesn't seem to apply any more and has coding
style issues. Please fix and send a rebased series containing all four
patches.

Kevin

Re: [Qemu-devel] [PATCH v4 00/32] target-xtensa: new target architecture

2011-09-05 Thread Max Filippov

 I just had a very quick look and it Looks good to me too. Would be awesome
 if Max could provide something to test with in binary form. Maybe we could
 put it on the wiki's download page.

Tarball of my current kernel and rootfs is available at
http://jcmvbkbc.spb.ru/~dumb/ws/osll/qemu-xtensa/20110829/xtensa-dc232b_kernel_rootfs.tgz

I can also publish unit tests binaries.

-- 
Thanks.
-- Max

Re: [Qemu-devel] [PATCH v2] Display logical disk size in 'info block' output

2011-09-05 Thread Kevin Wolf

Am 02.09.2011 18:38, schrieb Daniel P. Berrange:
 From: Daniel P. Berrange d...@berrange.com
 
 To aid in knowing whether a 'block_resize' was succesful, display
 the logical disk size in bytes, in the 'info block' output
 
 In v2:
   - Replace sectors with bytes
 
 Signed-off-by: Daniel P. Berrange d...@berrange.com
 ---
  block.c |6 --
  qmp-commands.hx |1 +
  2 files changed, 5 insertions(+), 2 deletions(-)
 
 diff --git a/block.c b/block.c
 index 03a21d8..03102ad 100644
 --- a/block.c
 +++ b/block.c
 @@ -1844,6 +1844,7 @@ static void bdrv_print_dict(QObject *obj, void *opaque)
  
  monitor_printf(mon,  file=);
  monitor_print_filename(mon, qdict_get_str(qdict, file));
 +monitor_printf(mon,  length=% PRId64, qdict_get_int(qdict, 
 length));

What about using get_human_readable_size() here? Or maybe we should
rather introduce an option that selects readable units or bytes for all
of the statistics? (The latter would be a separate patch, obviously)

Looks good to me otherwise, so if you don't like my suggestion I'll
merge it as it is.

Kevin

[Qemu-devel] KVM call agenda for September 6

2011-09-05 Thread Juan Quintela


Hi

Please send in any agenda items you are interested in covering.

Later, Juan.

Re: [Qemu-devel] qemu segfaults at start

2011-09-05 Thread Stefan Hajnoczi

On Mon, Sep 5, 2011 at 10:04 AM, octane indice oct...@alinto.com wrote:
 qemu disk.img
 Segmentation fault

Please post the backtrace as well as your host operating system
version (e.g. Fedora 15):

gdb --args qemu disk.img
(gdb) r
...runs and crashes...
(gdb) bt

Stefan

Re: [Qemu-devel] QEMU online guest disk resize wrt host block devices

2011-09-05 Thread Kevin Wolf

Am 01.09.2011 17:56, schrieb Christoph Hellwig:
 On Thu, Sep 01, 2011 at 03:27:35PM +0100, Daniel P. Berrange wrote:
 One other question too, when creating a qcow2 image via 'qemu-img create'
 you can specify a 'prealloc' option to require metadata to be allocated
 at time of creation.

 Should we have the same control at time of resize too. If the app had
 originally created the qcow2 image with preallocated metadata, then
 I'd expect they want to pre-allocate metadata when extending it too,
 or is there no additional metadata allocation required when extending
 an image ?
 
 Sounds reasonable.  Keving, is there a sane way to implement this?

Implementing the functionality itself shouldn't be a big problem. The
big question is what the right interface would look like.

We have driver specific preallocation options, so we would end up with
something like this: bdrv_truncate(bs, int64_t size, char*
prealloc_mode). Not exactly nice. And is preallocation the only thing or
would we need to pass a whole option list like for image creation?

Of course, if this is a real requirement and not only a random thought,
we can always introduce a specific flag for the current use case and add
the generic thing only later when we have thought a bit more about it.

Kevin

[Qemu-devel] [PATCH] ppc405: use RAM_ADDR_FMT instead of %08lx

2011-09-05 Thread Stefan Hajnoczi

The RAM_ADDR_FMT macro hides the type of ram_addr_t so that format
strings can be safely used.  Make sure to use RAM_ADDR_FMT so that the
build works on 32-bit hosts with Xen enabled.  Whether Xen should affect
ppc TCG targets is questionable but a separate issue.

Signed-off-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com
---
 hw/ppc405_boards.c |   11 ++-
 1 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/hw/ppc405_boards.c b/hw/ppc405_boards.c
index dec165e4..5da26dc 100644
--- a/hw/ppc405_boards.c
+++ b/hw/ppc405_boards.c
@@ -211,7 +211,8 @@ static void ref405ep_init (ram_addr_t ram_size,
 sram_size = 512 * 1024;
 sram_offset = qemu_ram_alloc(NULL, ef405ep.sram, sram_size);
 #ifdef DEBUG_BOARD_INIT
-printf(%s: register SRAM at offset %08lx\n, __func__, sram_offset);
+printf(%s: register SRAM at offset  RAM_ADDR_FMT \n,
+   __func__, sram_offset);
 #endif
 cpu_register_physical_memory(0xFFF0, sram_size,
  sram_offset | IO_MEM_RAM);
@@ -228,7 +229,7 @@ static void ref405ep_init (ram_addr_t ram_size,
 fl_sectors = (bios_size + 65535)  16;
 #ifdef DEBUG_BOARD_INIT
 printf(Register parallel flash %d size %lx
-at offset %08lx addr %lx '%s' %d\n,
+at offset  RAM_ADDR_FMT  addr %lx '%s' %d\n,
fl_idx, bios_size, bios_offset, -bios_size,
bdrv_get_device_name(dinfo-bdrv), fl_sectors);
 #endif
@@ -353,7 +354,7 @@ static void ref405ep_init (ram_addr_t ram_size,
 #ifdef DEBUG_BOARD_INIT
 printf(%s: Done\n, __func__);
 #endif
-printf(bdloc %016lx\n, (unsigned long)bdloc);
+printf(bdloc  RAM_ADDR_FMT \n, bdloc);
 }
 
 static QEMUMachine ref405ep_machine = {
@@ -547,7 +548,7 @@ static void taihu_405ep_init(ram_addr_t ram_size,
 bios_offset = qemu_ram_alloc(NULL, taihu_405ep.bios, bios_size);
 #ifdef DEBUG_BOARD_INIT
 printf(Register parallel flash %d size %lx
-at offset %08lx addr %lx '%s' %d\n,
+at offset  RAM_ADDR_FMT  addr %lx '%s' %d\n,
fl_idx, bios_size, bios_offset, -bios_size,
bdrv_get_device_name(dinfo-bdrv), fl_sectors);
 #endif
@@ -590,7 +591,7 @@ static void taihu_405ep_init(ram_addr_t ram_size,
 fl_sectors = (bios_size + 65535)  16;
 #ifdef DEBUG_BOARD_INIT
 printf(Register parallel flash %d size %lx
-at offset %08lx  addr  TARGET_FMT_lx  '%s'\n,
+at offset  RAM_ADDR_FMT   addr  TARGET_FMT_lx  '%s'\n,
fl_idx, bios_size, bios_offset, (target_ulong)0xfc00,
bdrv_get_device_name(dinfo-bdrv));
 #endif
-- 
1.7.5.4

Re: [Qemu-devel] [PATCH] ehci: avoid string arguments in trace events

2011-09-05 Thread Stefan Hajnoczi

On Mon, Sep 5, 2011 at 9:38 AM, Gerd Hoffmann kra...@redhat.com wrote:
 On 09/03/11 17:22, Stefan Hajnoczi wrote:

 String arguments are not supported by all trace backends.  This patch
 replaces existing string arguments in hw/usb-ehci.c either with
 individual trace events that remain human-friendly or by printing raw
 addresses when there is no alternative or downside to that.

 Printing raw addresses *is* a downside.

 States and usbsts bits remain human-friendly since it is hard to
 remember all of them.  MMIO addresses are printed raw because they would
 create many individual trace events and the addresses are usually easy
 to remember when debugging.

 I find it hard to rememeber them.  There is a reason why the code to print
 the names for the mmio addresses is there in the first place. I don't want
 to loose that.

 Can't we just fix the backends instead?  Replacing debug fprintf with trace
 points isn't going to work if tracing can't handle strings.

I looked at the capabilities of the backends again and I think we
*can* allow strings.  The simple trace backend does not support them
but it's possible to add that.  I thought SystemTap doesn't either but
it turns out there is a userspace string copy function available.
Both stderr and ust are fine with strings.

Let's drop this patch.  I will update the tracing documentation.

Stefan

Re: [Qemu-devel] [PATCH] ehci: avoid string arguments in trace events

2011-09-05 Thread Gerd Hoffmann


  Hi,


Let's drop this patch.  I will update the tracing documentation.


Great.

thanks,
  Gerd

Re: [Qemu-devel] [PATCH] qemu-coroutine: Add simple work queue support

2011-09-05 Thread Kevin Wolf

Am 24.08.2011 09:57, schrieb Peter A. G. Crosthwaite:
 Add a function co_queue_yield_to_next() which will immediately transfer
 control to the coroutine at the head of a co queue. This can be used for
 implementing simple work queues where the manager of a co-queue only
 needs to restart queued routines one at a time.
 
 Signed-off-by: Peter A. G. Crosthwaite peter.crosthwa...@petalogix.com

Is there a difference to qemu_co_queue_next(), except that it doesn't
use a bottom half? Is there a reason why using a BH isn't reasonable in
your use case?

Kevin

Re: [Qemu-devel] [Spice-devel] [PATCH] server: don't call reds_stream_free from worker thread context

2011-09-05 Thread Gerd Hoffmann


  Hi,


I verified it still calls reds_stream_free from the worker thread, only
now the call itself is done in red_channel.c (via red_channel_disconnect
or something like that), which is called from red_worker.c


Where the code in red_channel.c is now shared for all channel types? 
Hmm.  That makes it a bit harder to change the workflow I guess ...


cheers,
  Gerd

Re: [Qemu-devel] [Spice-devel] [PATCH] server: don't call reds_stream_free from worker thread context

2011-09-05 Thread Alon Levy

On Mon, Sep 05, 2011 at 03:29:39PM +0200, Gerd Hoffmann wrote:
   Hi,
 
 I verified it still calls reds_stream_free from the worker thread, only
 now the call itself is done in red_channel.c (via red_channel_disconnect
 or something like that), which is called from red_worker.c
 
 Where the code in red_channel.c is now shared for all channel types?
 Hmm.  That makes it a bit harder to change the workflow I guess ...

can do the usual (well, done once in hw/qxl.c) trick of

 if (pthread_id() == stored_thread_id_from_main_channel_creation) {
   write_to_pipe_read_in_main_thread
 } else {
   real_reds_stream_free();
 }

 
 cheers,
   Gerd
 
 ___
 Spice-devel mailing list
 spice-de...@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/spice-devel

Re: [Qemu-devel] [Spice-devel] [PATCH] server: don't call reds_stream_free from worker thread context

2011-09-05 Thread Alon Levy

On Mon, Sep 05, 2011 at 03:29:39PM +0200, Gerd Hoffmann wrote:
   Hi,
 
 I verified it still calls reds_stream_free from the worker thread, only
 now the call itself is done in red_channel.c (via red_channel_disconnect
 or something like that), which is called from red_worker.c
 
 Where the code in red_channel.c is now shared for all channel types?
 Hmm.  That makes it a bit harder to change the workflow I guess ...
 

But just to be clear, I think it's better to fix this in monitor, like Daniel
suggested. Still waiting for Anthony to reply to his last email.

 cheers,
   Gerd
 
 ___
 Spice-devel mailing list
 spice-de...@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/spice-devel

[Qemu-devel] required glib version? Re: [PATCH 2/6] Add base64 encoder/decoder

2011-09-05 Thread Gerd Hoffmann


On 08/26/11 17:47, Jan Kiszka wrote:

On 2011-08-26 17:23, Jan Kiszka wrote:



[ using glib base64 decoder ]


Requires glib= 2.12, we are currently at= 2.0, right? Would it be OK
to raise the entry barrier?


In master it currently is = 2.20 due to v9fs_init_worker_threads using 
g_thread_get_initialized which was added in 2.20 according to the docs. 
 Which makes the build fail on rhel-5 (shipping with glib 2.12).


Guess we'll need to clearly define which minimum glib version we want 
require.  And whenever we want apply this to the whole code base or 
allow different minimum requirements for different components.


Requiring glib 2.20 for all components isn't going to fly as we 
certainly want be able to run the qemu guest agent almost everythere. 
Requiring something newer than 2.0 for the qemu emulator might be 
reasonable of there are good reasons (aka useful glib features) though.


Comments?

cheers,
  Gerd

[Qemu-devel] [PATCH] spice: set qxl-ssd.running=true before telling spice to start, RHBZ #733993

2011-09-05 Thread Yonit Halperin

If qxl-ssd.running=true is set after telling spice to start, the spice server
thread can call qxl_send_events while qxl-ssd.running is still false. This 
leads to
assert(d-ssd.running).

Signed-off-by: Yonit Halperin yhalp...@redhat.com
---
Since it looks like the purpose of the assert in qxl_send_event is preventing 
changes
in the guest when the vm is stopped, I think it is not necessary for 
ssd.running to be
exactly synchronized with the spice server status, but just be true before
the spice worker starts.

 ui/spice-display.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/ui/spice-display.c b/ui/spice-display.c
index 683d454..3224f99 100644
--- a/ui/spice-display.c
+++ b/ui/spice-display.c
@@ -260,11 +260,12 @@ void qemu_spice_vm_change_state_handler(void *opaque, int 
running, int reason)
 SimpleSpiceDisplay *ssd = opaque;
 
 if (running) {
+ssd-running = true;
 qemu_spice_start(ssd);
 } else {
 qemu_spice_stop(ssd);
+ssd-running = false;
 }
-ssd-running = running;
 }
 
 void qemu_spice_display_init_common(SimpleSpiceDisplay *ssd, DisplayState *ds)
-- 
1.7.4.4

Re: [Qemu-devel] [RFC PATCH 0/5] Add configure flag to disable TCG

2011-09-05 Thread Stefano Stabellini

On Fri, 2 Sep 2011, Anthony Liguori wrote:
 Hi,
 
 There have been a few attempts in the past to allow TCG to be disabled
 at build time.  Recently, Alex made the suggestion that we could do it by 
 using
 the same trick that we used to introduce kvm support.  That involves 
 introducing
 a tcg_enabled() macro that will be (0) if TCG is disabled in the build.
 
 GCC is smart enough to do dead code elimination if it sees an if (0) and the
 result is that if you can do:
 
 if (tcg_enabled()) {
   foo();
 }
 
 and it's more or less equivalent to:
 
 #ifdef CONFIG_TCG
   foo();
 #endif
 
 Without the ugliness that comes from using the preprocessor.  I think this 
 ended
 up being pretty straight forward.  exec.c could use a fair bit of cleanup but
 other than that, this pretty much eliminates all of the TCG code from the 
 build.
 
 This absolutely is going to break non-x86 KVM builds if they use the
 --disable-tcg flag as I haven't tested those yet.  The normal TCG build
 shouldn't be affected at all though.
 
 In principle, the code assumes that you need KVM if you don't have TCG.  Of
 course, some extra logic could be added to allow for Xen if TCG isn't present.

I like the goal if the series very much and compilation with
--disable-tcg --enable-xen works out of the box!


However there are two problems:

- compilation with --disable-kvm --disable-xen (--enable-tcg) breaks on
my box, log attached;

- the automatic filtering on the target list introduced by the first
patch effectively prevents users from enabling xen with --disable-tcg,
unless they also enable kvm. See appended patch.



diff --git a/configure b/configure
index 3f2cf6a..64f85b6 100755
--- a/configure
+++ b/configure
@@ -3148,7 +3148,7 @@ if test $static = no -a $user_pie = yes ; then
   echo QEMU_CFLAGS+=-fpie  libdis-user/config.mak
 fi
 
-kvm_incompatible() {
+virt_incompatible() {
 if test $kvm = yes -a \
   \( $1 = $cpu -o \
   \( $1 = ppcemb -a $cpu = ppc \) -o \
@@ -3158,9 +3158,14 @@ kvm_incompatible() {
   \( $1 = x86_64 -a $cpu = i386   \) -o \
   \( $1 = i386   -a $cpu = x86_64 \) \) ; then
return 1
-else
-   return 0
 fi
+if test $xen = yes -a \ 
+  \( $1 = $cpu -o \
+  \( $1 = x86_64 -a $cpu = i386   \) -o \
+  \( $1 = i386   -a $cpu = x86_64 \) \) ; then
+   return 1
+fi
+return 0
 }
 
 target_list2=
@@ -3219,7 +3224,7 @@ if test $tcg = no; then
 if test $target_softmmu = no; then
continue;
 fi
-if kvm_incompatible $target_arch2; then
+if virt_incompatible $target_arch2; then
continue;
 fi
 fisstabellini@dt02:/local/scratch/sstabellini/qemu$ ./configure --disable-kvm 
--disable-xen --target-list=i386-softmmu
Install prefix/usr/local
BIOS directory/usr/local/share/qemu
binary directory  /usr/local/bin
library directory /usr/local/lib
include directory /usr/local/include
config directory  /usr/local/etc
Manual directory  /usr/local/share/man
ELF interp prefix /usr/gnemul/qemu-%M
Source path   /local/scratch/sstabellini/qemu
C compilergcc
Host C compiler   gcc
CFLAGS-O2 -g 
QEMU_CFLAGS   -Werror -m64 -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE 
-D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -Wstrict-prototypes 
-Wredundant-decls -Wall -Wundef -Wwrite-strings -Wmissing-prototypes 
-fno-strict-aliasing  -fstack-protector-all -Wendif-labels 
-Wmissing-include-dirs -Wempty-body -Wnested-externs -Wformat-security 
-Wformat-y2k -Winit-self -Wignored-qualifiers -Wold-style-declaration 
-Wold-style-definition -Wtype-limits -I/usr/include/libpng12   
-I/usr/local/include/spice-server -I/usr/local/include/pixman-1 
-I/usr/local/include -I/usr/local/include/spice-1  
LDFLAGS   -Wl,--warn-common -m64 -g 
make  make
install   install
pythonpython
smbd  /usr/sbin/smbd
host CPU  x86_64
host big endian   no
target list   i386-softmmu
tcg debug enabled no
Mon debug enabled no
gprof enabled no
sparse enabledno
strip binariesyes
profiler  no
static build  no
-Werror enabled   yes
SDL support   yes
curses supportyes
curl support  no
check support no
mingw32 support   no
Audio drivers oss
Extra audio cards ac97 es1370 sb16 hda
Block whitelist   
Mixer emulation   no
VNC support   yes
VNC TLS support   no
VNC SASL support  no
VNC JPEG support  yes
VNC PNG support   yes
VNC threadno
xen support   no
brlapi supportno
bluez  supportno
Documentation yes
NPTL support  yes
GUEST_BASEyes
PIE user targets  no
vde support   no
Linux AIO support yes
ATTR/XATTR support no
Install blobs yes
KVM support   no
fdt support   no
preadv supportyes
fdatasync yes
madvise   yes
posix_madvise yes
uuid support  yes
vhost-net support yes
Trace backend nop
Trace output file trace-pid
spice support yes
rbd support   no
xfsctl supportno
nss used

Re: [Qemu-devel] [PATCH 5/5] tcg/ppc64: Only one call output register needed for 64 bit hosts

2011-09-05 Thread malc

On Mon, 5 Sep 2011, Stefan Weil wrote:

 The second register is only needed for 32 bit hosts.
 
 Cc: Vassili Karpov av1...@comtv.ru
 Signed-off-by: Stefan Weil w...@mail.berlios.de
 ---
  tcg/ppc64/tcg-target.c |2 +-
  1 files changed, 1 insertions(+), 1 deletions(-)
 
 diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
 index d831684..bef7aac 100644
 --- a/tcg/ppc64/tcg-target.c
 +++ b/tcg/ppc64/tcg-target.c
 @@ -130,7 +130,7 @@ static const int tcg_target_call_iarg_regs[] = {
  TCG_REG_R10
  };
  
 -static const int tcg_target_call_oarg_regs[2] = {
 +static const int tcg_target_call_oarg_regs[] = {
  TCG_REG_R3
  };
  

Fine with me. 

-- 
mailto:av1...@comtv.ru

[Qemu-devel] [PATCH 1/2] trace: allow trace events with string arguments

2011-09-05 Thread Stefan Hajnoczi

String arguments are useful for producing human-readable traces without
post-processing (e.g. stderr backend).  Although the simple backend
cannot handles strings all others can.  Strings should be allowed and
the simple backend can be extended to support them.

Signed-off-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com
---
 docs/tracing.txt |8 +++-
 1 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/docs/tracing.txt b/docs/tracing.txt
index 4b27ab0..e130a61 100644
--- a/docs/tracing.txt
+++ b/docs/tracing.txt
@@ -70,11 +70,6 @@ Trace events should use types as follows:
cannot include all user-defined struct declarations and it is therefore
necessary to use void * for pointers to structs.
 
-   Pointers (including char *) cannot be dereferenced easily (or at all) in
-   some trace backends.  If pointers are used, ensure they are meaningful by
-   themselves and do not assume the data they point to will be traced.  Do
-   not pass in string arguments.
-
  * For everything else, use primitive scalar types (char, int, long) with the
appropriate signedness.
 
@@ -185,6 +180,9 @@ source tree.  It may not be as powerful as 
platform-specific or third-party
 trace backends but it is portable.  This is the recommended trace backend
 unless you have specific needs for more advanced backends.
 
+The simple backend currently does not capture string arguments, it simply
+records the char* pointer value instead of the string that is pointed to.
+
  Monitor commands 
 
 * info trace
-- 
1.7.5.4

[Qemu-devel] [PATCH 2/2] MAINTAINERS: add tracing subsystem

2011-09-05 Thread Stefan Hajnoczi

Signed-off-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com
---
 MAINTAINERS |6 ++
 1 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 508ea1e..ce189a4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -446,6 +446,12 @@ S: Maintained
 F: slirp/
 T: git://git.kiszka.org/qemu.git queues/slirp
 
+Tracing
+M: Stefan Hajnoczi stefa...@linux.vnet.ibm.com
+S: Maintained
+F: trace/
+T: git://repo.or.cz/qemu/stefanha.git tracing
+
 Usermode Emulation
 --
 BSD user
-- 
1.7.5.4

[Qemu-devel] [PATCH] scsi: fix accounting of writes

2011-09-05 Thread Paolo Bonzini

Writes go through scsi_write_complete at least twice, the first time
to get some data without having actually written anything.  Because
of this, the first time scsi_write_complete is called it will call
bdrv_acct_done and account a read incorrectly.  Fix this by looking
at the aiocb.  I am doing the same in scsi_read_complete for symmetry,
but it is only needed in the (bogus) case of bdrv_aio_readv returning
NULL.

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 hw/scsi-disk.c |   14 --
 1 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index dba1cfa..bdeba14 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -149,9 +149,10 @@ static void scsi_read_complete(void * opaque, int ret)
 SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r-req.dev);
 int n;
 
-r-req.aiocb = NULL;
-
-bdrv_acct_done(s-bs, r-acct);
+if (r-req.aiocb != NULL) {
+r-req.aiocb = NULL;
+bdrv_acct_done(s-bs, r-acct);
+}
 
 if (ret) {
 if (scsi_handle_rw_error(r, -ret, SCSI_REQ_STATUS_RETRY_READ)) {
@@ -254,9 +255,10 @@ static void scsi_write_complete(void * opaque, int ret)
 SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r-req.dev);
 uint32_t n;
 
-r-req.aiocb = NULL;
-
-bdrv_acct_done(s-bs, r-acct);
+if (r-req.aiocb != NULL) {
+r-req.aiocb = NULL;
+bdrv_acct_done(s-bs, r-acct);
+}
 
 if (ret) {
 if (scsi_handle_rw_error(r, -ret, SCSI_REQ_STATUS_RETRY_WRITE)) {
-- 
1.7.6

[Qemu-devel] [PATCH] qemu-options: Improve help texts for options which depend on configure

2011-09-05 Thread Stefan Weil

* Replace available only by the more common only available.

* Tracing options depend on the configuration of the QEMU executable,
  so clarify the help text for both options.

Cc: Stefan Hajnoczi stefa...@linux.vnet.ibm.com
Signed-off-by: Stefan Weil w...@mail.berlios.de
---
 qemu-options.hx |   10 +-
 1 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/qemu-options.hx b/qemu-options.hx
index 659ecb2..552d41c 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1412,7 +1412,7 @@ qemu linux.img -net nic,macaddr=52:54:00:12:34:56 \
 Connect VLAN @var{n} to PORT @var{n} of a vde switch running on host and
 listening for incoming connections on @var{socketpath}. Use GROUP 
@var{groupname}
 and MODE @var{octalmode} to change default ownership and permissions for
-communication port. This option is available only if QEMU has been compiled
+communication port. This option is only available if QEMU has been compiled
 with vde support enabled.
 
 Example:
@@ -2453,13 +2453,13 @@ Specify tracing options.
 Immediately enable events listed in @var{file}.
 The file must contain one event name (as listed in the @var{trace-events} file)
 per line.
-
-This option is only available when using the @var{simple} and @var{stderr}
-tracing backends.
+This option is only available if QEMU has been compiled with
+either @var{simple} or @var{stderr} tracing backend.
 @item file=@var{file}
 Log output traces to @var{file}.
 
-This option is only available when using the @var{simple} tracing backend.
+This option is only available if QEMU has been compiled with
+the @var{simple} tracing backend.
 @end table
 ETEXI
 
-- 
1.7.2.5

[Qemu-devel] [PATCH V12 12/15] hw/9pfs: chown in chroot environment

2011-09-05 Thread M. Mohan Kumar

Add support to do chown in chroot process

Signed-off-by: M. Mohan Kumar mo...@in.ibm.com
---
 hw/9pfs/virtio-9p-chroot-worker.c |   18 ++
 hw/9pfs/virtio-9p-chroot.h|1 +
 hw/9pfs/virtio-9p-local.c |9 +
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/hw/9pfs/virtio-9p-chroot-worker.c 
b/hw/9pfs/virtio-9p-chroot-worker.c
index d297b50..8ca4805 100644
--- a/hw/9pfs/virtio-9p-chroot-worker.c
+++ b/hw/9pfs/virtio-9p-chroot-worker.c
@@ -213,6 +213,21 @@ static int chroot_do_chmod(V9fsFileObjectRequest *request)
 return 0;
 }
 
+/*
+ * Change ownership of a file object
+ * Returns 0 on success and -errno on failure
+ */
+static int chroot_do_chown(V9fsFileObjectRequest *request)
+{
+int retval;
+
+retval = lchown(request-path.path, request-data.uid, request-data.gid);
+if (retval  0) {
+return -errno;
+}
+return 0;
+}
+
 static void chroot_daemonize(int chroot_sock)
 {
 sigset_t sigset;
@@ -325,6 +340,9 @@ int v9fs_chroot(FsContext *fs_ctx)
 case T_CHMOD:
 retval = chroot_do_chmod(request);
 break;
+case T_CHOWN:
+retval = chroot_do_chown(request);
+break;
 default:
 retval = -1;
 break;
diff --git a/hw/9pfs/virtio-9p-chroot.h b/hw/9pfs/virtio-9p-chroot.h
index fc7a134..07c6627 100644
--- a/hw/9pfs/virtio-9p-chroot.h
+++ b/hw/9pfs/virtio-9p-chroot.h
@@ -12,6 +12,7 @@
 #define T_REMOVE7
 #define T_RENAME8
 #define T_CHMOD 9
+#define T_CHOWN 10
 
 #define V9FS_FD_VALID INT_MAX
 
diff --git a/hw/9pfs/virtio-9p-local.c b/hw/9pfs/virtio-9p-local.c
index 50e55ed..673cd44 100644
--- a/hw/9pfs/virtio-9p-local.c
+++ b/hw/9pfs/virtio-9p-local.c
@@ -607,16 +607,17 @@ static int local_chown(FsContext *fs_ctx, V9fsPath 
*fs_path, FsCred *credp)
 char buffer[PATH_MAX];
 char *path = fs_path-data;
 
-if ((credp-fc_uid == -1  credp-fc_gid == -1) ||
-(fs_ctx-fs_sm == SM_PASSTHROUGH)) {
+if (fs_ctx-fs_sm != SM_PASSTHROUGH 
+(credp-fc_uid == -1  credp-fc_gid == -1)) {
 return lchown(rpath(fs_ctx, path, buffer), credp-fc_uid,
 credp-fc_gid);
 } else if (fs_ctx-fs_sm == SM_MAPPED) {
 return local_set_xattr(rpath(fs_ctx, path, buffer), credp);
-} else if ((fs_ctx-fs_sm == SM_PASSTHROUGH) ||
-   (fs_ctx-fs_sm == SM_NONE)) {
+} else if (fs_ctx-fs_sm == SM_NONE) {
 return lchown(rpath(fs_ctx, path, buffer), credp-fc_uid,
 credp-fc_gid);
+} else if (fs_ctx-fs_sm == SM_PASSTHROUGH) {
+return passthrough_request(fs_ctx, NULL, path, 0, credp, T_CHOWN);
 }
 return -1;
 }
-- 
1.7.6

[Qemu-devel] [PATCH V12 10/15] hw/9pfs: Move file post creation changes to none security model

2011-09-05 Thread M. Mohan Kumar

After creating a file object, its permission and ownership details are updated
as per 9p client's request for both passthrough and none security model.
But with chrooted environment its not required for passthrough security model.
Move all post file creation changes to none security model.

Signed-off-by: M. Mohan Kumar mo...@in.ibm.com
---
 hw/9pfs/virtio-9p-local.c |   25 +
 1 files changed, 9 insertions(+), 16 deletions(-)

diff --git a/hw/9pfs/virtio-9p-local.c b/hw/9pfs/virtio-9p-local.c
index 3b97f51..68551e2 100644
--- a/hw/9pfs/virtio-9p-local.c
+++ b/hw/9pfs/virtio-9p-local.c
@@ -155,23 +155,16 @@ static int local_set_xattr(const char *path, FsCred 
*credp)
 return 0;
 }
 
-static int local_post_create_passthrough(FsContext *fs_ctx, const char *path,
- FsCred *credp)
+static int local_post_create_none(const char *path, FsCred *credp)
 {
-char buffer[PATH_MAX];
+int retval;
 
-if (chmod(rpath(fs_ctx, path, buffer), credp-fc_mode  0)  0) {
+if (chmod(path, credp-fc_mode  0)  0) {
 return -1;
 }
-if (lchown(rpath(fs_ctx, path, buffer), credp-fc_uid,
-credp-fc_gid)  0) {
-/*
- * If we fail to change ownership and if we are
- * using security model none. Ignore the error
- */
-if (fs_ctx-fs_sm != SM_NONE) {
-return -1;
-}
+retval = lchown(path, credp-fc_uid, credp-fc_gid);
+if (retval  0) {
+/* Ignore return value for none security model */
 }
 return 0;
 }
@@ -347,7 +340,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath 
*dir_path,
 if (err == -1) {
 goto out;
 }
-err = local_post_create_passthrough(fs_ctx, path, credp);
+err = local_post_create_none(buffer, credp);
 if (err == -1) {
 serrno = errno;
 goto err_end;
@@ -395,7 +388,7 @@ static int local_mkdir(FsContext *fs_ctx, V9fsPath 
*dir_path,
 if (err == -1) {
 goto out;
 }
-err = local_post_create_passthrough(fs_ctx, path, credp);
+err = local_post_create_none(buffer, credp);
 if (err == -1) {
 serrno = errno;
 goto err_end;
@@ -482,7 +475,7 @@ static int local_open2(FsContext *fs_ctx, V9fsPath 
*dir_path, const char *name,
 err = fd;
 goto out;
 }
-err = local_post_create_passthrough(fs_ctx, path, credp);
+err = local_post_create_none(buffer, credp);
 if (err == -1) {
 serrno = errno;
 goto err_end;
-- 
1.7.6

[Qemu-devel] [PATCH V12 13/15] hw/9pfs: stat in chroot environment

2011-09-05 Thread M. Mohan Kumar


Signed-off-by: M. Mohan Kumar mo...@in.ibm.com
---
 hw/9pfs/virtio-9p-chroot-worker.c |   52 -
 hw/9pfs/virtio-9p-chroot.c|   59 -
 hw/9pfs/virtio-9p-chroot.h|3 ++
 hw/9pfs/virtio-9p-local.c |   30 +++
 4 files changed, 142 insertions(+), 2 deletions(-)

diff --git a/hw/9pfs/virtio-9p-chroot-worker.c 
b/hw/9pfs/virtio-9p-chroot-worker.c
index 8ca4805..a2f6dcd 100644
--- a/hw/9pfs/virtio-9p-chroot-worker.c
+++ b/hw/9pfs/virtio-9p-chroot-worker.c
@@ -66,6 +66,29 @@ static void chroot_sendfd(int sockfd, int fd, int fd_valid)
 }
 }
 
+/* Send special information and error status to qemu process */
+static void chroot_sendspecial(int sockfd, void *buff, int size, int error)
+{
+int retval;
+
+/* If there is an error, send NULL buff also set status to error */
+if (error) {
+memset(buff, 0, size);
+}
+do {
+retval = send(sockfd, error, sizeof(error), 0);
+} while (retval  0  errno == EINTR);
+if (retval  0) {
+_exit(1);
+}
+do {
+retval = send(sockfd, buff, size, 0);
+} while (retval  0  errno == EINTR);
+if (retval  0) {
+_exit(1);
+}
+}
+
 /* Read V9fsFileObjectRequest sent by QEMU process */
 static int chroot_read_request(int sockfd, V9fsFileObjectRequest *request)
 {
@@ -267,6 +290,7 @@ int v9fs_chroot(FsContext *fs_ctx)
 pid_t pid;
 uint32_t code;
 int retval, valid_fd;
+char *buff;
 
 if (socketpair(PF_UNIX, SOCK_STREAM, 0, fd_pair)  0) {
 error_report(socketpair %s, strerror(errno));
@@ -346,7 +370,33 @@ int v9fs_chroot(FsContext *fs_ctx)
 default:
 retval = -1;
 break;
+case T_LSTAT:
+buff = g_malloc(request.data.size);
+retval = lstat(request.path.path, (struct stat *)buff);
+if (retval   0) {
+retval = -errno;
+}
+break;
+}
+
+/* Send the results */
+switch (request.data.type) {
+case T_OPEN:
+case T_CREATE:
+case T_MKDIR:
+case T_SYMLINK:
+case T_LINK:
+case T_MKNOD:
+case T_REMOVE:
+case T_RENAME:
+case T_CHMOD:
+case T_CHOWN:
+chroot_sendfd(chroot_sock, retval, valid_fd);
+break;
+case T_LSTAT:
+chroot_sendspecial(chroot_sock, buff, request.data.size, retval);
+g_free(buff);
+break;
 }
-chroot_sendfd(chroot_sock, retval, valid_fd);
 }
 }
diff --git a/hw/9pfs/virtio-9p-chroot.c b/hw/9pfs/virtio-9p-chroot.c
index f5b3abc..3094186 100644
--- a/hw/9pfs/virtio-9p-chroot.c
+++ b/hw/9pfs/virtio-9p-chroot.c
@@ -77,6 +77,37 @@ static int v9fs_receivefd(int sockfd, int *sock_error)
 return -ENFILE; /* Ancillary data sent but not received */
 }
 
+static ssize_t chroot_recv(int sockfd, void *data, size_t size, int flags)
+{
+ssize_t retval;
+do {
+retval = recv(sockfd, data, size, 0);
+} while (retval  0  errno == EINTR);
+return retval;
+}
+
+/*
+ * Return received struct stat on success and -errno on failure.
+ * sock_error is set to 1 whenever there is error in socket IO
+ */
+static int v9fs_special_receive(int sockfd, int *sock_error, char *buff,
+int size)
+{
+int retval, status;
+if (chroot_recv(sockfd, status, sizeof(status), 0) = 0) {
+*sock_error = 1;
+return -EIO;
+}
+
+retval = chroot_recv(sockfd, buff, size, 0);
+if (retval  0) {
+return status;
+} else {
+*sock_error = 1;
+return -EIO;
+}
+}
+
 /*
  * V9fsFileObjectRequest is written into the socket by QEMU process.
  * Then this request is read by chroot process using v9fs_read_request function
@@ -94,7 +125,7 @@ static int v9fs_write_request(int sockfd, 
V9fsFileObjectRequest *request)
 /* Return opened file descriptor on success or -errno on error */
 int v9fs_request(FsContext *fs_ctx, V9fsFileObjectRequest *request)
 {
-int fd, sock_error;
+int fd, sock_error = 0;
 qemu_mutex_lock(fs_ctx-chroot_mutex);
 if (fs_ctx-chroot_socket == -1) {
 goto error;
@@ -114,3 +145,29 @@ error:
 qemu_mutex_unlock(fs_ctx-chroot_mutex);
 return -EIO;
 }
+
+/* Return special information on success or -errno on error */
+int v9fs_special(FsContext *fs_ctx, V9fsFileObjectRequest *request,
+char *buff, int size)
+{
+int retval, sock_error = 0;
+qemu_mutex_lock(fs_ctx-chroot_mutex);
+if (fs_ctx-chroot_socket == -1) {
+goto error;
+}
+if (v9fs_write_request(fs_ctx-chroot_socket, request)  0) {
+goto error;
+}
+retval = v9fs_special_receive(fs_ctx-chroot_socket, sock_error, buff,
+size);
+if (retval  0  sock_error) {
+goto error;
+}
+qemu_mutex_unlock(fs_ctx-chroot_mutex);
+return retval;

[Qemu-devel] [PATCH V12 11/15] hw/9pfs: chmod in chroot environment

2011-09-05 Thread M. Mohan Kumar

Add support to do chmod operation in chroot process.

Signed-off-by: M. Mohan Kumar mo...@in.ibm.com
---
 hw/9pfs/virtio-9p-chroot-worker.c |   18 ++
 hw/9pfs/virtio-9p-chroot.h|1 +
 hw/9pfs/virtio-9p-local.c |5 +++--
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/hw/9pfs/virtio-9p-chroot-worker.c 
b/hw/9pfs/virtio-9p-chroot-worker.c
index 9bb94f2..d297b50 100644
--- a/hw/9pfs/virtio-9p-chroot-worker.c
+++ b/hw/9pfs/virtio-9p-chroot-worker.c
@@ -198,6 +198,21 @@ static int chroot_do_rename(V9fsFileObjectRequest *request)
 return 0;
 }
 
+/*
+ * Change permissions of a file object
+ * Returns 0 on success and -errno on failure
+ */
+static int chroot_do_chmod(V9fsFileObjectRequest *request)
+{
+int retval;
+
+retval = chmod(request-path.path, request-data.mode);
+if (retval  0) {
+return -errno;
+}
+return 0;
+}
+
 static void chroot_daemonize(int chroot_sock)
 {
 sigset_t sigset;
@@ -307,6 +322,9 @@ int v9fs_chroot(FsContext *fs_ctx)
 case T_RENAME:
 retval = chroot_do_rename(request);
 break;
+case T_CHMOD:
+retval = chroot_do_chmod(request);
+break;
 default:
 retval = -1;
 break;
diff --git a/hw/9pfs/virtio-9p-chroot.h b/hw/9pfs/virtio-9p-chroot.h
index fda60af..fc7a134 100644
--- a/hw/9pfs/virtio-9p-chroot.h
+++ b/hw/9pfs/virtio-9p-chroot.h
@@ -11,6 +11,7 @@
 #define T_LINK  6
 #define T_REMOVE7
 #define T_RENAME8
+#define T_CHMOD 9
 
 #define V9FS_FD_VALID INT_MAX
 
diff --git a/hw/9pfs/virtio-9p-local.c b/hw/9pfs/virtio-9p-local.c
index 68551e2..50e55ed 100644
--- a/hw/9pfs/virtio-9p-local.c
+++ b/hw/9pfs/virtio-9p-local.c
@@ -302,9 +302,10 @@ static int local_chmod(FsContext *fs_ctx, V9fsPath 
*fs_path, FsCred *credp)
 
 if (fs_ctx-fs_sm == SM_MAPPED) {
 return local_set_xattr(rpath(fs_ctx, path, buffer), credp);
-} else if ((fs_ctx-fs_sm == SM_PASSTHROUGH) ||
-   (fs_ctx-fs_sm == SM_NONE)) {
+} else if (fs_ctx-fs_sm == SM_NONE) {
 return chmod(rpath(fs_ctx, path, buffer), credp-fc_mode);
+} else {
+return passthrough_request(fs_ctx, NULL, path, 0, credp, T_CHMOD);
 }
 return -1;
 }
-- 
1.7.6

[Qemu-devel] [PATCH V12 05/15] hw/9pfs: Support for opening a file in chroot environment

2011-09-05 Thread M. Mohan Kumar

This patch adds both chroot worker and qemu side support to open a file/
directory in the chroot environment

Signed-off-by: M. Mohan Kumar mo...@in.ibm.com
---
 hw/9pfs/virtio-9p-chroot.c |   29 
 hw/9pfs/virtio-9p-chroot.h |2 +-
 hw/9pfs/virtio-9p-local.c  |   79 ++--
 3 files changed, 98 insertions(+), 12 deletions(-)

diff --git a/hw/9pfs/virtio-9p-chroot.c b/hw/9pfs/virtio-9p-chroot.c
index 63de410..f5b3abc 100644
--- a/hw/9pfs/virtio-9p-chroot.c
+++ b/hw/9pfs/virtio-9p-chroot.c
@@ -91,13 +91,26 @@ static int v9fs_write_request(int sockfd, 
V9fsFileObjectRequest *request)
 return 0;
 }
 
-/*
- * This patch adds v9fs_receivefd and v9fs_write_request functions,
- * but there is no caller. To avoid compiler warning message,
- * refer these two functions
- */
-void chroot_dummy(void)
+/* Return opened file descriptor on success or -errno on error */
+int v9fs_request(FsContext *fs_ctx, V9fsFileObjectRequest *request)
 {
-(void)v9fs_receivefd;
-(void)v9fs_write_request;
+int fd, sock_error;
+qemu_mutex_lock(fs_ctx-chroot_mutex);
+if (fs_ctx-chroot_socket == -1) {
+goto error;
+}
+if (v9fs_write_request(fs_ctx-chroot_socket, request)  0) {
+goto error;
+}
+fd = v9fs_receivefd(fs_ctx-chroot_socket, sock_error);
+if (fd  0  sock_error) {
+goto error;
+}
+qemu_mutex_unlock(fs_ctx-chroot_mutex);
+return fd;
+error:
+close(fs_ctx-chroot_socket);
+fs_ctx-chroot_socket = -1;
+qemu_mutex_unlock(fs_ctx-chroot_mutex);
+return -EIO;
 }
diff --git a/hw/9pfs/virtio-9p-chroot.h b/hw/9pfs/virtio-9p-chroot.h
index a817bcf..326238d 100644
--- a/hw/9pfs/virtio-9p-chroot.h
+++ b/hw/9pfs/virtio-9p-chroot.h
@@ -35,6 +35,6 @@ typedef struct V9fsFileObjectRequest
 } V9fsFileObjectRequest;
 
 int v9fs_chroot(FsContext *fs_ctx);
-void chroot_dummy(void);
+int v9fs_request(FsContext *fs_ctx, V9fsFileObjectRequest *or);
 
 #endif /* _QEMU_VIRTIO_9P_CHROOT_H */
diff --git a/hw/9pfs/virtio-9p-local.c b/hw/9pfs/virtio-9p-local.c
index 7a46e93..a91adb8 100644
--- a/hw/9pfs/virtio-9p-local.c
+++ b/hw/9pfs/virtio-9p-local.c
@@ -14,6 +14,9 @@
 #include hw/virtio.h
 #include virtio-9p.h
 #include virtio-9p-xattr.h
+#include qemu_socket.h
+#include fsdev/qemu-fsdev.h
+#include virtio-9p-chroot.h
 #include arpa/inet.h
 #include pwd.h
 #include grp.h
@@ -24,6 +27,63 @@
 #include linux/magic.h
 #include sys/ioctl.h
 
+/* Helper routine to fill V9fsFileObjectRequest structure */
+static int fill_fileobjectrequest(V9fsFileObjectRequest *request,
+const char *oldpath, const char *path, int flags,
+FsCred *credp, int type)
+{
+if (oldpath  strlen(oldpath) = PATH_MAX) {
+return -ENAMETOOLONG;
+}
+/* path can't be NULL */
+if (!path) {
+return -EFAULT;
+}
+
+if (strlen(path) = PATH_MAX) {
+return -ENAMETOOLONG;
+}
+strcpy(request-path.path, path);
+if (oldpath) {
+strcpy(request-path.old_path, oldpath);
+} else {
+request-path.old_path[0] = '\0';
+}
+
+memset(request-data, 0, sizeof(request-data));
+if (credp) {
+request-data.mode = credp-fc_mode;
+request-data.uid = credp-fc_uid;
+request-data.gid = credp-fc_gid;
+request-data.dev = credp-fc_rdev;
+}
+
+request-data.flags = flags;
+request-data.type = type;
+return 0;
+}
+
+static int passthrough_request(FsContext *fs_ctx, const char *old_path,
+const char *path, int flags, FsCred *credp, int type)
+{
+V9fsFileObjectRequest request;
+int retval;
+
+retval = fill_fileobjectrequest(request, old_path, path, flags, credp,
+type);
+if (retval  0) {
+errno = -retval;
+return -1;
+}
+
+retval = v9fs_request(fs_ctx, request);
+if (retval  0) {
+errno = -retval;
+retval = -1;
+}
+return retval;
+}
+
 static int local_lstat(FsContext *fs_ctx, V9fsPath *fs_path, struct stat 
*stbuf)
 {
 int err;
@@ -157,7 +217,11 @@ static int local_open(FsContext *ctx, V9fsPath *fs_path,
 char buffer[PATH_MAX];
 char *path = fs_path-data;
 
-fs-fd = open(rpath(ctx, path, buffer), flags);
+if (ctx-fs_sm == SM_PASSTHROUGH) {
+fs-fd = passthrough_request(ctx, NULL, path, flags, NULL, T_OPEN);
+} else {
+fs-fd = open(rpath(ctx, path, buffer), flags);
+}
 return fs-fd;
 }
 
@@ -167,7 +231,17 @@ static int local_opendir(FsContext *ctx,
 char buffer[PATH_MAX];
 char *path = fs_path-data;
 
-fs-dir = opendir(rpath(ctx, path, buffer));
+if (ctx-fs_sm == SM_PASSTHROUGH) {
+int fd;
+fd = passthrough_request(ctx, NULL, path, O_DIRECTORY, NULL, T_OPEN);
+if (fd  0) {
+return -1;
+}
+fs-dir = fdopendir(fd);
+} else {
+fs-dir = opendir(rpath(ctx, path, buffer));
+}
+
 if (!fs-dir) {

[Qemu-devel] [PATCH V12 04/15] hw/9pfs: qemu interfaces for chroot environment

2011-09-05 Thread M. Mohan Kumar

QEMU side interfaces to communicate with chroot worker process.

Signed-off-by: M. Mohan Kumar mo...@in.ibm.com
[mala...@us.ibm.com: Handle when qemu process can not receive fd because
it already reached max fds]
---
 Makefile.objs  |2 +-
 hw/9pfs/virtio-9p-chroot.c |  103 
 2 files changed, 104 insertions(+), 1 deletions(-)
 create mode 100644 hw/9pfs/virtio-9p-chroot.c

diff --git a/Makefile.objs b/Makefile.objs
index 01e9350..fa8a755 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -308,7 +308,7 @@ hw-obj-$(CONFIG_SOUND) += $(sound-obj-y)
 9pfs-nested-$(CONFIG_VIRTFS) += virtio-9p-coth.o cofs.o codir.o cofile.o
 9pfs-nested-$(CONFIG_VIRTFS) += coxattr.o virtio-9p-handle.o
 9pfs-nested-$(CONFIG_VIRTFS) += virtio-9p-synth.o
-9pfs-nested-$(CONFIG_VIRTFS) += virtio-9p-chroot-worker.o
+9pfs-nested-$(CONFIG_VIRTFS) += virtio-9p-chroot-worker.o virtio-9p-chroot.o
 
 hw-obj-$(CONFIG_REALLY_VIRTFS) += $(addprefix 9pfs/, $(9pfs-nested-y))
 $(addprefix 9pfs/, $(9pfs-nested-y)): QEMU_CFLAGS+=$(GLIB_CFLAGS)
diff --git a/hw/9pfs/virtio-9p-chroot.c b/hw/9pfs/virtio-9p-chroot.c
new file mode 100644
index 000..63de410
--- /dev/null
+++ b/hw/9pfs/virtio-9p-chroot.c
@@ -0,0 +1,103 @@
+/*
+ * Virtio 9p chroot environment for contained access to exported path
+ * Code handles qemu side interfaces to communicate with chroot worker process
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * M. Mohan Kumar mo...@in.ibm.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the copying file in the top-level directory
+ *
+ */
+
+#include sys/fsuid.h
+#include sys/resource.h
+#include signal.h
+#include qemu_socket.h
+#include qemu-thread.h
+#include qerror.h
+#include virtio-9p.h
+#include virtio-9p-chroot.h
+
+/*
+ * Return received file descriptor on success and -errno on failure.
+ * sock_error is set to 1 whenever there is error in socket IO
+ */
+static int v9fs_receivefd(int sockfd, int *sock_error)
+{
+struct msghdr msg = { };
+struct iovec iov;
+union MsgControl msg_control;
+struct cmsghdr *cmsg;
+int retval, data, fd;
+
+iov.iov_base = data;
+iov.iov_len = sizeof(data);
+
+*sock_error = 0;
+memset(msg, 0, sizeof(msg));
+msg.msg_iov = iov;
+msg.msg_iovlen = 1;
+msg.msg_control = msg_control;
+msg.msg_controllen = sizeof(msg_control);
+
+do {
+retval = recvmsg(sockfd, msg, 0);
+} while (retval  0  errno == EINTR);
+if (retval = 0) {
+*sock_error = 1;
+return -EIO;
+}
+
+/*
+ * data is set to V9FS_FD_VALID, if ancillary data is sent.  If this
+ * request doesn't need ancillary data (fd) or an error occurred,
+ * data is set to negative errno value.
+ */
+if (data != V9FS_FD_VALID) {
+return data;
+}
+
+/*
+ * File descriptor (fd) is sent in the ancillary data. Check if we
+ * indeed received it. One of the reasons to fail to receive it is if
+ * we exceeded the maximum number of file descriptors!
+ */
+for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
+if (cmsg-cmsg_len != CMSG_LEN(sizeof(int)) ||
+cmsg-cmsg_level != SOL_SOCKET ||
+cmsg-cmsg_type != SCM_RIGHTS) {
+continue;
+}
+fd = *((int *)CMSG_DATA(cmsg));
+return fd;
+}
+
+return -ENFILE; /* Ancillary data sent but not received */
+}
+
+/*
+ * V9fsFileObjectRequest is written into the socket by QEMU process.
+ * Then this request is read by chroot process using v9fs_read_request function
+ */
+static int v9fs_write_request(int sockfd, V9fsFileObjectRequest *request)
+{
+int retval;
+retval = qemu_write_full(sockfd, request, sizeof(*request));
+if (retval != sizeof(*request)) {
+return -EIO;
+}
+return 0;
+}
+
+/*
+ * This patch adds v9fs_receivefd and v9fs_write_request functions,
+ * but there is no caller. To avoid compiler warning message,
+ * refer these two functions
+ */
+void chroot_dummy(void)
+{
+(void)v9fs_receivefd;
+(void)v9fs_write_request;
+}
-- 
1.7.6

[Qemu-devel] [PATCH V12 02/15] hw/9pfs: Enable CONFIG_THREAD if CONFIG_VIRTFS is enabled

2011-09-05 Thread M. Mohan Kumar

9p Chroot environment needs APIs defined in qemu-thread.c, so enable
CONFIG_THREAD if virtfs is enabled

Signed-off-by: M. Mohan Kumar mo...@in.ibm.com
---
 configure |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/configure b/configure
index 1340c33..ad59fcc 100755
--- a/configure
+++ b/configure
@@ -2980,6 +2980,7 @@ fi
 if test $linux = yes ; then
   if test $attr = yes ; then
 echo CONFIG_VIRTFS=y  $config_host_mak
+echo CONFIG_THREAD=y  $config_host_mak
   fi
 fi
 if test $blobs = yes ; then
-- 
1.7.6

[Qemu-devel] [PATCH V12 03/15] hw/9pfs: Provide chroot worker side interfaces

2011-09-05 Thread M. Mohan Kumar

Implement chroot worker side interfaces like sending the file
descriptor to qemu process, reading the object request from socket etc.
Also add chroot main function and other helper routines.

Signed-off-by: M. Mohan Kumar mo...@in.ibm.com
[mala...@us.ibm.com: Do not send fd as part of data, instead a special
value is sent as part of data when fd is sent as part of ancillary data]
---
 Makefile.objs |1 +
 fsdev/file-op-9p.h|3 +
 hw/9pfs/virtio-9p-chroot-worker.c |  193 +
 hw/9pfs/virtio-9p-chroot.h|   40 
 hw/9pfs/virtio-9p-device.c|   24 +
 5 files changed, 261 insertions(+), 0 deletions(-)
 create mode 100644 hw/9pfs/virtio-9p-chroot-worker.c
 create mode 100644 hw/9pfs/virtio-9p-chroot.h

diff --git a/Makefile.objs b/Makefile.objs
index 20c9e2b..01e9350 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -308,6 +308,7 @@ hw-obj-$(CONFIG_SOUND) += $(sound-obj-y)
 9pfs-nested-$(CONFIG_VIRTFS) += virtio-9p-coth.o cofs.o codir.o cofile.o
 9pfs-nested-$(CONFIG_VIRTFS) += coxattr.o virtio-9p-handle.o
 9pfs-nested-$(CONFIG_VIRTFS) += virtio-9p-synth.o
+9pfs-nested-$(CONFIG_VIRTFS) += virtio-9p-chroot-worker.o
 
 hw-obj-$(CONFIG_REALLY_VIRTFS) += $(addprefix 9pfs/, $(9pfs-nested-y))
 $(addprefix 9pfs/, $(9pfs-nested-y)): QEMU_CFLAGS+=$(GLIB_CFLAGS)
diff --git a/fsdev/file-op-9p.h b/fsdev/file-op-9p.h
index 076af22..5547a2f 100644
--- a/fsdev/file-op-9p.h
+++ b/fsdev/file-op-9p.h
@@ -19,6 +19,7 @@
 #include sys/stat.h
 #include sys/uio.h
 #include sys/vfs.h
+#include qemu-thread.h
 
 #define SM_LOCAL_MODE_BITS0600
 #define SM_LOCAL_DIR_MODE_BITS0700
@@ -72,6 +73,8 @@ typedef struct FsContext
 struct extended_ops exops;
 /* fs driver specific data */
 void *private;
+QemuMutex chroot_mutex;
+int chroot_socket;
 } FsContext;
 
 typedef struct V9fsPath {
diff --git a/hw/9pfs/virtio-9p-chroot-worker.c 
b/hw/9pfs/virtio-9p-chroot-worker.c
new file mode 100644
index 000..40a54b3
--- /dev/null
+++ b/hw/9pfs/virtio-9p-chroot-worker.c
@@ -0,0 +1,193 @@
+/*
+ * Virtio 9p chroot environment for contained access to the exported path
+ * Code path handles chroot worker side interfaces
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * M. Mohan Kumar mo...@in.ibm.com
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the copying file in the top-level directory
+ *
+ */
+
+#include sys/fsuid.h
+#include sys/resource.h
+#include signal.h
+#include qemu_socket.h
+#include qemu-thread.h
+#include qerror.h
+#include virtio-9p.h
+#include virtio-9p-chroot.h
+
+/* Send file descriptor and error status to qemu process */
+static void chroot_sendfd(int sockfd, int fd, int fd_valid)
+{
+struct msghdr msg = { };
+struct iovec iov;
+struct cmsghdr *cmsg;
+int retval, data;
+union MsgControl msg_control;
+
+iov.iov_base = data;
+iov.iov_len = sizeof(data);
+
+memset(msg, 0, sizeof(msg));
+msg.msg_iov = iov;
+msg.msg_iovlen = 1;
+/* No ancillary data on error */
+if (!fd_valid) {
+/*
+ * fd is really negative errno if the request failed. Or simply
+ * zero if the request is successful and it doesn't need a file
+ * descriptor.
+ */
+data = fd;
+} else {
+data = V9FS_FD_VALID;
+msg.msg_control = msg_control;
+msg.msg_controllen = sizeof(msg_control);
+
+cmsg = msg_control.cmsg;
+cmsg-cmsg_len = CMSG_LEN(sizeof(fd));
+cmsg-cmsg_level = SOL_SOCKET;
+cmsg-cmsg_type = SCM_RIGHTS;
+memcpy(CMSG_DATA(cmsg), fd, sizeof(fd));
+}
+
+do {
+retval = sendmsg(sockfd, msg, 0);
+} while (retval  0  errno == EINTR);
+if (retval  0) {
+_exit(1);
+}
+if (fd_valid) {
+close(fd);
+}
+}
+
+/* Read V9fsFileObjectRequest sent by QEMU process */
+static int chroot_read_request(int sockfd, V9fsFileObjectRequest *request)
+{
+int retval;
+retval = qemu_read_full(sockfd, request, sizeof(*request));
+if (retval != sizeof(*request)) {
+if (errno == EBADF) {
+_exit(1);
+}
+return -EIO;
+}
+return 0;
+}
+
+/*
+ * Helper routine to open a file
+ */
+static int chroot_do_open(V9fsFileObjectRequest *request)
+{
+int fd;
+fd = open(request-path.path, request-data.flags);
+if (fd  0) {
+fd = -errno;
+}
+return fd;
+}
+
+static void chroot_daemonize(int chroot_sock)
+{
+sigset_t sigset;
+struct rlimit nr_fd;
+int fd;
+
+/* Block all signals for this process */
+sigfillset(sigset);
+sigprocmask(SIG_SETMASK, sigset, NULL);
+
+/* Close other file descriptors */
+getrlimit(RLIMIT_NOFILE, nr_fd);
+for (fd = 0; fd  nr_fd.rlim_cur; fd++) {
+if (fd != chroot_sock) {
+close(fd);
+}
+}
+chdir(/);
+/* Create files with mode as per request */
+

[Qemu-devel] [PATCH 1/2] ptimer: move declarations to ptimer.h

2011-09-05 Thread Paolo Bonzini

Signed-off-by: Paolo Bonzini pbonz...@redhat.com
---
 hw/arm_timer.c|1 +
 hw/etraxfs_timer.c|1 +
 hw/grlib_apbuart.c|1 +
 hw/grlib_gptimer.c|1 +
 hw/lan9118.c  |1 +
 hw/leon3.c|1 +
 hw/lm32_timer.c   |1 +
 hw/mcf5206.c  |1 +
 hw/mcf5208.c  |1 +
 hw/milkymist-sysctl.c |1 +
 hw/musicpal.c |1 +
 hw/ptimer.c   |1 +
 hw/ptimer.h   |   27 +++
 hw/sh_timer.c |1 +
 hw/slavio_timer.c |1 +
 hw/syborg_timer.c |1 +
 hw/xilinx_axidma.c|1 +
 hw/xilinx_timer.c |1 +
 qemu-timer.h  |   13 -
 19 files changed, 44 insertions(+), 13 deletions(-)
 create mode 100644 hw/ptimer.h

diff --git a/hw/arm_timer.c b/hw/arm_timer.c
index 09a4b24..63c19d7 100644
--- a/hw/arm_timer.c
+++ b/hw/arm_timer.c
@@ -9,6 +9,7 @@
 
 #include sysbus.h
 #include qemu-timer.h
+#include ptimer.h
 
 /* Common timer implementation.  */
 
diff --git a/hw/etraxfs_timer.c b/hw/etraxfs_timer.c
index b08e574..0a28c4c 100644
--- a/hw/etraxfs_timer.c
+++ b/hw/etraxfs_timer.c
@@ -24,6 +24,7 @@
 #include sysbus.h
 #include sysemu.h
 #include qemu-timer.h
+#include ptimer.h
 
 #define D(x)
 
diff --git a/hw/grlib_apbuart.c b/hw/grlib_apbuart.c
index c90b810..84d3d7a 100644
--- a/hw/grlib_apbuart.c
+++ b/hw/grlib_apbuart.c
@@ -24,6 +24,7 @@
 
 #include sysbus.h
 #include qemu-char.h
+#include ptimer.h
 
 #include trace.h
 
diff --git a/hw/grlib_gptimer.c b/hw/grlib_gptimer.c
index 85869b9..a0ae1c4 100644
--- a/hw/grlib_gptimer.c
+++ b/hw/grlib_gptimer.c
@@ -24,6 +24,7 @@
 
 #include sysbus.h
 #include qemu-timer.h
+#include ptimer.h
 
 #include trace.h
 
diff --git a/hw/lan9118.c b/hw/lan9118.c
index 73a8661..a562206 100644
--- a/hw/lan9118.c
+++ b/hw/lan9118.c
@@ -11,6 +11,7 @@
 #include net.h
 #include devices.h
 #include sysemu.h
+#include ptimer.h
 /* For crc32 */
 #include zlib.h
 
diff --git a/hw/leon3.c b/hw/leon3.c
index a62a941..24ba46b 100644
--- a/hw/leon3.c
+++ b/hw/leon3.c
@@ -23,6 +23,7 @@
  */
 #include hw.h
 #include qemu-timer.h
+#include ptimer.h
 #include qemu-char.h
 #include sysemu.h
 #include boards.h
diff --git a/hw/lm32_timer.c b/hw/lm32_timer.c
index 49cbb22..e355d80 100644
--- a/hw/lm32_timer.c
+++ b/hw/lm32_timer.c
@@ -25,6 +25,7 @@
 #include sysbus.h
 #include trace.h
 #include qemu-timer.h
+#include ptimer.h
 #include qemu-error.h
 
 #define DEFAULT_FREQUENCY (50*100)
diff --git a/hw/mcf5206.c b/hw/mcf5206.c
index 15d6f22..4ad3805 100644
--- a/hw/mcf5206.c
+++ b/hw/mcf5206.c
@@ -8,6 +8,7 @@
 #include hw.h
 #include mcf.h
 #include qemu-timer.h
+#include ptimer.h
 #include sysemu.h
 
 /* General purpose timer module.  */
diff --git a/hw/mcf5208.c b/hw/mcf5208.c
index 8fe507f..e4c4330 100644
--- a/hw/mcf5208.c
+++ b/hw/mcf5208.c
@@ -8,6 +8,7 @@
 #include hw.h
 #include mcf.h
 #include qemu-timer.h
+#include ptimer.h
 #include sysemu.h
 #include net.h
 #include boards.h
diff --git a/hw/milkymist-sysctl.c b/hw/milkymist-sysctl.c
index 7b2d544..5f8d4ad 100644
--- a/hw/milkymist-sysctl.c
+++ b/hw/milkymist-sysctl.c
@@ -26,6 +26,7 @@
 #include sysemu.h
 #include trace.h
 #include qemu-timer.h
+#include ptimer.h
 #include qemu-error.h
 
 enum {
diff --git a/hw/musicpal.c b/hw/musicpal.c
index 63dd391..1bac24b 100644
--- a/hw/musicpal.c
+++ b/hw/musicpal.c
@@ -14,6 +14,7 @@
 #include boards.h
 #include pc.h
 #include qemu-timer.h
+#include ptimer.h
 #include block.h
 #include flash.h
 #include console.h
diff --git a/hw/ptimer.c b/hw/ptimer.c
index b6cabd5..de7d664 100644
--- a/hw/ptimer.c
+++ b/hw/ptimer.c
@@ -7,6 +7,7 @@
  */
 #include hw.h
 #include qemu-timer.h
+#include ptimer.h
 #include host-utils.h
 
 struct ptimer_state
diff --git a/hw/ptimer.h b/hw/ptimer.h
new file mode 100644
index 000..69cdddc
--- /dev/null
+++ b/hw/ptimer.h
@@ -0,0 +1,27 @@
+/*
+ * General purpose implementation of a simple periodic countdown timer.
+ *
+ * Copyright (c) 2007 CodeSourcery.
+ *
+ * This code is licensed under the GNU LGPL.
+ */
+#ifndef PTIMER_H
+#define PTIMER_H
+
+#include qemu-common.h
+#include qemu-timer.h
+
+/* ptimer.c */
+typedef struct ptimer_state ptimer_state;
+typedef void (*ptimer_cb)(void *opaque);
+
+ptimer_state *ptimer_init(QEMUBH *bh);
+void ptimer_set_period(ptimer_state *s, int64_t period);
+void ptimer_set_freq(ptimer_state *s, uint32_t freq);
+void ptimer_set_limit(ptimer_state *s, uint64_t limit, int reload);
+uint64_t ptimer_get_count(ptimer_state *s);
+void ptimer_set_count(ptimer_state *s, uint64_t count);
+void ptimer_run(ptimer_state *s, int oneshot);
+void ptimer_stop(ptimer_state *s);
+
+#endif
diff --git a/hw/sh_timer.c b/hw/sh_timer.c
index dca3c94..31bf975 100644
--- a/hw/sh_timer.c
+++ b/hw/sh_timer.c
@@ -11,6 +11,7 @@
 #include hw.h
 #include sh.h
 #include qemu-timer.h
+#include ptimer.h
 
 //#define DEBUG_TIMER
 
diff --git a/hw/slavio_timer.c b/hw/slavio_timer.c
index 84449ba..b3a6745

[Qemu-devel] [PATCH V12 06/15] hw/9pfs: Create support in chroot environment

2011-09-05 Thread M. Mohan Kumar

Add both chroot worker  qemu side interfaces to create regular files in
chroot environment

Signed-off-by: M. Mohan Kumar mo...@in.ibm.com
---
 hw/9pfs/virtio-9p-chroot-worker.c |   36 
 hw/9pfs/virtio-9p-chroot.h|1 +
 hw/9pfs/virtio-9p-local.c |5 +++--
 3 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/hw/9pfs/virtio-9p-chroot-worker.c 
b/hw/9pfs/virtio-9p-chroot-worker.c
index 40a54b3..581bfa9 100644
--- a/hw/9pfs/virtio-9p-chroot-worker.c
+++ b/hw/9pfs/virtio-9p-chroot-worker.c
@@ -93,6 +93,36 @@ static int chroot_do_open(V9fsFileObjectRequest *request)
 return fd;
 }
 
+/*
+ * Helper routine to create a file and return the file descriptor
+ */
+static int chroot_do_create(V9fsFileObjectRequest *request)
+{
+uid_t cur_uid;
+gid_t cur_gid;
+int fd = -1;
+
+cur_uid = geteuid();
+cur_gid = getegid();
+
+if (setfsuid(request-data.uid)  0) {
+return -errno;
+}
+if (setfsgid(request-data.gid)  0) {
+fd = -errno;
+goto unset_uid;
+}
+
+fd = open(request-path.path, request-data.flags, request-data.mode);
+if (fd  0) {
+fd = -errno;
+}
+setfsgid(cur_gid);
+unset_uid:
+setfsuid(cur_uid);
+return fd;
+}
+
 static void chroot_daemonize(int chroot_sock)
 {
 sigset_t sigset;
@@ -184,6 +214,12 @@ int v9fs_chroot(FsContext *fs_ctx)
 valid_fd = 1;
 }
 break;
+case T_CREATE:
+retval = chroot_do_create(request);
+if (retval = 0) {
+valid_fd = 1;
+}
+break;
 default:
 retval = -1;
 break;
diff --git a/hw/9pfs/virtio-9p-chroot.h b/hw/9pfs/virtio-9p-chroot.h
index 326238d..d5c3f37 100644
--- a/hw/9pfs/virtio-9p-chroot.h
+++ b/hw/9pfs/virtio-9p-chroot.h
@@ -4,6 +4,7 @@
 #include qemu_socket.h
 /* types for V9fsFileObjectRequest */
 #define T_OPEN  1
+#define T_CREATE2
 
 #define V9FS_FD_VALID INT_MAX
 
diff --git a/hw/9pfs/virtio-9p-local.c b/hw/9pfs/virtio-9p-local.c
index a91adb8..4e40fa8 100644
--- a/hw/9pfs/virtio-9p-local.c
+++ b/hw/9pfs/virtio-9p-local.c
@@ -474,8 +474,7 @@ static int local_open2(FsContext *fs_ctx, V9fsPath 
*dir_path, const char *name,
 serrno = errno;
 goto err_end;
 }
-} else if ((fs_ctx-fs_sm == SM_PASSTHROUGH) ||
-   (fs_ctx-fs_sm == SM_NONE)) {
+} else if (fs_ctx-fs_sm == SM_NONE) {
 fd = open(rpath(fs_ctx, path, buffer), flags, credp-fc_mode);
 if (fd == -1) {
 err = fd;
@@ -486,6 +485,8 @@ static int local_open2(FsContext *fs_ctx, V9fsPath 
*dir_path, const char *name,
 serrno = errno;
 goto err_end;
 }
+} else if (fs_ctx-fs_sm == SM_PASSTHROUGH) {
+fd = passthrough_request(fs_ctx, NULL, path, flags, credp, T_CREATE);
 }
 err = fd;
 fs-fd = fd;
-- 
1.7.6

[Qemu-devel] [PATCH V12 00/15] virtio-9p: chroot environment for passthrough security model

2011-09-05 Thread M. Mohan Kumar

In passthrough security model, following symbolic links in the server
side could result in TOCTTOU vulnerabilities.
(http://en.wikipedia.org/wiki/Time-of-check-to-time-of-use)

This patchset resolves this issue by creating a dedicated process which
chroots into the share path and all file object access are done in the
chroot environment.

This patchset implements chroot environment, provides necessary functions
that can be used by the passthrough function calls.

Qemu need to be invoked by root user for using virtfs with passthrough
security model (i.e to use chroot() syscall).

Question is: Is running qemu by root user expected and allowed? Some of the
virtfs features can be utilized only if qemu is started by root user (for
example passthrough security model and handle based file driver need root
privilege).

This issue can be resolved by root user starting qemu and spawning a process
with root privilege to do all privileged operations there and main qemu
process dropping its privileges to avoid any security issue in running qemu in
root mode. Privileged operations can be done similar to the chroot patchset.

But how to determine to which user-id(ie non root user id) qemu needs to drop
the privileges? Do we have a common user-id across all distributions/systems
to which qemu process can be dropped down? Also it becomes too complex i.e when
a new feature needing root privilege is added, a process with root privilege
needs to be created to handle this.

So is it allowed to run qemu by root user? If no, is it okay to add the
complexity of adding a root privilege process for each feature that needs root
privilege?

Changes from version V11:
* Rebased on top of latest qemu tree
* Moved chroot process creation into local_init function
* g_malloc/g_free instead qemu_malloc/g_free
* Rename qemu_recv function to chroot_recv

Changes from version V10:
* Added support to do lstat and readlink from chroot process
* Fixed an issue with dealing fds when qemu process reached maxfds limit

Changes from version V9:
* Error handling in special file object creation in virtio-9p-local.c

Changes from version V8:
* Make chmod and chown also operate under chroot process
* Check for invalid path requests, minor cleanups

Changes from version V7:
* Add two chroot methods remove and rename
* Minor cleanups like consolidating functions

Changes from version V6:
* Send only fd/errno in socket operations instead of FdInfo structure
* Minor cleanups

Changes from version V5:
* Return errno on failure instead of setting errno
* Minor cleanups like updated comments, enable CONFIG_THREAD if
  CONFIG_VIRTFS is enabled

Changes from version V4:
* Avoid using malloc/free inside chroot process
* Seperate chroot server and client functions

Changes from version V3
* Return EIO incase of socket read/write fail instead of exiting
* Changed data types as suggested by Blue Swirl
* Chroot process reports error through qemu process

Changes from version V2
* Treat socket IO errors as fatal, ie qemu will exit
* Split patchset based on chroot side (server) and qemu side(client)
  functionalities

M. Mohan Kumar (15):
  Implement qemu_read_full
  virtio-9p: Enable CONFIG_THREAD if CONFIG_VIRTFS is enabled
  virtio-9p: Provide chroot worker side interfaces
  virtio-9p: qemu interfaces for chroot environment
  virtio-9p: Support for opening a file in chroot environment
  virtio-9p: Create support in chroot environment
  virtio-9p: Creating special files in chroot environment
  virtio-9p: Removing file or directory in chroot environment
  virtio-9p: Rename in chroot environment
  virtio-9p: Move file post creation changes to none security model
  virtio-9p: chmod in chroot environment
  virtio-9p: chown in chroot environment
  virtio-9p: stat in chroot environment
  virtio-9p: readlink in chroot environment
  virtio-9p: Chroot environment for other functions

 Makefile.objs |1 +
 configure |1 +
 fsdev/file-op-9p.h|3 +
 hw/9pfs/virtio-9p-chroot-worker.c |  413 +
 hw/9pfs/virtio-9p-chroot.c|  173 
 hw/9pfs/virtio-9p-chroot.h|   54 +
 hw/9pfs/virtio-9p-device.c|1 +
 hw/9pfs/virtio-9p-local.c |  277 -
 osdep.c   |   32 +++
 qemu-common.h |2 +
 10 files changed, 907 insertions(+), 50 deletions(-)
 create mode 100644 hw/9pfs/virtio-9p-chroot-worker.c
 create mode 100644 hw/9pfs/virtio-9p-chroot.c
 create mode 100644 hw/9pfs/virtio-9p-chroot.h

-- 
1.7.5.4

[Qemu-devel] [PATCH V12 07/15] hw/9pfs: Creating special files in chroot environment

2011-09-05 Thread M. Mohan Kumar

Add both chroot worker and qemu side interfaces to create special files
(directory, device nodes, links and symbolic links)

Signed-off-by: M. Mohan Kumar mo...@in.ibm.com
---
 hw/9pfs/virtio-9p-chroot-worker.c |   52 +
 hw/9pfs/virtio-9p-chroot.h|5 +++
 hw/9pfs/virtio-9p-local.c |   37 +-
 3 files changed, 75 insertions(+), 19 deletions(-)

diff --git a/hw/9pfs/virtio-9p-chroot-worker.c 
b/hw/9pfs/virtio-9p-chroot-worker.c
index 581bfa9..10f290d 100644
--- a/hw/9pfs/virtio-9p-chroot-worker.c
+++ b/hw/9pfs/virtio-9p-chroot-worker.c
@@ -123,6 +123,52 @@ unset_uid:
 return fd;
 }
 
+/*
+ * Create directory, symbolic link, link, device node and regular files
+ * Similar to create, but it does not return the fd of created object
+ * Returns 0 as file descriptor on success and -errno on failure
+ */
+static int chroot_do_create_special(V9fsFileObjectRequest *request)
+{
+int cur_uid, cur_gid;
+int retval = -1;
+
+cur_uid = geteuid();
+cur_gid = getegid();
+
+if (setfsuid(request-data.uid)  0) {
+return -errno;
+}
+if (setfsgid(request-data.gid)  0) {
+retval = -errno;
+goto unset_uid;
+}
+
+switch (request-data.type) {
+case T_MKDIR:
+retval = mkdir(request-path.path, request-data.mode);
+break;
+case T_SYMLINK:
+retval = symlink(request-path.old_path, request-path.path);
+break;
+case T_LINK:
+retval = link(request-path.old_path, request-path.path);
+break;
+default:
+retval = mknod(request-path.path, request-data.mode,
+request-data.dev);
+break;
+}
+
+if (retval  0) {
+retval = -errno;
+}
+setfsgid(cur_gid);
+unset_uid:
+setfsuid(cur_uid);
+return retval;
+}
+
 static void chroot_daemonize(int chroot_sock)
 {
 sigset_t sigset;
@@ -220,6 +266,12 @@ int v9fs_chroot(FsContext *fs_ctx)
 valid_fd = 1;
 }
 break;
+case T_MKDIR:
+case T_SYMLINK:
+case T_LINK:
+case T_MKNOD:
+retval = chroot_do_create_special(request);
+break;
 default:
 retval = -1;
 break;
diff --git a/hw/9pfs/virtio-9p-chroot.h b/hw/9pfs/virtio-9p-chroot.h
index d5c3f37..9075361 100644
--- a/hw/9pfs/virtio-9p-chroot.h
+++ b/hw/9pfs/virtio-9p-chroot.h
@@ -5,6 +5,10 @@
 /* types for V9fsFileObjectRequest */
 #define T_OPEN  1
 #define T_CREATE2
+#define T_MKDIR 3
+#define T_MKNOD 4
+#define T_SYMLINK   5
+#define T_LINK  6
 
 #define V9FS_FD_VALID INT_MAX
 
@@ -37,5 +41,6 @@ typedef struct V9fsFileObjectRequest
 
 int v9fs_chroot(FsContext *fs_ctx);
 int v9fs_request(FsContext *fs_ctx, V9fsFileObjectRequest *or);
+int v9fs_create_special(FsContext *fs_ctx, V9fsFileObjectRequest *request);
 
 #endif /* _QEMU_VIRTIO_9P_CHROOT_H */
diff --git a/hw/9pfs/virtio-9p-local.c b/hw/9pfs/virtio-9p-local.c
index 4e40fa8..86eb81a 100644
--- a/hw/9pfs/virtio-9p-local.c
+++ b/hw/9pfs/virtio-9p-local.c
@@ -341,8 +341,7 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath 
*dir_path,
 serrno = errno;
 goto err_end;
 }
-} else if ((fs_ctx-fs_sm == SM_PASSTHROUGH) ||
-   (fs_ctx-fs_sm == SM_NONE)) {
+} else if (fs_ctx-fs_sm == SM_NONE) {
 err = mknod(rpath(fs_ctx, path, buffer), credp-fc_mode,
 credp-fc_rdev);
 if (err == -1) {
@@ -353,6 +352,8 @@ static int local_mknod(FsContext *fs_ctx, V9fsPath 
*dir_path,
 serrno = errno;
 goto err_end;
 }
+} else {
+err = passthrough_request(fs_ctx, NULL, path, 0, credp, T_MKNOD);
 }
 goto out;
 
@@ -389,8 +390,7 @@ static int local_mkdir(FsContext *fs_ctx, V9fsPath 
*dir_path,
 serrno = errno;
 goto err_end;
 }
-} else if ((fs_ctx-fs_sm == SM_PASSTHROUGH) ||
-   (fs_ctx-fs_sm == SM_NONE)) {
+} else if (fs_ctx-fs_sm == SM_NONE) {
 err = mkdir(rpath(fs_ctx, path, buffer), credp-fc_mode);
 if (err == -1) {
 goto out;
@@ -400,6 +400,8 @@ static int local_mkdir(FsContext *fs_ctx, V9fsPath 
*dir_path,
 serrno = errno;
 goto err_end;
 }
+} else {
+err = passthrough_request(fs_ctx, NULL, path, 0, credp, T_MKDIR);
 }
 goto out;
 
@@ -544,25 +546,17 @@ static int local_symlink(FsContext *fs_ctx, const char 
*oldpath,
 serrno = errno;
 goto err_end;
 }
-} else if ((fs_ctx-fs_sm == SM_PASSTHROUGH) ||
-   (fs_ctx-fs_sm == SM_NONE)) {
+} else if (fs_ctx-fs_sm == SM_NONE) {
 err = symlink(oldpath, rpath(fs_ctx, newpath, buffer));
 if (err) {
 goto out;
 }
 err = lchown(rpath(fs_ctx, newpath, buffer), credp-fc_uid,
  credp-fc_gid);
-

[Qemu-devel] [PATCH V12 08/15] hw/9pfs: Removing file or directory in chroot environment

2011-09-05 Thread M. Mohan Kumar

Support for removing file or directory in chroot environment. Add
interfaces to remove file/directory in chroot worker and qemu side.

Signed-off-by: M. Mohan Kumar mo...@in.ibm.com
---
 hw/9pfs/virtio-9p-chroot-worker.c |   18 ++
 hw/9pfs/virtio-9p-chroot.h|1 +
 hw/9pfs/virtio-9p-local.c |4 
 3 files changed, 23 insertions(+), 0 deletions(-)

diff --git a/hw/9pfs/virtio-9p-chroot-worker.c 
b/hw/9pfs/virtio-9p-chroot-worker.c
index 10f290d..f269c7b 100644
--- a/hw/9pfs/virtio-9p-chroot-worker.c
+++ b/hw/9pfs/virtio-9p-chroot-worker.c
@@ -169,6 +169,21 @@ unset_uid:
 return retval;
 }
 
+/*
+ * Remove a file object
+ * Returns 0 on success and -errno on failure
+ */
+static int chroot_do_remove(V9fsFileObjectRequest *request)
+{
+int retval;
+
+retval = remove(request-path.path);
+if (retval  0) {
+return -errno;
+}
+return 0;
+}
+
 static void chroot_daemonize(int chroot_sock)
 {
 sigset_t sigset;
@@ -272,6 +287,9 @@ int v9fs_chroot(FsContext *fs_ctx)
 case T_MKNOD:
 retval = chroot_do_create_special(request);
 break;
+case T_REMOVE:
+retval = chroot_do_remove(request);
+break;
 default:
 retval = -1;
 break;
diff --git a/hw/9pfs/virtio-9p-chroot.h b/hw/9pfs/virtio-9p-chroot.h
index 9075361..9d69739 100644
--- a/hw/9pfs/virtio-9p-chroot.h
+++ b/hw/9pfs/virtio-9p-chroot.h
@@ -9,6 +9,7 @@
 #define T_MKNOD 4
 #define T_SYMLINK   5
 #define T_LINK  6
+#define T_REMOVE7
 
 #define V9FS_FD_VALID INT_MAX
 
diff --git a/hw/9pfs/virtio-9p-local.c b/hw/9pfs/virtio-9p-local.c
index 86eb81a..9d5354a 100644
--- a/hw/9pfs/virtio-9p-local.c
+++ b/hw/9pfs/virtio-9p-local.c
@@ -637,6 +637,10 @@ static int local_utimensat(FsContext *s, V9fsPath *fs_path,
 static int local_remove(FsContext *ctx, const char *path)
 {
 char buffer[PATH_MAX];
+
+if (ctx-fs_sm == SM_PASSTHROUGH) {
+return passthrough_request(ctx, NULL, path, 0, NULL, T_REMOVE);
+}
 return remove(rpath(ctx, path, buffer));
 }
 
-- 
1.7.6

[Qemu-devel] [PATCH V12 01/15] Implement qemu_read_full

2011-09-05 Thread M. Mohan Kumar

Signed-off-by: M. Mohan Kumar mo...@in.ibm.com
---
 osdep.c   |   32 
 qemu-common.h |2 ++
 2 files changed, 34 insertions(+), 0 deletions(-)

diff --git a/osdep.c b/osdep.c
index 56e6963..5a4d670 100644
--- a/osdep.c
+++ b/osdep.c
@@ -126,6 +126,38 @@ ssize_t qemu_write_full(int fd, const void *buf, size_t 
count)
 }
 
 /*
+ * A variant of read(2) which handles interrupted read.
+ * Simlar to qemu_write_full function
+ *
+ * Return the number of bytes read.
+ *
+ * This function does not work with non-blocking fd's.
+ * errno is set if fewer than `count' bytes are read because of any
+ * error
+ */
+ssize_t qemu_read_full(int fd, void *buf, size_t count)
+{
+ssize_t ret = 0;
+ssize_t total = 0;
+
+while (count) {
+ret = read(fd, buf, count);
+if (ret = 0) {
+if (errno == EINTR) {
+continue;
+}
+break;
+}
+
+count -= ret;
+buf += ret;
+total += ret;
+}
+
+return total;
+}
+
+/*
  * Opens a socket with FD_CLOEXEC set
  */
 int qemu_socket(int domain, int type, int protocol)
diff --git a/qemu-common.h b/qemu-common.h
index 404c421..d6aabd2 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -189,6 +189,8 @@ void qemu_mutex_unlock_iothread(void);
 int qemu_open(const char *name, int flags, ...);
 ssize_t qemu_write_full(int fd, const void *buf, size_t count)
 QEMU_WARN_UNUSED_RESULT;
+ssize_t qemu_read_full(int fd, void *buf, size_t count)
+QEMU_WARN_UNUSED_RESULT;
 void qemu_set_cloexec(int fd);
 
 #ifndef _WIN32
-- 
1.7.6

[Qemu-devel] [PATCH V12 09/15] hw/9pfs: Rename in chroot environment

2011-09-05 Thread M. Mohan Kumar

Support renaming a file or directory in chroot envirnoment. Add
interfaces for renaming in chroot worker and qemu side.

Signed-off-by: M. Mohan Kumar mo...@in.ibm.com
---
 hw/9pfs/virtio-9p-chroot-worker.c |   17 +
 hw/9pfs/virtio-9p-chroot.h|1 +
 hw/9pfs/virtio-9p-local.c |3 +++
 3 files changed, 21 insertions(+), 0 deletions(-)

diff --git a/hw/9pfs/virtio-9p-chroot-worker.c 
b/hw/9pfs/virtio-9p-chroot-worker.c
index f269c7b..9bb94f2 100644
--- a/hw/9pfs/virtio-9p-chroot-worker.c
+++ b/hw/9pfs/virtio-9p-chroot-worker.c
@@ -184,6 +184,20 @@ static int chroot_do_remove(V9fsFileObjectRequest *request)
 return 0;
 }
 
+/*
+ * Rename a file object
+ */
+static int chroot_do_rename(V9fsFileObjectRequest *request)
+{
+int retval;
+
+retval = rename(request-path.old_path, request-path.path);
+if (retval  0) {
+return -errno;
+}
+return 0;
+}
+
 static void chroot_daemonize(int chroot_sock)
 {
 sigset_t sigset;
@@ -290,6 +304,9 @@ int v9fs_chroot(FsContext *fs_ctx)
 case T_REMOVE:
 retval = chroot_do_remove(request);
 break;
+case T_RENAME:
+retval = chroot_do_rename(request);
+break;
 default:
 retval = -1;
 break;
diff --git a/hw/9pfs/virtio-9p-chroot.h b/hw/9pfs/virtio-9p-chroot.h
index 9d69739..fda60af 100644
--- a/hw/9pfs/virtio-9p-chroot.h
+++ b/hw/9pfs/virtio-9p-chroot.h
@@ -10,6 +10,7 @@
 #define T_SYMLINK   5
 #define T_LINK  6
 #define T_REMOVE7
+#define T_RENAME8
 
 #define V9FS_FD_VALID INT_MAX
 
diff --git a/hw/9pfs/virtio-9p-local.c b/hw/9pfs/virtio-9p-local.c
index 9d5354a..3b97f51 100644
--- a/hw/9pfs/virtio-9p-local.c
+++ b/hw/9pfs/virtio-9p-local.c
@@ -602,6 +602,9 @@ static int local_rename(FsContext *ctx, const char *oldpath,
 {
 char buffer[PATH_MAX], buffer1[PATH_MAX];
 
+if (ctx-fs_sm == SM_PASSTHROUGH) {
+return passthrough_request(ctx, oldpath, newpath, 0, NULL, T_RENAME);
+}
 return rename(rpath(ctx, oldpath, buffer), rpath(ctx, newpath, buffer1));
 }
 
-- 
1.7.6

[Qemu-devel] [PATCH V12 15/15] hw/9pfs: Chroot environment for other functions

2011-09-05 Thread M. Mohan Kumar

Add chroot functionality for system calls that can operate on a file using
relative directory file descriptor.

Signed-off-by: M. Mohan Kumar mo...@in.ibm.com
---
 hw/9pfs/virtio-9p-local.c |   41 +++--
 1 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/hw/9pfs/virtio-9p-local.c b/hw/9pfs/virtio-9p-local.c
index ffef8a2..c7cceb5 100644
--- a/hw/9pfs/virtio-9p-local.c
+++ b/hw/9pfs/virtio-9p-local.c
@@ -628,7 +628,25 @@ static int local_truncate(FsContext *ctx, V9fsPath 
*fs_path, off_t size)
 char buffer[PATH_MAX];
 char *path = fs_path-data;
 
-return truncate(rpath(ctx, path, buffer), size);
+if (ctx-fs_sm == SM_PASSTHROUGH) {
+int fd, retval, serrno;
+fd = passthrough_request(ctx, NULL, path, O_RDWR, NULL, T_OPEN);
+if (fd  0) {
+errno = -fd;
+return -1;
+}
+retval = ftruncate(fd, size);
+if (retval  0) {
+serrno = errno;
+}
+close(fd);
+if (retval  0) {
+errno = serrno;
+}
+return retval;
+} else {
+return truncate(rpath(ctx, path, buffer), size);
+}
 }
 
 static int local_rename(FsContext *ctx, const char *oldpath,
@@ -668,8 +686,27 @@ static int local_utimensat(FsContext *s, V9fsPath *fs_path,
 char buffer[PATH_MAX];
 char *path = fs_path-data;
 
-return qemu_utimensat(AT_FDCWD, rpath(s, path, buffer), buf,
+if (s-fs_sm == SM_PASSTHROUGH) {
+int fd, retval, serrno = 0;
+fd = passthrough_request(s, NULL, path,
+O_RDONLY | O_NONBLOCK | O_NOFOLLOW, NULL, T_OPEN);
+if (fd  0) {
+errno = -fd;
+return -1;
+}
+retval = futimens(fd, buf);
+if (retval  0) {
+serrno = errno;
+}
+close(fd);
+if (retval  0) {
+errno = serrno;
+}
+return retval;
+} else {
+return qemu_utimensat(AT_FDCWD, rpath(s, path, buffer), buf,
   AT_SYMLINK_NOFOLLOW);
+}
 }
 
 static int local_remove(FsContext *ctx, const char *path)
-- 
1.7.6

[Qemu-devel] [PATCH V12 14/15] hw/9pfs: readlink in chroot environment

2011-09-05 Thread M. Mohan Kumar


Signed-off-by: M. Mohan Kumar mo...@in.ibm.com
---
 hw/9pfs/virtio-9p-chroot-worker.c |   17 ++---
 hw/9pfs/virtio-9p-chroot.h|1 +
 hw/9pfs/virtio-9p-local.c |   14 --
 3 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/hw/9pfs/virtio-9p-chroot-worker.c 
b/hw/9pfs/virtio-9p-chroot-worker.c
index a2f6dcd..3040d98 100644
--- a/hw/9pfs/virtio-9p-chroot-worker.c
+++ b/hw/9pfs/virtio-9p-chroot-worker.c
@@ -367,9 +367,6 @@ int v9fs_chroot(FsContext *fs_ctx)
 case T_CHOWN:
 retval = chroot_do_chown(request);
 break;
-default:
-retval = -1;
-break;
 case T_LSTAT:
 buff = g_malloc(request.data.size);
 retval = lstat(request.path.path, (struct stat *)buff);
@@ -377,6 +374,19 @@ int v9fs_chroot(FsContext *fs_ctx)
 retval = -errno;
 }
 break;
+case T_READLINK:
+buff = g_malloc(request.data.size);
+retval = readlink(request.path.path, buff, request.data.size);
+if (retval  0) {
+retval = -errno;
+} else {
+buff[retval] = '\0';
+retval = 0;
+}
+break;
+default:
+retval = -1;
+break;
 }
 
 /* Send the results */
@@ -394,6 +404,7 @@ int v9fs_chroot(FsContext *fs_ctx)
 chroot_sendfd(chroot_sock, retval, valid_fd);
 break;
 case T_LSTAT:
+case T_READLINK:
 chroot_sendspecial(chroot_sock, buff, request.data.size, retval);
 g_free(buff);
 break;
diff --git a/hw/9pfs/virtio-9p-chroot.h b/hw/9pfs/virtio-9p-chroot.h
index 9ed3f4d..ebf04b5 100644
--- a/hw/9pfs/virtio-9p-chroot.h
+++ b/hw/9pfs/virtio-9p-chroot.h
@@ -14,6 +14,7 @@
 #define T_CHMOD 9
 #define T_CHOWN 10
 #define T_LSTAT 11
+#define T_READLINK  12
 
 #define V9FS_FD_VALID INT_MAX
 
diff --git a/hw/9pfs/virtio-9p-local.c b/hw/9pfs/virtio-9p-local.c
index 13c93a5..ffef8a2 100644
--- a/hw/9pfs/virtio-9p-local.c
+++ b/hw/9pfs/virtio-9p-local.c
@@ -99,6 +99,12 @@ static int passthrough_special_request(FsContext *fs_ctx, 
const char *path,
 request.data.size = size;
 if (type == T_LSTAT) {
 retval = v9fs_special(fs_ctx, request, buff, size);
+} else if (type == T_READLINK) {
+retval = v9fs_special(fs_ctx, request, buff, size);
+/* readlink needs to return the number of bytes placed in buff */
+if (retval = 0) {
+retval = strlen(buff);
+}
 } else {
 return -1;
 }
@@ -206,6 +212,11 @@ static ssize_t local_readlink(FsContext *fs_ctx, V9fsPath 
*fs_path,
 char buffer[PATH_MAX];
 char *path = fs_path-data;
 
+if (fs_ctx-fs_sm == SM_PASSTHROUGH) {
+return passthrough_special_request(fs_ctx, path, buf, bufsz,
+T_READLINK);
+}
+
 if (fs_ctx-fs_sm == SM_MAPPED) {
 int fd;
 fd = open(rpath(fs_ctx, path, buffer), O_RDONLY);
@@ -217,8 +228,7 @@ static ssize_t local_readlink(FsContext *fs_ctx, V9fsPath 
*fs_path,
 } while (tsize == -1  errno == EINTR);
 close(fd);
 return tsize;
-} else if ((fs_ctx-fs_sm == SM_PASSTHROUGH) ||
-   (fs_ctx-fs_sm == SM_NONE)) {
+} else if (fs_ctx-fs_sm == SM_NONE) {
 tsize = readlink(rpath(fs_ctx, path, buffer), buf, bufsz);
 }
 return tsize;
-- 
1.7.6

Re: [Qemu-devel] [PATCH V12 01/15] Implement qemu_read_full

2011-09-05 Thread malc

On Mon, 5 Sep 2011, M. Mohan Kumar wrote:

 Signed-off-by: M. Mohan Kumar mo...@in.ibm.com
 ---
  osdep.c   |   32 
  qemu-common.h |2 ++
  2 files changed, 34 insertions(+), 0 deletions(-)
 
 diff --git a/osdep.c b/osdep.c
 index 56e6963..5a4d670 100644
 --- a/osdep.c
 +++ b/osdep.c
 @@ -126,6 +126,38 @@ ssize_t qemu_write_full(int fd, const void *buf, size_t 
 count)
  }
  
  /*
 + * A variant of read(2) which handles interrupted read.
 + * Simlar to qemu_write_full function
 + *
 + * Return the number of bytes read.
 + *
 + * This function does not work with non-blocking fd's.
 + * errno is set if fewer than `count' bytes are read because of any
 + * error
 + */
 +ssize_t qemu_read_full(int fd, void *buf, size_t count)
 +{
 +ssize_t ret = 0;
 +ssize_t total = 0;
 +
 +while (count) {
 +ret = read(fd, buf, count);
 +if (ret = 0) {
 +if (errno == EINTR) {

ret == 0 is not an error so here the stale value of errno is checked,
iow this is wrong and recipe for an endless loop.

 +continue;
 +}
 +break;
 +}
 +
 +count -= ret;
 +buf += ret;
 +total += ret;
 +}
 +
 +return total;
 +}
 +
 +/*
   * Opens a socket with FD_CLOEXEC set
   */
  int qemu_socket(int domain, int type, int protocol)
 diff --git a/qemu-common.h b/qemu-common.h
 index 404c421..d6aabd2 100644
 --- a/qemu-common.h
 +++ b/qemu-common.h
 @@ -189,6 +189,8 @@ void qemu_mutex_unlock_iothread(void);
  int qemu_open(const char *name, int flags, ...);
  ssize_t qemu_write_full(int fd, const void *buf, size_t count)
  QEMU_WARN_UNUSED_RESULT;
 +ssize_t qemu_read_full(int fd, void *buf, size_t count)
 +QEMU_WARN_UNUSED_RESULT;
  void qemu_set_cloexec(int fd);
  
  #ifndef _WIN32
 

-- 
mailto:av1...@comtv.ru

Re: [Qemu-devel] qemu segfaults at start

2011-09-05 Thread Mulyadi Santosa

On 05/09/2011, octane indice oct...@alinto.com wrote:
 then:
 qemu disk.img
 Segmentation fault

how about invoking it as:
qemu -hda disk.img
?
does that make any difference? perhaps adding -S too so we could find
out whether it reach the very initial point.

-- 
regards,

Mulyadi Santosa
Freelance Linux trainer and consultant

blog: the-hydra.blogspot.com
training: mulyaditraining.blogspot.com

Re: [Qemu-devel] [PATCH 7/9] openpic: avoid a warning from clang analyzer

2011-09-05 Thread Blue Swirl

On Mon, Sep 5, 2011 at 6:48 AM, Paolo Bonzini pbonz...@redhat.com wrote:
 On 09/04/2011 05:52 PM, Blue Swirl wrote:

 Avoid this warning by clang analyzer by defining a default case:
 /src/qemu/hw/openpic.c:477:5: warning: Undefined or garbage value
 returned to caller
     return retval;

 Signed-off-by: Blue Swirlblauwir...@gmail.com
 ---
  hw/openpic.c |    1 +
  1 files changed, 1 insertions(+), 0 deletions(-)

 diff --git a/hw/openpic.c b/hw/openpic.c
 index 26c96e2..4b883ac 100644
 --- a/hw/openpic.c
 +++ b/hw/openpic.c
 @@ -469,6 +469,7 @@ static inline uint32_t read_IRQreg (openpic_t
 *opp, int n_IRQ, uint32_t reg)
      case IRQ_IPVP:
          retval = opp-src[n_IRQ].ipvp;
          break;
 +    default:
      case IRQ_IDE:
          retval = opp-src[n_IRQ].ide;
          break;

 Looks wrong, perhaps it should return 0?

The only possible values are IRQ_IDE and IRQ_IPVP.

The function is actually baroque, it's as easy to use
read_IRQreg(opp, IRQ_DBL0 + n_dbl, IRQ_IPVP);
as the shorter
opp-src[IRQ_DBL0 + n_dbl].ipvp;

The reason seems to be that write_IRQreg is more complex. I'd replace
both with {read,write}_{ide,ipvp} without the switch.

Re: [Qemu-devel] [PATCH] [SPARC] Gdbstub: Fix back-trace on SPARC32

2011-09-05 Thread Blue Swirl

On Mon, Sep 5, 2011 at 9:33 AM, Fabien Chouteau chout...@adacore.com wrote:
 On 03/09/2011 11:25, Blue Swirl wrote:
 On Thu, Sep 1, 2011 at 2:17 PM, Fabien Chouteau chout...@adacore.com wrote:
 Gdb expects all registers windows to be flushed in ram, which is not the 
 case
 in Qemu. Therefore the back-trace generation doesn't work. This patch adds a
 function to handle reads/writes in stack frames as if windows were flushed.

 Signed-off-by: Fabien Chouteau chout...@adacore.com
 ---
  gdbstub.c             |   10 --
  target-sparc/cpu.h    |    7 
  target-sparc/helper.c |   85 
 +
  3 files changed, 99 insertions(+), 3 deletions(-)

 diff --git a/gdbstub.c b/gdbstub.c
 index 3b87c27..85d5ad7 100644
 --- a/gdbstub.c
 +++ b/gdbstub.c
 @@ -41,6 +41,9 @@
  #include qemu_socket.h
  #include kvm.h

 +#ifndef TARGET_CPU_MEMORY_RW_DEBUG
 +#define TARGET_CPU_MEMORY_RW_DEBUG cpu_memory_rw_debug

 These days, inline functions are preferred over macros.


 This is to allow target-specific implementation of the function.

That can be done with inline functions too.

 +#endif

  enum {
     GDB_SIGNAL_0 = 0,
 @@ -2013,7 +2016,7 @@ static int gdb_handle_packet(GDBState *s, const char 
 *line_buf)
         if (*p == ',')
             p++;
         len = strtoull(p, NULL, 16);
 -        if (cpu_memory_rw_debug(s-g_cpu, addr, mem_buf, len, 0) != 0) {
 +        if (TARGET_CPU_MEMORY_RW_DEBUG(s-g_cpu, addr, mem_buf, len, 0) != 
 0) {

 cpu_memory_rw_debug() could remain unwrapped with a generic function
 like cpu_gdb_sync_memory() which gdbstub should explicitly call.

 Maybe the lazy condition codes etc. could be handled in similar way,
 cpu_gdb_sync_registers().


 Excuse me, I don't understand here.

cpu_gdb_{read,write}_register needs to force calculation of lazy
condition codes. On Sparc this is handled by cpu_get_psr(), so it is
not explicit.

             put_packet (s, E14);
         } else {
             memtohex(buf, mem_buf, len);
 @@ -2028,10 +2031,11 @@ static int gdb_handle_packet(GDBState *s, const 
 char *line_buf)
         if (*p == ':')
             p++;
         hextomem(mem_buf, p, len);
 -        if (cpu_memory_rw_debug(s-g_cpu, addr, mem_buf, len, 1) != 0)
 +        if (TARGET_CPU_MEMORY_RW_DEBUG(s-g_cpu, addr, mem_buf, len, 1) != 
 0) {
             put_packet(s, E14);
 -        else
 +        } else {
             put_packet(s, OK);
 +        }
         break;
     case 'p':
         /* Older gdb are really dumb, and don't use 'g' if 'p' is 
 avaialable.
 diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h
 index 8654f26..3f76eaf 100644
 --- a/target-sparc/cpu.h
 +++ b/target-sparc/cpu.h
 @@ -495,6 +495,13 @@ int cpu_sparc_handle_mmu_fault(CPUSPARCState *env1, 
 target_ulong address, int rw
  target_ulong mmu_probe(CPUSPARCState *env, target_ulong address, int 
 mmulev);
  void dump_mmu(FILE *f, fprintf_function cpu_fprintf, CPUState *env);

 +#if !defined(TARGET_SPARC64)
 +int sparc_cpu_memory_rw_debug(CPUState *env, target_ulong addr,
 +                              uint8_t *buf, int len, int is_write);
 +#define TARGET_CPU_MEMORY_RW_DEBUG sparc_cpu_memory_rw_debug
 +#endif
 +
 +
  /* translate.c */
  void gen_intermediate_code_init(CPUSPARCState *env);

 diff --git a/target-sparc/helper.c b/target-sparc/helper.c
 index 1fe1f07..2cf4e8b 100644
 --- a/target-sparc/helper.c
 +++ b/target-sparc/helper.c
 @@ -358,6 +358,91 @@ void dump_mmu(FILE *f, fprintf_function cpu_fprintf, 
 CPUState *env)
     }
  }

 +
 +/* Gdb expects all registers windows to be flushed in ram. This function 
 handles
 + * reads/writes in stack frames as if windows were flushed. We assume that 
 the
 + * sparc ABI is followed.
 + */

 We can't assume that, it depends on what we are executing (BIOS, OS,
 even application).

 Well, maybe the statement is too strong. The ABI is required to get a valid
 result. Gdb cannot build back-traces if the ABI is not followed anyway.

But if the ABI assumption happens to be wrong (for example registers
contain random values), memory may be corrupted because this would
happily use whatever the registers contain.

Another way to fix this would be that GDB would tell QEMU what ABI to
use for flushing. But how would one tell GDB about a non-standard ABI?

For user emulators we can make ABI assumptions, there similar patch
could make sense. But system emulators can't assume anything about the
guest OS, it could be Linux, *BSD, a commercial OS or even a toy OS.

 On Sparc64 there are two ABIs (32 bit and 64 bit
 with offset of -2047), though calling flushw instruction could handle
 that.

 This solution is for SPARC32 only.

 If the flush happens to trigger a fault, we're in big trouble.


 That's why it's safer/easier to use this hackish read/write in the 
 registers.

No, if the fault happens here, handling it may be tricky. See for
example what paranoia Linux has to do for user window flushing, it
involves the no-fault mode in MMU.

Re: [Qemu-devel] [PATCH] pc: Clean up PIC-to-APIC IRQ path

2011-09-05 Thread Blue Swirl

On Mon, Sep 5, 2011 at 8:38 AM, Edgar E. Iglesias
edgar.igles...@gmail.com wrote:
 On Sat, Sep 03, 2011 at 02:53:31PM -0500, Anthony Liguori wrote:
 On 08/31/2011 11:59 AM, Blue Swirl wrote:
  On Wed, Aug 31, 2011 at 8:28 AM, Avi Kivitya...@redhat.com  wrote:
  On 08/30/2011 10:19 PM, Blue Swirl wrote:
 
 
    We need some kind of two phase restore. In the first phase all state 
  is
    restored; since some of that state drivers outputs that are input to
  other
    devices, they may experience an edge, and we need to supress that.  In
  the
    second phase edge detection is unsupressed and the device goes live.
 
  No. Devices may not perform any externally visible activities (like
  toggle a qemu_irq) during or after load because 1) qemu_irq is
  stateless and 2) since the receiving end is also freshly loaded, both
  states are already in synch without any calls or toggling.
 
  That makes it impossible to migrate level-triggered irq lines.  Or at 
  least,
  the receiver has to remember the state, instead of (or in addition to) the
  sender.
 
  Both ends probably need to remember the state. That should work
  without any multiphase restores and transient suppressors.
 
  It might be also possible to introduce stateful signal lines which
  save and restore their state, then the receiving end could check what
  is the current level. However, if you consider that the devices may be
  restored in random order, if the IRQ line device happens to be
  restored later, the receiver would still get wrong information. Adding
  priorities could solve this, but I think stateless IRQs are the only
  sane way.

 We shouldn't really use the term IRQ as it's confusing.  I like the term
 pin better because that describes what we're really talking about.

 qemu_irq is designed oddly today because is represents something that is
 intrinsically state (whether a pin is high or low) with an edge
 notification with the assumption that the state is held somewhere else
 (which is usually true).

 I don't agree. That's not what qemu_irq represents.
 It represents a wire, a mechanism to drive changes through logic paths
 between state. It is intrinsically stateless.

 It may be the case that it is missused in some places, or that it isn't
 always the best thing to use to represent what ever you need to represent,
 so that you want to complement with other mechanisms.
 But universally replacing it with a stateful alternative seems wrong to me.

Maybe there could be a stateless version of Pin for optimization:
Line? This would probably save one bool worth of memory and one memory
store access for each IRQ event.

Re: [Qemu-devel] [PATCH 1/2] trace: allow trace events with string arguments

2011-09-05 Thread Blue Swirl

On Mon, Sep 5, 2011 at 3:37 PM, Stefan Hajnoczi
stefa...@linux.vnet.ibm.com wrote:
 String arguments are useful for producing human-readable traces without
 post-processing (e.g. stderr backend).  Although the simple backend
 cannot handles strings all others can.  Strings should be allowed and
 the simple backend can be extended to support them.

I don't think this is possible in general. Yes if the string can be
found in the executable (assuming address space randomizations don't
make that impossible post run), but not if the string happens to be
constructed in the stack or in the data segment during run time.

So I'd still at least strongly discourage string use.

 Signed-off-by: Stefan Hajnoczi stefa...@linux.vnet.ibm.com
 ---
  docs/tracing.txt |    8 +++-
  1 files changed, 3 insertions(+), 5 deletions(-)

 diff --git a/docs/tracing.txt b/docs/tracing.txt
 index 4b27ab0..e130a61 100644
 --- a/docs/tracing.txt
 +++ b/docs/tracing.txt
 @@ -70,11 +70,6 @@ Trace events should use types as follows:
    cannot include all user-defined struct declarations and it is therefore
    necessary to use void * for pointers to structs.

 -   Pointers (including char *) cannot be dereferenced easily (or at all) in
 -   some trace backends.  If pointers are used, ensure they are meaningful by
 -   themselves and do not assume the data they point to will be traced.  Do
 -   not pass in string arguments.
 -
  * For everything else, use primitive scalar types (char, int, long) with the
    appropriate signedness.

 @@ -185,6 +180,9 @@ source tree.  It may not be as powerful as 
 platform-specific or third-party
  trace backends but it is portable.  This is the recommended trace backend
  unless you have specific needs for more advanced backends.

 +The simple backend currently does not capture string arguments, it simply
 +records the char* pointer value instead of the string that is pointed to.
 +
   Monitor commands 

  * info trace
 --
 1.7.5.4

Re: [Qemu-devel] [PATCH] g364fb: compile in hwlib

2011-09-05 Thread Hervé Poussineau


Blue Swirl a écrit :

Compile g364fb in hwlib. Two compilations less for the full build.

Signed-off-by: Blue Swirl blauwir...@gmail.com
---
 Makefile.objs|1 +
 Makefile.target  |2 +-
 default-configs/mips-softmmu.mak |1 +
 default-configs/mips64-softmmu.mak   |1 +
 default-configs/mips64el-softmmu.mak |1 +
 default-configs/mipsel-softmmu.mak   |1 +
 hw/g364fb.c  |   16 +---
 7 files changed, 15 insertions(+), 8 deletions(-)
  

Acked-by: Hervé Poussineau hpous...@reactos.org

Re: [Qemu-devel] [PATCH 1/2] trace: allow trace events with string arguments

2011-09-05 Thread Jan Kiszka

On 2011-09-05 21:45, Blue Swirl wrote:
 On Mon, Sep 5, 2011 at 3:37 PM, Stefan Hajnoczi
 stefa...@linux.vnet.ibm.com wrote:
 String arguments are useful for producing human-readable traces without
 post-processing (e.g. stderr backend).  Although the simple backend
 cannot handles strings all others can.  Strings should be allowed and
 the simple backend can be extended to support them.
 
 I don't think this is possible in general. Yes if the string can be
 found in the executable (assuming address space randomizations don't
 make that impossible post run), but not if the string happens to be
 constructed in the stack or in the data segment during run time.

Strings can be addressed in tracers like simpletrace by storing a
fixed-size copy (e.g. 64 chars) in the log. That will work out for the
majority of use cases.

Jan



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-devel] Now, what's left to non-developers? (Qemu forum down again, No such list qemu-users)

2011-09-05 Thread Stefan Weil


Am 19.08.2011 21:49, schrieb Anthony Liguori:

On 08/19/2011 11:28 AM, Stefan Hajnoczi wrote:
On Fri, Aug 19, 2011 at 2:26 PM, Ottaviopr0f3ss0r1...@yahoo.com  
wrote:

On 19 August 2011 11:08, 陳韋任che...@iis.sinica.edu.tw  wrote:

Hi, Ottavio


2) The qemu-user mailing list is not active:
http://lists.nongnu.org/mailman/listinfo/qemu-users


  +1. Maybe you can ask Pablo's opinion, too.


I don't understand. Do you mean Pablo's opinion about the mailing list
or about the forum?

About the forum, I have contacted him a few days ago reporting the
incident and he answered that he would give it a look once he was in
front of a computer (he was using a blackberry or a mobile phone to
email, I think). I have sent him a couple of emails but no answer yet.
And plus, I am quite sure he's subscribed to this list.

I think that, regardless of the qemu forum, there is a base of qemu
users which are not subscribed to this mailing list that would like to
exchange ideas and ask for mutual support and for which the medium of
a web forum is not stimulating enough.

If anybody with admin rights could activate the qemu-users list this
would be very beneficial to this project. I, for instance, have a lot
of questions to ask but I wouldn't dare on this mailing list because
my post wouldn't be interesting enough to developers.


Ottavio,
User questions on this mailing list are not uncommon.  I think your
questions and discussions are welcome.

However, I understand that following a mailing list that has high
traffic and mostly contains patches is not ideal.  It does make sense
to bring the qemu users list back to life.  Anthony may have mailing
list admin access, he is CCed.


I do, but qemu-users was created ages ago before I was an admin on the 
Savannah project.  I suspect I'll need to submit a ticket to reset the 
list.  I'll do that once I get back to the States.


Regards,

Anthony Liguori


Do you already know when you expect qemu-users to be available?

Regards,
Stefan Weil

[Qemu-devel] buildbot failure in qemu on default_x86_64_fedora16

2011-09-05 Thread qemu

The Buildbot has detected a new failure on builder default_x86_64_fedora16 
while building qemu.
Full details are available at:
 http://buildbot.b1-systems.de/qemu/builders/default_x86_64_fedora16/builds/16

Buildbot URL: http://buildbot.b1-systems.de/qemu/

Buildslave for this Build: kraxel_fedora16

Build Reason: The Nightly scheduler named 'nightly_default' triggered this build
Build Source Stamp: [branch master] HEAD
Blamelist: 

BUILD FAILED: failed compile

sincerely,
 -The Buildbot

[Qemu-devel] buildbot failure in qemu on default_x86_64_rhel5

2011-09-05 Thread qemu

The Buildbot has detected a new failure on builder default_x86_64_rhel5 while 
building qemu.
Full details are available at:
 http://buildbot.b1-systems.de/qemu/builders/default_x86_64_rhel5/builds/0

Buildbot URL: http://buildbot.b1-systems.de/qemu/

Buildslave for this Build: kraxel_rhel5

Build Reason: The Nightly scheduler named 'nightly_default' triggered this build
Build Source Stamp: [branch master] HEAD
Blamelist: 

BUILD FAILED: failed compile

sincerely,
 -The Buildbot

[Qemu-devel] buildbot failure in qemu on default_ppc

2011-09-05 Thread qemu

The Buildbot has detected a new failure on builder default_ppc while building 
qemu.
Full details are available at:
 http://buildbot.b1-systems.de/qemu/builders/default_ppc/builds/144

Buildbot URL: http://buildbot.b1-systems.de/qemu/

Buildslave for this Build: qemu-ppc.opensuse.org

Build Reason: The Nightly scheduler named 'nightly_default' triggered this build
Build Source Stamp: [branch master] HEAD
Blamelist: 

BUILD FAILED: failed compile

sincerely,
 -The Buildbot

[Qemu-devel] [PATCH v5 17/33] target-xtensa: implement exceptions

2011-09-05 Thread Max Filippov

- mark privileged opcodes with ring check;
- make debug exception on exception handler entry.

Signed-off-by: Max Filippov jcmvb...@gmail.com
---
 cpu-exec.c|6 +++
 target-xtensa/cpu.h   |   67 
 target-xtensa/helper.c|   37 +++-
 target-xtensa/helpers.h   |2 +
 target-xtensa/op_helper.c |   29 
 target-xtensa/translate.c |  107 ++--
 6 files changed, 242 insertions(+), 6 deletions(-)

diff --git a/cpu-exec.c b/cpu-exec.c
index 3fce033..2fc37d8 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -488,6 +488,12 @@ int cpu_exec(CPUState *env)
 do_interrupt(env);
 next_tb = 0;
 }
+#elif defined(TARGET_XTENSA)
+if (interrupt_request  CPU_INTERRUPT_HARD) {
+env-exception_index = EXC_IRQ;
+do_interrupt(env);
+next_tb = 0;
+}
 #endif
/* Don't use the cached interrupt_request value,
   do_interrupt may have updated the EXITTB flag. */
diff --git a/target-xtensa/cpu.h b/target-xtensa/cpu.h
index 939222c..cae6637 100644
--- a/target-xtensa/cpu.h
+++ b/target-xtensa/cpu.h
@@ -108,7 +108,12 @@ enum {
 enum {
 SAR = 3,
 SCOMPARE1 = 12,
+EPC1 = 177,
+DEPC = 192,
+EXCSAVE1 = 209,
 PS = 230,
+EXCCAUSE = 232,
+EXCVADDR = 238,
 };
 
 #define PS_INTLEVEL 0xf
@@ -129,9 +134,60 @@ enum {
 
 #define PS_WOE 0x4
 
+enum {
+/* Static vectors */
+EXC_RESET,
+EXC_MEMORY_ERROR,
+
+/* Dynamic vectors */
+EXC_WINDOW_OVERFLOW4,
+EXC_WINDOW_UNDERFLOW4,
+EXC_WINDOW_OVERFLOW8,
+EXC_WINDOW_UNDERFLOW8,
+EXC_WINDOW_OVERFLOW12,
+EXC_WINDOW_UNDERFLOW12,
+EXC_IRQ,
+EXC_KERNEL,
+EXC_USER,
+EXC_DOUBLE,
+EXC_MAX
+};
+
+enum {
+ILLEGAL_INSTRUCTION_CAUSE = 0,
+SYSCALL_CAUSE,
+INSTRUCTION_FETCH_ERROR_CAUSE,
+LOAD_STORE_ERROR_CAUSE,
+LEVEL1_INTERRUPT_CAUSE,
+ALLOCA_CAUSE,
+INTEGER_DIVIDE_BY_ZERO_CAUSE,
+PRIVILEGED_CAUSE = 8,
+LOAD_STORE_ALIGNMENT_CAUSE,
+
+INSTR_PIF_DATA_ERROR_CAUSE = 12,
+LOAD_STORE_PIF_DATA_ERROR_CAUSE,
+INSTR_PIF_ADDR_ERROR_CAUSE,
+LOAD_STORE_PIF_ADDR_ERROR_CAUSE,
+
+INST_TLB_MISS_CAUSE,
+INST_TLB_MULTI_HIT_CAUSE,
+INST_FETCH_PRIVILEGE_CAUSE,
+INST_FETCH_PROHIBITED_CAUSE = 20,
+LOAD_STORE_TLB_MISS_CAUSE = 24,
+LOAD_STORE_TLB_MULTI_HIT_CAUSE,
+LOAD_STORE_PRIVILEGE_CAUSE,
+LOAD_PROHIBITED_CAUSE = 28,
+STORE_PROHIBITED_CAUSE,
+
+COPROCESSOR0_DISABLED = 32,
+};
+
 typedef struct XtensaConfig {
 const char *name;
 uint64_t options;
+int excm_level;
+int ndepc;
+uint32_t exception_vector[EXC_MAX];
 } XtensaConfig;
 
 typedef struct CPUXtensaState {
@@ -141,6 +197,8 @@ typedef struct CPUXtensaState {
 uint32_t sregs[256];
 uint32_t uregs[256];
 
+int exception_taken;
+
 CPU_COMMON
 } CPUXtensaState;
 
@@ -164,6 +222,15 @@ static inline bool xtensa_option_enabled(const 
XtensaConfig *config, int opt)
 return (config-options  XTENSA_OPTION_BIT(opt)) != 0;
 }
 
+static inline int xtensa_get_cintlevel(const CPUState *env)
+{
+int level = (env-sregs[PS]  PS_INTLEVEL)  PS_INTLEVEL_SHIFT;
+if ((env-sregs[PS]  PS_EXCM)  env-config-excm_level  level) {
+level = env-config-excm_level;
+}
+return level;
+}
+
 static inline int xtensa_get_ring(const CPUState *env)
 {
 if (xtensa_option_enabled(env-config, XTENSA_OPTION_MMU)) {
diff --git a/target-xtensa/helper.c b/target-xtensa/helper.c
index 83b8a04..44ebb9f 100644
--- a/target-xtensa/helper.c
+++ b/target-xtensa/helper.c
@@ -36,7 +36,8 @@
 
 void cpu_reset(CPUXtensaState *env)
 {
-env-pc = 0;
+env-exception_taken = 0;
+env-pc = env-config-exception_vector[EXC_RESET];
 env-sregs[PS] = 0x1f;
 }
 
@@ -44,6 +45,20 @@ static const XtensaConfig core_config[] = {
 {
 .name = sample-xtensa-core,
 .options = -1,
+.ndepc = 1,
+.excm_level = 16,
+.exception_vector = {
+[EXC_RESET] = 0x5fff8000,
+[EXC_WINDOW_OVERFLOW4] = 0x5fff8400,
+[EXC_WINDOW_UNDERFLOW4] = 0x5fff8440,
+[EXC_WINDOW_OVERFLOW8] = 0x5fff8480,
+[EXC_WINDOW_UNDERFLOW8] = 0x5fff84c0,
+[EXC_WINDOW_OVERFLOW12] = 0x5fff8500,
+[EXC_WINDOW_UNDERFLOW12] = 0x5fff8540,
+[EXC_KERNEL] = 0x5fff861c,
+[EXC_USER] = 0x5fff863c,
+[EXC_DOUBLE] = 0x5fff865c,
+},
 },
 };
 
@@ -94,4 +109,24 @@ target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, 
target_ulong addr)
 
 void do_interrupt(CPUState *env)
 {
+switch (env-exception_index) {
+case EXC_WINDOW_OVERFLOW4:
+case EXC_WINDOW_UNDERFLOW4:
+case EXC_WINDOW_OVERFLOW8:
+case EXC_WINDOW_UNDERFLOW8:
+case

[Qemu-devel] [PATCH v5 29/33] target-xtensa: implement memory protection options

2011-09-05 Thread Max Filippov

- TLB opcode group;
- region protection option (ISA, 4.6.3);
- region translation option (ISA, 4.6.4);
- MMU option (ISA, 4.6.5).

Cache control attribute bits are not used by this implementation.

Signed-off-by: Max Filippov jcmvb...@gmail.com
---
 target-xtensa/cpu.h   |   56 -
 target-xtensa/helper.c|  340 -
 target-xtensa/helpers.h   |7 +
 target-xtensa/op_helper.c |  301 +++-
 target-xtensa/translate.c |   91 -
 5 files changed, 782 insertions(+), 13 deletions(-)

diff --git a/target-xtensa/cpu.h b/target-xtensa/cpu.h
index 93e17d1..14d62fa 100644
--- a/target-xtensa/cpu.h
+++ b/target-xtensa/cpu.h
@@ -114,6 +114,10 @@ enum {
 SCOMPARE1 = 12,
 WINDOW_BASE = 72,
 WINDOW_START = 73,
+PTEVADDR = 83,
+RASID = 90,
+ITLBCFG = 91,
+DTLBCFG = 92,
 EPC1 = 177,
 DEPC = 192,
 EPS2 = 194,
@@ -154,6 +158,9 @@ enum {
 #define MAX_NLEVEL 6
 #define MAX_NNMI 1
 #define MAX_NCCOMPARE 3
+#define MAX_TLB_WAY_SIZE 8
+
+#define REGION_PAGE_MASK 0xe000
 
 enum {
 /* Static vectors */
@@ -214,6 +221,21 @@ typedef enum {
 INTTYPE_MAX
 } interrupt_type;
 
+typedef struct xtensa_tlb_entry {
+uint32_t vaddr;
+uint32_t paddr;
+uint8_t asid;
+uint8_t attr;
+bool variable;
+} xtensa_tlb_entry;
+
+typedef struct xtensa_tlb {
+unsigned nways;
+const unsigned way_size[10];
+bool varway56;
+unsigned nrefillentries;
+} xtensa_tlb;
+
 typedef struct XtensaGdbReg {
 int targno;
 int type;
@@ -248,6 +270,9 @@ typedef struct XtensaConfig {
 unsigned nccompare;
 uint32_t timerint[MAX_NCCOMPARE];
 uint32_t clock_freq_khz;
+
+xtensa_tlb itlb;
+xtensa_tlb dtlb;
 } XtensaConfig;
 
 typedef struct CPUXtensaState {
@@ -258,6 +283,10 @@ typedef struct CPUXtensaState {
 uint32_t uregs[256];
 uint32_t phys_regs[MAX_NAREG];
 
+xtensa_tlb_entry itlb[7][MAX_TLB_WAY_SIZE];
+xtensa_tlb_entry dtlb[10][MAX_TLB_WAY_SIZE];
+unsigned autorefill_idx;
+
 int pending_irq_level; /* level of last raised IRQ */
 void **irq_inputs;
 QEMUTimer *ccompare_timer;
@@ -287,12 +316,29 @@ int cpu_xtensa_signal_handler(int host_signum, void 
*pinfo, void *puc);
 void xtensa_cpu_list(FILE *f, fprintf_function cpu_fprintf);
 void xtensa_sync_window_from_phys(CPUState *env);
 void xtensa_sync_phys_from_window(CPUState *env);
+uint32_t xtensa_tlb_get_addr_mask(const CPUState *env, bool dtlb, uint32_t 
way);
+void split_tlb_entry_spec_way(const CPUState *env, uint32_t v, bool dtlb,
+uint32_t *vpn, uint32_t wi, uint32_t *ei);
+int xtensa_tlb_lookup(const CPUState *env, uint32_t addr, bool dtlb,
+uint32_t *pwi, uint32_t *pei, uint8_t *pring);
+void xtensa_tlb_set_entry(CPUState *env, bool dtlb,
+unsigned wi, unsigned ei, uint32_t vpn, uint32_t pte);
+int xtensa_get_physical_addr(CPUState *env,
+uint32_t vaddr, int is_write, int mmu_idx,
+uint32_t *paddr, uint32_t *page_size, unsigned *access);
+
 
 #define XTENSA_OPTION_BIT(opt) (((uint64_t)1)  (opt))
 
+static inline bool xtensa_option_bits_enabled(const XtensaConfig *config,
+uint64_t opt)
+{
+return (config-options  opt) != 0;
+}
+
 static inline bool xtensa_option_enabled(const XtensaConfig *config, int opt)
 {
-return (config-options  XTENSA_OPTION_BIT(opt)) != 0;
+return xtensa_option_bits_enabled(config, XTENSA_OPTION_BIT(opt));
 }
 
 static inline int xtensa_get_cintlevel(const CPUState *env)
@@ -323,6 +369,14 @@ static inline int xtensa_get_cring(const CPUState *env)
 }
 }
 
+static inline xtensa_tlb_entry *xtensa_tlb_get_entry(CPUState *env,
+bool dtlb, unsigned wi, unsigned ei)
+{
+return dtlb ?
+env-dtlb[wi] + ei :
+env-itlb[wi] + ei;
+}
+
 /* MMU modes definitions */
 #define MMU_MODE0_SUFFIX _ring0
 #define MMU_MODE1_SUFFIX _ring1
diff --git a/target-xtensa/helper.c b/target-xtensa/helper.c
index 11c318d..9e18984 100644
--- a/target-xtensa/helper.c
+++ b/target-xtensa/helper.c
@@ -38,6 +38,8 @@
 a1, a2, a3, a4, a5, a6) \
 { .targno = (no), .type = (typ), .group = (grp) },
 
+static void reset_mmu(CPUState *env);
+
 void cpu_reset(CPUXtensaState *env)
 {
 env-exception_taken = 0;
@@ -48,6 +50,7 @@ void cpu_reset(CPUXtensaState *env)
 env-sregs[VECBASE] = env-config-vecbase;
 
 env-pending_irq_level = 0;
+reset_mmu(env);
 }
 
 static const XtensaConfig core_config[] = {
@@ -150,7 +153,19 @@ void xtensa_cpu_list(FILE *f, fprintf_function cpu_fprintf)
 
 target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
 {
-return addr;
+uint32_t paddr;
+uint32_t page_size;
+unsigned access;
+
+if (xtensa_get_physical_addr(env, addr, 0, 0,
+paddr, page_size, access) == 0) {
+return paddr;
+}
+if (xtensa_get_physical_addr(env, addr, 2, 0,
+paddr, page_size, access) == 0) {

[Qemu-devel] [PATCH v5 16/33] target-xtensa: add PS register and access control

2011-09-05 Thread Max Filippov

Signed-off-by: Max Filippov jcmvb...@gmail.com
---
 target-xtensa/cpu.h   |   53 -
 target-xtensa/helper.c|1 +
 target-xtensa/translate.c |   29 
 3 files changed, 77 insertions(+), 6 deletions(-)

diff --git a/target-xtensa/cpu.h b/target-xtensa/cpu.h
index ac9bbb4..939222c 100644
--- a/target-xtensa/cpu.h
+++ b/target-xtensa/cpu.h
@@ -108,8 +108,27 @@ enum {
 enum {
 SAR = 3,
 SCOMPARE1 = 12,
+PS = 230,
 };
 
+#define PS_INTLEVEL 0xf
+#define PS_INTLEVEL_SHIFT 0
+
+#define PS_EXCM 0x10
+#define PS_UM 0x20
+
+#define PS_RING 0xc0
+#define PS_RING_SHIFT 6
+
+#define PS_OWB 0xf00
+#define PS_OWB_SHIFT 8
+
+#define PS_CALLINC 0x3
+#define PS_CALLINC_SHIFT 16
+#define PS_CALLINC_LEN 2
+
+#define PS_WOE 0x4
+
 typedef struct XtensaConfig {
 const char *name;
 uint64_t options;
@@ -145,17 +164,49 @@ static inline bool xtensa_option_enabled(const 
XtensaConfig *config, int opt)
 return (config-options  XTENSA_OPTION_BIT(opt)) != 0;
 }
 
+static inline int xtensa_get_ring(const CPUState *env)
+{
+if (xtensa_option_enabled(env-config, XTENSA_OPTION_MMU)) {
+return (env-sregs[PS]  PS_RING)  PS_RING_SHIFT;
+} else {
+return 0;
+}
+}
+
+static inline int xtensa_get_cring(const CPUState *env)
+{
+if (xtensa_option_enabled(env-config, XTENSA_OPTION_MMU) 
+(env-sregs[PS]  PS_EXCM) == 0) {
+return (env-sregs[PS]  PS_RING)  PS_RING_SHIFT;
+} else {
+return 0;
+}
+}
+
+/* MMU modes definitions */
+#define MMU_MODE0_SUFFIX _ring0
+#define MMU_MODE1_SUFFIX _ring1
+#define MMU_MODE2_SUFFIX _ring2
+#define MMU_MODE3_SUFFIX _ring3
+
 static inline int cpu_mmu_index(CPUState *env)
 {
-return 0;
+return xtensa_get_cring(env);
 }
 
+#define XTENSA_TBFLAG_RING_MASK 0x3
+#define XTENSA_TBFLAG_EXCM 0x4
+
 static inline void cpu_get_tb_cpu_state(CPUState *env, target_ulong *pc,
 target_ulong *cs_base, int *flags)
 {
 *pc = env-pc;
 *cs_base = 0;
 *flags = 0;
+*flags |= xtensa_get_ring(env);
+if (env-sregs[PS]  PS_EXCM) {
+*flags |= XTENSA_TBFLAG_EXCM;
+}
 }
 
 #include cpu-all.h
diff --git a/target-xtensa/helper.c b/target-xtensa/helper.c
index c119fd0..83b8a04 100644
--- a/target-xtensa/helper.c
+++ b/target-xtensa/helper.c
@@ -37,6 +37,7 @@
 void cpu_reset(CPUXtensaState *env)
 {
 env-pc = 0;
+env-sregs[PS] = 0x1f;
 }
 
 static const XtensaConfig core_config[] = {
diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index 7d383b3..32fab09 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -45,6 +45,8 @@ typedef struct DisasContext {
 TranslationBlock *tb;
 uint32_t pc;
 uint32_t next_pc;
+int cring;
+int ring;
 int is_jmp;
 int singlestep_enabled;
 
@@ -65,6 +67,7 @@ static TCGv_i32 cpu_UR[256];
 static const char * const sregnames[256] = {
 [SAR] = SAR,
 [SCOMPARE1] = SCOMPARE1,
+[PS] = PS,
 };
 
 static const char * const uregnames[256] = {
@@ -239,11 +242,25 @@ static void gen_wsr_sar(DisasContext *dc, uint32_t sr, 
TCGv_i32 s)
 dc-sar_m32_5bit = false;
 }
 
+static void gen_wsr_ps(DisasContext *dc, uint32_t sr, TCGv_i32 v)
+{
+uint32_t mask = PS_WOE | PS_CALLINC | PS_OWB |
+PS_UM | PS_EXCM | PS_INTLEVEL;
+
+if (option_enabled(dc, XTENSA_OPTION_MMU)) {
+mask |= PS_RING;
+}
+tcg_gen_andi_i32(cpu_SR[sr], v, mask);
+/* This can change mmu index, so exit tb */
+gen_jumpi(dc, dc-next_pc, -1);
+}
+
 static void gen_wsr(DisasContext *dc, uint32_t sr, TCGv_i32 s)
 {
 static void (* const wsr_handler[256])(DisasContext *dc,
 uint32_t sr, TCGv_i32 v) = {
 [SAR] = gen_wsr_sar,
+[PS] = gen_wsr_ps,
 };
 
 if (sregnames[sr]) {
@@ -973,7 +990,7 @@ static void disas_xtensa_insn(DisasContext *dc)
 
 /* no ext L32R */
 
-tcg_gen_qemu_ld32u(cpu_R[RRR_T], tmp, 0);
+tcg_gen_qemu_ld32u(cpu_R[RRR_T], tmp, dc-cring);
 tcg_temp_free(tmp);
 }
 break;
@@ -982,7 +999,7 @@ static void disas_xtensa_insn(DisasContext *dc)
 #define gen_load_store(type, shift) do { \
 TCGv_i32 addr = tcg_temp_new_i32(); \
 tcg_gen_addi_i32(addr, cpu_R[RRI8_S], RRI8_IMM8  shift); \
-tcg_gen_qemu_##type(cpu_R[RRI8_T], addr, 0); \
+tcg_gen_qemu_##type(cpu_R[RRI8_T], addr, dc-cring); \
 tcg_temp_free(addr); \
 } while (0)
 
@@ -1140,11 +1157,11 @@ static void disas_xtensa_insn(DisasContext *dc)
 
 tcg_gen_mov_i32(tmp, cpu_R[RRI8_T]);
 tcg_gen_addi_i32(addr, cpu_R[RRI8_S], RRI8_IMM8  2);
-tcg_gen_qemu_ld32u(cpu_R[RRI8_T], addr, 0);
+tcg_gen_qemu_ld32u(cpu_R[RRI8_T], addr, dc-cring);
 tcg_gen_brcond_i32(TCG_COND_NE, cpu_R[RRI8_T],
 cpu_SR[SCOMPARE1], label);
 
-

[Qemu-devel] [PATCH v5 30/33] target-xtensa: implement boolean option

2011-09-05 Thread Max Filippov

See ISA, 4.3.9

Signed-off-by: Max Filippov jcmvb...@gmail.com
---
 target-xtensa/cpu.h   |1 +
 target-xtensa/translate.c |  109 +++--
 2 files changed, 86 insertions(+), 24 deletions(-)

diff --git a/target-xtensa/cpu.h b/target-xtensa/cpu.h
index 14d62fa..339075d 100644
--- a/target-xtensa/cpu.h
+++ b/target-xtensa/cpu.h
@@ -110,6 +110,7 @@ enum {
 LEND = 1,
 LCOUNT = 2,
 SAR = 3,
+BR = 4,
 LITBASE = 5,
 SCOMPARE1 = 12,
 WINDOW_BASE = 72,
diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index 20a6834..93a807e 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -76,6 +76,7 @@ static const char * const sregnames[256] = {
 [LEND] = LEND,
 [LCOUNT] = LCOUNT,
 [SAR] = SAR,
+[BR] = BR,
 [LITBASE] = LITBASE,
 [SCOMPARE1] = SCOMPARE1,
 [WINDOW_BASE] = WINDOW_BASE,
@@ -434,6 +435,11 @@ static void gen_wsr_sar(DisasContext *dc, uint32_t sr, 
TCGv_i32 s)
 dc-sar_m32_5bit = false;
 }
 
+static void gen_wsr_br(DisasContext *dc, uint32_t sr, TCGv_i32 s)
+{
+tcg_gen_andi_i32(cpu_SR[sr], s, 0x);
+}
+
 static void gen_wsr_litbase(DisasContext *dc, uint32_t sr, TCGv_i32 s)
 {
 tcg_gen_andi_i32(cpu_SR[sr], s, 0xf001);
@@ -536,6 +542,7 @@ static void gen_wsr(DisasContext *dc, uint32_t sr, TCGv_i32 
s)
 [LBEG] = gen_wsr_lbeg,
 [LEND] = gen_wsr_lend,
 [SAR] = gen_wsr_sar,
+[BR] = gen_wsr_br,
 [LITBASE] = gen_wsr_litbase,
 [WINDOW_BASE] = gen_wsr_windowbase,
 [WINDOW_START] = gen_wsr_windowstart,
@@ -974,23 +981,28 @@ static void disas_xtensa_insn(DisasContext *dc)
 break;
 
 case 8: /*ANY4p*/
-HAS_OPTION(XTENSA_OPTION_BOOLEAN);
-TBD();
-break;
-
 case 9: /*ALL4p*/
-HAS_OPTION(XTENSA_OPTION_BOOLEAN);
-TBD();
-break;
-
 case 10: /*ANY8p*/
-HAS_OPTION(XTENSA_OPTION_BOOLEAN);
-TBD();
-break;
-
 case 11: /*ALL8p*/
 HAS_OPTION(XTENSA_OPTION_BOOLEAN);
-TBD();
+{
+const unsigned shift = (RRR_R  2) ? 8 : 4;
+TCGv_i32 mask = tcg_const_i32(
+((1  shift) - 1)  RRR_S);
+TCGv_i32 tmp = tcg_temp_new_i32();
+
+tcg_gen_and_i32(tmp, cpu_SR[BR], mask);
+if (RRR_R  1) { /*ALL*/
+tcg_gen_addi_i32(tmp, tmp, 1  RRR_S);
+} else { /*ANY*/
+tcg_gen_add_i32(tmp, tmp, mask);
+}
+tcg_gen_shri_i32(tmp, tmp, RRR_S + shift);
+tcg_gen_deposit_i32(cpu_SR[BR], cpu_SR[BR],
+tmp, RRR_T, 1);
+tcg_temp_free(mask);
+tcg_temp_free(tmp);
+}
 break;
 
 default: /*reserved*/
@@ -1339,7 +1351,9 @@ static void disas_xtensa_insn(DisasContext *dc)
 break;
 
 case 2: /*RST2*/
-gen_window_check3(dc, RRR_R, RRR_S, RRR_T);
+if (OP2 = 8) {
+gen_window_check3(dc, RRR_R, RRR_S, RRR_T);
+}
 
 if (OP2 = 12) {
 HAS_OPTION(XTENSA_OPTION_32_BIT_IDIV);
@@ -1350,6 +1364,42 @@ static void disas_xtensa_insn(DisasContext *dc)
 }
 
 switch (OP2) {
+#define BOOLEAN_LOGIC(fn, r, s, t) \
+do { \
+HAS_OPTION(XTENSA_OPTION_BOOLEAN); \
+TCGv_i32 tmp1 = tcg_temp_new_i32(); \
+TCGv_i32 tmp2 = tcg_temp_new_i32(); \
+\
+tcg_gen_shri_i32(tmp1, cpu_SR[BR], s); \
+tcg_gen_shri_i32(tmp2, cpu_SR[BR], t); \
+tcg_gen_##fn##_i32(tmp1, tmp1, tmp2); \
+tcg_gen_deposit_i32(cpu_SR[BR], cpu_SR[BR], tmp1, r, 1); \
+tcg_temp_free(tmp1); \
+tcg_temp_free(tmp2); \
+} while (0)
+
+case 0: /*ANDBp*/
+BOOLEAN_LOGIC(and, RRR_R, RRR_S, RRR_T);
+break;
+
+case 1: /*ANDBCp*/
+BOOLEAN_LOGIC(andc, RRR_R, RRR_S, RRR_T);
+break;
+
+case 2: /*ORBp*/
+BOOLEAN_LOGIC(or, RRR_R, RRR_S, RRR_T);
+break;
+
+case 3: /*ORBCp*/
+BOOLEAN_LOGIC(orc, RRR_R, RRR_S, RRR_T);
+break;
+
+case 4: /*XORBp*/
+BOOLEAN_LOGIC(xor, RRR_R, RRR_S, RRR_T);
+break;
+
+#undef BOOLEAN_LOGIC
+
 case 8: /*MULLi*/

[Qemu-devel] [PATCH v5 25/33] target-xtensa: implement accurate window check

2011-09-05 Thread Max Filippov

See ISA, 4.7.1.3 for details.

Window check is inserted before commands that push used register
watermark beyond its current level. Used register watermark is reset on
instructions that change WINDOW_BASE/WINDOW_START SRs.

Signed-off-by: Max Filippov jcmvb...@gmail.com
---
 target-xtensa/translate.c |  110 +
 1 files changed, 110 insertions(+), 0 deletions(-)

diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index d75e780..cee1f1c 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -60,6 +60,7 @@ typedef struct DisasContext {
 TCGv_i32 sar_m32;
 
 uint32_t ccount_delta;
+unsigned used_window;
 } DisasContext;
 
 static TCGv_ptr cpu_env;
@@ -225,6 +226,11 @@ static void gen_advance_ccount(DisasContext *dc)
 }
 }
 
+static void reset_used_window(DisasContext *dc)
+{
+dc-used_window = 0;
+}
+
 static void gen_exception(DisasContext *dc, int excp)
 {
 TCGv_i32 tmp = tcg_const_i32(excp);
@@ -418,6 +424,13 @@ static void gen_wsr_litbase(DisasContext *dc, uint32_t sr, 
TCGv_i32 s)
 static void gen_wsr_windowbase(DisasContext *dc, uint32_t sr, TCGv_i32 v)
 {
 gen_helper_wsr_windowbase(v);
+reset_used_window(dc);
+}
+
+static void gen_wsr_windowstart(DisasContext *dc, uint32_t sr, TCGv_i32 v)
+{
+tcg_gen_mov_i32(cpu_SR[sr], v);
+reset_used_window(dc);
 }
 
 static void gen_wsr_intset(DisasContext *dc, uint32_t sr, TCGv_i32 v)
@@ -457,6 +470,7 @@ static void gen_wsr_ps(DisasContext *dc, uint32_t sr, 
TCGv_i32 v)
 mask |= PS_RING;
 }
 tcg_gen_andi_i32(cpu_SR[sr], v, mask);
+reset_used_window(dc);
 gen_helper_check_interrupts(cpu_env);
 /* This can change mmu index and tb-flags, so exit tb */
 gen_jumpi_check_loop_end(dc, -1);
@@ -483,6 +497,7 @@ static void gen_wsr(DisasContext *dc, uint32_t sr, TCGv_i32 
s)
 [SAR] = gen_wsr_sar,
 [LITBASE] = gen_wsr_litbase,
 [WINDOW_BASE] = gen_wsr_windowbase,
+[WINDOW_START] = gen_wsr_windowstart,
 [INTSET] = gen_wsr_intset,
 [INTCLEAR] = gen_wsr_intclear,
 [INTENABLE] = gen_wsr_intenable,
@@ -530,6 +545,36 @@ static void gen_waiti(DisasContext *dc, uint32_t imm4)
 tcg_temp_free(intlevel);
 }
 
+static void gen_window_check1(DisasContext *dc, unsigned r1)
+{
+if (dc-tb-flags  XTENSA_TBFLAG_EXCM) {
+return;
+}
+if (option_enabled(dc, XTENSA_OPTION_WINDOWED_REGISTER) 
+r1 / 4  dc-used_window) {
+TCGv_i32 pc = tcg_const_i32(dc-pc);
+TCGv_i32 w = tcg_const_i32(r1 / 4);
+
+dc-used_window = r1 / 4;
+gen_advance_ccount(dc);
+gen_helper_window_check(pc, w);
+
+tcg_temp_free(w);
+tcg_temp_free(pc);
+}
+}
+
+static void gen_window_check2(DisasContext *dc, unsigned r1, unsigned r2)
+{
+gen_window_check1(dc, r1  r2 ? r1 : r2);
+}
+
+static void gen_window_check3(DisasContext *dc, unsigned r1, unsigned r2,
+unsigned r3)
+{
+gen_window_check2(dc, r1, r2  r3 ? r2 : r3);
+}
+
 static void disas_xtensa_insn(DisasContext *dc)
 {
 #define HAS_OPTION(opt) do { \
@@ -659,6 +704,7 @@ static void disas_xtensa_insn(DisasContext *dc)
 switch (CALLX_N) {
 case 0: /*RET*/
 case 2: /*JX*/
+gen_window_check1(dc, CALLX_S);
 gen_jump(dc, cpu_R[CALLX_S]);
 break;
 
@@ -680,6 +726,7 @@ static void disas_xtensa_insn(DisasContext *dc)
 break;
 
 case 3: /*CALLX*/
+gen_window_check2(dc, CALLX_S, CALLX_N  2);
 switch (CALLX_N) {
 case 0: /*CALLX0*/
 {
@@ -710,6 +757,7 @@ static void disas_xtensa_insn(DisasContext *dc)
 
 case 1: /*MOVSPw*/
 HAS_OPTION(XTENSA_OPTION_WINDOWED_REGISTER);
+gen_window_check2(dc, RRR_T, RRR_S);
 {
 TCGv_i32 pc = tcg_const_i32(dc-pc);
 gen_advance_ccount(dc);
@@ -863,6 +911,7 @@ static void disas_xtensa_insn(DisasContext *dc)
 case 6: /*RSILx*/
 HAS_OPTION(XTENSA_OPTION_INTERRUPT);
 gen_check_privilege(dc);
+gen_window_check1(dc, RRR_T);
 tcg_gen_mov_i32(cpu_R[RRR_T], cpu_SR[PS]);
 tcg_gen_andi_i32(cpu_SR[PS], cpu_SR[PS], ~PS_INTLEVEL);
 tcg_gen_ori_i32(cpu_SR[PS], cpu_SR[PS], RRR_S);
@@ -904,28 +953,34 @@ static void disas_xtensa_insn(DisasContext *dc)
 break;
 
 case 1: /*AND*/
+gen_window_check3(dc, RRR_R, RRR_S, RRR_T);
 tcg_gen_and_i32(cpu_R[RRR_R], cpu_R[RRR_S], cpu_R[RRR_T]);
 break;
 
 case 2: /*OR*/
+

1 2 >

1 - 100 of 132 matches

Mail list logo