date:20110917

[Qemu-devel] [net-next RFC V2 PATCH 4/5] tuntap: multiqueue support

2011-09-17 Thread Jason Wang

This patch adds multiqueue support for the tap device by allowing multiple
sockets to be attached to a tap device. We can then parallelize packet
transmission/reception by spreading packets across different sockets.

The following steps are used to choose the tx queue:
1 For packets that come from multiqueue nics, we just choose the
tx queue based on which physical queue the packet came from.
2 Otherwise we try to use rxhash to choose the queue.
3 If all of the above fail, we always use the first queue.

In order to make the tx path lockless, like macvtap, netif_tx_lock_bh()
is replaced by RCU and NETIF_F_LLTX to synchronize between the hot path
and system calls.

Signed-off-by: Jason Wang jasow...@redhat.com
---
 drivers/net/tun.c |  358 +
 1 files changed, 223 insertions(+), 135 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index dc768e0..ec29f85 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -108,6 +108,8 @@ struct tap_filter {
unsigned char   addr[FLT_EXACT_COUNT][ETH_ALEN];
 };
 
+#define MAX_TAP_QUEUES (NR_CPUS  16 ? NR_CPUS : 16)
+
 struct tun_file {
struct sock sk;
struct socket socket;
@@ -115,16 +117,18 @@ struct tun_file {
int vnet_hdr_sz;
struct tap_filter txflt;
atomic_t count;
-   struct tun_struct *tun;
+   struct tun_struct __rcu *tun;
struct net *net;
struct fasync_struct *fasync;
unsigned int flags;
+   u16 queue_index;
 };
 
 struct tun_sock;
 
 struct tun_struct {
-   struct tun_file *tfile;
+   struct tun_file *tfiles[MAX_TAP_QUEUES];
+   unsigned intnumqueues;
unsigned intflags;
uid_t   owner;
gid_t   group;
@@ -139,80 +143,160 @@ struct tun_struct {
 #endif
 };
 
-static int tun_attach(struct tun_struct *tun, struct file *file)
+static DEFINE_SPINLOCK(tun_lock);
+
+/*
+ * tun_get_queue(): calculate the queue index
+ * - if skbs comes from mq nics, we can just borrow
+ * - if not, calculate from the hash
+ */
+static struct tun_file *tun_get_queue(struct net_device *dev,
+ struct sk_buff *skb)
 {
-   struct tun_file *tfile = file-private_data;
-   int err;
+   struct tun_struct *tun = netdev_priv(dev);
+   struct tun_file *tfile = NULL;
+   int numqueues = tun-numqueues;
+   __u32 rxq;
 
-   ASSERT_RTNL();
+   BUG_ON(!rcu_read_lock_held());
 
-   netif_tx_lock_bh(tun-dev);
+   if (!numqueues)
+   goto out;
 
-   err = -EINVAL;
-   if (tfile-tun)
+   if (numqueues == 1) {
+   tfile = rcu_dereference(tun-tfiles[0]);
goto out;
+   }
 
-   err = -EBUSY;
-   if (tun-tfile)
+   if (likely(skb_rx_queue_recorded(skb))) {
+   rxq = skb_get_rx_queue(skb);
+
+   while (unlikely(rxq = numqueues))
+   rxq -= numqueues;
+
+   tfile = rcu_dereference(tun-tfiles[rxq]);
goto out;
+   }
 
-   err = 0;
-   tfile-tun = tun;
-   tun-tfile = tfile;
-   netif_carrier_on(tun-dev);
-   dev_hold(tun-dev);
-   sock_hold(tfile-sk);
-   atomic_inc(tfile-count);
+   /* Check if we can use flow to select a queue */
+   rxq = skb_get_rxhash(skb);
+   if (rxq) {
+   u32 idx = ((u64)rxq * numqueues)  32;
+   tfile = rcu_dereference(tun-tfiles[idx]);
+   goto out;
+   }
 
+   tfile = rcu_dereference(tun-tfiles[0]);
 out:
-   netif_tx_unlock_bh(tun-dev);
-   return err;
+   return tfile;
 }
 
-static void __tun_detach(struct tun_struct *tun)
+static int tun_detach(struct tun_file *tfile, bool clean)
 {
-   struct tun_file *tfile = tun-tfile;
-   /* Detach from net device */
-   netif_tx_lock_bh(tun-dev);
-   netif_carrier_off(tun-dev);
-   tun-tfile = NULL;
-   netif_tx_unlock_bh(tun-dev);
-
-   /* Drop read queue */
-   skb_queue_purge(tfile-socket.sk-sk_receive_queue);
-
-   /* Drop the extra count on the net device */
-   dev_put(tun-dev);
-}
+   struct tun_struct *tun;
+   struct net_device *dev = NULL;
+   bool destroy = false;
 
-static void tun_detach(struct tun_struct *tun)
-{
-   rtnl_lock();
-   __tun_detach(tun);
-   rtnl_unlock();
-}
+   spin_lock(tun_lock);
 
-static struct tun_struct *__tun_get(struct tun_file *tfile)
-{
-   struct tun_struct *tun = NULL;
+   tun = rcu_dereference_protected(tfile-tun,
+   lockdep_is_held(tun_lock));
+   if (tun) {
+   u16 index = tfile-queue_index;
+   BUG_ON(index  tun-numqueues);
+   BUG_ON(!tun-tfiles[tun-numqueues - 1]);
+   dev = tun-dev;
+
+   rcu_assign_pointer(tun-tfiles[index],
+

[Qemu-devel] [net-next RFC V2 PATCH 5/5] tuntap: add ioctls to attach or detach a file from tap device

2011-09-17 Thread Jason Wang

New ioctls are added to allow multiple files/sockets to be attached to
a tap device.

Signed-off-by: Jason Wang jasow...@redhat.com
---
 drivers/net/tun.c  |   25 ++---
 include/linux/if_tun.h |3 +++
 2 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index ec29f85..6a1b591 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1343,11 +1343,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned 
int cmd,
 {
struct tun_file *tfile = file-private_data;
struct tun_struct *tun;
+   struct net_device *dev = NULL;
void __user* argp = (void __user*)arg;
struct ifreq ifr;
int ret;
 
-   if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89)
+   if (cmd == TUNSETIFF || cmd == TUNATTACHQUEUE || _IOC_TYPE(cmd) == 0x89)
if (copy_from_user(ifr, argp, ifreq_len))
return -EFAULT;
 
@@ -1356,7 +1357,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned 
int cmd,
 * This is needed because we never checked for invalid flags on
 * TUNSETIFF. */
return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE |
-   IFF_VNET_HDR,
+   IFF_VNET_HDR | IFF_MULTI_QUEUE,
(unsigned int __user*)argp);
}
 
@@ -1372,6 +1373,9 @@ static long __tun_chr_ioctl(struct file *file, unsigned 
int cmd,
return -EFAULT;
return ret;
}
+   if (cmd == TUNDETACHQUEUE) {
+   return tun_detach(tfile, false);
+   }
 
rtnl_lock();
 
@@ -1379,7 +1383,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned 
int cmd,
 
ret = -EBADFD;
tun = rcu_dereference(tfile-tun);
-   if (!tun)
+   if (!tun  cmd != TUNATTACHQUEUE)
goto unlock;
 
 
@@ -1394,6 +1398,21 @@ static long __tun_chr_ioctl(struct file *file, unsigned 
int cmd,
ret = -EFAULT;
goto out;
 
+   case TUNATTACHQUEUE:
+   dev = __dev_get_by_name(tfile-net, ifr.ifr_name);
+   if (!dev || dev-netdev_ops != tap_netdev_ops) {
+   ret = -EINVAL;
+   } else if (ifr.ifr_flags 
+   ~(IFF_TAP | IFF_NO_PI | IFF_VNET_HDR)) {
+  /* ignore illegal flag */
+   ret = -EINVAL;
+   } else {
+   tfile-flags = TUN_TAP_DEV | TUN_NO_PI | TUN_VNET_HDR;
+   tun = netdev_priv(dev);
+   ret = tun_attach(tun, file);
+   }
+   break;
+
case TUNSETNOCSUM:
/* Disable/Enable checksum */
 
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index c92a291..d3f24d8 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -54,6 +54,9 @@
 #define TUNDETACHFILTER _IOW('T', 214, struct sock_fprog)
 #define TUNGETVNETHDRSZ _IOR('T', 215, int)
 #define TUNSETVNETHDRSZ _IOW('T', 216, int)
+#define TUNATTACHQUEUE  _IOW('T', 217, int)
+#define TUNDETACHQUEUE  _IOW('T', 218, int)
+
 
 /* TUNSETIFF ifr flags */
 #define IFF_TUN0x0001

Re: [Qemu-devel] [PATCH v2 03/15] sheepdog: move coroutine send/recv function to generic code

2011-09-17 Thread MORITA Kazutaka

At Fri, 16 Sep 2011 16:25:40 +0200,
Paolo Bonzini wrote:
 
 Outside coroutines, avoid busy waiting on EAGAIN by temporarily
 making the socket blocking.
 
 The API of qemu_recvv/qemu_sendv is slightly different from
 do_readv/do_writev because they do not handle coroutines.  It
 returns the number of bytes written before encountering an
 EAGAIN.  The specificity of yielding on EAGAIN is entirely in
 qemu-coroutine.c.
 
 Reviewed-by: MORITA Kazutaka morita.kazut...@lab.ntt.co.jp
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
  block/sheepdog.c |  225 
 ++
  cutils.c |  177 ++
  qemu-common.h|   30 +++
  3 files changed, 230 insertions(+), 202 deletions(-)

It seems this patch causes a compile error in qemu-ga.

Other things I noticed:

  static int send_req(int sockfd, SheepdogReq *hdr, void *data,
  unsigned int *wlen)
  {
 @@ -691,10 +509,9 @@ static int send_req(int sockfd, SheepdogReq *hdr, void 
 *data,
  iov[1].iov_len = *wlen;
  }
  
 -ret = do_writev(sockfd, iov, sizeof(*hdr) + *wlen, 0);
 -if (ret) {
 +ret = qemu_sendv(sockfd, iov, sizeof(*hdr) + *wlen, 0);

This is wrong because qemu_sendv() may return a smaller value than
(sizeof(*hdr) + *wlen).  We need to do things like qemu_write_full()
here.

 +if (ret  0) {
  error_report(failed to send a req, %s, strerror(errno));
 -ret = -1;
  }
  
  return ret;
 @@ -704,17 +521,19 @@ static int do_req(int sockfd, SheepdogReq *hdr, void 
 *data,
unsigned int *wlen, unsigned int *rlen)
  {
  int ret;
 +struct iovec iov;
  
 +socket_set_block(sockfd);
  ret = send_req(sockfd, hdr, data, wlen);
 -if (ret) {
 -ret = -1;
 +if (ret  0) {
  goto out;
  }
  
 -ret = do_read(sockfd, hdr, sizeof(*hdr));
 -if (ret) {
 +iov.iov_base = hdr;
 +iov.iov_len = sizeof(*hdr);
 +ret = qemu_recvv(sockfd, iov, sizeof(*hdr), 0);

qemu_recvv() may also return a smaller value than sizeof(*hdr) here.

 +if (ret  0) {
  error_report(failed to get a rsp, %s, strerror(errno));
 -ret = -1;
  goto out;
  }
  
 @@ -723,15 +542,17 @@ static int do_req(int sockfd, SheepdogReq *hdr, void 
 *data,
  }
  
  if (*rlen) {
 -ret = do_read(sockfd, data, *rlen);
 -if (ret) {
 +iov.iov_base = data;
 +iov.iov_len = *rlen;
 +ret = qemu_recvv(sockfd, iov, *rlen, 0);

Same here.

 +if (ret  0) {
  error_report(failed to get the data, %s, strerror(errno));
 -ret = -1;
  goto out;
  }
  }
  ret = 0;
  out:
 +socket_set_nonblock(sockfd);
  return ret;
  }
  

[snip]

 +
 +/*
 + * Send/recv data with iovec buffers
 + *
 + * This function send/recv data from/to the iovec buffer directly.
 + * The first `offset' bytes in the iovec buffer are skipped and next
 + * `len' bytes are used.
 + *
 + * For example,
 + *
 + *   do_sendv_recvv(sockfd, iov, len, offset, 1);
 + *
 + * is equal to
 + *
 + *   char *buf = malloc(size);
 + *   iov_to_buf(iov, iovcnt, buf, offset, size);
 + *   send(sockfd, buf, size, 0);
 + *   free(buf);
 + */
 +static int do_sendv_recvv(int sockfd, struct iovec *iov, int len, int offset,
 +  int do_sendv)
 +{
 +int ret, diff, iovlen;
 +struct iovec *last_iov;
 +
 +/* last_iov is inclusive, so count from one.  */
 +iovlen = 1;
 +last_iov = iov;
 +len += offset;
 +
 +while (last_iov-iov_len  len) {
 +len -= last_iov-iov_len;
 +
 +last_iov++;
 +iovlen++;
 +}
 +
 +diff = last_iov-iov_len - len;
 +last_iov-iov_len -= diff;
 +
 +while (iov-iov_len = offset) {
 +offset -= iov-iov_len;
 +
 +iov++;
 +iovlen--;
 +}
 +
 +iov-iov_base = (char *) iov-iov_base + offset;
 +iov-iov_len -= offset;
 +
 +{
 +#ifdef CONFIG_IOVEC
 +struct msghdr msg;
 +memset(msg, 0, sizeof(msg));
 +msg.msg_iov = iov;
 +msg.msg_iovlen = iovlen;
 +
 +do {
 +if (do_sendv) {
 +ret = sendmsg(sockfd, msg, 0);
 +} else {
 +ret = recvmsg(sockfd, msg, 0);
 +}
 +} while (ret == -1  errno == EINTR);
 +#else
 +struct iovec *p = iov;
 +ret = 0;
 +while (iovlen  0) {
 +int rc;
 +if (do_sendv) {
 +rc = send(sockfd, p-iov_base, p-iov_len, 0);
 +} else {
 +rc = qemu_recv(sockfd, p-iov_base, p-iov_len, 0);
 +}
 +if (rc == -1) {
 +if (errno == EINTR) {
 +continue;
 +}
 +if (ret == 0) {
 +ret = -1;
 +}
 +break;
 +}
 +iovlen--, p++;

Re: [Qemu-devel] [PATCH v2 04/15] coroutine-io: handle zero returns from recv

2011-09-17 Thread MORITA Kazutaka

At Fri, 16 Sep 2011 16:25:41 +0200,
Paolo Bonzini wrote:
 
 When the other side is shutdown, read returns zero (writes return EPIPE).
 In this case, care must be taken to avoid infinite loops.  This error
 was already present in sheepdog.
 
 Cc: MORITA Kazutaka morita.kazut...@lab.ntt.co.jp
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
  cutils.c |8 +++-
  1 files changed, 7 insertions(+), 1 deletions(-)
 
 diff --git a/cutils.c b/cutils.c
 index b302020..295187f 100644
 --- a/cutils.c
 +++ b/cutils.c
 @@ -501,8 +501,11 @@ static int do_sendv_recvv(int sockfd, struct iovec *iov, 
 int len, int offset,
  }
  break;
  }
 -iovlen--, p++;
 +if (rc == 0) {
 +break;
 +}
  ret += rc;
 +iovlen--, p++;
  }
  #endif
  }
 @@ -567,6 +570,9 @@ int coroutine_fn qemu_co_sendv(int sockfd, struct iovec 
 *iov,
  }
  break;
  }
 +if (ret == 0) {
 +break;
 +}
  total += ret, len -= ret;
  }

When EPIPE is set, write() returns -1, doesn't it?

It looks like qemu_co_recvv() handles a zero return correctly, so I
think this patch is not needed.


Thanks,

Kazutaka

Re: [Qemu-devel] [PATCH] This patch adds a new block driver : iSCSI

2011-09-17 Thread Stefan Hajnoczi

On Fri, Sep 16, 2011 at 05:53:20PM +0200, Christoph Hellwig wrote:
 On Wed, Sep 14, 2011 at 04:50:25PM +0100, Stefan Hajnoczi wrote:
  I think in this case it will not make the code nicer.  Since the
  external iSCSI library is based on callbacks it would be necessary to
  write the coroutines-callbacks adapter functions.  So for example,
  the READ10 command would need a function that can be called in
  coroutine context and yields while libiscsi does the I/O.  When the
  callback is invoked it will re-enter the coroutine.
  
  The area where coroutines are useful in the block layer is for image
  formats.  We already have common coroutines-callback adapter
  functions in block.c so it's possible to write sequential code for
  image formats.  They only need access to block layer functions which
  have already been adapted.  But as soon as you interact with a
  callback-based API from the coroutine, then you need to write an
  adapter yourself.
 
 So you plan on keeping the aio interface around forever?  Qemu with two
 different I/O paths was already more than painful enough; I don't
 think keeping three, two of them being fairly complex, is a good
 idea.

The synchronous interfaces can be converted to the coroutine
interfaces.

The block layer needs a public aio interface because device emulation is
asynchronous/callback-based.  That doesn't mean that BlockDriver needs
aio functions since block.c could transparently set up coroutines.  So
in theory BlockDriver could have only coroutine interfaces.

Doing the aio to coroutine conversion is pretty mechanical, that's why
I'm not afraid of doing it with this iSCSI code later.

Stefan

[Qemu-devel] [PATCH] Remove qemu_host_page_bits

2011-09-17 Thread Stefan Weil

It was introduced with commit 54936004fddc52c321cb3f9a9a51140e782bed5d
as host_page_bits but never used.

Signed-off-by: Stefan Weil w...@mail.berlios.de
---
 cpu-all.h |1 -
 exec.c|4 
 2 files changed, 0 insertions(+), 5 deletions(-)

diff --git a/cpu-all.h b/cpu-all.h
index 3532026..e8143cd 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -290,7 +290,6 @@ extern unsigned long reserved_va;
 
 /* ??? These should be the larger of unsigned long and target_ulong.  */
 extern unsigned long qemu_real_host_page_size;
-extern unsigned long qemu_host_page_bits;
 extern unsigned long qemu_host_page_size;
 extern unsigned long qemu_host_page_mask;
 
diff --git a/exec.c b/exec.c
index 3df6b23..639deae 100644
--- a/exec.c
+++ b/exec.c
@@ -183,7 +183,6 @@ typedef struct PageDesc {
 #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS)
 
 unsigned long qemu_real_host_page_size;
-unsigned long qemu_host_page_bits;
 unsigned long qemu_host_page_size;
 unsigned long qemu_host_page_mask;
 
@@ -275,9 +274,6 @@ static void page_init(void)
 qemu_host_page_size = qemu_real_host_page_size;
 if (qemu_host_page_size  TARGET_PAGE_SIZE)
 qemu_host_page_size = TARGET_PAGE_SIZE;
-qemu_host_page_bits = 0;
-while ((1  qemu_host_page_bits)  qemu_host_page_size)
-qemu_host_page_bits++;
 qemu_host_page_mask = ~(qemu_host_page_size - 1);
 
 #if defined(CONFIG_BSD)  defined(CONFIG_USER_ONLY)
-- 
1.7.2.5

Re: [Qemu-devel] [PATCH 0/5] Only one call output register needed for 64 bit hosts

2011-09-17 Thread Stefan Weil


Am 05.09.2011 11:06, schrieb Stefan Weil:

The number of registers needed for the return value of TCG opcode
INDEX_op_call is calculated in function tcg_gen_callN (nb_rets).

It can be 0 or 1, for 32 bit hosts also 2 (return 64 bit value in
two 32 bit registers).

Some TCG implementations reserve 2 registers although only 1 is used.
The following patches fix this.

[PATCH 1/5] tcg/i386: Only one call output register needed for 64 bit 
hosts
[PATCH 2/5] tcg/ia64: Only one call output register needed for 64 bit 
hosts
[PATCH 3/5] tcg/s390: Only one call output register needed for 64 bit 
hosts
[PATCH 4/5] tcg/sparc: Only one call output register needed for 64 bit 
hosts
[PATCH 5/5] tcg/ppc64: Only one call output register needed for 64 bit 
hosts


Patch 3 was acked by Richard Henderson, so was patch 5 by Malc.

What about the other three patches (i386, ia64, sparc)? They are all
similar, so I think they could also be committed without more reviews.

Thanks,
Stefan Weil

Re: [Qemu-devel] [PATCH v2 03/15] sheepdog: move coroutine send/recv function to generic code

2011-09-17 Thread Paolo Bonzini


On 09/17/2011 08:29 AM, MORITA Kazutaka wrote:

  +#else
  +struct iovec *p = iov;
  +ret = 0;
  +while (iovlen  0) {
  +int rc;
  +if (do_sendv) {
  +rc = send(sockfd, p-iov_base, p-iov_len, 0);
  +} else {
  +rc = qemu_recv(sockfd, p-iov_base, p-iov_len, 0);
  +}
  +if (rc == -1) {
  +if (errno == EINTR) {
  +continue;
  +}
  +if (ret == 0) {
  +ret = -1;
  +}
  +break;
  +}
  +iovlen--, p++;
  +ret += rc;
  +}

This code can be called inside coroutines with a non-blocking fd, so
should we avoid busy waiting?


It doesn't busy wait, it exits with EAGAIN.  I'll squash in here the 
first hunk of patch 4, which is needed.


qemu_co_recvv already handles reads that return zero, unlike sheepdog's 
do_readv_writev.  I probably moved it there inadvertently while moving 
code around to cutils.c, but in order to fix qemu-ga I need to create a 
new file qemu-coroutine-io.c.


Kevin, do you want me to resubmit everything, or are you going to apply 
some more patches to the block branch (5 to 12 should be fine)?


Paolo

Re: [Qemu-devel] [PATCH v2 07/18] omap_gpmc: GPMC_IRQSTATUS is write-one-to-clear

2011-09-17 Thread Peter Maydell

On 17 September 2011 02:08, andrzej zaborowski balr...@gmail.com wrote:
 --- a/hw/omap_gpmc.c
 +++ b/hw/omap_gpmc.c
 @@ -284,7 +284,7 @@ static void omap_gpmc_write(void *opaque, 
 target_phys_addr_t addr,
         break;

     case 0x018:        /* GPMC_IRQSTATUS */
 -        s-irqen = ~value;
 +        s-irqen = ~value;

 Should we be clearing s->irqst here instead of irqen?

Oops, you're right... (IIRC this change was a spotted-while-reading-code
one, not a response to a behavioural issue with the model.)

-- PMM

Re: [Qemu-devel] [PATCH 14/58] device tree: add nop_node

2011-09-17 Thread Blue Swirl

On Wed, Sep 14, 2011 at 8:42 AM, Alexander Graf ag...@suse.de wrote:
 We have a qemu internal abstraction layer on FDT. While I'm not fully
 convinced we need it at all, it's missing the nop_node functionality that
 we now need on e500. So let's add it and think about the general future of
 that API later.

 Signed-off-by: Alexander Graf ag...@suse.de
 ---
  device_tree.c |   11 +++
  device_tree.h |    1 +
  2 files changed, 12 insertions(+), 0 deletions(-)

 diff --git a/device_tree.c b/device_tree.c
 index 3a224d1..23e89e3 100644
 --- a/device_tree.c
 +++ b/device_tree.c
 @@ -107,3 +107,14 @@ int qemu_devtree_setprop_string(void *fdt, const char 
 *node_path,

     return fdt_setprop_string(fdt, offset, property, string);
  }
 +
 +int qemu_devtree_nop_node(void *fdt, const char *node_path)
 +{
 +    int offset;
 +
 +    offset = fdt_path_offset(fdt, node_path);
 +    if (offset  0)

-EBRACES

 +        return offset;
 +
 +    return fdt_nop_node(fdt, offset);
 +}
 diff --git a/device_tree.h b/device_tree.h
 index cecd98f..76fce5f 100644
 --- a/device_tree.h
 +++ b/device_tree.h
 @@ -22,5 +22,6 @@ int qemu_devtree_setprop_cell(void *fdt, const char 
 *node_path,
                               const char *property, uint32_t val);
  int qemu_devtree_setprop_string(void *fdt, const char *node_path,
                                 const char *property, const char *string);
 +int qemu_devtree_nop_node(void *fdt, const char *node_path);

  #endif /* __DEVICE_TREE_H__ */
 --
 1.6.0.2

Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code

2011-09-17 Thread Blue Swirl

On Wed, Sep 14, 2011 at 8:42 AM, Alexander Graf ag...@suse.de wrote:
 CPUs that are not the boot CPU need to run in spinning code to check if they
 should run off to execute and if so where to jump to. This usually happens
 by leaving secondary CPUs looping and checking if some variable in memory
 changed.

 In an environment like Qemu, however, we can be more clever. We can just export
 the spin table the primary CPU modifies as an MMIO region that wakes up the
 respective secondary CPUs in an event-based fashion. That saves us quite some
 cycles while the secondary CPUs are not up yet.

 So this patch adds a PV device that simply exports the spinning table into the
 guest and thus allows the primary CPU to wake up secondary ones.

On Sparc32, there is no need for a PV device. The CPU is woken up from
halted state with an IPI. Maybe you could use this approach?

 Signed-off-by: Alexander Graf ag...@suse.de

 ---

 v1 -> v2:

  - change into MMIO scheme
  - map the secondary NIP instead of 0 1:1
  - only map 64MB for TLB, same as u-boot
  - prepare code for 64-bit spinnings

 v2 -> v3:

  - remove r6
  - set MAS2_M
  - map EA 0
  - use second TLB1 entry

 v3 -> v4:

  - change to memoryops

 v4 -> v5:

  - fix endianness bugs
 ---
  Makefile.target        |    2 +-
  hw/ppce500_mpc8544ds.c |   33 -
  hw/ppce500_spin.c      |  186 
 
  3 files changed, 216 insertions(+), 5 deletions(-)
  create mode 100644 hw/ppce500_spin.c

 diff --git a/Makefile.target b/Makefile.target
 index 2ed9099..3f689ce 100644
 --- a/Makefile.target
 +++ b/Makefile.target
 @@ -247,7 +247,7 @@ endif
  obj-ppc-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o
  obj-ppc-y += ppc440.o ppc440_bamboo.o
  # PowerPC E500 boards
 -obj-ppc-y += ppce500_mpc8544ds.o mpc8544_guts.o
 +obj-ppc-y += ppce500_mpc8544ds.o mpc8544_guts.o ppce500_spin.o
  # PowerPC 440 Xilinx ML507 reference board.
  obj-ppc-y += virtex_ml507.o
  obj-ppc-$(CONFIG_KVM) += kvm_ppc.o
 diff --git a/hw/ppce500_mpc8544ds.c b/hw/ppce500_mpc8544ds.c
 index 9379624..3b8b449 100644
 --- a/hw/ppce500_mpc8544ds.c
 +++ b/hw/ppce500_mpc8544ds.c
 @@ -49,6 +49,7 @@
  #define MPC8544_PCI_IO             0xE100
  #define MPC8544_PCI_IOLEN          0x1
  #define MPC8544_UTIL_BASE          (MPC8544_CCSRBAR_BASE + 0xe)
 +#define MPC8544_SPIN_BASE          0xEF00

  struct boot_info
  {
 @@ -164,6 +165,18 @@ static void mmubooke_create_initial_mapping(CPUState 
 *env,
     tlb-mas7_3 |= MAS3_UR | MAS3_UW | MAS3_UX | MAS3_SR | MAS3_SW | MAS3_SX;
  }

 +static void mpc8544ds_cpu_reset_sec(void *opaque)
 +{
 +    CPUState *env = opaque;
 +
 +    cpu_reset(env);
 +
 +    /* Secondary CPU starts in halted state for now. Needs to change when
 +       implementing non-kernel boot. */
 +    env-halted = 1;
 +    env-exception_index = EXCP_HLT;
 +}
 +
  static void mpc8544ds_cpu_reset(void *opaque)
  {
     CPUState *env = opaque;
 @@ -172,6 +185,7 @@ static void mpc8544ds_cpu_reset(void *opaque)
     cpu_reset(env);

     /* Set initial guest state. */
 +    env-halted = 0;
     env-gpr[1] = (1620) - 8;
     env-gpr[3] = bi-dt_base;
     env-nip = bi-entry;
 @@ -199,7 +213,6 @@ static void mpc8544ds_init(ram_addr_t ram_size,
     unsigned int pci_irq_nrs[4] = {1, 2, 3, 4};
     qemu_irq **irqs, *mpic;
     DeviceState *dev;
 -    struct boot_info *boot_info;
     CPUState *firstenv = NULL;

     /* Setup CPUs */
 @@ -234,9 +247,16 @@ static void mpc8544ds_init(ram_addr_t ram_size,
         env-spr[SPR_40x_TCR] = 1  26;

         /* Register reset handler */
 -        boot_info = g_malloc0(sizeof(struct boot_info));
 -        qemu_register_reset(mpc8544ds_cpu_reset, env);
 -        env-load_info = boot_info;
 +        if (!i) {
 +            /* Primary CPU */
 +            struct boot_info *boot_info;
 +            boot_info = g_malloc0(sizeof(struct boot_info));
 +            qemu_register_reset(mpc8544ds_cpu_reset, env);
 +            env-load_info = boot_info;
 +        } else {
 +            /* Secondary CPUs */
 +            qemu_register_reset(mpc8544ds_cpu_reset_sec, env);
 +        }
     }

     env = firstenv;
 @@ -289,6 +309,9 @@ static void mpc8544ds_init(ram_addr_t ram_size,
         }
     }

 +    /* Register spinning region */
 +    sysbus_create_simple(e500-spin, MPC8544_SPIN_BASE, NULL);
 +
     /* Load kernel. */
     if (kernel_filename) {
         kernel_size = load_uimage(kernel_filename, entry, loadaddr, NULL);
 @@ -321,6 +344,8 @@ static void mpc8544ds_init(ram_addr_t ram_size,

     /* If we're loading a kernel directly, we must load the device tree too. 
 */
     if (kernel_filename) {
 +        struct boot_info *boot_info;
 +
  #ifndef CONFIG_FDT
         cpu_abort(env, Compiled without FDT support - can't load kernel\n);
  #endif
 diff --git a/hw/ppce500_spin.c b/hw/ppce500_spin.c
 new file mode 100644
 index 000..38451ac
 --- /dev/null
 +++ b/hw/ppce500_spin.c
 @@ -0,0 +1,186 @@
 +#include hw.h

Re: [Qemu-devel] [PATCH 33/58] KVM: update kernel headers

2011-09-17 Thread Blue Swirl

On Wed, Sep 14, 2011 at 8:42 AM, Alexander Graf ag...@suse.de wrote:
 This patch updates the kvm kernel headers to the latest version.

 Signed-off-by: Alexander Graf ag...@suse.de
 ---
  linux-headers/asm-powerpc/kvm.h  |   23 +++
  linux-headers/asm-x86/kvm_para.h |   14 ++
  linux-headers/linux/kvm.h        |   25 +
  linux-headers/linux/kvm_para.h   |    1 +
  4 files changed, 55 insertions(+), 8 deletions(-)

 diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h
 index 777d307..579e219 100644
 --- a/linux-headers/asm-powerpc/kvm.h
 +++ b/linux-headers/asm-powerpc/kvm.h
 @@ -22,6 +22,10 @@

  #include linux/types.h

 +/* Select powerpc specific features in linux/kvm.h */
 +#define __KVM_HAVE_SPAPR_TCE
 +#define __KVM_HAVE_PPC_SMT
 +
  struct kvm_regs {
        __u64 pc;
        __u64 cr;
 @@ -145,6 +149,12 @@ struct kvm_regs {
  #define KVM_SREGS_E_UPDATE_DBSR                (1  3)

  /*
 + * Book3S special bits to indicate contents in the struct by maintaining
 + * backwards compatibility with older structs. If adding a new field,
 + * please make sure to add a flag for that new field */
 +#define KVM_SREGS_S_HIOR               (1  0)
 +
 +/*
  * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a
  * previous KVM_GET_REGS.
  *
 @@ -169,6 +179,8 @@ struct kvm_sregs {
                                __u64 ibat[8];
                                __u64 dbat[8];
                        } ppc32;
 +                       __u64 flags; /* KVM_SREGS_S_ */
 +                       __u64 hior;
                } s;
                struct {
                        union {
 @@ -272,4 +284,15 @@ struct kvm_guest_debug_arch {
  #define KVM_INTERRUPT_UNSET    -2U
  #define KVM_INTERRUPT_SET_LEVEL        -3U

 +/* for KVM_CAP_SPAPR_TCE */
 +struct kvm_create_spapr_tce {
 +       __u64 liobn;
 +       __u32 window_size;
 +};
 +
 +/* for KVM_ALLOCATE_RMA */
 +struct kvm_allocate_rma {
 +       __u64 rma_size;
 +};
 +
  #endif /* __LINUX_KVM_POWERPC_H */
 diff --git a/linux-headers/asm-x86/kvm_para.h 
 b/linux-headers/asm-x86/kvm_para.h
 index 834d71e..f2ac46a 100644
 --- a/linux-headers/asm-x86/kvm_para.h
 +++ b/linux-headers/asm-x86/kvm_para.h
 @@ -21,6 +21,7 @@
  */
  #define KVM_FEATURE_CLOCKSOURCE2        3
  #define KVM_FEATURE_ASYNC_PF           4
 +#define KVM_FEATURE_STEAL_TIME         5

  /* The last 8 bits are used to indicate how to interpret the flags field
  * in pvclock structure. If no bits are set, all flags are ignored.
 @@ -30,10 +31,23 @@
  #define MSR_KVM_WALL_CLOCK  0x11
  #define MSR_KVM_SYSTEM_TIME 0x12

 +#define KVM_MSR_ENABLED 1
  /* Custom MSRs falls in the range 0x4b564d00-0x4b564dff */
  #define MSR_KVM_WALL_CLOCK_NEW  0x4b564d00
  #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
  #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
 +#define MSR_KVM_STEAL_TIME  0x4b564d03
 +
 +struct kvm_steal_time {
 +       __u64 steal;
 +       __u32 version;
 +       __u32 flags;
 +       __u32 pad[12];
 +};
 +
 +#define KVM_STEAL_ALIGNMENT_BITS 5
 +#define KVM_STEAL_VALID_BITS ((-1ULL  (KVM_STEAL_ALIGNMENT_BITS + 1)))
 +#define KVM_STEAL_RESERVED_MASK (((1  KVM_STEAL_ALIGNMENT_BITS) - 1 )  1)

  #define KVM_MAX_MMU_OP_BATCH           32

 diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
 index fc63b73..2062375 100644
 --- a/linux-headers/linux/kvm.h
 +++ b/linux-headers/linux/kvm.h
 @@ -161,6 +161,7 @@ struct kvm_pit_config {
  #define KVM_EXIT_NMI              16
  #define KVM_EXIT_INTERNAL_ERROR   17
  #define KVM_EXIT_OSI              18
 +#define KVM_EXIT_PAPR_HCALL      19

  /* For KVM_EXIT_INTERNAL_ERROR */
  #define KVM_INTERNAL_ERROR_EMULATION 1
 @@ -264,6 +265,11 @@ struct kvm_run {
                struct {
                        __u64 gprs[32];
                } osi;
 +               struct {
 +                       __u64 nr;
 +                       __u64 ret;
 +                       __u64 args[9];
 +               } papr_hcall;
                /* Fix the size of the union. */
                char padding[256];
        };
 @@ -457,7 +463,7 @@ struct kvm_ppc_pvinfo {
  #define KVM_CAP_VAPIC 6
  #define KVM_CAP_EXT_CPUID 7
  #define KVM_CAP_CLOCKSOURCE 8
 -#define KVM_CAP_NR_VCPUS 9       /* returns max vcpus per vm */
 +#define KVM_CAP_NR_VCPUS 9       /* returns recommended max vcpus per vm */
  #define KVM_CAP_NR_MEMSLOTS 10   /* returns max memory slots per vm */
  #define KVM_CAP_PIT 11
  #define KVM_CAP_NOP_IO_DELAY 12
 @@ -544,6 +550,12 @@ struct kvm_ppc_pvinfo {
  #define KVM_CAP_TSC_CONTROL 60
  #define KVM_CAP_GET_TSC_KHZ 61
  #define KVM_CAP_PPC_BOOKE_SREGS 62
 +#define KVM_CAP_SPAPR_TCE 63
 +#define KVM_CAP_PPC_SMT 64
 +#define KVM_CAP_PPC_RMA        65
 +#define KVM_CAP_MAX_VCPUS 66       /* returns max vcpus per vm */
 +#define KVM_CAP_PPC_HIOR 67
 +#define KVM_CAP_PPC_PAPR 68

  #ifdef KVM_CAP_IRQ_ROUTING

 @@ -746,6 +758,9 @@ struct kvm_clock_data {
  /* Available with

Re: [Qemu-devel] [PATCH 47/58] Implement POWER7's CFAR in TCG

2011-09-17 Thread Blue Swirl

On Wed, Sep 14, 2011 at 8:43 AM, Alexander Graf ag...@suse.de wrote:
 From: David Gibson da...@gibson.dropbear.id.au

 This patch implements support for the CFAR SPR on POWER7 (Come From
 Address Register), which snapshots the PC value at the time of a branch or
 an rfid.  The latest powerpc-next kernel also catches it and can show it in
 xmon or in the signal frames.

 This works well enough to let recent kernels boot (which otherwise oops
 on the CFAR access).  It hasn't been tested enough to be confident that the
 CFAR values are actually accurate, but one thing at a time.

This looks accurate at least for the cases covered.

A higher-performance implementation could update the register lazily,
only when the SPR is read; at most other times CFAR would only be
stored in DisasContext.

 Signed-off-by: Ben Herrenschmidt b...@kernel.crashing.org
 Signed-off-by: David Gibson da...@gibson.dropbear.id.au
 Signed-off-by: Alexander Graf ag...@suse.de
 ---
  target-ppc/cpu.h            |    8 
  target-ppc/translate.c      |   28 
  target-ppc/translate_init.c |   23 ++-
  3 files changed, 58 insertions(+), 1 deletions(-)

 diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
 index 32706df..3f4af22 100644
 --- a/target-ppc/cpu.h
 +++ b/target-ppc/cpu.h
 @@ -555,6 +555,8 @@ enum {
     /* Decrementer clock: RTC clock (POWER, 601) or bus clock                
 */
     POWERPC_FLAG_RTC_CLK  = 0x0001,
     POWERPC_FLAG_BUS_CLK  = 0x0002,
 +    /* Has CFAR                                                              
 */
 +    POWERPC_FLAG_CFAR     = 0x0004,
  };

  /*/
 @@ -872,6 +874,10 @@ struct CPUPPCState {
     target_ulong ctr;
     /* condition register */
     uint32_t crf[8];
 +#if defined(TARGET_PPC64)
 +    /* CFAR */
 +    target_ulong cfar;
 +#endif
     /* XER */
     target_ulong xer;
     /* Reservation address */
 @@ -1204,6 +1210,7 @@ static inline void cpu_clone_regs(CPUState *env, 
 target_ulong newsp)
  #define SPR_601_UDECR         (0x006)
  #define SPR_LR                (0x008)
  #define SPR_CTR               (0x009)
 +#define SPR_DSCR              (0x011)
  #define SPR_DSISR             (0x012)
  #define SPR_DAR               (0x013) /* DAE for PowerPC 601 */
  #define SPR_601_RTCU          (0x014)
 @@ -1212,6 +1219,7 @@ static inline void cpu_clone_regs(CPUState *env, 
 target_ulong newsp)
  #define SPR_SDR1              (0x019)
  #define SPR_SRR0              (0x01A)
  #define SPR_SRR1              (0x01B)
 +#define SPR_CFAR              (0x01C)
  #define SPR_AMR               (0x01D)
  #define SPR_BOOKE_PID         (0x030)
  #define SPR_BOOKE_DECAR       (0x036)
 diff --git a/target-ppc/translate.c b/target-ppc/translate.c
 index 4277460..1e362fc 100644
 --- a/target-ppc/translate.c
 +++ b/target-ppc/translate.c
 @@ -69,6 +69,9 @@ static TCGv cpu_nip;
  static TCGv cpu_msr;
  static TCGv cpu_ctr;
  static TCGv cpu_lr;
 +#if defined(TARGET_PPC64)
 +static TCGv cpu_cfar;
 +#endif
  static TCGv cpu_xer;
  static TCGv cpu_reserve;
  static TCGv_i32 cpu_fpscr;
 @@ -154,6 +157,11 @@ void ppc_translate_init(void)
     cpu_lr = tcg_global_mem_new(TCG_AREG0,
                                 offsetof(CPUState, lr), lr);

 +#if defined(TARGET_PPC64)
 +    cpu_cfar = tcg_global_mem_new(TCG_AREG0,
 +                                  offsetof(CPUState, cfar), cfar);
 +#endif
 +
     cpu_xer = tcg_global_mem_new(TCG_AREG0,
                                  offsetof(CPUState, xer), xer);

 @@ -187,6 +195,7 @@ typedef struct DisasContext {
     int le_mode;
  #if defined(TARGET_PPC64)
     int sf_mode;
 +    int has_cfar;
  #endif
     int fpu_enabled;
     int altivec_enabled;
 @@ -3345,6 +3354,14 @@ static inline void gen_qemu_st32fiw(DisasContext *ctx, 
 TCGv_i64 arg1, TCGv arg2)
  /* stfiwx */
  GEN_STXF(stfiw, st32fiw, 0x17, 0x1E, PPC_FLOAT_STFIWX);

 +static inline void gen_update_cfar(DisasContext *ctx, target_ulong nip)
 +{
 +#if defined(TARGET_PPC64)
 +    if (ctx-has_cfar)

Braces missing, please use checkpatch.pl.

 +        tcg_gen_movi_tl(cpu_cfar, nip);
 +#endif
 +}
 +
  /***                                Branch                                 
 ***/
  static inline void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest)
  {
 @@ -3407,6 +3424,7 @@ static void gen_b(DisasContext *ctx)
         target = li;
     if (LK(ctx-opcode))
         gen_setlr(ctx, ctx-nip);
 +    gen_update_cfar(ctx, ctx-nip);
     gen_goto_tb(ctx, 0, target);
  }

 @@ -3469,6 +3487,7 @@ static inline void gen_bcond(DisasContext *ctx, int 
 type)
         }
         tcg_temp_free_i32(temp);
     }
 +    gen_update_cfar(ctx, ctx-nip);
     if (type == BCOND_IM) {
         target_ulong li = (target_long)((int16_t)(BD(ctx-opcode)));
         if (likely(AA(ctx-opcode) == 0)) {
 @@ -3580,6 +3599,7 @@ static void gen_rfi(DisasContext *ctx)
         gen_inval_exception(ctx,

Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code

2011-09-17 Thread Alexander Graf


Am 17.09.2011 um 18:58 schrieb Blue Swirl blauwir...@gmail.com:

 On Wed, Sep 14, 2011 at 8:42 AM, Alexander Graf ag...@suse.de wrote:
 CPUs that are not the boot CPU need to run in spinning code to check if they
 should run off to execute and if so where to jump to. This usually happens
 by leaving secondary CPUs looping and checking if some variable in memory
 changed.
 
 In an environment like Qemu however we can be more clever. We can just export
 the spin table the primary CPU modifies as MMIO region that would event based
 wake up the respective secondary CPUs. That saves us quite some cycles while
 the secondary CPUs are not up yet.
 
 So this patch adds a PV device that simply exports the spinning table into 
 the
 guest and thus allows the primary CPU to wake up secondary ones.
 
 On Sparc32, there is no need for a PV device. The CPU is woken up from
 halted state with an IPI. Maybe you could use this approach?

The way it's done here is defined by u-boot and now also nailed down in the 
ePAPR architecture spec. While alternatives might be more appealing, this is 
how guests work today :).

Alex

Re: [Qemu-devel] [PATCH v2 03/15] sheepdog: move coroutine send/recv function to generic code

2011-09-17 Thread MORITA Kazutaka

At Sat, 17 Sep 2011 16:49:22 +0200,
Paolo Bonzini wrote:
 
 On 09/17/2011 08:29 AM, MORITA Kazutaka wrote:
+#else
+struct iovec *p = iov;
+ret = 0;
+while (iovlen  0) {
+int rc;
+if (do_sendv) {
+rc = send(sockfd, p-iov_base, p-iov_len, 0);
+} else {
+rc = qemu_recv(sockfd, p-iov_base, p-iov_len, 0);
+}
+if (rc == -1) {
+if (errno == EINTR) {
+continue;
+}
+if (ret == 0) {
+ret = -1;
+}
+break;
+}
+iovlen--, p++;
+ret += rc;
+}
  This code can be called inside coroutines with a non-blocking fd, so
  should we avoid busy waiting?
 
 It doesn't busy wait, it exits with EAGAIN.  I'll squash in here the 

Oops, you're right.  Sorry for the noise.

Thanks,

Kazutaka


 first hunk of patch 4, which is needed.
 
 qemu_co_recvv already handles reads that return zero, unlike sheepdog's 
 do_readv_writev.  I probably moved it there inadvertently while moving 
 code around to cutils.c, but in order to fix qemu-ga I need to create a 
 new file qemu-coroutine-io.c.
 
 Kevin, do you want me to resubmit everything, or are you going to apply 
 some more patches to the block branch (5 to 12 should be fine)?
 
 Paolo

Re: [Qemu-devel] [PATCH 33/58] KVM: update kernel headers

2011-09-17 Thread Alexander Graf


Am 17.09.2011 um 18:59 schrieb Blue Swirl blauwir...@gmail.com:

 On Wed, Sep 14, 2011 at 8:42 AM, Alexander Graf ag...@suse.de wrote:
 This patch updates the kvm kernel headers to the latest version.
 
 Signed-off-by: Alexander Graf ag...@suse.de
 ---
  linux-headers/asm-powerpc/kvm.h  |   23 +++
  linux-headers/asm-x86/kvm_para.h |   14 ++
  linux-headers/linux/kvm.h|   25 +
  linux-headers/linux/kvm_para.h   |1 +
  4 files changed, 55 insertions(+), 8 deletions(-)
 
 diff --git a/linux-headers/asm-powerpc/kvm.h 
 b/linux-headers/asm-powerpc/kvm.h
 index 777d307..579e219 100644
 --- a/linux-headers/asm-powerpc/kvm.h
 +++ b/linux-headers/asm-powerpc/kvm.h
 @@ -22,6 +22,10 @@
 
  #include linux/types.h
 
 +/* Select powerpc specific features in linux/kvm.h */
 +#define __KVM_HAVE_SPAPR_TCE
 +#define __KVM_HAVE_PPC_SMT
 +
  struct kvm_regs {
__u64 pc;
__u64 cr;
 @@ -145,6 +149,12 @@ struct kvm_regs {
  #define KVM_SREGS_E_UPDATE_DBSR(1  3)
 
  /*
 + * Book3S special bits to indicate contents in the struct by maintaining
 + * backwards compatibility with older structs. If adding a new field,
 + * please make sure to add a flag for that new field */
 +#define KVM_SREGS_S_HIOR   (1  0)
 +
 +/*
  * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a
  * previous KVM_GET_REGS.
  *
 @@ -169,6 +179,8 @@ struct kvm_sregs {
__u64 ibat[8];
__u64 dbat[8];
} ppc32;
 +   __u64 flags; /* KVM_SREGS_S_ */
 +   __u64 hior;
} s;
struct {
union {
 @@ -272,4 +284,15 @@ struct kvm_guest_debug_arch {
  #define KVM_INTERRUPT_UNSET-2U
  #define KVM_INTERRUPT_SET_LEVEL-3U
 
 +/* for KVM_CAP_SPAPR_TCE */
 +struct kvm_create_spapr_tce {
 +   __u64 liobn;
 +   __u32 window_size;
 +};
 +
 +/* for KVM_ALLOCATE_RMA */
 +struct kvm_allocate_rma {
 +   __u64 rma_size;
 +};
 +
  #endif /* __LINUX_KVM_POWERPC_H */
 diff --git a/linux-headers/asm-x86/kvm_para.h 
 b/linux-headers/asm-x86/kvm_para.h
 index 834d71e..f2ac46a 100644
 --- a/linux-headers/asm-x86/kvm_para.h
 +++ b/linux-headers/asm-x86/kvm_para.h
 @@ -21,6 +21,7 @@
  */
  #define KVM_FEATURE_CLOCKSOURCE2 3
  #define KVM_FEATURE_ASYNC_PF   4
 +#define KVM_FEATURE_STEAL_TIME 5
 
  /* The last 8 bits are used to indicate how to interpret the flags field
  * in pvclock structure. If no bits are set, all flags are ignored.
 @@ -30,10 +31,23 @@
  #define MSR_KVM_WALL_CLOCK  0x11
  #define MSR_KVM_SYSTEM_TIME 0x12
 
 +#define KVM_MSR_ENABLED 1
  /* Custom MSRs falls in the range 0x4b564d00-0x4b564dff */
  #define MSR_KVM_WALL_CLOCK_NEW  0x4b564d00
  #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
  #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
 +#define MSR_KVM_STEAL_TIME  0x4b564d03
 +
 +struct kvm_steal_time {
 +   __u64 steal;
 +   __u32 version;
 +   __u32 flags;
 +   __u32 pad[12];
 +};
 +
 +#define KVM_STEAL_ALIGNMENT_BITS 5
 +#define KVM_STEAL_VALID_BITS ((-1ULL  (KVM_STEAL_ALIGNMENT_BITS + 1)))
 +#define KVM_STEAL_RESERVED_MASK (((1  KVM_STEAL_ALIGNMENT_BITS) - 1 )  
 1)
 
  #define KVM_MAX_MMU_OP_BATCH   32
 
 diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
 index fc63b73..2062375 100644
 --- a/linux-headers/linux/kvm.h
 +++ b/linux-headers/linux/kvm.h
 @@ -161,6 +161,7 @@ struct kvm_pit_config {
  #define KVM_EXIT_NMI  16
  #define KVM_EXIT_INTERNAL_ERROR   17
  #define KVM_EXIT_OSI  18
 +#define KVM_EXIT_PAPR_HCALL  19
 
  /* For KVM_EXIT_INTERNAL_ERROR */
  #define KVM_INTERNAL_ERROR_EMULATION 1
 @@ -264,6 +265,11 @@ struct kvm_run {
struct {
__u64 gprs[32];
} osi;
 +   struct {
 +   __u64 nr;
 +   __u64 ret;
 +   __u64 args[9];
 +   } papr_hcall;
/* Fix the size of the union. */
char padding[256];
};
 @@ -457,7 +463,7 @@ struct kvm_ppc_pvinfo {
  #define KVM_CAP_VAPIC 6
  #define KVM_CAP_EXT_CPUID 7
  #define KVM_CAP_CLOCKSOURCE 8
 -#define KVM_CAP_NR_VCPUS 9   /* returns max vcpus per vm */
 +#define KVM_CAP_NR_VCPUS 9   /* returns recommended max vcpus per vm */
  #define KVM_CAP_NR_MEMSLOTS 10   /* returns max memory slots per vm */
  #define KVM_CAP_PIT 11
  #define KVM_CAP_NOP_IO_DELAY 12
 @@ -544,6 +550,12 @@ struct kvm_ppc_pvinfo {
  #define KVM_CAP_TSC_CONTROL 60
  #define KVM_CAP_GET_TSC_KHZ 61
  #define KVM_CAP_PPC_BOOKE_SREGS 62
 +#define KVM_CAP_SPAPR_TCE 63
 +#define KVM_CAP_PPC_SMT 64
 +#define KVM_CAP_PPC_RMA65
 +#define KVM_CAP_MAX_VCPUS 66   /* returns max vcpus per vm */
 +#define KVM_CAP_PPC_HIOR 67
 +#define KVM_CAP_PPC_PAPR 68
 
  #ifdef

Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code

2011-09-17 Thread Blue Swirl

On Sat, Sep 17, 2011 at 5:15 PM, Alexander Graf ag...@suse.de wrote:

 Am 17.09.2011 um 18:58 schrieb Blue Swirl blauwir...@gmail.com:

 On Wed, Sep 14, 2011 at 8:42 AM, Alexander Graf ag...@suse.de wrote:
 CPUs that are not the boot CPU need to run in spinning code to check if they
 should run off to execute and if so where to jump to. This usually happens
 by leaving secondary CPUs looping and checking if some variable in memory
 changed.

 In an environment like Qemu however we can be more clever. We can just 
 export
 the spin table the primary CPU modifies as MMIO region that would event 
 based
 wake up the respective secondary CPUs. That saves us quite some cycles while
 the secondary CPUs are not up yet.

 So this patch adds a PV device that simply exports the spinning table into 
 the
 guest and thus allows the primary CPU to wake up secondary ones.

 On Sparc32, there is no need for a PV device. The CPU is woken up from
 halted state with an IPI. Maybe you could use this approach?

 The way it's done here is defined by u-boot and now also nailed down in the 
 ePAPR architecture spec. While alternatives might be more appealing, this is 
 how guests work today :).

OK. I hoped that there were no implementations yet. The header (btw
missing) should point to the spec.

Re: [Qemu-devel] [PATCH 06/14] qdev: add ability to do QOM-style derived naming

2011-09-17 Thread Blue Swirl

On Fri, Sep 16, 2011 at 4:00 PM, Anthony Liguori aligu...@us.ibm.com wrote:
 By using a prefix of :: in the name, we can safely derive the composed 
 device
 name from the parent device and busses name.  For instance, if the ::i440fx
 device created a device named piix3, it would look like this:

  static void i440fx_initfn(...)
  {
    s-piix3 = qdev_create(PIIX3, ::piix3);
    ...

 The resulting device would be named ::i440fx::i440fx.0::piix3.  The reason 
 for
 the middle ::i440fx.0 blob is that there are two levels of the tree 
 hierarchy
 here and the bus level already has its name derived from the parent device.

It could make sense to name the intermediate level by bus type, like
::i440fx::pci.0::piix3.

 We'll eliminate the bus level of the hierarchy in due time, but for now we 
 have
 to just live with the ugly names.

 This patch lets qdev names be specified as a printf style format string which 
 is
 convenient for creating devices like ::smbus-eeprom[%d].

 Signed-off-by: Anthony Liguori aligu...@us.ibm.com
 ---
  hw/qdev.c |   79 +++-
  hw/qdev.h |    8 -
  2 files changed, 78 insertions(+), 9 deletions(-)

 diff --git a/hw/qdev.c b/hw/qdev.c
 index 3096667..6bf6650 100644
 --- a/hw/qdev.c
 +++ b/hw/qdev.c
 @@ -88,9 +88,10 @@ static DeviceInfo *qdev_find_info(BusInfo *bus_info, const 
 char *name)
     return NULL;
  }

 -static DeviceState *qdev_create_from_info(BusState *bus, DeviceInfo *info, 
 const char *id)
 +static DeviceState *qdev_create_from_infov(BusState *bus, DeviceInfo *info, 
 const char *id, va_list ap)
  {
     DeviceState *dev;
 +    char *name = NULL;

     assert(bus-info == info-bus_info);
     dev = g_malloc0(info-size);
 @@ -107,18 +108,50 @@ static DeviceState *qdev_create_from_info(BusState 
 *bus, DeviceInfo *info, const
     }
     dev-instance_id_alias = -1;
     dev-state = DEV_STATE_CREATED;
 -    dev-id = g_strdup(id);
 +
 +    if (id) {
 +        name = g_strdup_vprintf(id, ap);
 +        if (name[0] == ':'  name[1] == ':') {
 +            const char *parent_bus, *parent_device;
 +            char *full_name;
 +
 +            if (dev-parent_bus  dev-parent_bus-parent) {
 +                parent_device = dev-parent_bus-parent-id;
 +                parent_bus = dev-parent_bus-name;
 +
 +                full_name = g_strdup_printf(%s%s%s,
 +                                            dev-parent_bus-parent-id,
 +                                            dev-parent_bus-name,
 +                                            name);
 +                g_free(name);
 +                name = full_name;
 +            }
 +        }
 +    }
 +    dev-id = name;
 +    return dev;
 +}
 +
 +static DeviceState *qdev_create_from_info(BusState *bus, DeviceInfo *info, 
 const char *id, ...)
 +{
 +    DeviceState *dev;
 +    va_list ap;
 +
 +    va_start(ap, id);
 +    dev = qdev_create_from_infov(bus, info, id, ap);
 +    va_end(ap);
 +
     return dev;
  }

  /* Create a new device.  This only initializes the device state structure
    and allows properties to be set.  qdev_init should be called to
    initialize the actual device emulation.  */
 -DeviceState *qdev_create(BusState *bus, const char *name, const char *id)
 +DeviceState *qdev_createv(BusState *bus, const char *name, const char *id, 
 va_list ap)
  {
     DeviceState *dev;

 -    dev = qdev_try_create(bus, name, id);
 +    dev = qdev_try_createv(bus, name, id, ap);
     if (!dev) {
         if (bus) {
             hw_error(Unknown device '%s' for bus '%s'\n, name,
 @@ -131,7 +164,19 @@ DeviceState *qdev_create(BusState *bus, const char 
 *name, const char *id)
     return dev;
  }

 -DeviceState *qdev_try_create(BusState *bus, const char *name, const char *id)
 +DeviceState *qdev_create(BusState *bus, const char *name, const char *id, 
 ...)
 +{
 +    DeviceState *dev;
 +    va_list ap;
 +
 +    va_start(ap, id);
 +    dev = qdev_createv(bus, name, id, ap);
 +    va_end(ap);
 +
 +    return dev;
 +}
 +
 +DeviceState *qdev_try_createv(BusState *bus, const char *name, const char 
 *id, va_list ap)
  {
     DeviceInfo *info;

 @@ -144,7 +189,19 @@ DeviceState *qdev_try_create(BusState *bus, const char 
 *name, const char *id)
         return NULL;
     }

 -    return qdev_create_from_info(bus, info, id);
 +    return qdev_create_from_infov(bus, info, id, ap);
 +}
 +
 +DeviceState *qdev_try_create(BusState *bus, const char *name, const char 
 *id, ...)
 +{
 +    DeviceState *dev;
 +    va_list ap;
 +
 +    va_start(ap, id);
 +    dev = qdev_try_createv(bus, name, id, ap);
 +    va_end(ap);
 +
 +    return dev;
  }

  static void qdev_print_devinfo(DeviceInfo *info)
 @@ -231,6 +288,7 @@ DeviceState *qdev_device_add(QemuOpts *opts)
     DeviceInfo *info;
     DeviceState *qdev;
     BusState *bus;
 +    const char *id;

     driver = qemu_opt_get(opts, driver);
     if (!driver) {
 @@ -271,8 +329,15 @@ DeviceState *qdev_device_add(QemuOpts *opts)
         return

Re: [Qemu-devel] [PATCH 00/14] qdev: assign unique names to all devices (part 1)

2011-09-17 Thread Blue Swirl

On Fri, Sep 16, 2011 at 4:00 PM, Anthony Liguori aligu...@us.ibm.com wrote:
 This series introduces an infrastructure to remove anonymous devices from 
 qdev.
 Anonymous devices are one of the big gaps between qdev and QOM so removing them is
 a prerequisite to incrementally merging QOM.

 Besides the infrastructure, I also converted almost all of the possible PC
 devices to have unique names.  Please note that naming is not a property of
 devices but rather of the thing that creates the devices (usually machines).

 The names are ugly but this is because of the alternating device/bus hierarchy
 in qdev.  For now, the names use '::' as delimiters but I think Jan has
 convinced me that down the road, we should use '/' as a delimiter such that
 the resulting names are actually valid paths (using a canonical path format).

The patches look fine to me (assuming s/::/\//g).

Re: [Qemu-devel] [PATCH] Makefile: Fix broken build

2011-09-17 Thread Blue Swirl

Thanks, applied.

On Fri, Sep 16, 2011 at 7:50 PM, Stefan Weil w...@mail.berlios.de wrote:
 make -C mybuilddir no longer works (regression caused by commit
 388d475815c23901010a25c845eb078d47ee0740).

 PWD is the directory of the caller (not mybuilddir),
 so BUILD_DIR is set to the wrong value.

 GNU make sets CURDIR to the correct value.
 Use this macro instead of PWD.

 Cc: Lluís Vilanova vilan...@ac.upc.edu
 Cc: Anthony Liguori aligu...@us.ibm.com
 Signed-off-by: Stefan Weil w...@mail.berlios.de
 ---
  Makefile |    4 ++--
  1 files changed, 2 insertions(+), 2 deletions(-)

 diff --git a/Makefile b/Makefile
 index 57cc399..a211158 100644
 --- a/Makefile
 +++ b/Makefile
 @@ -1,7 +1,7 @@
  # Makefile for QEMU.

 -# Always point to the root of the build tree
 -BUILD_DIR=$(PWD)
 +# Always point to the root of the build tree (needs GNU make).
 +BUILD_DIR=$(CURDIR)

  GENERATED_HEADERS = config-host.h trace.h qemu-options.def
  ifeq ($(TRACE_BACKEND),dtrace)
 --
 1.7.2.5

Re: [Qemu-devel] [PATCH 0/4] Remove trailing double quote limitation and add virtio_set_status trace event

2011-09-17 Thread Blue Swirl

Thanks, applied all.

On Tue, Sep 13, 2011 at 12:34 PM, Stefan Hajnoczi
stefa...@linux.vnet.ibm.com wrote:
 This series removes the tracetool parser limitation that format strings must
 begin and end with double quotes.  In practice this means we need to work
 around PRI*64 usage by adding dummy "" at the end of the line.  It's fairly
 easy to solve this parser limitation and do away with the workarounds.

 While we're at it, also add the virtio_set_status() trace event to properly
 follow the lifecycle of virtio devices.

  docs/tracing.txt  |    5 +
  hw/virtio.c       |   10 ++
  hw/virtio.h       |    9 +
  scripts/tracetool |   20 +---
  trace-events      |   37 +++--
  5 files changed, 44 insertions(+), 37 deletions(-)

Re: [Qemu-devel] [PATCH] target-i386: Fix several SSE3 instructions.

2011-09-17 Thread Blue Swirl

Thanks, applied.

On Fri, Sep 16, 2011 at 3:29 PM, Max Reitz m...@tyndur.org wrote:
 haddp[sd], hsubp[sd] and addsubp[sd] operate on floats, thus it is
 necessary to use the appropriate floating point calculation functions.
 If this is not done, those functions operate merely on integers, which
 is not correct.

 Signed-off-by: Max Reitz m...@tyndur.org
 ---
  target-i386/ops_sse.h |   36 ++--
  1 files changed, 18 insertions(+), 18 deletions(-)

 diff --git a/target-i386/ops_sse.h b/target-i386/ops_sse.h
 index 703be99..aa41d25 100644
 --- a/target-i386/ops_sse.h
 +++ b/target-i386/ops_sse.h
 @@ -859,51 +859,51 @@ void helper_insertq_i(XMMReg *d, int index, int length)
  void helper_haddps(XMMReg *d, XMMReg *s)
  {
     XMMReg r;
 -    r.XMM_S(0) = d-XMM_S(0) + d-XMM_S(1);
 -    r.XMM_S(1) = d-XMM_S(2) + d-XMM_S(3);
 -    r.XMM_S(2) = s-XMM_S(0) + s-XMM_S(1);
 -    r.XMM_S(3) = s-XMM_S(2) + s-XMM_S(3);
 +    r.XMM_S(0) = float32_add(d-XMM_S(0), d-XMM_S(1), env-sse_status);
 +    r.XMM_S(1) = float32_add(d-XMM_S(2), d-XMM_S(3), env-sse_status);
 +    r.XMM_S(2) = float32_add(s-XMM_S(0), s-XMM_S(1), env-sse_status);
 +    r.XMM_S(3) = float32_add(s-XMM_S(2), s-XMM_S(3), env-sse_status);
     *d = r;
  }

  void helper_haddpd(XMMReg *d, XMMReg *s)
  {
     XMMReg r;
 -    r.XMM_D(0) = d-XMM_D(0) + d-XMM_D(1);
 -    r.XMM_D(1) = s-XMM_D(0) + s-XMM_D(1);
 +    r.XMM_D(0) = float64_add(d-XMM_D(0), d-XMM_D(1), env-sse_status);
 +    r.XMM_D(1) = float64_add(s-XMM_D(0), s-XMM_D(1), env-sse_status);
     *d = r;
  }

  void helper_hsubps(XMMReg *d, XMMReg *s)
  {
     XMMReg r;
 -    r.XMM_S(0) = d-XMM_S(0) - d-XMM_S(1);
 -    r.XMM_S(1) = d-XMM_S(2) - d-XMM_S(3);
 -    r.XMM_S(2) = s-XMM_S(0) - s-XMM_S(1);
 -    r.XMM_S(3) = s-XMM_S(2) - s-XMM_S(3);
 +    r.XMM_S(0) = float32_sub(d-XMM_S(0), d-XMM_S(1), env-sse_status);
 +    r.XMM_S(1) = float32_sub(d-XMM_S(2), d-XMM_S(3), env-sse_status);
 +    r.XMM_S(2) = float32_sub(s-XMM_S(0), s-XMM_S(1), env-sse_status);
 +    r.XMM_S(3) = float32_sub(s-XMM_S(2), s-XMM_S(3), env-sse_status);
     *d = r;
  }

  void helper_hsubpd(XMMReg *d, XMMReg *s)
  {
     XMMReg r;
 -    r.XMM_D(0) = d-XMM_D(0) - d-XMM_D(1);
 -    r.XMM_D(1) = s-XMM_D(0) - s-XMM_D(1);
 +    r.XMM_D(0) = float64_sub(d-XMM_D(0), d-XMM_D(1), env-sse_status);
 +    r.XMM_D(1) = float64_sub(s-XMM_D(0), s-XMM_D(1), env-sse_status);
     *d = r;
  }

  void helper_addsubps(XMMReg *d, XMMReg *s)
  {
 -    d-XMM_S(0) = d-XMM_S(0) - s-XMM_S(0);
 -    d-XMM_S(1) = d-XMM_S(1) + s-XMM_S(1);
 -    d-XMM_S(2) = d-XMM_S(2) - s-XMM_S(2);
 -    d-XMM_S(3) = d-XMM_S(3) + s-XMM_S(3);
 +    d-XMM_S(0) = float32_sub(d-XMM_S(0), s-XMM_S(0), env-sse_status);
 +    d-XMM_S(1) = float32_add(d-XMM_S(1), s-XMM_S(1), env-sse_status);
 +    d-XMM_S(2) = float32_sub(d-XMM_S(2), s-XMM_S(2), env-sse_status);
 +    d-XMM_S(3) = float32_add(d-XMM_S(3), s-XMM_S(3), env-sse_status);
  }

  void helper_addsubpd(XMMReg *d, XMMReg *s)
  {
 -    d-XMM_D(0) = d-XMM_D(0) - s-XMM_D(0);
 -    d-XMM_D(1) = d-XMM_D(1) + s-XMM_D(1);
 +    d-XMM_D(0) = float64_sub(d-XMM_D(0), s-XMM_D(0), env-sse_status);
 +    d-XMM_D(1) = float64_add(d-XMM_D(1), s-XMM_D(1), env-sse_status);
  }

  /* XXX: unordered */
 --
 1.7.6.1

Re: [Qemu-devel] [PATCH 0/5] Only one call output register needed for 64 bit hosts

2011-09-17 Thread Blue Swirl

Thanks, applied all.

On Sat, Sep 17, 2011 at 2:01 PM, Stefan Weil w...@mail.berlios.de wrote:
 Am 05.09.2011 11:06, schrieb Stefan Weil:

 The number of registers needed for the return value of TCG opcode
 INDEX_op_call is calculated in function tcg_gen_callN (nb_rets).

 It can be 0 or 1, for 32 bit hosts also 2 (return 64 bit value in
 two 32 bit registers).

 Some TCG implementations reserve 2 registers although only 1 is used.
 The following patches fix this.

 [PATCH 1/5] tcg/i386: Only one call output register needed for 64 bit
 hosts
 [PATCH 2/5] tcg/ia64: Only one call output register needed for 64 bit
 hosts
 [PATCH 3/5] tcg/s390: Only one call output register needed for 64 bit
 hosts
 [PATCH 4/5] tcg/sparc: Only one call output register needed for 64 bit
 hosts
 [PATCH 5/5] tcg/ppc64: Only one call output register needed for 64 bit
 hosts

 Patch 3 was acked by Richard Henderson, so was patch 5 by Malc.

 What about the other three patches (i386 / ia64, sparc)? They are all
 similar,
 so I think they could also be committed without more reviews.

 Thanks,
 Stefan Weil

[Qemu-devel] [PATCH 2/2] hw/omap_gpmc: Modify correct field when writing IRQSTATUS register

2011-09-17 Thread Peter Maydell

Writing to IRQSTATUS should affect irqst, not irqen -- error
spotted by Andrzej Zaborowski.

Signed-off-by: Peter Maydell peter.mayd...@linaro.org
---
 hw/omap_gpmc.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/hw/omap_gpmc.c b/hw/omap_gpmc.c
index e27b93c..7fc82a2 100644
--- a/hw/omap_gpmc.c
+++ b/hw/omap_gpmc.c
@@ -639,7 +639,7 @@ static void omap_gpmc_write(void *opaque, 
target_phys_addr_t addr,
 break;
 
 case 0x018:/* GPMC_IRQSTATUS */
-s-irqen = ~value;
+s-irqst = ~value;
 omap_gpmc_int_update(s);
 break;
 
-- 
1.7.1

[Qemu-devel] [PATCH 1/2] hw/omap_gpmc: Add comment about FIFOTHRESHOLDSTATUS bit

2011-09-17 Thread Peter Maydell

Promote the remark about why we handle FIFOTHRESHOLDSTATUS the
way we do from the commit message of de8af7fe0 to a comment in
the code.

Signed-off-by: Peter Maydell peter.mayd...@linaro.org
---
 hw/omap_gpmc.c |7 +++
 1 files changed, 7 insertions(+), 0 deletions(-)

diff --git a/hw/omap_gpmc.c b/hw/omap_gpmc.c
index 02f0c52..e27b93c 100644
--- a/hw/omap_gpmc.c
+++ b/hw/omap_gpmc.c
@@ -569,6 +569,13 @@ static uint64_t omap_gpmc_read(void *opaque, 
target_phys_addr_t addr,
 case 0x1ec:/* GPMC_PREFETCH_CONTROL */
 return s-prefetch.startengine;
 case 0x1f0:/* GPMC_PREFETCH_STATUS */
+/* NB: The OMAP3 TRM is inconsistent about whether the GPMC
+ * FIFOTHRESHOLDSTATUS bit should be set when
+ * FIFOPOINTER > FIFOTHRESHOLD or when it is >= FIFOTHRESHOLD.
+ * Apparently the underlying functional spec from which the TRM was
+ * created states that the behaviour is >=, and this also
+ * makes more conceptual sense.
+ */
 return (s-prefetch.fifopointer  24) |
 ((s-prefetch.fifopointer =
   ((s-prefetch.config1  8)  0x7f) ? 1 : 0)  16) |
-- 
1.7.1

Re: [Qemu-devel] [PATCH] Add iSCSI support for QEMU

2011-09-17 Thread Laurent Vivier

Le jeudi 15 septembre 2011 à 08:06 +0200, Paolo Bonzini a écrit :
 On 09/14/2011 06:36 PM, Orit Wasserman wrote:
I think NBD would be fine, especially with a flush command.
   I think NBD would be fine, especially with a flush command.
  If I remember correctly, there is a problem with NBD with an image with
  a backing file chain. NBD client only displays a single file image.
  With ISCSI we can use different luns per image file.
 
 The NBD protocol supports multiple named exports, just not QEMU's 
 implementation.

Named exports are supported since commit
1d45f8b542f6b80b24c44533ef0dd9e1a3b17ea5

Regards,
Laurent

Re: [Qemu-devel] [net-next RFC V2 PATCH 0/5] Multiqueue support in tun/tap

2011-09-17 Thread Michael S. Tsirkin

On Sat, Sep 17, 2011 at 02:02:04PM +0800, Jason Wang wrote:
 A wiki page was created to describe the detailed design of all parts
 involved in the multiqueue implementation:
 http://www.linux-kvm.org/page/Multiqueue and some basic test results
 can be seen on this page
 http://www.linux-kvm.org/page/Multiqueue-performance-Sep-13. I will
 post the detailed numbers as an attachment in reply to this thread.

Does it make sense to test both with and without RPS in guest?

-- 
MST

Re: [Qemu-devel] blobstore disk format (was Re: Design of the blobstore)

2011-09-17 Thread Michael S. Tsirkin

On Fri, Sep 16, 2011 at 12:46:40PM -0400, Stefan Berger wrote:
 On 09/16/2011 10:44 AM, Michael S. Tsirkin wrote:
 On Thu, Sep 15, 2011 at 10:33:13AM -0400, Stefan Berger wrote:
 On 09/15/2011 08:28 AM, Michael S. Tsirkin wrote:
 So the below is a proposal for a directory scheme
 for storing (optionally multiple) nvram images,
 along with any metadata.
 Data is encoded using BER:
 http://en.wikipedia.org/wiki/Basic_Encoding_Rules
 Specifically, we mostly use the subsets.
 
 Would it change anything if we were to think of the NVRAM image as
 another piece of metadata?
 Yes, we can do that, sure. I had the feeling that it will help to lay
 out the image at the end, to make directory listing
 more efficient - the rest of metadata is usually small,
 image might be somewhat large.
 
 Why not let a convenience library handle the metadata on the device
 level, having it create the blob that the NVRAM layer ends up
 writing and parsing before the device uses it? Otherwise I should
 maybe rename the nvram to metadata_store :-/

Maybe we are talking about different things. All I argue for
is using a common standard format for storing metadata,
instead of having each device roll its own.

 I am also wondering whether each device shouldn't just handle the
 metadata itself,
 It could be that just means we will have custom code with
 different bugs in each device.
 Note that from experience with formats, the problem with
 time becomes less trivial than it seems as we
 need to provide forward and backward compatibility
 guarantees.
 
 Is that guaranteed just by using ASN.1 ?

At least for BER, yes. We can always skip an optional field
that we don't recognize without knowing anything about
its internal format.

 Do we need to add a
 revision to the metadata?

IMO, no. Instead we add optional attributes as long as we can
preserve backwards compatibility, and mandatory attributes
if we can't.

 How do we handle metadata that was to
 change over time, i.e., new attribute/values being added into a
 finite store?

Add them as optional attributes.

 so generate a blob from data structures containing
 all the metadata it needs, arranging attribute and value pairs
 itself (maybe using some convenience function for
 serialization/deserialization) and let the NVRAM layer not handle
 the metadata at all but only blobs, their maximum sizes, actual
 sizes
 Actual size seems to be a TPM specific thing.
 
 Yes, it could also be metadata. One should probably always be
 allowed to write a shorter blob than registered, but not a longer
 one. If the device did that, maybe it should assume it needs to
 prepend a header to the actual blob indicating the actual size of
 the following blob so trailing garbage can be ignored.

And then when we need more info we get to deal with
versioning of that header.

 encryption, integrity value (crc32 or sha1) and so on. What
 metadata should there be that really need to be handled on the NVRAM
 API and below level rather than on the device-specific code level?
 So checksum  (checksum value and type) 'and so on' are what I call
 metadata :) Doing it at device level seems wrong.
 
 You mean doing it at the NVRAM level seems wrong. Of course, again
 something a device could write into a header prepended to the actual
 blob. Maybe every device that needs it should do that so that if we
 were to support encryption of blobs and the key for decryption was
 wrong one could detect it early without feeding badly decrypted /
 corrupted state into the device and see what happens.

Do what? Checksum the data? Well, error detection is nice,
but it could be that people actually care about not losing
all of the data on nvram if qemu is killed.  I also wonder whether
invalidating all data because of a single bit corruption is a bug or a
feature.

 We use a directory as a SET in a CER format.
 This allows generating directory online without scanning
 the entries beforehand.
 
 I guess it is the 'unknown' for me... but what is the advantage of
 using ASN1 for this rather than just writing out packed and
 endianness-normalized data structures (with revision value),
 If you want an example of where this 'custom formats are easy
 so let us write one' leads to in the end,
 look no further than live migration code.
 It's a mess of hacks that does not even work across
 upstream qemu versions, let alone across
 downstreams (different linux distros).
 
 So is ASN1 the answer or does one still need to add a revision tag
 to each blob putting in custom code for parsing the different
 revisions of data structures (I guess) that may be extended/changed
 over time?
 
Stefan

We don't need revisions. We can always parse a new structure
skipping optional attributes we don't recognize. In case we want to
break old qemu versions intentionally, we can add
a mandatory attribute.

 having
 them crc32-protected to have some sanity checking in place?
 
  Stefan
 I'm not sure why we want crc specifically in TPM.
 If it is 'just because we

[Qemu-devel] [PATCH v2] memory: simple memory tree printer

2011-09-17 Thread Blue Swirl

Add a monitor command 'info mtree' to show the memory hierarchy
much like /proc/iomem in Linux.

Signed-off-by: Blue Swirl blauwir...@gmail.com
---
v1->v2: use /proc/iomem format.
---
 memory.c  |   27 +++
 memory.h  |2 ++
 monitor.c |7 +++
 3 files changed, 36 insertions(+), 0 deletions(-)

diff --git a/memory.c b/memory.c
index 101b67c..275f5cf 100644
--- a/memory.c
+++ b/memory.c
@@ -17,6 +17,7 @@
 #include bitops.h
 #include kvm.h
 #include assert.h
+#include monitor.h

 unsigned memory_region_transaction_depth = 0;

@@ -1253,3 +1254,29 @@ void set_system_io_map(MemoryRegion *mr)
 address_space_io.root = mr;
 memory_region_update_topology();
 }
+
+static void mtree_print_mr(Monitor *mon, MemoryRegion *mr, unsigned int level)
+{
+MemoryRegion *submr;
+unsigned int i;
+
+for (i = 0; i  level; i++) {
+monitor_printf(mon,   );
+}
+monitor_printf(mon, TARGET_FMT_plx - TARGET_FMT_plx  : %s\n,
+   mr-addr, mr-addr + (target_phys_addr_t)mr-size - 1,
+   mr-name);
+
+QTAILQ_FOREACH(submr, mr-subregions, subregions_link) {
+mtree_print_mr(mon, submr, level + 1);
+}
+}
+
+void mtree_info(Monitor *mon)
+{
+monitor_printf(mon, memory\n);
+mtree_print_mr(mon, address_space_memory.root, 0);
+
+monitor_printf(mon, I/O\n);
+mtree_print_mr(mon, address_space_io.root, 0);
+}
diff --git a/memory.h b/memory.h
index 06b83ae..09d8e29 100644
--- a/memory.h
+++ b/memory.h
@@ -500,6 +500,8 @@ void memory_region_transaction_begin(void);
  */
 void memory_region_transaction_commit(void);

+void mtree_info(Monitor *mon);
+
 #endif

 #endif
diff --git a/monitor.c b/monitor.c
index 03ae997..0302446 100644
--- a/monitor.c
+++ b/monitor.c
@@ -2968,6 +2968,13 @@ static const mon_cmd_t info_cmds[] = {
 },
 #endif
 {
+.name   = mtree,
+.args_type  = ,
+.params = ,
+.help   = show memory tree,
+.mhandler.info = mtree_info,
+},
+{
 .name   = jit,
 .args_type  = ,
 .params = ,
-- 
1.6.2.4

[Qemu-devel] [PATCH] sun4u: don't set up isa_mem_base

2011-09-17 Thread Blue Swirl

Since we use memory API in sun4u.c, after
71579cae30b53c910cd6c47ab4e683f647d36519, setting up isa_mem_base
puts vga.chain4 outside of the physical address space.

Fix by removing obsolete isa_mem_base set up.

Signed-off-by: Blue Swirl blauwir...@gmail.com
---
 hw/sun4u.c |1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/hw/sun4u.c b/hw/sun4u.c
index 32e6ab9..6afb0e7 100644
--- a/hw/sun4u.c
+++ b/hw/sun4u.c
@@ -763,7 +763,6 @@ static void sun4uv_init(ram_addr_t RAM_size,
 irq = qemu_allocate_irqs(cpu_set_irq, env, MAX_PILS);
 pci_bus = pci_apb_init(APB_SPECIAL_BASE, APB_MEM_BASE, irq, pci_bus2,
pci_bus3);
-isa_mem_base = APB_PCI_IO_BASE;
 pci_vga_init(pci_bus);

 // XXX Should be pci_bus3
-- 
1.6.2.4

[Qemu-devel] [PATCH 0/8] tcg/interpreter: Add TCG + interpreter for bytecode (virtual machine)

2011-09-17 Thread Stefan Weil


Hello,

these patches add a new code generator (TCG target) to qemu.

Unlike other tcg target code generators, this one does not generate
machine code for some cpu. It generates machine independent bytecode
which is interpreted later. That's why I called it TCI (tiny code
interpreter).

I wrote most of the code two years ago and included feedback and
contributions from several QEMU developers, notably TeleMan,
Stuart Brady, Blue Swirl and Malc. See the history here:
http://lists.nongnu.org/archive/html/qemu-devel/2009-09/msg01710.html

Since that time, I used TCI regularly, added small fixes and improvements
and rebased it to latest QEMU. Some versions were tested using
ARM (emulated and real), PowerPC (emulated) and MIPS (emulated) hosts,
but normally I run it on i386 and x86_64 hosts.

I'd appreciate seeing TCI in QEMU 1.0.

Regards,
Stefan Weil

The patches 2 and 4 are optional, patch 8 is only needed for running
TCI on a PowerPC host.

[PATCH 1/8] tcg: Declare TCG_TARGET_REG_BITS in tcg.h
[PATCH 2/8] tcg: Don't declare TCG_TARGET_REG_BITS in tcg-target.h
[PATCH 3/8] tcg: Add forward declarations for local functions
[PATCH 4/8] tcg: Add some assertions
[PATCH 5/8] tcg: Add interpreter for bytecode
[PATCH 6/8] tcg: Add bytecode generator for tcg interpreter
[PATCH 7/8] tcg: Add tcg interpreter to configure / make
[PATCH 8/8] ppc: Support tcg interpreter on ppc hosts

[Qemu-devel] [PATCH 8/8] ppc: Support tcg interpreter on ppc hosts

2011-09-17 Thread Stefan Weil

Tests of the tcg interpreter on an (emulated) ppc host
needed this small change.

Signed-off-by: Stefan Weil w...@mail.berlios.de
---
 cache-utils.h |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/cache-utils.h b/cache-utils.h
index 0b65907..7c3b282 100644
--- a/cache-utils.h
+++ b/cache-utils.h
@@ -1,7 +1,7 @@
 #ifndef QEMU_CACHE_UTILS_H
 #define QEMU_CACHE_UTILS_H
 
-#if defined(_ARCH_PPC)
+#if defined(_ARCH_PPC)  !defined(CONFIG_TCG_INTERPRETER)
 struct qemu_cache_conf {
 unsigned long dcache_bsize;
 unsigned long icache_bsize;
-- 
1.7.2.5

[Qemu-devel] [PATCH 4/8] tcg: Add some assertions

2011-09-17 Thread Stefan Weil

Signed-off-by: Stefan Weil w...@mail.berlios.de
---
 tcg/tcg.c |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index bdd7a67..30f3aef 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -794,7 +794,9 @@ static char *tcg_get_arg_str_idx(TCGContext *s, char *buf, 
int buf_size,
 {
 TCGTemp *ts;
 
+assert(idx = 0  idx  s-nb_temps);
 ts = s-temps[idx];
+assert(ts);
 if (idx  s-nb_globals) {
 pstrcpy(buf, buf_size, ts-name);
 } else {
-- 
1.7.2.5

[Qemu-devel] [PATCH 7/8] tcg: Add tcg interpreter to configure / make

2011-09-17 Thread Stefan Weil

Signed-off-by: Stefan Weil w...@mail.berlios.de
---
 Makefile.target |1 +
 configure   |   30 --
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/Makefile.target b/Makefile.target
index 88d2f1f..a2c3a4a 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -69,6 +69,7 @@ all: $(PROGS) stap
 # cpu emulator library
 libobj-y = exec.o translate-all.o cpu-exec.o translate.o
 libobj-y += tcg/tcg.o tcg/optimize.o
+libobj-$(CONFIG_TCG_INTERPRETER) += tcg/tci.o
 libobj-y += fpu/softfloat.o
 libobj-y += op_helper.o helper.o
 ifeq ($(TARGET_BASE_ARCH), i386)
diff --git a/configure b/configure
index ad924c4..1d800e1 100755
--- a/configure
+++ b/configure
@@ -138,6 +138,7 @@ debug_tcg=no
 debug_mon=no
 debug=no
 strip_opt=yes
+tcg_interpreter=no
 bigendian=no
 mingw32=no
 EXESUF=
@@ -647,6 +648,10 @@ for opt do
   ;;
   --enable-kvm) kvm=yes
   ;;
+  --disable-tcg-interpreter) tcg_interpreter=no
+  ;;
+  --enable-tcg-interpreter) tcg_interpreter=yes
+  ;;
   --disable-spice) spice=no
   ;;
   --enable-spice) spice=yes
@@ -997,6 +1002,7 @@ echo   --enable-bluez   enable bluez stack 
connectivity
 echo   --disable-slirp  disable SLIRP userspace network connectivity
 echo   --disable-kvmdisable KVM acceleration support
 echo   --enable-kvm enable KVM acceleration support
+echo   --enable-tcg-interpreter enable TCG with bytecode interpreter (TCI)
 echo   --disable-nptl   disable usermode NPTL support
 echo   --enable-nptlenable usermode NPTL support
 echo   --enable-system  enable all system emulation targets
@@ -2714,6 +2720,7 @@ echo Linux AIO support $linux_aio
 echo ATTR/XATTR support $attr
 echo Install blobs $blobs
 echo KVM support   $kvm
+echo TCG interpreter   $tcg_interpreter
 echo fdt support   $fdt
 echo preadv support$preadv
 echo fdatasync $fdatasync
@@ -2761,6 +2768,15 @@ case $cpu in
   armv4b|armv4l)
 ARCH=arm
   ;;
+  *)
+if test $tcg_interpreter = yes ; then
+echo Unsupported CPU = $cpu, will use TCG with TCI (experimental)
+ARCH=unknown
+else
+echo Unsupported CPU = $cpu, try --enable-tcg-interpreter
+exit 1
+fi
+  ;;
 esac
 echo ARCH=$ARCH  $config_host_mak
 if test $debug_tcg = yes ; then
@@ -2994,6 +3010,9 @@ fi
 if test $signalfd = yes ; then
   echo CONFIG_SIGNALFD=y  $config_host_mak
 fi
+if test $tcg_interpreter = yes ; then
+  echo CONFIG_TCG_INTERPRETER=y  $config_host_mak
+fi
 if test $need_offsetof = yes ; then
   echo CONFIG_NEED_OFFSETOF=y  $config_host_mak
 fi
@@ -3454,7 +3473,9 @@ cflags=
 includes=
 ldflags=
 
-if test $ARCH = sparc64 ; then
+if test $tcg_interpreter = yes; then
+  includes=-I\$(SRC_PATH)/tcg/bytecode $includes
+elif test $ARCH = sparc64 ; then
   includes=-I\$(SRC_PATH)/tcg/sparc $includes
 elif test $ARCH = s390x ; then
   includes=-I\$(SRC_PATH)/tcg/s390 $includes
@@ -3577,7 +3598,12 @@ if test $gprof = yes ; then
   fi
 fi
 
-linker_script=-Wl,-T../config-host.ld -Wl,-T,\$(SRC_PATH)/\$(ARCH).ld
+if test $ARCH = unknown; then
+  linker_script=
+else
+  linker_script=-Wl,-T../config-host.ld -Wl,-T,\$(SRC_PATH)/\$(ARCH).ld
+fi
+
 if test $target_linux_user = yes -o $target_bsd_user = yes ; then
   case $ARCH in
   sparc)
-- 
1.7.2.5

[Qemu-devel] [PATCH 1/8] tcg: Declare TCG_TARGET_REG_BITS in tcg.h

2011-09-17 Thread Stefan Weil

TCG_TARGET_REG_BITS can be determined by the compiler,
so there is no need to declare it for each individual tcg target.

This is especially important for new tcg targets
which will be supported by the tcg interpreter.

Signed-off-by: Stefan Weil w...@mail.berlios.de
---
 tcg/tcg.h |   10 ++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/tcg/tcg.h b/tcg/tcg.h
index dc5e9c9..1859fae 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -22,6 +22,16 @@
  * THE SOFTWARE.
  */
 #include qemu-common.h
+
+/* Target word size (must be identical to pointer size). */
+#if UINTPTR_MAX == UINT32_MAX
+# define TCG_TARGET_REG_BITS 32
+#elif UINTPTR_MAX == UINT64_MAX
+# define TCG_TARGET_REG_BITS 64
+#else
+# error Unknown pointer size for tcg target
+#endif
+
 #include tcg-target.h
 #include tcg-runtime.h
 
-- 
1.7.2.5

[Qemu-devel] [PATCH 6/8] tcg: Add bytecode generator for tcg interpreter

2011-09-17 Thread Stefan Weil

Unlike other tcg target code generators, this one does not generate
machine code for some cpu. It generates machine independent bytecode
which is interpreted later.

This allows running QEMU on any host.

Interpreted bytecode is slower than direct execution of generated
machine code.

Signed-off-by: Stefan Weil w...@mail.berlios.de
---
 dis-asm.h |1 +
 disas.c   |4 +-
 dyngen-exec.h |   13 +-
 exec-all.h|   13 +-
 tcg/bytecode/README   |  129 ++
 tcg/bytecode/tcg-target.c |  955 +
 tcg/bytecode/tcg-target.h |  152 +++
 7 files changed, 1263 insertions(+), 4 deletions(-)
 create mode 100644 tcg/bytecode/README
 create mode 100644 tcg/bytecode/tcg-target.c
 create mode 100644 tcg/bytecode/tcg-target.h

diff --git a/dis-asm.h b/dis-asm.h
index 5b07d7f..876975f 100644
--- a/dis-asm.h
+++ b/dis-asm.h
@@ -365,6 +365,7 @@ typedef struct disassemble_info {
target address.  Return number of bytes processed.  */
 typedef int (*disassembler_ftype) (bfd_vma, disassemble_info *);
 
+int print_insn_bytecode(bfd_vma, disassemble_info*);
 int print_insn_big_mips (bfd_vma, disassemble_info*);
 int print_insn_little_mips  (bfd_vma, disassemble_info*);
 int print_insn_i386 (bfd_vma, disassemble_info*);
diff --git a/disas.c b/disas.c
index 611b30b..e2061d8 100644
--- a/disas.c
+++ b/disas.c
@@ -273,7 +273,9 @@ void disas(FILE *out, void *code, unsigned long size)
 #else
 disasm_info.endian = BFD_ENDIAN_LITTLE;
 #endif
-#if defined(__i386__)
+#if defined(CONFIG_TCG_INTERPRETER)
+print_insn = print_insn_bytecode;
+#elif defined(__i386__)
 disasm_info.mach = bfd_mach_i386_i386;
 print_insn = print_insn_i386;
 #elif defined(__x86_64__)
diff --git a/dyngen-exec.h b/dyngen-exec.h
index 8beb7f3..64f76c4 100644
--- a/dyngen-exec.h
+++ b/dyngen-exec.h
@@ -19,7 +19,9 @@
 #if !defined(__DYNGEN_EXEC_H__)
 #define __DYNGEN_EXEC_H__
 
-#if defined(__i386__)
+#if defined(CONFIG_TCG_INTERPRETER)
+/* The TCG interpreter does not use special registers. */
+#elif defined(__i386__)
 #define AREG0 ebp
 #elif defined(__x86_64__)
 #define AREG0 r14
@@ -55,11 +57,18 @@
 #error unsupported CPU
 #endif
 
+#if defined(AREG0)
 register CPUState *env asm(AREG0);
+#else
+extern CPUState *env;
+#endif
 
 /* The return address may point to the start of the next instruction.
Subtracting one gets us the call instruction itself.  */
-#if defined(__s390__)  !defined(__s390x__)
+#if defined(CONFIG_TCG_INTERPRETER)
+extern uint8_t *tci_tb_ptr;
+# define GETPC() ((void *)tci_tb_ptr)
+#elif defined(__s390__)  !defined(__s390x__)
 # define GETPC() ((void*)(((unsigned long)__builtin_return_address(0)  
0x7fffUL) - 1))
 #elif defined(__arm__)
 /* Thumb return addresses have the low bit set, so we need to subtract two.
diff --git a/exec-all.h b/exec-all.h
index 9b8d62c..0116acd 100644
--- a/exec-all.h
+++ b/exec-all.h
@@ -122,6 +122,8 @@ void tlb_set_page(CPUState *env, target_ulong vaddr,
 
 #if defined(_ARCH_PPC) || defined(__x86_64__) || defined(__arm__) || 
defined(__i386__)
 #define USE_DIRECT_JUMP
+#elif defined(CONFIG_TCG_INTERPRETER)
+#define USE_DIRECT_JUMP
 #endif
 
 struct TranslationBlock {
@@ -189,7 +191,14 @@ extern TranslationBlock 
*tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE];
 
 #if defined(USE_DIRECT_JUMP)
 
-#if defined(_ARCH_PPC)
+#if defined(CONFIG_TCG_INTERPRETER)
+static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
+{
+/* patch the branch destination */
+*(uint32_t *)jmp_addr = addr - (jmp_addr + 4);
+/* no need to flush icache explicitly */
+}
+#elif defined(_ARCH_PPC)
 void ppc_tb_set_jmp_target(unsigned long jmp_addr, unsigned long addr);
 #define tb_set_jmp_target1 ppc_tb_set_jmp_target
 #elif defined(__i386__) || defined(__x86_64__)
@@ -223,6 +232,8 @@ static inline void tb_set_jmp_target1(unsigned long 
jmp_addr, unsigned long addr
 __asm __volatile__ (swi 0x9f0002 : : r (_beg), r (_end), r (_flg));
 #endif
 }
+#else
+#error tb_set_jmp_target1 is missing
 #endif
 
 static inline void tb_set_jmp_target(TranslationBlock *tb,
diff --git a/tcg/bytecode/README b/tcg/bytecode/README
new file mode 100644
index 000..6fe9755
--- /dev/null
+++ b/tcg/bytecode/README
@@ -0,0 +1,129 @@
+TCG Interpreter (TCI) - Copyright (c) 2011 Stefan Weil.
+
+This file is released under GPL 2 or later.
+
+1) Introduction
+
+TCG (Tiny Code Generator) is a code generator which translates
+code fragments (basic blocks) from target code (any of the
+targets supported by QEMU) to a code representation which
+can be run on a host.
+
+QEMU can create native code for some hosts (arm, hppa, i386, ia64, ppc, ppc64,
+s390, sparc, x86_64). For others, unofficial host support was written.
+
+By adding a code generator for a virtual machine and using an
+interpreter for the generated bytecode, it is possible to
+support (almost) any host.
+
+This is what TCI (Tiny Code

[Qemu-devel] [PATCH 5/8] tcg: Add interpreter for bytecode

2011-09-17 Thread Stefan Weil

Signed-off-by: Stefan Weil w...@mail.berlios.de
---
 tcg/tcg.h |4 +-
 tcg/tci.c | 1200 +
 2 files changed, 1203 insertions(+), 1 deletions(-)
 create mode 100644 tcg/tci.c

diff --git a/tcg/tcg.h b/tcg/tcg.h
index 1859fae..c99c7ea 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -577,7 +577,9 @@ TCGv_i32 tcg_const_local_i32(int32_t val);
 TCGv_i64 tcg_const_local_i64(int64_t val);
 
 extern uint8_t code_gen_prologue[];
-#if defined(_ARCH_PPC)  !defined(_ARCH_PPC64)
+#if defined(CONFIG_TCG_INTERPRETER)
+unsigned long tcg_qemu_tb_exec(CPUState *env, uint8_t *tb_ptr);
+#elif defined(_ARCH_PPC)  !defined(_ARCH_PPC64)
 #define tcg_qemu_tb_exec(env, tb_ptr)\
 ((long REGPARM __attribute__ ((longcall)) (*)(void *, void 
*))code_gen_prologue)(env, tb_ptr)
 #else
diff --git a/tcg/tci.c b/tcg/tci.c
new file mode 100644
index 000..eea9992
--- /dev/null
+++ b/tcg/tci.c
@@ -0,0 +1,1200 @@
+/*
+ * Tiny Code Interpreter for QEMU
+ *
+ * Copyright (c) 2009, 2011 Stefan Weil
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see http://www.gnu.org/licenses/.
+ */
+
+#include config.h
+#include qemu-common.h
+#include exec-all.h   /* MAX_OPC_PARAM_IARGS */
+#include tcg-op.h
+
+/* Marker for missing code. */
+#define TODO() \
+do { \
+fprintf(stderr, TODO %s:%u: %s()\n, \
+__FILE__, __LINE__, __func__); \
+tcg_abort(); \
+} while (0)
+
+/* Trace message to see program flow. */
+#if defined(CONFIG_DEBUG_TCG_INTERPRETER)
+#define TRACE() \
+loglevel \
+? fprintf(stderr, TCG %s:%u: %s()\n, __FILE__, __LINE__, __func__) \
+: (void)0
+#else
+#define TRACE() ((void)0)
+#endif
+
+#if MAX_OPC_PARAM_IARGS != 4
+# error Fix needed, number of supported input arguments changed!
+#endif
+#if TCG_TARGET_REG_BITS == 32
+typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong,
+tcg_target_ulong, tcg_target_ulong,
+tcg_target_ulong, tcg_target_ulong,
+tcg_target_ulong, tcg_target_ulong);
+#else
+typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong,
+tcg_target_ulong, tcg_target_ulong);
+#endif
+
+CPUState *env;
+
+/* Alpha and SH4 user mode emulations call GETPC(), so they need tci_tb_ptr. */
+#if defined(CONFIG_SOFTMMU) || defined(TARGET_ALPHA) || defined(TARGET_SH4)
+# define NEEDS_TB_PTR
+#endif
+
+#ifdef NEEDS_TB_PTR
+uint8_t *tci_tb_ptr;
+#endif
+
+static tcg_target_ulong tci_reg[TCG_TARGET_NB_REGS];
+
+static tcg_target_ulong tci_read_reg(TCGRegister index)
+{
+assert(index  ARRAY_SIZE(tci_reg));
+return tci_reg[index];
+}
+
+#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64
+static int8_t tci_read_reg8s(TCGRegister index)
+{
+return (int8_t)tci_read_reg(index);
+}
+#endif
+
+#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64
+static int16_t tci_read_reg16s(TCGRegister index)
+{
+return (int16_t)tci_read_reg(index);
+}
+#endif
+
+#if TCG_TARGET_REG_BITS == 64
+static int32_t tci_read_reg32s(TCGRegister index)
+{
+return (int32_t)tci_read_reg(index);
+}
+#endif
+
+static uint8_t tci_read_reg8(TCGRegister index)
+{
+return (uint8_t)tci_read_reg(index);
+}
+
+static uint16_t tci_read_reg16(TCGRegister index)
+{
+return (uint16_t)tci_read_reg(index);
+}
+
+static uint32_t tci_read_reg32(TCGRegister index)
+{
+return (uint32_t)tci_read_reg(index);
+}
+
+#if TCG_TARGET_REG_BITS == 64
+static uint64_t tci_read_reg64(TCGRegister index)
+{
+return tci_read_reg(index);
+}
+#endif
+
+static void tci_write_reg(TCGRegister index, tcg_target_ulong value)
+{
+assert(index  ARRAY_SIZE(tci_reg));
+assert(index != TCG_AREG0);
+tci_reg[index] = value;
+}
+
+static void tci_write_reg8s(TCGRegister index, int8_t value)
+{
+tci_write_reg(index, value);
+}
+
+static void tci_write_reg16s(TCGRegister index, int16_t value)
+{
+tci_write_reg(index, value);
+}
+
+#if TCG_TARGET_REG_BITS == 64
+static void tci_write_reg32s(TCGRegister index, int32_t value)
+{
+tci_write_reg(index, value);
+}
+#endif
+
+static void tci_write_reg8(TCGRegister index, uint8_t value)
+{
+tci_write_reg(index, value);
+}
+
+static void tci_write_reg16(TCGRegister index, uint16_t value)
+{
+

[Qemu-devel] [PATCH 2/8] tcg: Don't declare TCG_TARGET_REG_BITS in tcg-target.h

2011-09-17 Thread Stefan Weil

It is now declared for all tcg targets in tcg.h,
so the tcg target specific declarations are redundant.

Signed-off-by: Stefan Weil w...@mail.berlios.de
---
 tcg/arm/tcg-target.h   |1 -
 tcg/hppa/tcg-target.h  |4 +---
 tcg/ia64/tcg-target.h  |2 --
 tcg/mips/tcg-target.h  |1 -
 tcg/ppc/tcg-target.h   |1 -
 tcg/ppc64/tcg-target.h |1 -
 tcg/s390/tcg-target.h  |6 --
 tcg/sparc/tcg-target.h |6 --
 8 files changed, 1 insertions(+), 21 deletions(-)

diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 0e0f69a..33afd97 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -24,7 +24,6 @@
  */
 #define TCG_TARGET_ARM 1
 
-#define TCG_TARGET_REG_BITS 32
 #undef TCG_TARGET_WORDS_BIGENDIAN
 #undef TCG_TARGET_STACK_GROWSUP
 
diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h
index ed90efc..ec9a7bf 100644
--- a/tcg/hppa/tcg-target.h
+++ b/tcg/hppa/tcg-target.h
@@ -24,9 +24,7 @@
 
 #define TCG_TARGET_HPPA 1
 
-#if defined(_PA_RISC1_1)
-#define TCG_TARGET_REG_BITS 32
-#else
+#if TCG_TARGET_REG_BITS != 32
 #error unsupported
 #endif
 
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index ddc93c1..578cf29 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -24,8 +24,6 @@
  */
 #define TCG_TARGET_IA64 1
 
-#define TCG_TARGET_REG_BITS 64
-
 /* We only map the first 64 registers */
 #define TCG_TARGET_NB_REGS 64
 enum {
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index 43c5501..e2a2571 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -25,7 +25,6 @@
  */
 #define TCG_TARGET_MIPS 1
 
-#define TCG_TARGET_REG_BITS 32
 #ifdef __MIPSEB__
 # define TCG_TARGET_WORDS_BIGENDIAN
 #endif
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index f9a88c4..5c2d612 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -23,7 +23,6 @@
  */
 #define TCG_TARGET_PPC 1
 
-#define TCG_TARGET_REG_BITS 32
 #define TCG_TARGET_WORDS_BIGENDIAN
 #define TCG_TARGET_NB_REGS 32
 
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index 5395131..8d1fb73 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -23,7 +23,6 @@
  */
 #define TCG_TARGET_PPC64 1
 
-#define TCG_TARGET_REG_BITS 64
 #define TCG_TARGET_WORDS_BIGENDIAN
 #define TCG_TARGET_NB_REGS 32
 
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index 35ebac3..e4cd641 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -23,12 +23,6 @@
  */
 #define TCG_TARGET_S390 1
 
-#ifdef __s390x__
-#define TCG_TARGET_REG_BITS 64
-#else
-#define TCG_TARGET_REG_BITS 32
-#endif
-
 #define TCG_TARGET_WORDS_BIGENDIAN
 
 typedef enum TCGReg {
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 7b4e7f9..1464ef4 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -23,12 +23,6 @@
  */
 #define TCG_TARGET_SPARC 1
 
-#if defined(__sparc_v9__)  !defined(__sparc_v8plus__)
-#define TCG_TARGET_REG_BITS 64
-#else
-#define TCG_TARGET_REG_BITS 32
-#endif
-
 #define TCG_TARGET_WORDS_BIGENDIAN
 
 #define TCG_TARGET_NB_REGS 32
-- 
1.7.2.5

[Qemu-devel] [PATCH 3/8] tcg: Add forward declarations for local functions

2011-09-17 Thread Stefan Weil

These functions are defined in the tcg target specific file
tcg-target.c.

The forward declarations assert that every tcg target uses
the same function prototype.

Signed-off-by: Stefan Weil w...@mail.berlios.de
---
 tcg/tcg.c |   16 
 1 files changed, 16 insertions(+), 0 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 411f971..bdd7a67 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -63,11 +63,27 @@
 #error GUEST_BASE not supported on this host.
 #endif
 
+/* Forward declarations for functions declared in tcg-target.c and used here. 
*/
 static void tcg_target_init(TCGContext *s);
 static void tcg_target_qemu_prologue(TCGContext *s);
 static void patch_reloc(uint8_t *code_ptr, int type, 
 tcg_target_long value, tcg_target_long addend);
 
+/* Forward declarations for functions declared and used in tcg-target.c. */
+static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str);
+static void tcg_out_ld(TCGContext *s, TCGType type, int ret, int arg1,
+   tcg_target_long arg2);
+static void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg);
+static void tcg_out_movi(TCGContext *s, TCGType type,
+ int ret, tcg_target_long arg);
+static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
+   const int *const_args);
+static void tcg_out_st(TCGContext *s, TCGType type, int arg, int arg1,
+   tcg_target_long arg2);
+static int tcg_target_const_match(tcg_target_long val,
+  const TCGArgConstraint *arg_ct);
+static int tcg_target_get_call_iarg_regs_count(int flags);
+
 TCGOpDef tcg_op_defs[] = {
 #define DEF(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + 
oargs + cargs, flags },
 #include tcg-opc.h
-- 
1.7.2.5

Re: [Qemu-devel] [PATCH 8/8] ppc: Support tcg interpreter on ppc hosts

2011-09-17 Thread Peter Maydell

On 17 September 2011 21:00, Stefan Weil w...@mail.berlios.de wrote:
 Tests of the tcg interpreter on an (emulated) ppc host
 needed this small change.

 Signed-off-by: Stefan Weil w...@mail.berlios.de
 ---
  cache-utils.h |    2 +-
  1 files changed, 1 insertions(+), 1 deletions(-)

 diff --git a/cache-utils.h b/cache-utils.h
 index 0b65907..7c3b282 100644
 --- a/cache-utils.h
 +++ b/cache-utils.h
 @@ -1,7 +1,7 @@
  #ifndef QEMU_CACHE_UTILS_H
  #define QEMU_CACHE_UTILS_H

 -#if defined(_ARCH_PPC)
 +#if defined(_ARCH_PPC)  !defined(CONFIG_TCG_INTERPRETER)
  struct qemu_cache_conf {
     unsigned long dcache_bsize;
     unsigned long icache_bsize;

This looks a bit odd, but I think that's partly an effect of
only the PPC flush_icache_range being in this header file when
for other architectures it is in tcg/*/tcg-target.h. If we
could have the cache flushing be in tcg/* for every target then
you wouldn't need to do an ifdef here.

-- PMM

Re: [Qemu-devel] [PATCH 8/8] ppc: Support tcg interpreter on ppc hosts

2011-09-17 Thread Stefan Weil


Am 17.09.2011 23:31, schrieb Peter Maydell:

On 17 September 2011 21:00, Stefan Weil w...@mail.berlios.de wrote:

Tests of the tcg interpreter on an (emulated) ppc host
needed this small change.

Signed-off-by: Stefan Weil w...@mail.berlios.de
---
  cache-utils.h |2 +-
  1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/cache-utils.h b/cache-utils.h
index 0b65907..7c3b282 100644
--- a/cache-utils.h
+++ b/cache-utils.h
@@ -1,7 +1,7 @@
  #ifndef QEMU_CACHE_UTILS_H
  #define QEMU_CACHE_UTILS_H

-#if defined(_ARCH_PPC)
+#if defined(_ARCH_PPC)  !defined(CONFIG_TCG_INTERPRETER)
  struct qemu_cache_conf {
 unsigned long dcache_bsize;
 unsigned long icache_bsize;

This looks a bit odd, but I think that's partly an effect of
only the PPC flush_icache_range being in this header file when
for other architectures it is in tcg/*/tcg-target.h. If we
could have the cache flushing be in tcg/* for every target then
you wouldn't need to do an ifdef here.

-- PMM


That's correct.

Re: [Qemu-devel] [PATCH v3 5/6] vga: Use linear mapping + dirty logging in chain 4 memory access mode

2011-09-17 Thread Blue Swirl

On Thu, Sep 15, 2011 at 11:31 AM, Avi Kivity a...@redhat.com wrote:
 On 09/15/2011 01:01 PM, Benjamin Herrenschmidt wrote:

   Sure :). So the problem is that when emulating the G3 Beige machine in
   QEMU (default ppc32 target) we also add a PCI VGA adapter. Apparently,
   on x86 that PCI VGA adapter can map the special VGA regions to
   somewhere, namely 0xa0000. With the memory api overhaul, this also
   slipped into the PPC world where mapping 0xa0000 with VGA adapters is
   a pretty bad idea, as it's occupied by RAM.
 
   Now the discussion was on which level that mapping would happen and
   which devices go through which buses which then would filter certain
   ranges from being mapped. Basically, which way does a memory request
   from the CPU go on a G3 Beige machine until it arrives the VGA
   adapter?
 
   I hope that concludes the actual question. Avi, if I explained this
   wrong, please correct me.

 Ok so there's several things here.

 First, the mapping from CPU addresses to PCI addresses. This depends on
 the host bridge chip. The MPC106, used in the Beige G3, itself supports
 different type of mappings.

  From memory, the way it's configured in a G3 is to have a 1:1 mapping of
 0x80000000 CPU to 0x80000000 PCI.

 That means that with this basic mapping, you cannot generate memory
 accesses to low PCI addresses such as 0xa0000.

 Alex, what this means (I think) is that pci_grackle_init() needs to create
 a container memory region and pass it to pci_register_bus() as the pci
 address space, and create an alias starting at 0x80000000 of the pci
 address space, and map that alias at address 0x80000000 of the system
 address space.

 See pc_init1() creating pci_memory and passing it to i440fx_init(), which
 then maps some aliases into the system address space and also gives it to
 pci_bus_new().  It's essentially the same thing with different details.

I think the attached patch (on top of ppc-next) should do it, but it
doesn't. Only the top area of the screen is shown, the rest is black.

 I don't remember (but it's possible) if it has another region which maps
 some other (high address) part of the address space down to 0 PCI.
 Typically that would be a smaller region which specifically allow access
 to the ISA hole that way.

 That would be done by mapping yet another alias.

 --
 error compiling committee.c: too many arguments to function


From c07f1116220cba7d2ee769b03de59b5a874b76db Mon Sep 17 00:00:00 2001
Message-Id: c07f1116220cba7d2ee769b03de59b5a874b76db.1316295419.git.blauwir...@gmail.com
From: Blue Swirl blauwir...@gmail.com
Date: Sat, 17 Sep 2011 20:30:50 +
Subject: [PATCH] PPC: use memory API to construct the PCI hole

Avoid vga.chain4 mapping by constructing a PCI hole for upper
2G of the PCI space.

Signed-off-by: Blue Swirl blauwir...@gmail.com
---
 hw/grackle_pci.c  |   11 ++-
 hw/ppc_newworld.c |2 --
 hw/ppc_oldworld.c |2 --
 hw/unin_pci.c |   18 --
 4 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/hw/grackle_pci.c b/hw/grackle_pci.c
index 9d3ff7d..94a608e 100644
--- a/hw/grackle_pci.c
+++ b/hw/grackle_pci.c
@@ -41,6 +41,8 @@
 typedef struct GrackleState {
 SysBusDevice busdev;
 PCIHostState host_state;
+MemoryRegion pci_mmio;
+MemoryRegion pci_hole;
 } GrackleState;
 
 /* Don't know if this matches real hardware, but it agrees with OHW.  */
@@ -73,11 +75,18 @@ PCIBus *pci_grackle_init(uint32_t base, qemu_irq *pic,
 qdev_init_nofail(dev);
 s = sysbus_from_qdev(dev);
 d = FROM_SYSBUS(GrackleState, s);
+
+memory_region_init(d-pci_mmio, pci-mmio, 0x1ULL);
+memory_region_init_alias(d-pci_hole, pci-hole, d-pci_mmio,
+ 0x8000ULL, 0x7e00ULL);
+memory_region_add_subregion(address_space_mem, 0x8000ULL,
+d-pci_hole);
+
 d-host_state.bus = pci_register_bus(d-busdev.qdev, pci,
  pci_grackle_set_irq,
  pci_grackle_map_irq,
  pic,
- address_space_mem,
+ d-pci_mmio,
  address_space_io,
  0, 4);
 
diff --git a/hw/ppc_newworld.c b/hw/ppc_newworld.c
index 5fb9359..bcdc0a3 100644
--- a/hw/ppc_newworld.c
+++ b/hw/ppc_newworld.c
@@ -263,8 +263,6 @@ static void ppc_core99_init (ram_addr_t ram_size,
 }
 }
 
-isa_mem_base = 0x8000;
-
 /* Register 8 MB of ISA IO space */
 isa_mmio_init(0xf200, 0x0080);
 
diff --git a/hw/ppc_oldworld.c b/hw/ppc_oldworld.c
index 3857075..5c17944 100644
--- a/hw/ppc_oldworld.c
+++ b/hw/ppc_oldworld.c
@@ -208,8 +208,6 @@ static void ppc_heathrow_init (ram_addr_t ram_size,
 }
 }
 
-isa_mem_base = 0x8000;
-
 /* Register 2 MB of ISA IO space */

Re: [Qemu-devel] [PATCH 3/8] tcg: Add forward declarations for local functions

2011-09-17 Thread Peter Maydell

On 17 September 2011 21:00, Stefan Weil w...@mail.berlios.de wrote:
 +/* Forward declarations for functions declared and used in tcg-target.c. */
 +static int target_parse_constraint(TCGArgConstraint *ct, const char 
 **pct_str);
 +static void tcg_out_ld(TCGContext *s, TCGType type, int ret, int arg1,
 +                       tcg_target_long arg2);
 +static void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg);
 +static void tcg_out_movi(TCGContext *s, TCGType type,
 +                         int ret, tcg_target_long arg);
 +static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
 +                       const int *const_args);
 +static void tcg_out_st(TCGContext *s, TCGType type, int arg, int arg1,
 +                       tcg_target_long arg2);
 +static int tcg_target_const_match(tcg_target_long val,
 +                                  const TCGArgConstraint *arg_ct);
 +static int tcg_target_get_call_iarg_regs_count(int flags);

I'm tempted to submit a bulk rename patch that renames the functions
in this list which don't start 'tcg_target_' so that they do...

-- PMM

Re: [Qemu-devel] [PATCH] Add privilege level check to several Cop0 instructions.

2011-09-17 Thread Johnson, Eric

The patch applies to a8467c7a0e8b024a18608ff7db31ca2f2297e641.

-Original Message-
From: qemu-devel-bounces+ericj=mips@nongnu.org 
[mailto:qemu-devel-bounces+ericj=mips@nongnu.org] On Behalf Of Eric Johnson
Sent: Saturday, September 17, 2011 5:06 PM
To: qemu-devel@nongnu.org; aurel...@aurel32.net
Subject: [Qemu-devel] [PATCH] Add privilege level check to several Cop0 
instructions.

The MIPS Architecture Verification Programs (AVPs) check privileged
instructions for the required privilege level.  These changes are needed
to pass the AVP suite.

Signed-off-by: Eric Johnson er...@mips.com
---
 target-mips/translate.c |   10 ++
 1 files changed, 10 insertions(+), 0 deletions(-)

diff --git a/target-mips/translate.c b/target-mips/translate.c
index d5b1c76..d99a716 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -5940,6 +5940,8 @@ static void gen_cp0 (CPUState *env, DisasContext *ctx, 
uint32_t opc, int rt, int
 {
 const char *opn = "ldst";
 
+check_cp0_enabled(ctx);
+
 switch (opc) {
 case OPC_MFC0:
 if (rt == 0) {
@@ -10125,6 +10127,7 @@ static void gen_pool32axf (CPUState *env, DisasContext 
*ctx, int rt, int rs,
 #ifndef CONFIG_USER_ONLY
 case MFC0:
 case MFC0 + 32:
+check_cp0_enabled(ctx);
 if (rt == 0) {
 /* Treat as NOP. */
 break;
@@ -10136,6 +10139,7 @@ static void gen_pool32axf (CPUState *env, DisasContext 
*ctx, int rt, int rs,
 {
 TCGv t0 = tcg_temp_new();
 
+check_cp0_enabled(ctx);
 gen_load_gpr(t0, rt);
 gen_mtc0(env, ctx, t0, rs, (ctx->opcode >> 11) & 0x7);
 tcg_temp_free(t0);
@@ -10230,10 +10234,12 @@ static void gen_pool32axf (CPUState *env, 
DisasContext *ctx, int rt, int rs,
 switch (minor) {
 case RDPGPR:
 check_insn(env, ctx, ISA_MIPS32R2);
+check_cp0_enabled(ctx);
 gen_load_srsgpr(rt, rs);
 break;
 case WRPGPR:
 check_insn(env, ctx, ISA_MIPS32R2);
+check_cp0_enabled(ctx);
 gen_store_srsgpr(rt, rs);
 break;
 default:
@@ -10276,6 +10282,7 @@ static void gen_pool32axf (CPUState *env, DisasContext 
*ctx, int rt, int rs,
 {
 TCGv t0 = tcg_temp_new();
 
+check_cp0_enabled(ctx);
 save_cpu_state(ctx, 1);
 gen_helper_di(t0);
 gen_store_gpr(t0, rs);
@@ -10288,6 +10295,7 @@ static void gen_pool32axf (CPUState *env, DisasContext 
*ctx, int rt, int rs,
 {
 TCGv t0 = tcg_temp_new();
 
+check_cp0_enabled(ctx);
 save_cpu_state(ctx, 1);
 gen_helper_ei(t0);
 gen_store_gpr(t0, rs);
@@ -10765,6 +10773,7 @@ static void decode_micromips32_opc (CPUState *env, 
DisasContext *ctx,
 minor = (ctx->opcode >> 12) & 0xf;
 switch (minor) {
 case CACHE:
+check_cp0_enabled(ctx);
 /* Treat as no-op. */
 break;
 case LWC2:
@@ -12216,6 +12225,7 @@ static void decode_opc (CPUState *env, DisasContext 
*ctx, int *is_branch)
  break;
 case OPC_CACHE:
 check_insn(env, ctx, ISA_MIPS3 | ISA_MIPS32);
+check_cp0_enabled(ctx);
 /* Treat as NOP. */
 break;
 case OPC_PREF:

[Qemu-devel] [PATCH] Allow microMIPS SWP and SDP to have RD equal to BASE.

2011-09-17 Thread Eric Johnson

The microMIPS SWP and SDP instructions do not modify GPRs.  So their
behavior is well defined when RD equals BASE.  The MIPS Architecture
Verification Programs (AVPs) check that they work as expected.  This
is required for AVPs to pass.

Signed-off-by: Eric Johnson er...@mips.com
---
 target-mips/translate.c |   10 +-
 1 files changed, 9 insertions(+), 1 deletions(-)

The patch applies to a8467c7a0e8b024a18608ff7db31ca2f2297e641.

diff --git a/target-mips/translate.c b/target-mips/translate.c
index d5b1c76..82cf75b 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -10034,7 +10034,7 @@ static void gen_ldst_pair (DisasContext *ctx, uint32_t 
opc, int rd,
 const char *opn = "ldst_pair";
 TCGv t0, t1;
 
-if (ctx->hflags & MIPS_HFLAG_BMASK || rd == 31 || rd == base) {
+if (ctx->hflags & MIPS_HFLAG_BMASK || rd == 31) {
 generate_exception(ctx, EXCP_RI);
 return;
 }
@@ -10046,6 +10046,10 @@ static void gen_ldst_pair (DisasContext *ctx, uint32_t 
opc, int rd,
 
 switch (opc) {
 case LWP:
+if (rd == base) {
+generate_exception(ctx, EXCP_RI);
+return;
+}
 save_cpu_state(ctx, 0);
 op_ld_lw(t1, t0, ctx);
 gen_store_gpr(t1, rd);
@@ -10067,6 +10071,10 @@ static void gen_ldst_pair (DisasContext *ctx, uint32_t 
opc, int rd,
 break;
 #ifdef TARGET_MIPS64
 case LDP:
+if (rd == base) {
+generate_exception(ctx, EXCP_RI);
+return;
+}
 save_cpu_state(ctx, 0);
 op_ld_ld(t1, t0, ctx);
 gen_store_gpr(t1, rd);

[Qemu-devel] [PATCH] Fix compile when MIPS_DEBUG_DISAS is defined.

2011-09-17 Thread Eric Johnson

When MIPS_DEBUG_DISAS is defined the gen_logic_imm, gen_slt_imm,
gen_cond_move, gen_logic and gen_slt functions cause errors because ctx
is not defined.  Fixed the functions by passing in the DisasContext.

Signed-off-by: Eric Johnson er...@mips.com
---
 target-mips/translate.c |   72 +-
 1 files changed, 39 insertions(+), 33 deletions(-)

This patch applies to a8467c7a0e8b024a18608ff7db31ca2f2297e641.

diff --git a/target-mips/translate.c b/target-mips/translate.c
index d5b1c76..00afc48 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -1420,7 +1420,8 @@ static void gen_arith_imm (CPUState *env, DisasContext 
*ctx, uint32_t opc,
 }
 
 /* Logic with immediate operand */
-static void gen_logic_imm (CPUState *env, uint32_t opc, int rt, int rs, 
int16_t imm)
+static void gen_logic_imm(CPUState *env, DisasContext *ctx, uint32_t opc,
+int rt, int rs, int16_t imm)
 {
 target_ulong uimm;
 const char *opn = "imm logic";
@@ -1463,7 +1464,8 @@ static void gen_logic_imm (CPUState *env, uint32_t opc, 
int rt, int rs, int16_t
 }
 
 /* Set on less than with immediate operand */
-static void gen_slt_imm (CPUState *env, uint32_t opc, int rt, int rs, int16_t 
imm)
+static void gen_slt_imm(CPUState *env, DisasContext *ctx, uint32_t opc,
+int rt, int rs, int16_t imm)
 {
 target_ulong uimm = (target_long)imm; /* Sign extend to 32/64 bits */
 const char *opn = "imm arith";
@@ -1764,7 +1766,8 @@ static void gen_arith (CPUState *env, DisasContext *ctx, 
uint32_t opc,
 }
 
 /* Conditional move */
-static void gen_cond_move (CPUState *env, uint32_t opc, int rd, int rs, int rt)
+static void gen_cond_move(CPUState *env, DisasContext *ctx, uint32_t opc,
+int rd, int rs, int rt)
 {
 const char *opn = "cond move";
 int l1;
@@ -1802,7 +1805,8 @@ static void gen_cond_move (CPUState *env, uint32_t opc, 
int rd, int rs, int rt)
 }
 
 /* Logic */
-static void gen_logic (CPUState *env, uint32_t opc, int rd, int rs, int rt)
+static void gen_logic(CPUState *env, DisasContext *ctx, uint32_t opc, int rd,
+int rs, int rt)
 {
 const char *opn = "logic";
 
@@ -1863,7 +1867,8 @@ static void gen_logic (CPUState *env, uint32_t opc, int 
rd, int rs, int rt)
 }
 
 /* Set on lower than */
-static void gen_slt (CPUState *env, uint32_t opc, int rd, int rs, int rt)
+static void gen_slt(CPUState *env, DisasContext *ctx, uint32_t opc, int rd,
+int rs, int rt)
 {
 const char *opn = "slt";
 TCGv t0, t1;
@@ -8763,10 +8768,10 @@ static int decode_extended_mips16_opc (CPUState *env, 
DisasContext *ctx,
 gen_arith_imm(env, ctx, OPC_ADDIU, rx, rx, imm);
 break;
 case M16_OPC_SLTI:
-gen_slt_imm(env, OPC_SLTI, 24, rx, imm);
+gen_slt_imm(env, ctx, OPC_SLTI, 24, rx, imm);
 break;
 case M16_OPC_SLTIU:
-gen_slt_imm(env, OPC_SLTIU, 24, rx, imm);
+gen_slt_imm(env, ctx, OPC_SLTIU, 24, rx, imm);
 break;
 case M16_OPC_I8:
 switch (funct) {
@@ -8978,14 +8983,14 @@ static int decode_mips16_opc (CPUState *env, 
DisasContext *ctx,
 {
 int16_t imm = (uint8_t) ctx->opcode;
 
-gen_slt_imm(env, OPC_SLTI, 24, rx, imm);
+gen_slt_imm(env, ctx, OPC_SLTI, 24, rx, imm);
 }
 break;
 case M16_OPC_SLTIU:
 {
 int16_t imm = (uint8_t) ctx->opcode;
 
-gen_slt_imm(env, OPC_SLTIU, 24, rx, imm);
+gen_slt_imm(env, ctx, OPC_SLTIU, 24, rx, imm);
 }
 break;
 case M16_OPC_I8:
@@ -9061,7 +9066,7 @@ static int decode_mips16_opc (CPUState *env, DisasContext 
*ctx,
 {
 int16_t imm = (uint8_t) ctx->opcode;
 
-gen_logic_imm(env, OPC_XORI, 24, rx, imm);
+gen_logic_imm(env, ctx, OPC_XORI, 24, rx, imm);
 }
 break;
 #if defined(TARGET_MIPS64)
@@ -9173,10 +9178,10 @@ static int decode_mips16_opc (CPUState *env, 
DisasContext *ctx,
 }
 break;
 case RR_SLT:
-gen_slt(env, OPC_SLT, 24, rx, ry);
+gen_slt(env, ctx, OPC_SLT, 24, rx, ry);
 break;
 case RR_SLTU:
-gen_slt(env, OPC_SLTU, 24, rx, ry);
+gen_slt(env, ctx, OPC_SLTU, 24, rx, ry);
 break;
 case RR_BREAK:
 generate_exception(ctx, EXCP_BREAK);
@@ -9197,22 +9202,22 @@ static int decode_mips16_opc (CPUState *env, 
DisasContext *ctx,
 break;
 #endif
 case RR_CMP:
-gen_logic(env, OPC_XOR, 24, rx, ry);
+gen_logic(env, ctx, OPC_XOR, 24, rx, ry);
 break;
 case RR_NEG:
 gen_arith(env, ctx, OPC_SUBU, rx, 0, ry);
 break;
 case RR_AND:
-gen_logic(env, OPC_AND, rx, rx, ry);
+gen_logic(env, ctx, OPC_AND, rx, rx, ry);
 break;
 case RR_OR:
-gen_logic(env, OPC_OR, rx, rx, ry);
+gen_logic(env, ctx, OPC_OR, rx, rx, ry);

Re: [Qemu-devel] [PATCH 5/8] tcg: Add interpreter for bytecode

2011-09-17 Thread Andi Kleen

Stefan Weil w...@mail.berlios.de writes:
> +
> +switch (opc) {
> +case INDEX_op_end:
> +case INDEX_op_nop:
> +break;

You could probably get some more speed out of this by using a threaded
interpreter with gcc's computed goto extension. That's typically
significantly faster than a plain switch in a loop.

static void *ops[] = {
   op1,
   op2,
   ...
};

#define NEXT() goto *ops[*tb_ptr++];

op1:
...
NEXT();

-Andi

-- 
a...@linux.intel.com -- Speaking for myself only

Re: [Qemu-devel] [PATCH 5/8] tcg: Add interpreter for bytecode

2011-09-17 Thread Stefan Weil


Am 18.09.2011 06:03, schrieb Andi Kleen:

Stefan Weil w...@mail.berlios.de writes:

+
+ switch (opc) {
+ case INDEX_op_end:
+ case INDEX_op_nop:
+ break;


You could probably get some more speed out of this by using a threaded
interpreter with gcc's computed goto extension. That's typically
significantly faster than a plain switch in a loop.

static void *ops[] = {
op1,
op2,
...
};

#define NEXT() goto *ops[*tb_ptr++];

op1:
...
NEXT();

-Andi


Is there really any difference in the generated code?
gcc already uses a jump table internally to handle the
switch cases.

- Stefan

48 matches

Mail list logo