[Qemu-devel] [net-next RFC V2 PATCH 4/5] tuntap: multiqueue support
This patch adds multiqueue support for the tap device by allowing multiple sockets to be attached to a tap device. We can then parallelize packet transmission/reception by putting packets into different sockets. The following steps are used when choosing tx queues: 1 For packets that come from multiqueue nics, we just choose the tx queue based on which physical queue the packets come from. 2 Otherwise we try to use rxhash to choose the queue. 3 If all of the above fails, we always use the first queue. In order to make the tx path lockless, like macvtap, netif_tx_lock_bh() is replaced by RCU and NETIF_F_LLTX to synchronize between the hot path and system calls. Signed-off-by: Jason Wang jasow...@redhat.com --- drivers/net/tun.c | 358 + 1 files changed, 223 insertions(+), 135 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index dc768e0..ec29f85 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -108,6 +108,8 @@ struct tap_filter { unsigned char addr[FLT_EXACT_COUNT][ETH_ALEN]; }; +#define MAX_TAP_QUEUES (NR_CPUS < 16 ? 
NR_CPUS : 16) + struct tun_file { struct sock sk; struct socket socket; @@ -115,16 +117,18 @@ struct tun_file { int vnet_hdr_sz; struct tap_filter txflt; atomic_t count; - struct tun_struct *tun; + struct tun_struct __rcu *tun; struct net *net; struct fasync_struct *fasync; unsigned int flags; + u16 queue_index; }; struct tun_sock; struct tun_struct { - struct tun_file *tfile; + struct tun_file *tfiles[MAX_TAP_QUEUES]; + unsigned intnumqueues; unsigned intflags; uid_t owner; gid_t group; @@ -139,80 +143,160 @@ struct tun_struct { #endif }; -static int tun_attach(struct tun_struct *tun, struct file *file) +static DEFINE_SPINLOCK(tun_lock); + +/* + * tun_get_queue(): calculate the queue index + * - if skbs comes from mq nics, we can just borrow + * - if not, calculate from the hash + */ +static struct tun_file *tun_get_queue(struct net_device *dev, + struct sk_buff *skb) { - struct tun_file *tfile = file-private_data; - int err; + struct tun_struct *tun = netdev_priv(dev); + struct tun_file *tfile = NULL; + int numqueues = tun-numqueues; + __u32 rxq; - ASSERT_RTNL(); + BUG_ON(!rcu_read_lock_held()); - netif_tx_lock_bh(tun-dev); + if (!numqueues) + goto out; - err = -EINVAL; - if (tfile-tun) + if (numqueues == 1) { + tfile = rcu_dereference(tun-tfiles[0]); goto out; + } - err = -EBUSY; - if (tun-tfile) + if (likely(skb_rx_queue_recorded(skb))) { + rxq = skb_get_rx_queue(skb); + + while (unlikely(rxq = numqueues)) + rxq -= numqueues; + + tfile = rcu_dereference(tun-tfiles[rxq]); goto out; + } - err = 0; - tfile-tun = tun; - tun-tfile = tfile; - netif_carrier_on(tun-dev); - dev_hold(tun-dev); - sock_hold(tfile-sk); - atomic_inc(tfile-count); + /* Check if we can use flow to select a queue */ + rxq = skb_get_rxhash(skb); + if (rxq) { + u32 idx = ((u64)rxq * numqueues) 32; + tfile = rcu_dereference(tun-tfiles[idx]); + goto out; + } + tfile = rcu_dereference(tun-tfiles[0]); out: - netif_tx_unlock_bh(tun-dev); - return err; + return tfile; } -static void __tun_detach(struct 
tun_struct *tun) +static int tun_detach(struct tun_file *tfile, bool clean) { - struct tun_file *tfile = tun-tfile; - /* Detach from net device */ - netif_tx_lock_bh(tun-dev); - netif_carrier_off(tun-dev); - tun-tfile = NULL; - netif_tx_unlock_bh(tun-dev); - - /* Drop read queue */ - skb_queue_purge(tfile-socket.sk-sk_receive_queue); - - /* Drop the extra count on the net device */ - dev_put(tun-dev); -} + struct tun_struct *tun; + struct net_device *dev = NULL; + bool destroy = false; -static void tun_detach(struct tun_struct *tun) -{ - rtnl_lock(); - __tun_detach(tun); - rtnl_unlock(); -} + spin_lock(tun_lock); -static struct tun_struct *__tun_get(struct tun_file *tfile) -{ - struct tun_struct *tun = NULL; + tun = rcu_dereference_protected(tfile-tun, + lockdep_is_held(tun_lock)); + if (tun) { + u16 index = tfile-queue_index; + BUG_ON(index tun-numqueues); + BUG_ON(!tun-tfiles[tun-numqueues - 1]); + dev = tun-dev; + + rcu_assign_pointer(tun-tfiles[index], +
[Qemu-devel] [net-next RFC V2 PATCH 5/5] tuntap: add ioctls to attach or detach a file from tap device
New ioctls were added to let multiple files/sockets to be attached to a tap device. Signed-off-by: Jason Wang jasow...@redhat.com --- drivers/net/tun.c | 25 ++--- include/linux/if_tun.h |3 +++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index ec29f85..6a1b591 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1343,11 +1343,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, { struct tun_file *tfile = file-private_data; struct tun_struct *tun; + struct net_device *dev = NULL; void __user* argp = (void __user*)arg; struct ifreq ifr; int ret; - if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89) + if (cmd == TUNSETIFF || cmd == TUNATTACHQUEUE || _IOC_TYPE(cmd) == 0x89) if (copy_from_user(ifr, argp, ifreq_len)) return -EFAULT; @@ -1356,7 +1357,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, * This is needed because we never checked for invalid flags on * TUNSETIFF. */ return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE | - IFF_VNET_HDR, + IFF_VNET_HDR | IFF_MULTI_QUEUE, (unsigned int __user*)argp); } @@ -1372,6 +1373,9 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, return -EFAULT; return ret; } + if (cmd == TUNDETACHQUEUE) { + return tun_detach(tfile, false); + } rtnl_lock(); @@ -1379,7 +1383,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, ret = -EBADFD; tun = rcu_dereference(tfile-tun); - if (!tun) + if (!tun cmd != TUNATTACHQUEUE) goto unlock; @@ -1394,6 +1398,21 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, ret = -EFAULT; goto out; + case TUNATTACHQUEUE: + dev = __dev_get_by_name(tfile-net, ifr.ifr_name); + if (!dev || dev-netdev_ops != tap_netdev_ops) { + ret = -EINVAL; + } else if (ifr.ifr_flags + ~(IFF_TAP | IFF_NO_PI | IFF_VNET_HDR)) { + /* ignore illegal flag */ + ret = -EINVAL; + } else { + tfile-flags = TUN_TAP_DEV | TUN_NO_PI | TUN_VNET_HDR; + tun = netdev_priv(dev); + ret = 
tun_attach(tun, file); + } + break; + case TUNSETNOCSUM: /* Disable/Enable checksum */ diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index c92a291..d3f24d8 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -54,6 +54,9 @@ #define TUNDETACHFILTER _IOW('T', 214, struct sock_fprog) #define TUNGETVNETHDRSZ _IOR('T', 215, int) #define TUNSETVNETHDRSZ _IOW('T', 216, int) +#define TUNATTACHQUEUE _IOW('T', 217, int) +#define TUNDETACHQUEUE _IOW('T', 218, int) + /* TUNSETIFF ifr flags */ #define IFF_TUN0x0001
Re: [Qemu-devel] [PATCH v2 03/15] sheepdog: move coroutine send/recv function to generic code
At Fri, 16 Sep 2011 16:25:40 +0200, Paolo Bonzini wrote: Outside coroutines, avoid busy waiting on EAGAIN by temporarily making the socket blocking. The API of qemu_recvv/qemu_sendv is slightly different from do_readv/do_writev because they do not handle coroutines. It returns the number of bytes written before encountering an EAGAIN. The specificity of yielding on EAGAIN is entirely in qemu-coroutine.c. Reviewed-by: MORITA Kazutaka morita.kazut...@lab.ntt.co.jp Signed-off-by: Paolo Bonzini pbonz...@redhat.com --- block/sheepdog.c | 225 ++ cutils.c | 177 ++ qemu-common.h| 30 +++ 3 files changed, 230 insertions(+), 202 deletions(-) It seems this patch causes a compile error of qemu-ga. Other things I noticed: static int send_req(int sockfd, SheepdogReq *hdr, void *data, unsigned int *wlen) { @@ -691,10 +509,9 @@ static int send_req(int sockfd, SheepdogReq *hdr, void *data, iov[1].iov_len = *wlen; } -ret = do_writev(sockfd, iov, sizeof(*hdr) + *wlen, 0); -if (ret) { +ret = qemu_sendv(sockfd, iov, sizeof(*hdr) + *wlen, 0); This is wrong because qemu_sendv() may return a smaller value than (sizeof(*hdr) + *wlen). We need to do things like qemu_write_full() here. +if (ret 0) { error_report(failed to send a req, %s, strerror(errno)); -ret = -1; } return ret; @@ -704,17 +521,19 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data, unsigned int *wlen, unsigned int *rlen) { int ret; +struct iovec iov; +socket_set_block(sockfd); ret = send_req(sockfd, hdr, data, wlen); -if (ret) { -ret = -1; +if (ret 0) { goto out; } -ret = do_read(sockfd, hdr, sizeof(*hdr)); -if (ret) { +iov.iov_base = hdr; +iov.iov_len = sizeof(*hdr); +ret = qemu_recvv(sockfd, iov, sizeof(*hdr), 0); qemu_recvv() may also return a smaller value than sizeof(*hdr) here. 
+if (ret 0) { error_report(failed to get a rsp, %s, strerror(errno)); -ret = -1; goto out; } @@ -723,15 +542,17 @@ static int do_req(int sockfd, SheepdogReq *hdr, void *data, } if (*rlen) { -ret = do_read(sockfd, data, *rlen); -if (ret) { +iov.iov_base = data; +iov.iov_len = *rlen; +ret = qemu_recvv(sockfd, iov, *rlen, 0); Same here. +if (ret 0) { error_report(failed to get the data, %s, strerror(errno)); -ret = -1; goto out; } } ret = 0; out: +socket_set_nonblock(sockfd); return ret; } [snip] + +/* + * Send/recv data with iovec buffers + * + * This function send/recv data from/to the iovec buffer directly. + * The first `offset' bytes in the iovec buffer are skipped and next + * `len' bytes are used. + * + * For example, + * + * do_sendv_recvv(sockfd, iov, len, offset, 1); + * + * is equal to + * + * char *buf = malloc(size); + * iov_to_buf(iov, iovcnt, buf, offset, size); + * send(sockfd, buf, size, 0); + * free(buf); + */ +static int do_sendv_recvv(int sockfd, struct iovec *iov, int len, int offset, + int do_sendv) +{ +int ret, diff, iovlen; +struct iovec *last_iov; + +/* last_iov is inclusive, so count from one. 
*/ +iovlen = 1; +last_iov = iov; +len += offset; + +while (last_iov-iov_len len) { +len -= last_iov-iov_len; + +last_iov++; +iovlen++; +} + +diff = last_iov-iov_len - len; +last_iov-iov_len -= diff; + +while (iov-iov_len = offset) { +offset -= iov-iov_len; + +iov++; +iovlen--; +} + +iov-iov_base = (char *) iov-iov_base + offset; +iov-iov_len -= offset; + +{ +#ifdef CONFIG_IOVEC +struct msghdr msg; +memset(msg, 0, sizeof(msg)); +msg.msg_iov = iov; +msg.msg_iovlen = iovlen; + +do { +if (do_sendv) { +ret = sendmsg(sockfd, msg, 0); +} else { +ret = recvmsg(sockfd, msg, 0); +} +} while (ret == -1 errno == EINTR); +#else +struct iovec *p = iov; +ret = 0; +while (iovlen 0) { +int rc; +if (do_sendv) { +rc = send(sockfd, p-iov_base, p-iov_len, 0); +} else { +rc = qemu_recv(sockfd, p-iov_base, p-iov_len, 0); +} +if (rc == -1) { +if (errno == EINTR) { +continue; +} +if (ret == 0) { +ret = -1; +} +break; +} +iovlen--, p++;
Re: [Qemu-devel] [PATCH v2 04/15] coroutine-io: handle zero returns from recv
At Fri, 16 Sep 2011 16:25:41 +0200, Paolo Bonzini wrote: When the other side is shutdown, read returns zero (writes return EPIPE). In this case, care must be taken to avoid infinite loops. This error was already present in sheepdog. Cc: MORITA Kazutaka morita.kazut...@lab.ntt.co.jp Signed-off-by: Paolo Bonzini pbonz...@redhat.com --- cutils.c |8 +++- 1 files changed, 7 insertions(+), 1 deletions(-) diff --git a/cutils.c b/cutils.c index b302020..295187f 100644 --- a/cutils.c +++ b/cutils.c @@ -501,8 +501,11 @@ static int do_sendv_recvv(int sockfd, struct iovec *iov, int len, int offset, } break; } -iovlen--, p++; +if (rc == 0) { +break; +} ret += rc; +iovlen--, p++; } #endif } @@ -567,6 +570,9 @@ int coroutine_fn qemu_co_sendv(int sockfd, struct iovec *iov, } break; } +if (ret == 0) { +break; +} total += ret, len -= ret; } When EPIPE is set, write() returns -1 doesn't it? It looks like qemu_co_recvv() handles a zero return correctly, so I think this patch is not needed. Thanks, Kazutaka
Re: [Qemu-devel] [PATCH] This patch adds a new block driver : iSCSI
On Fri, Sep 16, 2011 at 05:53:20PM +0200, Christoph Hellwig wrote: On Wed, Sep 14, 2011 at 04:50:25PM +0100, Stefan Hajnoczi wrote: I think in this case it will not make the code nicer. Since the external iSCSI library is based on callbacks it would be necessary to write the coroutines-callbacks adapter functions. So for example, the READ10 command would need a function that can be called in coroutine context and yields while libiscsi does the I/O. When the callback is invoked it will re-enter the coroutine. The area where coroutines are useful in the block layer is for image formats. We already have common coroutines-callback adapter functions in block.c so it's possible to write sequential code for image formats. They only need access to block layer functions which have already been adapted. But as soon as you interact with a callback-based API from the coroutine, then you need to write an adapter yourself. So you plan on keeping the aio interface around forever? Qemu with two different I/O paths was already more than painful enough, I don't think keeping three, and two of them being fairly complex is a good idea. The synchronous interfaces can be converted to the coroutine interfaces. The block layer needs a public aio interface because device emulation is asynchronous/callback-based. That doesn't mean that BlockDriver needs aio functions since block.c could transparently set up coroutines. So in theory BlockDriver could have only coroutine interfaces. Doing the aio to coroutine conversion is pretty mechanical, that's why I'm not afraid of doing it with this iSCSI code later. Stefan
[Qemu-devel] [PATCH] Remove qemu_host_page_bits
It was introduced with commit 54936004fddc52c321cb3f9a9a51140e782bed5d as host_page_bits but never used. Signed-off-by: Stefan Weil w...@mail.berlios.de --- cpu-all.h |1 - exec.c|4 2 files changed, 0 insertions(+), 5 deletions(-) diff --git a/cpu-all.h b/cpu-all.h index 3532026..e8143cd 100644 --- a/cpu-all.h +++ b/cpu-all.h @@ -290,7 +290,6 @@ extern unsigned long reserved_va; /* ??? These should be the larger of unsigned long and target_ulong. */ extern unsigned long qemu_real_host_page_size; -extern unsigned long qemu_host_page_bits; extern unsigned long qemu_host_page_size; extern unsigned long qemu_host_page_mask; diff --git a/exec.c b/exec.c index 3df6b23..639deae 100644 --- a/exec.c +++ b/exec.c @@ -183,7 +183,6 @@ typedef struct PageDesc { #define V_L1_SHIFT (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - V_L1_BITS) unsigned long qemu_real_host_page_size; -unsigned long qemu_host_page_bits; unsigned long qemu_host_page_size; unsigned long qemu_host_page_mask; @@ -275,9 +274,6 @@ static void page_init(void) qemu_host_page_size = qemu_real_host_page_size; if (qemu_host_page_size < TARGET_PAGE_SIZE) qemu_host_page_size = TARGET_PAGE_SIZE; -qemu_host_page_bits = 0; -while ((1 << qemu_host_page_bits) < qemu_host_page_size) -qemu_host_page_bits++; qemu_host_page_mask = ~(qemu_host_page_size - 1); #if defined(CONFIG_BSD) && defined(CONFIG_USER_ONLY) -- 1.7.2.5
Re: [Qemu-devel] [PATCH 0/5] Only one call output register needed for 64 bit hosts
Am 05.09.2011 11:06, schrieb Stefan Weil: The number of registers needed for the return value of TCG opcode INDEX_op_call is calculated in function tcg_gen_callN (nb_rets). It can be 0 or 1, for 32 bit hosts also 2 (return 64 bit value in two 32 bit registers). Some TCG implementations reserve 2 registers although only 1 is used. The following patches fix this. [PATCH 1/5] tcg/i386: Only one call output register needed for 64 bit hosts [PATCH 2/5] tcg/ia64: Only one call output register needed for 64 bit hosts [PATCH 3/5] tcg/s390: Only one call output register needed for 64 bit hosts [PATCH 4/5] tcg/sparc: Only one call output register needed for 64 bit hosts [PATCH 5/5] tcg/ppc64: Only one call output register needed for 64 bit hosts Patch 3 was acked by Richard Henderson, so was patch 5 by Malc. What about the other three patches (i386 / ia64, sparc)? They are all similar, so I think they could also be committed without more reviews. Thanks, Stefan Weil
Re: [Qemu-devel] [PATCH v2 03/15] sheepdog: move coroutine send/recv function to generic code
On 09/17/2011 08:29 AM, MORITA Kazutaka wrote: +#else +struct iovec *p = iov; +ret = 0; +while (iovlen 0) { +int rc; +if (do_sendv) { +rc = send(sockfd, p-iov_base, p-iov_len, 0); +} else { +rc = qemu_recv(sockfd, p-iov_base, p-iov_len, 0); +} +if (rc == -1) { +if (errno == EINTR) { +continue; +} +if (ret == 0) { +ret = -1; +} +break; +} +iovlen--, p++; +ret += rc; +} This code can be called inside coroutines with a non-blocking fd, so should we avoid busy waiting? It doesn't busy wait, it exits with EAGAIN. I'll squash in here the first hunk of patch 4, which is needed. qemu_co_recvv already handles reads that return zero, unlike sheepdog's do_readv_writev. I probably moved it there inadvertently while moving code around to cutils.c, but in order to fix qemu-ga I need to create a new file qemu-coroutine-io.c. Kevin, do you want me to resubmit everything, or are you going to apply some more patches to the block branch (5 to 12 should be fine)? Paolo
Re: [Qemu-devel] [PATCH v2 07/18] omap_gpmc: GPMC_IRQSTATUS is write-one-to-clear
On 17 September 2011 02:08, andrzej zaborowski balr...@gmail.com wrote: --- a/hw/omap_gpmc.c +++ b/hw/omap_gpmc.c @@ -284,7 +284,7 @@ static void omap_gpmc_write(void *opaque, target_phys_addr_t addr, break; case 0x018: /* GPMC_IRQSTATUS */ - s->irqen = ~value; + s->irqen &= ~value; Should we be clearing s->irqst here instead of irqen? Oops, you're right... (IIRC this change was a spotted-while-reading-code one, not a response to a behavioural issue with the model.) -- PMM
Re: [Qemu-devel] [PATCH 14/58] device tree: add nop_node
On Wed, Sep 14, 2011 at 8:42 AM, Alexander Graf ag...@suse.de wrote: We have a qemu internal abstraction layer on FDT. While I'm not fully convinced we need it at all, it's missing the nop_node functionality that we now need on e500. So let's add it and think about the general future of that API later. Signed-off-by: Alexander Graf ag...@suse.de --- device_tree.c | 11 +++ device_tree.h | 1 + 2 files changed, 12 insertions(+), 0 deletions(-) diff --git a/device_tree.c b/device_tree.c index 3a224d1..23e89e3 100644 --- a/device_tree.c +++ b/device_tree.c @@ -107,3 +107,14 @@ int qemu_devtree_setprop_string(void *fdt, const char *node_path, return fdt_setprop_string(fdt, offset, property, string); } + +int qemu_devtree_nop_node(void *fdt, const char *node_path) +{ + int offset; + + offset = fdt_path_offset(fdt, node_path); + if (offset < 0) -EBRACES + return offset; + + return fdt_nop_node(fdt, offset); +} diff --git a/device_tree.h b/device_tree.h index cecd98f..76fce5f 100644 --- a/device_tree.h +++ b/device_tree.h @@ -22,5 +22,6 @@ int qemu_devtree_setprop_cell(void *fdt, const char *node_path, const char *property, uint32_t val); int qemu_devtree_setprop_string(void *fdt, const char *node_path, const char *property, const char *string); +int qemu_devtree_nop_node(void *fdt, const char *node_path); #endif /* __DEVICE_TREE_H__ */ -- 1.6.0.2
Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
On Wed, Sep 14, 2011 at 8:42 AM, Alexander Graf ag...@suse.de wrote: CPUs that are not the boot CPU need to run in spinning code to check if they should run off to execute and if so where to jump to. This usually happens by leaving secondary CPUs looping and checking if some variable in memory changed. In an environment like Qemu however we can be more clever. We can just export the spin table the primary CPU modifies as MMIO region that would event based wake up the respective secondary CPUs. That saves us quite some cycles while the secondary CPUs are not up yet. So this patch adds a PV device that simply exports the spinning table into the guest and thus allows the primary CPU to wake up secondary ones. On Sparc32, there is no need for a PV device. The CPU is woken up from halted state with an IPI. Maybe you could use this approach? Signed-off-by: Alexander Graf ag...@suse.de --- v1 - v2: - change into MMIO scheme - map the secondary NIP instead of 0 1:1 - only map 64MB for TLB, same as u-boot - prepare code for 64-bit spinnings v2 - v3: - remove r6 - set MAS2_M - map EA 0 - use second TLB1 entry v3 - v4: - change to memoryops v4 - v5: - fix endianness bugs --- Makefile.target | 2 +- hw/ppce500_mpc8544ds.c | 33 - hw/ppce500_spin.c | 186 3 files changed, 216 insertions(+), 5 deletions(-) create mode 100644 hw/ppce500_spin.c diff --git a/Makefile.target b/Makefile.target index 2ed9099..3f689ce 100644 --- a/Makefile.target +++ b/Makefile.target @@ -247,7 +247,7 @@ endif obj-ppc-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o obj-ppc-y += ppc440.o ppc440_bamboo.o # PowerPC E500 boards -obj-ppc-y += ppce500_mpc8544ds.o mpc8544_guts.o +obj-ppc-y += ppce500_mpc8544ds.o mpc8544_guts.o ppce500_spin.o # PowerPC 440 Xilinx ML507 reference board. 
obj-ppc-y += virtex_ml507.o obj-ppc-$(CONFIG_KVM) += kvm_ppc.o diff --git a/hw/ppce500_mpc8544ds.c b/hw/ppce500_mpc8544ds.c index 9379624..3b8b449 100644 --- a/hw/ppce500_mpc8544ds.c +++ b/hw/ppce500_mpc8544ds.c @@ -49,6 +49,7 @@ #define MPC8544_PCI_IO 0xE100 #define MPC8544_PCI_IOLEN 0x1 #define MPC8544_UTIL_BASE (MPC8544_CCSRBAR_BASE + 0xe) +#define MPC8544_SPIN_BASE 0xEF00 struct boot_info { @@ -164,6 +165,18 @@ static void mmubooke_create_initial_mapping(CPUState *env, tlb-mas7_3 |= MAS3_UR | MAS3_UW | MAS3_UX | MAS3_SR | MAS3_SW | MAS3_SX; } +static void mpc8544ds_cpu_reset_sec(void *opaque) +{ + CPUState *env = opaque; + + cpu_reset(env); + + /* Secondary CPU starts in halted state for now. Needs to change when + implementing non-kernel boot. */ + env-halted = 1; + env-exception_index = EXCP_HLT; +} + static void mpc8544ds_cpu_reset(void *opaque) { CPUState *env = opaque; @@ -172,6 +185,7 @@ static void mpc8544ds_cpu_reset(void *opaque) cpu_reset(env); /* Set initial guest state. */ + env-halted = 0; env-gpr[1] = (1620) - 8; env-gpr[3] = bi-dt_base; env-nip = bi-entry; @@ -199,7 +213,6 @@ static void mpc8544ds_init(ram_addr_t ram_size, unsigned int pci_irq_nrs[4] = {1, 2, 3, 4}; qemu_irq **irqs, *mpic; DeviceState *dev; - struct boot_info *boot_info; CPUState *firstenv = NULL; /* Setup CPUs */ @@ -234,9 +247,16 @@ static void mpc8544ds_init(ram_addr_t ram_size, env-spr[SPR_40x_TCR] = 1 26; /* Register reset handler */ - boot_info = g_malloc0(sizeof(struct boot_info)); - qemu_register_reset(mpc8544ds_cpu_reset, env); - env-load_info = boot_info; + if (!i) { + /* Primary CPU */ + struct boot_info *boot_info; + boot_info = g_malloc0(sizeof(struct boot_info)); + qemu_register_reset(mpc8544ds_cpu_reset, env); + env-load_info = boot_info; + } else { + /* Secondary CPUs */ + qemu_register_reset(mpc8544ds_cpu_reset_sec, env); + } } env = firstenv; @@ -289,6 +309,9 @@ static void mpc8544ds_init(ram_addr_t ram_size, } } + /* Register spinning region */ + 
sysbus_create_simple(e500-spin, MPC8544_SPIN_BASE, NULL); + /* Load kernel. */ if (kernel_filename) { kernel_size = load_uimage(kernel_filename, entry, loadaddr, NULL); @@ -321,6 +344,8 @@ static void mpc8544ds_init(ram_addr_t ram_size, /* If we're loading a kernel directly, we must load the device tree too. */ if (kernel_filename) { + struct boot_info *boot_info; + #ifndef CONFIG_FDT cpu_abort(env, Compiled without FDT support - can't load kernel\n); #endif diff --git a/hw/ppce500_spin.c b/hw/ppce500_spin.c new file mode 100644 index 000..38451ac --- /dev/null +++ b/hw/ppce500_spin.c @@ -0,0 +1,186 @@ +#include hw.h
Re: [Qemu-devel] [PATCH 33/58] KVM: update kernel headers
On Wed, Sep 14, 2011 at 8:42 AM, Alexander Graf ag...@suse.de wrote: This patch updates the kvm kernel headers to the latest version. Signed-off-by: Alexander Graf ag...@suse.de --- linux-headers/asm-powerpc/kvm.h | 23 +++ linux-headers/asm-x86/kvm_para.h | 14 ++ linux-headers/linux/kvm.h | 25 + linux-headers/linux/kvm_para.h | 1 + 4 files changed, 55 insertions(+), 8 deletions(-) diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h index 777d307..579e219 100644 --- a/linux-headers/asm-powerpc/kvm.h +++ b/linux-headers/asm-powerpc/kvm.h @@ -22,6 +22,10 @@ #include linux/types.h +/* Select powerpc specific features in linux/kvm.h */ +#define __KVM_HAVE_SPAPR_TCE +#define __KVM_HAVE_PPC_SMT + struct kvm_regs { __u64 pc; __u64 cr; @@ -145,6 +149,12 @@ struct kvm_regs { #define KVM_SREGS_E_UPDATE_DBSR (1 3) /* + * Book3S special bits to indicate contents in the struct by maintaining + * backwards compatibility with older structs. If adding a new field, + * please make sure to add a flag for that new field */ +#define KVM_SREGS_S_HIOR (1 0) + +/* * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a * previous KVM_GET_REGS. 
* @@ -169,6 +179,8 @@ struct kvm_sregs { __u64 ibat[8]; __u64 dbat[8]; } ppc32; + __u64 flags; /* KVM_SREGS_S_ */ + __u64 hior; } s; struct { union { @@ -272,4 +284,15 @@ struct kvm_guest_debug_arch { #define KVM_INTERRUPT_UNSET -2U #define KVM_INTERRUPT_SET_LEVEL -3U +/* for KVM_CAP_SPAPR_TCE */ +struct kvm_create_spapr_tce { + __u64 liobn; + __u32 window_size; +}; + +/* for KVM_ALLOCATE_RMA */ +struct kvm_allocate_rma { + __u64 rma_size; +}; + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/linux-headers/asm-x86/kvm_para.h b/linux-headers/asm-x86/kvm_para.h index 834d71e..f2ac46a 100644 --- a/linux-headers/asm-x86/kvm_para.h +++ b/linux-headers/asm-x86/kvm_para.h @@ -21,6 +21,7 @@ */ #define KVM_FEATURE_CLOCKSOURCE2 3 #define KVM_FEATURE_ASYNC_PF 4 +#define KVM_FEATURE_STEAL_TIME 5 /* The last 8 bits are used to indicate how to interpret the flags field * in pvclock structure. If no bits are set, all flags are ignored. @@ -30,10 +31,23 @@ #define MSR_KVM_WALL_CLOCK 0x11 #define MSR_KVM_SYSTEM_TIME 0x12 +#define KVM_MSR_ENABLED 1 /* Custom MSRs falls in the range 0x4b564d00-0x4b564dff */ #define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00 #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 +#define MSR_KVM_STEAL_TIME 0x4b564d03 + +struct kvm_steal_time { + __u64 steal; + __u32 version; + __u32 flags; + __u32 pad[12]; +}; + +#define KVM_STEAL_ALIGNMENT_BITS 5 +#define KVM_STEAL_VALID_BITS ((-1ULL (KVM_STEAL_ALIGNMENT_BITS + 1))) +#define KVM_STEAL_RESERVED_MASK (((1 KVM_STEAL_ALIGNMENT_BITS) - 1 ) 1) #define KVM_MAX_MMU_OP_BATCH 32 diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index fc63b73..2062375 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -161,6 +161,7 @@ struct kvm_pit_config { #define KVM_EXIT_NMI 16 #define KVM_EXIT_INTERNAL_ERROR 17 #define KVM_EXIT_OSI 18 +#define KVM_EXIT_PAPR_HCALL 19 /* For KVM_EXIT_INTERNAL_ERROR */ #define KVM_INTERNAL_ERROR_EMULATION 1 @@ -264,6 +265,11 @@ struct 
kvm_run { struct { __u64 gprs[32]; } osi; + struct { + __u64 nr; + __u64 ret; + __u64 args[9]; + } papr_hcall; /* Fix the size of the union. */ char padding[256]; }; @@ -457,7 +463,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_VAPIC 6 #define KVM_CAP_EXT_CPUID 7 #define KVM_CAP_CLOCKSOURCE 8 -#define KVM_CAP_NR_VCPUS 9 /* returns max vcpus per vm */ +#define KVM_CAP_NR_VCPUS 9 /* returns recommended max vcpus per vm */ #define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */ #define KVM_CAP_PIT 11 #define KVM_CAP_NOP_IO_DELAY 12 @@ -544,6 +550,12 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_TSC_CONTROL 60 #define KVM_CAP_GET_TSC_KHZ 61 #define KVM_CAP_PPC_BOOKE_SREGS 62 +#define KVM_CAP_SPAPR_TCE 63 +#define KVM_CAP_PPC_SMT 64 +#define KVM_CAP_PPC_RMA 65 +#define KVM_CAP_MAX_VCPUS 66 /* returns max vcpus per vm */ +#define KVM_CAP_PPC_HIOR 67 +#define KVM_CAP_PPC_PAPR 68 #ifdef KVM_CAP_IRQ_ROUTING @@ -746,6 +758,9 @@ struct kvm_clock_data { /* Available with
Re: [Qemu-devel] [PATCH 47/58] Implement POWER7's CFAR in TCG
On Wed, Sep 14, 2011 at 8:43 AM, Alexander Graf ag...@suse.de wrote: From: David Gibson da...@gibson.dropbear.id.au This patch implements support for the CFAR SPR on POWER7 (Come From Address Register), which snapshots the PC value at the time of a branch or an rfid. The latest powerpc-next kernel also catches it and can show it in xmon or in the signal frames. This works well enough to let recent kernels boot (which otherwise oops on the CFAR access). It hasn't been tested enough to be confident that the CFAR values are actually accurate, but one thing at a time. This looks accurate at least for the cases covered. A higher performance implementation could be to only update the register lazily when the SPR is read, in most other times CFAR would be only stored to DisasContext. Signed-off-by: Ben Herrenschmidt b...@kernel.crashing.org Signed-off-by: David Gibson da...@gibson.dropbear.id.au Signed-off-by: Alexander Graf ag...@suse.de --- target-ppc/cpu.h | 8 target-ppc/translate.c | 28 target-ppc/translate_init.c | 23 ++- 3 files changed, 58 insertions(+), 1 deletions(-) diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index 32706df..3f4af22 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -555,6 +555,8 @@ enum { /* Decrementer clock: RTC clock (POWER, 601) or bus clock */ POWERPC_FLAG_RTC_CLK = 0x0001, POWERPC_FLAG_BUS_CLK = 0x0002, + /* Has CFAR */ + POWERPC_FLAG_CFAR = 0x0004, }; /*/ @@ -872,6 +874,10 @@ struct CPUPPCState { target_ulong ctr; /* condition register */ uint32_t crf[8]; +#if defined(TARGET_PPC64) + /* CFAR */ + target_ulong cfar; +#endif /* XER */ target_ulong xer; /* Reservation address */ @@ -1204,6 +1210,7 @@ static inline void cpu_clone_regs(CPUState *env, target_ulong newsp) #define SPR_601_UDECR (0x006) #define SPR_LR (0x008) #define SPR_CTR (0x009) +#define SPR_DSCR (0x011) #define SPR_DSISR (0x012) #define SPR_DAR (0x013) /* DAE for PowerPC 601 */ #define SPR_601_RTCU (0x014) @@ -1212,6 +1219,7 @@ static inline void 
cpu_clone_regs(CPUState *env, target_ulong newsp) #define SPR_SDR1 (0x019) #define SPR_SRR0 (0x01A) #define SPR_SRR1 (0x01B) +#define SPR_CFAR (0x01C) #define SPR_AMR (0x01D) #define SPR_BOOKE_PID (0x030) #define SPR_BOOKE_DECAR (0x036) diff --git a/target-ppc/translate.c b/target-ppc/translate.c index 4277460..1e362fc 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -69,6 +69,9 @@ static TCGv cpu_nip; static TCGv cpu_msr; static TCGv cpu_ctr; static TCGv cpu_lr; +#if defined(TARGET_PPC64) +static TCGv cpu_cfar; +#endif static TCGv cpu_xer; static TCGv cpu_reserve; static TCGv_i32 cpu_fpscr; @@ -154,6 +157,11 @@ void ppc_translate_init(void) cpu_lr = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, lr), lr); +#if defined(TARGET_PPC64) + cpu_cfar = tcg_global_mem_new(TCG_AREG0, + offsetof(CPUState, cfar), cfar); +#endif + cpu_xer = tcg_global_mem_new(TCG_AREG0, offsetof(CPUState, xer), xer); @@ -187,6 +195,7 @@ typedef struct DisasContext { int le_mode; #if defined(TARGET_PPC64) int sf_mode; + int has_cfar; #endif int fpu_enabled; int altivec_enabled; @@ -3345,6 +3354,14 @@ static inline void gen_qemu_st32fiw(DisasContext *ctx, TCGv_i64 arg1, TCGv arg2) /* stfiwx */ GEN_STXF(stfiw, st32fiw, 0x17, 0x1E, PPC_FLOAT_STFIWX); +static inline void gen_update_cfar(DisasContext *ctx, target_ulong nip) +{ +#if defined(TARGET_PPC64) + if (ctx-has_cfar) Braces missing, please use checkpatch.pl. 
+ tcg_gen_movi_tl(cpu_cfar, nip); +#endif +} + /*** Branch ***/ static inline void gen_goto_tb(DisasContext *ctx, int n, target_ulong dest) { @@ -3407,6 +3424,7 @@ static void gen_b(DisasContext *ctx) target = li; if (LK(ctx-opcode)) gen_setlr(ctx, ctx-nip); + gen_update_cfar(ctx, ctx-nip); gen_goto_tb(ctx, 0, target); } @@ -3469,6 +3487,7 @@ static inline void gen_bcond(DisasContext *ctx, int type) } tcg_temp_free_i32(temp); } + gen_update_cfar(ctx, ctx-nip); if (type == BCOND_IM) { target_ulong li = (target_long)((int16_t)(BD(ctx-opcode))); if (likely(AA(ctx-opcode) == 0)) { @@ -3580,6 +3599,7 @@ static void gen_rfi(DisasContext *ctx) gen_inval_exception(ctx,
Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
Am 17.09.2011 um 18:58 schrieb Blue Swirl blauwir...@gmail.com: On Wed, Sep 14, 2011 at 8:42 AM, Alexander Graf ag...@suse.de wrote: CPUs that are not the boot CPU need to run in spinning code to check if they should run off to execute and if so where to jump to. This usually happens by leaving secondary CPUs looping and checking if some variable in memory changed. In an environment like Qemu however we can be more clever. We can just export the spin table the primary CPU modifies as MMIO region that would event based wake up the respective secondary CPUs. That saves us quite some cycles while the secondary CPUs are not up yet. So this patch adds a PV device that simply exports the spinning table into the guest and thus allows the primary CPU to wake up secondary ones. On Sparc32, there is no need for a PV device. The CPU is woken up from halted state with an IPI. Maybe you could use this approach? The way it's done here is defined by u-boot and now also nailed down in the ePAPR architecture spec. While alternatives might be more appealing, this is how guests work today :). Alex
Re: [Qemu-devel] [PATCH v2 03/15] sheepdog: move coroutine send/recv function to generic code
At Sat, 17 Sep 2011 16:49:22 +0200, Paolo Bonzini wrote: On 09/17/2011 08:29 AM, MORITA Kazutaka wrote: +#else +struct iovec *p = iov; +ret = 0; +while (iovlen 0) { +int rc; +if (do_sendv) { +rc = send(sockfd, p-iov_base, p-iov_len, 0); +} else { +rc = qemu_recv(sockfd, p-iov_base, p-iov_len, 0); +} +if (rc == -1) { +if (errno == EINTR) { +continue; +} +if (ret == 0) { +ret = -1; +} +break; +} +iovlen--, p++; +ret += rc; +} This code can be called inside coroutines with a non-blocking fd, so should we avoid busy waiting? It doesn't busy wait, it exits with EAGAIN. I'll squash in here the Oops, you're right. Sorry for the noise. Thanks, Kazutaka first hunk of patch 4, which is needed. qemu_co_recvv already handles reads that return zero, unlike sheepdog's do_readv_writev. I probably moved it there inadvertently while moving code around to cutils.c, but in order to fix qemu-ga I need to create a new file qemu-coroutine-io.c. Kevin, do you want me to resubmit everything, or are you going to apply some more patches to the block branch (5 to 12 should be fine)? Paolo
Re: [Qemu-devel] [PATCH 33/58] KVM: update kernel headers
Am 17.09.2011 um 18:59 schrieb Blue Swirl blauwir...@gmail.com: On Wed, Sep 14, 2011 at 8:42 AM, Alexander Graf ag...@suse.de wrote: This patch updates the kvm kernel headers to the latest version. Signed-off-by: Alexander Graf ag...@suse.de --- linux-headers/asm-powerpc/kvm.h | 23 +++ linux-headers/asm-x86/kvm_para.h | 14 ++ linux-headers/linux/kvm.h| 25 + linux-headers/linux/kvm_para.h |1 + 4 files changed, 55 insertions(+), 8 deletions(-) diff --git a/linux-headers/asm-powerpc/kvm.h b/linux-headers/asm-powerpc/kvm.h index 777d307..579e219 100644 --- a/linux-headers/asm-powerpc/kvm.h +++ b/linux-headers/asm-powerpc/kvm.h @@ -22,6 +22,10 @@ #include linux/types.h +/* Select powerpc specific features in linux/kvm.h */ +#define __KVM_HAVE_SPAPR_TCE +#define __KVM_HAVE_PPC_SMT + struct kvm_regs { __u64 pc; __u64 cr; @@ -145,6 +149,12 @@ struct kvm_regs { #define KVM_SREGS_E_UPDATE_DBSR(1 3) /* + * Book3S special bits to indicate contents in the struct by maintaining + * backwards compatibility with older structs. If adding a new field, + * please make sure to add a flag for that new field */ +#define KVM_SREGS_S_HIOR (1 0) + +/* * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a * previous KVM_GET_REGS. 
* @@ -169,6 +179,8 @@ struct kvm_sregs { __u64 ibat[8]; __u64 dbat[8]; } ppc32; + __u64 flags; /* KVM_SREGS_S_ */ + __u64 hior; } s; struct { union { @@ -272,4 +284,15 @@ struct kvm_guest_debug_arch { #define KVM_INTERRUPT_UNSET-2U #define KVM_INTERRUPT_SET_LEVEL-3U +/* for KVM_CAP_SPAPR_TCE */ +struct kvm_create_spapr_tce { + __u64 liobn; + __u32 window_size; +}; + +/* for KVM_ALLOCATE_RMA */ +struct kvm_allocate_rma { + __u64 rma_size; +}; + #endif /* __LINUX_KVM_POWERPC_H */ diff --git a/linux-headers/asm-x86/kvm_para.h b/linux-headers/asm-x86/kvm_para.h index 834d71e..f2ac46a 100644 --- a/linux-headers/asm-x86/kvm_para.h +++ b/linux-headers/asm-x86/kvm_para.h @@ -21,6 +21,7 @@ */ #define KVM_FEATURE_CLOCKSOURCE2 3 #define KVM_FEATURE_ASYNC_PF 4 +#define KVM_FEATURE_STEAL_TIME 5 /* The last 8 bits are used to indicate how to interpret the flags field * in pvclock structure. If no bits are set, all flags are ignored. @@ -30,10 +31,23 @@ #define MSR_KVM_WALL_CLOCK 0x11 #define MSR_KVM_SYSTEM_TIME 0x12 +#define KVM_MSR_ENABLED 1 /* Custom MSRs falls in the range 0x4b564d00-0x4b564dff */ #define MSR_KVM_WALL_CLOCK_NEW 0x4b564d00 #define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01 #define MSR_KVM_ASYNC_PF_EN 0x4b564d02 +#define MSR_KVM_STEAL_TIME 0x4b564d03 + +struct kvm_steal_time { + __u64 steal; + __u32 version; + __u32 flags; + __u32 pad[12]; +}; + +#define KVM_STEAL_ALIGNMENT_BITS 5 +#define KVM_STEAL_VALID_BITS ((-1ULL (KVM_STEAL_ALIGNMENT_BITS + 1))) +#define KVM_STEAL_RESERVED_MASK (((1 KVM_STEAL_ALIGNMENT_BITS) - 1 ) 1) #define KVM_MAX_MMU_OP_BATCH 32 diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index fc63b73..2062375 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -161,6 +161,7 @@ struct kvm_pit_config { #define KVM_EXIT_NMI 16 #define KVM_EXIT_INTERNAL_ERROR 17 #define KVM_EXIT_OSI 18 +#define KVM_EXIT_PAPR_HCALL 19 /* For KVM_EXIT_INTERNAL_ERROR */ #define KVM_INTERNAL_ERROR_EMULATION 1 @@ -264,6 +265,11 @@ struct 
kvm_run { struct { __u64 gprs[32]; } osi; + struct { + __u64 nr; + __u64 ret; + __u64 args[9]; + } papr_hcall; /* Fix the size of the union. */ char padding[256]; }; @@ -457,7 +463,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_VAPIC 6 #define KVM_CAP_EXT_CPUID 7 #define KVM_CAP_CLOCKSOURCE 8 -#define KVM_CAP_NR_VCPUS 9 /* returns max vcpus per vm */ +#define KVM_CAP_NR_VCPUS 9 /* returns recommended max vcpus per vm */ #define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */ #define KVM_CAP_PIT 11 #define KVM_CAP_NOP_IO_DELAY 12 @@ -544,6 +550,12 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_TSC_CONTROL 60 #define KVM_CAP_GET_TSC_KHZ 61 #define KVM_CAP_PPC_BOOKE_SREGS 62 +#define KVM_CAP_SPAPR_TCE 63 +#define KVM_CAP_PPC_SMT 64 +#define KVM_CAP_PPC_RMA65 +#define KVM_CAP_MAX_VCPUS 66 /* returns max vcpus per vm */ +#define KVM_CAP_PPC_HIOR 67 +#define KVM_CAP_PPC_PAPR 68 #ifdef
Re: [Qemu-devel] [PATCH 24/58] PPC: E500: Add PV spinning code
On Sat, Sep 17, 2011 at 5:15 PM, Alexander Graf ag...@suse.de wrote: Am 17.09.2011 um 18:58 schrieb Blue Swirl blauwir...@gmail.com: On Wed, Sep 14, 2011 at 8:42 AM, Alexander Graf ag...@suse.de wrote: CPUs that are not the boot CPU need to run in spinning code to check if they should run off to execute and if so where to jump to. This usually happens by leaving secondary CPUs looping and checking if some variable in memory changed. In an environment like Qemu however we can be more clever. We can just export the spin table the primary CPU modifies as MMIO region that would event based wake up the respective secondary CPUs. That saves us quite some cycles while the secondary CPUs are not up yet. So this patch adds a PV device that simply exports the spinning table into the guest and thus allows the primary CPU to wake up secondary ones. On Sparc32, there is no need for a PV device. The CPU is woken up from halted state with an IPI. Maybe you could use this approach? The way it's done here is defined by u-boot and now also nailed down in the ePAPR architecture spec. While alternatives might be more appealing, this is how guests work today :). OK. I hoped that there were no implementations yet. The header (btw missing) should point to the spec.
Re: [Qemu-devel] [PATCH 06/14] qdev: add ability to do QOM-style derived naming
On Fri, Sep 16, 2011 at 4:00 PM, Anthony Liguori aligu...@us.ibm.com wrote: By using a prefix of :: in the name, we can safely derive the composed device name from the parent device and busses name. For instance, if the ::i440fx device created a device named piix3, it would look like this: static void i440fx_initfn(...) { s-piix3 = qdev_create(PIIX3, ::piix3); ... The resulting device would be named ::i440fx::i440fx.0::piix3. The reason for the middle ::i440fx.0 blob is that there are two levels of the tree hierarchy here and the bus level already has it's name derived from the parent device. It could make sense to name the intermediate level by bus type, like ::i440fx::pci.0::piix3. We'll eliminate the bus level of the hierarchy in due time, but for now we have to just live with the ugly names. This patch lets qdev names be specified as a printf style format string which is convenient for creating devices like ::smbus-eeprom[%d]. Signed-off-by: Anthony Liguori aligu...@us.ibm.com --- hw/qdev.c | 79 +++- hw/qdev.h | 8 - 2 files changed, 78 insertions(+), 9 deletions(-) diff --git a/hw/qdev.c b/hw/qdev.c index 3096667..6bf6650 100644 --- a/hw/qdev.c +++ b/hw/qdev.c @@ -88,9 +88,10 @@ static DeviceInfo *qdev_find_info(BusInfo *bus_info, const char *name) return NULL; } -static DeviceState *qdev_create_from_info(BusState *bus, DeviceInfo *info, const char *id) +static DeviceState *qdev_create_from_infov(BusState *bus, DeviceInfo *info, const char *id, va_list ap) { DeviceState *dev; + char *name = NULL; assert(bus-info == info-bus_info); dev = g_malloc0(info-size); @@ -107,18 +108,50 @@ static DeviceState *qdev_create_from_info(BusState *bus, DeviceInfo *info, const } dev-instance_id_alias = -1; dev-state = DEV_STATE_CREATED; - dev-id = g_strdup(id); + + if (id) { + name = g_strdup_vprintf(id, ap); + if (name[0] == ':' name[1] == ':') { + const char *parent_bus, *parent_device; + char *full_name; + + if (dev-parent_bus dev-parent_bus-parent) { + parent_device = 
dev-parent_bus-parent-id; + parent_bus = dev-parent_bus-name; + + full_name = g_strdup_printf(%s%s%s, + dev-parent_bus-parent-id, + dev-parent_bus-name, + name); + g_free(name); + name = full_name; + } + } + } + dev-id = name; + return dev; +} + +static DeviceState *qdev_create_from_info(BusState *bus, DeviceInfo *info, const char *id, ...) +{ + DeviceState *dev; + va_list ap; + + va_start(ap, id); + dev = qdev_create_from_infov(bus, info, id, ap); + va_end(ap); + return dev; } /* Create a new device. This only initializes the device state structure and allows properties to be set. qdev_init should be called to initialize the actual device emulation. */ -DeviceState *qdev_create(BusState *bus, const char *name, const char *id) +DeviceState *qdev_createv(BusState *bus, const char *name, const char *id, va_list ap) { DeviceState *dev; - dev = qdev_try_create(bus, name, id); + dev = qdev_try_createv(bus, name, id, ap); if (!dev) { if (bus) { hw_error(Unknown device '%s' for bus '%s'\n, name, @@ -131,7 +164,19 @@ DeviceState *qdev_create(BusState *bus, const char *name, const char *id) return dev; } -DeviceState *qdev_try_create(BusState *bus, const char *name, const char *id) +DeviceState *qdev_create(BusState *bus, const char *name, const char *id, ...) +{ + DeviceState *dev; + va_list ap; + + va_start(ap, id); + dev = qdev_createv(bus, name, id, ap); + va_end(ap); + + return dev; +} + +DeviceState *qdev_try_createv(BusState *bus, const char *name, const char *id, va_list ap) { DeviceInfo *info; @@ -144,7 +189,19 @@ DeviceState *qdev_try_create(BusState *bus, const char *name, const char *id) return NULL; } - return qdev_create_from_info(bus, info, id); + return qdev_create_from_infov(bus, info, id, ap); +} + +DeviceState *qdev_try_create(BusState *bus, const char *name, const char *id, ...) 
+{ + DeviceState *dev; + va_list ap; + + va_start(ap, id); + dev = qdev_try_createv(bus, name, id, ap); + va_end(ap); + + return dev; } static void qdev_print_devinfo(DeviceInfo *info) @@ -231,6 +288,7 @@ DeviceState *qdev_device_add(QemuOpts *opts) DeviceInfo *info; DeviceState *qdev; BusState *bus; + const char *id; driver = qemu_opt_get(opts, driver); if (!driver) { @@ -271,8 +329,15 @@ DeviceState *qdev_device_add(QemuOpts *opts) return
Re: [Qemu-devel] [PATCH 00/14] qdev: assign unique names to all devices (part 1)
On Fri, Sep 16, 2011 at 4:00 PM, Anthony Liguori aligu...@us.ibm.com wrote: This series introduces an infrastructure to remove anonymous devices from qdev. Anonymous devices are one of the big gaps between qdev and QOM so removing them is a prerequisite to incrementally merging QOM. Besides the infrastructure, I also converted almost all of the possible PC devices to have unique names. Please note that naming is not a property of devices but rather of the thing that creates the devices (usually machines). The names are ugly but this is because of the alternating device/bus hierarchy in qdev. For now, the names use '::' as delimiters but I think Jan has convinced me that down the road, we should use '/' as a delimiter such that the resulting names are actually valid paths (using a canonical path format). The patches look fine to me (assuming s/::/\//g).
Re: [Qemu-devel] [PATCH] Makefile: Fix broken build
Thanks, applied. On Fri, Sep 16, 2011 at 7:50 PM, Stefan Weil w...@mail.berlios.de wrote: make -C mybuilddir no longer works (regression caused by commit) 388d475815c23901010a25c845eb078d47ee0740. PWD is the directory of the caller (not mybuilddir), so BUILD_DIR is set to the wrong value. GNU make sets CURDIR to the correct value. Use this macro instead of PWD. Cc: Lluís Vilanova vilan...@ac.upc.edu Cc: Anthony Liguori aligu...@us.ibm.com Signed-off-by: Stefan Weil w...@mail.berlios.de --- Makefile | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 57cc399..a211158 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # Makefile for QEMU. -# Always point to the root of the build tree -BUILD_DIR=$(PWD) +# Always point to the root of the build tree (needs GNU make). +BUILD_DIR=$(CURDIR) GENERATED_HEADERS = config-host.h trace.h qemu-options.def ifeq ($(TRACE_BACKEND),dtrace) -- 1.7.2.5
Re: [Qemu-devel] [PATCH 0/4] Remove trailing double quote limitation and add virtio_set_status trace event
Thanks, applied all. On Tue, Sep 13, 2011 at 12:34 PM, Stefan Hajnoczi stefa...@linux.vnet.ibm.com wrote: This series removes the tracetool parser limitation that format strings must begin and end with double quotes. In practice this means we need to work around PRI*64 usage by adding dummy at the end of the line. It's fairly easy to solve this parser limitation and do away with the workarounds. While we're at it, also add the virtio_set_status() trace event to properly follow the lifecycle of virtio devices. docs/tracing.txt | 5 + hw/virtio.c | 10 ++ hw/virtio.h | 9 + scripts/tracetool | 20 +--- trace-events | 37 +++-- 5 files changed, 44 insertions(+), 37 deletions(-)
Re: [Qemu-devel] [PATCH] target-i386: Fix several SSE3 instructions.
Thanks, applied. On Fri, Sep 16, 2011 at 3:29 PM, Max Reitz m...@tyndur.org wrote: haddp[sd], hsubp[sd] and addsubp[sd] operate on floats, thus it is necessary to use the appropriate floating point calculation functions. If this is not done, those functions operate merely on integers, which is not correct. Signed-off-by: Max Reitz m...@tyndur.org --- target-i386/ops_sse.h | 36 ++-- 1 files changed, 18 insertions(+), 18 deletions(-) diff --git a/target-i386/ops_sse.h b/target-i386/ops_sse.h index 703be99..aa41d25 100644 --- a/target-i386/ops_sse.h +++ b/target-i386/ops_sse.h @@ -859,51 +859,51 @@ void helper_insertq_i(XMMReg *d, int index, int length) void helper_haddps(XMMReg *d, XMMReg *s) { XMMReg r; - r.XMM_S(0) = d-XMM_S(0) + d-XMM_S(1); - r.XMM_S(1) = d-XMM_S(2) + d-XMM_S(3); - r.XMM_S(2) = s-XMM_S(0) + s-XMM_S(1); - r.XMM_S(3) = s-XMM_S(2) + s-XMM_S(3); + r.XMM_S(0) = float32_add(d-XMM_S(0), d-XMM_S(1), env-sse_status); + r.XMM_S(1) = float32_add(d-XMM_S(2), d-XMM_S(3), env-sse_status); + r.XMM_S(2) = float32_add(s-XMM_S(0), s-XMM_S(1), env-sse_status); + r.XMM_S(3) = float32_add(s-XMM_S(2), s-XMM_S(3), env-sse_status); *d = r; } void helper_haddpd(XMMReg *d, XMMReg *s) { XMMReg r; - r.XMM_D(0) = d-XMM_D(0) + d-XMM_D(1); - r.XMM_D(1) = s-XMM_D(0) + s-XMM_D(1); + r.XMM_D(0) = float64_add(d-XMM_D(0), d-XMM_D(1), env-sse_status); + r.XMM_D(1) = float64_add(s-XMM_D(0), s-XMM_D(1), env-sse_status); *d = r; } void helper_hsubps(XMMReg *d, XMMReg *s) { XMMReg r; - r.XMM_S(0) = d-XMM_S(0) - d-XMM_S(1); - r.XMM_S(1) = d-XMM_S(2) - d-XMM_S(3); - r.XMM_S(2) = s-XMM_S(0) - s-XMM_S(1); - r.XMM_S(3) = s-XMM_S(2) - s-XMM_S(3); + r.XMM_S(0) = float32_sub(d-XMM_S(0), d-XMM_S(1), env-sse_status); + r.XMM_S(1) = float32_sub(d-XMM_S(2), d-XMM_S(3), env-sse_status); + r.XMM_S(2) = float32_sub(s-XMM_S(0), s-XMM_S(1), env-sse_status); + r.XMM_S(3) = float32_sub(s-XMM_S(2), s-XMM_S(3), env-sse_status); *d = r; } void helper_hsubpd(XMMReg *d, XMMReg *s) { XMMReg r; - r.XMM_D(0) = 
d-XMM_D(0) - d-XMM_D(1); - r.XMM_D(1) = s-XMM_D(0) - s-XMM_D(1); + r.XMM_D(0) = float64_sub(d-XMM_D(0), d-XMM_D(1), env-sse_status); + r.XMM_D(1) = float64_sub(s-XMM_D(0), s-XMM_D(1), env-sse_status); *d = r; } void helper_addsubps(XMMReg *d, XMMReg *s) { - d-XMM_S(0) = d-XMM_S(0) - s-XMM_S(0); - d-XMM_S(1) = d-XMM_S(1) + s-XMM_S(1); - d-XMM_S(2) = d-XMM_S(2) - s-XMM_S(2); - d-XMM_S(3) = d-XMM_S(3) + s-XMM_S(3); + d-XMM_S(0) = float32_sub(d-XMM_S(0), s-XMM_S(0), env-sse_status); + d-XMM_S(1) = float32_add(d-XMM_S(1), s-XMM_S(1), env-sse_status); + d-XMM_S(2) = float32_sub(d-XMM_S(2), s-XMM_S(2), env-sse_status); + d-XMM_S(3) = float32_add(d-XMM_S(3), s-XMM_S(3), env-sse_status); } void helper_addsubpd(XMMReg *d, XMMReg *s) { - d-XMM_D(0) = d-XMM_D(0) - s-XMM_D(0); - d-XMM_D(1) = d-XMM_D(1) + s-XMM_D(1); + d-XMM_D(0) = float64_sub(d-XMM_D(0), s-XMM_D(0), env-sse_status); + d-XMM_D(1) = float64_add(d-XMM_D(1), s-XMM_D(1), env-sse_status); } /* XXX: unordered */ -- 1.7.6.1
Re: [Qemu-devel] [PATCH 0/5] Only one call output register needed for 64 bit hosts
Thanks, applied all. On Sat, Sep 17, 2011 at 2:01 PM, Stefan Weil w...@mail.berlios.de wrote: Am 05.09.2011 11:06, schrieb Stefan Weil: The number of registers needed for the return value of TCG opcode INDEX_op_call is calculated in function tcg_gen_callN (nb_rets). It can be 0 or 1, for 32 bit hosts also 2 (return 64 bit value in two 32 bit registers). Some TCG implementations reserve 2 registers although only 1 is used. The following patches fix this. [PATCH 1/5] tcg/i386: Only one call output register needed for 64 bit hosts [PATCH 2/5] tcg/ia64: Only one call output register needed for 64 bit hosts [PATCH 3/5] tcg/s390: Only one call output register needed for 64 bit hosts [PATCH 4/5] tcg/sparc: Only one call output register needed for 64 bit hosts [PATCH 5/5] tcg/ppc64: Only one call output register needed for 64 bit hosts Patch 3 was acked by Richard Henderson, so was patch 5 by Malc. What about the other three patches (i386 / ia64, sparc)? They are all similar, so I think they could also be committed without more reviews. Thanks, Stefan Weil
[Qemu-devel] [PATCH 2/2] hw/omap_gpmc: Modify correct field when writing IRQSTATUS register
Writing to IRQSTATUS should affect irqst, not irqen -- error spotted by Andrzej Zaborowski. Signed-off-by: Peter Maydell peter.mayd...@linaro.org --- hw/omap_gpmc.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/hw/omap_gpmc.c b/hw/omap_gpmc.c index e27b93c..7fc82a2 100644 --- a/hw/omap_gpmc.c +++ b/hw/omap_gpmc.c @@ -639,7 +639,7 @@ static void omap_gpmc_write(void *opaque, target_phys_addr_t addr, break; case 0x018:/* GPMC_IRQSTATUS */ -s-irqen = ~value; +s-irqst = ~value; omap_gpmc_int_update(s); break; -- 1.7.1
[Qemu-devel] [PATCH 1/2] hw/omap_gpmc: Add comment about FIFOTHRESHOLDSTATUS bit
Promote the remark about why we handle FIFOTHRESHOLDSTATUS the way we do from the commit message of de8af7fe0 to a comment in the code. Signed-off-by: Peter Maydell peter.mayd...@linaro.org --- hw/omap_gpmc.c |7 +++ 1 files changed, 7 insertions(+), 0 deletions(-) diff --git a/hw/omap_gpmc.c b/hw/omap_gpmc.c index 02f0c52..e27b93c 100644 --- a/hw/omap_gpmc.c +++ b/hw/omap_gpmc.c @@ -569,6 +569,13 @@ static uint64_t omap_gpmc_read(void *opaque, target_phys_addr_t addr, case 0x1ec:/* GPMC_PREFETCH_CONTROL */ return s-prefetch.startengine; case 0x1f0:/* GPMC_PREFETCH_STATUS */ +/* NB: The OMAP3 TRM is inconsistent about whether the GPMC + * FIFOTHRESHOLDSTATUS bit should be set when + * FIFOPOINTER FIFOTHRESHOLD or when it is = FIFOTHRESHOLD. + * Apparently the underlying functional spec from which the TRM was + * created states that the behaviour is =, and this also + * makes more conceptual sense. + */ return (s-prefetch.fifopointer 24) | ((s-prefetch.fifopointer = ((s-prefetch.config1 8) 0x7f) ? 1 : 0) 16) | -- 1.7.1
Re: [Qemu-devel] [PATCH] Add iSCSI support for QEMU
Le jeudi 15 septembre 2011 à 08:06 +0200, Paolo Bonzini a écrit : On 09/14/2011 06:36 PM, Orit Wasserman wrote: I think NBD would be fine, especially with a flush command. I think NBD would be fine, especially with a flush command. If I remember correctly, there is a problem with NBD with an image with a backing file chain. NBD client only displays a single file image. With iSCSI we can use different LUNs per image file. The NBD protocol supports multiple named exports, just not QEMU's implementation. Named exports are supported since commit 1d45f8b542f6b80b24c44533ef0dd9e1a3b17ea5 Regards, Laurent
Re: [Qemu-devel] [net-next RFC V2 PATCH 0/5] Multiqueue support in tun/tap
On Sat, Sep 17, 2011 at 02:02:04PM +0800, Jason Wang wrote: A wiki page was created to describe the detailed design of all parts involved in the multiqueue implementation: http://www.linux-kvm.org/page/Multiqueue and some basic test results can be seen on this page: http://www.linux-kvm.org/page/Multiqueue-performance-Sep-13. I will post the detailed numbers as an attachment in reply to this thread. Does it make sense to test both with and without RPS in the guest? -- MST
Re: [Qemu-devel] blobstore disk format (was Re: Design of the blobstore)
On Fri, Sep 16, 2011 at 12:46:40PM -0400, Stefan Berger wrote: On 09/16/2011 10:44 AM, Michael S. Tsirkin wrote: On Thu, Sep 15, 2011 at 10:33:13AM -0400, Stefan Berger wrote: On 09/15/2011 08:28 AM, Michael S. Tsirkin wrote: So the below is a proposal for a directory scheme for storing (optionally multiple) nvram images, along with any metadata. Data is encoded using BER: http://en.wikipedia.org/wiki/Basic_Encoding_Rules Specifically, we mostly use the subsets. Would it change anything if we were to think of the NVRAM image as another piece of metadata? Yes, we can do that, sure. I had the feeling that it will help to lay out the image at the end, to make directory listing more efficient - the rest of metadata is usually small, image might be somewhat large. Why not let a convenience library handle the metadata on the device level, having it create the blob that the NVRAM layer ends up writing and parsing before the device uses it? Otherwise I should maybe rename the nvram to metadata_store :-/ Maybe we are talking about different things. All I argue for is using a common standard format for storing metadata, instead of having each device roll its own. I am also wondering whether each device shouldn't just handle the metadata itself, It could be that just means we will have custom code with different bugs in each device. Note that from experience with formats, the problem with time becomes less trivial than it seems as we need to provide forward and backward compatibility guarantees. Is that guaranteed just by using ASN.1? At least for BER, yes. We can always skip an optional field that we don't recognize without knowing anything about its internal format. Do we need to add a revision to the metadata? IMO, no. Instead we add optional attributes as long as we can preserve backwards compatibility, and mandatory attributes if we can't. How do we handle metadata that was to change over time, i.e., new attribute/values being added into a finite store? 
Add them as optional attributes. so generate a blob from data structures containing all the metadata it needs, arranging attribute and value pairs itself (maybe using some convenience function for serialization/deserialization) and let the NVRAM layer not handle the metadata at all but only blobs, their maximum sizes, actual sizes Actual size seems to be a TPM specific thing. Yes, it could also be metadata. One should probably always be allowed to write a shorter blob than registered, but not a longer one. If the device did that, maybe it should assume it needs to prepend a header to the actual blob indicating the actual size of the following blob so trailing garbage can be ignored. And then when we need more info we get to deal with versioning of that header. encryption, integrity value (crc32 or sha1) and so on. What metadata should there be that really need to be handled on the NVRAM API and below level rather than on the device-specific code level? So checksum (checksum value and type) 'and so on' are what I call metadata :) Doing it at device level seems wrong. You mean doing it at the NVRAM level seems wrong. Of course, again something a device could write into a header prepended to the actual blob. Maybe every device that needs it should do that so that if we were to support encryption of blobs and the key for decryption was wrong one could detect it early without feeding badly decrypted / corrupted state into the device and see what happens. Do what? Checksum the data? Well, error detection is nice, but it could be that people actually care about not losing all of the data on nvram if qemu is killed. I also wonder whether invalidating all data because of a single bit corruption is a bug or a feature. We use a directory as a SET in a CER format. This allows generating directory online without scanning the entries beforehand. I guess it is the 'unknown' for me... 
but what is the advantage of using ASN1 for this rather than just writing out packed and endianness-normalized data structures (with revision value), If you want an example of where this 'custom formats are easy so let us write one' leads to in the end, look no further than live migration code. It's a mess of hacks that does not even work across upstream qemu versions, let alone across downstreams (different linux distros). So is ASN1 the answer or does one still need to add a revision tag to each blob putting in custom code for parsing the different revisions of data structures (I guess) that may be extended/changed over time? Stefan We don't need revisions. We can always parse a new structure skipping optional attributes we don't recognize. In case we want to break old qemu versions intentionally, we can add a mandatory attribute. having them crc32-protected to have some sanity checking in place? Stefan I'm not sure why we want crc specifically in TPM. If it is 'just because we
[Qemu-devel] [PATCH v2] memory: simple memory tree printer
Add a monitor command 'info mtree' to show the memory hierarchy much like /proc/iomem in Linux. Signed-off-by: Blue Swirl blauwir...@gmail.com --- v1-v2: use /proc/iomem format. --- memory.c | 27 +++ memory.h |2 ++ monitor.c |7 +++ 3 files changed, 36 insertions(+), 0 deletions(-) diff --git a/memory.c b/memory.c index 101b67c..275f5cf 100644 --- a/memory.c +++ b/memory.c @@ -17,6 +17,7 @@ #include bitops.h #include kvm.h #include assert.h +#include monitor.h unsigned memory_region_transaction_depth = 0; @@ -1253,3 +1254,29 @@ void set_system_io_map(MemoryRegion *mr) address_space_io.root = mr; memory_region_update_topology(); } + +static void mtree_print_mr(Monitor *mon, MemoryRegion *mr, unsigned int level) +{ +MemoryRegion *submr; +unsigned int i; + +for (i = 0; i level; i++) { +monitor_printf(mon, ); +} +monitor_printf(mon, TARGET_FMT_plx - TARGET_FMT_plx : %s\n, + mr-addr, mr-addr + (target_phys_addr_t)mr-size - 1, + mr-name); + +QTAILQ_FOREACH(submr, mr-subregions, subregions_link) { +mtree_print_mr(mon, submr, level + 1); +} +} + +void mtree_info(Monitor *mon) +{ +monitor_printf(mon, memory\n); +mtree_print_mr(mon, address_space_memory.root, 0); + +monitor_printf(mon, I/O\n); +mtree_print_mr(mon, address_space_io.root, 0); +} diff --git a/memory.h b/memory.h index 06b83ae..09d8e29 100644 --- a/memory.h +++ b/memory.h @@ -500,6 +500,8 @@ void memory_region_transaction_begin(void); */ void memory_region_transaction_commit(void); +void mtree_info(Monitor *mon); + #endif #endif diff --git a/monitor.c b/monitor.c index 03ae997..0302446 100644 --- a/monitor.c +++ b/monitor.c @@ -2968,6 +2968,13 @@ static const mon_cmd_t info_cmds[] = { }, #endif { +.name = mtree, +.args_type = , +.params = , +.help = show memory tree, +.mhandler.info = mtree_info, +}, +{ .name = jit, .args_type = , .params = , -- 1.6.2.4
[Qemu-devel] [PATCH] sun4u: don't set up isa_mem_base
Since we use memory API in sun4u.c, after 71579cae30b53c910cd6c47ab4e683f647d36519, setting up isa_mem_base puts vga.chain4 outside of the physical address space. Fix by removing obsolete isa_mem_base set up. Signed-off-by: Blue Swirl blauwir...@gmail.com --- hw/sun4u.c |1 - 1 files changed, 0 insertions(+), 1 deletions(-) diff --git a/hw/sun4u.c b/hw/sun4u.c index 32e6ab9..6afb0e7 100644 --- a/hw/sun4u.c +++ b/hw/sun4u.c @@ -763,7 +763,6 @@ static void sun4uv_init(ram_addr_t RAM_size, irq = qemu_allocate_irqs(cpu_set_irq, env, MAX_PILS); pci_bus = pci_apb_init(APB_SPECIAL_BASE, APB_MEM_BASE, irq, pci_bus2, pci_bus3); -isa_mem_base = APB_PCI_IO_BASE; pci_vga_init(pci_bus); // XXX Should be pci_bus3 -- 1.6.2.4
[Qemu-devel] [PATCH 0/8] tcg/interpreter: Add TCG + interpreter for bytecode (virtual machine)
Hello, these patches add a new code generator (TCG target) to qemu. Unlike other tcg target code generators, this one does not generate machine code for some cpu. It generates machine-independent bytecode which is interpreted later. That's why I called it TCI (tiny code interpreter). I wrote most of the code two years ago and included feedback and contributions from several QEMU developers, notably TeleMan, Stuart Brady, Blue Swirl and Malc. See the history here: http://lists.nongnu.org/archive/html/qemu-devel/2009-09/msg01710.html Since that time, I have used TCI regularly, added small fixes and improvements and rebased it onto the latest QEMU. Some versions were tested using ARM (emulated and real), PowerPC (emulated) and MIPS (emulated) hosts, but normally I run it on i386 and x86_64 hosts. I'd appreciate seeing TCI in QEMU 1.0. Regards, Stefan Weil Patches 2 and 4 are optional; patch 8 is only needed for running TCI on a PowerPC host. [PATCH 1/8] tcg: Declare TCG_TARGET_REG_BITS in tcg.h [PATCH 2/8] tcg: Don't declare TCG_TARGET_REG_BITS in tcg-target.h [PATCH 3/8] tcg: Add forward declarations for local functions [PATCH 4/8] tcg: Add some assertions [PATCH 5/8] tcg: Add interpreter for bytecode [PATCH 6/8] tcg: Add bytecode generator for tcg interpreter [PATCH 7/8] tcg: Add tcg interpreter to configure / make [PATCH 8/8] ppc: Support tcg interpreter on ppc hosts
[Qemu-devel] [PATCH 8/8] ppc: Support tcg interpreter on ppc hosts
Tests of the tcg interpreter on an (emulated) ppc host needed this small change. Signed-off-by: Stefan Weil w...@mail.berlios.de --- cache-utils.h |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/cache-utils.h b/cache-utils.h index 0b65907..7c3b282 100644 --- a/cache-utils.h +++ b/cache-utils.h @@ -1,7 +1,7 @@ #ifndef QEMU_CACHE_UTILS_H #define QEMU_CACHE_UTILS_H -#if defined(_ARCH_PPC) +#if defined(_ARCH_PPC) !defined(CONFIG_TCG_INTERPRETER) struct qemu_cache_conf { unsigned long dcache_bsize; unsigned long icache_bsize; -- 1.7.2.5
[Qemu-devel] [PATCH 4/8] tcg: Add some assertions
Signed-off-by: Stefan Weil w...@mail.berlios.de --- tcg/tcg.c |2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/tcg/tcg.c b/tcg/tcg.c index bdd7a67..30f3aef 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -794,7 +794,9 @@ static char *tcg_get_arg_str_idx(TCGContext *s, char *buf, int buf_size, { TCGTemp *ts; +assert(idx = 0 idx s-nb_temps); ts = s-temps[idx]; +assert(ts); if (idx s-nb_globals) { pstrcpy(buf, buf_size, ts-name); } else { -- 1.7.2.5
[Qemu-devel] [PATCH 7/8] tcg: Add tcg interpreter to configure / make
Signed-off-by: Stefan Weil w...@mail.berlios.de --- Makefile.target |1 + configure | 30 -- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/Makefile.target b/Makefile.target index 88d2f1f..a2c3a4a 100644 --- a/Makefile.target +++ b/Makefile.target @@ -69,6 +69,7 @@ all: $(PROGS) stap # cpu emulator library libobj-y = exec.o translate-all.o cpu-exec.o translate.o libobj-y += tcg/tcg.o tcg/optimize.o +libobj-$(CONFIG_TCG_INTERPRETER) += tcg/tci.o libobj-y += fpu/softfloat.o libobj-y += op_helper.o helper.o ifeq ($(TARGET_BASE_ARCH), i386) diff --git a/configure b/configure index ad924c4..1d800e1 100755 --- a/configure +++ b/configure @@ -138,6 +138,7 @@ debug_tcg=no debug_mon=no debug=no strip_opt=yes +tcg_interpreter=no bigendian=no mingw32=no EXESUF= @@ -647,6 +648,10 @@ for opt do ;; --enable-kvm) kvm=yes ;; + --disable-tcg-interpreter) tcg_interpreter=no + ;; + --enable-tcg-interpreter) tcg_interpreter=yes + ;; --disable-spice) spice=no ;; --enable-spice) spice=yes @@ -997,6 +1002,7 @@ echo --enable-bluez enable bluez stack connectivity echo --disable-slirp disable SLIRP userspace network connectivity echo --disable-kvmdisable KVM acceleration support echo --enable-kvm enable KVM acceleration support +echo --enable-tcg-interpreter enable TCG with bytecode interpreter (TCI) echo --disable-nptl disable usermode NPTL support echo --enable-nptlenable usermode NPTL support echo --enable-system enable all system emulation targets @@ -2714,6 +2720,7 @@ echo Linux AIO support $linux_aio echo ATTR/XATTR support $attr echo Install blobs $blobs echo KVM support $kvm +echo TCG interpreter $tcg_interpreter echo fdt support $fdt echo preadv support$preadv echo fdatasync $fdatasync @@ -2761,6 +2768,15 @@ case $cpu in armv4b|armv4l) ARCH=arm ;; + *) +if test $tcg_interpreter = yes ; then +echo Unsupported CPU = $cpu, will use TCG with TCI (experimental) +ARCH=unknown +else +echo Unsupported CPU = $cpu, try --enable-tcg-interpreter +exit 1 +fi + ;; esac echo 
ARCH=$ARCH $config_host_mak if test $debug_tcg = yes ; then @@ -2994,6 +3010,9 @@ fi if test $signalfd = yes ; then echo CONFIG_SIGNALFD=y $config_host_mak fi +if test $tcg_interpreter = yes ; then + echo CONFIG_TCG_INTERPRETER=y $config_host_mak +fi if test $need_offsetof = yes ; then echo CONFIG_NEED_OFFSETOF=y $config_host_mak fi @@ -3454,7 +3473,9 @@ cflags= includes= ldflags= -if test $ARCH = sparc64 ; then +if test $tcg_interpreter = yes; then + includes=-I\$(SRC_PATH)/tcg/bytecode $includes +elif test $ARCH = sparc64 ; then includes=-I\$(SRC_PATH)/tcg/sparc $includes elif test $ARCH = s390x ; then includes=-I\$(SRC_PATH)/tcg/s390 $includes @@ -3577,7 +3598,12 @@ if test $gprof = yes ; then fi fi -linker_script=-Wl,-T../config-host.ld -Wl,-T,\$(SRC_PATH)/\$(ARCH).ld +if test $ARCH = unknown; then + linker_script= +else + linker_script=-Wl,-T../config-host.ld -Wl,-T,\$(SRC_PATH)/\$(ARCH).ld +fi + if test $target_linux_user = yes -o $target_bsd_user = yes ; then case $ARCH in sparc) -- 1.7.2.5
[Qemu-devel] [PATCH 1/8] tcg: Declare TCG_TARGET_REG_BITS in tcg.h
TCG_TARGET_REG_BITS can be determined by the compiler, so there is no need to declare it for each individual tcg target. This is especially important for new tcg targets which will be supported by the tcg interpreter. Signed-off-by: Stefan Weil w...@mail.berlios.de --- tcg/tcg.h | 10 ++ 1 files changed, 10 insertions(+), 0 deletions(-) diff --git a/tcg/tcg.h b/tcg/tcg.h index dc5e9c9..1859fae 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -22,6 +22,16 @@ * THE SOFTWARE. */ #include "qemu-common.h" + +/* Target word size (must be identical to pointer size). */ +#if UINTPTR_MAX == UINT32_MAX +# define TCG_TARGET_REG_BITS 32 +#elif UINTPTR_MAX == UINT64_MAX +# define TCG_TARGET_REG_BITS 64 +#else +# error Unknown pointer size for tcg target +#endif + #include "tcg-target.h" #include "tcg-runtime.h" -- 1.7.2.5
[Qemu-devel] [PATCH 6/8] tcg: Add bytecode generator for tcg interpreter
Unlike other tcg target code generators, this one does not generate machine code for some cpu. It generates machine independent bytecode which is interpreted later. This allows running QEMU on any host. Interpreted bytecode is slower than direct execution of generated machine code. Signed-off-by: Stefan Weil w...@mail.berlios.de --- dis-asm.h |1 + disas.c |4 +- dyngen-exec.h | 13 +- exec-all.h| 13 +- tcg/bytecode/README | 129 ++ tcg/bytecode/tcg-target.c | 955 + tcg/bytecode/tcg-target.h | 152 +++ 7 files changed, 1263 insertions(+), 4 deletions(-) create mode 100644 tcg/bytecode/README create mode 100644 tcg/bytecode/tcg-target.c create mode 100644 tcg/bytecode/tcg-target.h diff --git a/dis-asm.h b/dis-asm.h index 5b07d7f..876975f 100644 --- a/dis-asm.h +++ b/dis-asm.h @@ -365,6 +365,7 @@ typedef struct disassemble_info { target address. Return number of bytes processed. */ typedef int (*disassembler_ftype) (bfd_vma, disassemble_info *); +int print_insn_bytecode(bfd_vma, disassemble_info*); int print_insn_big_mips (bfd_vma, disassemble_info*); int print_insn_little_mips (bfd_vma, disassemble_info*); int print_insn_i386 (bfd_vma, disassemble_info*); diff --git a/disas.c b/disas.c index 611b30b..e2061d8 100644 --- a/disas.c +++ b/disas.c @@ -273,7 +273,9 @@ void disas(FILE *out, void *code, unsigned long size) #else disasm_info.endian = BFD_ENDIAN_LITTLE; #endif -#if defined(__i386__) +#if defined(CONFIG_TCG_INTERPRETER) +print_insn = print_insn_bytecode; +#elif defined(__i386__) disasm_info.mach = bfd_mach_i386_i386; print_insn = print_insn_i386; #elif defined(__x86_64__) diff --git a/dyngen-exec.h b/dyngen-exec.h index 8beb7f3..64f76c4 100644 --- a/dyngen-exec.h +++ b/dyngen-exec.h @@ -19,7 +19,9 @@ #if !defined(__DYNGEN_EXEC_H__) #define __DYNGEN_EXEC_H__ -#if defined(__i386__) +#if defined(CONFIG_TCG_INTERPRETER) +/* The TCG interpreter does not use special registers. 
*/ +#elif defined(__i386__) #define AREG0 ebp #elif defined(__x86_64__) #define AREG0 r14 @@ -55,11 +57,18 @@ #error unsupported CPU #endif +#if defined(AREG0) register CPUState *env asm(AREG0); +#else +extern CPUState *env; +#endif /* The return address may point to the start of the next instruction. Subtracting one gets us the call instruction itself. */ -#if defined(__s390__) !defined(__s390x__) +#if defined(CONFIG_TCG_INTERPRETER) +extern uint8_t *tci_tb_ptr; +# define GETPC() ((void *)tci_tb_ptr) +#elif defined(__s390__) !defined(__s390x__) # define GETPC() ((void*)(((unsigned long)__builtin_return_address(0) 0x7fffUL) - 1)) #elif defined(__arm__) /* Thumb return addresses have the low bit set, so we need to subtract two. diff --git a/exec-all.h b/exec-all.h index 9b8d62c..0116acd 100644 --- a/exec-all.h +++ b/exec-all.h @@ -122,6 +122,8 @@ void tlb_set_page(CPUState *env, target_ulong vaddr, #if defined(_ARCH_PPC) || defined(__x86_64__) || defined(__arm__) || defined(__i386__) #define USE_DIRECT_JUMP +#elif defined(CONFIG_TCG_INTERPRETER) +#define USE_DIRECT_JUMP #endif struct TranslationBlock { @@ -189,7 +191,14 @@ extern TranslationBlock *tb_phys_hash[CODE_GEN_PHYS_HASH_SIZE]; #if defined(USE_DIRECT_JUMP) -#if defined(_ARCH_PPC) +#if defined(CONFIG_TCG_INTERPRETER) +static inline void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr) +{ +/* patch the branch destination */ +*(uint32_t *)jmp_addr = addr - (jmp_addr + 4); +/* no need to flush icache explicitly */ +} +#elif defined(_ARCH_PPC) void ppc_tb_set_jmp_target(unsigned long jmp_addr, unsigned long addr); #define tb_set_jmp_target1 ppc_tb_set_jmp_target #elif defined(__i386__) || defined(__x86_64__) @@ -223,6 +232,8 @@ static inline void tb_set_jmp_target1(unsigned long jmp_addr, unsigned long addr __asm __volatile__ (swi 0x9f0002 : : r (_beg), r (_end), r (_flg)); #endif } +#else +#error tb_set_jmp_target1 is missing #endif static inline void tb_set_jmp_target(TranslationBlock *tb, diff --git 
a/tcg/bytecode/README b/tcg/bytecode/README new file mode 100644 index 000..6fe9755 --- /dev/null +++ b/tcg/bytecode/README @@ -0,0 +1,129 @@ +TCG Interpreter (TCI) - Copyright (c) 2011 Stefan Weil. + +This file is released under GPL 2 or later. + +1) Introduction + +TCG (Tiny Code Generator) is a code generator which translates +code fragments (basic blocks) from target code (any of the +targets supported by QEMU) to a code representation which +can be run on a host. + +QEMU can create native code for some hosts (arm, hppa, i386, ia64, ppc, ppc64, +s390, sparc, x86_64). For others, unofficial host support was written. + +By adding a code generator for a virtual machine and using an +interpreter for the generated bytecode, it is possible to +support (almost) any host. + +This is what TCI (Tiny Code
[Qemu-devel] [PATCH 5/8] tcg: Add interpreter for bytecode
Signed-off-by: Stefan Weil w...@mail.berlios.de --- tcg/tcg.h |4 +- tcg/tci.c | 1200 + 2 files changed, 1203 insertions(+), 1 deletions(-) create mode 100644 tcg/tci.c diff --git a/tcg/tcg.h b/tcg/tcg.h index 1859fae..c99c7ea 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -577,7 +577,9 @@ TCGv_i32 tcg_const_local_i32(int32_t val); TCGv_i64 tcg_const_local_i64(int64_t val); extern uint8_t code_gen_prologue[]; -#if defined(_ARCH_PPC) !defined(_ARCH_PPC64) +#if defined(CONFIG_TCG_INTERPRETER) +unsigned long tcg_qemu_tb_exec(CPUState *env, uint8_t *tb_ptr); +#elif defined(_ARCH_PPC) !defined(_ARCH_PPC64) #define tcg_qemu_tb_exec(env, tb_ptr)\ ((long REGPARM __attribute__ ((longcall)) (*)(void *, void *))code_gen_prologue)(env, tb_ptr) #else diff --git a/tcg/tci.c b/tcg/tci.c new file mode 100644 index 000..eea9992 --- /dev/null +++ b/tcg/tci.c @@ -0,0 +1,1200 @@ +/* + * Tiny Code Interpreter for QEMU + * + * Copyright (c) 2009, 2011 Stefan Weil + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. + */ + +#include config.h +#include qemu-common.h +#include exec-all.h /* MAX_OPC_PARAM_IARGS */ +#include tcg-op.h + +/* Marker for missing code. */ +#define TODO() \ +do { \ +fprintf(stderr, TODO %s:%u: %s()\n, \ +__FILE__, __LINE__, __func__); \ +tcg_abort(); \ +} while (0) + +/* Trace message to see program flow. 
*/ +#if defined(CONFIG_DEBUG_TCG_INTERPRETER) +#define TRACE() \ +loglevel \ +? fprintf(stderr, TCG %s:%u: %s()\n, __FILE__, __LINE__, __func__) \ +: (void)0 +#else +#define TRACE() ((void)0) +#endif + +#if MAX_OPC_PARAM_IARGS != 4 +# error Fix needed, number of supported input arguments changed! +#endif +#if TCG_TARGET_REG_BITS == 32 +typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong, +tcg_target_ulong, tcg_target_ulong, +tcg_target_ulong, tcg_target_ulong, +tcg_target_ulong, tcg_target_ulong); +#else +typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong, +tcg_target_ulong, tcg_target_ulong); +#endif + +CPUState *env; + +/* Alpha and SH4 user mode emulations call GETPC(), so they need tci_tb_ptr. */ +#if defined(CONFIG_SOFTMMU) || defined(TARGET_ALPHA) || defined(TARGET_SH4) +# define NEEDS_TB_PTR +#endif + +#ifdef NEEDS_TB_PTR +uint8_t *tci_tb_ptr; +#endif + +static tcg_target_ulong tci_reg[TCG_TARGET_NB_REGS]; + +static tcg_target_ulong tci_read_reg(TCGRegister index) +{ +assert(index ARRAY_SIZE(tci_reg)); +return tci_reg[index]; +} + +#if TCG_TARGET_HAS_ext8s_i32 || TCG_TARGET_HAS_ext8s_i64 +static int8_t tci_read_reg8s(TCGRegister index) +{ +return (int8_t)tci_read_reg(index); +} +#endif + +#if TCG_TARGET_HAS_ext16s_i32 || TCG_TARGET_HAS_ext16s_i64 +static int16_t tci_read_reg16s(TCGRegister index) +{ +return (int16_t)tci_read_reg(index); +} +#endif + +#if TCG_TARGET_REG_BITS == 64 +static int32_t tci_read_reg32s(TCGRegister index) +{ +return (int32_t)tci_read_reg(index); +} +#endif + +static uint8_t tci_read_reg8(TCGRegister index) +{ +return (uint8_t)tci_read_reg(index); +} + +static uint16_t tci_read_reg16(TCGRegister index) +{ +return (uint16_t)tci_read_reg(index); +} + +static uint32_t tci_read_reg32(TCGRegister index) +{ +return (uint32_t)tci_read_reg(index); +} + +#if TCG_TARGET_REG_BITS == 64 +static uint64_t tci_read_reg64(TCGRegister index) +{ +return tci_read_reg(index); +} +#endif + +static void 
tci_write_reg(TCGRegister index, tcg_target_ulong value) +{ +assert(index < ARRAY_SIZE(tci_reg)); +assert(index != TCG_AREG0); +tci_reg[index] = value; +} + +static void tci_write_reg8s(TCGRegister index, int8_t value) +{ +tci_write_reg(index, value); +} + +static void tci_write_reg16s(TCGRegister index, int16_t value) +{ +tci_write_reg(index, value); +} + +#if TCG_TARGET_REG_BITS == 64 +static void tci_write_reg32s(TCGRegister index, int32_t value) +{ +tci_write_reg(index, value); +} +#endif + +static void tci_write_reg8(TCGRegister index, uint8_t value) +{ +tci_write_reg(index, value); +} + +static void tci_write_reg16(TCGRegister index, uint16_t value) +{ +
[Qemu-devel] [PATCH 2/8] tcg: Don't declare TCG_TARGET_REG_BITS in tcg-target.h
It is now declared for all tcg targets in tcg.h, so the tcg target specific declarations are redundant. Signed-off-by: Stefan Weil w...@mail.berlios.de --- tcg/arm/tcg-target.h |1 - tcg/hppa/tcg-target.h |4 +--- tcg/ia64/tcg-target.h |2 -- tcg/mips/tcg-target.h |1 - tcg/ppc/tcg-target.h |1 - tcg/ppc64/tcg-target.h |1 - tcg/s390/tcg-target.h |6 -- tcg/sparc/tcg-target.h |6 -- 8 files changed, 1 insertions(+), 21 deletions(-) diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 0e0f69a..33afd97 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -24,7 +24,6 @@ */ #define TCG_TARGET_ARM 1 -#define TCG_TARGET_REG_BITS 32 #undef TCG_TARGET_WORDS_BIGENDIAN #undef TCG_TARGET_STACK_GROWSUP diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h index ed90efc..ec9a7bf 100644 --- a/tcg/hppa/tcg-target.h +++ b/tcg/hppa/tcg-target.h @@ -24,9 +24,7 @@ #define TCG_TARGET_HPPA 1 -#if defined(_PA_RISC1_1) -#define TCG_TARGET_REG_BITS 32 -#else +#if TCG_TARGET_REG_BITS != 32 #error unsupported #endif diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h index ddc93c1..578cf29 100644 --- a/tcg/ia64/tcg-target.h +++ b/tcg/ia64/tcg-target.h @@ -24,8 +24,6 @@ */ #define TCG_TARGET_IA64 1 -#define TCG_TARGET_REG_BITS 64 - /* We only map the first 64 registers */ #define TCG_TARGET_NB_REGS 64 enum { diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 43c5501..e2a2571 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -25,7 +25,6 @@ */ #define TCG_TARGET_MIPS 1 -#define TCG_TARGET_REG_BITS 32 #ifdef __MIPSEB__ # define TCG_TARGET_WORDS_BIGENDIAN #endif diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index f9a88c4..5c2d612 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -23,7 +23,6 @@ */ #define TCG_TARGET_PPC 1 -#define TCG_TARGET_REG_BITS 32 #define TCG_TARGET_WORDS_BIGENDIAN #define TCG_TARGET_NB_REGS 32 diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h index 5395131..8d1fb73 100644 --- 
a/tcg/ppc64/tcg-target.h +++ b/tcg/ppc64/tcg-target.h @@ -23,7 +23,6 @@ */ #define TCG_TARGET_PPC64 1 -#define TCG_TARGET_REG_BITS 64 #define TCG_TARGET_WORDS_BIGENDIAN #define TCG_TARGET_NB_REGS 32 diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h index 35ebac3..e4cd641 100644 --- a/tcg/s390/tcg-target.h +++ b/tcg/s390/tcg-target.h @@ -23,12 +23,6 @@ */ #define TCG_TARGET_S390 1 -#ifdef __s390x__ -#define TCG_TARGET_REG_BITS 64 -#else -#define TCG_TARGET_REG_BITS 32 -#endif - #define TCG_TARGET_WORDS_BIGENDIAN typedef enum TCGReg { diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index 7b4e7f9..1464ef4 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -23,12 +23,6 @@ */ #define TCG_TARGET_SPARC 1 -#if defined(__sparc_v9__) !defined(__sparc_v8plus__) -#define TCG_TARGET_REG_BITS 64 -#else -#define TCG_TARGET_REG_BITS 32 -#endif - #define TCG_TARGET_WORDS_BIGENDIAN #define TCG_TARGET_NB_REGS 32 -- 1.7.2.5
[Qemu-devel] [PATCH 3/8] tcg: Add forward declarations for local functions
These functions are defined in the tcg target specific file tcg-target.c. The forward declarations assert that every tcg target uses the same function prototype. Signed-off-by: Stefan Weil w...@mail.berlios.de --- tcg/tcg.c | 16 1 files changed, 16 insertions(+), 0 deletions(-) diff --git a/tcg/tcg.c b/tcg/tcg.c index 411f971..bdd7a67 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -63,11 +63,27 @@ #error GUEST_BASE not supported on this host. #endif +/* Forward declarations for functions declared in tcg-target.c and used here. */ static void tcg_target_init(TCGContext *s); static void tcg_target_qemu_prologue(TCGContext *s); static void patch_reloc(uint8_t *code_ptr, int type, tcg_target_long value, tcg_target_long addend); +/* Forward declarations for functions declared and used in tcg-target.c. */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str); +static void tcg_out_ld(TCGContext *s, TCGType type, int ret, int arg1, + tcg_target_long arg2); +static void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg); +static void tcg_out_movi(TCGContext *s, TCGType type, + int ret, tcg_target_long arg); +static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, + const int *const_args); +static void tcg_out_st(TCGContext *s, TCGType type, int arg, int arg1, + tcg_target_long arg2); +static int tcg_target_const_match(tcg_target_long val, + const TCGArgConstraint *arg_ct); +static int tcg_target_get_call_iarg_regs_count(int flags); + TCGOpDef tcg_op_defs[] = { #define DEF(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags }, #include tcg-opc.h -- 1.7.2.5
Re: [Qemu-devel] [PATCH 8/8] ppc: Support tcg interpreter on ppc hosts
On 17 September 2011 21:00, Stefan Weil w...@mail.berlios.de wrote: Tests of the tcg interpreter on an (emulated) ppc host needed this small change. Signed-off-by: Stefan Weil w...@mail.berlios.de --- cache-utils.h | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/cache-utils.h b/cache-utils.h index 0b65907..7c3b282 100644 --- a/cache-utils.h +++ b/cache-utils.h @@ -1,7 +1,7 @@ #ifndef QEMU_CACHE_UTILS_H #define QEMU_CACHE_UTILS_H -#if defined(_ARCH_PPC) +#if defined(_ARCH_PPC) && !defined(CONFIG_TCG_INTERPRETER) struct qemu_cache_conf { unsigned long dcache_bsize; unsigned long icache_bsize; This looks a bit odd, but I think that's partly an effect of only the PPC flush_icache_range being in this header file when for other architectures it is in tcg/*/tcg-target.h. If we could have the cache flushing be in tcg/* for every target then you wouldn't need to do an ifdef here. -- PMM
Re: [Qemu-devel] [PATCH 8/8] ppc: Support tcg interpreter on ppc hosts
Am 17.09.2011 23:31, schrieb Peter Maydell: On 17 September 2011 21:00, Stefan Weil w...@mail.berlios.de wrote: Tests of the tcg interpreter on an (emulated) ppc host needed this small change. Signed-off-by: Stefan Weil w...@mail.berlios.de --- cache-utils.h | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/cache-utils.h b/cache-utils.h index 0b65907..7c3b282 100644 --- a/cache-utils.h +++ b/cache-utils.h @@ -1,7 +1,7 @@ #ifndef QEMU_CACHE_UTILS_H #define QEMU_CACHE_UTILS_H -#if defined(_ARCH_PPC) +#if defined(_ARCH_PPC) && !defined(CONFIG_TCG_INTERPRETER) struct qemu_cache_conf { unsigned long dcache_bsize; unsigned long icache_bsize; This looks a bit odd, but I think that's partly an effect of only the PPC flush_icache_range being in this header file when for other architectures it is in tcg/*/tcg-target.h. If we could have the cache flushing be in tcg/* for every target then you wouldn't need to do an ifdef here. -- PMM That's correct.
Re: [Qemu-devel] [PATCH v3 5/6] vga: Use linear mapping + dirty logging in chain 4 memory access mode
On Thu, Sep 15, 2011 at 11:31 AM, Avi Kivity a...@redhat.com wrote: On 09/15/2011 01:01 PM, Benjamin Herrenschmidt wrote: Sure :). So the problem is that when emulating the G3 Beige machine in QEMU (default ppc32 target) we also add a PCI VGA adapter. Apparently, on x86 that PCI VGA adapter can map the special VGA regions to somewhere, namely 0xa0000. With the memory api overhaul, this also slipped into the PPC world where mapping 0xa0000 with VGA adapters is a pretty bad idea, as it's occupied by RAM. Now the discussion was on which level that mapping would happen and which devices go through which buses which then would filter certain ranges from being mapped. Basically, which way does a memory request from the CPU go on a G3 Beige machine until it arrives at the VGA adapter? I hope that concludes the actual question. Avi, if I explained this wrong, please correct me. Ok so there's several things here. First, the mapping from CPU addresses to PCI addresses. This depends on the host bridge chip. The MPC106, used in the Beige G3, itself supports different types of mappings. From memory, the way it's configured in a G3 is to have a 1:1 mapping of 80000000 CPU to 80000000 PCI. That means that with this basic mapping, you cannot generate memory accesses to low PCI addresses such as 0xa0000. Alex, what this means (I think) is that: pci_grackle_init() needs to create a container memory region and pass it to pci_register_bus() as the pci address space, and create an alias starting at 0x80000000 of the pci address space, and map that alias at address 0x80000000 of the system address space. See pc_init1() creating pci_memory and passing it to i440fx_init(), which then maps some aliases into the system address space and also gives it to pci_bus_new(). It's essentially the same thing with different details. I think the attached patch (on top of ppc-next) should do it, but it doesn't. Only the top area of the screen is shown, the rest is black.
I don't remember (but it's possible) if it has another region which maps some other (high address) part of the address space down to 0 PCI. Typically that would be a smaller region which specifically allow access to the ISA hole that way. That would be done by mapping yet another alias. -- error compiling committee.c: too many arguments to function From c07f1116220cba7d2ee769b03de59b5a874b76db Mon Sep 17 00:00:00 2001 Message-Id: c07f1116220cba7d2ee769b03de59b5a874b76db.1316295419.git.blauwir...@gmail.com From: Blue Swirl blauwir...@gmail.com Date: Sat, 17 Sep 2011 20:30:50 + Subject: [PATCH] PPC: use memory API to construct the PCI hole Avoid vga.chain4 mapping by constructing a PCI hole for upper 2G of the PCI space. Signed-off-by: Blue Swirl blauwir...@gmail.com --- hw/grackle_pci.c | 11 ++- hw/ppc_newworld.c |2 -- hw/ppc_oldworld.c |2 -- hw/unin_pci.c | 18 -- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/hw/grackle_pci.c b/hw/grackle_pci.c index 9d3ff7d..94a608e 100644 --- a/hw/grackle_pci.c +++ b/hw/grackle_pci.c @@ -41,6 +41,8 @@ typedef struct GrackleState { SysBusDevice busdev; PCIHostState host_state; +MemoryRegion pci_mmio; +MemoryRegion pci_hole; } GrackleState; /* Don't know if this matches real hardware, but it agrees with OHW. 
*/ @@ -73,11 +75,18 @@ PCIBus *pci_grackle_init(uint32_t base, qemu_irq *pic, qdev_init_nofail(dev); s = sysbus_from_qdev(dev); d = FROM_SYSBUS(GrackleState, s); + +memory_region_init(d-pci_mmio, pci-mmio, 0x1ULL); +memory_region_init_alias(d-pci_hole, pci-hole, d-pci_mmio, + 0x8000ULL, 0x7e00ULL); +memory_region_add_subregion(address_space_mem, 0x8000ULL, +d-pci_hole); + d-host_state.bus = pci_register_bus(d-busdev.qdev, pci, pci_grackle_set_irq, pci_grackle_map_irq, pic, - address_space_mem, + d-pci_mmio, address_space_io, 0, 4); diff --git a/hw/ppc_newworld.c b/hw/ppc_newworld.c index 5fb9359..bcdc0a3 100644 --- a/hw/ppc_newworld.c +++ b/hw/ppc_newworld.c @@ -263,8 +263,6 @@ static void ppc_core99_init (ram_addr_t ram_size, } } -isa_mem_base = 0x8000; - /* Register 8 MB of ISA IO space */ isa_mmio_init(0xf200, 0x0080); diff --git a/hw/ppc_oldworld.c b/hw/ppc_oldworld.c index 3857075..5c17944 100644 --- a/hw/ppc_oldworld.c +++ b/hw/ppc_oldworld.c @@ -208,8 +208,6 @@ static void ppc_heathrow_init (ram_addr_t ram_size, } } -isa_mem_base = 0x8000; - /* Register 2 MB of ISA IO space */
Re: [Qemu-devel] [PATCH 3/8] tcg: Add forward declarations for local functions
On 17 September 2011 21:00, Stefan Weil w...@mail.berlios.de wrote: +/* Forward declarations for functions declared and used in tcg-target.c. */ +static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str); +static void tcg_out_ld(TCGContext *s, TCGType type, int ret, int arg1, + tcg_target_long arg2); +static void tcg_out_mov(TCGContext *s, TCGType type, int ret, int arg); +static void tcg_out_movi(TCGContext *s, TCGType type, + int ret, tcg_target_long arg); +static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, + const int *const_args); +static void tcg_out_st(TCGContext *s, TCGType type, int arg, int arg1, + tcg_target_long arg2); +static int tcg_target_const_match(tcg_target_long val, + const TCGArgConstraint *arg_ct); +static int tcg_target_get_call_iarg_regs_count(int flags); I'm tempted to submit a bulk rename patch that renames the functions in this list which don't start 'tcg_target_' so that they do... -- PMM
Re: [Qemu-devel] [PATCH] Add privilege level check to several Cop0 instructions.
The patch applies to a8467c7a0e8b024a18608ff7db31ca2f2297e641. -Original Message- From: qemu-devel-bounces+ericj=mips@nongnu.org [mailto:qemu-devel-bounces+ericj=mips@nongnu.org] On Behalf Of Eric Johnson Sent: Saturday, September 17, 2011 5:06 PM To: qemu-devel@nongnu.org; aurel...@aurel32.net Subject: [Qemu-devel] [PATCH] Add privilege level check to several Cop0 instructions. The MIPS Architecture Verification Programs (AVPs) check privileged instructions for the required privilege level. These changes are needed to pass the AVP suite. Signed-off-by: Eric Johnson er...@mips.com --- target-mips/translate.c | 10 ++ 1 files changed, 10 insertions(+), 0 deletions(-) diff --git a/target-mips/translate.c b/target-mips/translate.c index d5b1c76..d99a716 100644 --- a/target-mips/translate.c +++ b/target-mips/translate.c @@ -5940,6 +5940,8 @@ static void gen_cp0 (CPUState *env, DisasContext *ctx, uint32_t opc, int rt, int { const char *opn = ldst; +check_cp0_enabled(ctx); + switch (opc) { case OPC_MFC0: if (rt == 0) { @@ -10125,6 +10127,7 @@ static void gen_pool32axf (CPUState *env, DisasContext *ctx, int rt, int rs, #ifndef CONFIG_USER_ONLY case MFC0: case MFC0 + 32: +check_cp0_enabled(ctx); if (rt == 0) { /* Treat as NOP. 
*/ break; @@ -10136,6 +10139,7 @@ static void gen_pool32axf (CPUState *env, DisasContext *ctx, int rt, int rs, { TCGv t0 = tcg_temp_new(); +check_cp0_enabled(ctx); gen_load_gpr(t0, rt); gen_mtc0(env, ctx, t0, rs, (ctx->opcode >> 11) & 0x7); tcg_temp_free(t0); @@ -10230,10 +10234,12 @@ static void gen_pool32axf (CPUState *env, DisasContext *ctx, int rt, int rs, switch (minor) { case RDPGPR: check_insn(env, ctx, ISA_MIPS32R2); +check_cp0_enabled(ctx); gen_load_srsgpr(rt, rs); break; case WRPGPR: check_insn(env, ctx, ISA_MIPS32R2); +check_cp0_enabled(ctx); gen_store_srsgpr(rt, rs); break; default: @@ -10276,6 +10282,7 @@ static void gen_pool32axf (CPUState *env, DisasContext *ctx, int rt, int rs, { TCGv t0 = tcg_temp_new(); +check_cp0_enabled(ctx); save_cpu_state(ctx, 1); gen_helper_di(t0); gen_store_gpr(t0, rs); @@ -10288,6 +10295,7 @@ static void gen_pool32axf (CPUState *env, DisasContext *ctx, int rt, int rs, { TCGv t0 = tcg_temp_new(); +check_cp0_enabled(ctx); save_cpu_state(ctx, 1); gen_helper_ei(t0); gen_store_gpr(t0, rs); @@ -10765,6 +10773,7 @@ static void decode_micromips32_opc (CPUState *env, DisasContext *ctx, minor = (ctx->opcode >> 12) & 0xf; switch (minor) { case CACHE: +check_cp0_enabled(ctx); /* Treat as no-op. */ break; case LWC2: @@ -12216,6 +12225,7 @@ static void decode_opc (CPUState *env, DisasContext *ctx, int *is_branch) break; case OPC_CACHE: check_insn(env, ctx, ISA_MIPS3 | ISA_MIPS32); +check_cp0_enabled(ctx); /* Treat as NOP. */ break; case OPC_PREF:
[Qemu-devel] [PATCH] Allow microMIPS SWP and SDP to have RD equal to BASE.
The microMIPS SWP and SDP instructions do not modify GPRs. So their behavior is well defined when RD equals BASE. The MIPS Architecture Verification Programs (AVPs) check that they work as expected. This is required for AVPs to pass. Signed-off-by: Eric Johnson er...@mips.com --- target-mips/translate.c | 10 +- 1 files changed, 9 insertions(+), 1 deletions(-) The patch applies to a8467c7a0e8b024a18608ff7db31ca2f2297e641. diff --git a/target-mips/translate.c b/target-mips/translate.c index d5b1c76..82cf75b 100644 --- a/target-mips/translate.c +++ b/target-mips/translate.c @@ -10034,7 +10034,7 @@ static void gen_ldst_pair (DisasContext *ctx, uint32_t opc, int rd, const char *opn = "ldst_pair"; TCGv t0, t1; -if (ctx->hflags & MIPS_HFLAG_BMASK || rd == 31 || rd == base) { +if (ctx->hflags & MIPS_HFLAG_BMASK || rd == 31) { generate_exception(ctx, EXCP_RI); return; } @@ -10046,6 +10046,10 @@ static void gen_ldst_pair (DisasContext *ctx, uint32_t opc, int rd, switch (opc) { case LWP: +if (rd == base) { +generate_exception(ctx, EXCP_RI); +return; +} save_cpu_state(ctx, 0); op_ld_lw(t1, t0, ctx); gen_store_gpr(t1, rd); @@ -10067,6 +10071,10 @@ static void gen_ldst_pair (DisasContext *ctx, uint32_t opc, int rd, break; #ifdef TARGET_MIPS64 case LDP: +if (rd == base) { +generate_exception(ctx, EXCP_RI); +return; +} save_cpu_state(ctx, 0); op_ld_ld(t1, t0, ctx); gen_store_gpr(t1, rd);
[Qemu-devel] [PATCH] Fix compile when MIPS_DEBUG_DISAS is defined.
When MIPS_DEBUG_DISAS is defined the gen_logic_imm, gen_slt_imm, gen_cond_move, gen_logic and gen_slt functions cause errors because ctx is not defined. Fixed the functions by passing in the DisasContext. Signed-off-by: Eric Johnson er...@mips.com --- target-mips/translate.c | 72 +- 1 files changed, 39 insertions(+), 33 deletions(-) This patch applies to a8467c7a0e8b024a18608ff7db31ca2f2297e641. diff --git a/target-mips/translate.c b/target-mips/translate.c index d5b1c76..00afc48 100644 --- a/target-mips/translate.c +++ b/target-mips/translate.c @@ -1420,7 +1420,8 @@ static void gen_arith_imm (CPUState *env, DisasContext *ctx, uint32_t opc, } /* Logic with immediate operand */ -static void gen_logic_imm (CPUState *env, uint32_t opc, int rt, int rs, int16_t imm) +static void gen_logic_imm(CPUState *env, DisasContext *ctx, uint32_t opc, +int rt, int rs, int16_t imm) { target_ulong uimm; const char *opn = imm logic; @@ -1463,7 +1464,8 @@ static void gen_logic_imm (CPUState *env, uint32_t opc, int rt, int rs, int16_t } /* Set on less than with immediate operand */ -static void gen_slt_imm (CPUState *env, uint32_t opc, int rt, int rs, int16_t imm) +static void gen_slt_imm(CPUState *env, DisasContext *ctx, uint32_t opc, +int rt, int rs, int16_t imm) { target_ulong uimm = (target_long)imm; /* Sign extend to 32/64 bits */ const char *opn = imm arith; @@ -1764,7 +1766,8 @@ static void gen_arith (CPUState *env, DisasContext *ctx, uint32_t opc, } /* Conditional move */ -static void gen_cond_move (CPUState *env, uint32_t opc, int rd, int rs, int rt) +static void gen_cond_move(CPUState *env, DisasContext *ctx, uint32_t opc, +int rd, int rs, int rt) { const char *opn = cond move; int l1; @@ -1802,7 +1805,8 @@ static void gen_cond_move (CPUState *env, uint32_t opc, int rd, int rs, int rt) } /* Logic */ -static void gen_logic (CPUState *env, uint32_t opc, int rd, int rs, int rt) +static void gen_logic(CPUState *env, DisasContext *ctx, uint32_t opc, int rd, +int rs, int rt) { const 
char *opn = "logic"; @@ -1863,7 +1867,8 @@ static void gen_logic (CPUState *env, uint32_t opc, int rd, int rs, int rt) } /* Set on lower than */ -static void gen_slt (CPUState *env, uint32_t opc, int rd, int rs, int rt) +static void gen_slt(CPUState *env, DisasContext *ctx, uint32_t opc, int rd, +int rs, int rt) { const char *opn = "slt"; TCGv t0, t1; @@ -8763,10 +8768,10 @@ static int decode_extended_mips16_opc (CPUState *env, DisasContext *ctx, gen_arith_imm(env, ctx, OPC_ADDIU, rx, rx, imm); break; case M16_OPC_SLTI: -gen_slt_imm(env, OPC_SLTI, 24, rx, imm); +gen_slt_imm(env, ctx, OPC_SLTI, 24, rx, imm); break; case M16_OPC_SLTIU: -gen_slt_imm(env, OPC_SLTIU, 24, rx, imm); +gen_slt_imm(env, ctx, OPC_SLTIU, 24, rx, imm); break; case M16_OPC_I8: switch (funct) { @@ -8978,14 +8983,14 @@ static int decode_mips16_opc (CPUState *env, DisasContext *ctx, { int16_t imm = (uint8_t) ctx->opcode; -gen_slt_imm(env, OPC_SLTI, 24, rx, imm); +gen_slt_imm(env, ctx, OPC_SLTI, 24, rx, imm); } break; case M16_OPC_SLTIU: { int16_t imm = (uint8_t) ctx->opcode; -gen_slt_imm(env, OPC_SLTIU, 24, rx, imm); +gen_slt_imm(env, ctx, OPC_SLTIU, 24, rx, imm); } break; case M16_OPC_I8: @@ -9061,7 +9066,7 @@ static int decode_mips16_opc (CPUState *env, DisasContext *ctx, { int16_t imm = (uint8_t) ctx->opcode; -gen_logic_imm(env, OPC_XORI, 24, rx, imm); +gen_logic_imm(env, ctx, OPC_XORI, 24, rx, imm); } break; #if defined(TARGET_MIPS64) @@ -9173,10 +9178,10 @@ static int decode_mips16_opc (CPUState *env, DisasContext *ctx, } break; case RR_SLT: -gen_slt(env, OPC_SLT, 24, rx, ry); +gen_slt(env, ctx, OPC_SLT, 24, rx, ry); break; case RR_SLTU: -gen_slt(env, OPC_SLTU, 24, rx, ry); +gen_slt(env, ctx, OPC_SLTU, 24, rx, ry); break; case RR_BREAK: generate_exception(ctx, EXCP_BREAK); @@ -9197,22 +9202,22 @@ static int decode_mips16_opc (CPUState *env, DisasContext *ctx, break; #endif case RR_CMP: -gen_logic(env, OPC_XOR, 24, rx, ry); +gen_logic(env, ctx, OPC_XOR, 24, rx, ry); break; case RR_NEG: gen_arith(env, 
ctx, OPC_SUBU, rx, 0, ry); break; case RR_AND: -gen_logic(env, OPC_AND, rx, rx, ry); +gen_logic(env, ctx, OPC_AND, rx, rx, ry); break; case RR_OR: -gen_logic(env, OPC_OR, rx, rx, ry); +gen_logic(env, ctx, OPC_OR, rx, rx, ry);
Re: [Qemu-devel] [PATCH 5/8] tcg: Add interpreter for bytecode
Stefan Weil w...@mail.berlios.de writes: + +switch (opc) { +case INDEX_op_end: +case INDEX_op_nop: +break; You could probably get some more speed out of this by using a threaded interpreter with gcc's computed goto extension. That's typically significantly faster than a plain switch in a loop. static void *ops[] = { &&op1, &&op2, ... }; #define NEXT() goto *ops[*tb_ptr++]; op1: ... NEXT(); -Andi -- a...@linux.intel.com -- Speaking for myself only
Re: [Qemu-devel] [PATCH 5/8] tcg: Add interpreter for bytecode
Am 18.09.2011 06:03, schrieb Andi Kleen: Stefan Weil w...@mail.berlios.de writes: + + switch (opc) { + case INDEX_op_end: + case INDEX_op_nop: + break; You could probably get some more speed out of this by using a threaded interpreter with gcc's computed goto extension. That's typically significantly faster than a plain switch in a loop. static void *ops[] = { &&op1, &&op2, ... }; #define NEXT() goto *ops[*tb_ptr++]; op1: ... NEXT(); -Andi Is there really any difference in the generated code? gcc already uses a jump table internally to handle the switch cases. - Stefan