[Qemu-devel] [PATCH] block: simplify bdrv_drop_intermediate

2013-10-12 Thread Fam Zheng
There is only one failure point: bdrv_change_backing_file in this
function, so we can drop the qlist and try to change the backing file
before deleting anything.

This way bdrv_drop_intermediate is simplified while keeping the
operation transactional. A bonus is that dropping an active BDS is also
supported, by swapping the base and top. Although no caller uses this yet, the
comment is updated to reflect the change.

Signed-off-by: Fam Zheng f...@redhat.com
---
 block.c| 100 ++---
 block/commit.c |   1 +
 2 files changed, 32 insertions(+), 69 deletions(-)

diff --git a/block.c b/block.c
index fd05a80..b9e073f 100644
--- a/block.c
+++ b/block.c
@@ -2130,18 +2130,11 @@ BlockDriverState *bdrv_find_overlay(BlockDriverState 
*active,
 return overlay;
 }
 
-typedef struct BlkIntermediateStates {
-BlockDriverState *bs;
-QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
-} BlkIntermediateStates;
-
-
 /*
- * Drops images above 'base' up to and including 'top', and sets the image
- * above 'top' to have base as its backing file.
- *
- * Requires that the overlay to 'top' is opened r/w, so that the backing file
- * information in 'bs' can be properly updated.
+ * Drops images above 'base' up to and including 'top', and sets new 'base'
+ * as backing_hd of top_overlay (the image orignally has 'top' as backing
+ * file). top_overlay may be NULL if 'top' is active, no such update needed.
+ * Requires that the top_overlay to 'top' is opened r/w.
  *
  * E.g., this will convert the following chain:
  * bottom - base - intermediate - top - active
@@ -2158,86 +2151,55 @@ typedef struct BlkIntermediateStates {
  *
  * base - active
  *
- * Error conditions:
- *  if active == top, that is considered an error
+ * It also allows active==top, in which case it converts:
+ *
+ * base - intermediate - active (also top)
+ *
+ * to
+ *
+ * base == active == top, i.e. only base remains: *top == *base when return.
  *
  */
 int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
BlockDriverState *base)
 {
-BlockDriverState *intermediate;
+BlockDriverState *pbs;
+BlockDriverState *overlay = NULL;
 BlockDriverState *base_bs = NULL;
-BlockDriverState *new_top_bs = NULL;
-BlkIntermediateStates *intermediate_state, *next;
 int ret = -EIO;
 
-QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
-QSIMPLEQ_INIT(states_to_delete);
-
 if (!top-drv || !base-drv) {
 goto exit;
 }
 
-new_top_bs = bdrv_find_overlay(active, top);
-
-if (new_top_bs == NULL) {
-/* we could not find the image above 'top', this is an error */
-goto exit;
-}
-
-/* special case of new_top_bs-backing_hd already pointing to base - 
nothing
- * to do, no intermediate images */
-if (new_top_bs-backing_hd == base) {
-ret = 0;
-goto exit;
-}
-
-intermediate = top;
-
-/* now we will go down through the list, and add each BDS we find
- * into our deletion queue, until we hit the 'base'
- */
-while (intermediate) {
-intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
-intermediate_state-bs = intermediate;
-QSIMPLEQ_INSERT_TAIL(states_to_delete, intermediate_state, entry);
-
-if (intermediate-backing_hd == base) {
-base_bs = intermediate-backing_hd;
-break;
+if (active != top) {
+/* If there's an overlay, its backing_hd points to top's BDS now,
+ * the top image is dropped but this BDS structure is kept and swapped
+ * with base, this way we keep the pointers valid after dropping top */
+overlay = bdrv_find_overlay(active, top);
+ret = bdrv_change_backing_file(overlay, base-filename,
+   base-drv ?
+base-drv-format_name : );
+if (ret) {
+goto exit;
 }
-intermediate = intermediate-backing_hd;
-}
-if (base_bs == NULL) {
-/* something went wrong, we did not end at the base. safely
- * unravel everything, and exit with error */
-goto exit;
 }
 
-/* success - we can delete the intermediate states, and link top-base */
-ret = bdrv_change_backing_file(new_top_bs, base_bs-filename,
-   base_bs-drv ? base_bs-drv-format_name : 
);
-if (ret) {
-goto exit;
+for (pbs = top-backing_hd; pbs != base; pbs = base_bs) {
+assert(pbs);
+base_bs = pbs-backing_hd;
+pbs-backing_hd = NULL;
+bdrv_unref(pbs);
 }
-new_top_bs-backing_hd = base_bs;
-
 
-QSIMPLEQ_FOREACH_SAFE(intermediate_state, states_to_delete, entry, next) {
-/* so that bdrv_close() does not recursively close the chain */
-intermediate_state-bs-backing_hd = NULL;
-bdrv_unref(intermediate_state-bs);
-}
-  

[Qemu-devel] [PATCH] mirror: drop local_err in mirror_complete

2013-10-12 Thread Fam Zheng
There is errp passed in, so no need for local_err and error_propagate.

Signed-off-by: Fam Zheng f...@redhat.com
---
 block/mirror.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/block/mirror.c b/block/mirror.c
index 7b95acf..f2e9558 100644
--- a/block/mirror.c
+++ b/block/mirror.c
@@ -505,15 +505,13 @@ static void mirror_iostatus_reset(BlockJob *job)
 static void mirror_complete(BlockJob *job, Error **errp)
 {
 MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
-Error *local_err = NULL;
 int ret;
 
-ret = bdrv_open_backing_file(s-target, NULL, local_err);
+ret = bdrv_open_backing_file(s-target, NULL, errp);
 if (ret  0) {
 char backing_filename[PATH_MAX];
 bdrv_get_full_backing_filename(s-target, backing_filename,
sizeof(backing_filename));
-error_propagate(errp, local_err);
 return;
 }
 if (!s-synced) {
-- 
1.8.3.1




[Qemu-devel] QEMU GSoC featured on Google Open Source Blog

2013-10-12 Thread Stefan Hajnoczi
Stephanie Taylor at the Google Open Source Blog has helped us publish
a post that features two projects from this year's Google Summer of
Code:

http://google-opensource.blogspot.de/2013/10/google-summer-of-code-veteran-orgs-qemu.html

Mike and Osier contributed summaries of the QEMU Integrated
Copy/Paste and Libvirt API to query IP addresses for given domain
projects.

Stefan



Re: [Qemu-devel] [PATCH] configure: create fsdev/ directory

2013-10-12 Thread Alex Bennée

m...@tls.msk.ru writes:

 In some cases when building with parallelism (make -jN),
 build fails because the directory where output files are
 supposed to be does not exist.  In particular, when make
 decides to build virtfs-proxy-helper.1 before other files
 in fsdev/, build will fail with the following error:
snip

Isn't this the sort of thing Make dependencies are for?

-- 
Alex Bennée



[Qemu-devel] [Bug 1174654] Re: qemu-system-x86_64 takes 100% CPU after host machine resumed from suspend to ram

2013-10-12 Thread tobias
Hi,

I am also encountering the bug of high cpu usage for a windows guest after 
suspend resume of my ubuntu host. Problem was in 13.04 but it's also still 
there in 13.10.
The windows guest has virtio / spice  enabled.
Linux guests do not get the high cpu usage.
Are there any more logs required, or is investigation going on upstream?
I am not sure where to look, and am curious whether there are other workarounds.

** Attachment added: win7.xml
   
https://bugs.launchpad.net/qemu/+bug/1174654/+attachment/3875634/+files/win7.xml

-- 
You received this bug notification because you are a member of qemu-
devel-ml, which is subscribed to QEMU.
https://bugs.launchpad.net/bugs/1174654

Title:
  qemu-system-x86_64 takes 100% CPU after host machine resumed from
  suspend to ram

Status in QEMU:
  Confirmed
Status in “qemu” package in Ubuntu:
  Invalid

Bug description:
  I have Windows XP SP3  inside qemu VM. All works fine in 12.10. But
  after upgrading to 13.04 I have to restart the VM each time I
  resume my host machine, because the qemu process starts to take CPU
  cycles and OS inside VM is very slow and sluggish. However it's still
  controllable and could be shutdown by itself.

  According to the taskmgr any active process takes 99% CPU. It's not
  stuck on some single process.

To manage notifications about this bug go to:
https://bugs.launchpad.net/qemu/+bug/1174654/+subscriptions



Re: [Qemu-devel] kvm binary is deprecated

2013-10-12 Thread Alexander Binun
Hello Stefan,
   The qemu used by me is the one installed using apt-get install qemu. The 
executable is in /usr/bin. The KVM driver is the one supplied with Ubuntu 13.04.

The version of qemu is 1.4.0 (after running qemu --version I get the message):

  --- QEMU emulator version 1.4.0 (Debian 1.4.0+dfsg-1expubuntu4), Copyright 
(c) 2003-2008 Fabrice Bellard

You mean I should use the build-from-sources qemu (getting the sources from 
git://git.qemu-project.org/qemu.git) ? Should I then compile from sources and 
mount the KVM ?

Regards, 
   Alex



  When we try to create a VM the following error message appears:
   --- kvm binary is deprecated, please use qemu-system-x86_64 instead
  
  The same message appears when I try to run kvm --version.
  
  Question: how must we upgrade/downgrade KVM or Qemu in order to make them 
  collaborate properly?
 
 It sounds like you may be building the old qemu-kvm.git source code.
 Last year qemu-kvm.git was merged back into qemu.git.
 
 It means you should use git://git.qemu-project.org/qemu.git if you are
 building from source.
 
 Some distros are creating transitional packages or wrapper scripts that
 build QEMU (qemu-system-x86_64) and provide a /usr/bin/kvm or qemu-kvm
 executable.
 
 More info:
 http://blog.vmsplice.net/2012/12/qemu-kvmgit-has-unforked-back-into.html
 
 Stefan
 







Re: [Qemu-devel] [PATCH] configure: create fsdev/ directory

2013-10-12 Thread Paolo Bonzini
Il 11/10/2013 11:05, Michael Tokarev ha scritto:
 In some cases when building with parallelism (make -jN),
 build fails because the directory where output files are
 supposed to be does not exist.  In particular, when make
 decides to build virtfs-proxy-helper.1 before other files
 in fsdev/, build will fail with the following error:
 
 perl -Ww -- BUILDDIR/scripts/texi2pod.pl 
 BUILDDIR/fsdev/virtfs-proxy-helper.texi fsdev/virtfs-proxy-helper.pod  
 pod2man --utf8 --section=1 --center=  --release=  
 fsdev/virtfs-proxy-helper.pod  fsdev/virtfs-proxy-helper.1
 opening fsdev/virtfs-proxy-helper.pod: No such file or directory
 
 Create the `fsdev' subdir the same way as other conditional
 subdirs (eg, dtc or pixman) are created in configure.
 
 Might be better to extract all dirs from $tools and other
 common vars and run mkdir on all of these, but this needs
 some review/restructuring first.
 
 Signed-off-by: Michael Tokarev m...@tls.msk.ru
 ---
  configure |1 +
  1 file changed, 1 insertion(+)
 
 diff --git a/configure b/configure
 index 23dbaaf..203084a 100755
 --- a/configure
 +++ b/configure
 @@ -3576,6 +3576,7 @@ if test $softmmu = yes ; then
  if test $cap = yes  test $linux = yes  test $attr = yes ; then
virtfs=yes
tools=$tools fsdev/virtfs-proxy-helper\$(EXESUF)
 +  mkdir -p fsdev
  else
if test $virtfs = yes; then
  error_exit VirtFS is supported only on Linux and requires 
 libcap-devel and libattr-devel
 

Please modify around

DIRS=tests tests/tcg tests/tcg/cris tests/tcg/lm32 tests/libqos
tests/qapi-schema tests/tcg/xtensa tests/qemu-iotests
DIRS=$DIRS pc-bios/optionrom pc-bios/spapr-rtas pc-bios/s390-ccw
DIRS=$DIRS roms/seabios roms/vgabios
DIRS=$DIRS qapi-generated

instead.

Paolo



Re: [Qemu-devel] [PATCH] block: simplify bdrv_drop_intermediate

2013-10-12 Thread Jeff Cody
On Sat, Oct 12, 2013 at 02:05:45PM +0800, Fam Zheng wrote:
 There is only one failure point: bdrv_change_backing_file in this
 function, so we can drop the qlist and try to change the backing file
 before deleting anything.
 
 This way bdrv_drop_intermediate is simplified while keeping the
 operation transactional. A bonus is dropping an active BDS is supported
 too by swapping the base and top. Although no caller uses this yet, the
 comment is updated to reflect the change.
 
 Signed-off-by: Fam Zheng f...@redhat.com
 ---
  block.c| 100 
 ++---
  block/commit.c |   1 +
  2 files changed, 32 insertions(+), 69 deletions(-)
 
 diff --git a/block.c b/block.c
 index fd05a80..b9e073f 100644
 --- a/block.c
 +++ b/block.c
 @@ -2130,18 +2130,11 @@ BlockDriverState *bdrv_find_overlay(BlockDriverState 
 *active,
  return overlay;
  }
  
 -typedef struct BlkIntermediateStates {
 -BlockDriverState *bs;
 -QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
 -} BlkIntermediateStates;
 -
 -
  /*
 - * Drops images above 'base' up to and including 'top', and sets the image
 - * above 'top' to have base as its backing file.
 - *
 - * Requires that the overlay to 'top' is opened r/w, so that the backing file
 - * information in 'bs' can be properly updated.
 + * Drops images above 'base' up to and including 'top', and sets new 'base'
 + * as backing_hd of top_overlay (the image orignally has 'top' as backing
 + * file). top_overlay may be NULL if 'top' is active, no such update needed.
 + * Requires that the top_overlay to 'top' is opened r/w.
   *
   * E.g., this will convert the following chain:
   * bottom - base - intermediate - top - active
 @@ -2158,86 +2151,55 @@ typedef struct BlkIntermediateStates {
   *
   * base - active
   *
 - * Error conditions:
 - *  if active == top, that is considered an error
 + * It also allows active==top, in which case it converts:
 + *
 + * base - intermediate - active (also top)
 + *
 + * to
 + *
 + * base == active == top, i.e. only base remains: *top == *base when return.
   *
   */
  int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
 BlockDriverState *base)
  {
 -BlockDriverState *intermediate;
 +BlockDriverState *pbs;
 +BlockDriverState *overlay = NULL;
  BlockDriverState *base_bs = NULL;
 -BlockDriverState *new_top_bs = NULL;
 -BlkIntermediateStates *intermediate_state, *next;
  int ret = -EIO;
  
 -QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
 -QSIMPLEQ_INIT(states_to_delete);
 -
  if (!top-drv || !base-drv) {
  goto exit;
  }
  
 -new_top_bs = bdrv_find_overlay(active, top);
 -
 -if (new_top_bs == NULL) {
 -/* we could not find the image above 'top', this is an error */
 -goto exit;
 -}
 -
 -/* special case of new_top_bs-backing_hd already pointing to base - 
 nothing
 - * to do, no intermediate images */
 -if (new_top_bs-backing_hd == base) {
 -ret = 0;
 -goto exit;
 -}
 -
 -intermediate = top;
 -
 -/* now we will go down through the list, and add each BDS we find
 - * into our deletion queue, until we hit the 'base'
 - */
 -while (intermediate) {
 -intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
 -intermediate_state-bs = intermediate;
 -QSIMPLEQ_INSERT_TAIL(states_to_delete, intermediate_state, entry);
 -
 -if (intermediate-backing_hd == base) {
 -base_bs = intermediate-backing_hd;
 -break;
 +if (active != top) {
 +/* If there's an overlay, its backing_hd points to top's BDS now,
 + * the top image is dropped but this BDS structure is kept and 
 swapped
 + * with base, this way we keep the pointers valid after dropping top 
 */
 +overlay = bdrv_find_overlay(active, top);

bdrv_find_overlay() can return NULL, this needs to be checked for
error.

 +ret = bdrv_change_backing_file(overlay, base-filename,
 +   base-drv ?
 +base-drv-format_name : );

Using 'base' here makes the assumption that 'base' is definitely in
the  BDS chain, starting from overlay.  This was previously explicitly
verified by walking down through chain (this function made sure that
all BDS's specified - active, top, base - were all in the same chain, in
the correct order).

I guess a case could be made that we don't need to do that check, as
the current usage of this function verifies (in qmp_block_commit())
that base is indeed part of top.  

If we are going to go with that assumption, we should explicitly
mention that base needs to be verified to be in the chain prior to
calling this function, otherwise we will assert().

 +if (ret) {
 +goto exit;
  }
 -intermediate = intermediate-backing_hd;
 -}
 -

Re: [Qemu-devel] [PATCH] configure: create fsdev/ directory

2013-10-12 Thread Michael Tokarev

12.10.2013 20:28, Paolo Bonzini wrote:

Il 11/10/2013 11:05, Michael Tokarev ha scritto:

[]

--- a/configure
+++ b/configure
@@ -3576,6 +3576,7 @@ if test $softmmu = yes ; then
  if test $cap = yes  test $linux = yes  test $attr = yes ; then
virtfs=yes
tools=$tools fsdev/virtfs-proxy-helper\$(EXESUF)
+  mkdir -p fsdev
  else
if test $virtfs = yes; then
  error_exit VirtFS is supported only on Linux and requires libcap-devel 
and libattr-devel



Please modify around

DIRS=tests tests/tcg tests/tcg/cris tests/tcg/lm32 tests/libqos
tests/qapi-schema tests/tcg/xtensa tests/qemu-iotests
DIRS=$DIRS pc-bios/optionrom pc-bios/spapr-rtas pc-bios/s390-ccw
DIRS=$DIRS roms/seabios roms/vgabios
DIRS=$DIRS qapi-generated

instead.


I considered doing it there initially, but decided to add it
to the other place, because that's where virtfs variable
is set.  The place you're referring to will need to have a
condition `if' based on $virtfs value.

Also, there are other places below this virtfs test (which I
modified) which runs mkdir (dtc, pixman), so this is not
something unusual.

Thanks,

/mjt



[Qemu-devel] [PULL 01/17] tcg: Use TCGMemOp for TCGLabelQemuLdst.opc

2013-10-12 Thread Richard Henderson
Signed-off-by: Richard Henderson r...@twiddle.net
---
 tcg/tcg-be-ldst.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tcg/tcg-be-ldst.h b/tcg/tcg-be-ldst.h
index 2826d29..284db0c 100644
--- a/tcg/tcg-be-ldst.h
+++ b/tcg/tcg-be-ldst.h
@@ -25,7 +25,7 @@
 
 typedef struct TCGLabelQemuLdst {
 int is_ld:1;/* qemu_ld: 1, qemu_st: 0 */
-int opc:4;
+TCGMemOp opc:4;
 TCGReg addrlo_reg;  /* reg index for low word of guest virtual addr */
 TCGReg addrhi_reg;  /* reg index for high word of guest virtual addr */
 TCGReg datalo_reg;  /* reg index for low word to be loaded or stored */
-- 
1.8.1.4




[Qemu-devel] [PULL 00/17] tcg ldst conversions

2013-10-12 Thread Richard Henderson
All of these patches have been posted over the month of September.

This pull includes conversions of 4 of the tcg backends to the new
ldst opcodes.  There are 2 more backends that I have also converted,
ia64 and sparc, but at present I'm not able to re-test those so I've
left them out.

I also include the conversion of the alpha front-end to the new helpers.


r~


The following changes since commit 1cdae4573d7613149348d834c605bfbe3c7d405b:

  Merge remote-tracking branch 'mdroth/qga-pull-2013-10-10' into staging 
(2013-10-11 09:38:07 -0700)

are available in the git repository at:


  git://github.com/rth7680/qemu.git tcg-ldst-6

for you to fetch changes up to f8da40aefbd1b40a0c5ab6fb25987943fe01125c:

  target-alpha: Convert to new ldst opcodes (2013-10-12 16:19:20 -0700)


Richard Henderson (17):
  tcg: Use TCGMemOp for TCGLabelQemuLdst.opc
  tcg-i386: Use TCGMemOp within qemu_ldst routines
  tcg-i386: Tidy softmmu routines
  tcg-i386: Remove cb output restriction from qemu_st8 for i386
  tcg-i386: Support new ldst opcodes
  tcg-arm: Use TCGMemOp within qemu_ldst routines
  tcg-arm: Convert to le/be ldst helpers
  tcg-arm: Tidy variable naming convention in qemu_ld/st
  tcg-arm: Convert to new ldst opcodes
  tcg-arm: Improve GUEST_BASE qemu_ld/st
  tcg-ppc: Use TCGMemOp within qemu_ldst routines
  tcg-ppc64: Use TCGMemOp within qemu_ldst routines
  tcg-ppc: Convert to le/be ldst helpers
  tcg-ppc64: Convert to le/be ldst helpers
  tcg-ppc: Support new ldst opcodes
  tcg-ppc64: Support new ldst opcodes
  target-alpha: Convert to new ldst opcodes

 target-alpha/translate.c |  49 ++--
 tcg/arm/tcg-target.c | 556 
 tcg/arm/tcg-target.h |   2 +-
 tcg/i386/tcg-target.c| 643 +--
 tcg/i386/tcg-target.h|   2 +-
 tcg/ppc/tcg-target.c | 202 +++
 tcg/ppc/tcg-target.h |   2 +-
 tcg/ppc64/tcg-target.c   | 163 +---
 tcg/ppc64/tcg-target.h   |   2 +-
 tcg/tcg-be-ldst.h|   2 +-
 10 files changed, 722 insertions(+), 901 deletions(-)



[Qemu-devel] [PULL 05/17] tcg-i386: Support new ldst opcodes

2013-10-12 Thread Richard Henderson
No support for helpers with non-default endianness yet,
but good enough to test the opcodes.

Signed-off-by: Richard Henderson r...@twiddle.net
---
 tcg/i386/tcg-target.c | 139 ++
 tcg/i386/tcg-target.h |   2 +-
 2 files changed, 51 insertions(+), 90 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index f4fdce5..7ac8e45 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1026,21 +1026,27 @@ static void tcg_out_jmp(TCGContext *s, uintptr_t dest)
 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
  * int mmu_idx, uintptr_t ra)
  */
-static const void * const qemu_ld_helpers[4] = {
-helper_ret_ldub_mmu,
-helper_ret_lduw_mmu,
-helper_ret_ldul_mmu,
-helper_ret_ldq_mmu,
+static const void * const qemu_ld_helpers[16] = {
+[MO_UB]   = helper_ret_ldub_mmu,
+[MO_LEUW] = helper_le_lduw_mmu,
+[MO_LEUL] = helper_le_ldul_mmu,
+[MO_LEQ]  = helper_le_ldq_mmu,
+[MO_BEUW] = helper_be_lduw_mmu,
+[MO_BEUL] = helper_be_ldul_mmu,
+[MO_BEQ]  = helper_be_ldq_mmu,
 };
 
 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
  * uintxx_t val, int mmu_idx, uintptr_t ra)
  */
-static const void * const qemu_st_helpers[4] = {
-helper_ret_stb_mmu,
-helper_ret_stw_mmu,
-helper_ret_stl_mmu,
-helper_ret_stq_mmu,
+static const void * const qemu_st_helpers[16] = {
+[MO_UB]   = helper_ret_stb_mmu,
+[MO_LEUW] = helper_le_stw_mmu,
+[MO_LEUL] = helper_le_stl_mmu,
+[MO_LEQ]  = helper_le_stq_mmu,
+[MO_BEUW] = helper_be_stw_mmu,
+[MO_BEUL] = helper_be_stl_mmu,
+[MO_BEQ]  = helper_be_stq_mmu,
 };
 
 /* Perform the TLB load and compare.
@@ -1165,7 +1171,6 @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, 
TCGMemOp opc,
 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 {
 TCGMemOp opc = l-opc;
-TCGMemOp s_bits = opc  MO_SIZE;
 TCGReg data_reg;
 uint8_t **label_ptr = l-label_ptr[0];
 
@@ -1202,7 +1207,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, 
TCGLabelQemuLdst *l)
  (uintptr_t)l-raddr);
 }
 
-tcg_out_calli(s, (uintptr_t)qemu_ld_helpers[s_bits]);
+tcg_out_calli(s, (uintptr_t)qemu_ld_helpers[opc  ~MO_SIGN]);
 
 data_reg = l-datalo_reg;
 switch (opc  MO_SSIZE) {
@@ -1307,7 +1312,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, 
TCGLabelQemuLdst *l)
 
 /* Tail call to the helper, with the return address back inline.  */
 tcg_out_push(s, retaddr);
-tcg_out_jmp(s, (uintptr_t)qemu_st_helpers[s_bits]);
+tcg_out_jmp(s, (uintptr_t)qemu_st_helpers[opc]);
 }
 #elif defined(__x86_64__)  defined(__linux__)
 # include asm/prctl.h
@@ -1411,22 +1416,24 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, 
TCGReg datalo, TCGReg datahi,
 /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
EAX. It will be useful once fixed registers globals are less
common. */
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGMemOp opc)
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
 {
 TCGReg datalo, datahi, addrlo;
+TCGReg addrhi __attribute__((unused));
+TCGMemOp opc;
 #if defined(CONFIG_SOFTMMU)
-TCGReg addrhi;
 int mem_index;
 TCGMemOp s_bits;
 uint8_t *label_ptr[2];
 #endif
 
 datalo = *args++;
-datahi = (TCG_TARGET_REG_BITS == 32  opc == 3 ? *args++ : 0);
+datahi = (TCG_TARGET_REG_BITS == 32  is64 ? *args++ : 0);
 addrlo = *args++;
+addrhi = (TARGET_LONG_BITS  TCG_TARGET_REG_BITS ? *args++ : 0);
+opc = *args++;
 
 #if defined(CONFIG_SOFTMMU)
-addrhi = (TARGET_LONG_BITS  TCG_TARGET_REG_BITS ? *args++ : 0);
 mem_index = *args++;
 s_bits = opc  MO_SIZE;
 
@@ -1531,22 +1538,24 @@ static void tcg_out_qemu_st_direct(TCGContext *s, 
TCGReg datalo, TCGReg datahi,
 }
 }
 
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, TCGMemOp opc)
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
 {
 TCGReg datalo, datahi, addrlo;
+TCGReg addrhi __attribute__((unused));
+TCGMemOp opc;
 #if defined(CONFIG_SOFTMMU)
-TCGReg addrhi;
 int mem_index;
 TCGMemOp s_bits;
 uint8_t *label_ptr[2];
 #endif
 
 datalo = *args++;
-datahi = (TCG_TARGET_REG_BITS == 32  opc == 3 ? *args++ : 0);
+datahi = (TCG_TARGET_REG_BITS == 32  is64 ? *args++ : 0);
 addrlo = *args++;
+addrhi = (TARGET_LONG_BITS  TCG_TARGET_REG_BITS ? *args++ : 0);
+opc = *args++;
 
 #if defined(CONFIG_SOFTMMU)
-addrhi = (TARGET_LONG_BITS  TCG_TARGET_REG_BITS ? *args++ : 0);
 mem_index = *args++;
 s_bits = opc  MO_SIZE;
 
@@ -1810,39 +1819,17 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode 
opc,
 tcg_out_ext16u(s, args[0], args[1]);
 break;
 
-case INDEX_op_qemu_ld8u:

[Qemu-devel] [PULL 02/17] tcg-i386: Use TCGMemOp within qemu_ldst routines

2013-10-12 Thread Richard Henderson
Step one in the transition, with constants passed down from tcg_out_op.

Signed-off-by: Richard Henderson r...@twiddle.net
---
 tcg/i386/tcg-target.c | 123 --
 1 file changed, 59 insertions(+), 64 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index b865b4b..1b86009 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1077,7 +1077,7 @@ static void add_qemu_ldst_label(TCGContext *s,
First argument register is clobbered.  */
 
 static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
-int mem_index, int s_bits,
+int mem_index, TCGMemOp s_bits,
 const TCGArg *args,
 uint8_t **label_ptr, int which)
 {
@@ -1164,28 +1164,26 @@ static inline void setup_guest_base_seg(void)
 static inline void setup_guest_base_seg(void) { }
 #endif /* SOFTMMU */
 
-static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi,
-   int base, intptr_t ofs, int seg, int sizeop)
+static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
+   TCGReg base, intptr_t ofs, int seg,
+   TCGMemOp memop)
 {
-#ifdef TARGET_WORDS_BIGENDIAN
-const int bswap = 1;
-#else
-const int bswap = 0;
-#endif
-switch (sizeop) {
-case 0:
+const TCGMemOp bswap = memop  MO_BSWAP;
+
+switch (memop  MO_SSIZE) {
+case MO_UB:
 tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs);
 break;
-case 0 | 4:
+case MO_SB:
 tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs);
 break;
-case 1:
+case MO_UW:
 tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
 if (bswap) {
 tcg_out_rolw_8(s, datalo);
 }
 break;
-case 1 | 4:
+case MO_SW:
 if (bswap) {
 tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs);
 tcg_out_rolw_8(s, datalo);
@@ -1195,14 +1193,14 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int 
datalo, int datahi,
  datalo, base, ofs);
 }
 break;
-case 2:
+case MO_UL:
 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
 if (bswap) {
 tcg_out_bswap32(s, datalo);
 }
 break;
 #if TCG_TARGET_REG_BITS == 64
-case 2 | 4:
+case MO_SL:
 if (bswap) {
 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs);
 tcg_out_bswap32(s, datalo);
@@ -1212,7 +1210,7 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int 
datalo, int datahi,
 }
 break;
 #endif
-case 3:
+case MO_Q:
 if (TCG_TARGET_REG_BITS == 64) {
 tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg,
  datalo, base, ofs);
@@ -1250,26 +1248,26 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int 
datalo, int datahi,
 /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
EAX. It will be useful once fixed registers globals are less
common. */
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
-int opc)
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGMemOp opc)
 {
 int data_reg, data_reg2 = 0;
 int addrlo_idx;
 #if defined(CONFIG_SOFTMMU)
-int mem_index, s_bits;
+int mem_index;
+TCGMemOp s_bits;
 uint8_t *label_ptr[2];
 #endif
 
 data_reg = args[0];
 addrlo_idx = 1;
-if (TCG_TARGET_REG_BITS == 32  opc == 3) {
+if (TCG_TARGET_REG_BITS == 32  (opc  MO_SIZE) == MO_64) {
 data_reg2 = args[1];
 addrlo_idx = 2;
 }
 
 #if defined(CONFIG_SOFTMMU)
 mem_index = args[addrlo_idx + 1 + (TARGET_LONG_BITS  
TCG_TARGET_REG_BITS)];
-s_bits = opc  3;
+s_bits = opc  MO_SIZE;
 
 tcg_out_tlb_load(s, addrlo_idx, mem_index, s_bits, args,
  label_ptr, offsetof(CPUTLBEntry, addr_read));
@@ -1314,27 +1312,24 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
*args,
 #endif
 }
 
-static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
-   int base, intptr_t ofs, int seg,
-   int sizeop)
+static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
+   TCGReg base, intptr_t ofs, int seg,
+   TCGMemOp memop)
 {
-#ifdef TARGET_WORDS_BIGENDIAN
-const int bswap = 1;
-#else
-const int bswap = 0;
-#endif
+const TCGMemOp bswap = memop  MO_BSWAP;
+
 /* ??? Ideally we wouldn't need a scratch register.  For user-only,
we could perform the bswap twice to restore the original 

[Qemu-devel] [PULL 04/17] tcg-i386: Remove cb output restriction from qemu_st8 for i386

2013-10-12 Thread Richard Henderson
Once we form a combined qemu_st_i32 opcode, we won't be able to
have separate constraints based on size.  This one is fairly easy
to work around, since eax is available as a scratch register.

When storing variable data, this tends to merely exchange one mov
for another.  E.g.

-:  mov%esi,%ecx
...
-:  mov%cl,(%edx)
+:  mov%esi,%eax
+:  mov%al,(%edx)

Where we do have a regression is when storing constant data, in which
we may load the constant into edi, when only ecx/ebx ought to be used.

The proper way to recover this regression is to allow constants as
arguments to qemu_st_i32, so that we never load the constant data into
a register at all, much less the wrong register.  TBD.

Signed-off-by: Richard Henderson r...@twiddle.net
---
 tcg/i386/tcg-target.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index a7ff8a3..f4fdce5 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1479,6 +1479,12 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg 
datalo, TCGReg datahi,
 
 switch (memop  MO_SIZE) {
 case MO_8:
+/* In 32-bit mode, 8-bit stores can only happen from [abcd]x.
+   Use the scratch register if necessary.  */
+if (TCG_TARGET_REG_BITS == 32  datalo = 4) {
+tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo);
+datalo = scratch;
+}
 tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg,
  datalo, base, ofs);
 break;
@@ -2084,7 +2090,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
 { INDEX_op_qemu_ld32, { r, L } },
 { INDEX_op_qemu_ld64, { r, r, L } },
 
-{ INDEX_op_qemu_st8, { cb, L } },
+{ INDEX_op_qemu_st8, { L, L } },
 { INDEX_op_qemu_st16, { L, L } },
 { INDEX_op_qemu_st32, { L, L } },
 { INDEX_op_qemu_st64, { L, L, L } },
@@ -2096,7 +2102,7 @@ static const TCGTargetOpDef x86_op_defs[] = {
 { INDEX_op_qemu_ld32, { r, L, L } },
 { INDEX_op_qemu_ld64, { r, r, L, L } },
 
-{ INDEX_op_qemu_st8, { cb, L, L } },
+{ INDEX_op_qemu_st8, { L, L, L } },
 { INDEX_op_qemu_st16, { L, L, L } },
 { INDEX_op_qemu_st32, { L, L, L } },
 { INDEX_op_qemu_st64, { L, L, L, L } },
-- 
1.8.1.4




[Qemu-devel] [PULL 13/17] tcg-ppc: Convert to le/be ldst helpers

2013-10-12 Thread Richard Henderson
Signed-off-by: Richard Henderson r...@twiddle.net
---
 tcg/ppc/tcg-target.c | 57 ++--
 1 file changed, 33 insertions(+), 24 deletions(-)

diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index c1b0908..dadc108 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -550,25 +550,31 @@ static void add_qemu_ldst_label (TCGContext *s,
 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
  * int mmu_idx, uintptr_t ra)
  */
-static const void * const qemu_ld_helpers[4] = {
-helper_ret_ldub_mmu,
-helper_ret_lduw_mmu,
-helper_ret_ldul_mmu,
-helper_ret_ldq_mmu,
+static const void * const qemu_ld_helpers[16] = {
+[MO_UB]   = helper_ret_ldub_mmu,
+[MO_LEUW] = helper_le_lduw_mmu,
+[MO_LEUL] = helper_le_ldul_mmu,
+[MO_LEQ]  = helper_le_ldq_mmu,
+[MO_BEUW] = helper_be_lduw_mmu,
+[MO_BEUL] = helper_be_ldul_mmu,
+[MO_BEQ]  = helper_be_ldq_mmu,
 };
 
 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
  * uintxx_t val, int mmu_idx, uintptr_t ra)
  */
-static const void * const qemu_st_helpers[4] = {
-helper_ret_stb_mmu,
-helper_ret_stw_mmu,
-helper_ret_stl_mmu,
-helper_ret_stq_mmu,
+static const void * const qemu_st_helpers[16] = {
+[MO_UB]   = helper_ret_stb_mmu,
+[MO_LEUW] = helper_le_stw_mmu,
+[MO_LEUL] = helper_le_stl_mmu,
+[MO_LEQ]  = helper_le_stq_mmu,
+[MO_BEUW] = helper_be_stw_mmu,
+[MO_BEUL] = helper_be_stl_mmu,
+[MO_BEQ]  = helper_be_stq_mmu,
 };
 
-static void *ld_trampolines[4];
-static void *st_trampolines[4];
+static void *ld_trampolines[16];
+static void *st_trampolines[16];
 
 /* Perform the TLB load and compare.  Branches to the slow path, placing the
address of the branch in *LABEL_PTR.  Loads the addend of the TLB into R0.
@@ -783,7 +789,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg 
*args, TCGMemOp opc)
 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 {
 TCGReg ir, datalo, datahi;
-TCGMemOp opc = l-opc  MO_SSIZE;
+TCGMemOp opc = l-opc;
 
 reloc_pc14 (l-label_ptr[0], (uintptr_t)s-code_ptr);
 
@@ -799,10 +805,10 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, 
TCGLabelQemuLdst *l)
 }
 tcg_out_movi(s, TCG_TYPE_I32, ir++, l-mem_index);
 tcg_out32(s, MFSPR | RT(ir++) | LR);
-tcg_out_b(s, LK, (uintptr_t)ld_trampolines[opc  MO_SIZE]);
+tcg_out_b(s, LK, (uintptr_t)ld_trampolines[opc  ~MO_SIGN]);
 
 datalo = l-datalo_reg;
-switch (opc) {
+switch (opc  MO_SSIZE) {
 case MO_SB:
 tcg_out32(s, EXTSB | RA(datalo) | RS(TCG_REG_R3));
 break;
@@ -833,7 +839,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, 
TCGLabelQemuLdst *l)
 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 {
 TCGReg ir, datalo;
-TCGMemOp s_bits = l-opc  MO_SIZE;
+TCGMemOp opc = l-opc;
 
 reloc_pc14 (l-label_ptr[0], (tcg_target_long) s-code_ptr);
 
@@ -849,7 +855,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, 
TCGLabelQemuLdst *l)
 }
 
 datalo = l-datalo_reg;
-switch (s_bits) {
+switch (opc  MO_SIZE) {
 case MO_8:
 tcg_out32(s, (RLWINM | RA (ir) | RS (datalo)
   | SH (0) | MB (24) | ME (31)));
@@ -873,7 +879,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, 
TCGLabelQemuLdst *l)
 
 tcg_out_movi(s, TCG_TYPE_I32, ir++, l-mem_index);
 tcg_out32(s, MFSPR | RT(ir++) | LR);
-tcg_out_b(s, LK, (uintptr_t)st_trampolines[l-opc]);
+tcg_out_b(s, LK, (uintptr_t)st_trampolines[opc]);
 tcg_out_b(s, 0, (uintptr_t)l-raddr);
 }
 #endif
@@ -948,12 +954,15 @@ static void tcg_target_qemu_prologue (TCGContext *s)
 tcg_out32 (s, BCLR | BO_ALWAYS);
 
 #ifdef CONFIG_SOFTMMU
-for (i = 0; i  4; ++i) {
-ld_trampolines[i] = s-code_ptr;
-emit_ldst_trampoline (s, qemu_ld_helpers[i]);
-
-st_trampolines[i] = s-code_ptr;
-emit_ldst_trampoline (s, qemu_st_helpers[i]);
+for (i = 0; i  16; ++i) {
+if (qemu_ld_helpers[i]) {
+ld_trampolines[i] = s-code_ptr;
+emit_ldst_trampoline(s, qemu_ld_helpers[i]);
+}
+if (qemu_st_helpers[i]) {
+st_trampolines[i] = s-code_ptr;
+emit_ldst_trampoline(s, qemu_st_helpers[i]);
+}
 }
 #endif
 }
-- 
1.8.1.4




[Qemu-devel] [PULL 07/17] tcg-arm: Convert to le/be ldst helpers

2013-10-12 Thread Richard Henderson
Signed-off-by: Richard Henderson r...@twiddle.net
---
 tcg/arm/tcg-target.c | 50 +-
 1 file changed, 29 insertions(+), 21 deletions(-)

diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 980d030..4692859 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -1079,26 +1079,34 @@ static inline void tcg_out_goto_label(TCGContext *s, 
int cond, int label_index)
 /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
  * int mmu_idx, uintptr_t ra)
  */
-static const void * const qemu_ld_helpers[8] = {
-helper_ret_ldub_mmu,
-helper_ret_lduw_mmu,
-helper_ret_ldul_mmu,
-helper_ret_ldq_mmu,
-
-helper_ret_ldsb_mmu,
-helper_ret_ldsw_mmu,
-helper_ret_ldul_mmu,
-helper_ret_ldq_mmu,
+static const void * const qemu_ld_helpers[16] = {
+[MO_UB]   = helper_ret_ldub_mmu,
+[MO_SB]   = helper_ret_ldsb_mmu,
+
+[MO_LEUW] = helper_le_lduw_mmu,
+[MO_LEUL] = helper_le_ldul_mmu,
+[MO_LEQ]  = helper_le_ldq_mmu,
+[MO_LESW] = helper_le_ldsw_mmu,
+[MO_LESL] = helper_le_ldul_mmu,
+
+[MO_BEUW] = helper_be_lduw_mmu,
+[MO_BEUL] = helper_be_ldul_mmu,
+[MO_BEQ]  = helper_be_ldq_mmu,
+[MO_BESW] = helper_be_ldsw_mmu,
+[MO_BESL] = helper_be_ldul_mmu,
 };
 
 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
  * uintxx_t val, int mmu_idx, uintptr_t ra)
  */
-static const void * const qemu_st_helpers[4] = {
-helper_ret_stb_mmu,
-helper_ret_stw_mmu,
-helper_ret_stl_mmu,
-helper_ret_stq_mmu,
+static const void * const qemu_st_helpers[16] = {
+[MO_UB]   = helper_ret_stb_mmu,
+[MO_LEUW] = helper_le_stw_mmu,
+[MO_LEUL] = helper_le_stl_mmu,
+[MO_LEQ]  = helper_le_stq_mmu,
+[MO_BEUW] = helper_be_stw_mmu,
+[MO_BEUL] = helper_be_stl_mmu,
+[MO_BEQ]  = helper_be_stq_mmu,
 };
 
 /* Helper routines for marshalling helper function arguments into
@@ -1261,7 +1269,7 @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, 
TCGMemOp opc,
 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
 TCGReg argreg, data_reg, data_reg2;
-TCGMemOp opc = lb-opc  MO_SSIZE;
+TCGMemOp opc = lb-opc;
 uintptr_t func;
 
 reloc_pc24(lb-label_ptr[0], (tcg_target_long)s-code_ptr);
@@ -1279,7 +1287,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb)
icache usage.  For pre-armv6, use the signed helpers since we do
not have a single insn sign-extend.  */
 if (use_armv6_instructions) {
-func = (uintptr_t)qemu_ld_helpers[opc  MO_SIZE];
+func = (uintptr_t)qemu_ld_helpers[opc  ~MO_SIGN];
 } else {
 func = (uintptr_t)qemu_ld_helpers[opc];
 if (opc  MO_SIGN) {
@@ -1290,7 +1298,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb)
 
 data_reg = lb-datalo_reg;
 data_reg2 = lb-datahi_reg;
-switch (opc) {
+switch (opc  MO_SSIZE) {
 case MO_SB:
 tcg_out_ext8s(s, COND_AL, data_reg, TCG_REG_R0);
 break;
@@ -1321,7 +1329,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb)
 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
 TCGReg argreg, data_reg, data_reg2;
-TCGMemOp s_bits = lb-opc  MO_SIZE;
+TCGMemOp opc = lb-opc;
 
 reloc_pc24(lb-label_ptr[0], (tcg_target_long)s-code_ptr);
 
@@ -1335,7 +1343,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb)
 
 data_reg = lb-datalo_reg;
 data_reg2 = lb-datahi_reg;
-switch (s_bits) {
+switch (opc  MO_SIZE) {
 case MO_8:
 argreg = tcg_out_arg_reg8(s, argreg, data_reg);
 break;
@@ -1355,7 +1363,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb)
 argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
 
 /* Tail-call to the helper, which will return to the fast path.  */
-tcg_out_goto(s, COND_AL, (tcg_target_long) qemu_st_helpers[s_bits]);
+tcg_out_goto(s, COND_AL, (uintptr_t)qemu_st_helpers[opc]);
 }
 #endif /* SOFTMMU */
 
-- 
1.8.1.4




[Qemu-devel] [PULL 10/17] tcg-arm: Improve GUEST_BASE qemu_ld/st

2013-10-12 Thread Richard Henderson
If we pull the code to emit the actual load/store into a subroutine,
we can share the reg+reg addressing mode code between softmmu and
usermode.  This lets us load GUEST_BASE into a temporary register
rather than attempting to add it piece-wise to the address.

That, in turn, lets us use movw+movt for armv7, rather than (up to) 4 adds.
Code size for pre-armv7 stays the same.

Signed-off-by: Richard Henderson r...@twiddle.net
---
 tcg/arm/tcg-target.c | 220 +++
 1 file changed, 116 insertions(+), 104 deletions(-)

diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index c3fd2b0..e93a4a2 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -1367,33 +1367,11 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb)
 }
 #endif /* SOFTMMU */
 
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
+static inline void tcg_out_qemu_ld_index(TCGContext *s, TCGMemOp opc,
+ TCGReg datalo, TCGReg datahi,
+ TCGReg addrlo, TCGReg addend)
 {
-TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
-TCGMemOp opc, bswap;
-#ifdef CONFIG_SOFTMMU
-TCGMemOp s_bits;
-int mem_index;
-TCGReg addend;
-uint8_t *label_ptr;
-#endif
-
-datalo = *args++;
-datahi = (is64 ? *args++ : 0);
-addrlo = *args++;
-addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
-opc = *args++;
-bswap = opc  MO_BSWAP;
-
-#ifdef CONFIG_SOFTMMU
-s_bits = opc  MO_SIZE;
-mem_index = *args;
-addend = tcg_out_tlb_read(s, addrlo, addrhi, s_bits, mem_index, 1);
-
-/* This a conditional BL only to load a pointer within this opcode into LR
-   for the slow path.  We will not be using the value for a tail call.  */
-label_ptr = s-code_ptr;
-tcg_out_bl_noaddr(s, COND_NE);
+TCGMemOp bswap = opc  MO_BSWAP;
 
 switch (opc  MO_SSIZE) {
 case MO_UB:
@@ -1425,8 +1403,6 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
*args, bool is64)
 break;
 case MO_Q:
 {
-/* Be careful not to modify datalo and datahi
-   for the slow path below.  */
 TCGReg dl = (bswap ? datahi : datalo);
 TCGReg dh = (bswap ? datalo : datahi);
 
@@ -1442,30 +1418,20 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
*args, bool is64)
 tcg_out_ld32_12(s, COND_AL, dh, TCG_REG_TMP, 4);
 }
 if (bswap) {
-tcg_out_bswap32(s, COND_AL, dh, dh);
 tcg_out_bswap32(s, COND_AL, dl, dl);
+tcg_out_bswap32(s, COND_AL, dh, dh);
 }
 }
 break;
 }
+}
 
-add_qemu_ldst_label(s, 1, opc, datalo, datahi, addrlo, addrhi,
-mem_index, s-code_ptr, label_ptr);
-#else /* !CONFIG_SOFTMMU */
-if (GUEST_BASE) {
-uint32_t offset = GUEST_BASE;
-int i, rot;
-
-while (offset) {
-i = ctz32(offset)  ~1;
-rot = ((32 - i)  7)  0xf00;
+static inline void tcg_out_qemu_ld_direct(TCGContext *s, TCGMemOp opc,
+  TCGReg datalo, TCGReg datahi,
+  TCGReg addrlo)
+{
+TCGMemOp bswap = opc  MO_BSWAP;
 
-tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_TMP, addrlo,
-((offset  i)  0xff) | rot);
-addrlo = TCG_REG_TMP;
-offset = ~(0xff  i);
-}
-}
 switch (opc  MO_SSIZE) {
 case MO_UB:
 tcg_out_ld8_12(s, COND_AL, datalo, addrlo, 0);
@@ -1495,32 +1461,32 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
*args, bool is64)
 }
 break;
 case MO_Q:
-if (use_armv6_instructions  !bswap
- (datalo  1) == 0  datahi == datalo + 1) {
-tcg_out_ldrd_8(s, COND_AL, datalo, addrlo, 0);
-} else if (use_armv6_instructions  bswap
-(datahi  1) == 0  datalo == datahi + 1) {
-tcg_out_ldrd_8(s, COND_AL, datahi, addrlo, 0);
-} else if (datalo == addrlo) {
-tcg_out_ld32_12(s, COND_AL, datahi, addrlo, bswap ? 0 : 4);
-tcg_out_ld32_12(s, COND_AL, datalo, addrlo, bswap ? 4 : 0);
-} else {
-tcg_out_ld32_12(s, COND_AL, datalo, addrlo, bswap ? 4 : 0);
-tcg_out_ld32_12(s, COND_AL, datahi, addrlo, bswap ? 0 : 4);
-}
-if (bswap) {
-tcg_out_bswap32(s, COND_AL, datalo, datalo);
-tcg_out_bswap32(s, COND_AL, datahi, datahi);
+{
+TCGReg dl = (bswap ? datahi : datalo);
+TCGReg dh = (bswap ? datalo : datahi);
+
+if (use_armv6_instructions  (dl  1) == 0  dh == dl + 1) {
+tcg_out_ldrd_8(s, COND_AL, dl, addrlo, 0);
+} else if (dl == addrlo) {
+tcg_out_ld32_12(s, COND_AL, dh, addrlo, bswap ? 0 : 4);
+ 

[Qemu-devel] [PULL 06/17] tcg-arm: Use TCGMemOp within qemu_ldst routines

2013-10-12 Thread Richard Henderson
Signed-off-by: Richard Henderson r...@twiddle.net
---
 tcg/arm/tcg-target.c | 125 +--
 1 file changed, 61 insertions(+), 64 deletions(-)

diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index c0e1466..980d030 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -1169,7 +1169,7 @@ QEMU_BUILD_BUG_ON(offsetof(CPUArchState, 
tlb_table[NB_MMU_MODES - 1][1])
containing the addend of the tlb entry.  Clobbers R0, R1, R2, TMP.  */
 
 static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
-   int s_bits, int mem_index, bool is_load)
+   TCGMemOp s_bits, int mem_index, bool is_load)
 {
 TCGReg base = TCG_AREG0;
 int cmp_off =
@@ -1240,7 +1240,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg 
addrlo, TCGReg addrhi,
 /* Record the context of a call to the out of line helper code for the slow
path for a load or store, so that we can later generate the correct
helper code.  */
-static void add_qemu_ldst_label(TCGContext *s, int is_ld, int opc,
+static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc,
 int data_reg, int data_reg2, int addrlo_reg,
 int addrhi_reg, int mem_index,
 uint8_t *raddr, uint8_t *label_ptr)
@@ -1261,7 +1261,7 @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, 
int opc,
 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
 TCGReg argreg, data_reg, data_reg2;
-int opc = lb-opc;
+TCGMemOp opc = lb-opc  MO_SSIZE;
 uintptr_t func;
 
 reloc_pc24(lb-label_ptr[0], (tcg_target_long)s-code_ptr);
@@ -1279,11 +1279,11 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb)
icache usage.  For pre-armv6, use the signed helpers since we do
not have a single insn sign-extend.  */
 if (use_armv6_instructions) {
-func = (uintptr_t)qemu_ld_helpers[opc  3];
+func = (uintptr_t)qemu_ld_helpers[opc  MO_SIZE];
 } else {
 func = (uintptr_t)qemu_ld_helpers[opc];
-if (opc  4) {
-opc = 2;
+if (opc  MO_SIGN) {
+opc = MO_UL;
 }
 }
 tcg_out_call(s, func);
@@ -1291,16 +1291,16 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb)
 data_reg = lb-datalo_reg;
 data_reg2 = lb-datahi_reg;
 switch (opc) {
-case 0 | 4:
+case MO_SB:
 tcg_out_ext8s(s, COND_AL, data_reg, TCG_REG_R0);
 break;
-case 1 | 4:
+case MO_SW:
 tcg_out_ext16s(s, COND_AL, data_reg, TCG_REG_R0);
 break;
 default:
 tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0);
 break;
-case 3:
+case MO_Q:
 if (data_reg != TCG_REG_R1) {
 tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0);
 tcg_out_mov_reg(s, COND_AL, data_reg2, TCG_REG_R1);
@@ -1321,6 +1321,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb)
 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
 TCGReg argreg, data_reg, data_reg2;
+TCGMemOp s_bits = lb-opc  MO_SIZE;
 
 reloc_pc24(lb-label_ptr[0], (tcg_target_long)s-code_ptr);
 
@@ -1334,17 +1335,18 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb)
 
 data_reg = lb-datalo_reg;
 data_reg2 = lb-datahi_reg;
-switch (lb-opc) {
-case 0:
+switch (s_bits) {
+case MO_8:
 argreg = tcg_out_arg_reg8(s, argreg, data_reg);
 break;
-case 1:
+case MO_16:
 argreg = tcg_out_arg_reg16(s, argreg, data_reg);
 break;
-case 2:
+case MO_32:
+default:
 argreg = tcg_out_arg_reg32(s, argreg, data_reg);
 break;
-case 3:
+case MO_64:
 argreg = tcg_out_arg_reg64(s, argreg, data_reg, data_reg2);
 break;
 }
@@ -1353,32 +1355,27 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb)
 argreg = tcg_out_arg_reg32(s, argreg, TCG_REG_R14);
 
 /* Tail-call to the helper, which will return to the fast path.  */
-tcg_out_goto(s, COND_AL, (tcg_target_long) qemu_st_helpers[lb-opc  3]);
+tcg_out_goto(s, COND_AL, (tcg_target_long) qemu_st_helpers[s_bits]);
 }
 #endif /* SOFTMMU */
 
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGMemOp opc)
 {
 TCGReg addr_reg, data_reg, data_reg2;
-bool bswap;
+TCGMemOp bswap = opc  MO_BSWAP;
+TCGMemOp s_bits = opc  MO_SIZE;
 #ifdef CONFIG_SOFTMMU
-int mem_index, s_bits;
+int mem_index;
 TCGReg addr_reg2, addend;
 uint8_t *label_ptr;
 #endif
-#ifdef TARGET_WORDS_BIGENDIAN
-bswap = 1;
-#else
-bswap = 0;
-#endif
 
 data_reg = *args++;
-data_reg2 = (opc == 3 ? *args++ : 0);
+data_reg2 

[Qemu-devel] [PULL 12/17] tcg-ppc64: Use TCGMemOp within qemu_ldst routines

2013-10-12 Thread Richard Henderson
Signed-off-by: Richard Henderson r...@twiddle.net
---
 tcg/ppc64/tcg-target.c | 84 +++---
 1 file changed, 45 insertions(+), 39 deletions(-)

diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 12c1f61..499805f 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -809,22 +809,28 @@ static void tcg_out_mem_long(TCGContext *s, int opi, int 
opx, TCGReg rt,
 }
 }
 
-static const uint32_t qemu_ldx_opc[8] = {
-#ifdef TARGET_WORDS_BIGENDIAN
-LBZX, LHZX, LWZX, LDX,
-0,LHAX, LWAX, LDX
-#else
-LBZX, LHBRX, LWBRX, LDBRX,
-0,0, 0, LDBRX,
-#endif
+static const uint32_t qemu_ldx_opc[16] = {
+[MO_UB] = LBZX,
+[MO_UW] = LHZX,
+[MO_UL] = LWZX,
+[MO_Q]  = LDX,
+[MO_SW] = LHAX,
+[MO_SL] = LWAX,
+[MO_BSWAP | MO_UB] = LBZX,
+[MO_BSWAP | MO_UW] = LHBRX,
+[MO_BSWAP | MO_UL] = LWBRX,
+[MO_BSWAP | MO_Q]  = LDBRX,
 };
 
-static const uint32_t qemu_stx_opc[4] = {
-#ifdef TARGET_WORDS_BIGENDIAN
-STBX, STHX, STWX, STDX
-#else
-STBX, STHBRX, STWBRX, STDBRX,
-#endif
+static const uint32_t qemu_stx_opc[16] = {
+[MO_UB] = STBX,
+[MO_UW] = STHX,
+[MO_UL] = STWX,
+[MO_Q]  = STDX,
+[MO_BSWAP | MO_UB] = STBX,
+[MO_BSWAP | MO_UW] = STHBRX,
+[MO_BSWAP | MO_UL] = STWBRX,
+[MO_BSWAP | MO_Q]  = STDBRX,
 };
 
 static const uint32_t qemu_exts_opc[4] = {
@@ -856,7 +862,7 @@ static const void * const qemu_st_helpers[4] = {
in CR7, loads the addend of the TLB into R3, and returns the register
containing the guest address (zero-extended into R4).  Clobbers R0 and R2. 
*/
 
-static TCGReg tcg_out_tlb_read(TCGContext *s, int s_bits, TCGReg addr_reg,
+static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp s_bits, TCGReg addr_reg,
int mem_index, bool is_read)
 {
 int cmp_off
@@ -929,7 +935,7 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, int s_bits, 
TCGReg addr_reg,
 /* Record the context of a call to the out of line helper code for the slow
path for a load or store, so that we can later generate the correct
helper code.  */
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld, int opc,
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOp opc,
 int data_reg, int addr_reg, int mem_index,
 uint8_t *raddr, uint8_t *label_ptr)
 {
@@ -946,8 +952,8 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, 
int opc,
 
 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
-int opc = lb-opc;
-int s_bits = opc  3;
+TCGMemOp opc = lb-opc  MO_SSIZE;
+TCGMemOp s_bits = lb-opc  MO_SIZE;
 
 reloc_pc14(lb-label_ptr[0], (uintptr_t)s-code_ptr);
 
@@ -962,7 +968,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb)
 
 tcg_out_call(s, (tcg_target_long)qemu_ld_helpers[s_bits], 1);
 
-if (opc  4) {
+if (opc  MO_SIGN) {
 uint32_t insn = qemu_exts_opc[s_bits];
 tcg_out32(s, insn | RA(lb-datalo_reg) | RS(TCG_REG_R3));
 } else {
@@ -974,7 +980,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb)
 
 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
-int opc = lb-opc;
+TCGMemOp s_bits = lb-opc  MO_SIZE;
 
 reloc_pc14(lb-label_ptr[0], (uintptr_t)s-code_ptr);
 
@@ -985,20 +991,21 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb)
 tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, lb-addrlo_reg);
 
 tcg_out_rld(s, RLDICL, TCG_REG_R5, lb-datalo_reg,
-0, 64 - (1  (3 + opc)));
+0, 64 - (1  (3 + s_bits)));
 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R6, lb-mem_index);
 tcg_out32(s, MFSPR | RT(TCG_REG_R7) | LR);
 
-tcg_out_call(s, (tcg_target_long)qemu_st_helpers[opc], 1);
+tcg_out_call(s, (tcg_target_long)qemu_st_helpers[s_bits], 1);
 
 tcg_out_b(s, 0, (uintptr_t)lb-raddr);
 }
 #endif /* SOFTMMU */
 
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGMemOp opc)
 {
 TCGReg addr_reg, data_reg, rbase;
-uint32_t insn, s_bits;
+uint32_t insn;
+TCGMemOp s_bits = opc  MO_SIZE;
 #ifdef CONFIG_SOFTMMU
 int mem_index;
 void *label_ptr;
@@ -1006,7 +1013,6 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
*args, int opc)
 
 data_reg = *args++;
 addr_reg = *args++;
-s_bits = opc  3;
 
 #ifdef CONFIG_SOFTMMU
 mem_index = *args;
@@ -1035,7 +1041,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
*args, int opc)
 } else if (insn) {
 tcg_out32(s, insn | TAB(data_reg, rbase, addr_reg));
 } else {
-insn = qemu_ldx_opc[s_bits];
+insn = qemu_ldx_opc[opc  (MO_SIZE | MO_BSWAP)];
 tcg_out32(s, insn | TAB(data_reg, rbase, addr_reg));
 insn = 

[Qemu-devel] [PULL 09/17] tcg-arm: Convert to new ldst opcodes

2013-10-12 Thread Richard Henderson
Signed-off-by: Richard Henderson r...@twiddle.net
---
 tcg/arm/tcg-target.c | 107 ++-
 tcg/arm/tcg-target.h |   2 +-
 2 files changed, 38 insertions(+), 71 deletions(-)

diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index e7d6bf0..c3fd2b0 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -1367,24 +1367,27 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb)
 }
 #endif /* SOFTMMU */
 
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGMemOp opc)
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
 {
-TCGReg addrlo, datalo, datahi;
-TCGMemOp bswap = opc  MO_BSWAP;
-TCGMemOp s_bits = opc  MO_SIZE;
+TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
+TCGMemOp opc, bswap;
 #ifdef CONFIG_SOFTMMU
+TCGMemOp s_bits;
 int mem_index;
-TCGReg addrhi, addend;
+TCGReg addend;
 uint8_t *label_ptr;
 #endif
 
 datalo = *args++;
-datahi = (s_bits == MO_64 ? *args++ : 0);
+datahi = (is64 ? *args++ : 0);
 addrlo = *args++;
-#ifdef CONFIG_SOFTMMU
 addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
-mem_index = *args;
+opc = *args++;
+bswap = opc  MO_BSWAP;
 
+#ifdef CONFIG_SOFTMMU
+s_bits = opc  MO_SIZE;
+mem_index = *args;
 addend = tcg_out_tlb_read(s, addrlo, addrhi, s_bits, mem_index, 1);
 
 /* This a conditional BL only to load a pointer within this opcode into LR
@@ -1514,29 +1517,26 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
*args, TCGMemOp opc)
 #endif
 }
 
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, TCGMemOp opc)
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
 {
-TCGReg addrlo, datalo, datahi;
-TCGMemOp bswap = opc  MO_BSWAP;
-TCGMemOp s_bits = opc  MO_SIZE;
+TCGReg addrlo, datalo, datahi, addrhi __attribute__((unused));
+TCGMemOp opc, bswap, s_bits;
 #ifdef CONFIG_SOFTMMU
 int mem_index;
-TCGReg addrhi, addend;
+TCGReg addend;
 uint8_t *label_ptr;
 #endif
-#ifdef TARGET_WORDS_BIGENDIAN
-bswap = 1;
-#else
-bswap = 0;
-#endif
 
 datalo = *args++;
-datahi = (s_bits == MO_64 ? *args++ : 0);
+datahi = (is64 ? *args++ : 0);
 addrlo = *args++;
-#ifdef CONFIG_SOFTMMU
 addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
-mem_index = *args;
+opc = *args++;
+bswap = opc  MO_BSWAP;
+s_bits = opc  MO_SIZE;
 
+#ifdef CONFIG_SOFTMMU
+mem_index = *args;
 addend = tcg_out_tlb_read(s, addrlo, addrhi, s_bits, mem_index, 0);
 
 switch (s_bits) {
@@ -1902,36 +1902,17 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode 
opc,
 ARITH_MOV, args[0], 0, 0);
 break;
 
-case INDEX_op_qemu_ld8u:
-tcg_out_qemu_ld(s, args, MO_UB);
-break;
-case INDEX_op_qemu_ld8s:
-tcg_out_qemu_ld(s, args, MO_SB);
-break;
-case INDEX_op_qemu_ld16u:
-tcg_out_qemu_ld(s, args, MO_TEUW);
-break;
-case INDEX_op_qemu_ld16s:
-tcg_out_qemu_ld(s, args, MO_TESW);
-break;
-case INDEX_op_qemu_ld32:
-tcg_out_qemu_ld(s, args, MO_TEUL);
-break;
-case INDEX_op_qemu_ld64:
-tcg_out_qemu_ld(s, args, MO_TEQ);
-break;
-
-case INDEX_op_qemu_st8:
-tcg_out_qemu_st(s, args, MO_UB);
+case INDEX_op_qemu_ld_i32:
+tcg_out_qemu_ld(s, args, 0);
 break;
-case INDEX_op_qemu_st16:
-tcg_out_qemu_st(s, args, MO_TEUW);
+case INDEX_op_qemu_ld_i64:
+tcg_out_qemu_ld(s, args, 1);
 break;
-case INDEX_op_qemu_st32:
-tcg_out_qemu_st(s, args, MO_TEUL);
+case INDEX_op_qemu_st_i32:
+tcg_out_qemu_st(s, args, 0);
 break;
-case INDEX_op_qemu_st64:
-tcg_out_qemu_st(s, args, MO_TEQ);
+case INDEX_op_qemu_st_i64:
+tcg_out_qemu_st(s, args, 1);
 break;
 
 case INDEX_op_bswap16_i32:
@@ -2015,29 +1996,15 @@ static const TCGTargetOpDef arm_op_defs[] = {
 { INDEX_op_setcond2_i32, { r, r, r, rIN, rIN } },
 
 #if TARGET_LONG_BITS == 32
-{ INDEX_op_qemu_ld8u, { r, l } },
-{ INDEX_op_qemu_ld8s, { r, l } },
-{ INDEX_op_qemu_ld16u, { r, l } },
-{ INDEX_op_qemu_ld16s, { r, l } },
-{ INDEX_op_qemu_ld32, { r, l } },
-{ INDEX_op_qemu_ld64, { r, r, l } },
-
-{ INDEX_op_qemu_st8, { s, s } },
-{ INDEX_op_qemu_st16, { s, s } },
-{ INDEX_op_qemu_st32, { s, s } },
-{ INDEX_op_qemu_st64, { s, s, s } },
+{ INDEX_op_qemu_ld_i32, { r, l } },
+{ INDEX_op_qemu_ld_i64, { r, r, l } },
+{ INDEX_op_qemu_st_i32, { s, s } },
+{ INDEX_op_qemu_st_i64, { s, s, s } },
 #else
-{ INDEX_op_qemu_ld8u, { r, l, l } },
-{ INDEX_op_qemu_ld8s, { r, l, l } },
-{ INDEX_op_qemu_ld16u, { r, l, l } },
-{ INDEX_op_qemu_ld16s, { r, l, l } },
-{ INDEX_op_qemu_ld32, { r, l, l } },
-{ INDEX_op_qemu_ld64, { r, r, l, l } },
-

[Qemu-devel] [PULL 17/17] target-alpha: Convert to new ldst opcodes

2013-10-12 Thread Richard Henderson
Or rather, partially convert.  The fundamental primitives for the port are gen_load_mem
and gen_store_mem, which take a callback to emit the memory operation.
For that, we continue to use the original inline functions that forward
to the new ops, rather than replicate the same thing privately.

That said, all free-standing calls to tcg_gen_qemu_* have been converted.
The 32-bit floating-point references now use _i32 opcodes, eliminating
a truncate or extension.

Signed-off-by: Richard Henderson r...@twiddle.net
---
 target-alpha/translate.c | 49 +++-
 1 file changed, 15 insertions(+), 34 deletions(-)

diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index 9cb8084..c24910f 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -168,44 +168,38 @@ static inline ExitStatus gen_invalid(DisasContext *ctx)
 
 static inline void gen_qemu_ldf(TCGv t0, TCGv t1, int flags)
 {
-TCGv tmp = tcg_temp_new();
 TCGv_i32 tmp32 = tcg_temp_new_i32();
-tcg_gen_qemu_ld32u(tmp, t1, flags);
-tcg_gen_trunc_i64_i32(tmp32, tmp);
+tcg_gen_qemu_ld_i32(tmp32, t1, flags, MO_LEUL);
 gen_helper_memory_to_f(t0, tmp32);
 tcg_temp_free_i32(tmp32);
-tcg_temp_free(tmp);
 }
 
 static inline void gen_qemu_ldg(TCGv t0, TCGv t1, int flags)
 {
 TCGv tmp = tcg_temp_new();
-tcg_gen_qemu_ld64(tmp, t1, flags);
+tcg_gen_qemu_ld_i64(tmp, t1, flags, MO_LEQ);
 gen_helper_memory_to_g(t0, tmp);
 tcg_temp_free(tmp);
 }
 
 static inline void gen_qemu_lds(TCGv t0, TCGv t1, int flags)
 {
-TCGv tmp = tcg_temp_new();
 TCGv_i32 tmp32 = tcg_temp_new_i32();
-tcg_gen_qemu_ld32u(tmp, t1, flags);
-tcg_gen_trunc_i64_i32(tmp32, tmp);
+tcg_gen_qemu_ld_i32(tmp32, t1, flags, MO_LEUL);
 gen_helper_memory_to_s(t0, tmp32);
 tcg_temp_free_i32(tmp32);
-tcg_temp_free(tmp);
 }
 
 static inline void gen_qemu_ldl_l(TCGv t0, TCGv t1, int flags)
 {
-tcg_gen_qemu_ld32s(t0, t1, flags);
+tcg_gen_qemu_ld_i64(t0, t1, flags, MO_LESL);
 tcg_gen_mov_i64(cpu_lock_addr, t1);
 tcg_gen_mov_i64(cpu_lock_value, t0);
 }
 
 static inline void gen_qemu_ldq_l(TCGv t0, TCGv t1, int flags)
 {
-tcg_gen_qemu_ld64(t0, t1, flags);
+tcg_gen_qemu_ld_i64(t0, t1, flags, MO_LEQ);
 tcg_gen_mov_i64(cpu_lock_addr, t1);
 tcg_gen_mov_i64(cpu_lock_value, t0);
 }
@@ -247,11 +241,8 @@ static inline void gen_load_mem(DisasContext *ctx,
 static inline void gen_qemu_stf(TCGv t0, TCGv t1, int flags)
 {
 TCGv_i32 tmp32 = tcg_temp_new_i32();
-TCGv tmp = tcg_temp_new();
 gen_helper_f_to_memory(tmp32, t0);
-tcg_gen_extu_i32_i64(tmp, tmp32);
-tcg_gen_qemu_st32(tmp, t1, flags);
-tcg_temp_free(tmp);
+tcg_gen_qemu_st_i32(tmp32, t1, flags, MO_LEUL);
 tcg_temp_free_i32(tmp32);
 }
 
@@ -259,18 +250,15 @@ static inline void gen_qemu_stg(TCGv t0, TCGv t1, int 
flags)
 {
 TCGv tmp = tcg_temp_new();
 gen_helper_g_to_memory(tmp, t0);
-tcg_gen_qemu_st64(tmp, t1, flags);
+tcg_gen_qemu_st_i64(tmp, t1, flags, MO_LEQ);
 tcg_temp_free(tmp);
 }
 
 static inline void gen_qemu_sts(TCGv t0, TCGv t1, int flags)
 {
 TCGv_i32 tmp32 = tcg_temp_new_i32();
-TCGv tmp = tcg_temp_new();
 gen_helper_s_to_memory(tmp32, t0);
-tcg_gen_extu_i32_i64(tmp, tmp32);
-tcg_gen_qemu_st32(tmp, t1, flags);
-tcg_temp_free(tmp);
+tcg_gen_qemu_st_i32(tmp32, t1, flags, MO_LEUL);
 tcg_temp_free_i32(tmp32);
 }
 
@@ -348,18 +336,11 @@ static ExitStatus gen_store_conditional(DisasContext 
*ctx, int ra, int rb,
 tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_lock_addr, lab_fail);
 
 val = tcg_temp_new();
-if (quad) {
-tcg_gen_qemu_ld64(val, addr, ctx-mem_idx);
-} else {
-tcg_gen_qemu_ld32s(val, addr, ctx-mem_idx);
-}
+tcg_gen_qemu_ld_i64(val, addr, ctx-mem_idx, quad ? MO_LEQ : MO_LESL);
 tcg_gen_brcond_i64(TCG_COND_NE, val, cpu_lock_value, lab_fail);
 
-if (quad) {
-tcg_gen_qemu_st64(cpu_ir[ra], addr, ctx-mem_idx);
-} else {
-tcg_gen_qemu_st32(cpu_ir[ra], addr, ctx-mem_idx);
-}
+tcg_gen_qemu_st_i64(cpu_ir[ra], addr, ctx-mem_idx,
+quad ? MO_LEQ : MO_LEUL);
 tcg_gen_movi_i64(cpu_ir[ra], 1);
 tcg_gen_br(lab_done);
 
@@ -2966,11 +2947,11 @@ static ExitStatus translate_one(DisasContext *ctx, 
uint32_t insn)
 goto invalid_opc;
 case 0xA:
 /* Longword virtual access with protection check (hw_ldl/w) */
-tcg_gen_qemu_ld32s(cpu_ir[ra], addr, MMU_KERNEL_IDX);
+tcg_gen_qemu_ld_i64(cpu_ir[ra], addr, MMU_KERNEL_IDX, MO_LESL);
 break;
 case 0xB:
 /* Quadword virtual access with protection check (hw_ldq/w) */
-tcg_gen_qemu_ld64(cpu_ir[ra], addr, MMU_KERNEL_IDX);
+tcg_gen_qemu_ld_i64(cpu_ir[ra], addr, MMU_KERNEL_IDX, 

[Qemu-devel] [PULL 11/17] tcg-ppc: Use TCGMemOp within qemu_ldst routines

2013-10-12 Thread Richard Henderson
Signed-off-by: Richard Henderson r...@twiddle.net
---
 tcg/ppc/tcg-target.c | 104 ---
 1 file changed, 48 insertions(+), 56 deletions(-)

diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 68778c2..c1b0908 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -525,7 +525,7 @@ static void tcg_out_call (TCGContext *s, tcg_target_long 
arg, int const_arg,
 
 static void add_qemu_ldst_label (TCGContext *s,
  int is_ld,
- int opc,
+ TCGMemOp opc,
  int data_reg,
  int data_reg2,
  int addrlo_reg,
@@ -575,7 +575,7 @@ static void *st_trampolines[4];
Clobbers R1 and R2.  */
 
 static void tcg_out_tlb_check(TCGContext *s, TCGReg r0, TCGReg r1, TCGReg r2,
-  TCGReg addrlo, TCGReg addrhi, int s_bits,
+  TCGReg addrlo, TCGReg addrhi, TCGMemOp s_bits,
   int mem_index, int is_load, uint8_t **label_ptr)
 {
 int cmp_off =
@@ -647,10 +647,11 @@ static void tcg_out_tlb_check(TCGContext *s, TCGReg r0, 
TCGReg r1, TCGReg r2,
 }
 #endif
 
-static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGMemOp opc)
 {
 TCGReg addrlo, datalo, datahi, rbase;
-int bswap;
+TCGMemOp bswap = opc  MO_BSWAP;
+TCGMemOp s_bits = opc  MO_SIZE;
 #ifdef CONFIG_SOFTMMU
 int mem_index;
 TCGReg addrhi;
@@ -658,7 +659,7 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg 
*args, int opc)
 #endif
 
 datalo = *args++;
-datahi = (opc == 3 ? *args++ : 0);
+datahi = (s_bits == MO_64 ? *args++ : 0);
 addrlo = *args++;
 
 #ifdef CONFIG_SOFTMMU
@@ -666,31 +667,25 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg 
*args, int opc)
 mem_index = *args;
 
 tcg_out_tlb_check(s, TCG_REG_R3, TCG_REG_R4, TCG_REG_R0, addrlo,
-  addrhi, opc  3, mem_index, 0, label_ptr);
+  addrhi, s_bits, mem_index, 0, label_ptr);
 rbase = TCG_REG_R3;
 #else  /* !CONFIG_SOFTMMU */
 rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
 #endif
 
-#ifdef TARGET_WORDS_BIGENDIAN
-bswap = 0;
-#else
-bswap = 1;
-#endif
-
-switch (opc) {
+switch (opc  MO_SSIZE) {
 default:
-case 0:
+case MO_UB:
 tcg_out32(s, LBZX | TAB(datalo, rbase, addrlo));
 break;
-case 0|4:
+case MO_SB:
 tcg_out32(s, LBZX | TAB(datalo, rbase, addrlo));
 tcg_out32(s, EXTSB | RA(datalo) | RS(datalo));
 break;
-case 1:
+case MO_UW:
 tcg_out32(s, (bswap ? LHBRX : LHZX) | TAB(datalo, rbase, addrlo));
 break;
-case 1|4:
+case MO_SW:
 if (bswap) {
 tcg_out32(s, LHBRX | TAB(datalo, rbase, addrlo));
 tcg_out32(s, EXTSH | RA(datalo) | RS(datalo));
@@ -698,10 +693,10 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg 
*args, int opc)
 tcg_out32(s, LHAX | TAB(datalo, rbase, addrlo));
 }
 break;
-case 2:
+case MO_UL:
 tcg_out32(s, (bswap ? LWBRX : LWZX) | TAB(datalo, rbase, addrlo));
 break;
-case 3:
+case MO_Q:
 if (bswap) {
 tcg_out32(s, ADDI | RT(TCG_REG_R0) | RA(addrlo) | 4);
 tcg_out32(s, LWBRX | TAB(datalo, rbase, addrlo));
@@ -725,10 +720,11 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg 
*args, int opc)
 #endif
 }
 
-static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, TCGMemOp opc)
 {
 TCGReg addrlo, datalo, datahi, rbase;
-int bswap;
+TCGMemOp bswap = opc  MO_BSWAP;
+TCGMemOp s_bits = opc  MO_SIZE;
 #ifdef CONFIG_SOFTMMU
 int mem_index;
 TCGReg addrhi;
@@ -736,7 +732,7 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg 
*args, int opc)
 #endif
 
 datalo = *args++;
-datahi = (opc == 3 ? *args++ : 0);
+datahi = (s_bits == MO_64 ? *args++ : 0);
 addrlo = *args++;
 
 #ifdef CONFIG_SOFTMMU
@@ -744,28 +740,24 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg 
*args, int opc)
 mem_index = *args;
 
 tcg_out_tlb_check(s, TCG_REG_R3, TCG_REG_R4, TCG_REG_R0, addrlo,
-  addrhi, opc  3, mem_index, 0, label_ptr);
+  addrhi, s_bits, mem_index, 0, label_ptr);
 rbase = TCG_REG_R3;
 #else  /* !CONFIG_SOFTMMU */
 rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
 #endif
 
-#ifdef TARGET_WORDS_BIGENDIAN
-bswap = 0;
-#else
-bswap = 1;
-#endif
-switch (opc) {
-case 0:
+switch (s_bits) {
+case MO_8:
 tcg_out32(s, STBX | SAB(datalo, rbase, addrlo));
 break;
-case 1:
+case MO_16:
 tcg_out32(s, (bswap ? STHBRX : STHX) | 

[Qemu-devel] [PULL 08/17] tcg-arm: Tidy variable naming convention in qemu_ld/st

2013-10-12 Thread Richard Henderson
s/addr_reg2/addrhi/
s/addr_reg/addrlo/
s/data_reg2/datahi/
s/data_reg/datalo/

Signed-off-by: Richard Henderson r...@twiddle.net
---
 tcg/arm/tcg-target.c | 230 +--
 1 file changed, 115 insertions(+), 115 deletions(-)

diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index 4692859..e7d6bf0 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -186,7 +186,7 @@ static int target_parse_constraint(TCGArgConstraint *ct, 
const char **pct_str)
 #endif
 break;
 
-/* qemu_st address  data_reg */
+/* qemu_st address  data */
 case 's':
 ct-ct |= TCG_CT_REG;
 tcg_regset_set32(ct-u.regs, 0, (1  TCG_TARGET_NB_REGS) - 1);
@@ -1187,13 +1187,13 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg 
addrlo, TCGReg addrhi,
 int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
 
 /* Should generate something like the following:
- *   shrtmp, addr_reg, #TARGET_PAGE_BITS  (1)
+ *   shrtmp, addrlo, #TARGET_PAGE_BITS(1)
  *   addr2, env, #high
  *   andr0, tmp, #(CPU_TLB_SIZE - 1)  (2)
  *   addr2, r2, r0, lsl #CPU_TLB_ENTRY_BITS   (3)
  *   ldrr0, [r2, #cmp](4)
- *   tstaddr_reg, #s_mask
- *   ldrr1, [r2, #add](5)
+ *   tstaddrlo, #s_mask
+ *   ldrr2, [r2, #add](5)
  *   cmpeq  r0, tmp, lsl #TARGET_PAGE_BITS
  */
 tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
@@ -1249,18 +1249,18 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg 
addrlo, TCGReg addrhi,
path for a load or store, so that we can later generate the correct
helper code.  */
 static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc,
-int data_reg, int data_reg2, int addrlo_reg,
-int addrhi_reg, int mem_index,
+TCGReg datalo, TCGReg datahi, TCGReg addrlo,
+TCGReg addrhi, int mem_index,
 uint8_t *raddr, uint8_t *label_ptr)
 {
 TCGLabelQemuLdst *label = new_ldst_label(s);
 
 label-is_ld = is_ld;
 label-opc = opc;
-label-datalo_reg = data_reg;
-label-datahi_reg = data_reg2;
-label-addrlo_reg = addrlo_reg;
-label-addrhi_reg = addrhi_reg;
+label-datalo_reg = datalo;
+label-datahi_reg = datahi;
+label-addrlo_reg = addrlo;
+label-addrhi_reg = addrhi;
 label-mem_index = mem_index;
 label-raddr = raddr;
 label-label_ptr[0] = label_ptr;
@@ -1268,7 +1268,7 @@ static void add_qemu_ldst_label(TCGContext *s, int is_ld, 
TCGMemOp opc,
 
 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
-TCGReg argreg, data_reg, data_reg2;
+TCGReg argreg, datalo, datahi;
 TCGMemOp opc = lb-opc;
 uintptr_t func;
 
@@ -1296,29 +1296,29 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb)
 }
 tcg_out_call(s, func);
 
-data_reg = lb-datalo_reg;
-data_reg2 = lb-datahi_reg;
+datalo = lb-datalo_reg;
+datahi = lb-datahi_reg;
 switch (opc  MO_SSIZE) {
 case MO_SB:
-tcg_out_ext8s(s, COND_AL, data_reg, TCG_REG_R0);
+tcg_out_ext8s(s, COND_AL, datalo, TCG_REG_R0);
 break;
 case MO_SW:
-tcg_out_ext16s(s, COND_AL, data_reg, TCG_REG_R0);
+tcg_out_ext16s(s, COND_AL, datalo, TCG_REG_R0);
 break;
 default:
-tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0);
+tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
 break;
 case MO_Q:
-if (data_reg != TCG_REG_R1) {
-tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0);
-tcg_out_mov_reg(s, COND_AL, data_reg2, TCG_REG_R1);
-} else if (data_reg2 != TCG_REG_R0) {
-tcg_out_mov_reg(s, COND_AL, data_reg2, TCG_REG_R1);
-tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0);
+if (datalo != TCG_REG_R1) {
+tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
+tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
+} else if (datahi != TCG_REG_R0) {
+tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
+tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_R0);
 } else {
 tcg_out_mov_reg(s, COND_AL, TCG_REG_TMP, TCG_REG_R0);
-tcg_out_mov_reg(s, COND_AL, data_reg2, TCG_REG_R1);
-tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_TMP);
+tcg_out_mov_reg(s, COND_AL, datahi, TCG_REG_R1);
+tcg_out_mov_reg(s, COND_AL, datalo, TCG_REG_TMP);
 }
 break;
 }
@@ -1328,7 +1328,7 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb)
 
 static void tcg_out_qemu_st_slow_path(TCGContext 

[Qemu-devel] [PULL 14/17] tcg-ppc64: Convert to le/be ldst helpers

2013-10-12 Thread Richard Henderson
Signed-off-by: Richard Henderson r...@twiddle.net
---
 tcg/ppc64/tcg-target.c | 38 ++
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 499805f..be29139 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -841,21 +841,27 @@ static const uint32_t qemu_exts_opc[4] = {
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
  * int mmu_idx, uintptr_t ra)
  */
-static const void * const qemu_ld_helpers[4] = {
-helper_ret_ldub_mmu,
-helper_ret_lduw_mmu,
-helper_ret_ldul_mmu,
-helper_ret_ldq_mmu,
+static const void * const qemu_ld_helpers[16] = {
+[MO_UB]   = helper_ret_ldub_mmu,
+[MO_LEUW] = helper_le_lduw_mmu,
+[MO_LEUL] = helper_le_ldul_mmu,
+[MO_LEQ]  = helper_le_ldq_mmu,
+[MO_BEUW] = helper_be_lduw_mmu,
+[MO_BEUL] = helper_be_ldul_mmu,
+[MO_BEQ]  = helper_be_ldq_mmu,
 };
 
 /* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
  * uintxx_t val, int mmu_idx, uintptr_t ra)
  */
-static const void * const qemu_st_helpers[4] = {
-helper_ret_stb_mmu,
-helper_ret_stw_mmu,
-helper_ret_stl_mmu,
-helper_ret_stq_mmu,
+static const void * const qemu_st_helpers[16] = {
+[MO_UB]   = helper_ret_stb_mmu,
+[MO_LEUW] = helper_le_stw_mmu,
+[MO_LEUL] = helper_le_stl_mmu,
+[MO_LEQ]  = helper_le_stq_mmu,
+[MO_BEUW] = helper_be_stw_mmu,
+[MO_BEUL] = helper_be_stl_mmu,
+[MO_BEQ]  = helper_be_stq_mmu,
 };
 
 /* Perform the TLB load and compare.  Places the result of the comparison
@@ -952,8 +958,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, 
TCGMemOp opc,
 
 static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
-TCGMemOp opc = lb-opc  MO_SSIZE;
-TCGMemOp s_bits = lb-opc  MO_SIZE;
+TCGMemOp opc = lb-opc;
 
 reloc_pc14(lb-label_ptr[0], (uintptr_t)s-code_ptr);
 
@@ -966,10 +971,10 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb)
 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, lb-mem_index);
 tcg_out32(s, MFSPR | RT(TCG_REG_R6) | LR);
 
-tcg_out_call(s, (tcg_target_long)qemu_ld_helpers[s_bits], 1);
+tcg_out_call(s, (tcg_target_long)qemu_ld_helpers[opc  ~MO_SIGN], 1);
 
 if (opc  MO_SIGN) {
-uint32_t insn = qemu_exts_opc[s_bits];
+uint32_t insn = qemu_exts_opc[opc  MO_SIZE];
 tcg_out32(s, insn | RA(lb-datalo_reg) | RS(TCG_REG_R3));
 } else {
 tcg_out_mov(s, TCG_TYPE_I64, lb-datalo_reg, TCG_REG_R3);
@@ -980,7 +985,8 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb)
 
 static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
-TCGMemOp s_bits = lb-opc  MO_SIZE;
+TCGMemOp opc = lb-opc;
+TCGMemOp s_bits = opc  MO_SIZE;
 
 reloc_pc14(lb-label_ptr[0], (uintptr_t)s-code_ptr);
 
@@ -995,7 +1001,7 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb)
 tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R6, lb-mem_index);
 tcg_out32(s, MFSPR | RT(TCG_REG_R7) | LR);
 
-tcg_out_call(s, (tcg_target_long)qemu_st_helpers[s_bits], 1);
+tcg_out_call(s, (tcg_target_long)qemu_st_helpers[opc], 1);
 
 tcg_out_b(s, 0, (uintptr_t)lb-raddr);
 }
-- 
1.8.1.4




[Qemu-devel] [PULL 15/17] tcg-ppc: Support new ldst opcodes

2013-10-12 Thread Richard Henderson
Signed-off-by: Richard Henderson r...@twiddle.net
---
 tcg/ppc/tcg-target.c | 97 +---
 tcg/ppc/tcg-target.h |  2 +-
 2 files changed, 33 insertions(+), 66 deletions(-)

diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index dadc108..dc2c2df 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -653,27 +653,26 @@ static void tcg_out_tlb_check(TCGContext *s, TCGReg r0, 
TCGReg r1, TCGReg r2,
 }
 #endif
 
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGMemOp opc)
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
 {
-TCGReg addrlo, datalo, datahi, rbase;
-TCGMemOp bswap = opc  MO_BSWAP;
-TCGMemOp s_bits = opc  MO_SIZE;
+TCGReg addrlo, datalo, datahi, rbase, addrhi __attribute__((unused));
+TCGMemOp opc, bswap;
 #ifdef CONFIG_SOFTMMU
 int mem_index;
-TCGReg addrhi;
 uint8_t *label_ptr;
 #endif
 
 datalo = *args++;
-datahi = (s_bits == MO_64 ? *args++ : 0);
+datahi = (is64 ? *args++ : 0);
 addrlo = *args++;
+addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
+opc = *args++;
+bswap = opc  MO_BSWAP;
 
 #ifdef CONFIG_SOFTMMU
-addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
 mem_index = *args;
-
 tcg_out_tlb_check(s, TCG_REG_R3, TCG_REG_R4, TCG_REG_R0, addrlo,
-  addrhi, s_bits, mem_index, 0, label_ptr);
+  addrhi, opc  MO_SIZE, mem_index, 0, label_ptr);
 rbase = TCG_REG_R3;
 #else  /* !CONFIG_SOFTMMU */
 rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
@@ -726,25 +725,25 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
*args, TCGMemOp opc)
 #endif
 }
 
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, TCGMemOp opc)
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
 {
-TCGReg addrlo, datalo, datahi, rbase;
-TCGMemOp bswap = opc  MO_BSWAP;
-TCGMemOp s_bits = opc  MO_SIZE;
+TCGReg addrlo, datalo, datahi, rbase, addrhi __attribute__((unused));
+TCGMemOp opc, bswap, s_bits;
 #ifdef CONFIG_SOFTMMU
 int mem_index;
-TCGReg addrhi;
 uint8_t *label_ptr;
 #endif
 
 datalo = *args++;
-datahi = (s_bits == MO_64 ? *args++ : 0);
+datahi = (is64 ? *args++ : 0);
 addrlo = *args++;
+addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
+opc = *args++;
+bswap = opc  MO_BSWAP;
+s_bits = opc  MO_SIZE;
 
 #ifdef CONFIG_SOFTMMU
-addrhi = (TARGET_LONG_BITS == 64 ? *args++ : 0);
 mem_index = *args;
-
 tcg_out_tlb_check(s, TCG_REG_R3, TCG_REG_R4, TCG_REG_R0, addrlo,
   addrhi, s_bits, mem_index, 0, label_ptr);
 rbase = TCG_REG_R3;
@@ -1707,35 +1706,17 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, 
const TCGArg *args,
 tcg_out32 (s, NOR | SAB (args[1], args[0], args[1]));
 break;
 
-case INDEX_op_qemu_ld8u:
-tcg_out_qemu_ld(s, args, MO_UB);
-break;
-case INDEX_op_qemu_ld8s:
-tcg_out_qemu_ld(s, args, MO_SB);
-break;
-case INDEX_op_qemu_ld16u:
-tcg_out_qemu_ld(s, args, MO_TEUW);
-break;
-case INDEX_op_qemu_ld16s:
-tcg_out_qemu_ld(s, args, MO_TESW);
-break;
-case INDEX_op_qemu_ld32:
-tcg_out_qemu_ld(s, args, MO_TEUL);
-break;
-case INDEX_op_qemu_ld64:
-tcg_out_qemu_ld(s, args, MO_TEQ);
-break;
-case INDEX_op_qemu_st8:
-tcg_out_qemu_st(s, args, MO_UB);
+case INDEX_op_qemu_ld_i32:
+tcg_out_qemu_ld(s, args, 0);
 break;
-case INDEX_op_qemu_st16:
-tcg_out_qemu_st(s, args, MO_TEUW);
+case INDEX_op_qemu_ld_i64:
+tcg_out_qemu_ld(s, args, 1);
 break;
-case INDEX_op_qemu_st32:
-tcg_out_qemu_st(s, args, MO_TEUL);
+case INDEX_op_qemu_st_i32:
+tcg_out_qemu_st(s, args, 0);
 break;
-case INDEX_op_qemu_st64:
-tcg_out_qemu_st(s, args, MO_TEQ);
+case INDEX_op_qemu_st_i64:
+tcg_out_qemu_st(s, args, 1);
 break;
 
 case INDEX_op_ext8s_i32:
@@ -1920,29 +1901,15 @@ static const TCGTargetOpDef ppc_op_defs[] = {
 { INDEX_op_bswap32_i32, { r, r } },
 
 #if TARGET_LONG_BITS == 32
-{ INDEX_op_qemu_ld8u, { r, L } },
-{ INDEX_op_qemu_ld8s, { r, L } },
-{ INDEX_op_qemu_ld16u, { r, L } },
-{ INDEX_op_qemu_ld16s, { r, L } },
-{ INDEX_op_qemu_ld32, { r, L } },
-{ INDEX_op_qemu_ld64, { L, L, L } },
-
-{ INDEX_op_qemu_st8, { K, K } },
-{ INDEX_op_qemu_st16, { K, K } },
-{ INDEX_op_qemu_st32, { K, K } },
-{ INDEX_op_qemu_st64, { M, M, M } },
+{ INDEX_op_qemu_ld_i32, { r, L } },
+{ INDEX_op_qemu_ld_i64, { L, L, L } },
+{ INDEX_op_qemu_st_i32, { K, K } },
+{ INDEX_op_qemu_st_i64, { M, M, M } },
 #else
-{ INDEX_op_qemu_ld8u, { r, L, L } },
-{ INDEX_op_qemu_ld8s, { r, L, L } },
-{ INDEX_op_qemu_ld16u, { r, L, L } },
-{ INDEX_op_qemu_ld16s, { r, L, L } },
-{ 

[Qemu-devel] [PULL 16/17] tcg-ppc64: Support new ldst opcodes

2013-10-12 Thread Richard Henderson
Signed-off-by: Richard Henderson r...@twiddle.net
---
 tcg/ppc64/tcg-target.c | 77 +++---
 tcg/ppc64/tcg-target.h |  2 +-
 2 files changed, 17 insertions(+), 62 deletions(-)

diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index be29139..6109d86 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -1007,22 +1007,17 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, 
TCGLabelQemuLdst *lb)
 }
 #endif /* SOFTMMU */
 
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, TCGMemOp opc)
+static void tcg_out_qemu_ld(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
+TCGMemOp opc, int mem_index)
 {
-TCGReg addr_reg, data_reg, rbase;
+TCGReg rbase;
 uint32_t insn;
 TCGMemOp s_bits = opc  MO_SIZE;
 #ifdef CONFIG_SOFTMMU
-int mem_index;
 void *label_ptr;
 #endif
 
-data_reg = *args++;
-addr_reg = *args++;
-
 #ifdef CONFIG_SOFTMMU
-mem_index = *args;
-
 addr_reg = tcg_out_tlb_read(s, s_bits, addr_reg, mem_index, true);
 
 /* Load a pointer into the current opcode w/conditional branch-link. */
@@ -1059,21 +1054,16 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg 
*args, TCGMemOp opc)
 #endif
 }
 
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, TCGMemOp opc)
+static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
+TCGMemOp opc, int mem_index)
 {
-TCGReg addr_reg, rbase, data_reg;
+TCGReg rbase;
 uint32_t insn;
 #ifdef CONFIG_SOFTMMU
-int mem_index;
 void *label_ptr;
 #endif
 
-data_reg = *args++;
-addr_reg = *args++;
-
 #ifdef CONFIG_SOFTMMU
-mem_index = *args;
-
 addr_reg = tcg_out_tlb_read(s, opc  MO_SIZE, addr_reg, mem_index, false);
 
 /* Load a pointer into the current opcode w/conditional branch-link. */
@@ -1838,39 +1828,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, 
const TCGArg *args,
 tcg_out32(s, DIVDU | TAB(args[0], args[1], args[2]));
 break;
 
-case INDEX_op_qemu_ld8u:
-tcg_out_qemu_ld(s, args, MO_UB);
-break;
-case INDEX_op_qemu_ld8s:
-tcg_out_qemu_ld(s, args, MO_SB);
-break;
-case INDEX_op_qemu_ld16u:
-tcg_out_qemu_ld(s, args, MO_TEUW);
-break;
-case INDEX_op_qemu_ld16s:
-tcg_out_qemu_ld(s, args, MO_TESW);
-break;
-case INDEX_op_qemu_ld32:
-case INDEX_op_qemu_ld32u:
-tcg_out_qemu_ld(s, args, MO_TEUL);
-break;
-case INDEX_op_qemu_ld32s:
-tcg_out_qemu_ld(s, args, MO_TESL);
-break;
-case INDEX_op_qemu_ld64:
-tcg_out_qemu_ld(s, args, MO_TEQ);
-break;
-case INDEX_op_qemu_st8:
-tcg_out_qemu_st(s, args, MO_UB);
-break;
-case INDEX_op_qemu_st16:
-tcg_out_qemu_st(s, args, MO_TEUW);
-break;
-case INDEX_op_qemu_st32:
-tcg_out_qemu_st(s, args, MO_TEUL);
+case INDEX_op_qemu_ld_i32:
+case INDEX_op_qemu_ld_i64:
+tcg_out_qemu_ld(s, args[0], args[1], args[2], args[3]);
 break;
-case INDEX_op_qemu_st64:
-tcg_out_qemu_st(s, args, MO_TEQ);
+case INDEX_op_qemu_st_i32:
+case INDEX_op_qemu_st_i64:
+tcg_out_qemu_st(s, args[0], args[1], args[2], args[3]);
 break;
 
 case INDEX_op_ext8s_i32:
@@ -2133,19 +2097,10 @@ static const TCGTargetOpDef ppc_op_defs[] = {
 { INDEX_op_neg_i64, { r, r } },
 { INDEX_op_not_i64, { r, r } },
 
-{ INDEX_op_qemu_ld8u, { r, L } },
-{ INDEX_op_qemu_ld8s, { r, L } },
-{ INDEX_op_qemu_ld16u, { r, L } },
-{ INDEX_op_qemu_ld16s, { r, L } },
-{ INDEX_op_qemu_ld32, { r, L } },
-{ INDEX_op_qemu_ld32u, { r, L } },
-{ INDEX_op_qemu_ld32s, { r, L } },
-{ INDEX_op_qemu_ld64, { r, L } },
-
-{ INDEX_op_qemu_st8, { S, S } },
-{ INDEX_op_qemu_st16, { S, S } },
-{ INDEX_op_qemu_st32, { S, S } },
-{ INDEX_op_qemu_st64, { S, S } },
+{ INDEX_op_qemu_ld_i32, { r, L } },
+{ INDEX_op_qemu_ld_i64, { r, L } },
+{ INDEX_op_qemu_st_i32, { S, S } },
+{ INDEX_op_qemu_st_i64, { S, S } },
 
 { INDEX_op_ext8s_i32, { r, r } },
 { INDEX_op_ext16s_i32, { r, r } },
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index 457ea69..7ee50b6 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -123,7 +123,7 @@ typedef enum {
 #define TCG_TARGET_HAS_muluh_i641
 #define TCG_TARGET_HAS_mulsh_i641
 
-#define TCG_TARGET_HAS_new_ldst 0
+#define TCG_TARGET_HAS_new_ldst 1
 
 #define TCG_AREG0 TCG_REG_R27
 
-- 
1.8.1.4




[Qemu-devel] [PULL 03/17] tcg-i386: Tidy softmmu routines

2013-10-12 Thread Richard Henderson
Pass two TCGReg to tcg_out_tlb_load, rather than idx+args.

Move ldst_optimization routines just below tcg_out_tlb_load to avoid
the need for forward declarations.

Use TCGReg enum in preference to int where appropriate.

Signed-off-by: Richard Henderson r...@twiddle.net
---
 tcg/i386/tcg-target.c | 457 +++---
 1 file changed, 208 insertions(+), 249 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 1b86009..a7ff8a3 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1043,22 +1043,10 @@ static const void * const qemu_st_helpers[4] = {
 helper_ret_stq_mmu,
 };
 
-static void add_qemu_ldst_label(TCGContext *s,
-int is_ld,
-int opc,
-int data_reg,
-int data_reg2,
-int addrlo_reg,
-int addrhi_reg,
-int mem_index,
-uint8_t *raddr,
-uint8_t **label_ptr);
-
 /* Perform the TLB load and compare.
 
Inputs:
-   ADDRLO_IDX contains the index into ARGS of the low part of the
-   address; the high part of the address is at ADDR_LOW_IDX+1.
+   ADDRLO and ADDRHI contain the low and high part of the address.
 
MEM_INDEX and S_BITS are the memory context and log2 size of the load.
 
@@ -1076,14 +1064,12 @@ static void add_qemu_ldst_label(TCGContext *s,
 
First argument register is clobbered.  */
 
-static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
+static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg 
addrhi,
 int mem_index, TCGMemOp s_bits,
-const TCGArg *args,
 uint8_t **label_ptr, int which)
 {
-const int addrlo = args[addrlo_idx];
-const int r0 = TCG_REG_L0;
-const int r1 = TCG_REG_L1;
+const TCGReg r0 = TCG_REG_L0;
+const TCGReg r1 = TCG_REG_L1;
 TCGType ttype = TCG_TYPE_I32;
 TCGType htype = TCG_TYPE_I32;
 int trexw = 0, hrexw = 0;
@@ -1132,7 +1118,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, int 
addrlo_idx,
 
 if (TARGET_LONG_BITS  TCG_TARGET_REG_BITS) {
 /* cmp 4(r0), addrhi */
-tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r0, 4);
+tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, r0, 4);
 
 /* jne slow_path */
 tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
@@ -1146,6 +1132,183 @@ static inline void tcg_out_tlb_load(TCGContext *s, int 
addrlo_idx,
 tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
  offsetof(CPUTLBEntry, addend) - which);
 }
+
+/*
+ * Record the context of a call to the out of line helper code for the slow 
path
+ * for a load or store, so that we can later generate the correct helper code
+ */
+static void add_qemu_ldst_label(TCGContext *s, int is_ld, TCGMemOp opc,
+TCGReg datalo, TCGReg datahi,
+TCGReg addrlo, TCGReg addrhi,
+int mem_index, uint8_t *raddr,
+uint8_t **label_ptr)
+{
+TCGLabelQemuLdst *label = new_ldst_label(s);
+
+label-is_ld = is_ld;
+label-opc = opc;
+label-datalo_reg = datalo;
+label-datahi_reg = datahi;
+label-addrlo_reg = addrlo;
+label-addrhi_reg = addrhi;
+label-mem_index = mem_index;
+label-raddr = raddr;
+label-label_ptr[0] = label_ptr[0];
+if (TARGET_LONG_BITS  TCG_TARGET_REG_BITS) {
+label-label_ptr[1] = label_ptr[1];
+}
+}
+
+/*
+ * Generate code for the slow path for a load at the end of block
+ */
+static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
+{
+TCGMemOp opc = l-opc;
+TCGMemOp s_bits = opc  MO_SIZE;
+TCGReg data_reg;
+uint8_t **label_ptr = l-label_ptr[0];
+
+/* resolve label address */
+*(uint32_t *)label_ptr[0] = (uint32_t)(s-code_ptr - label_ptr[0] - 4);
+if (TARGET_LONG_BITS  TCG_TARGET_REG_BITS) {
+*(uint32_t *)label_ptr[1] = (uint32_t)(s-code_ptr - label_ptr[1] - 4);
+}
+
+if (TCG_TARGET_REG_BITS == 32) {
+int ofs = 0;
+
+tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, ofs);
+ofs += 4;
+
+tcg_out_st(s, TCG_TYPE_I32, l-addrlo_reg, TCG_REG_ESP, ofs);
+ofs += 4;
+
+if (TARGET_LONG_BITS == 64) {
+tcg_out_st(s, TCG_TYPE_I32, l-addrhi_reg, TCG_REG_ESP, ofs);
+ofs += 4;
+}
+
+tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l-mem_index);
+ofs += 4;
+
+tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, (uintptr_t)l-raddr);
+} else {
+tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+/* The second argument is already