[Qemu-devel] [PATCH 05/21] tcg-i386: Tidy bswap operations.

Richard Henderson Wed, 14 Apr 2010 14:07:01 -0700

Define OPC_BSWAP.  Factor opcode emission to separate functions.
Use bswap+shift to implement 16-bit swap instead of a rolw; this
gets the proper zero-extension required by INDEX_op_bswap16_i32.


Signed-off-by: Richard Henderson <r...@twiddle.net>
---
 tcg/i386/tcg-target.c |   53 +++++++++++++++++++++++++------------------------
 1 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 75b9915..0bafd00 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -163,6 +163,7 @@ static inline int tcg_target_const_match(tcg_target_long val,
 
 #define P_EXT   0x100 /* 0x0f opcode prefix */
 
+#define OPC_BSWAP      (0xc8 | P_EXT)
 #define OPC_MOVZBL     (0xb6 | P_EXT)
 #define OPC_MOVZWL     (0xb7 | P_EXT)
 #define OPC_MOVSBL     (0xbe | P_EXT)
@@ -339,6 +340,22 @@ static inline void tcg_out_ext16s(TCGContext *s, int dest, int src)
     tcg_out_modrm(s, OPC_MOVSWL, dest, src);
 }
 
+static inline void tcg_out_bswap32(TCGContext *s, int reg)
+{
+    tcg_out_opc(s, OPC_BSWAP + reg);    /* bswap reg (reg number is added into the opcode) */
+}
+
+static inline void tcg_out_bswap16(TCGContext *s, int reg, int sign)
+{
+    /* Byte-swap the low 16 bits of REG.  A 32-bit bswap followed by a
+       16-bit right shift guarantees that the high part contains the sign
+       (SIGN != 0) or zero (SIGN == 0) extension required.  It also doesn't
+       suffer the problem of partial register stalls that using rolw does.  */
+    tcg_out_bswap32(s, reg);
+    /* sar $16, reg if SIGN is set; otherwise shr $16, reg */
+    tcg_out_modrm(s, 0xc1, (sign ? SHIFT_SAR : SHIFT_SHR), reg);
+    tcg_out8(s, 16);
+}
+
 static inline void tgen_arithi(TCGContext *s, int c, int r0, int32_t val, int cf)
 {
     if (!cf && ((c == ARITH_ADD && val == 1) || (c == ARITH_SUB && val == -1))) {
@@ -745,31 +762,21 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
         /* movzwl */
         tcg_out_modrm_offset(s, OPC_MOVZWL, data_reg, r0, GUEST_BASE);
         if (bswap) {
-            /* rolw $8, data_reg */
-            tcg_out8(s, 0x66); 
-            tcg_out_modrm(s, 0xc1, 0, data_reg);
-            tcg_out8(s, 8);
+            tcg_out_bswap16(s, data_reg, 0);
         }
         break;
     case 1 | 4:
         /* movswl */
         tcg_out_modrm_offset(s, OPC_MOVSWL, data_reg, r0, GUEST_BASE);
         if (bswap) {
-            /* rolw $8, data_reg */
-            tcg_out8(s, 0x66); 
-            tcg_out_modrm(s, 0xc1, 0, data_reg);
-            tcg_out8(s, 8);
-
-            /* movswl data_reg, data_reg */
-            tcg_out_modrm(s, OPC_MOVSWL, data_reg, data_reg);
+            tcg_out_bswap16(s, data_reg, 1);
         }
         break;
     case 2:
         /* movl (r0), data_reg */
         tcg_out_modrm_offset(s, 0x8b, data_reg, r0, GUEST_BASE);
         if (bswap) {
-            /* bswap */
-            tcg_out_opc(s, (0xc8 + data_reg) | P_EXT);
+            tcg_out_bswap32(s, data_reg);
         }
         break;
     case 3:
@@ -786,11 +793,10 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
             tcg_out_modrm_offset(s, 0x8b, data_reg2, r0, GUEST_BASE + 4);
         } else {
             tcg_out_modrm_offset(s, 0x8b, data_reg, r0, GUEST_BASE + 4);
-            tcg_out_opc(s, (0xc8 + data_reg) | P_EXT);
+            tcg_out_bswap32(s, data_reg);
 
             tcg_out_modrm_offset(s, 0x8b, data_reg2, r0, GUEST_BASE);
-            /* bswap */
-            tcg_out_opc(s, (0xc8 + data_reg2) | P_EXT);
+            tcg_out_bswap32(s, data_reg2);
         }
         break;
     default:
@@ -982,8 +988,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     case 2:
         if (bswap) {
             tcg_out_mov(s, r1, data_reg);
-            /* bswap data_reg */
-            tcg_out_opc(s, (0xc8 + r1) | P_EXT);
+            tcg_out_bswap32(s, r1);
             data_reg = r1;
         }
         /* movl */
@@ -992,12 +997,10 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     case 3:
         if (bswap) {
             tcg_out_mov(s, r1, data_reg2);
-            /* bswap data_reg */
-            tcg_out_opc(s, (0xc8 + r1) | P_EXT);
+            tcg_out_bswap32(s, r1);
             tcg_out_modrm_offset(s, 0x89, r1, r0, GUEST_BASE);
             tcg_out_mov(s, r1, data_reg);
-            /* bswap data_reg */
-            tcg_out_opc(s, (0xc8 + r1) | P_EXT);
+            tcg_out_bswap32(s, r1);
             tcg_out_modrm_offset(s, 0x89, r1, r0, GUEST_BASE + 4);
         } else {
             tcg_out_modrm_offset(s, 0x89, data_reg, r0, GUEST_BASE);
@@ -1195,12 +1198,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_bswap16_i32:
-        tcg_out8(s, 0x66);
-        tcg_out_modrm(s, 0xc1, SHIFT_ROL, args[0]);
-        tcg_out8(s, 8);
+        tcg_out_bswap16(s, args[0], 0);
         break;
     case INDEX_op_bswap32_i32:
-        tcg_out_opc(s, (0xc8 + args[0]) | P_EXT);
+        tcg_out_bswap32(s, args[0]);
         break;
 
     case INDEX_op_neg_i32:
-- 
1.6.2.5

[Qemu-devel] [PATCH 05/21] tcg-i386: Tidy bswap operations.

Reply via email to