Use extract2 to implement rotri. To make this easier,
redefine rotli in terms of rotri, rather than the reverse.
Signed-off-by: Richard Henderson <[email protected]>
---
tcg/tcg-op.c | 70 ++++++++++++++++++++++++++++++----------------------
1 file changed, 41 insertions(+), 29 deletions(-)
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 4caf77da1e..3e10a3ad16 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -834,23 +834,12 @@ void tcg_gen_rotl_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
void tcg_gen_rotli_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
tcg_debug_assert(arg2 >= 0 && arg2 < 32);
- /* some cases can be optimized here */
if (arg2 == 0) {
tcg_gen_mov_i32(ret, arg1);
} else if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I32, 0)) {
- TCGv_i32 t0 = tcg_constant_i32(arg2);
- tcg_gen_op3_i32(INDEX_op_rotl, ret, arg1, t0);
- } else if (tcg_op_supported(INDEX_op_rotr, TCG_TYPE_I32, 0)) {
- TCGv_i32 t0 = tcg_constant_i32(32 - arg2);
- tcg_gen_op3_i32(INDEX_op_rotr, ret, arg1, t0);
+ tcg_gen_op3_i32(INDEX_op_rotl, ret, arg1, tcg_constant_i32(arg2));
} else {
- TCGv_i32 t0 = tcg_temp_ebb_new_i32();
- TCGv_i32 t1 = tcg_temp_ebb_new_i32();
- tcg_gen_shli_i32(t0, arg1, arg2);
- tcg_gen_shri_i32(t1, arg1, 32 - arg2);
- tcg_gen_or_i32(ret, t0, t1);
- tcg_temp_free_i32(t0);
- tcg_temp_free_i32(t1);
+ tcg_gen_rotri_i32(ret, arg1, -arg2 & 31);
}
}
@@ -878,7 +867,16 @@ void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
{
tcg_debug_assert(arg2 >= 0 && arg2 < 32);
- tcg_gen_rotli_i32(ret, arg1, -arg2 & 31);
+ if (arg2 == 0) {
+ tcg_gen_mov_i32(ret, arg1);
+ } else if (tcg_op_supported(INDEX_op_rotr, TCG_TYPE_I32, 0)) {
+ tcg_gen_op3_i32(INDEX_op_rotr, ret, arg1, tcg_constant_i32(arg2));
+ } else if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I32, 0)) {
+ tcg_gen_op3_i32(INDEX_op_rotl, ret, arg1, tcg_constant_i32(32 - arg2));
+ } else {
+ /* Do not recurse with the rotri simplification. */
+ tcg_gen_op4i_i32(INDEX_op_extract2, ret, arg1, arg1, arg2);
+ }
}
void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
@@ -2417,23 +2415,12 @@ void tcg_gen_rotl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
void tcg_gen_rotli_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
{
tcg_debug_assert(arg2 >= 0 && arg2 < 64);
- /* some cases can be optimized here */
if (arg2 == 0) {
tcg_gen_mov_i64(ret, arg1);
- } else if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I64, 0)) {
- TCGv_i64 t0 = tcg_constant_i64(arg2);
- tcg_gen_op3_i64(INDEX_op_rotl, ret, arg1, t0);
- } else if (tcg_op_supported(INDEX_op_rotr, TCG_TYPE_I64, 0)) {
- TCGv_i64 t0 = tcg_constant_i64(64 - arg2);
- tcg_gen_op3_i64(INDEX_op_rotr, ret, arg1, t0);
+ } else if (tcg_op_supported(INDEX_op_rotl, TCG_TYPE_I64, 0)) {