Handle small positive and negative numbers early. Check for logical immediates. Check if using MOVN for the first set helps.

Signed-off-by: Richard Henderson <r...@twiddle.net>
---
 tcg/aarch64/tcg-target.c | 85 +++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 66 insertions(+), 19 deletions(-)

diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 920c63c..02ab278 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -511,32 +511,79 @@ static inline void tcg_out_movr(TCGContext *s, AArch64Ext ext,
     tcg_out_aimm(s, INSN_ADDI, ext, dest, src, 0);
 }
 
+static inline void tcg_out_movwi(TCGContext *s, AArch64Insn insn,
+                                 AArch64Ext ext, TCGReg rd,
+                                 uint16_t value, int shift)
+{
+    tcg_out32(s, insn | ext | shift << 17 | value << 5 | rd);
+}
+
 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                          tcg_target_long value)
 {
-    AArch64Insn insn = INSN_MOVZ;
+    tcg_target_long valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull);
+    AArch64Insn insn;
+    AArch64Ext ext;
+    int i, wantinv, shift;
+
+    value &= valid;
+
+    /* Check small positive values. */
+    if ((value & ~0xffff) == 0) {
+        tcg_out_movwi(s, INSN_MOVZ, E32, rd, value, 0);
+        return;
+    }
+
+    /* Check small negative values. */
+    if ((~value & valid & ~0xffff) == 0) {
+        tcg_out_movwi(s, INSN_MOVN, EXT(type == TCG_TYPE_I64), rd, ~value, 0);
+        return;
+    }
+
+    /* Check for bitfield immediates. */
+    if ((value & ~0xffffffffull) == 0) {
+        i = find_bitmask32(value);
+        ext = E32;
+    } else {
+        i = find_bitmask64(value);
+        ext = E64;
+    }
+    if (i >= 0) {
+        tcg_out32(s, INSN_ORRI | TCG_REG_XZR << 5 | ext
+                  | bitmask_enc[i] << 10 | rd);
+        return;
+    }
 
-    if (type == TCG_TYPE_I32) {
-        value = (uint32_t)value;
+    /* Would it take fewer insns to load the inverse? */
+    wantinv = 0;
+    for (i = 0; i < 64; i += 16) {
+        if (((value >> i) & 0xffff) == 0) {
+            wantinv--;
+        }
+        if (((~value >> i) & 0xffff) == 0) {
+            wantinv++;
+        }
     }
 
-    /* Construct halfwords of the immediate with MOVZ/MOVK with LSL.
-       Count trailing zeros in 16 bit steps, mapping 64 to 0. Emit the
-       first MOVZ with the half-word immediate skipping the zeros, with
-       a shift (LSL) equal to this number. Then all other insns are MOVKs.
-       Zero the processed half-word in the value, continue until empty.
-       We build the final result 16bits at a time with up to 4 instructions,
-       but do not emit instructions for 16bit zero holes. */
-    do {
-        unsigned shift = ctz64(value) & (63 & -16);
-        unsigned half = (value >> shift) & 0xffff;
-        AArch64Ext ext = EXT(shift >= 32);
-
-        tcg_out32(s, insn | ext | shift << 17 | half << 5 | rd);
-
-        insn = INSN_MOVK;
+    if (wantinv > 0) {
+        value = ~value;
+        insn = INSN_MOVN;
+        valid = -1;
+    } else {
+        insn = INSN_MOVZ;
+        valid = 0;
+    }
+
+    /* Perform the first round specially, to handle the inverse. */
+    shift = ctz64(value) & (63 & -16);
+    tcg_out_movwi(s, insn, ext, rd, value >> shift, shift);
+    value &= ~(0xffffUL << shift);
+
+    while (value) {
+        shift = ctz64(value) & (63 & -16);
+        tcg_out_movwi(s, INSN_MOVK, ext, rd, (value ^ valid) >> shift, shift);
         value &= ~(0xffffUL << shift);
-    } while (value);
+    }
 }
 
 static inline void tcg_out_ldst_r(TCGContext *s,
-- 
1.8.3.1