From: Christophe Lyon <christophe.l...@st.com> For variants with rounding, fix cases where adding the rounding constant could overflow.
For VSHLL, fix bit mask. Signed-off-by: Christophe Lyon <christophe.l...@st.com> --- target-arm/neon_helper.c | 61 ++++++++++++++++++++++++++++++++++++++------- target-arm/translate.c | 12 +++++++- 2 files changed, 61 insertions(+), 12 deletions(-) diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c index fead152..6c832b4 100644 --- a/target-arm/neon_helper.c +++ b/target-arm/neon_helper.c @@ -451,6 +451,9 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop) return val; } +/* The addition of the rounding constant may overflow, so we use an + * intermediate 64 bits accumulator, which is really needed only when + * dealing with 32 bits input values. */ #define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ @@ -459,11 +462,12 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop) } else if (tmp < -(ssize_t)sizeof(src1) * 8) { \ dest = src1 >> (sizeof(src1) * 8 - 1); \ } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \ - dest = src1 >> (tmp - 1); \ + dest = src1 >> (-tmp - 1); \ dest++; \ dest >>= 1; \ } else if (tmp < 0) { \ - dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ + int64_t big_dest = ((int64_t)src1 + (1 << (-1 - tmp))); \ + dest = big_dest >> -tmp; \ } else { \ dest = src1 << tmp; \ }} while (0) @@ -472,6 +476,8 @@ NEON_VOP(rshl_s16, neon_s16, 2) NEON_VOP(rshl_s32, neon_s32, 1) #undef NEON_FN +/* Handling addition overflow with 64 bits inputs values is more + * tricky than with 32 bits values. */ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) { int8_t shift = (int8_t)shiftop; @@ -480,18 +486,37 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) val = 0; } else if (shift < -64) { val >>= 63; - } else if (shift == -63) { + } else if (shift == -64) { val >>= 63; val++; val >>= 1; } else if (shift < 0) { - val = (val + ((int64_t)1 << (-1 - shift))) >> -shift; + int64_t round = (int64_t)1 << (-1 - shift); + /* Reduce the range as long as the addition overflows. It's + * sufficient to check if (val+round) is < 0 and val > 0 + * because round is > 0. */ + while ((val > 0) && ((val + round) < 0) && round > 1) { + shift++; + round >>= 1; + val >>= 1; + } + if ((val > 0) && (val + round) < 0) { + /* If addition still overflows at this point, it means + * that round==1, thus shift==-1, and also that + * val==0x7FFFFFFFFFFFFFFF. */ + val = 0x4000000000000000LL; + } else { + val = (val + round) >> -shift; + } } else { val <<= shift; } return val; } +/* The addition of the rounding constant may overflow, so we use an + * intermediate 64 bits accumulator, which is really needed only when + * dealing with 32 bits input values. */ #define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ @@ -499,9 +524,10 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) tmp < -(ssize_t)sizeof(src1) * 8) { \ dest = 0; \ } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \ - dest = src1 >> (tmp - 1); \ + dest = src1 >> (-tmp - 1); \ } else if (tmp < 0) { \ - dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ + uint64_t big_dest = ((uint64_t)src1 + (1 << (-1 - tmp))); \ + dest = big_dest >> -tmp; \ } else { \ dest = src1 << tmp; \ }} while (0) @@ -513,14 +539,29 @@ NEON_VOP(rshl_u32, neon_u32, 1) uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop) { int8_t shift = (uint8_t)shiftop; - if (shift >= 64 || shift < 64) { + if (shift >= 64 || shift < -64) { val = 0; } else if (shift == -64) { /* Rounding a 1-bit result just preserves that bit. */ val >>= 63; - } if (shift < 0) { - val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift; - val >>= -shift; + } else if (shift < 0) { + uint64_t round = (uint64_t)1 << (-1 - shift); + /* Reduce the range as long as the addition overflows. It's + * sufficient to check if (val+round) is < val + * because val and round are > 0. */ + while (((val + round) < val) && round > 1) { + shift++; + round >>= 1; + val >>= 1; + } + if ((val + round) < val) { + /* If addition still overflows at this point, it means + * that round==1, thus shift==-1, and also that + * val==0x&FFFFFFFFFFFFFFF. */ + val = 0x8000000000000000LL; + } else { + val = (val + round) >> -shift; + } } else { val <<= shift; } diff --git a/target-arm/translate.c b/target-arm/translate.c index d95133f..b44f7a1 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -4877,10 +4877,18 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) if (size == 0) { imm = (0xffu >> (8 - shift)); imm |= imm << 16; - } else { + } else if (size == 1) { imm = 0xffff >> (16 - shift); + } else { + /* size == 2 */ + imm = 0xffffffff >> (32 - shift); + } + if (size < 2) { + imm64 = imm | (((uint64_t)imm) << 32); + } else { + imm64 = imm; } - imm64 = imm | (((uint64_t)imm) << 32); + imm64 = ~imm64; tcg_gen_andi_i64(cpu_V0, cpu_V0, imm64); } } -- 1.7.2.3