From: Christophe Lyon <christophe.l...@st.com> Handle corner cases where the addition of the rounding constant could cause overflows.
Signed-off-by: Christophe Lyon <christophe.l...@st.com> --- target-arm/neon_helper.c | 61 ++++++++++++++++++++++++++++++++++++++------- target-arm/translate.c | 17 ++++++++++-- 2 files changed, 65 insertions(+), 13 deletions(-) diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c index bf29bbe..5971275 100644 --- a/target-arm/neon_helper.c +++ b/target-arm/neon_helper.c @@ -540,6 +540,9 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop) return val; } +/* The addition of the rounding constant may overflow, so we use an + * intermediate 64 bits accumulator, which is really needed only when + * dealing with 32 bits input values. */ #define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ @@ -548,11 +551,12 @@ uint64_t HELPER(neon_shl_s64)(uint64_t valop, uint64_t shiftop) } else if (tmp < -(ssize_t)sizeof(src1) * 8) { \ dest = src1 >> (sizeof(src1) * 8 - 1); \ } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \ - dest = src1 >> (tmp - 1); \ + dest = src1 >> (-tmp - 1); \ dest++; \ dest >>= 1; \ } else if (tmp < 0) { \ - dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ + int64_t big_dest = ((int64_t)src1 + (1 << (-1 - tmp))); \ + dest = big_dest >> -tmp; \ } else { \ dest = src1 << tmp; \ }} while (0) @@ -561,6 +565,8 @@ NEON_VOP(rshl_s16, neon_s16, 2) NEON_VOP(rshl_s32, neon_s32, 1) #undef NEON_FN +/* Handling addition overflow with 64 bits inputs values is more + * tricky than with 32 bits values. */ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) { int8_t shift = (int8_t)shiftop; @@ -569,18 +575,37 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) val = 0; } else if (shift < -64) { val >>= 63; - } else if (shift == -63) { + } else if (shift == -64) { val >>= 63; val++; val >>= 1; } else if (shift < 0) { - val = (val + ((int64_t)1 << (-1 - shift))) >> -shift; + int64_t round = (int64_t)1 << (-1 - shift); + /* Reduce the range as long as the addition overflows. It's + * sufficient to check if (val+round) is < 0 and val > 0 + * because round is > 0. */ + while ((val > 0) && ((val + round) < 0) && round > 1) { + shift++; + round >>= 1; + val >>= 1; + } + if ((val > 0) && (val + round) < 0) { + /* If addition still overflows at this point, it means + * that round==1, thus shift==-1, and also that + * val==0x7FFFFFFFFFFFFFFF. */ + val = 0x4000000000000000LL; + } else { + val = (val + round) >> -shift; + } } else { val <<= shift; } return val; } +/* The addition of the rounding constant may overflow, so we use an + * intermediate 64 bits accumulator, which is really needed only when + * dealing with 32 bits input values. */ #define NEON_FN(dest, src1, src2) do { \ int8_t tmp; \ tmp = (int8_t)src2; \ @@ -588,9 +613,10 @@ uint64_t HELPER(neon_rshl_s64)(uint64_t valop, uint64_t shiftop) tmp < -(ssize_t)sizeof(src1) * 8) { \ dest = 0; \ } else if (tmp == -(ssize_t)sizeof(src1) * 8) { \ - dest = src1 >> (tmp - 1); \ + dest = src1 >> (-tmp - 1); \ } else if (tmp < 0) { \ - dest = (src1 + (1 << (-1 - tmp))) >> -tmp; \ + uint64_t big_dest = ((uint64_t)src1 + (1 << (-1 - tmp))); \ + dest = big_dest >> -tmp; \ } else { \ dest = src1 << tmp; \ }} while (0) @@ -602,14 +628,29 @@ NEON_VOP(rshl_u32, neon_u32, 1) uint64_t HELPER(neon_rshl_u64)(uint64_t val, uint64_t shiftop) { int8_t shift = (uint8_t)shiftop; - if (shift >= 64 || shift < 64) { + if (shift >= 64 || shift < -64) { val = 0; } else if (shift == -64) { /* Rounding a 1-bit result just preserves that bit. */ val >>= 63; - } if (shift < 0) { - val = (val + ((uint64_t)1 << (-1 - shift))) >> -shift; - val >>= -shift; + } else if (shift < 0) { + uint64_t round = (uint64_t)1 << (-1 - shift); + /* Reduce the range as long as the addition overflows. It's + * sufficient to check if (val+round) is < val + * because val and round are > 0. */ + while (((val + round) < val) && round > 1) { + shift++; + round >>= 1; + val >>= 1; + } + if ((val + round) < val) { + /* If addition still overflows at this point, it means + * that round==1, thus shift==-1, and also that + * val==0x&FFFFFFFFFFFFFFF. */ + val = 0x8000000000000000LL; + } else { + val = (val + round) >> -shift; + } } else { val <<= shift; } diff --git a/target-arm/translate.c b/target-arm/translate.c index 4cf2ecd..b14fa4b 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -4885,13 +4885,24 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) tcg_gen_shli_i64(cpu_V0, cpu_V0, shift); if (size < 2 || !u) { uint64_t imm64; - if (size == 0) { + switch(size) { + case 0: imm = (0xffu >> (8 - shift)); imm |= imm << 16; - } else { + break; + case 1: imm = 0xffff >> (16 - shift); + break; + case 2: + imm = 0xffffffff >> (32 - shift); + break; + } + if (size < 2) { + imm64 = imm | (((uint64_t)imm) << 32); + } else { + imm64 = imm; } - imm64 = imm | (((uint64_t)imm) << 32); + imm64 = ~imm64; tcg_gen_andi_i64(cpu_V0, cpu_V0, imm64); } } -- 1.7.2.3