Re: [Qemu-devel] [PATCH v1 16/19] fpu/softfloat: re-factor int/uint to float
Richard Henderson writes: > On 12/11/2017 04:57 AM, Alex Bennée wrote: >> These are considerably simpler as the lower order integers can just >> use the higher order conversion function. As the decomposed fractional >> part is a full 64 bit rounding and inexact handling comes from the >> pack functions. >> >> Signed-off-by: Alex Bennée >> >> static uint32_t uint32_pack_decomposed(decomposed_parts p, float_status *s) >> { >> uint64_t r = uint64_pack_decomposed(p, s); >> -return r > UINT32_MAX ? UINT32_MAX : r; >> +if (r > UINT32_MAX) { >> +s->float_exception_flags |= float_flag_invalid; >> +r = UINT32_MAX; >> +} >> +return r; >> } >> >> #define F > > Ah, the fix for the bug in patch 15 got squashed into the wrong patch. > ;-) Hmm slip of the re-base... the fix has been moved. > >> +float16 int16_to_float16(int16_t a, float_status *status) >> +{ >> +return int64_to_float16((int64_t) a, status); >> +} > > Kill all of the redundant casts? Ack. > > Otherwise, as amended in your followup, > > Reviewed-by: Richard Henderson > > > r~ -- Alex Bennée
Re: [Qemu-devel] [PATCH v1 16/19] fpu/softfloat: re-factor int/uint to float
On 12/11/2017 04:57 AM, Alex Bennée wrote: > These are considerably simpler as the lower order integers can just > use the higher order conversion function. As the decomposed fractional > part is a full 64 bit rounding and inexact handling comes from the > pack functions. > > Signed-off-by: Alex Bennée > --- > fpu/softfloat.c | 358 > +--- > include/fpu/softfloat.h | 30 ++-- > 2 files changed, 195 insertions(+), 193 deletions(-) > > diff --git a/fpu/softfloat.c b/fpu/softfloat.c > index d7858bdae5..1a7f1cab10 100644 > --- a/fpu/softfloat.c > +++ b/fpu/softfloat.c > @@ -1409,17 +1409,18 @@ FLOAT_TO_INT(64, 64) > > #undef FLOAT_TO_INT > > -/* > -| Returns the result of converting the floating-point value > -| `a' to the unsigned integer format. The conversion is > -| performed according to the IEC/IEEE Standard for Binary Floating-Point > -| Arithmetic---which means in particular that the conversion is rounded > -| according to the current rounding mode. If `a' is a NaN, the largest > -| unsigned integer is returned. Otherwise, if the conversion overflows, the > -| largest unsigned integer is returned. If the 'a' is negative, the result > -| is rounded and zero is returned; values that do not round to zero will > -| raise the inexact exception flag. > -**/ > +/* > + * Returns the result of converting the floating-point value `a' to > + * the unsigned integer format. The conversion is performed according > + * to the IEC/IEEE Standard for Binary Floating-Point > + * Arithmetic---which means in particular that the conversion is > + * rounded according to the current rounding mode. If `a' is a NaN, > + * the largest unsigned integer is returned. Otherwise, if the > + * conversion overflows, the largest unsigned integer is returned. If > + * the 'a' is negative, the result is rounded and zero is returned; > + * values that do not round to zero will raise the inexact exception > + * flag. 
> + */ > > static uint64_t uint64_pack_decomposed(decomposed_parts p, float_status *s) > { > @@ -1433,6 +1434,7 @@ static uint64_t uint64_pack_decomposed(decomposed_parts > p, float_status *s) > return 0; > case float_class_normal: > if (p.sign) { > +s->float_exception_flags |= float_flag_invalid; > return 0; > } > if (p.exp < DECOMPOSED_BINARY_POINT) { > @@ -1440,6 +1442,7 @@ static uint64_t uint64_pack_decomposed(decomposed_parts > p, float_status *s) > } else if (p.exp < 64) { > return p.frac << (p.exp - DECOMPOSED_BINARY_POINT); > } else { > +s->float_exception_flags |= float_flag_invalid; > return UINT64_MAX; > } > default: > @@ -1450,13 +1453,21 @@ static uint64_t > uint64_pack_decomposed(decomposed_parts p, float_status *s) > static uint16_t uint16_pack_decomposed(decomposed_parts p, float_status *s) > { > uint64_t r = uint64_pack_decomposed(p, s); > -return r > UINT16_MAX ? UINT16_MAX : r; > +if (r > UINT16_MAX) { > +s->float_exception_flags |= float_flag_invalid; > +r = UINT16_MAX; > +} > +return r; > } > > static uint32_t uint32_pack_decomposed(decomposed_parts p, float_status *s) > { > uint64_t r = uint64_pack_decomposed(p, s); > -return r > UINT32_MAX ? UINT32_MAX : r; > +if (r > UINT32_MAX) { > +s->float_exception_flags |= float_flag_invalid; > +r = UINT32_MAX; > +} > +return r; > } > > #define F Ah, the fix for the bug in patch 15 got squashed into the wrong patch. ;-) > +float16 int16_to_float16(int16_t a, float_status *status) > +{ > +return int64_to_float16((int64_t) a, status); > +} Kill all of the redundant casts? Otherwise, as amended in your followup, Reviewed-by: Richard Henderson r~
Re: [Qemu-devel] [PATCH v1 16/19] fpu/softfloat: re-factor int/uint to float
Alex Bennée writes: > These are considerably simpler as the lower order integers can just > use the higher order conversion function. As the decomposed fractional > part is a full 64 bit rounding and inexact handling comes from the > pack functions. > > +/* > + * Integer to float conversions > + * > + * Returns the result of converting the two's complement integer `a' > + * to the floating-point format. The conversion is performed according > + * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. > + */ > + > +static decomposed_parts int_to_float(int64_t a, float_status *status) > +{ > +decomposed_parts r; > +if (a == 0) { > +r.cls = float_class_zero; > +} else if (a == (1ULL << 63)) { As the re-pack code can handle -0, we need to explicitly set the sign here, since we are building decomposed_parts from scratch (otherwise r.sign is left uninitialized for a zero input): if (a == 0) { r.cls = float_class_zero; r.sign = false; } else if (a == (1ULL << 63)) { And also at: > + > +/* > + * Unsigned Integer to float conversions > + * > + * Returns the result of converting the unsigned integer `a' to the > + * floating-point format. The conversion is performed according to the > + * IEC/IEEE Standard for Binary Floating-Point Arithmetic. > + */ > + > +static decomposed_parts uint_to_float(uint64_t a, float_status *status) > +{ > +decomposed_parts r; > +if (a == 0) { > +r.cls = float_class_zero; > +} else { which now reads: decomposed_parts r = { .sign = false}; if (a == 0) { r.cls = float_class_zero; } else { int spare_bits = clz64(a) - 1; r.cls = float_class_normal; -- Alex Bennée
[Qemu-devel] [PATCH v1 16/19] fpu/softfloat: re-factor int/uint to float
These are considerably simpler as the lower order integers can just use the higher order conversion function. As the decomposed fractional part is a full 64 bits, rounding and inexact handling come from the pack functions. Signed-off-by: Alex Bennée --- fpu/softfloat.c | 358 +--- include/fpu/softfloat.h | 30 ++-- 2 files changed, 195 insertions(+), 193 deletions(-) diff --git a/fpu/softfloat.c b/fpu/softfloat.c index d7858bdae5..1a7f1cab10 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -1409,17 +1409,18 @@ FLOAT_TO_INT(64, 64) #undef FLOAT_TO_INT -/* -| Returns the result of converting the floating-point value -| `a' to the unsigned integer format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic---which means in particular that the conversion is rounded -| according to the current rounding mode. If `a' is a NaN, the largest -| unsigned integer is returned. Otherwise, if the conversion overflows, the -| largest unsigned integer is returned. If the 'a' is negative, the result -| is rounded and zero is returned; values that do not round to zero will -| raise the inexact exception flag. -**/ +/* + * Returns the result of converting the floating-point value `a' to + * the unsigned integer format. The conversion is performed according + * to the IEC/IEEE Standard for Binary Floating-Point + * Arithmetic---which means in particular that the conversion is + * rounded according to the current rounding mode. If `a' is a NaN, + * the largest unsigned integer is returned. Otherwise, if the + * conversion overflows, the largest unsigned integer is returned. If + * the 'a' is negative, the result is rounded and zero is returned; + * values that do not round to zero will raise the inexact exception + * flag. 
+ */ static uint64_t uint64_pack_decomposed(decomposed_parts p, float_status *s) { @@ -1433,6 +1434,7 @@ static uint64_t uint64_pack_decomposed(decomposed_parts p, float_status *s) return 0; case float_class_normal: if (p.sign) { +s->float_exception_flags |= float_flag_invalid; return 0; } if (p.exp < DECOMPOSED_BINARY_POINT) { @@ -1440,6 +1442,7 @@ static uint64_t uint64_pack_decomposed(decomposed_parts p, float_status *s) } else if (p.exp < 64) { return p.frac << (p.exp - DECOMPOSED_BINARY_POINT); } else { +s->float_exception_flags |= float_flag_invalid; return UINT64_MAX; } default: @@ -1450,13 +1453,21 @@ static uint64_t uint64_pack_decomposed(decomposed_parts p, float_status *s) static uint16_t uint16_pack_decomposed(decomposed_parts p, float_status *s) { uint64_t r = uint64_pack_decomposed(p, s); -return r > UINT16_MAX ? UINT16_MAX : r; +if (r > UINT16_MAX) { +s->float_exception_flags |= float_flag_invalid; +r = UINT16_MAX; +} +return r; } static uint32_t uint32_pack_decomposed(decomposed_parts p, float_status *s) { uint64_t r = uint64_pack_decomposed(p, s); -return r > UINT32_MAX ? UINT32_MAX : r; +if (r > UINT32_MAX) { +s->float_exception_flags |= float_flag_invalid; +r = UINT32_MAX; +} +return r; } #define FLOAT_TO_UINT(fsz, isz) \ @@ -1489,6 +1500,168 @@ FLOAT_TO_UINT(64, 64) #undef FLOAT_TO_UINT +/* + * Integer to float conversions + * + * Returns the result of converting the two's complement integer `a' + * to the floating-point format. The conversion is performed according + * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 
+ */ + +static decomposed_parts int_to_float(int64_t a, float_status *status) +{ +decomposed_parts r; +if (a == 0) { +r.cls = float_class_zero; +} else if (a == (1ULL << 63)) { +r.cls = float_class_normal; +r.sign = true; +r.frac = DECOMPOSED_IMPLICIT_BIT; +r.exp = 63; +} else { +uint64_t f; +if (a < 0) { +f = -a; +r.sign = true; +} else { +f = a; +r.sign = false; +} +int shift = clz64(f) - 1; +r.cls = float_class_normal; +r.exp = (DECOMPOSED_BINARY_POINT - shift); +r.frac = f << shift; +} + +return r; +} + +float16 int64_to_float16(int64_t a, float_status *status) +{ +decomposed_parts pa = int_to_float(a, status); +return float16_round_pack_canonical(pa, status); +} + +float16 int32_to_float16(int32_t a, float_status *status) +{ +return int64_to_float16((int64_t) a, status); +} + +float16 int16_to_float16(int16_t a, float_status *status) +{ +return int64_to_float16((int64_t) a, status); +} + +float32 int64_to_float32(int64_t a, float_status *status) +{ +decom