Robert Jacques (who I know offline and, in fact, was the person who
introduced me to D) has massively improved/debugged CustomFloat and
asked me to post it for review and commit it to Phobos. See the
attached file. If everyone's happy with it, I'll replace the current
impl in std.numeric and check it in.
/**
* Custom Float Mark 2
*/
module customfloat;
import std.bitmanip;
import std.traits;
import std.typetuple;
import std.intrinsic;
import std.traits;
import std.math;
import std.contracts;
import std.stdio;
import std.conv;
debug {
/// Converts an unsigend integral to string containing its binary
representation.
auto inBinary(T)(T v, T m = T.max) if(is(Unsigned!T == T)) {
auto f = bsf(m);
auto r = bsr(m) + 1;
auto output = new char[r - f];
v >>= f;
foreach_reverse(ref c;output) {
c = v&1 ? '1' : '0';
v >>= 1;
}
return output;
}
}
/// 64-bit version of std.intrinsic.bsf
private int bsf(ulong value) {
union Ulong {
ulong raw;
struct {
uint low;
uint high;
}
}
Ulong v;
v.raw = value;
return v.low!=0 ? std.intrinsic.bsf(v.low) : std.intrinsic.bsf(v.high) + 32;
}
/// 64-bit version of std.intrinsic.bsr
private int bsr(ulong value) {
union Ulong {
ulong raw;
struct {
uint low;
uint high;
}
}
Ulong v;
v.raw = value;
return v.high==0 ? std.intrinsic.bsr(v.low) : std.intrinsic.bsr(v.high) +
32;
}
/// Format flags for CustomFloat.
public enum CustomFloatFlags {
/// Adds a sign bit to allow for signed numbers.
signed = 1,
/**
* Store values in normalized form by default. The actual precision of the
* significand is extended by 1 bit by assuming an implicit leading bit of 1
* instead of 0. i.e. $(D 1.nnnn) instead of $(D 0.nnnn).
* True for all $(LUCKY IEE754) types
*/
storeNormalized = 2,
/**
* Stores the significand in $(LUCKY IEEE754 denormalized) form when the
* exponent is 0. Required to express the value 0.
*/
allowDenorm = 4,
/// Allows the storage of $(LUCKY IEEE754 _infinity) values.
infinity = 8,
/// Allows the storage of $(LUCKY IEEE754 Not a Number) values.
nan = 16,
/**
* If set, select an exponent bias such that max_exp = 1.
* i.e. so that the maximum value is >= 1.0 and < 2.0.
* Ignored if the exponent bias is manually specified.
*/
probability = 32,
/// If set, unsigned custom floats are assumed to be negative.
negativeUnsigned = 64,
/// If set, 0 is the only allowed $(LUCKY IEEE754 denormalized) number.
Requires allowDenorm and storeNormalized.
allowDenormZeroOnly = 128 | allowDenorm | storeNormalized,
/// Include _all of the $(LUCKY IEEE754) options.
ieee = signed | storeNormalized | allowDenorm | infinity | nan ,
/// Include none of the above options.
none = 0
}
/**
* Allows user code to define custom floating-point formats. These formats are
* for storage only; all operations on them are performed by first implicitly
* extracting them to $(D real) first. After the operation is completed the
* result can be stored in a custom floating-point value via assignment.
*
* Example:
* ----
* // Define a 16-bit floating point values
* CustomFloat!16 x; // Using the number of
bits
* CustomFloat!(10, 5) y; // Using the precision
and exponent width
* CustomFloat!(10, 5,CustomFloatFlags.ieee) z; // Using the precision,
exponent width and format flags
* CustomFloat!(10, 5,CustomFloatFlags.ieee, 15) w; // Using the precision,
exponent width, format flags and exponent offset bias
*
* // Use the 16-bit floats mostly like normal numbers
* w = x*y - 1;
* writeln(w);
*
* // Functions calls require conversion
* z = sin(+x) + cos(+y); // Use uniary plus to
concisely convert to a real
* z = sin(x.re) + cos(y.re); // Or use the .re
property to convert to a real
* z = sin(x.get!float) + cos(y.get!float); // Or use get!T
* z = sin(cast(float)x) + cos(cast(float)y); // Or use cast(T) to
explicitly convert
*
* // Define a 8-bit custom float for storing probabilities
* alias CustomFloat!(4, 4,
CustomFloatFlags.ieee^CustomFloatFlags.probability^CustomFloatFlags.signed )
Probability;
* auto p = Probability(0.5);
* ----
*/
template CustomFloat(uint bits) if( bits == 8 || bits == 16 || bits == 32 ||
bits == 64 || bits == 80) {
static if(bits == 8) alias CustomFloat!( 4, 3) CustomFloat;
static if(bits == 16) alias CustomFloat!(10, 5) CustomFloat;
static if(bits == 32) alias CustomFloat!(23, 8) CustomFloat;
static if(bits == 64) alias CustomFloat!(52,11) CustomFloat;
static if(bits == 80) alias
CustomFloat!(64,15,CustomFloatFlags.ieee^CustomFloatFlags.storeNormalized)
CustomFloat;
}
///ditto
template CustomFloat(uint precision,uint exponentWidth,CustomFloatFlags flags =
CustomFloatFlags.ieee)
if( ( (flags & flags.signed) + precision + exponentWidth) % 8 == 0 &&
precision + exponentWidth > 0)
{
alias CustomFloat!(precision,exponentWidth,flags,
(1 << (exponentWidth - ((flags&flags.probability)==0) ))
- ((flags&(flags.nan|flags.infinity))!=0) -
((flags&flags.probability)!=0)
) CustomFloat; //
((flags&CustomFloatFlags.probability)==0)
}
///ditto
struct CustomFloat(
uint precision, // fraction bits (23 for float)
uint exponentWidth, // exponent bits (8 for float)
Exponent width
CustomFloatFlags flags,
uint bias)
if(( (flags & flags.signed) + precision + exponentWidth) % 8 == 0 &&
precision + exponentWidth > 0)
{
private:
template uType(uint bits) {
/// get the correct unsigned bitfield type to support > 32 bits
static if(bits <= size_t.sizeof*8) alias size_t uType;
else alias ulong uType;
}
template sType(uint bits) {
/// get the correct signed bitfield type to support > 32 bits
static if(bits <= ptrdiff_t.sizeof*8-1) alias ptrdiff_t sType;
else alias long sType;
}
alias uType!precision T_sig;
/// Simplify casting code
alias uType!exponentWidth T_exp;
/// ditto
alias sType!exponentWidth T_signed_exp;
/// ditto
alias CustomFloatFlags Flags;
/// Facilitate converting numeric types to custom float
union ToBinary(F) if(is(CustomFloat!(F.sizeof*8))) {
F set;
CustomFloat!(F.sizeof*8) get;
// Convert F to the correct binary type.
static typeof(get) opCall(F value) {
ToBinary r;
r.set = value;
return r.get;
}
alias get this;
}
/// Perform IEEE rounding with round to nearest detection
void roundedShift(T,U)(ref T sig, U shift) {
if( sig << (T.sizeof*8 - shift) == cast(T) 1uL << (T.sizeof*8 - 1)
) { // round to even
sig >>= shift;
sig += sig & 1;
} else {
sig >>= shift - 1;
sig += sig & 1;
// Perform standard rounding
sig >>= 1;
}
}
/// Convert the current value to signed exponent, normalized form
void toNormalized(T,U)(ref T sig, ref U exp) {
sig = significand;
auto shift = (T.sizeof*8) - precision;
exp = exponent;
static if(flags&(Flags.infinity|Flags.nan)) { // Handle inf or nan
if(exp == exponent_max) {
exp = exp.max;
sig <<= shift;
static if(flags&Flags.storeNormalized) { // Save inf/nan
in denormalized format
sig >>= 1;
sig += cast(T) 1uL << (T.sizeof*8 - 1);
}
return;
}
}
if( (~flags&Flags.storeNormalized) || // Convert denormalized
form to normalized form
((flags&Flags.allowDenorm)&&(exp==0)) ){
if(sig > 0) {
auto shift2 = precision - bsr(sig);
exp -= shift2-1;
shift += shift2;
} else { // value = 0.0
exp = exp.min;
return;
}
}
sig <<= shift;
exp -= bias;
}
/// Set the current value from signed exponent, normalized form
void fromNormalized(T,U)(ref T sig, ref U exp) {
auto shift = (T.sizeof*8) - precision;
if(exp == exp.max) { //
infinity or nan
exp = exponent_max;
static if(flags & Flags.storeNormalized) sig <<= 1; //
convert back to normalized form
static if(~flags & Flags.infinity) // No
infinity support?
enforce(sig != 0,"Infinity floating point value assigned to
a "~typeof(this).stringof~" (no infinity support).");
static if(~flags & Flags.nan) // No
NaN support?
enforce(sig == 0,"NaN floating point value assigned to a
"~typeof(this).stringof~" (no nan support).");
sig >>= shift;
return;
}
if(exp == exp.min){ // 0.0
exp = 0;
sig = 0;
return;
}
exp += bias;
if( exp <= 0 ) {
static if( ( flags&Flags.allowDenorm) || //
Convert from normalized form to denormalized
(~flags&Flags.storeNormalized) ) {
shift += -exp;
roundedShift(sig,1);
sig += cast(T) 1uL << (T.sizeof*8 - 1); // Add
the leading 1
exp = 0;
} else enforce( (flags&Flags.storeNormalized) && exp == 0,
"Underflow occured assigning to a
"~typeof(this).stringof~" (no denormal support).");
} else {
static if(~flags&Flags.storeNormalized) { //
Convert from normalized form to denormalized
roundedShift(sig,1);
sig += cast(T) 1uL << (T.sizeof*8 - 1); // Add
the leading 1
}
}
if(shift > 0)
roundedShift(sig,shift);
if(sig > significand_max) { //
handle significand overflow (should only be 1 bit)
static if(~flags&Flags.storeNormalized) {
sig >>= 1;
} else
sig &= significand_max;
exp++;
}
static
if((flags&Flags.allowDenormZeroOnly)==Flags.allowDenormZeroOnly) { //
disallow non-zero denormals
if(exp == 0) {
sig <<= 1;
if(sig > significand_max && (sig&significand_max) > 0 )
// Check and round to even
exp++;
sig = 0;
}
}
if(exp >= exponent_max ) {
static if( flags&(Flags.infinity|Flags.nan) ) {
sig = 0;
exp = exponent_max;
static if(~flags&(Flags.infinity))
enforce( false, "Overflow occured assigning to a
"~typeof(this).stringof~" (no infinity support).");
} else
enforce( exp == exponent_max, "Overflow occured assigning
to a "~typeof(this).stringof~" (no infinity support).");
}
}
public:
static if( precision == 64 ) {
// CustomFloat!80 support hack
ulong significand;
enum ulong significand_max = ulong.max;
mixin(bitfields!(
T_exp , "exponent", exponentWidth,
bool , "sign" , flags & flags.signed ));
} else {
mixin(bitfields!(
T_sig, "significand", precision,
T_exp, "exponent" , exponentWidth,
bool , "sign" , flags & flags.signed ));
}
static if (flags & Flags.infinity)
static @property CustomFloat infinity() {
/// Returns: infinity value
CustomFloat value;
static if (flags & Flags.signed)
value.sign = 0;
value.significand = 0;
value.exponent = exponent_max;
return value;
}
static if (flags & Flags.nan)
static @property CustomFloat nan() {
/// Returns: NaN value
CustomFloat value;
static if (flags & Flags.signed)
value.sign = 0;
value.significand = cast(typeof(significand_max)) 1L <<
(precision-1);
value.exponent = exponent_max;
return value;
}
static @property size_t dig(){
/// Returns: number of decimal digits of precision
return cast(size_t) log10( 1uL << precision -
(flags&Flags.storeNormalized != 0));
}
static @property CustomFloat epsilon() {
/// Returns: smallest increment to the value 1
CustomFloat value;
static if (flags & Flags.signed)
value.sign = 0;
T_signed_exp exp = -precision;
T_sig sig = 0;
value.fromNormalized(sig,exp);
if(exp == 0 && sig == 0) { // underflowed to zero
static if((flags&Flags.allowDenorm) ||
(~flags&Flags.storeNormalized))
sig = 1;
else
sig = cast(T) 1uL << (precision - 1);
}
value.exponent = cast(value.T_exp) exp;
value.significand = cast(value.T_sig) sig;
return value;
}
enum mant_dig = precision + (flags&Flags.storeNormalized != 0);
/// the number of bits in mantissa
static @property int max_10_exp(){ return cast(int) log10( +max ); }
/// Returns: maximum int value such that 10<sup>max_10_exp</sup> is
representable
enum max_exp = exponent_max-bias+((~flags&(Flags.infinity|flags.nan))!=0);
/// maximum int value such that 2<sup>max_exp-1</sup> is representable
static @property int min_10_exp(){ return cast(int) log10( +min_normal ); }
/// Returns: minimum int value such that 10<sup>min_10_exp</sup> is
representable
enum min_exp = cast(T_signed_exp)-bias +1+ ((flags&Flags.allowDenorm)!=0);
/// minimum int value such that 2<sup>min_exp-1</sup> is representable as a
normalized value
static @property CustomFloat max() {
/// Returns: largest representable value that's not infinity
CustomFloat value;
static if (flags & Flags.signed)
value.sign = 0;
value.exponent = exponent_max -
((flags&(flags.infinity|flags.nan)) != 0);
value.significand = significand_max;
return value;
}
static @property CustomFloat min_normal() {
/// Returns: smallest representable normalized value that's not 0
CustomFloat value;
static if (flags & Flags.signed)
value.sign = 0;
value.exponent = 1;
static if(flags&Flags.storeNormalized)
value.significand = 0;
else
value.significand = cast(T_sig) 1uL << (precision - 1);;
return value;
}
@property CustomFloat re() { return this; }
/// Returns: real part
static @property CustomFloat im() { return CustomFloat(0.0f); }
/// Returns: imaginary part
this(F)(F input) if (__traits(compiles, cast(real)input )) { this = input;
}/// Initialize from any $(D real) compatible type.
/// Self assignment
void opAssign(F:CustomFloat)(F input) {
static if (flags & Flags.signed)
sign = input.sign;
exponent = input.exponent;
significand = input.significand;
}
/// Assigns from any $(D real) compatible type.
void opAssign(F)(F input)
if (__traits(compiles, cast(real)input ))
{
static if( staticIndexOf!(Unqual!F, float, double, real) >= 0 )
auto value = ToBinary!(Unqual!F)(input);
else auto value = ToBinary!(real )(input);
// Assign the sign bit
static if (~flags & Flags.signed)
enforce( (!value.sign)^((flags&flags.negativeUnsigned)>0)
,"Incorrectly signed floating point value assigned to a
"~typeof(this).stringof~" (no sign support).");
else
sign = value.sign;
CommonType!(T_signed_exp ,value.T_signed_exp ) exp = value.exponent;
// Assign the exponent and fraction
CommonType!(T_sig, value.T_sig ) sig = value.significand;
value.toNormalized(sig,exp);
fromNormalized(sig,exp);
assert(exp <= exponent_max, text(typeof(this).stringof~" exponent
too large: " ,exp," > ",exponent_max, "\t",input,"\t",sig) );
assert(sig <= significand_max, text(typeof(this).stringof~" significand
too large: ",sig," > ",significand_max,"\t",input,"\t",exp," ",exponent_max) );
exponent = cast(T_exp) exp;
significand = cast(T_sig) sig;
}
/// Fetches the stored value either as a $(D float), $(D double) or $(D
real).
@property F get(F)()
if (staticIndexOf!(Unqual!F, float, double, real) >= 0)
{
ToBinary!F result;
static if (flags&Flags.signed) result.sign = sign;
else result.sign =
(flags&flags.negativeUnsigned) > 0;
CommonType!(T_signed_exp ,result.get.T_signed_exp ) exp = exponent;
// Assign the exponent and fraction
CommonType!(T_sig, result.get.T_sig ) sig = significand;
toNormalized(sig,exp);
result.fromNormalized(sig,exp);
assert(exp <= result.exponent_max, text("get exponent too large: "
,exp," > ",result.exponent_max) );
assert(sig <= result.significand_max, text("get significand too large:
",sig," > ",result.significand_max) );
result.exponent = cast(result.get.T_exp) exp;
result.significand = cast(result.get.T_sig) sig;
return result.set;
}
///ditto
T opCast(T)() if (__traits(compiles, get!T )) { return get!T; }
/// Convert the CustomFloat to a real and perform the relavent operator on
the result
real opUnary(string op)() if( __traits(compiles, mixin(op~`(get!real)`)) ||
op=="++" || op=="--" ){
static if(op=="++" || op=="--") {
auto result = get!real;
this = mixin(op~`result`);
return result;
} else
return mixin(op~`get!real`);
}
real opBinary(string op,T)(T b) if( __traits(compiles,
mixin(`get!real`~op~`b`) ) ) { /// ditto
return mixin(`get!real`~op~`b`);
}
real opBinaryRight(string op,T)(T a) if( __traits(compiles,
mixin(`a`~op~`get!real`) ) && /// ditto
!__traits(compiles,
mixin(`get!real`~op~`b`) ) ) {
return mixin(`a`~op~`get!real`);
}
int opCmp(T)(auto ref T b) if(__traits(compiles, cast(real)b ) ) { ///
ditto
auto x = get!real;
auto y = cast(real) b;
return (x>=y)-(x<=y);
}
void opOpAssign(string op, T)(auto ref T b) if ( __traits(compiles,
mixin(`get!real`~op[0..$-1]~`cast(real)b`))) { /// ditto
return mixin(`this = this `~op[0..$-1]~` cast(real)b`);
}
string toString() { return to!string(get!real); } /// ditto
}
_______________________________________________
phobos mailing list
[email protected]
http://lists.puremagic.com/mailman/listinfo/phobos