[phobos] CustomFloat

David Simcha Sat, 05 Jun 2010 07:43:12 -0700

Robert Jacques (who I know offline and, in fact, was the person whointroduced me to D) has massively improved/debugged CustomFloat andasked me to post it for review and commit it to Phobos. See theattached file. If everyone's happy with it, I'll replace the currentimpl in std.numeric and check it in.

/**
 * Custom Float Mark 2
 */
module customfloat;

import std.bitmanip;
import std.traits;
import std.typetuple;
import std.intrinsic;
import std.traits;
import std.math;
import std.contracts;
import std.stdio;
import std.conv;


debug {
    /// Converts an unsigend integral to string containing its binary 
representation.
    auto inBinary(T)(T v, T m = T.max) if(is(Unsigned!T == T)) {
        auto f = bsf(m);
        auto r = bsr(m) + 1;
        auto output = new char[r - f];
        v >>= f;
        foreach_reverse(ref c;output) {
            c = v&1 ? '1' : '0';
            v >>= 1;
        }
        return output;
    }
}

/// 64-bit version of std.intrinsic.bsf
private int bsf(ulong value) {
    union Ulong {
        ulong raw;
        struct {
            uint low;
            uint high;
        }
    }
    Ulong v;
    v.raw = value;
    return v.low!=0 ? std.intrinsic.bsf(v.low) : std.intrinsic.bsf(v.high) + 32;
}

/// 64-bit version of std.intrinsic.bsr
private int bsr(ulong value) {
    union Ulong {
        ulong raw;
        struct {
            uint low;
            uint high;
        }
    }
    Ulong v;
    v.raw = value;
    return v.high==0 ? std.intrinsic.bsr(v.low) : std.intrinsic.bsr(v.high) + 
32;
}

/// Format flags for CustomFloat.
public enum CustomFloatFlags {

    /// Adds a sign bit to allow for signed numbers.
    signed = 1,

    /**
     * Store values in normalized form by default. The actual precision of the
     * significand is extended by 1 bit by assuming an implicit leading bit of 1
     * instead of 0. i.e. $(D 1.nnnn) instead of $(D 0.nnnn).
     * True for all $(LUCKY IEE754) types
     */
    storeNormalized = 2,

    /**
     * Stores the significand in $(LUCKY IEEE754 denormalized) form when the
     * exponent is 0. Required to express the value 0.
     */
    allowDenorm = 4,

    /// Allows the storage of $(LUCKY IEEE754 _infinity) values.
    infinity = 8,

    /// Allows the storage of $(LUCKY IEEE754 Not a Number) values.
    nan = 16,

    /**
     * If set, select an exponent bias such that max_exp = 1.
     * i.e. so that the maximum value is >= 1.0 and < 2.0.
     * Ignored if the exponent bias is manually specified.
     */
    probability = 32,

    /// If set, unsigned custom floats are assumed to be negative.
    negativeUnsigned = 64,

    /// If set, 0 is the only allowed $(LUCKY IEEE754 denormalized) number. 
Requires allowDenorm and storeNormalized.
    allowDenormZeroOnly = 128 | allowDenorm | storeNormalized,

    /// Include _all of the $(LUCKY IEEE754) options.
    ieee = signed | storeNormalized | allowDenorm | infinity | nan ,

    /// Include none of the above options.
    none = 0
}


/**
 * Allows user code to define custom floating-point formats. These formats are
 * for storage only; all operations on them are performed by first implicitly
 * extracting them to $(D real) first. After the operation is completed the
 * result can be stored in a custom floating-point value via assignment.
 *
 * Example:
 * ----
 * // Define a 16-bit floating point values
 * CustomFloat!16                                x;     // Using the number of 
bits
 * CustomFloat!(10, 5)                           y;     // Using the precision 
and exponent width
 * CustomFloat!(10, 5,CustomFloatFlags.ieee)     z;     // Using the precision, 
exponent width and format flags
 * CustomFloat!(10, 5,CustomFloatFlags.ieee, 15) w;     // Using the precision, 
exponent width, format flags and exponent offset bias
 *
 * // Use the 16-bit floats mostly like normal numbers
 * w = x*y - 1;
 * writeln(w);
 *
 * // Functions calls require conversion
 * z = sin(+x)           + cos(+y);                     // Use uniary plus to 
concisely convert to a real
 * z = sin(x.re)         + cos(y.re);                   // Or use the .re 
property to convert to a real
 * z = sin(x.get!float)  + cos(y.get!float);            // Or use get!T
 * z = sin(cast(float)x) + cos(cast(float)y);           // Or use cast(T) to 
explicitly convert
 *
 * // Define a 8-bit custom float for storing probabilities
 * alias CustomFloat!(4, 4, 
CustomFloatFlags.ieee^CustomFloatFlags.probability^CustomFloatFlags.signed ) 
Probability;
 * auto p = Probability(0.5);
 * ----
 */
template CustomFloat(uint bits) if( bits == 8 || bits == 16 || bits == 32 || 
bits == 64 || bits == 80) {
    static if(bits ==  8) alias CustomFloat!( 4, 3) CustomFloat;
    static if(bits == 16) alias CustomFloat!(10, 5) CustomFloat;
    static if(bits == 32) alias CustomFloat!(23, 8) CustomFloat;
    static if(bits == 64) alias CustomFloat!(52,11) CustomFloat;
    static if(bits == 80) alias 
CustomFloat!(64,15,CustomFloatFlags.ieee^CustomFloatFlags.storeNormalized) 
CustomFloat;
}
///ditto
template CustomFloat(uint precision,uint exponentWidth,CustomFloatFlags flags = 
CustomFloatFlags.ieee)
    if(  ( (flags & flags.signed) + precision + exponentWidth) % 8 == 0 && 
precision + exponentWidth > 0)
{
    alias CustomFloat!(precision,exponentWidth,flags,
                       (1 << (exponentWidth  - ((flags&flags.probability)==0) ))
                       - ((flags&(flags.nan|flags.infinity))!=0) - 
((flags&flags.probability)!=0)
                       ) CustomFloat; // 
((flags&CustomFloatFlags.probability)==0)
}
///ditto
struct CustomFloat(
    uint                precision,  // fraction bits (23 for float)
    uint                exponentWidth,  // exponent bits (8 for float)  
Exponent width
    CustomFloatFlags    flags,
    uint                bias)
    if(( (flags & flags.signed)  + precision + exponentWidth) % 8 == 0 &&
       precision + exponentWidth > 0)
{
    private:
        template uType(uint bits) {                                             
/// get the correct unsigned bitfield type to support > 32 bits
            static if(bits <= size_t.sizeof*8)  alias size_t uType;
            else                                alias ulong  uType;
        }
        template sType(uint bits) {                                             
/// get the correct signed   bitfield type to support > 32 bits
            static if(bits <= ptrdiff_t.sizeof*8-1) alias ptrdiff_t sType;
            else                                    alias long      sType;
        }

        alias uType!precision     T_sig;                                        
/// Simplify casting code
        alias uType!exponentWidth T_exp;                                        
/// ditto
        alias sType!exponentWidth T_signed_exp;                                 
/// ditto

        alias CustomFloatFlags Flags;

        /// Facilitate converting numeric types to custom float
        union ToBinary(F) if(is(CustomFloat!(F.sizeof*8))) {
            F set;
            CustomFloat!(F.sizeof*8) get;

            // Convert F to the correct binary type.
            static typeof(get) opCall(F value) {
                ToBinary r;
                r.set = value;
                return r.get;
            }
            alias get this;
        }

        /// Perform IEEE rounding with round to nearest detection
        void roundedShift(T,U)(ref T sig, U shift) {
            if( sig << (T.sizeof*8 - shift) == cast(T) 1uL << (T.sizeof*8 - 1) 
) {  // round to even
                sig >>= shift;
                sig  += sig & 1;
            } else {
                sig >>= shift - 1;
                sig  += sig & 1;                                                
    // Perform standard rounding
                sig >>= 1;
            }
        }

        /// Convert the current value to signed exponent, normalized form
        void toNormalized(T,U)(ref T sig, ref U exp) {
            sig = significand;
            auto shift = (T.sizeof*8) - precision;
            exp = exponent;
            static if(flags&(Flags.infinity|Flags.nan)) {   // Handle inf or nan
                if(exp == exponent_max) {
                    exp = exp.max;
                    sig <<= shift;
                    static if(flags&Flags.storeNormalized) {    // Save inf/nan 
in denormalized format
                        sig >>= 1;
                        sig  += cast(T) 1uL << (T.sizeof*8 - 1);
                    }
                    return;
                }
            }
            if( (~flags&Flags.storeNormalized) ||       // Convert denormalized 
form to normalized form
                ((flags&Flags.allowDenorm)&&(exp==0)) ){
                if(sig > 0) {
                    auto shift2 = precision - bsr(sig);
                    exp  -= shift2-1;
                    shift += shift2;
                } else {                                // value = 0.0
                    exp = exp.min;
                    return;
                }
            }
            sig <<= shift;
            exp -= bias;
        }

        /// Set the current value from signed exponent, normalized form
        void fromNormalized(T,U)(ref T sig, ref U exp) {
            auto shift = (T.sizeof*8) - precision;
            if(exp == exp.max) {                                        // 
infinity or nan
                exp = exponent_max;
                static if(flags & Flags.storeNormalized) sig <<= 1;     // 
convert back to normalized form
                static if(~flags & Flags.infinity)                      // No 
infinity support?
                    enforce(sig != 0,"Infinity floating point value assigned to 
a "~typeof(this).stringof~" (no infinity support).");
                static if(~flags & Flags.nan)                           // No 
NaN support?
                    enforce(sig == 0,"NaN floating point value assigned to a 
"~typeof(this).stringof~" (no nan support).");
                sig >>= shift;
                return;
            }
            if(exp == exp.min){     // 0.0
                 exp = 0;
                 sig = 0;
                 return;
            }

            exp += bias;
            if( exp <= 0 ) {
                static if( ( flags&Flags.allowDenorm) ||                // 
Convert from normalized form to denormalized
                           (~flags&Flags.storeNormalized) ) {
                    shift += -exp;
                    roundedShift(sig,1);
                    sig   += cast(T) 1uL << (T.sizeof*8 - 1);           // Add 
the leading 1
                    exp    = 0;
                } else enforce( (flags&Flags.storeNormalized) && exp == 0,
                               "Underflow occured assigning to a 
"~typeof(this).stringof~" (no denormal support).");
            } else {
                static if(~flags&Flags.storeNormalized) {               // 
Convert from normalized form to denormalized
                    roundedShift(sig,1);
                    sig  += cast(T) 1uL << (T.sizeof*8 - 1);            // Add 
the leading 1
                }
            }

            if(shift > 0)
                roundedShift(sig,shift);
            if(sig > significand_max) {                                 // 
handle significand overflow (should only be 1 bit)
                static if(~flags&Flags.storeNormalized) {
                    sig >>= 1;
                } else
                    sig &= significand_max;
                exp++;
            }
            static 
if((flags&Flags.allowDenormZeroOnly)==Flags.allowDenormZeroOnly) {   // 
disallow non-zero denormals
                if(exp == 0) {
                    sig <<= 1;
                    if(sig > significand_max && (sig&significand_max) > 0 )     
        // Check and round to even
                        exp++;
                    sig = 0;
                }
            }

            if(exp >= exponent_max ) {
                static if( flags&(Flags.infinity|Flags.nan) ) {
                    sig         = 0;
                    exp         = exponent_max;
                    static if(~flags&(Flags.infinity))
                        enforce( false, "Overflow occured assigning to a 
"~typeof(this).stringof~" (no infinity support).");
                } else
                    enforce( exp == exponent_max, "Overflow occured assigning 
to a "~typeof(this).stringof~" (no infinity support).");
            }
        }

    public:
        static if( precision == 64 ) {                                          
// CustomFloat!80 support hack
            ulong significand;
            enum ulong significand_max = ulong.max;
            mixin(bitfields!(
                T_exp , "exponent", exponentWidth,
                bool  , "sign"    , flags & flags.signed ));

        } else {
            mixin(bitfields!(
                T_sig, "significand", precision,
                T_exp, "exponent"   , exponentWidth,
                bool , "sign"       , flags & flags.signed ));
        }

    static if (flags & Flags.infinity)
        static @property CustomFloat infinity() {                               
/// Returns: infinity value
            CustomFloat value;
            static if (flags & Flags.signed)
            value.sign          = 0;
            value.significand   = 0;
            value.exponent      = exponent_max;
            return value;
        }

    static if (flags & Flags.nan)
        static @property CustomFloat nan() {                                    
/// Returns: NaN value
            CustomFloat value;
            static if (flags & Flags.signed)
            value.sign          = 0;
            value.significand   = cast(typeof(significand_max)) 1L << 
(precision-1);
            value.exponent      = exponent_max;
            return value;
        }

    static @property size_t dig(){                                              
/// Returns: number of decimal digits of precision
        return cast(size_t) log10( 1uL << precision - 
(flags&Flags.storeNormalized != 0));
    }

    static @property CustomFloat epsilon() {                                    
/// Returns: smallest increment to the value 1
            CustomFloat value;
            static if (flags & Flags.signed)
            value.sign       = 0;
            T_signed_exp exp = -precision;
            T_sig        sig = 0;
            value.fromNormalized(sig,exp);
            if(exp == 0 && sig == 0) { // underflowed to zero
                static if((flags&Flags.allowDenorm) || 
(~flags&Flags.storeNormalized))
                    sig = 1;
                else
                    sig = cast(T) 1uL << (precision - 1);
            }
            value.exponent     = cast(value.T_exp) exp;
            value.significand  = cast(value.T_sig) sig;
            return value;
    }

    enum mant_dig = precision + (flags&Flags.storeNormalized != 0);             
/// the number of bits in mantissa

    static @property int max_10_exp(){ return cast(int) log10( +max ); }        
/// Returns: maximum int value such that 10<sup>max_10_exp</sup> is 
representable

    enum max_exp = exponent_max-bias+((~flags&(Flags.infinity|flags.nan))!=0);  
/// maximum int value such that 2<sup>max_exp-1</sup> is representable

    static @property int min_10_exp(){ return cast(int) log10( +min_normal ); } 
/// Returns: minimum int value such that 10<sup>min_10_exp</sup> is 
representable

    enum min_exp = cast(T_signed_exp)-bias +1+ ((flags&Flags.allowDenorm)!=0);  
/// minimum int value such that 2<sup>min_exp-1</sup> is representable as a 
normalized value

    static @property CustomFloat max() {                                        
/// Returns: largest representable value that's not infinity
            CustomFloat value;
            static if (flags & Flags.signed)
            value.sign        = 0;
            value.exponent    = exponent_max - 
((flags&(flags.infinity|flags.nan)) != 0);
            value.significand = significand_max;
            return value;
    }

    static @property CustomFloat min_normal() {                                 
/// Returns: smallest representable normalized value that's not 0
            CustomFloat value;
            static if (flags & Flags.signed)
            value.sign        = 0;
            value.exponent    = 1;
            static if(flags&Flags.storeNormalized)
                value.significand = 0;
            else
                value.significand = cast(T_sig) 1uL << (precision - 1);;
            return value;
    }

           @property CustomFloat re()   { return this;              }           
/// Returns: real part
    static @property CustomFloat im()   { return CustomFloat(0.0f); }           
/// Returns: imaginary part

    this(F)(F input) if (__traits(compiles, cast(real)input )) { this = input; 
}/// Initialize from any $(D real) compatible type.

    /// Self assignment
    void opAssign(F:CustomFloat)(F input) {
        static if (flags & Flags.signed)
        sign        = input.sign;
        exponent    = input.exponent;
        significand = input.significand;
    }

    /// Assigns from any $(D real) compatible type.
    void opAssign(F)(F input)
        if (__traits(compiles, cast(real)input ))
    {

        static if( staticIndexOf!(Unqual!F, float, double, real) >= 0 )
                auto value = ToBinary!(Unqual!F)(input);
        else    auto value = ToBinary!(real    )(input);
        // Assign the sign bit
        static if (~flags & Flags.signed)
            enforce( (!value.sign)^((flags&flags.negativeUnsigned)>0) 
,"Incorrectly signed floating point value assigned to a 
"~typeof(this).stringof~" (no sign support).");
        else
            sign = value.sign;

        CommonType!(T_signed_exp ,value.T_signed_exp ) exp = value.exponent;    
    // Assign the exponent and fraction
        CommonType!(T_sig,        value.T_sig        ) sig = value.significand;

        value.toNormalized(sig,exp);
        fromNormalized(sig,exp);


        assert(exp <= exponent_max,    text(typeof(this).stringof~" exponent 
too large: "   ,exp," > ",exponent_max,   "\t",input,"\t",sig) );
        assert(sig <= significand_max, text(typeof(this).stringof~" significand 
too large: ",sig," > ",significand_max,"\t",input,"\t",exp," ",exponent_max) );
        exponent    = cast(T_exp) exp;
        significand = cast(T_sig) sig;
    }

    /// Fetches the stored value either as a $(D float), $(D double) or $(D 
real).
    @property F get(F)()
        if (staticIndexOf!(Unqual!F, float, double, real) >= 0)
    {
        ToBinary!F result;

        static if (flags&Flags.signed) result.sign = sign;
        else                           result.sign = 
(flags&flags.negativeUnsigned) > 0;
        CommonType!(T_signed_exp ,result.get.T_signed_exp ) exp = exponent;     
        // Assign the exponent and fraction
        CommonType!(T_sig,        result.get.T_sig        ) sig = significand;

        toNormalized(sig,exp);
        result.fromNormalized(sig,exp);
        assert(exp <= result.exponent_max,    text("get exponent too large: "   
,exp," > ",result.exponent_max) );
        assert(sig <= result.significand_max, text("get significand too large: 
",sig," > ",result.significand_max) );
        result.exponent     = cast(result.get.T_exp) exp;
        result.significand  = cast(result.get.T_sig) sig;
        return result.set;
    }
    ///ditto
    T opCast(T)() if (__traits(compiles, get!T )) { return get!T; }

    /// Convert the CustomFloat to a real and perform the relavent operator on 
the result
    real opUnary(string op)() if( __traits(compiles, mixin(op~`(get!real)`)) || 
op=="++" || op=="--" ){
        static if(op=="++" || op=="--") {
            auto result = get!real;
            this = mixin(op~`result`);
            return result;
        } else
            return mixin(op~`get!real`);
    }

    real opBinary(string op,T)(T b) if( __traits(compiles, 
mixin(`get!real`~op~`b`)  )  ) {     /// ditto
        return mixin(`get!real`~op~`b`);
    }
    real opBinaryRight(string op,T)(T a) if( __traits(compiles, 
mixin(`a`~op~`get!real`) )  &&  /// ditto
                                            !__traits(compiles, 
mixin(`get!real`~op~`b`) )  ) {
        return mixin(`a`~op~`get!real`);
    }
    int opCmp(T)(auto ref T b) if(__traits(compiles, cast(real)b )  ) {     /// 
ditto
        auto x = get!real;
        auto y = cast(real) b;
        return  (x>=y)-(x<=y);
    }
    void opOpAssign(string op, T)(auto ref T b) if ( __traits(compiles, 
mixin(`get!real`~op[0..$-1]~`cast(real)b`))) {  /// ditto
        return mixin(`this = this `~op[0..$-1]~` cast(real)b`);
    }

    string toString() { return to!string(get!real); } /// ditto
}

_______________________________________________
phobos mailing list
[email protected]
http://lists.puremagic.com/mailman/listinfo/phobos

[phobos] CustomFloat

Reply via email to