> Am 27.09.20 um 18:21 schrieb Florian Klämpfl via fpc-devel:
> So we would need softfloat extended support. This is doable with one
> major obstacle: the "irrational" functions like ld, sin etc.: they need
> precise enough implementations for 80 bit (actually, just adding and
> using in the compiler 128 bit softfloat support would be even better).
> And this is probably something which is really hard. While I believe ;)
> I can solve most coding challenges, this is something I fear to touch :)
Well. Intel arch supports FMA instruction sets since 2013 (Intel) and since
2012 (AMD).
See https://en.wikipedia.org/wiki/FMA_instruction_set
Based on FMA support it's quite easy to create 128 bits (almost) semi-soft
floats.
Julia guys have a nice library called DoubleFloats
(https://github.com/JuliaMath/DoubleFloats.jl, MIT licensed)
They have a lot of trigonometric and other "complex" stuff already implemented.
So, maybe it's not that frightening after all :)
See my own small port attached. Russian comments ahead so beware :)
-- Regards,
Denis Golovan
unit uDF64;
{
модÑÐ»Ñ Ð´Ð»Ñ ÑабоÑÑ Ñ Double F64 ÑиÑлами
https://github.com/JuliaMath/DoubleFloats.jl
}
{$mode objfpc}{$H+}
{$ASMMODE INTEL}
{$modeswitch advancedrecords}
{$FPUType SSE42} //поÑемÑ-Ñо бÑÑÑÑее AVX/AVX2
{$Optimization DFA}
interface
uses
Classes, SysUtils;
type
TDF64 = record
hi,lo: Double;
end;
PDF64 = ^TDF64;
PTDF64 = ^TDF64; //ÑпÑоÑение Ð´Ð»Ñ Ð¼Ð°ÐºÑоÑов
operator -(const X : TDF64):TDF64;inline;
operator +(const A,B : TDF64):TDF64;
operator -(const A,B : TDF64):TDF64;
operator *(const A,B : TDF64):TDF64;
operator /(const X,Y : TDF64):TDF64;inline;
operator :=(const A:Double):TDF64;inline;
operator :=(const A:TDF64):Double;inline;
operator :=(const A:TDF64):NativeInt;inline;
operator=(const X, Y: TDF64): boolean;inline;
operator<(const X, Y: TDF64): boolean;inline;
operator<=(const X, Y: TDF64): boolean;inline;
operator>(const X, Y: TDF64): boolean;inline;
operator>=(const X, Y: TDF64): boolean;inline;
function Neg(const x:TDF64):TDF64;overload;inline;
function Inv(const x:TDF64):TDF64;overload;inline;
function Abs(const x:TDF64):TDF64;overload;inline;
function Max(const x,y:TDF64):TDF64;overload;inline;
function Min(const x,y:TDF64):TDF64;overload;inline;
function FMA(a,b,c:Double):Double;
function ToDF64(const a:Double):TDF64;inline;overload;
function ToDF64(const a:Int64):TDF64;inline;overload;
function ToF64(const a:TDF64):Double;inline;overload;
function ToStr(const p:TDF64):String;overload;
const ZeroDF64:TDF64 = (hi:0.0; lo:0.0);
const OneDF64:TDF64 = (hi:1.0; lo:0.0);
implementation
uses math, bigdecimalmath;
{$warn 2005 off} // disable "Comment level 2 found" warnings.
// $fpc -B -vwhilq -Fu./bigdecimalmath/ project1.lpr
// ... to see numbers for warnings
{
function FMA(a,b,c:Double):Double;
begin
// SetRoundMode(rmNearest); - нÑжно окÑÑгление до ближайÑего
asm
movlpd xmm0, [A]
movlpd xmm1, [B]
movlpd xmm2, [C]
vfmadd213sd xmm0, xmm1, xmm2 //a = a*b + c
//movhlps xmm1, xmm0
//vzeroupper
end;
end;
}
function FMA(a,b,c:Double):Double;assembler;nostackframe;
asm
// SetRoundMode(rmNearest); - нÑжно окÑÑгление до ближайÑего
vfmadd213sd xmm0, xmm1, xmm2 //a = a*b + c
end;
function quick_two_sum(const a,b:Double): TDF64;inline;
begin
Result.hi:=a+b;
Result.lo:=b-(Result.hi-a);
end;
{
function TwoSum(a::T, b::T) where {T<:AbstractFloat}
s = a + b
a1 = s - b
b1 = s - a1
da = a - a1
db = b - b1
t = da + db
return s, t
end
}
function two_sum(const a, b:Double):TDF64;inline;
var hi, lo, a1, b1: Double;
begin
hi := a + b;
a1 := hi - b;
b1 := hi - a1;
lo := (a - a1) + (b - b1);
Result.hi := hi;
Result.lo := lo;
end;
(*
@inline function two_hilo_sum(a::T, b::T) where {T<:FloatWithFMA}
s = a + b
e = b - (s - a)
return s, e
end
*)
function two_hilo_sum(const a, b:Double):TDF64;inline;
begin
Result.hi:=a+b;
Result.lo:=b - (Result.hi - a)
end;
function two_prod(const a,b:Double): TDF64;inline;
begin
Result.hi:=a*b;
Result.lo:=FMA(a,b,-Result.hi);
end;
(*
@inline function two_diff(a::T, b::T) where {T<:FloatWithFMA}
hi = a - b
a1 = hi + b
b1 = hi - a1
lo = (a - a1) - (b + b1)
return hi, lo
end
*)
function two_diff(const a,b:Double): TDF64;inline;
var a1,b1:Double;
begin
Result.hi:=a - b;
a1:=Result.hi + b;
b1:=Result.hi - a1;
Result.lo:=(a - a1) - (b + b1);
end;
(*
@inline function add_dddd_dd(x::Tuple{T,T}, y::Tuple{T,T}) where T<:IEEEFloat
xhi, xlo = x
yhi, ylo = y
hi, lo = two_sum(xhi, yhi)
thi, tlo = two_sum(xlo, ylo)
c = lo + thi
hi, lo = two_hilo_sum(hi, c)
c = tlo + lo
hi, lo = two_hilo_sum(hi, c)
return hi, lo
end
*)
operator +(const A,B : TDF64):TDF64;
var p1, p2:TDF64;
c:Double;
begin
p1:=two_sum(a.hi, b.hi);
p2:=two_sum(a.lo, b.lo);
c := p1.lo + p2.hi;
p1 := two_hilo_sum(p1.hi, c);
c := p2.lo + p1.lo;
p1 := two_hilo_sum(p1.hi, c);
Result:=p1;
end;
(*
@inline function sub_dddd_dd(x::Tuple{T,T}, y::Tuple{T,T}) where T<:IEEEFloat
xhi, xlo = x
yhi, ylo = y
hi, lo = two_diff(xhi, yhi)
thi, tlo = two_diff(xlo, ylo)
c = lo + thi# Algorithm 9 from Tight and rigourous error bounds. relative error <= 2u²
hi, lo = two_hilo_sum(hi, c)
c = tlo + lo
hi, lo = two_hilo_sum(hi, c)
return hi, lo
end
*)
operator -(const A,B : TDF64):TDF64;
var p1, p2:TDF64;
c:Double;
begin
p1:=two_diff(a.hi, b.hi);
p2:=two_diff(a.lo, b.lo);
c := p1.lo + p2.hi;
p1 := two_hilo_sum(p1.hi, c);
c := p2.lo + p1.lo;
p1 := two_hilo_sum(p1.hi, c);
Result:=p1;
end;
operator *(const A,B : TDF64):TDF64;
var p:TDF64;
begin
p:= two_prod(a.hi, b.hi);
p.lo := p.lo + a.hi * b.lo + a.lo * b.hi;
p:= quick_two_sum(p.hi, p.lo);
Result:=p;
end;
{
function TwoSum(a::T, b::T) where {T<:AbstractFloat}
s = a + b
a1 = s - b
b1 = s - a1
da = a - a1
db = b - b1
t = da + db
return s, t
end
@inline function Fast2Sum(a::T, b::T) where {T<:AbstractFloat}
s = a + b
z = s - a
t = b - z
return s, t
end
@inline function Fast2Mult(a::T, b::T) where {T<:AbstractFloat}
s = a * b
t = fma(a, b, -s)
return s, t
end
@inline function DWTimesFP3(xâáµ¢::T, xââ::T, y::T) where {T<:AbstractFloat}
câáµ¢, cââ = Fast2Mult(xâáµ¢, y)
c = fma(xââ, y, cââ)
zâáµ¢, zââ = Fast2Sum(câáµ¢, c)
return zâáµ¢, zââ
end
@inline function DWPlusFP(xâáµ¢::T, xââ::T, y::T) where {T<:AbstractFloat}
sâáµ¢, sââ = TwoSum(xâáµ¢, y)
v = xââ + sââ
zâáµ¢, zââ = TwoSum(sâáµ¢, v)
return zâáµ¢, zââ
end
function DWInvDW3(yâáµ¢::T, yââ::T) where {T<:AbstractFloat}
tâáµ¢ = inv(yâáµ¢)
râáµ¢ = fma(yâáµ¢, -tâáµ¢, one(T))
rââ = -(yââ * tâáµ¢)
eâáµ¢, eââ = Fast2Sum(râáµ¢, rââ)
dâáµ¢, dââ = DWTimesFP3(eâáµ¢, eââ, tâáµ¢)
zâáµ¢, zââ = DWPlusFP(dâáµ¢, dââ, tâáµ¢)
return zâáµ¢, zââ
end
}
function Fast2Sum(const a:Double; const b:Double):TDF64;inline;
var z:Double;
begin
Result.hi:=a + b;
z:=Result.hi - a;
Result.lo:=b - z;
end;
function Fast2Mult(const a:Double; const b:Double):TDF64;inline;
begin
Result.hi:=a * b;
Result.lo:=fma(a, b, -Result.hi);
end;
function DWTimesFP3(const xhi, xlo, y:Double):TDF64;inline;
var C:TDF64;
xc:Double;
begin
C := Fast2Mult(xhi, y);
xc := fma(xlo, y, c.lo);
Result := Fast2Sum(c.hi, xc);
end;
function DWPlusFP(const xhi, xlo, y:Double):TDF64;inline;
var S:TDF64;
v:Double;
begin
S := two_sum(xhi, y);
v := xlo + S.lo;
Result := two_sum(s.hi, v);
end;
function Inv(const x:Double):Double;inline;
begin
Result:=Double(1.0)/x;
end;
function Neg(const x: TDF64): TDF64;
begin
Result.hi:=-x.hi;
Result.lo:=-x.lo;
end;
operator-(const X: TDF64): TDF64;
begin
Result:=Neg(x);
end;
function Inv(const x:TDF64):TDF64;inline;
var thi, rhi, rlo:Double;
e, d:TDF64;
begin
// DWInvDW3
thi := Inv(x.hi);
rhi := fma(x.hi, -thi, Double(1.0));
rlo := -(x.lo * thi);
e := Fast2Sum(rhi, rlo);
d := DWTimesFP3(e.hi, e.lo, thi);
Result := DWPlusFP(d.hi, d.lo, thi);
end;
{
@inline function dvi_dddd_dd(x::Tuple{T,T}, y::Tuple{T,T}) where {T<:IEEEFloat}
xhi, xlo = x
yhi, ylo = y
hi = xhi / yhi
uh, ul = two_prod(hi, yhi)
lo = ((((xhi - uh) - ul) + xlo) - hi*ylo)/yhi
hi,lo = two_hilo_sum(hi, lo)
return hi, lo
end
}
function div_DF64(const X,Y:TDF64):TDF64;
var hi, lo: Double;
u: TDF64;
begin
hi := x.hi / y.hi;
u := two_prod(hi, y.hi);
lo := ((((x.hi - u.hi) - u.lo) + x.lo) - hi*y.lo)/y.hi;
Result := two_hilo_sum(hi, lo);
end;
operator /(const X,Y : TDF64):TDF64;inline;
begin
//Result:=X*inv(Y);
Result:=div_DF64(X, Y);
end;
function ToDF64(const a:Double):TDF64;overload;
begin
Result.hi:=a;
Result.lo:=0;
end;
function ToDF64(const a: Int64): TDF64;overload;
var abs_a:Int64;
begin
if a >= 0 then
begin
Result.hi:=a and $FFFFFFFF00000000;
Result.lo:=a and $00000000FFFFFFFF;
end
else
begin
abs_a:=-a;
Result.hi:=-Double(abs_a and $FFFFFFFF00000000);
Result.lo:=-Double(abs_a and $00000000FFFFFFFF);
end;
end;
function ToF64(const a: TDF64): Double;
begin
Result:=a.hi;
end;
operator:=(const A: Double): TDF64;
begin
Result:=ToDF64(A);
end;
operator:=(const A: TDF64): NativeInt;
var f:Double;
begin
f:=ToF64(A);
if f > High(NativeInt) then
Result:=High(NativeInt)
else if f < Low(NativeInt) then
Result:=Low(NativeInt)
else
Result:=Trunc(f);
end;
operator:=(const A: TDF64): Double;
begin
Result:=ToF64(A);
end;
operator=(const X, Y: TDF64): boolean;
begin
Result:=(x.hi = y.hi) and (x.lo = y.lo);
end;
operator<(const X, Y: TDF64): boolean;
begin
Result:=(x.hi < y.hi) or ((x.hi = y.hi) and (x.lo < y.lo));
end;
operator<=(const X, Y: TDF64): boolean;
begin
Result:=(x.hi < y.hi) or ((x.hi = y.hi) and (x.lo <= y.lo));
end;
operator>(const X, Y: TDF64): boolean;
begin
Result:=not (x <= y);
end;
operator>=(const X, Y: TDF64): boolean;
begin
Result:=not (x < y);
end;
function ToStr(const p:TDF64):String;
const MaxDigits = DIGITS_PER_ELEMENT*4; //9*4 = 36
var v:BigDecimal;
orgl,l:Integer;
begin
if IsNan(p.hi) or IsNan(p.lo) or IsInfinite(p.hi) or IsInfinite(p.lo) then
Exit( FloatToStr(p.hi) );
v:=FloatToBigDecimal(p.hi, bdffExact) + FloatToBigDecimal(p.lo, bdffExact);
// limit number of digits to "MaxDigits"
// 0 digit in "v" is least significant
orgl:=Length(v.digits);
l:=min(orgl*DIGITS_PER_ELEMENT, MaxDigits);
l:=l div DIGITS_PER_ELEMENT;
v.digits:=Copy(v.digits, Length(v.digits)-l, l);
v.exponent:=sign(v.exponent) * (abs(v.exponent) - (orgl - l));
Result:=BigDecimalToStr(v, bdfExact);
end;
function Abs(const x: TDF64): TDF64;
begin
if x >= ZeroDF64 then
Result:=x
else
Result:=-x;
end;
function Max(const x,y:TDF64):TDF64;
begin
if x >= y then
Result:=x
else
Result:=y;
end;
function Min(const x,y:TDF64):TDF64;
begin
if x <= y then
Result:=x
else
Result:=y;
end;
procedure TestUnit;
{$Assertions on}
var p1,p2,p3:TDF64;
s,s2:string;
begin
p1.hi:=1/3;
p1.lo:=0;
p2.hi:=1/3;
p2.lo:=0;
p3:=p1*p2;
s:=ToStr(p3);
assert(s = '0.111111111111111098775299726387149893');
//конвеÑÑаÑÐ¸Ñ Ð¸Ð· double
p1:=1/3;
p2:=1/3;
p3:=p1*p2;
s:=ToStr(p3);
assert(s = '0.111111111111111098775299726387149893');
p1:=ToDF64(123456789012345.0);
p2:=ToDF64(0.123456789012345);
p3:=p1+p2;
s:=ToStr(p3);
assert(s = '123456789012345.123456789012344997');
p1:=ToDF64(123456789012345.0);
p2:=ToDF64(0.123456789012345);
p3:=p1-p2;
s:=ToStr(p3);
assert(s = '123456789012344.876543210987655002');
p1:=ToDF64(1.0);
p2:=ToDF64(3.0);
p3:=p1/p2;
s:=ToStr(p3);
assert(s = '0.333333333333333333333333333333332306');
p1:=ToDF64(1.0);
p2:=ToDF64(3.0);
p3:=-p1/p2;
s:=ToStr(p3);
assert(s = '-0.333333333333333333333333333333332306');
p1:=ToDF64(1.0);
p2:=ToDF64(3.0);
assert(p1 < p2);
assert(p1 <= p2);
p1:=ToDF64(3.0);
p2:=ToDF64(3.0);
assert(p1 = p2);
p1:=ToDF64(3.0);
p2:=ToDF64(1.0);
assert(p1 > p2);
assert(p1 >= p2);
p1:=ToDF64(High(int64));
s:=ToStr(p1);
s2:=IntToStr(High(int64));
assert(s = s2);
p1:=ToDF64(-(High(int64)-1));
s:=ToStr(p1);
s2:=IntToStr(-(High(int64)-1));
assert(s = s2);
p1:=ToDF64(255);
s:=ToStr(p1);
assert(s = '255');
p1:=ToDF64(-255);
s:=ToStr(p1);
assert(s = '-255');
end;
initialization
SetRoundMode(rmNearest); // нÑжно окÑÑгление до ближайÑего Ð´Ð»Ñ FMA
{$IFDEF DEBUG}
TestUnit;
{$ENDIF}
end.
_______________________________________________
fpc-devel maillist - fpc-devel@lists.freepascal.org
https://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-devel