> Am 27.09.20 um 18:21 schrieb Florian Klämpfl via fpc-devel:
> So we would need softfloat extended support. This is doable with one
> major obstacle: the "irrational" functions like ld, sin etc.: they need
> precise enough implementations for 80 bit (actually, just adding and
> using in the compiler 128 bit softfloat support would be even better).
> And this is probably something which is really hard. While I believe ;)
> I can solve most coding challenges, this is something I fear to touch :)

Well. Intel arch supports FMA instruction sets since 2013 (Intel) and since 
2012 (AMD).
See https://en.wikipedia.org/wiki/FMA_instruction_set

Based on FMA support it's quite easy to create 128 bits (almost) semi-soft 
floats.
Julia guys have a nice library called DoubleFloats 
(https://github.com/JuliaMath/DoubleFloats.jl, MIT licensed)
They have a lot of trigonometric and other "complex" stuff already implemented.

So, maybe it's not that frightening after all :)
See my own small port attached. Russian comments ahead so beware :)

-- Regards,
Denis Golovan
unit uDF64;

{
   модуль для работы с Double F64 числами
   https://github.com/JuliaMath/DoubleFloats.jl
}

{$mode objfpc}{$H+}
{$ASMMODE INTEL}
{$modeswitch advancedrecords}
{$FPUType SSE42} //почему-то быстрее AVX/AVX2
{$Optimization DFA}

interface

uses
  Classes, SysUtils;

type
  TDF64 = record
    hi,lo: Double;
  end;
  PDF64 = ^TDF64;
  PTDF64 = ^TDF64; //упрощение для макросов

operator -(const X : TDF64):TDF64;inline;
operator +(const A,B : TDF64):TDF64;
operator -(const A,B : TDF64):TDF64;
operator *(const A,B : TDF64):TDF64;
operator /(const X,Y : TDF64):TDF64;inline;

operator :=(const A:Double):TDF64;inline;
operator :=(const A:TDF64):Double;inline;
operator :=(const A:TDF64):NativeInt;inline;

operator=(const X, Y: TDF64): boolean;inline;
operator<(const X, Y: TDF64): boolean;inline;
operator<=(const X, Y: TDF64): boolean;inline;
operator>(const X, Y: TDF64): boolean;inline;
operator>=(const X, Y: TDF64): boolean;inline;

function Neg(const x:TDF64):TDF64;overload;inline;
function Inv(const x:TDF64):TDF64;overload;inline;
function Abs(const x:TDF64):TDF64;overload;inline;
function Max(const x,y:TDF64):TDF64;overload;inline;
function Min(const x,y:TDF64):TDF64;overload;inline;
function FMA(a,b,c:Double):Double;

function ToDF64(const a:Double):TDF64;inline;overload;
function ToDF64(const a:Int64):TDF64;inline;overload;
function ToF64(const a:TDF64):Double;inline;overload;
function ToStr(const p:TDF64):String;overload;

const ZeroDF64:TDF64 = (hi:0.0; lo:0.0);
const OneDF64:TDF64 = (hi:1.0; lo:0.0);

implementation
uses math, bigdecimalmath;

{$warn 2005 off} // disable "Comment level 2 found" warnings.
// $fpc -B -vwhilq -Fu./bigdecimalmath/ project1.lpr
// ... to see numbers for warnings

{
function FMA(a,b,c:Double):Double;
begin
  // SetRoundMode(rmNearest); - нужно округление до ближайшего
  asm
    movlpd    xmm0, [A]
    movlpd    xmm1, [B]
    movlpd    xmm2, [C]
    vfmadd213sd xmm0, xmm1, xmm2 //a = a*b + c
    //movhlps xmm1, xmm0
    //vzeroupper
  end;
end;
}
function FMA(a,b,c:Double):Double;assembler;nostackframe;
asm
  // SetRoundMode(rmNearest); - нужно округление до ближайшего
  vfmadd213sd xmm0, xmm1, xmm2 //a = a*b + c
end;

function quick_two_sum(const a,b:Double): TDF64;inline;
begin
  Result.hi:=a+b;
  Result.lo:=b-(Result.hi-a);
end;

{
function TwoSum(a::T, b::T) where {T<:AbstractFloat}
    s  = a + b
    a1 = s - b
    b1 = s - a1
    da = a - a1
    db = b - b1
    t  = da + db
    return s, t
end
}

function two_sum(const a, b:Double):TDF64;inline;
var hi, lo, a1, b1: Double;
begin
  hi := a + b;
  a1 := hi - b;
  b1 := hi - a1;
  lo := (a - a1) + (b - b1);
  Result.hi := hi;
  Result.lo := lo;
end;

(*
@inline function two_hilo_sum(a::T, b::T) where {T<:FloatWithFMA}
    s = a + b
    e = b - (s - a)
    return s, e
end
*)
function two_hilo_sum(const a, b:Double):TDF64;inline;
begin
  Result.hi:=a+b;
  Result.lo:=b - (Result.hi - a)
end;

function two_prod(const a,b:Double): TDF64;inline;
begin
  Result.hi:=a*b;
  Result.lo:=FMA(a,b,-Result.hi);
end;

(*
@inline function two_diff(a::T, b::T) where {T<:FloatWithFMA}
    hi = a - b
    a1 = hi + b
    b1 = hi - a1
    lo = (a - a1) - (b + b1)
    return hi, lo
end
*)
function two_diff(const a,b:Double): TDF64;inline;
var a1,b1:Double;
begin
  Result.hi:=a - b;
  a1:=Result.hi + b;
  b1:=Result.hi - a1;
  Result.lo:=(a - a1) - (b + b1);
end;

(*
 @inline function add_dddd_dd(x::Tuple{T,T}, y::Tuple{T,T}) where T<:IEEEFloat
    xhi, xlo = x
    yhi, ylo = y
    hi, lo = two_sum(xhi, yhi)
    thi, tlo = two_sum(xlo, ylo)
    c = lo + thi
    hi, lo = two_hilo_sum(hi, c)
    c = tlo + lo
    hi, lo = two_hilo_sum(hi, c)
    return hi, lo
end
*)
operator +(const A,B : TDF64):TDF64;
var p1, p2:TDF64;
    c:Double;
begin
  p1:=two_sum(a.hi, b.hi);
  p2:=two_sum(a.lo, b.lo);

  c := p1.lo + p2.hi;
  p1 := two_hilo_sum(p1.hi, c);
  c := p2.lo + p1.lo;
  p1 := two_hilo_sum(p1.hi, c);

  Result:=p1;
end;

(*
@inline function sub_dddd_dd(x::Tuple{T,T}, y::Tuple{T,T}) where T<:IEEEFloat
    xhi, xlo = x
    yhi, ylo = y
    hi, lo = two_diff(xhi, yhi)
    thi, tlo = two_diff(xlo, ylo)
    c = lo + thi# Algorithm 9 from Tight and rigourous error bounds. relative error <= 2u²
    hi, lo = two_hilo_sum(hi, c)
    c = tlo + lo
    hi, lo = two_hilo_sum(hi, c)
    return hi, lo
end
*)
operator -(const A,B : TDF64):TDF64;
var p1, p2:TDF64;
    c:Double;
begin
  p1:=two_diff(a.hi, b.hi);
  p2:=two_diff(a.lo, b.lo);

  c := p1.lo + p2.hi;
  p1 := two_hilo_sum(p1.hi, c);
  c := p2.lo + p1.lo;
  p1 := two_hilo_sum(p1.hi, c);

  Result:=p1;
end;

operator *(const A,B : TDF64):TDF64;
var p:TDF64;
begin
  p:= two_prod(a.hi, b.hi);
  p.lo := p.lo + a.hi * b.lo + a.lo * b.hi;
  p:= quick_two_sum(p.hi, p.lo);
  Result:=p;
end;

{
function TwoSum(a::T, b::T) where {T<:AbstractFloat}
    s  = a + b
    a1 = s - b
    b1 = s - a1
    da = a - a1
    db = b - b1
    t  = da + db
    return s, t
end

@inline function Fast2Sum(a::T, b::T) where {T<:AbstractFloat}
    s = a + b
    z = s - a
    t = b - z
    return s, t
end

@inline function Fast2Mult(a::T, b::T) where {T<:AbstractFloat}
    s = a * b
    t = fma(a, b, -s)
    return s, t
end

@inline function DWTimesFP3(xₕᵢ::T, xₗₒ::T, y::T) where {T<:AbstractFloat}
    cₕᵢ, cₗₒ = Fast2Mult(xₕᵢ, y)
    c = fma(xₗₒ, y, cₗₒ)
    zₕᵢ, zₗₒ = Fast2Sum(cₕᵢ, c)
    return zₕᵢ, zₗₒ
end

@inline function DWPlusFP(xₕᵢ::T, xₗₒ::T, y::T) where {T<:AbstractFloat}
    sₕᵢ, sₗₒ = TwoSum(xₕᵢ, y)
    v = xₗₒ + sₗₒ
    zₕᵢ, zₗₒ = TwoSum(sₕᵢ, v)
    return zₕᵢ, zₗₒ
end

function DWInvDW3(yₕᵢ::T, yₗₒ::T) where {T<:AbstractFloat}
   tₕᵢ = inv(yₕᵢ)
   rₕᵢ = fma(yₕᵢ, -tₕᵢ, one(T))
   rₗₒ = -(yₗₒ * tₕᵢ)
   eₕᵢ, eₗₒ = Fast2Sum(rₕᵢ, rₗₒ)
   dₕᵢ, dₗₒ = DWTimesFP3(eₕᵢ, eₗₒ, tₕᵢ)
   zₕᵢ, zₗₒ = DWPlusFP(dₕᵢ, dₗₒ, tₕᵢ)
   return zₕᵢ, zₗₒ
end
}

function Fast2Sum(const a:Double; const b:Double):TDF64;inline;
var z:Double;
begin
  Result.hi:=a + b;
  z:=Result.hi - a;
  Result.lo:=b - z;
end;

function Fast2Mult(const a:Double; const b:Double):TDF64;inline;
begin
  Result.hi:=a * b;
  Result.lo:=fma(a, b, -Result.hi);
end;

function DWTimesFP3(const xhi, xlo, y:Double):TDF64;inline;
var C:TDF64;
    xc:Double;
begin
  C := Fast2Mult(xhi, y);
  xc := fma(xlo, y, c.lo);
  Result := Fast2Sum(c.hi, xc);
end;

function DWPlusFP(const xhi, xlo, y:Double):TDF64;inline;
var S:TDF64;
    v:Double;
begin
  S := two_sum(xhi, y);
  v := xlo + S.lo;
  Result := two_sum(s.hi, v);
end;

function Inv(const x:Double):Double;inline;
begin
  Result:=Double(1.0)/x;
end;

function Neg(const x: TDF64): TDF64;
begin
  Result.hi:=-x.hi;
  Result.lo:=-x.lo;
end;

operator-(const X: TDF64): TDF64;
begin
  Result:=Neg(x);
end;

function Inv(const x:TDF64):TDF64;inline;
var thi, rhi, rlo:Double;
    e, d:TDF64;
begin
  // DWInvDW3
  thi := Inv(x.hi);
  rhi := fma(x.hi, -thi, Double(1.0));
  rlo := -(x.lo * thi);
  e := Fast2Sum(rhi, rlo);
  d := DWTimesFP3(e.hi, e.lo, thi);
  Result := DWPlusFP(d.hi, d.lo, thi);
end;

{
@inline function dvi_dddd_dd(x::Tuple{T,T}, y::Tuple{T,T}) where {T<:IEEEFloat}
    xhi, xlo = x
    yhi, ylo = y
    hi = xhi / yhi
    uh, ul = two_prod(hi, yhi)
    lo = ((((xhi - uh) - ul) + xlo) - hi*ylo)/yhi
    hi,lo = two_hilo_sum(hi, lo)
    return hi, lo
end
}

function div_DF64(const X,Y:TDF64):TDF64;
var hi, lo: Double;
    u: TDF64;
begin
  hi := x.hi / y.hi;
  u := two_prod(hi, y.hi);
  lo := ((((x.hi - u.hi) - u.lo) + x.lo) - hi*y.lo)/y.hi;
  Result := two_hilo_sum(hi, lo);
end;

operator /(const X,Y : TDF64):TDF64;inline;
begin
  //Result:=X*inv(Y);
  Result:=div_DF64(X, Y);
end;

function ToDF64(const a:Double):TDF64;overload;
begin
  Result.hi:=a;
  Result.lo:=0;
end;

function ToDF64(const a: Int64): TDF64;overload;
var abs_a:Int64;
begin
  if a >= 0 then
    begin
      Result.hi:=a and $FFFFFFFF00000000;
      Result.lo:=a and $00000000FFFFFFFF;
    end
  else
    begin
      abs_a:=-a;
      Result.hi:=-Double(abs_a and $FFFFFFFF00000000);
      Result.lo:=-Double(abs_a and $00000000FFFFFFFF);
    end;
end;

function ToF64(const a: TDF64): Double;
begin
  Result:=a.hi;
end;

operator:=(const A: Double): TDF64;
begin
  Result:=ToDF64(A);
end;

operator:=(const A: TDF64): NativeInt;
var f:Double;
begin
  f:=ToF64(A);
  if f > High(NativeInt) then
    Result:=High(NativeInt)
  else if f < Low(NativeInt) then
    Result:=Low(NativeInt)
  else
    Result:=Trunc(f);
end;

operator:=(const A: TDF64): Double;
begin
  Result:=ToF64(A);
end;

operator=(const X, Y: TDF64): boolean;
begin
  Result:=(x.hi = y.hi) and (x.lo = y.lo);
end;

operator<(const X, Y: TDF64): boolean;
begin
  Result:=(x.hi < y.hi) or ((x.hi = y.hi) and (x.lo < y.lo));
end;

operator<=(const X, Y: TDF64): boolean;
begin
  Result:=(x.hi < y.hi) or ((x.hi = y.hi) and (x.lo <= y.lo));
end;

operator>(const X, Y: TDF64): boolean;
begin
  Result:=not (x <= y);
end;

operator>=(const X, Y: TDF64): boolean;
begin
  Result:=not (x < y);
end;

function ToStr(const p:TDF64):String;
const MaxDigits = DIGITS_PER_ELEMENT*4; //9*4 = 36
var v:BigDecimal;
    orgl,l:Integer;
begin
  if IsNan(p.hi) or IsNan(p.lo) or IsInfinite(p.hi) or IsInfinite(p.lo) then
    Exit( FloatToStr(p.hi) );

  v:=FloatToBigDecimal(p.hi, bdffExact) + FloatToBigDecimal(p.lo, bdffExact);

  // limit number of digits to "MaxDigits"
  // 0 digit in "v" is least significant
  orgl:=Length(v.digits);
  l:=min(orgl*DIGITS_PER_ELEMENT, MaxDigits);
  l:=l div DIGITS_PER_ELEMENT;
  v.digits:=Copy(v.digits, Length(v.digits)-l, l);
  v.exponent:=sign(v.exponent) * (abs(v.exponent) - (orgl - l));

  Result:=BigDecimalToStr(v, bdfExact);
end;

function Abs(const x: TDF64): TDF64;
begin
  if x >= ZeroDF64 then
    Result:=x
    else
    Result:=-x;
end;

function Max(const x,y:TDF64):TDF64;
begin
  if x >= y then
    Result:=x
    else
    Result:=y;
end;

function Min(const x,y:TDF64):TDF64;
begin
  if x <= y then
    Result:=x
    else
    Result:=y;
end;

procedure TestUnit;
{$Assertions on}
var p1,p2,p3:TDF64;
    s,s2:string;
begin
  p1.hi:=1/3;
  p1.lo:=0;
  p2.hi:=1/3;
  p2.lo:=0;
  p3:=p1*p2;
  s:=ToStr(p3);
  assert(s = '0.111111111111111098775299726387149893');

  //конвертация из double
  p1:=1/3;
  p2:=1/3;
  p3:=p1*p2;
  s:=ToStr(p3);
  assert(s = '0.111111111111111098775299726387149893');

  p1:=ToDF64(123456789012345.0);
  p2:=ToDF64(0.123456789012345);
  p3:=p1+p2;
  s:=ToStr(p3);
  assert(s = '123456789012345.123456789012344997');

  p1:=ToDF64(123456789012345.0);
  p2:=ToDF64(0.123456789012345);
  p3:=p1-p2;
  s:=ToStr(p3);
  assert(s = '123456789012344.876543210987655002');

  p1:=ToDF64(1.0);
  p2:=ToDF64(3.0);
  p3:=p1/p2;
  s:=ToStr(p3);
  assert(s = '0.333333333333333333333333333333332306');

  p1:=ToDF64(1.0);
  p2:=ToDF64(3.0);
  p3:=-p1/p2;
  s:=ToStr(p3);
  assert(s = '-0.333333333333333333333333333333332306');

  p1:=ToDF64(1.0);
  p2:=ToDF64(3.0);
  assert(p1 < p2);
  assert(p1 <= p2);

  p1:=ToDF64(3.0);
  p2:=ToDF64(3.0);
  assert(p1 = p2);

  p1:=ToDF64(3.0);
  p2:=ToDF64(1.0);
  assert(p1 > p2);
  assert(p1 >= p2);

  p1:=ToDF64(High(int64));
  s:=ToStr(p1);
  s2:=IntToStr(High(int64));
  assert(s = s2);

  p1:=ToDF64(-(High(int64)-1));
  s:=ToStr(p1);
  s2:=IntToStr(-(High(int64)-1));
  assert(s = s2);

  p1:=ToDF64(255);
  s:=ToStr(p1);
  assert(s = '255');

  p1:=ToDF64(-255);
  s:=ToStr(p1);
  assert(s = '-255');
end;

initialization
  SetRoundMode(rmNearest); // нужно округление до ближайшего для FMA
  {$IFDEF DEBUG}
  TestUnit;
  {$ENDIF}

end.

_______________________________________________
fpc-devel maillist  -  fpc-devel@lists.freepascal.org
https://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-devel

Reply via email to