Hello, in my program I have need for checking floating-point precision. I'm internally using floating-points for calculations, but in the end I have to use integer numbers. I cannot use Round() because I have to check for thresholds. I.e. that I wish to accept a value of 1.9999.... as being ">=2". As you can see, I cannot use Trunc() either, so I decided to check for the difference in ULPs (unit in the last place).
I couldn't find the corresponding functions in the RTL, but I think they should be included. I wrote the functions listed below. I would appreciate if the maintainers of FPC decide to add them to the Math unit. I skipped the functions for the Extended type, but if it's being desired, I can add hand them in later. Kind regards, Thomas *** code: const MAX_ULP_SINGLE = 2.028240960E+31; const MAX_ULP_DOUBLE = 1.9958403095347198E+292; function SingleToLongBits (Argument: Single): UInt32; begin Move (Argument, Result, SizeOf(UInt32)); end; {$ifdef FPC_HAS_TYPE_DOUBLE} function DoubleToLongBits (Argument: Double): UInt64; begin Move (Argument, Result, SizeOf(UInt64)); end; {$endif FPC_HAS_TYPE_DOUBLE} function LongBitsToSingle (Argument: UInt32): Single; begin Move (Argument, Result, SizeOf(Single)); end; {$ifdef FPC_HAS_TYPE_DOUBLE} function LongBitsToDouble (Argument: UInt64): Double; begin Move (Argument, Result, SizeOf(Double)); end; {$endif FPC_HAS_TYPE_DOUBLE} // returns the first floating-point argument with the sign of the second floating-point argument function CopySign (x: Single; y: Single): Single; begin Exit (Abs(x) * Sign(y)); end; {$ifdef FPC_HAS_TYPE_DOUBLE} // returns the first floating-point argument with the sign of the second floating-point argument function CopySign (x: Double; y: Double): Double; begin Exit (Abs(x) * Sign(y)); end; {$endif FPC_HAS_TYPE_DOUBLE} function IsSameSign (x: Single; y: Single): Boolean; begin Exit (CopySign(x, y) = x); end; {$ifdef FPC_HAS_TYPE_DOUBLE} function IsSameSign (x: Double; y: Double): Boolean; begin Exit (CopySign(x, y) = x); end; {$endif FPC_HAS_TYPE_DOUBLE} function NextAfter (Start: Single; Direction: Single): Single; var absStart: Single; absDir: Single; toZero: Boolean; begin // If either argument is a NaN, then NaN is returned. if IsNan(Start) OR IsNan(Direction) then Exit (NaN); // If both arguments compare as equal the second argument is returned. if (Start = Direction) THEN Exit (Direction); absStart := Abs(Start); absDir := Abs(Direction); toZero := NOT IsSameSign (Start, Direction) OR (AbsDir < absStart); if (toZero) then begin // we are reducing the magnitude, going toward zero. if (absStart = MinSingle) then Exit (CopySign (0.0, Start)); if IsInfinite(absStart) THEN Exit (CopySign (MaxSingle, Start)); Exit (CopySign (LongBitsToSingle (Pred(SingleToLongBits(absStart))), Start)); end else begin // we are increasing the magnitude, toward +-Infinity if (Start = 0.0) then Exit (CopySign (MinSingle, Direction)); if (absStart = MaxSingle) then Exit (CopySign (Infinity, Start)); Exit (CopySign (LongBitsToSingle(Succ(SingleToLongBits(absStart))), Start)); end; end; {$ifdef FPC_HAS_TYPE_DOUBLE} function NextAfter (Start: Double; Direction: Double): Double; var absStart: Double; absDir: Double; toZero: Boolean; begin // If either argument is a NaN, then NaN is returned. if IsNan(Start) OR IsNan(Direction) then Exit (NaN); // If both arguments compare as equal the second argument is returned. if (Start = Direction) THEN Exit (Direction); absStart := Abs(Start); absDir := Abs(Direction); toZero := NOT IsSameSign (Start, Direction) OR (AbsDir < AbsStart); if (toZero) then begin // we are reducing the magnitude, going toward zero. if (absStart = MinDouble) then Exit (CopySign (0.0, Start)); if IsInfinite(absStart) THEN Exit (CopySign (MaxDouble, Start)); Exit (CopySign (LongBitsToDouble (Pred(DoubleToLongBits(absStart))), Start)); end else begin // we are increasing the magnitude, toward +-Infinity if (Start = 0.0) then Exit (CopySign (MinDouble, Direction)); if (AbsStart = MaxDouble) then Exit (CopySign (Infinity, Start)); Exit (CopySign (LongBitsToDouble(Succ(DoubleToLongBits(absStart))), start)); end; end; {$endif FPC_HAS_TYPE_DOUBLE} function FloatPrior (Argument: Single): Single; begin Exit (NextAfter (Argument, NegInfinity)); end; function FloatNext (Argument: Single): Single; begin Exit (NextAfter (Argument, Infinity)); end; {$ifdef FPC_HAS_TYPE_DOUBLE} function FloatPrior (Argument: Double): Double; begin Exit (NextAfter (Argument, NegInfinity)); end; {$endif FPC_HAS_TYPE_DOUBLE} {$ifdef FPC_HAS_TYPE_DOUBLE} function FloatNext (Argument: Double): Double; begin Exit (NextAfter (Argument, Infinity)); end; {$endif FPC_HAS_TYPE_DOUBLE} function ULP (Argument: Single): Single; begin // If the argument is NaN, then the result is NaN. if IsNaN (Argument) then Exit (NaN); // If the argument is positive or negative infinity, then the result is positive infinity. if IsInfinite (Argument) then Exit (Infinity); // If the argument is positive or negative zero, then the result is Single.MIN_VALUE. if (Argument = 0.0) then Exit (MinSingle); Argument := Abs(Argument); // If the argument is ±Single.MAX_VALUE, then the result is equal to 2^104. if (Argument = MaxSingle) then Exit (MAX_ULP_SINGLE); Exit (FloatNext(Argument) - Argument); end; {$ifdef FPC_HAS_TYPE_DOUBLE} function ULP (Argument: Double): Double; begin // If the argument is NaN, then the result is NaN. if IsNaN (Argument) then Exit (NaN); // If the argument is positive or negative infinity, then the result is positive infinity. if IsInfinite (Argument) then Exit (Infinity); // If the argument is positive or negative zero, then the result is Double.MIN_VALUE. if (Argument = 0.0) then Exit (MinDouble); Argument := Abs(Argument); // If the argument is ±Double.MAX_VALUE, then the result is equal to 2^971. if (Argument = MaxDouble) then Exit (MAX_ULP_DOUBLE); Exit (FloatNext(Argument) - Argument); end; {$endif FPC_HAS_TYPE_DOUBLE} _______________________________________________ fpc-pascal maillist - fpc-pascal@lists.freepascal.org https://lists.freepascal.org/cgi-bin/mailman/listinfo/fpc-pascal