This is an automated email from the ASF dual-hosted git repository.
slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git
The following commit(s) were added to refs/heads/main by this push:
new 20d6ea347 Fix xs:float text number parse/unparse edge cases
20d6ea347 is described below
commit 20d6ea3479c85e431faa260424ecb5e2675d7d8c
Author: Steve Lawrence <[email protected]>
AuthorDate: Thu Jan 4 08:52:31 2024 -0500
Fix xs:float text number parse/unparse edge cases
When parsing text floats, ICU sometimes returns a BigInteger, such as
the case when parsing the maximum single precision floating-point value
(i.e. 3.40282347E+38).
The current logic for checking if these parsed BigIntegers are in range
of a float is to convert it to a double and then compare that against
the maximum value of a float (promoted to a double). However, converting
a BigInteger to a double can lead to changes in precision, making this
comparison inaccurate. Looking at bit representations makes this more
clear:
Float.MaxValue.doubleValue converted to bits is:
0 10001111110 1111111111111111111111100000000000000000000000000000
But BigInteger(Float.MaxValue).doubleValue converted to bits is:
0 10001111110 1111111111111111111111100101010011011010111111111000
The rules for converting a float to a double are different than
converting a BigInteger to a double, which leads to changes in precision
and a double value that is slightly bigger than the actual float max
value, leading to out of range errors.
Because converting a BigInteger to a double is unreliable, this adds a
new case for BigInteger, which converts it to a BigDecimal and compares
that against the max value, which avoids precision issues. Note that
this also changes the min/maxBD values to be created from a string
rather than a double, since converting a double to a BigDecimal also has
related precision issues.
Additionally, when unparsing text floats, ICU does not have a way to
unparse single precision floats. Instead, it converts the float to a
double and then unparses that. However, this leads to text numbers with
extra precision that implies more accuracy than exists for floats, and
leads to errors with some round trip tests. To fix this, this converts
the float to a BigDecimal using the String constructor (which maintains
precision), and then has ICU format the BigDecimal. This is more
expensive, but leads to more correct text floats. In practice, xs:float
should probably not be used for text numbers, favoring xs:double
instead, which avoids this issue.
DAFFODIL-2867
---
.../runtime1/ConvertTextStandardNumberUnparser.scala | 20 +++++++++++++++++++-
.../apache/daffodil/runtime1/dpath/NodeInfo.scala | 16 ++++++++++++++--
.../daffodil/section05/simple_types/SimpleTypes.tdml | 13 ++++++++-----
.../daffodil/section23/dfdl_functions/Functions.tdml | 12 ++++--------
.../section05/simple_types/TestSimpleTypes.scala | 4 +---
5 files changed, 46 insertions(+), 19 deletions(-)
diff --git
a/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/unparsers/runtime1/ConvertTextStandardNumberUnparser.scala
b/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/unparsers/runtime1/ConvertTextStandardNumberUnparser.scala
index 987ea248c..1049775bc 100644
---
a/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/unparsers/runtime1/ConvertTextStandardNumberUnparser.scala
+++
b/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/unparsers/runtime1/ConvertTextStandardNumberUnparser.scala
@@ -17,10 +17,13 @@
package org.apache.daffodil.unparsers.runtime1
+import java.lang.{ Float => JFloat }
import java.lang.{ Number => JNumber }
+import java.math.{ BigDecimal => JBigDecimal }
import org.apache.daffodil.lib.util.Maybe
import org.apache.daffodil.lib.util.Numbers.isZero
+import org.apache.daffodil.runtime1.dpath.NodeInfo
import org.apache.daffodil.runtime1.processors.TextNumberFormatEv
import org.apache.daffodil.runtime1.processors._
import
org.apache.daffodil.runtime1.processors.parsers.TextDecimalVirtualPointMixin
@@ -86,7 +89,22 @@ case class ConvertTextNumberUnparser(
case n: JNumber if zeroRep.isDefined && isZero(n) => zeroRep.get
case _ =>
try {
- df.format(scaledValue)
+ if (
+ (context.optPrimType.get eq NodeInfo.Float) &&
+ JFloat.isFinite(scaledValue.asInstanceOf[Float])
+ ) {
+ // ICU4J has has no format() function that handles a float.
Instead, ICU4J converts
+ // the float to a double, and formats that value. However,
formatting a double as a
+ // string has different precision than formatting a float as a
string, resulting in
+ // extra precision that implies more accuracy than actually exists
for floats, and
+ // can also lead to failures to exactly round. To solve this, we
convert the float
+ // to a String, convert that String to a BigDecimal, and then
format that. This is
+ // more expensive, but ensures we unparse the exact same precision
as represented by
+ // the float.
+ df.format(new JBigDecimal(scaledValue.toString))
+ } else {
+ df.format(scaledValue)
+ }
} catch {
case e: java.lang.ArithmeticException =>
UE(state, "Unable to format number to pattern: %s", e.getMessage())
diff --git
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/dpath/NodeInfo.scala
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/dpath/NodeInfo.scala
index 3b2492a05..759ae01e6 100644
---
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/dpath/NodeInfo.scala
+++
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/dpath/NodeInfo.scala
@@ -651,13 +651,19 @@ object NodeInfo extends Enum {
trait PrimNumericFloat extends PrimNumeric { self: Numeric.Kind =>
def min: Double
def max: Double
- private lazy val minBD = new JBigDecimal(min)
- private lazy val maxBD = new JBigDecimal(max)
+ def minStr: String
+ def maxStr: String
+ private lazy val minBD = new JBigDecimal(minStr)
+ private lazy val maxBD = new JBigDecimal(maxStr)
def isValid(n: java.lang.Number): Boolean = n match {
case bd: JBigDecimal => {
bd.compareTo(minBD) >= 0 && bd.compareTo(maxBD) <= 0
}
+ case bi: JBigInt => {
+ val bd = new JBigDecimal(bi)
+ bd.compareTo(minBD) >= 0 && bd.compareTo(maxBD) <= 0
+ }
case _ => {
val d = n.doubleValue
(d.isNaN || d.isInfinite) || (d >= min && d <= max)
@@ -681,6 +687,10 @@ object NodeInfo extends Enum {
protected override def fromNumberNoCheck(n: Number): DataValueFloat =
n.floatValue
override val min = -JFloat.MAX_VALUE.doubleValue
override val max = JFloat.MAX_VALUE.doubleValue
+ // we cannot use min/max.toString for minStr/maxStr because min/max are
doubles so
+ // toString would have a precision different than Float.MaxValue.toString
+ override val minStr = "-" + JFloat.MAX_VALUE.toString
+ override val maxStr = JFloat.MAX_VALUE.toString
override val width: MaybeInt = MaybeInt(32)
}
@@ -698,6 +708,8 @@ object NodeInfo extends Enum {
protected override def fromNumberNoCheck(n: Number): DataValueDouble =
n.doubleValue
override val min = -JDouble.MAX_VALUE
override val max = JDouble.MAX_VALUE
+ override val minStr = "-" + JDouble.MAX_VALUE.toString
+ override val maxStr = JDouble.MAX_VALUE.toString
override val width: MaybeInt = MaybeInt(64)
}
diff --git
a/daffodil-test/src/test/resources/org/apache/daffodil/section05/simple_types/SimpleTypes.tdml
b/daffodil-test/src/test/resources/org/apache/daffodil/section05/simple_types/SimpleTypes.tdml
index 2dda07540..2aabfbe06 100644
---
a/daffodil-test/src/test/resources/org/apache/daffodil/section05/simple_types/SimpleTypes.tdml
+++
b/daffodil-test/src/test/resources/org/apache/daffodil/section05/simple_types/SimpleTypes.tdml
@@ -140,8 +140,7 @@
<xs:element name="floatTextFail" type="xs:float"
dfdl:lengthKind="implicit" />
<xs:element name="floatTextDelim" type="xs:float"
- dfdl:representation="text" dfdl:lengthKind="delimited"
- dfdl:terminator="%SP;"/>
+ dfdl:representation="text" dfdl:lengthKind="delimited" />
<xs:element name="nonNegIntBin" type="xs:nonNegativeInteger"
dfdl:representation="binary" dfdl:lengthKind="explicit" dfdl:length="{
16 }" />
@@ -3722,10 +3721,14 @@
</tdml:infoset>
</tdml:parserTestCase>
- <tdml:parserTestCase name="float_text_delim" root="floatTextDelim"
+ <tdml:parserTestCase name="float_text_max" root="floatTextDelim"
model="SimpleTypes-Embedded.dfdl.xsd"
description="Section 5 Simple type-float - DFDL-5-008R">
-
- <tdml:document><![CDATA[3.4028235E38]]></tdml:document>
+ <!--
+ We would really prefer to use "3.4028235E38" as the document data (wich
would parse exactly the same),
+ but textNumberPattern is "#,##0.###" so this unparses with all 39 digits
plus commas instead of using
+ an exponent. In order to round trip correctly, we set the document to
the more verbose form
+ -->
+
<tdml:document><![CDATA[340,282,350,000,000,000,000,000,000,000,000,000,000]]></tdml:document>
<tdml:infoset>
<tdml:dfdlInfoset>
<floatTextDelim>3.4028235E38</floatTextDelim>
diff --git
a/daffodil-test/src/test/resources/org/apache/daffodil/section23/dfdl_functions/Functions.tdml
b/daffodil-test/src/test/resources/org/apache/daffodil/section23/dfdl_functions/Functions.tdml
index be36ed8ec..a87ce6b4d 100644
---
a/daffodil-test/src/test/resources/org/apache/daffodil/section23/dfdl_functions/Functions.tdml
+++
b/daffodil-test/src/test/resources/org/apache/daffodil/section23/dfdl_functions/Functions.tdml
@@ -6333,8 +6333,7 @@
-->
<tdml:parserTestCase name="xPathFunc_round_hte_02" root="round-hte"
- model="XPathFunctions" description="Section 23.5.2 - Standard XPath
Functions - round-half-to-even - DFDL-23-098R"
- roundTrip="twoPass">
+ model="XPathFunctions" description="Section 23.5.2 - Standard XPath
Functions - round-half-to-even - DFDL-23-098R">
<tdml:document>
<tdml:documentPart type="text">3.455,2</tdml:documentPart>
@@ -6360,8 +6359,7 @@
-->
<tdml:parserTestCase name="xPathFunc_round_hte_03" root="round-hte"
- model="XPathFunctions" description="Section 23.5.2 - Standard XPath
Functions - round-half-to-even - DFDL-23-098R"
- roundTrip="twoPass">
+ model="XPathFunctions" description="Section 23.5.2 - Standard XPath
Functions - round-half-to-even - DFDL-23-098R">
<tdml:document>
<tdml:documentPart type="text">3.00865,4</tdml:documentPart>
@@ -6386,8 +6384,7 @@
-->
<tdml:parserTestCase name="xPathFunc_round_hte_04" root="round-hte"
- model="XPathFunctions" description="Section 23.5.2 - Standard XPath
Functions - round-half-to-even - DFDL-23-098R"
- roundTrip="twoPass">
+ model="XPathFunctions" description="Section 23.5.2 - Standard XPath
Functions - round-half-to-even - DFDL-23-098R">
<tdml:document>
<tdml:documentPart type="text">3.00065,4</tdml:documentPart>
@@ -6412,8 +6409,7 @@
-->
<tdml:parserTestCase name="xPathFunc_round_hte_05" root="round-hte"
- model="XPathFunctions" description="Section 23.5.2 - Standard XPath
Functions - round-half-to-even - DFDL-23-098R"
- roundTrip="twoPass">
+ model="XPathFunctions" description="Section 23.5.2 - Standard XPath
Functions - round-half-to-even - DFDL-23-098R">
<tdml:document>
<tdml:documentPart type="text">3.000065,5</tdml:documentPart>
diff --git
a/daffodil-test/src/test/scala/org/apache/daffodil/section05/simple_types/TestSimpleTypes.scala
b/daffodil-test/src/test/scala/org/apache/daffodil/section05/simple_types/TestSimpleTypes.scala
index 7e45770d4..a74e2ce1b 100644
---
a/daffodil-test/src/test/scala/org/apache/daffodil/section05/simple_types/TestSimpleTypes.scala
+++
b/daffodil-test/src/test/scala/org/apache/daffodil/section05/simple_types/TestSimpleTypes.scala
@@ -826,9 +826,7 @@ class TestSimpleTypes {
@Test def test_float_text(): Unit = { runner.runOneTest("float_text") }
@Test def test_float_text2(): Unit = { runner.runOneTest("float_text2") }
@Test def test_float_text3(): Unit = { runner.runOneTest("float_text3") }
-
- // DAFFODIL-2867
- // @Test def test_float_text_delim(): Unit = {
runner.runOneTest("float_text_delim") }
+ @Test def test_float_text_max(): Unit = {
runner.runOneTest("float_text_max") }
@Test def test_float_text_fail(): Unit = {
runner.runOneTest("float_text_fail") }
@Test def test_characterDuringValidFloat(): Unit = {