(daffodil) branch main updated: Remove workaround for ICU-22303, fixed in ICU 78.1

slawrence Thu, 13 Nov 2025 11:38:36 -0800

This is an automated email from the ASF dual-hosted git repository.

slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git



The following commit(s) were added to refs/heads/main by this push:
     new f2b215939 Remove workaround for ICU-22303, fixed in ICU 78.1
f2b215939 is described below

commit f2b215939325af3f73093dbbc6494104ca14a330
Author: Steve Lawrence <[email protected]>
AuthorDate: Fri Nov 7 06:30:14 2025 -0500

    Remove workaround for ICU-22303, fixed in ICU 78.1
    
    ICU 77.1 and older had a bug where it would not correctly parse infinity
    or NaN if the text pattern had a decimal and it was required to match a
    decimal point (which is enabled with strict parse policy). To work around
    this bug, we added logic to parse a number a second time if the first
    failed, but with different settings that did not require the decimal
    point, which allows it to detect infinity/NaN. But this adds extra
    overhead and complexity.
    
    ICU 78.1 fixed this bug, so we can now remove this workaround and avoid
    the overhead related to a second parse. This also updates the test that
    was added to detect if this bug was fixed, which now detects if ICU has
    a regression and breaks the fix.
    
    DAFFODIL-2985
---
 .../parsers/ConvertTextStandardNumberParser.scala  | 53 +++++++---------------
 .../runtime1/processors/input/TestICU.scala        | 12 ++---
 project/Dependencies.scala                         |  2 +-
 3 files changed, 23 insertions(+), 44 deletions(-)

diff --git 
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/ConvertTextStandardNumberParser.scala
 
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/ConvertTextStandardNumberParser.scala
index f370d10ee..609e2a6e5 100644
--- 
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/ConvertTextStandardNumberParser.scala
+++ 
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/ConvertTextStandardNumberParser.scala
@@ -192,43 +192,22 @@ case class ConvertTextStandardNumberParser(
         val pos = new ParsePosition(0)
         val icuNum: JNumber = df.parse(strToParse, pos) match {
           case null => {
-            val infNaN: JDouble =
-              if (df.isDecimalPatternMatchRequired) {
-                // ICU failed to parse. But there is a bug in ICU4J 
(ICU-22303) that if there is
-                // a decimal in the pattern and we've set that decimal to be 
required (due to
-                // strict mode), then it will fail to parse Inf/NaN 
representations. As a
-                // workaround, we clone the DecimalFormat, disable requiring 
the decimal, and
-                // reparse. We only accept successful Inf/NaN parses 
though--everything else is
-                // considered a parse error since it meant the decimal point 
was missing or
-                // wasn't either inf/nan or a valid number. If ICU fixes this 
bug, we should
-                // remove this infNan variable and its use, as it is likely 
pretty expensive to
-                // clone, change a setting, and reparse. Fortunately, it is 
only in the error
-                // case of strict parsing so should be rare.
-                pos.setIndex(0)
-                val newDF = df.clone().asInstanceOf[DecimalFormat]
-                newDF.setDecimalPatternMatchRequired(false)
-                newDF.parse(strToParse, pos) match {
-                  case d: JDouble => {
-                    Assert.invariant(d.isNaN || d.isInfinite)
-                    d
-                  }
-                  case _ => null
-                }
-              } else {
-                null
-              }
-
-            if (infNaN != null) {
-              infNaN
-            } else {
-              PE(
-                start,
-                "Unable to parse %s from text: %s",
-                context.optPrimType.get.globalQName,
-                str
-              )
-              return
-            }
+            // null indicates that ICU4J was unable to parse the string to a 
number matching the
+            // pattern and properties (e.g. grouping separator, decimal 
separator)--this is a
+            // processing error. Note that we do not need to consider that the 
string might have
+            // been textStandardInfinityRep or textStandardNaNRep like we had 
to do for
+            // textStandardZeroRep above. This is because those are handled by 
ICU4J--the
+            // textNumberFormatEv is built with a DecimalFormatSymbols 
instance that has the
+            // infinity and NaN representations set based on those properties. 
This means that
+            // if df.parse() returns null then we know it was not infinity, 
NaN, or a number
+            // matching the pattern/properties
+            PE(
+              start,
+              "Unable to parse %s from text: %s",
+              context.optPrimType.get.globalQName,
+              str
+            )
+            return
           }
           case d: JDouble => {
             // ICU returns a Double only if it parsed NaN, Infinity, 
-Infinity, or negative
diff --git 
a/daffodil-core/src/test/scala/org/apache/daffodil/runtime1/processors/input/TestICU.scala
 
b/daffodil-core/src/test/scala/org/apache/daffodil/runtime1/processors/input/TestICU.scala
index 010930f72..1c2ae1520 100644
--- 
a/daffodil-core/src/test/scala/org/apache/daffodil/runtime1/processors/input/TestICU.scala
+++ 
b/daffodil-core/src/test/scala/org/apache/daffodil/runtime1/processors/input/TestICU.scala
@@ -270,8 +270,8 @@ class TestICU {
 
   // shows that with parseStrict and decimalPatternMatch required, that ICU 
requires or
   // disallows a decimal point in the data based on whether or not a decimal 
point appears in
-  // the pattern. Also shows ICU failing to parse infinity/nan when 
decimalPatternMatchRequired
-  // is true and the pattern contains a decimal.
+  // the pattern. Also shows that ICU 78.1 fixed ICU-22303, now correctly 
parsing infinity/nan
+  // when decimalPatternMatchRequired is true and the pattern contains a 
decimal.
   @Test def test_decimalPatternMatchRequired(): Unit = {
     val dfs = new DecimalFormatSymbols(ULocale.US)
 
@@ -283,8 +283,8 @@ class TestICU {
 
     assertEquals(JLong.valueOf(1), df.parse("1.0", pp))
     assertEquals(null, df.parse("1", pp))
-    assertEquals(null, df.parse(dfs.getInfinity, pp)) // see ICU-22303
-    assertEquals(null, df.parse(dfs.getNaN, pp)) // see ICU-22303
+    assertEquals(JDouble.POSITIVE_INFINITY, df.parse(dfs.getInfinity, pp)) // 
see ICU-22303
+    assertEquals(JDouble.NaN, df.parse(dfs.getNaN, pp)) // see ICU-22303
 
     assertEquals("1.0", df.format(1L))
     assertEquals(dfs.getInfinity, df.format(JDouble.POSITIVE_INFINITY))
@@ -305,8 +305,8 @@ class TestICU {
 
     assertEquals(JLong.valueOf(1), df.parse("1.0", pp))
     assertEquals(null, df.parse("1", pp))
-    assertEquals(null, df.parse(dfs.getInfinity, pp)) // see ICU-22303
-    assertEquals(null, df.parse(dfs.getNaN, pp)) // see ICU-22303
+    assertEquals(JDouble.POSITIVE_INFINITY, df.parse(dfs.getInfinity, pp)) // 
see ICU-22303
+    assertEquals(JDouble.NaN, df.parse(dfs.getNaN, pp)) // see ICU-22303
 
     assertEquals("1", df.format(1L))
     assertEquals(dfs.getInfinity, df.format(JDouble.POSITIVE_INFINITY))
diff --git a/project/Dependencies.scala b/project/Dependencies.scala
index 679149e1e..ef804deba 100644
--- a/project/Dependencies.scala
+++ b/project/Dependencies.scala
@@ -25,7 +25,7 @@ object Dependencies {
     "com.lihaoyi" %% "os-lib" % "0.11.5", // for writing/compiling C source 
files
     "org.scala-lang.modules" %% "scala-xml" % "2.4.0",
     "org.scala-lang.modules" %% "scala-parser-combinators" % "2.4.0",
-    "com.ibm.icu" % "icu4j" % "77.1",
+    "com.ibm.icu" % "icu4j" % "78.1",
     ("xerces" % "xercesImpl" % "2.12.2").exclude("xml-apis", "xml-apis"),
     "xml-resolver" % "xml-resolver" % "1.2",
     "commons-io" % "commons-io" % "2.20.0",

(daffodil) branch main updated: Remove workaround for ICU-22303, fixed in ICU 78.1

Reply via email to