This is an automated email from the ASF dual-hosted git repository.
slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git
The following commit(s) were added to refs/heads/main by this push:
new f2b215939 Remove workaround for ICU-22303, fixed in ICU 78.1
f2b215939 is described below
commit f2b215939325af3f73093dbbc6494104ca14a330
Author: Steve Lawrence <[email protected]>
AuthorDate: Fri Nov 7 06:30:14 2025 -0500
Remove workaround for ICU-22303, fixed in ICU 78.1
ICU 77.1 and older had a bug where it would not correctly parse infinity
or NaN if the text pattern had a decimal and it was required to match a
decimal point (which is enabled with strict parse policy). To work around
this bug, we added logic to parse a number a second time if the first
failed, but with different settings that did not require the decimal
point, which allows it to detect infinity/NaN. But this adds extra
overhead and complexity.
ICU 78.1 fixed this bug, so we can now remove this workaround and avoid
the overhead related to a second parse. This also updates the test that
was added to detect when this bug was fixed; it now detects whether a
future ICU release regresses and breaks the fix.
DAFFODIL-2985
---
.../parsers/ConvertTextStandardNumberParser.scala | 53 +++++++---------------
.../runtime1/processors/input/TestICU.scala | 12 ++---
project/Dependencies.scala | 2 +-
3 files changed, 23 insertions(+), 44 deletions(-)
diff --git
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/ConvertTextStandardNumberParser.scala
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/ConvertTextStandardNumberParser.scala
index f370d10ee..609e2a6e5 100644
---
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/ConvertTextStandardNumberParser.scala
+++
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/ConvertTextStandardNumberParser.scala
@@ -192,43 +192,22 @@ case class ConvertTextStandardNumberParser(
val pos = new ParsePosition(0)
val icuNum: JNumber = df.parse(strToParse, pos) match {
case null => {
- val infNaN: JDouble =
- if (df.isDecimalPatternMatchRequired) {
- // ICU failed to parse. But there is a bug in ICU4J
(ICU-22303) that if there is
- // a decimal in the pattern and we've set that decimal to be
required (due to
- // strict mode), then it will fail to parse Inf/NaN
representations. As a
- // workaround, we clone the DecimalFormat, disable requiring
the decimal, and
- // reparse. We only accept successful Inf/NaN parses
though--everything else is
- // considered a parse error since it meant the decimal point
was missing or
- // wasn't either inf/nan or a valid number. If ICU fixes this
bug, we should
- // remove this infNan variable and its use, as it is likely
pretty expensive to
- // clone, change a setting, and reparse. Fortunately, it is
only in the error
- // case of strict parsing so should be rare.
- pos.setIndex(0)
- val newDF = df.clone().asInstanceOf[DecimalFormat]
- newDF.setDecimalPatternMatchRequired(false)
- newDF.parse(strToParse, pos) match {
- case d: JDouble => {
- Assert.invariant(d.isNaN || d.isInfinite)
- d
- }
- case _ => null
- }
- } else {
- null
- }
-
- if (infNaN != null) {
- infNaN
- } else {
- PE(
- start,
- "Unable to parse %s from text: %s",
- context.optPrimType.get.globalQName,
- str
- )
- return
- }
+ // null indicates that ICU4J was unable to parse the string to a
number matching the
+ // pattern and properties (e.g. grouping separator, decimal
separator)--this is a
+ // processing error. Note that we do not need to consider that the
string might have
+ // been textStandardInfinityRep or textStandardNaNRep like we had
to do for
+ // textStandardZeroRep above. This is because those are handled by
ICU4J--the
+ // textNumberFormatEv is built with a DecimalFormatSymbols
instance that has the
+ // infinity and NaN representations set based on those properties.
This means that
+ // if df.parse() returns null then we know it was not infinity,
NaN, or a number
+ // matching the pattern/properties
+ PE(
+ start,
+ "Unable to parse %s from text: %s",
+ context.optPrimType.get.globalQName,
+ str
+ )
+ return
}
case d: JDouble => {
// ICU returns a Double only if it parsed NaN, Infinity,
-Infinity, or negative
diff --git
a/daffodil-core/src/test/scala/org/apache/daffodil/runtime1/processors/input/TestICU.scala
b/daffodil-core/src/test/scala/org/apache/daffodil/runtime1/processors/input/TestICU.scala
index 010930f72..1c2ae1520 100644
---
a/daffodil-core/src/test/scala/org/apache/daffodil/runtime1/processors/input/TestICU.scala
+++
b/daffodil-core/src/test/scala/org/apache/daffodil/runtime1/processors/input/TestICU.scala
@@ -270,8 +270,8 @@ class TestICU {
// shows that with parseStrict and decimalPatternMatch required, that ICU
requires or
// disallows a decimal point in the data based on whether or not a decimal
point appears in
- // the pattern. Also shows ICU failing to parse infinity/nan when
decimalPatternMatchRequired
- // is true and the pattern contains a decimal.
+ // the pattern. Also shows that ICU 78.1 fixed ICU-22303, now correctly
parsing infinity/nan
+ // when decimalPatternMatchRequired is true and the pattern contains a
decimal.
@Test def test_decimalPatternMatchRequired(): Unit = {
val dfs = new DecimalFormatSymbols(ULocale.US)
@@ -283,8 +283,8 @@ class TestICU {
assertEquals(JLong.valueOf(1), df.parse("1.0", pp))
assertEquals(null, df.parse("1", pp))
- assertEquals(null, df.parse(dfs.getInfinity, pp)) // see ICU-22303
- assertEquals(null, df.parse(dfs.getNaN, pp)) // see ICU-22303
+ assertEquals(JDouble.POSITIVE_INFINITY, df.parse(dfs.getInfinity, pp)) //
see ICU-22303
+ assertEquals(JDouble.NaN, df.parse(dfs.getNaN, pp)) // see ICU-22303
assertEquals("1.0", df.format(1L))
assertEquals(dfs.getInfinity, df.format(JDouble.POSITIVE_INFINITY))
@@ -305,8 +305,8 @@ class TestICU {
assertEquals(JLong.valueOf(1), df.parse("1.0", pp))
assertEquals(null, df.parse("1", pp))
- assertEquals(null, df.parse(dfs.getInfinity, pp)) // see ICU-22303
- assertEquals(null, df.parse(dfs.getNaN, pp)) // see ICU-22303
+ assertEquals(JDouble.POSITIVE_INFINITY, df.parse(dfs.getInfinity, pp)) //
see ICU-22303
+ assertEquals(JDouble.NaN, df.parse(dfs.getNaN, pp)) // see ICU-22303
assertEquals("1", df.format(1L))
assertEquals(dfs.getInfinity, df.format(JDouble.POSITIVE_INFINITY))
diff --git a/project/Dependencies.scala b/project/Dependencies.scala
index 679149e1e..ef804deba 100644
--- a/project/Dependencies.scala
+++ b/project/Dependencies.scala
@@ -25,7 +25,7 @@ object Dependencies {
"com.lihaoyi" %% "os-lib" % "0.11.5", // for writing/compiling C source
files
"org.scala-lang.modules" %% "scala-xml" % "2.4.0",
"org.scala-lang.modules" %% "scala-parser-combinators" % "2.4.0",
- "com.ibm.icu" % "icu4j" % "77.1",
+ "com.ibm.icu" % "icu4j" % "78.1",
("xerces" % "xercesImpl" % "2.12.2").exclude("xml-apis", "xml-apis"),
"xml-resolver" % "xml-resolver" % "1.2",
"commons-io" % "commons-io" % "2.20.0",