This is an automated email from the ASF dual-hosted git repository.
slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git
The following commit(s) were added to refs/heads/main by this push:
new cdb41212d Use the negative pattern of textNumberPattern to resolve
padding ambiguities
cdb41212d is described below
commit cdb41212d755c45f23f981c395dd24ab56303b45
Author: Steve Lawrence <[email protected]>
AuthorDate: Thu Jan 11 12:58:10 2024 -0500
Use the negative pattern of textNumberPattern to resolve padding ambiguities
ICU only uses the positive pattern of the textNumberPattern property to
determine pad character and position, completely ignoring the negative
pattern. If the positive pattern has no affix associated with the pad
character, then there is an ambiguity if the pad character should appear
before or after the negative affix when unparsing negative numbers. In
this case, ICU defaults to before the affix, with no way to change it
via the pattern. This effectively means it is not possible for ICU
number padding to appear after a negative affix if there is no positive
affix.
To resolve this ambiguity and allow configuring where pad characters
appear, we inspect the negative pattern. If both negative and positive
patterns define padding on the same affix, and the positive pattern has
an empty string for that affix, then we use the pad position from the
negative pattern. In all other cases, the pad character in the negative
pattern is ignored following usual ICU behavior.
For example, a textNumberPattern of "*0####0;-*00" formats a negative
number with zero padding after the hyphen, whereas normal ICU behavior
would ignore the negative pattern and zero pad before the hyphen.
Deprecation/Compatibility:
The pad character in the negative part of textNumberPattern is no longer
ignored if the positive part of textNumberPattern defines a pad
character without an associated affix (e.g. "*0###0;-*00"). In these
cases, the position of the pad charcter in the negative part is used to
define whether padding occurs before or after the negative affix. All
other cases follow existing rules of textNumberPattern (i.e. the pad
character in the negative part is ignored).
DAFFODIL-2871
---
.../grammar/primitives/PrimitivesTextNumber.scala | 51 ++++++++++++++++++----
.../core/grammar/primitives/PrimitivesZoned.scala | 2 +
.../runtime1/processors/EvTextNumber.scala | 6 +++
.../text_number_props/TextNumberProps.tdml | 36 +++++++++++++++
.../text_number_props/TestTextNumberProps.scala | 7 +++
5 files changed, 94 insertions(+), 8 deletions(-)
diff --git
a/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesTextNumber.scala
b/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesTextNumber.scala
index 648d656b8..95da5f677 100644
---
a/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesTextNumber.scala
+++
b/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesTextNumber.scala
@@ -28,6 +28,7 @@ import
org.apache.daffodil.lib.schema.annotation.props.gen.TextNumberRounding
import org.apache.daffodil.lib.util.Maybe
import org.apache.daffodil.lib.util.Maybe._
import org.apache.daffodil.lib.util.MaybeDouble
+import org.apache.daffodil.lib.util.MaybeInt
import org.apache.daffodil.runtime1.dpath.NodeInfo.PrimType
import org.apache.daffodil.runtime1.processors.Delimiter
import org.apache.daffodil.runtime1.processors.TextNumberFormatEv
@@ -38,6 +39,10 @@ import
org.apache.daffodil.runtime1.processors.unparsers.Unparser
import org.apache.daffodil.unparsers.runtime1.ConvertTextCombinatorUnparser
import org.apache.daffodil.unparsers.runtime1.ConvertTextNumberUnparser
+import com.ibm.icu.impl.number.AffixPatternProvider
+import com.ibm.icu.impl.number.Padder.PadPosition
+import com.ibm.icu.impl.number.PatternStringParser
+import com.ibm.icu.impl.number.PatternStringParser.ParsedPatternInfo
import com.ibm.icu.text.DecimalFormat
case class ConvertTextCombinator(e: ElementBase, value: Gram, converter: Gram)
@@ -399,14 +404,20 @@ trait ConvertTextNumberMixin {
} else pattern
}
- final protected def checkPatternWithICU(e: ElementBase) = {
- // Load the pattern to make sure it is valid
+ /**
+ * Validates the textNumberPattern using ICU's PatternStringParser. Although
this class is
+ * public, it is not part of the ICU API, so it maybe not be stable.
However, this is what
+ * DecimalFormat uses internally to parse patterns and extract information
for initialization,
+ * and that likely won't change significantly. Plus, by using this class
instead of parsing
+ * with DeciamlFormat we can return the ParsedPatternInfo to give callers
raw access to what
+ * was in the pattern without having to parse it ourselves. This can be
useful for additional
+ * validation or logic using parts of the pattern ICU might normally ignore.
+ */
+ final protected def checkPatternWithICU(e: ElementBase): ParsedPatternInfo =
{
try {
- if (hasV || hasP) {
- new DecimalFormat(runtimePattern)
- } else {
- new DecimalFormat(pattern)
- }
+ val patternToCheck = if (hasV || hasP) runtimePattern else pattern
+ val parsedPatternInfo =
PatternStringParser.parseToPatternInfo(patternToCheck)
+ parsedPatternInfo
} catch {
case ex: IllegalArgumentException =>
if (hasV || hasP) {
@@ -551,7 +562,7 @@ case class ConvertTextStandardNumberPrim(e: ElementBase)
pattern.startsWith(";"),
"The positive part of the dfdl:textNumberPattern is required. The
dfdl:textNumberPattern cannot begin with ';'.",
)
- checkPatternWithICU(e)
+ val parsedPatternInfo = checkPatternWithICU(e)
val (roundingIncrement: MaybeDouble, roundingMode) =
e.textNumberRounding match {
@@ -586,6 +597,29 @@ case class ConvertTextStandardNumberPrim(e: ElementBase)
Nope
}
+ // ICU does not have a way to set the pad position to after an affix if
the positive pattern
+ // does not have that affix. For example, "* 0", will always have a pad
position of
+ // BEFORE_PREFIX, with no way to set it to AFTER_PREFIX because the
positive pattern has no
+ // prefix. In cases where formats do not have a postive affix but want to
specify the pad
+ // position to AFTER, we allow them to do so in the negative pattern. For
example, a pattern
+ // of "* 0;-* 0" will have a pad position of AFTER_PREFIX. ICU normally
ignores the negative
+ // pattern for pad position. Note that we require the pad char to be
defined on the same
+ // affix or else it is ignored.
+ val posPadLoc = parsedPatternInfo.positive.paddingLocation
+ val negPadLoc =
+ if (parsedPatternInfo.negative != null)
parsedPatternInfo.negative.paddingLocation
+ else null
+ val posPrefix =
parsedPatternInfo.getString(AffixPatternProvider.FLAG_POS_PREFIX)
+ val posSuffix =
parsedPatternInfo.getString(AffixPatternProvider.FLAG_POS_SUFFIX)
+ val icuPadPosition =
+ (posPadLoc, negPadLoc, posPrefix, posSuffix) match {
+ case (PadPosition.BEFORE_PREFIX, PadPosition.AFTER_PREFIX, "", _) =>
+ MaybeInt(DecimalFormat.PAD_AFTER_PREFIX)
+ case (PadPosition.BEFORE_SUFFIX, PadPosition.AFTER_SUFFIX, _, "") =>
+ MaybeInt(DecimalFormat.PAD_AFTER_SUFFIX)
+ case _ => MaybeInt.Nope
+ }
+
val ev = new TextNumberFormatEv(
e.tci,
decSep,
@@ -599,6 +633,7 @@ case class ConvertTextStandardNumberPrim(e: ElementBase)
roundingMode,
roundingIncrement,
zeroRepsRaw,
+ icuPadPosition,
e.primType,
)
ev.compile(tunable)
diff --git
a/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesZoned.scala
b/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesZoned.scala
index d47694f08..060613858 100644
---
a/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesZoned.scala
+++
b/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesZoned.scala
@@ -27,6 +27,7 @@ import
org.apache.daffodil.lib.util.DecimalUtils.OverpunchLocation
import org.apache.daffodil.lib.util.Maybe
import org.apache.daffodil.lib.util.Maybe._
import org.apache.daffodil.lib.util.MaybeDouble
+import org.apache.daffodil.lib.util.MaybeInt
import org.apache.daffodil.runtime1.dpath.NodeInfo.PrimType
import org.apache.daffodil.runtime1.processors.TextNumberFormatEv
import
org.apache.daffodil.runtime1.processors.parsers.ConvertZonedCombinatorParser
@@ -179,6 +180,7 @@ case class ConvertZonedNumberPrim(e: ElementBase)
roundingMode,
roundingIncrement,
Nil,
+ MaybeInt.Nope,
e.primType,
)
ev.compile(tunable)
diff --git
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/EvTextNumber.scala
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/EvTextNumber.scala
index d4de6012f..05bba8dfa 100644
---
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/EvTextNumber.scala
+++
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/EvTextNumber.scala
@@ -32,6 +32,7 @@ import org.apache.daffodil.lib.util.Maybe
import org.apache.daffodil.lib.util.Maybe._
import org.apache.daffodil.lib.util.MaybeChar
import org.apache.daffodil.lib.util.MaybeDouble
+import org.apache.daffodil.lib.util.MaybeInt
import org.apache.daffodil.runtime1.dpath.NodeInfo.PrimType
import org.apache.daffodil.runtime1.dsom._
@@ -81,6 +82,7 @@ class TextNumberFormatEv(
roundingMode: Maybe[TextNumberRoundingMode],
roundingIncrement: MaybeDouble,
zeroRepsRaw: List[String],
+ icuPadPosition: MaybeInt,
primType: PrimType,
) extends Evaluatable[DecimalFormat](tci)
with InfosetCachedEvaluatable[DecimalFormat] {
@@ -186,6 +188,10 @@ class TextNumberFormatEv(
}
}
+ if (icuPadPosition.isDefined) {
+ df.setPadPosition(icuPadPosition.get)
+ }
+
df
}
diff --git
a/daffodil-test/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml
b/daffodil-test/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml
index cac85281f..6b28b9e54 100644
---
a/daffodil-test/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml
+++
b/daffodil-test/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml
@@ -321,6 +321,20 @@
<xs:element name="tnp103" type="xs:integer"
dfdl:textNumberPattern="###0.0##"
dfdl:textNumberCheckPolicy="strict" />
+ <!--
+ pad position is after prefix because of ambiguity in the postive pattern
+ resolved by the negative pattern. Otherwise ICU defaults to before prefix
+ -->
+ <xs:element name="tnp104" type="xs:integer"
dfdl:textNumberPattern="*_####0;(*_0)"
+ dfdl:textNumberCheckPolicy="strict" />
+
+ <!--
+ pad position is after suffix because of ambiguity in the postive pattern
+ resolved by the negative pattern. Otherwise ICU defaults to before suffix
+ -->
+ <xs:element name="tnp105" type="xs:integer"
dfdl:textNumberPattern="####0*_;(0)*_"
+ dfdl:textNumberCheckPolicy="strict" />
+
</tdml:defineSchema>
<tdml:defineSchema name="textNumberPattern2" elementFormDefault="qualified">
@@ -4504,4 +4518,26 @@
</tdml:errors>
</tdml:parserTestCase>
+ <tdml:parserTestCase name="textNumberPaddingAmbiguity01" root="tnp104"
model="textNumberPattern">
+ <tdml:document>
+ <tdml:documentPart type="text">(__1)</tdml:documentPart>
+ </tdml:document>
+ <tdml:infoset>
+ <tdml:dfdlInfoset>
+ <tnp104>-1</tnp104>
+ </tdml:dfdlInfoset>
+ </tdml:infoset>
+ </tdml:parserTestCase>
+
+ <tdml:parserTestCase name="textNumberPaddingAmbiguity02" root="tnp105"
model="textNumberPattern">
+ <tdml:document>
+ <tdml:documentPart type="text">(1)__</tdml:documentPart>
+ </tdml:document>
+ <tdml:infoset>
+ <tdml:dfdlInfoset>
+ <tnp105>-1</tnp105>
+ </tdml:dfdlInfoset>
+ </tdml:infoset>
+ </tdml:parserTestCase>
+
</tdml:testSuite>
diff --git
a/daffodil-test/src/test/scala/org/apache/daffodil/section13/text_number_props/TestTextNumberProps.scala
b/daffodil-test/src/test/scala/org/apache/daffodil/section13/text_number_props/TestTextNumberProps.scala
index 787c6819e..4a4a6a5ec 100644
---
a/daffodil-test/src/test/scala/org/apache/daffodil/section13/text_number_props/TestTextNumberProps.scala
+++
b/daffodil-test/src/test/scala/org/apache/daffodil/section13/text_number_props/TestTextNumberProps.scala
@@ -523,4 +523,11 @@ class TestTextNumberProps {
@Test def test_textNumberIntegerWithDecimal04(): Unit = {
runner.runOneTest("textNumberIntegerWithDecimal04")
}
+
+ @Test def test_textNumberPaddingAmbiguity01(): Unit = {
+ runner.runOneTest("textNumberPaddingAmbiguity01")
+ }
+ @Test def test_textNumberPaddingAmbiguity02(): Unit = {
+ runner.runOneTest("textNumberPaddingAmbiguity02")
+ }
}