This is an automated email from the ASF dual-hosted git repository.

slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git


The following commit(s) were added to refs/heads/main by this push:
     new cdb41212d Use the negative pattern of textNumberPattern to resolve 
padding ambiguities
cdb41212d is described below

commit cdb41212d755c45f23f981c395dd24ab56303b45
Author: Steve Lawrence <[email protected]>
AuthorDate: Thu Jan 11 12:58:10 2024 -0500

    Use the negative pattern of textNumberPattern to resolve padding ambiguities
    
    ICU only uses the positive pattern of the textNumberPattern property to
    determine pad character and position, completely ignoring the negative
    pattern. If the positive pattern has no affix associated with the pad
    character, then there is an ambiguity if the pad character should appear
    before or after the negative affix when unparsing negative numbers. In
    this case, ICU defaults to before the affix, with no way to change it
    via the pattern. This effectively means it is not possible for ICU
    number padding to appear after a negative affix if there is no positive
    affix.
    
    To resolve this ambiguity and allow configuring where pad characters
    appear, we inspect the negative pattern. If both negative and positive
    patterns define padding on the same affix, and the positive pattern has
    an empty string for that affix, then we use the pad position from the
    negative pattern. In all other cases, the pad character in the negative
    pattern is ignored following usual ICU behavior.
    
    For example, a textNumberPattern of "*0####0;-*00" formats a negative
    number with zero padding after the hyphen, whereas normal ICU behavior
    would ignore the negative pattern and zero pad before the hyphen.
    
    Deprecation/Compatibility:
    
    The pad character in the negative part of textNumberPattern is no longer
    ignored if the positive part of textNumberPattern defines a pad
    character without an associated affix (e.g. "*0###0;-*00"). In these
    cases, the position of the pad charcter in the negative part is used to
    define whether padding occurs before or after the negative affix. All
    other cases follow existing rules of textNumberPattern (i.e. the pad
    character in the negative part is ignored).
    
    DAFFODIL-2871
---
 .../grammar/primitives/PrimitivesTextNumber.scala  | 51 ++++++++++++++++++----
 .../core/grammar/primitives/PrimitivesZoned.scala  |  2 +
 .../runtime1/processors/EvTextNumber.scala         |  6 +++
 .../text_number_props/TextNumberProps.tdml         | 36 +++++++++++++++
 .../text_number_props/TestTextNumberProps.scala    |  7 +++
 5 files changed, 94 insertions(+), 8 deletions(-)

diff --git 
a/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesTextNumber.scala
 
b/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesTextNumber.scala
index 648d656b8..95da5f677 100644
--- 
a/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesTextNumber.scala
+++ 
b/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesTextNumber.scala
@@ -28,6 +28,7 @@ import 
org.apache.daffodil.lib.schema.annotation.props.gen.TextNumberRounding
 import org.apache.daffodil.lib.util.Maybe
 import org.apache.daffodil.lib.util.Maybe._
 import org.apache.daffodil.lib.util.MaybeDouble
+import org.apache.daffodil.lib.util.MaybeInt
 import org.apache.daffodil.runtime1.dpath.NodeInfo.PrimType
 import org.apache.daffodil.runtime1.processors.Delimiter
 import org.apache.daffodil.runtime1.processors.TextNumberFormatEv
@@ -38,6 +39,10 @@ import 
org.apache.daffodil.runtime1.processors.unparsers.Unparser
 import org.apache.daffodil.unparsers.runtime1.ConvertTextCombinatorUnparser
 import org.apache.daffodil.unparsers.runtime1.ConvertTextNumberUnparser
 
+import com.ibm.icu.impl.number.AffixPatternProvider
+import com.ibm.icu.impl.number.Padder.PadPosition
+import com.ibm.icu.impl.number.PatternStringParser
+import com.ibm.icu.impl.number.PatternStringParser.ParsedPatternInfo
 import com.ibm.icu.text.DecimalFormat
 
 case class ConvertTextCombinator(e: ElementBase, value: Gram, converter: Gram)
@@ -399,14 +404,20 @@ trait ConvertTextNumberMixin {
     } else pattern
   }
 
-  final protected def checkPatternWithICU(e: ElementBase) = {
-    // Load the pattern to make sure it is valid
+  /**
+   * Validates the textNumberPattern using ICU's PatternStringParser. Although 
this class is
+   * public, it is not part of the ICU API, so it maybe not be stable. 
However, this is what
+   * DecimalFormat uses internally to parse patterns and extract information 
for initialization,
+   * and that likely won't change significantly. Plus, by using this class 
instead of parsing
+   * with DeciamlFormat we can return the ParsedPatternInfo to give callers 
raw access to what
+   * was in the pattern without having to parse it ourselves. This can be 
useful for additional
+   * validation or logic using parts of the pattern ICU might normally ignore.
+   */
+  final protected def checkPatternWithICU(e: ElementBase): ParsedPatternInfo = 
{
     try {
-      if (hasV || hasP) {
-        new DecimalFormat(runtimePattern)
-      } else {
-        new DecimalFormat(pattern)
-      }
+      val patternToCheck = if (hasV || hasP) runtimePattern else pattern
+      val parsedPatternInfo = 
PatternStringParser.parseToPatternInfo(patternToCheck)
+      parsedPatternInfo
     } catch {
       case ex: IllegalArgumentException =>
         if (hasV || hasP) {
@@ -551,7 +562,7 @@ case class ConvertTextStandardNumberPrim(e: ElementBase)
       pattern.startsWith(";"),
       "The positive part of the dfdl:textNumberPattern is required. The 
dfdl:textNumberPattern cannot begin with ';'.",
     )
-    checkPatternWithICU(e)
+    val parsedPatternInfo = checkPatternWithICU(e)
 
     val (roundingIncrement: MaybeDouble, roundingMode) =
       e.textNumberRounding match {
@@ -586,6 +597,29 @@ case class ConvertTextStandardNumberPrim(e: ElementBase)
         Nope
       }
 
+    // ICU does not have a way to set the pad position to after an affix if 
the positive pattern
+    // does not have that affix. For example, "* 0", will always have a pad 
position of
+    // BEFORE_PREFIX, with no way to set it to AFTER_PREFIX because the 
positive pattern has no
+    // prefix. In cases where formats do not have a postive affix but want to 
specify the pad
+    // position to AFTER, we allow them to do so in the negative pattern. For 
example, a pattern
+    // of "* 0;-* 0" will have a pad position of AFTER_PREFIX. ICU normally 
ignores the negative
+    // pattern for pad position. Note that we require the pad char to be 
defined on the same
+    // affix or else it is ignored.
+    val posPadLoc = parsedPatternInfo.positive.paddingLocation
+    val negPadLoc =
+      if (parsedPatternInfo.negative != null) 
parsedPatternInfo.negative.paddingLocation
+      else null
+    val posPrefix = 
parsedPatternInfo.getString(AffixPatternProvider.FLAG_POS_PREFIX)
+    val posSuffix = 
parsedPatternInfo.getString(AffixPatternProvider.FLAG_POS_SUFFIX)
+    val icuPadPosition =
+      (posPadLoc, negPadLoc, posPrefix, posSuffix) match {
+        case (PadPosition.BEFORE_PREFIX, PadPosition.AFTER_PREFIX, "", _) =>
+          MaybeInt(DecimalFormat.PAD_AFTER_PREFIX)
+        case (PadPosition.BEFORE_SUFFIX, PadPosition.AFTER_SUFFIX, _, "") =>
+          MaybeInt(DecimalFormat.PAD_AFTER_SUFFIX)
+        case _ => MaybeInt.Nope
+      }
+
     val ev = new TextNumberFormatEv(
       e.tci,
       decSep,
@@ -599,6 +633,7 @@ case class ConvertTextStandardNumberPrim(e: ElementBase)
       roundingMode,
       roundingIncrement,
       zeroRepsRaw,
+      icuPadPosition,
       e.primType,
     )
     ev.compile(tunable)
diff --git 
a/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesZoned.scala
 
b/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesZoned.scala
index d47694f08..060613858 100644
--- 
a/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesZoned.scala
+++ 
b/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/PrimitivesZoned.scala
@@ -27,6 +27,7 @@ import 
org.apache.daffodil.lib.util.DecimalUtils.OverpunchLocation
 import org.apache.daffodil.lib.util.Maybe
 import org.apache.daffodil.lib.util.Maybe._
 import org.apache.daffodil.lib.util.MaybeDouble
+import org.apache.daffodil.lib.util.MaybeInt
 import org.apache.daffodil.runtime1.dpath.NodeInfo.PrimType
 import org.apache.daffodil.runtime1.processors.TextNumberFormatEv
 import 
org.apache.daffodil.runtime1.processors.parsers.ConvertZonedCombinatorParser
@@ -179,6 +180,7 @@ case class ConvertZonedNumberPrim(e: ElementBase)
       roundingMode,
       roundingIncrement,
       Nil,
+      MaybeInt.Nope,
       e.primType,
     )
     ev.compile(tunable)
diff --git 
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/EvTextNumber.scala
 
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/EvTextNumber.scala
index d4de6012f..05bba8dfa 100644
--- 
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/EvTextNumber.scala
+++ 
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/EvTextNumber.scala
@@ -32,6 +32,7 @@ import org.apache.daffodil.lib.util.Maybe
 import org.apache.daffodil.lib.util.Maybe._
 import org.apache.daffodil.lib.util.MaybeChar
 import org.apache.daffodil.lib.util.MaybeDouble
+import org.apache.daffodil.lib.util.MaybeInt
 import org.apache.daffodil.runtime1.dpath.NodeInfo.PrimType
 import org.apache.daffodil.runtime1.dsom._
 
@@ -81,6 +82,7 @@ class TextNumberFormatEv(
   roundingMode: Maybe[TextNumberRoundingMode],
   roundingIncrement: MaybeDouble,
   zeroRepsRaw: List[String],
+  icuPadPosition: MaybeInt,
   primType: PrimType,
 ) extends Evaluatable[DecimalFormat](tci)
   with InfosetCachedEvaluatable[DecimalFormat] {
@@ -186,6 +188,10 @@ class TextNumberFormatEv(
       }
     }
 
+    if (icuPadPosition.isDefined) {
+      df.setPadPosition(icuPadPosition.get)
+    }
+
     df
   }
 
diff --git 
a/daffodil-test/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml
 
b/daffodil-test/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml
index cac85281f..6b28b9e54 100644
--- 
a/daffodil-test/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml
+++ 
b/daffodil-test/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml
@@ -321,6 +321,20 @@
     <xs:element name="tnp103" type="xs:integer" 
dfdl:textNumberPattern="###0.0##"
       dfdl:textNumberCheckPolicy="strict" />
 
+    <!--
+      pad position is after prefix because of ambiguity in the postive pattern
+      resolved by the negative pattern. Otherwise ICU defaults to before prefix
+    -->
+    <xs:element name="tnp104" type="xs:integer" 
dfdl:textNumberPattern="*_####0;(*_0)"
+      dfdl:textNumberCheckPolicy="strict" />
+
+    <!--
+      pad position is after suffix because of ambiguity in the postive pattern
+      resolved by the negative pattern. Otherwise ICU defaults to before suffix
+    -->
+    <xs:element name="tnp105" type="xs:integer" 
dfdl:textNumberPattern="####0*_;(0)*_"
+      dfdl:textNumberCheckPolicy="strict" />
+
   </tdml:defineSchema>
   
   <tdml:defineSchema name="textNumberPattern2"  elementFormDefault="qualified">
@@ -4504,4 +4518,26 @@
     </tdml:errors>
   </tdml:parserTestCase>
 
+  <tdml:parserTestCase name="textNumberPaddingAmbiguity01" root="tnp104" 
model="textNumberPattern">
+    <tdml:document>
+      <tdml:documentPart type="text">(__1)</tdml:documentPart>
+    </tdml:document>
+    <tdml:infoset>
+      <tdml:dfdlInfoset>
+        <tnp104>-1</tnp104>
+      </tdml:dfdlInfoset>
+    </tdml:infoset>
+  </tdml:parserTestCase>
+
+  <tdml:parserTestCase name="textNumberPaddingAmbiguity02" root="tnp105" 
model="textNumberPattern">
+    <tdml:document>
+      <tdml:documentPart type="text">(1)__</tdml:documentPart>
+    </tdml:document>
+    <tdml:infoset>
+      <tdml:dfdlInfoset>
+        <tnp105>-1</tnp105>
+      </tdml:dfdlInfoset>
+    </tdml:infoset>
+  </tdml:parserTestCase>
+
 </tdml:testSuite>
diff --git 
a/daffodil-test/src/test/scala/org/apache/daffodil/section13/text_number_props/TestTextNumberProps.scala
 
b/daffodil-test/src/test/scala/org/apache/daffodil/section13/text_number_props/TestTextNumberProps.scala
index 787c6819e..4a4a6a5ec 100644
--- 
a/daffodil-test/src/test/scala/org/apache/daffodil/section13/text_number_props/TestTextNumberProps.scala
+++ 
b/daffodil-test/src/test/scala/org/apache/daffodil/section13/text_number_props/TestTextNumberProps.scala
@@ -523,4 +523,11 @@ class TestTextNumberProps {
   @Test def test_textNumberIntegerWithDecimal04(): Unit = {
     runner.runOneTest("textNumberIntegerWithDecimal04")
   }
+
+  @Test def test_textNumberPaddingAmbiguity01(): Unit = {
+    runner.runOneTest("textNumberPaddingAmbiguity01")
+  }
+  @Test def test_textNumberPaddingAmbiguity02(): Unit = {
+    runner.runOneTest("textNumberPaddingAmbiguity02")
+  }
 }

Reply via email to