mbeckerle commented on code in PR #900:
URL: https://github.com/apache/daffodil/pull/900#discussion_r1067579484


##########
daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesTextNumber.scala:
##########
@@ -46,8 +45,249 @@ case class ConvertTextCombinator(e: ElementBase, value: 
Gram, converter: Gram)
   override lazy val unparser = new 
ConvertTextCombinatorUnparser(e.termRuntimeData, value.unparser, 
converter.unparser)
 }
 
-case class ConvertTextNumberPrim(e: ElementBase)
-  extends Terminal(e, true) {
+// This is a separate object for unit testing purposes.
+private[primitives]
+object TextNumberPatternUtils {
+
+  /**
+   * A regex which matches textNumberPatterns that legally use the V
+   * (implied decimal point) character.
+   */
+   private[primitives] lazy val vregexStandard = {
+    //  DFDL v1.0 Spec says
+    //  It is a Schema Definition Error if any symbols other than "0", "1" 
through "9" or #
+    //  are used in the vpinteger region of the pattern.
+    //
+    // The prefix and suffix chars can surround the vpinteger region, and 
there can be
+    // a positive and a negative pattern.
+    //
+    // The prefix and suffix cannot be digits, # or P or V. No quoted chars in 
prefix either.
+    //
+     val prefixChars = """[^0-9#PV']?""" // same for suffix.
+     val beforeVChars = """#*[0-9]+"""
+     val afterVChars = """[0-9]+"""
+     //
+     // Each of the capture groups is defined here in order
+     //
+     val posPrefix = s"""($prefixChars)"""
+     val posBeforeV = s"""($beforeVChars)"""
+     val posAfterV = s"""($afterVChars)"""
+     val posSuffix = posPrefix
+     val negPrefix = posPrefix
+     val negBeforeV = posBeforeV
+     val negAfterV = posAfterV
+     val negSuffix = posPrefix
+     // don't forget the ^ and $ (start of data, end of data) because we want
+     // the match to consume all the characters, starting at the beginning.
+    val vPattern=
+     
s"""^${posPrefix}${posBeforeV}V${posAfterV}${posSuffix}(?:;${negPrefix}${negBeforeV}V${negAfterV}${negSuffix})?$$"""
+    val re = vPattern.r
+    re
+  }
+
+
+
+  private[primitives] lazy val vregexZoned= {
+    // Note: for zoned, can only have a positive pattern
+    // Prefix or suffix can only be '+' character, to
+    // indicate leading or trailing sign, and can only
+    // one of those.
+    //
+    // Also we're not allowing the # character, since I think that
+    // makes no sense for zoned with virtual decimal point.
+    //
+    val prefixChars = """\+?""" // only + for prefix/suffix
+    val aroundVChars = """[0-9]+"""
+    //
+    // capture groups are defined here in sequence
+    //
+    val prefix = s"""($prefixChars)"""
+    val beforeV = s"""($aroundVChars)"""
+    val afterV = beforeV
+    val suffix = prefix
+    val vPattern = s"""^${prefix}${beforeV}V${afterV}${suffix}$$""" // only 
positive pattern allowed
+    val re = vPattern.r
+    re
+  }
+
+  /**
+   * Checks a pattern for suitability with V (virtual decimal point) in the 
pattern
+   * in the context of zoned textNumberRep.
+   *
+   * This is broken out separately for unit testing purposes.

Review Comment:
   Addressed in https://github.com/apache/daffodil/pull/910



##########
daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesTextNumber.scala:
##########
@@ -46,8 +45,249 @@ case class ConvertTextCombinator(e: ElementBase, value: 
Gram, converter: Gram)
   override lazy val unparser = new 
ConvertTextCombinatorUnparser(e.termRuntimeData, value.unparser, 
converter.unparser)
 }
 
-case class ConvertTextNumberPrim(e: ElementBase)
-  extends Terminal(e, true) {
+// This is a separate object for unit testing purposes.
+private[primitives]
+object TextNumberPatternUtils {
+
+  /**
+   * A regex which matches textNumberPatterns that legally use the V
+   * (implied decimal point) character.
+   */
+   private[primitives] lazy val vregexStandard = {
+    //  DFDL v1.0 Spec says
+    //  It is a Schema Definition Error if any symbols other than "0", "1" 
through "9" or #
+    //  are used in the vpinteger region of the pattern.
+    //
+    // The prefix and suffix chars can surround the vpinteger region, and 
there can be
+    // a positive and a negative pattern.
+    //
+    // The prefix and suffix cannot be digits, # or P or V. No quoted chars in 
prefix either.
+    //
+     val prefixChars = """[^0-9#PV']?""" // same for suffix.
+     val beforeVChars = """#*[0-9]+"""
+     val afterVChars = """[0-9]+"""
+     //
+     // Each of the capture groups is defined here in order
+     //
+     val posPrefix = s"""($prefixChars)"""
+     val posBeforeV = s"""($beforeVChars)"""
+     val posAfterV = s"""($afterVChars)"""
+     val posSuffix = posPrefix
+     val negPrefix = posPrefix
+     val negBeforeV = posBeforeV
+     val negAfterV = posAfterV
+     val negSuffix = posPrefix
+     // don't forget the ^ and $ (start of data, end of data) because we want
+     // the match to consume all the characters, starting at the beginning.
+    val vPattern=
+     
s"""^${posPrefix}${posBeforeV}V${posAfterV}${posSuffix}(?:;${negPrefix}${negBeforeV}V${negAfterV}${negSuffix})?$$"""
+    val re = vPattern.r
+    re
+  }
+
+
+
+  private[primitives] lazy val vregexZoned= {
+    // Note: for zoned, can only have a positive pattern
+    // Prefix or suffix can only be '+' character, to
+    // indicate leading or trailing sign, and can only
+    // one of those.
+    //
+    // Also we're not allowing the # character, since I think that
+    // makes no sense for zoned with virtual decimal point.
+    //
+    val prefixChars = """\+?""" // only + for prefix/suffix
+    val aroundVChars = """[0-9]+"""
+    //
+    // capture groups are defined here in sequence
+    //
+    val prefix = s"""($prefixChars)"""
+    val beforeV = s"""($aroundVChars)"""
+    val afterV = beforeV
+    val suffix = prefix
+    val vPattern = s"""^${prefix}${beforeV}V${afterV}${suffix}$$""" // only 
positive pattern allowed
+    val re = vPattern.r
+    re
+  }
+
+  /**
+   * Checks a pattern for suitability with V (virtual decimal point) in the 
pattern
+   * in the context of zoned textNumberRep.
+   *
+   * This is broken out separately for unit testing purposes.
+   *
+   * @param patternStripped the dfdl:textNumberPattern pattern - with all 
quoting removed.
+   * @return None if the pattern is illegal syntax for use with V (virtual 
decimal point)
+   *         otherwise Some(N) where N is the number of digits to the right of 
the V character.
+   */
+  private[primitives] def textDecimalVirtualPointForZoned(patternStripped: 
String): Option[Int] = {
+    val r = TextNumberPatternUtils.vregexZoned
+    r.findFirstMatchIn(patternStripped) match {
+      // note: cannot have both a prefix and suffix. Only one of them.
+      case Some(r(pre, _, afterV, suf)) if (pre.length + suf.length <= 1) => 
Some(afterV.length)
+      case _ => None
+    }
+  }
+
+  /**
+   * The apos character (') is the quoting character in ICU patterns (and DFDL 
textNumberPattern).
+   * This removes any unquoted P or V characters (which are not implemented by 
ICU)
+   * leaving a pattern string that is suitable for use with ICU.
+   * @param pattern the textNumberPattern string
+   * @return the pattern string with all unquoted P and unquoted V removed.
+   */
+  def removeUnquotedPV(pattern: String) : String = {
+    val uniqueQQString = "alsdflslkjskjslkkjlkkjfppooiipsldsflj"
+    // A single regex that matches an unquoted character
+    // where the quoting char is self quoting requires
+    // a zero-width look behind that matches a potentially
+    // unbounded number of quote chars. That's not allowed.
+    //
+    // Consider ''''''P. We want a regex that matches only the P here
+    // because it is preceded by an even number of quotes.
+    //
+    // I did some tests, and the formulations you think might work
+    // such as from stack-overflow, don't work.
+    // (See test howNotToUseRegexLookBehindWithReplaceAll)
+    //
+    // So we use this brute force technique of replacing all ''
+    // first, so we have only single quotes to deal with.
+    pattern.replaceAll("''", uniqueQQString).
+      replaceAll("(?<!')P", "").
+      replaceAll("(?<!')V", "").
+      replaceAll(uniqueQQString, "''")
+  }
+}
+
+trait ConvertTextNumberMixin {
+
+  def e: ElementBase
+
+  /**
+   * Convenience. The original value of textNumberPattern
+   */
+  @inline
+  final protected def pattern = e.textNumberPattern
+
+  /**
+   * Checks a pattern for suitability with V (virtual decimal point) in the 
pattern
+   * in the context of the corresponding textNumberRep. Computes number of 
digits to right of the V.
+   *
+   * SDE if pattern has a syntax error.
+   *
+   * @return the number of digits to the right of the V character. Must be 1 
or greater.
+   *
+   */
+  protected def textDecimalVirtualPointFromPattern: Int
+
+  /**
+   * The pattern with escaped characters removed.
+   * This can be reliably searched for pattern characters,
+   * but cannot be used as an operational pattern given that
+   * potentially lots of things have been removed from it.
+   */
+  final protected lazy val patternStripped = {
+    // note: tick == apos == ' == single quote.
+    // First remove entirely all escaped ticks ie., ''
+    val noEscapedTicksRegex = """''""".r
+    val patternNoEscapedTicks = noEscapedTicksRegex.replaceAllIn(pattern, "")
+    // Next remove all tick-escaped characters entirely
+    val noQuotedRegex = """'[^']+'""".r
+    val patternNoQuoted = noQuotedRegex.replaceAllIn(patternNoEscapedTicks, "")
+    // the remaining string contains only pattern special characters
+    // and regular non-pattern characters
+    patternNoQuoted
+  }
+
+  protected final lazy val hasV = patternStripped.contains("V")
+  protected final lazy val hasP = patternStripped.contains("P")
+
+  /**
+   * analogous to the property dfdl:binaryDecimalVirtualPoint
+   *
+   * Value is 0 if there is no virtual decimal point.
+   * Value is the number of digits to the right of the 'V'
+   */
+  final lazy val textDecimalVirtualPoint: Int = {
+    if (!hasV && !hasP) 0 // no virtual point
+    else {
+      if (hasP) {
+        e.notYetImplemented("textNumberPattern with P symbol")
+      }
+      Assert.invariant(hasV)
+      //
+      // check for things incompatible with "V"
+      //
+      val virtualPoint = textDecimalVirtualPointFromPattern
+      Assert.invariant(virtualPoint >= 1) // if this fails the regex is broken.
+      virtualPoint
+    }
+  }
+
+  //
+  // We need 2 patterns
+  // 1. The original textNumberPattern string from the schema
+  // 2. Same but with the P or V removed.
+  //
+  // We need to use the original textNumberPattern in diagnostic messages
+  // since that's what the schema author wrote and expects to see.
+  // But if the pattern has P or V, then we need ICU at runtime to use the 
original
+  // but with the P or V removed since the P and V don't actually represent 
anything in the data.
+  final protected lazy val runtimePattern = {
+    if (hasV || hasP) {
+      val patternWithoutPV = TextNumberPatternUtils.removeUnquotedPV(pattern)
+      patternWithoutPV
+    }
+    else pattern
+  }
+
+  final protected def checkPatternWithICU(e: ElementBase) = {
+    // Load the pattern to make sure it is valid
+    try {
+      if (hasV || hasP) {
+        new DecimalFormat(runtimePattern)
+      } else {
+        new DecimalFormat(pattern)
+      }
+    } catch {
+      case ex: IllegalArgumentException =>
+        if (hasV || hasP) {
+          // we don't know what the diagnostic message will say here
+          // since it is from the ICU library.
+          // That library has only seen the pattern with the P and V
+          // removed. The messages might show that pattern which would
+          // confuse the schema author since that's not the pattern
+          // they wrote.
+          //
+          // So we try to explain....
+          e.SDE(
+            """Invalid textNumberPattern.
+              | The errors are about the pattern with the P (decimal scaling 
position)
+              | and V (virtual decimal point) characters removed: 
%s""".stripMargin, ex)
+        } else {
+          e.SDE("Invalid textNumberPattern: %s", ex)
+        }
+    }
+  }
+}
+
+case class ConvertTextStandardNumberPrim(e: ElementBase)
+  extends Terminal(e, true)
+  with ConvertTextNumberMixin {
+
+  final override protected lazy val textDecimalVirtualPointFromPattern: Int = {
+    val r = TextNumberPatternUtils.vregexStandard
+    r.findFirstMatchIn(patternStripped) match {
+      case Some(r(_, _, afterV, _, _, _, _, _)) => afterV.length
+      case None =>
+        e.SDE(
+          s"""The dfdl:textNumberPattern '%s' contains 'V' (virtual decimal 
point).

Review Comment:
   This is only called in a context where hasV is true. 
   I will add an Assert.invariant(hasV) to this to make that clear. 
   Failure to match this regex at all means that the pattern string DOES have a 
V, but doesn't otherwise obey the rules for patterns with V. 



##########
daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesTextNumber.scala:
##########
@@ -46,8 +45,249 @@ case class ConvertTextCombinator(e: ElementBase, value: 
Gram, converter: Gram)
   override lazy val unparser = new 
ConvertTextCombinatorUnparser(e.termRuntimeData, value.unparser, 
converter.unparser)
 }
 
-case class ConvertTextNumberPrim(e: ElementBase)
-  extends Terminal(e, true) {
+// This is a separate object for unit testing purposes.
+private[primitives]
+object TextNumberPatternUtils {
+
+  /**
+   * A regex which matches textNumberPatterns that legally use the V
+   * (implied decimal point) character.
+   */
+   private[primitives] lazy val vregexStandard = {
+    //  DFDL v1.0 Spec says
+    //  It is a Schema Definition Error if any symbols other than "0", "1" 
through "9" or #
+    //  are used in the vpinteger region of the pattern.
+    //
+    // The prefix and suffix chars can surround the vpinteger region, and 
there can be
+    // a positive and a negative pattern.
+    //
+    // The prefix and suffix cannot be digits, # or P or V. No quoted chars in 
prefix either.
+    //
+     val prefixChars = """[^0-9#PV']?""" // same for suffix.
+     val beforeVChars = """#*[0-9]+"""
+     val afterVChars = """[0-9]+"""
+     //
+     // Each of the capture groups is defined here in order
+     //
+     val posPrefix = s"""($prefixChars)"""
+     val posBeforeV = s"""($beforeVChars)"""
+     val posAfterV = s"""($afterVChars)"""
+     val posSuffix = posPrefix
+     val negPrefix = posPrefix
+     val negBeforeV = posBeforeV
+     val negAfterV = posAfterV
+     val negSuffix = posPrefix
+     // don't forget the ^ and $ (start of data, end of data) because we want
+     // the match to consume all the characters, starting at the beginning.
+    val vPattern=
+     
s"""^${posPrefix}${posBeforeV}V${posAfterV}${posSuffix}(?:;${negPrefix}${negBeforeV}V${negAfterV}${negSuffix})?$$"""
+    val re = vPattern.r
+    re
+  }
+
+
+
+  private[primitives] lazy val vregexZoned= {
+    // Note: for zoned, can only have a positive pattern
+    // Prefix or suffix can only be '+' character, to
+    // indicate leading or trailing sign, and can only
+    // one of those.
+    //
+    // Also we're not allowing the # character, since I think that
+    // makes no sense for zoned with virtual decimal point.
+    //
+    val prefixChars = """\+?""" // only + for prefix/suffix
+    val aroundVChars = """[0-9]+"""
+    //
+    // capture groups are defined here in sequence
+    //
+    val prefix = s"""($prefixChars)"""
+    val beforeV = s"""($aroundVChars)"""
+    val afterV = beforeV
+    val suffix = prefix
+    val vPattern = s"""^${prefix}${beforeV}V${afterV}${suffix}$$""" // only 
positive pattern allowed
+    val re = vPattern.r
+    re
+  }
+
+  /**
+   * Checks a pattern for suitability with V (virtual decimal point) in the 
pattern
+   * in the context of zoned textNumberRep.
+   *
+   * This is broken out separately for unit testing purposes.
+   *
+   * @param patternStripped the dfdl:textNumberPattern pattern - with all 
quoting removed.
+   * @return None if the pattern is illegal syntax for use with V (virtual 
decimal point)
+   *         otherwise Some(N) where N is the number of digits to the right of 
the V character.
+   */
+  private[primitives] def textDecimalVirtualPointForZoned(patternStripped: 
String): Option[Int] = {
+    val r = TextNumberPatternUtils.vregexZoned
+    r.findFirstMatchIn(patternStripped) match {
+      // note: cannot have both a prefix and suffix. Only one of them.
+      case Some(r(pre, _, afterV, suf)) if (pre.length + suf.length <= 1) => 
Some(afterV.length)
+      case _ => None
+    }
+  }
+
+  /**
+   * The apos character (') is the quoting character in ICU patterns (and DFDL 
textNumberPattern).
+   * This removes any unquoted P or V characters (which are not implemented by 
ICU)
+   * leaving a pattern string that is suitable for use with ICU.
+   * @param pattern the textNumberPattern string
+   * @return the pattern string with all unquoted P and unquoted V removed.
+   */
+  def removeUnquotedPV(pattern: String) : String = {
+    val uniqueQQString = "alsdflslkjskjslkkjlkkjfppooiipsldsflj"
+    // A single regex that matches an unquoted character
+    // where the quoting char is self quoting requires
+    // a zero-width look behind that matches a potentially
+    // unbounded number of quote chars. That's not allowed.
+    //
+    // Consider ''''''P. We want a regex that matches only the P here
+    // because it is preceded by an even number of quotes.
+    //
+    // I did some tests, and the formulations you think might work
+    // such as from stack-overflow, don't work.
+    // (See test howNotToUseRegexLookBehindWithReplaceAll)
+    //
+    // So we use this brute force technique of replacing all ''
+    // first, so we have only single quotes to deal with.
+    pattern.replaceAll("''", uniqueQQString).
+      replaceAll("(?<!')P", "").
+      replaceAll("(?<!')V", "").

Review Comment:
   I will do something like this. 



##########
daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesTextNumber.scala:
##########
@@ -46,8 +45,249 @@ case class ConvertTextCombinator(e: ElementBase, value: 
Gram, converter: Gram)
   override lazy val unparser = new 
ConvertTextCombinatorUnparser(e.termRuntimeData, value.unparser, 
converter.unparser)
 }
 
-case class ConvertTextNumberPrim(e: ElementBase)
-  extends Terminal(e, true) {
+// This is a separate object for unit testing purposes.

Review Comment:
   Addressed in https://github.com/apache/daffodil/pull/910



##########
daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesZoned.scala:
##########
@@ -47,71 +45,67 @@ case class ConvertZonedCombinator(e: ElementBase, value: 
Gram, converter: Gram)
 }
 
 case class ConvertZonedNumberPrim(e: ElementBase)
-  extends Terminal(e, true) {
-
-  val textNumberFormatEv: TextNumberFormatEv = {
-    val pattern = {
-      val p = e.textNumberPattern
+  extends Terminal(e, true)
+  with ConvertTextNumberMixin {
+
+  final override protected lazy val textDecimalVirtualPointFromPattern: Int = {
+    
TextNumberPatternUtils.textDecimalVirtualPointForZoned(patternStripped).getOrElse
 {

Review Comment:
   Yes. Will rename. 



##########
daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesTextNumber.scala:
##########
@@ -46,8 +45,249 @@ case class ConvertTextCombinator(e: ElementBase, value: 
Gram, converter: Gram)
   override lazy val unparser = new 
ConvertTextCombinatorUnparser(e.termRuntimeData, value.unparser, 
converter.unparser)
 }
 
-case class ConvertTextNumberPrim(e: ElementBase)
-  extends Terminal(e, true) {
+// This is a separate object for unit testing purposes.
+private[primitives]
+object TextNumberPatternUtils {
+
+  /**
+   * A regex which matches textNumberPatterns that legally use the V
+   * (implied decimal point) character.
+   */
+   private[primitives] lazy val vregexStandard = {
+    //  DFDL v1.0 Spec says
+    //  It is a Schema Definition Error if any symbols other than "0", "1" 
through "9" or #
+    //  are used in the vpinteger region of the pattern.
+    //
+    // The prefix and suffix chars can surround the vpinteger region, and 
there can be
+    // a positive and a negative pattern.
+    //
+    // The prefix and suffix cannot be digits, # or P or V. No quoted chars in 
prefix either.
+    //
+     val prefixChars = """[^0-9#PV']?""" // same for suffix.
+     val beforeVChars = """#*[0-9]+"""
+     val afterVChars = """[0-9]+"""
+     //
+     // Each of the capture groups is defined here in order
+     //
+     val posPrefix = s"""($prefixChars)"""
+     val posBeforeV = s"""($beforeVChars)"""
+     val posAfterV = s"""($afterVChars)"""
+     val posSuffix = posPrefix
+     val negPrefix = posPrefix
+     val negBeforeV = posBeforeV
+     val negAfterV = posAfterV
+     val negSuffix = posPrefix
+     // don't forget the ^ and $ (start of data, end of data) because we want
+     // the match to consume all the characters, starting at the beginning.
+    val vPattern=
+     
s"""^${posPrefix}${posBeforeV}V${posAfterV}${posSuffix}(?:;${negPrefix}${negBeforeV}V${negAfterV}${negSuffix})?$$"""
+    val re = vPattern.r
+    re
+  }
+
+
+
+  private[primitives] lazy val vregexZoned= {
+    // Note: for zoned, can only have a positive pattern
+    // Prefix or suffix can only be '+' character, to
+    // indicate leading or trailing sign, and can only
+    // one of those.
+    //
+    // Also we're not allowing the # character, since I think that
+    // makes no sense for zoned with virtual decimal point.
+    //
+    val prefixChars = """\+?""" // only + for prefix/suffix
+    val aroundVChars = """[0-9]+"""
+    //
+    // capture groups are defined here in sequence
+    //
+    val prefix = s"""($prefixChars)"""
+    val beforeV = s"""($aroundVChars)"""
+    val afterV = beforeV
+    val suffix = prefix
+    val vPattern = s"""^${prefix}${beforeV}V${afterV}${suffix}$$""" // only 
positive pattern allowed
+    val re = vPattern.r
+    re
+  }
+
+  /**
+   * Checks a pattern for suitability with V (virtual decimal point) in the 
pattern
+   * in the context of zoned textNumberRep.
+   *
+   * This is broken out separately for unit testing purposes.
+   *
+   * @param patternStripped the dfdl:textNumberPattern pattern - with all 
quoting removed.
+   * @return None if the pattern is illegal syntax for use with V (virtual 
decimal point)
+   *         otherwise Some(N) where N is the number of digits to the right of 
the V character.
+   */
+  private[primitives] def textDecimalVirtualPointForZoned(patternStripped: 
String): Option[Int] = {
+    val r = TextNumberPatternUtils.vregexZoned
+    r.findFirstMatchIn(patternStripped) match {
+      // note: cannot have both a prefix and suffix. Only one of them.
+      case Some(r(pre, _, afterV, suf)) if (pre.length + suf.length <= 1) => 
Some(afterV.length)
+      case _ => None
+    }
+  }
+
+  /**
+   * The apos character (') is the quoting character in ICU patterns (and DFDL 
textNumberPattern).
+   * This removes any unquoted P or V characters (which are not implemented by 
ICU)
+   * leaving a pattern string that is suitable for use with ICU.
+   * @param pattern the textNumberPattern string
+   * @return the pattern string with all unquoted P and unquoted V removed.
+   */
+  def removeUnquotedPV(pattern: String) : String = {
+    val uniqueQQString = "alsdflslkjskjslkkjlkkjfppooiipsldsflj"
+    // A single regex that matches an unquoted character
+    // where the quoting char is self quoting requires
+    // a zero-width look behind that matches a potentially
+    // unbounded number of quote chars. That's not allowed.
+    //
+    // Consider ''''''P. We want a regex that matches only the P here
+    // because it is preceded by an even number of quotes.
+    //
+    // I did some tests, and the formulations you think might work
+    // such as from stack-overflow, don't work.
+    // (See test howNotToUseRegexLookBehindWithReplaceAll)
+    //
+    // So we use this brute force technique of replacing all ''
+    // first, so we have only single quotes to deal with.
+    pattern.replaceAll("''", uniqueQQString).
+      replaceAll("(?<!')P", "").
+      replaceAll("(?<!')V", "").

Review Comment:
   This is addressed in https://github.com/apache/daffodil/pull/910 



##########
daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesZoned.scala:
##########
@@ -47,71 +45,67 @@ case class ConvertZonedCombinator(e: ElementBase, value: 
Gram, converter: Gram)
 }
 
 case class ConvertZonedNumberPrim(e: ElementBase)
-  extends Terminal(e, true) {
-
-  val textNumberFormatEv: TextNumberFormatEv = {
-    val pattern = {
-      val p = e.textNumberPattern
+  extends Terminal(e, true)
+  with ConvertTextNumberMixin {
+
+  final override protected lazy val textDecimalVirtualPointFromPattern: Int = {
+    
TextNumberPatternUtils.textDecimalVirtualPointForZoned(patternStripped).getOrElse
 {
+      e.SDE(
+        s"""The dfdl:textNumberPattern '%s' contains 'V' (virtual decimal 
point).

Review Comment:
   Only called when hasV is true. Will add assertion to make that clear. 



##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/ConvertTextStandardNumberParser.scala:
##########
@@ -54,11 +56,64 @@ case class ConvertTextCombinatorParser(
   }
 }
 
-case class ConvertTextNumberParser(
+trait TextDecimalVirtualPointMixin {
+  def textDecimalVirtualPoint: Int
+
+  final protected val virtualPointScaleFactor = scala.math.pow(10.0, 
textDecimalVirtualPoint)
+
+  final protected def applyTextDecimalVirtualPointForParse(num1: JNumber): 
JNumber = {
+    if (textDecimalVirtualPoint == 0) num1
+    else {
+      // scale for virtual decimal point
+      val scaledNum: JNumber = num1 match {
+        // Empirically, in our test suite, we do get Long back here, so the 
runtime sometimes represents small integer
+        // (or possibly even smaller decimal numbers with no fraction part) as 
Long.
+        case l: JLong => 
JBigDecimal.valueOf(l).scaleByPowerOfTen(-textDecimalVirtualPoint)
+        case bd: JBigDecimal => bd.scaleByPowerOfTen(-textDecimalVirtualPoint)
+        case f: JFloat => (f / virtualPointScaleFactor).toFloat
+        case d: JDouble => (d / virtualPointScaleFactor).toDouble
+        // $COVERAGE-OFF$
+        case _ => badType(num1)
+        // $COVERAGE-ON$
+      }
+      scaledNum
+    }
+  }
+
+  // $COVERAGE-OFF$
+  private def badType(num1: AnyRef) = {
+    Assert.invariantFailed(
+      s"""Number cannot be scaled for virtual decimal point,
+         |because it is not a decimal, float, or double.
+         |The type is ${num1.getClass.getSimpleName}.""".stripMargin)
+  }
+  // $COVERAGE-ON$
+
+  final protected def applyTextDecimalVirtualPointForUnparse(valueAsAnyRef: 
AnyRef) : JNumber = {
+    valueAsAnyRef match {
+      // This is not perfectly symmetrical with the parse side equivalent.
+      // Empirically in our test suite, we do not see JLong here.
+      case f: JFloat => (f * virtualPointScaleFactor).toFloat
+      case d: JDouble => (d * virtualPointScaleFactor).toDouble
+      case bd: JBigDecimal => bd.scaleByPowerOfTen(textDecimalVirtualPoint)
+
+      case n: JNumber =>
+        if (textDecimalVirtualPoint == 0) n
+        // $COVERAGE-OFF$ // both badType and the next case are coverage-off
+        else badType(n)
+      case _ => Assert.invariantFailed("Not a JNumber")
+      // $COVERAGE-ON$
+    }
+  }

Review Comment:
   Returns the type of the argument. 
   NaN and INF floating point values mean this can't just clamp to an Integer. 



##########
daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesZoned.scala:
##########
@@ -47,71 +45,67 @@ case class ConvertZonedCombinator(e: ElementBase, value: 
Gram, converter: Gram)
 }
 
 case class ConvertZonedNumberPrim(e: ElementBase)
-  extends Terminal(e, true) {
-
-  val textNumberFormatEv: TextNumberFormatEv = {
-    val pattern = {
-      val p = e.textNumberPattern
+  extends Terminal(e, true)
+  with ConvertTextNumberMixin {
+
+  final override protected lazy val textDecimalVirtualPointFromPattern: Int = {
+    
TextNumberPatternUtils.textDecimalVirtualPointForZoned(patternStripped).getOrElse
 {
+      e.SDE(
+        s"""The dfdl:textNumberPattern '%s' contains 'V' (virtual decimal 
point).

Review Comment:
   Resolved in https://github.com/apache/daffodil/pull/910



##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/ConvertTextStandardNumberParser.scala:
##########
@@ -54,11 +56,64 @@ case class ConvertTextCombinatorParser(
   }
 }
 
-case class ConvertTextNumberParser(
+trait TextDecimalVirtualPointMixin {
+  def textDecimalVirtualPoint: Int
+
+  final protected val virtualPointScaleFactor = scala.math.pow(10.0, 
textDecimalVirtualPoint)

Review Comment:
   lazy it is. 



##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/ConvertTextStandardNumberParser.scala:
##########
@@ -54,11 +56,64 @@ case class ConvertTextCombinatorParser(
   }
 }
 
-case class ConvertTextNumberParser(
+trait TextDecimalVirtualPointMixin {
+  def textDecimalVirtualPoint: Int
+
+  final protected val virtualPointScaleFactor = scala.math.pow(10.0, 
textDecimalVirtualPoint)
+
+  final protected def applyTextDecimalVirtualPointForParse(num1: JNumber): 
JNumber = {
+    if (textDecimalVirtualPoint == 0) num1
+    else {
+      // scale for virtual decimal point
+      val scaledNum: JNumber = num1 match {
+        // Empirically, in our test suite, we do get Long back here, so the 
runtime sometimes represents small integer
+        // (or possibly even smaller decimal numbers with no fraction part) as 
Long.
+        case l: JLong => 
JBigDecimal.valueOf(l).scaleByPowerOfTen(-textDecimalVirtualPoint)
+        case bd: JBigDecimal => bd.scaleByPowerOfTen(-textDecimalVirtualPoint)
+        case f: JFloat => (f / virtualPointScaleFactor).toFloat
+        case d: JDouble => (d / virtualPointScaleFactor).toDouble
+        // $COVERAGE-OFF$
+        case _ => badType(num1)
+        // $COVERAGE-ON$
+      }
+      scaledNum
+    }
+  }
+
+  // $COVERAGE-OFF$
+  private def badType(num1: AnyRef) = {
+    Assert.invariantFailed(
+      s"""Number cannot be scaled for virtual decimal point,
+         |because it is not a decimal, float, or double.
+         |The type is ${num1.getClass.getSimpleName}.""".stripMargin)
+  }
+  // $COVERAGE-ON$
+
+  final protected def applyTextDecimalVirtualPointForUnparse(valueAsAnyRef: 
AnyRef) : JNumber = {
+    valueAsAnyRef match {
+      // This is not perfectly symmetrical with the parse side equivalent.
+      // Empirically in our test suite, we do not see JLong here.
+      case f: JFloat => (f * virtualPointScaleFactor).toFloat
+      case d: JDouble => (d * virtualPointScaleFactor).toDouble
+      case bd: JBigDecimal => bd.scaleByPowerOfTen(textDecimalVirtualPoint)
+
+      case n: JNumber =>
+        if (textDecimalVirtualPoint == 0) n
+        // $COVERAGE-OFF$ // both badType and the next case are coverage-off
+        else badType(n)
+      case _ => Assert.invariantFailed("Not a JNumber")
+      // $COVERAGE-ON$
+    }
+  }

Review Comment:
   Changed and comments added about this in 
https://github.com/apache/daffodil/pull/910.



##########
daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesTextNumber.scala:
##########
@@ -46,8 +45,249 @@ case class ConvertTextCombinator(e: ElementBase, value: 
Gram, converter: Gram)
   override lazy val unparser = new 
ConvertTextCombinatorUnparser(e.termRuntimeData, value.unparser, 
converter.unparser)
 }
 
-case class ConvertTextNumberPrim(e: ElementBase)
-  extends Terminal(e, true) {
+// This is a separate object for unit testing purposes.
+private[primitives]
+object TextNumberPatternUtils {
+
+  /**
+   * A regex which matches textNumberPatterns that legally use the V
+   * (implied decimal point) character.
+   */
+   private[primitives] lazy val vregexStandard = {
+    //  DFDL v1.0 Spec says
+    //  It is a Schema Definition Error if any symbols other than "0", "1" 
through "9" or #
+    //  are used in the vpinteger region of the pattern.
+    //
+    // The prefix and suffix chars can surround the vpinteger region, and 
there can be
+    // a positive and a negative pattern.
+    //
+    // The prefix and suffix cannot be digits, # or P or V. No quoted chars in 
prefix either.
+    //
+     val prefixChars = """[^0-9#PV']?""" // same for suffix.
+     val beforeVChars = """#*[0-9]+"""
+     val afterVChars = """[0-9]+"""
+     //
+     // Each of the capture groups is defined here in order
+     //
+     val posPrefix = s"""($prefixChars)"""
+     val posBeforeV = s"""($beforeVChars)"""
+     val posAfterV = s"""($afterVChars)"""
+     val posSuffix = posPrefix
+     val negPrefix = posPrefix
+     val negBeforeV = posBeforeV
+     val negAfterV = posAfterV
+     val negSuffix = posPrefix
+     // don't forget the ^ and $ (start of data, end of data) because we want
+     // the match to consume all the characters, starting at the beginning.
+    val vPattern=
+     
s"""^${posPrefix}${posBeforeV}V${posAfterV}${posSuffix}(?:;${negPrefix}${negBeforeV}V${negAfterV}${negSuffix})?$$"""
+    val re = vPattern.r
+    re
+  }
+
+
+
+  private[primitives] lazy val vregexZoned= {
+    // Note: for zoned, can only have a positive pattern
+    // Prefix or suffix can only be '+' character, to
+    // indicate leading or trailing sign, and can only
+    // one of those.
+    //
+    // Also we're not allowing the # character, since I think that
+    // makes no sense for zoned with virtual decimal point.
+    //
+    val prefixChars = """\+?""" // only + for prefix/suffix
+    val aroundVChars = """[0-9]+"""
+    //
+    // capture groups are defined here in sequence
+    //
+    val prefix = s"""($prefixChars)"""
+    val beforeV = s"""($aroundVChars)"""
+    val afterV = beforeV
+    val suffix = prefix
+    val vPattern = s"""^${prefix}${beforeV}V${afterV}${suffix}$$""" // only 
positive pattern allowed
+    val re = vPattern.r
+    re
+  }
+
+  /**
+   * Checks a pattern for suitability with V (virtual decimal point) in the 
pattern
+   * in the context of zoned textNumberRep.
+   *
+   * This is broken out separately for unit testing purposes.
+   *
+   * @param patternStripped the dfdl:textNumberPattern pattern - with all 
quoting removed.
+   * @return None if the pattern is illegal syntax for use with V (virtual 
decimal point)
+   *         otherwise Some(N) where N is the number of digits to the right of 
the V character.
+   */
+  private[primitives] def textDecimalVirtualPointForZoned(patternStripped: 
String): Option[Int] = {
+    val r = TextNumberPatternUtils.vregexZoned
+    r.findFirstMatchIn(patternStripped) match {
+      // note: cannot have both a prefix and suffix. Only one of them.
+      case Some(r(pre, _, afterV, suf)) if (pre.length + suf.length <= 1) => 
Some(afterV.length)
+      case _ => None
+    }
+  }
+
+  /**
+   * The apos character (') is the quoting character in ICU patterns (and DFDL 
textNumberPattern).
+   * This removes any unquoted P or V characters (which are not implemented by 
ICU)
+   * leaving a pattern string that is suitable for use with ICU.
+   * @param pattern the textNumberPattern string
+   * @return the pattern string with all unquoted P and unquoted V removed.
+   */
+  def removeUnquotedPV(pattern: String) : String = {
+    val uniqueQQString = "alsdflslkjskjslkkjlkkjfppooiipsldsflj"

Review Comment:
   Addressed in https://github.com/apache/daffodil/pull/910



##########
daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesZoned.scala:
##########
@@ -47,71 +45,67 @@ case class ConvertZonedCombinator(e: ElementBase, value: 
Gram, converter: Gram)
 }
 
 case class ConvertZonedNumberPrim(e: ElementBase)
-  extends Terminal(e, true) {
-
-  val textNumberFormatEv: TextNumberFormatEv = {
-    val pattern = {
-      val p = e.textNumberPattern
+  extends Terminal(e, true)
+  with ConvertTextNumberMixin {
+
+  final override protected lazy val textDecimalVirtualPointFromPattern: Int = {
+    
TextNumberPatternUtils.textDecimalVirtualPointForZoned(patternStripped).getOrElse
 {

Review Comment:
   Renamed in https://github.com/apache/daffodil/pull/910



##########
daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/PrimitivesTextNumber.scala:
##########
@@ -46,8 +45,249 @@ case class ConvertTextCombinator(e: ElementBase, value: 
Gram, converter: Gram)
   override lazy val unparser = new 
ConvertTextCombinatorUnparser(e.termRuntimeData, value.unparser, 
converter.unparser)
 }
 
-case class ConvertTextNumberPrim(e: ElementBase)
-  extends Terminal(e, true) {
+// This is a separate object for unit testing purposes.
+private[primitives]
+object TextNumberPatternUtils {
+
+  /**
+   * A regex which matches textNumberPatterns that legally use the V
+   * (implied decimal point) character.
+   */
+   private[primitives] lazy val vregexStandard = {
+    //  DFDL v1.0 Spec says
+    //  It is a Schema Definition Error if any symbols other than "0", "1" 
through "9" or #
+    //  are used in the vpinteger region of the pattern.
+    //
+    // The prefix and suffix chars can surround the vpinteger region, and 
there can be
+    // a positive and a negative pattern.
+    //
+    // The prefix and suffix cannot be digits, # or P or V. No quoted chars in 
prefix either.
+    //
+     val prefixChars = """[^0-9#PV']?""" // same for suffix.
+     val beforeVChars = """#*[0-9]+"""
+     val afterVChars = """[0-9]+"""
+     //
+     // Each of the capture groups is defined here in order
+     //
+     val posPrefix = s"""($prefixChars)"""
+     val posBeforeV = s"""($beforeVChars)"""
+     val posAfterV = s"""($afterVChars)"""
+     val posSuffix = posPrefix
+     val negPrefix = posPrefix
+     val negBeforeV = posBeforeV
+     val negAfterV = posAfterV
+     val negSuffix = posPrefix
+     // don't forget the ^ and $ (start of data, end of data) because we want
+     // the match to consume all the characters, starting at the beginning.
+    val vPattern=
+     
s"""^${posPrefix}${posBeforeV}V${posAfterV}${posSuffix}(?:;${negPrefix}${negBeforeV}V${negAfterV}${negSuffix})?$$"""
+    val re = vPattern.r
+    re
+  }
+
+
+
+  private[primitives] lazy val vregexZoned= {
+    // Note: for zoned, can only have a positive pattern
+    // Prefix or suffix can only be '+' character, to
+    // indicate leading or trailing sign, and can only
+    // one of those.
+    //
+    // Also we're not allowing the # character, since I think that
+    // makes no sense for zoned with virtual decimal point.
+    //
+    val prefixChars = """\+?""" // only + for prefix/suffix
+    val aroundVChars = """[0-9]+"""
+    //
+    // capture groups are defined here in sequence
+    //
+    val prefix = s"""($prefixChars)"""
+    val beforeV = s"""($aroundVChars)"""
+    val afterV = beforeV
+    val suffix = prefix
+    val vPattern = s"""^${prefix}${beforeV}V${afterV}${suffix}$$""" // only 
positive pattern allowed
+    val re = vPattern.r
+    re
+  }
+
+  /**
+   * Checks a pattern for suitability with V (virtual decimal point) in the 
pattern
+   * in the context of zoned textNumberRep.
+   *
+   * This is broken out separately for unit testing purposes.
+   *
+   * @param patternStripped the dfdl:textNumberPattern pattern - with all 
quoting removed.
+   * @return None if the pattern is illegal syntax for use with V (virtual 
decimal point)
+   *         otherwise Some(N) where N is the number of digits to the right of 
the V character.
+   */
+  private[primitives] def textDecimalVirtualPointForZoned(patternStripped: 
String): Option[Int] = {
+    val r = TextNumberPatternUtils.vregexZoned
+    r.findFirstMatchIn(patternStripped) match {
+      // note: cannot have both a prefix and suffix. Only one of them.
+      case Some(r(pre, _, afterV, suf)) if (pre.length + suf.length <= 1) => 
Some(afterV.length)
+      case _ => None
+    }
+  }
+
+  /**
+   * The apos character (') is the quoting character in ICU patterns (and DFDL 
textNumberPattern).
+   * This removes any unquoted P or V characters (which are not implemented by 
ICU)
+   * leaving a pattern string that is suitable for use with ICU.
+   * @param pattern the textNumberPattern string
+   * @return the pattern string with all unquoted P and unquoted V removed.
+   */
+  def removeUnquotedPV(pattern: String) : String = {
+    val uniqueQQString = "alsdflslkjskjslkkjlkkjfppooiipsldsflj"
+    // A single regex that matches an unquoted character
+    // where the quoting char is self quoting requires
+    // a zero-width look behind that matches a potentially
+    // unbounded number of quote chars. That's not allowed.
+    //
+    // Consider ''''''P. We want a regex that matches only the P here
+    // because it is preceded by an even number of quotes.
+    //
+    // I did some tests, and the formulations you think might work
+    // such as from stack-overflow, don't work.
+    // (See test howNotToUseRegexLookBehindWithReplaceAll)
+    //
+    // So we use this brute force technique of replacing all ''
+    // first, so we have only single quotes to deal with.
+    pattern.replaceAll("''", uniqueQQString).
+      replaceAll("(?<!')P", "").
+      replaceAll("(?<!')V", "").
+      replaceAll(uniqueQQString, "''")
+  }
+}
+
+trait ConvertTextNumberMixin {
+
+  def e: ElementBase
+
+  /**
+   * Convenience. The original value of textNumberPattern
+   */
+  @inline
+  final protected def pattern = e.textNumberPattern
+
+  /**
+   * Checks a pattern for suitability with V (virtual decimal point) in the 
pattern
+   * in the context of the corresponding textNumberRep. Computes number of 
digits to right of the V.
+   *
+   * SDE if pattern has a syntax error.
+   *
+   * @return the number of digits to the right of the V character. Must be 1 
or greater.
+   *
+   */
+  protected def textDecimalVirtualPointFromPattern: Int
+
+  /**
+   * The pattern with escaped characters removed.
+   * This can be reliably searched for pattern characters,
+   * but cannot be used as an operational pattern given that
+   * potentially lots of things have been removed from it.
+   */
+  final protected lazy val patternStripped = {
+    // note: tick == apos == ' == single quote.
+    // First remove entirely all escaped ticks ie., ''
+    val noEscapedTicksRegex = """''""".r
+    val patternNoEscapedTicks = noEscapedTicksRegex.replaceAllIn(pattern, "")
+    // Next remove all tick-escaped characters entirely
+    val noQuotedRegex = """'[^']+'""".r
+    val patternNoQuoted = noQuotedRegex.replaceAllIn(patternNoEscapedTicks, "")
+    // the remaining string contains only pattern special characters
+    // and regular non-pattern characters
+    patternNoQuoted
+  }
+
+  protected final lazy val hasV = patternStripped.contains("V")
+  protected final lazy val hasP = patternStripped.contains("P")
+
+  /**
+   * analogous to the property dfdl:binaryDecimalVirtualPoint
+   *
+   * Value is 0 if there is no virtual decimal point.
+   * Value is the number of digits to the right of the 'V'
+   */
+  final lazy val textDecimalVirtualPoint: Int = {
+    if (!hasV && !hasP) 0 // no virtual point
+    else {
+      if (hasP) {
+        e.notYetImplemented("textNumberPattern with P symbol")
+      }
+      Assert.invariant(hasV)
+      //
+      // check for things incompatible with "V"
+      //
+      val virtualPoint = textDecimalVirtualPointFromPattern
+      Assert.invariant(virtualPoint >= 1) // if this fails the regex is broken.
+      virtualPoint
+    }
+  }
+
+  //
+  // We need 2 patterns
+  // 1. The original textNumberPattern string from the schema
+  // 2. Same but with the P or V removed.
+  //
+  // We need to use the original textNumberPattern in diagnostic messages
+  // since that's what the schema author wrote and expects to see.
+  // But if the pattern has P or V, then we need ICU at runtime to use the 
original
+  // but with the P or V removed since the P and V don't actually represent 
anything in the data.
+  final protected lazy val runtimePattern = {
+    if (hasV || hasP) {
+      val patternWithoutPV = TextNumberPatternUtils.removeUnquotedPV(pattern)
+      patternWithoutPV
+    }
+    else pattern
+  }
+
+  final protected def checkPatternWithICU(e: ElementBase) = {
+    // Load the pattern to make sure it is valid
+    try {
+      if (hasV || hasP) {
+        new DecimalFormat(runtimePattern)
+      } else {
+        new DecimalFormat(pattern)
+      }
+    } catch {
+      case ex: IllegalArgumentException =>
+        if (hasV || hasP) {
+          // we don't know what the diagnostic message will say here
+          // since it is from the ICU library.
+          // That library has only seen the pattern with the P and V
+          // removed. The messages might show that pattern which would
+          // confuse the schema author since that's not the pattern
+          // they wrote.
+          //
+          // So we try to explain....
+          e.SDE(
+            """Invalid textNumberPattern.
+              | The errors are about the pattern with the P (decimal scaling 
position)
+              | and V (virtual decimal point) characters removed: 
%s""".stripMargin, ex)
+        } else {
+          e.SDE("Invalid textNumberPattern: %s", ex)
+        }
+    }
+  }
+}
+
+case class ConvertTextStandardNumberPrim(e: ElementBase)
+  extends Terminal(e, true)
+  with ConvertTextNumberMixin {
+
+  final override protected lazy val textDecimalVirtualPointFromPattern: Int = {
+    val r = TextNumberPatternUtils.vregexStandard
+    r.findFirstMatchIn(patternStripped) match {
+      case Some(r(_, _, afterV, _, _, _, _, _)) => afterV.length
+      case None =>
+        e.SDE(
+          s"""The dfdl:textNumberPattern '%s' contains 'V' (virtual decimal 
point).

Review Comment:
   Resolved in https://github.com/apache/daffodil/pull/910



##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/ConvertTextStandardNumberParser.scala:
##########
@@ -54,11 +56,64 @@ case class ConvertTextCombinatorParser(
   }
 }
 
-case class ConvertTextNumberParser(
+trait TextDecimalVirtualPointMixin {
+  def textDecimalVirtualPoint: Int
+
+  final protected val virtualPointScaleFactor = scala.math.pow(10.0, 
textDecimalVirtualPoint)

Review Comment:
   resolved in https://github.com/apache/daffodil/pull/910



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to