stevedlawrence commented on code in PR #1195:
URL: https://github.com/apache/daffodil/pull/1195#discussion_r1541027210


##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/dfa/Runtime.scala:
##########
@@ -133,6 +134,17 @@ trait DFAField extends DFA {
   final override def run(r: Registers): Unit = runLoop(r, DFA.EndOfData, 
StateKind.EndOfData)
 }
 
+object DFADelimiter {
+  private val controlOrWhitespace = "\\p{C}|\\p{Z}".r
+
+  private def containsCtrlOrWS(s: String) = 
controlOrWhitespace.findFirstMatchIn(s).isDefined
+
+  def strForDiagnostic(s: String) =
+    if (containsCtrlOrWS(s))

Review Comment:
   Is this for performance, to avoid remapping something that doesn't need 
remapping? We might want to instead improve remapStringToVisibleGlyph. Maybe we 
should do something similar to the CharactSetRemapper where we just iterate 
over the string until we find something that needs remapping? And when we hit 
that then we create a StringBuilder, append what we've already looked at, and 
then start remapping chars. That way if no characters need remapping we just 
return the exact same string and minimize allocations.



##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/DelimiterParsers.scala:
##########
@@ -73,6 +76,35 @@ class DelimiterTextParser(
     foundLocalDFAIndex >= 0
   }
 
+  private def localDelimiters(state: PState): Seq[DFADelimiter] = {
+    var foundLocalDFAIndex = -1
+    val localIndexStart = state.mpstate.delimitersLocalIndexStack.top
+    val inScopeDelimiters = state.mpstate.delimiters
+    val res = inScopeDelimiters.slice(localIndexStart, 
inScopeDelimiters.length)
+    res
+  }
+
+  private def didNotFindExpectedDelimiter(foundDelimiter: ParseResult, start: 
PState): Unit = {
+    val delimString = foundDelimiter.matchedDelimiterValue.get
+    val localDelims = localDelimiters(start)
+    val foundDFA = foundDelimiter.matchedDFAs(0)
+    PE(
+      start,
+      s"""Found enclosing delimiter: %s during scan for local delimiter(s): %s.
+         | The expected delimiter(s) were: %s.
+         | The enclosing delimiter was from %s.
+         |""".stripMargin,
+      foundDFA.strForDiagnostic,
+      localDelims.map { d => d.strForDiagnostic }.mkString(", "),
+      localDelims
+        .map { d =>
+          s"  ${d.delimType.toString} ${d.strForDiagnostic} from location 
${d.location}."
+        }
+        .mkString("\n", "\n", ""),
+      foundDFA.location,

Review Comment:
   Thoughts on including `locationDescription` either instead of or in addition 
to `.location`? `.location` just outputs the element name which can sometimes 
make it hard to figure out where in the schema it actually is. 
`locationDescription` includes file/line/col info so makes it real easy.



##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/DelimiterParsers.scala:
##########
@@ -73,6 +76,35 @@ class DelimiterTextParser(
     foundLocalDFAIndex >= 0
   }
 
+  private def localDelimiters(state: PState): Seq[DFADelimiter] = {
+    var foundLocalDFAIndex = -1
+    val localIndexStart = state.mpstate.delimitersLocalIndexStack.top
+    val inScopeDelimiters = state.mpstate.delimiters
+    val res = inScopeDelimiters.slice(localIndexStart, 
inScopeDelimiters.length)
+    res
+  }
+
+  private def didNotFindExpectedDelimiter(foundDelimiter: ParseResult, start: 
PState): Unit = {
+    val delimString = foundDelimiter.matchedDelimiterValue.get
+    val localDelims = localDelimiters(start)
+    val foundDFA = foundDelimiter.matchedDFAs(0)
+    PE(
+      start,
+      s"""Found enclosing delimiter: %s during scan for local delimiter(s): %s.
+         | The expected delimiter(s) were: %s.
+         | The enclosing delimiter was from %s.
+         |""".stripMargin,
+      foundDFA.strForDiagnostic,
+      localDelims.map { d => d.strForDiagnostic }.mkString(", "),
+      localDelims

Review Comment:
   This duplicates localDelims. THoughts on an error message like
   
   ```scala
   PE(
     "Found enclosing delimiter %s from %s.
     |Expected delimiter(s) were:
     |%s",
     foundDFA.strForDiagnostic,
     foundDFA.locationDescription,
     localDelims.map { d =>
       s"  ${d.delimType.toString} ${d.strForDiagnostic} from location 
${d.location}."
     }
   )
   ```



##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/dfa/Runtime.scala:
##########
@@ -147,6 +159,9 @@ trait DFADelimiter extends DFA {
   final val isES = lookingFor == "%ES;"
 
   def unparseValue: String
+
+  def strForDiagnostic = DFADelimiter.strForDiagnostic(lookingFor)

Review Comment:
   Why do we need to remap `lookingFor`? Doesn't that come from the DFDL 
schema, which contains things like "%NL;" and don't need remapping? If 
lookingFor needs remapping, maybe we have a bug somewhere where we are 
incorrectly converting that to a string that isn't friendly for diagnostics?



##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/DelimiterParsers.scala:
##########
@@ -23,15 +23,18 @@ import org.apache.daffodil.lib.exceptions.Assert
 import org.apache.daffodil.lib.util.Enum
 import org.apache.daffodil.lib.util.Maybe
 import org.apache.daffodil.lib.util.Maybe._
-import org.apache.daffodil.lib.util.Misc
 import org.apache.daffodil.runtime1.processors.LocalTypedDelimiterIterator
 import 
org.apache.daffodil.runtime1.processors.RemoteTerminatingMarkupAndLocalTypedDelimiterIterator
 import org.apache.daffodil.runtime1.processors.TermRuntimeData
 import org.apache.daffodil.runtime1.processors.dfa.DFADelimiter
+import org.apache.daffodil.runtime1.processors.dfa.ParseResult
 import org.apache.daffodil.runtime1.processors.dfa.TextParser
 
 object DelimiterTextType extends Enum {
-  abstract sealed trait Type extends EnumValueType
+  abstract sealed trait Type extends EnumValueType {
+    override def toString() =

Review Comment:
   Can this be a lazy val? We may also want to lowercase the first char so it 
better matches the DFDL property?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to