stevedlawrence commented on code in PR #1195:
URL: https://github.com/apache/daffodil/pull/1195#discussion_r1541027210
##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/dfa/Runtime.scala:
##########
@@ -133,6 +134,17 @@ trait DFAField extends DFA {
final override def run(r: Registers): Unit = runLoop(r, DFA.EndOfData,
StateKind.EndOfData)
}
+object DFADelimiter {
+ private val controlOrWhitespace = "\\p{C}|\\p{Z}".r
+
+ private def containsCtrlOrWS(s: String) =
controlOrWhitespace.findFirstMatchIn(s).isDefined
+
+ def strForDiagnostic(s: String) =
+ if (containsCtrlOrWS(s))
Review Comment:
Is this for performance, to avoid remapping something that doesn't need
remapping? We might want to instead improve remapStringToVisibleGlyph. Maybe we
should do something similar to the CharactSetRemapper where we just iterate
over the string until we find something that needs remapping? And when we hit
that then we create a StringBuilder, append what we've already looked at, and
then start remapping chars. That way if no characters need remapping we just
return the exact same string and minimize allocations.
##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/DelimiterParsers.scala:
##########
@@ -73,6 +76,35 @@ class DelimiterTextParser(
foundLocalDFAIndex >= 0
}
+ private def localDelimiters(state: PState): Seq[DFADelimiter] = {
+ var foundLocalDFAIndex = -1
+ val localIndexStart = state.mpstate.delimitersLocalIndexStack.top
+ val inScopeDelimiters = state.mpstate.delimiters
+ val res = inScopeDelimiters.slice(localIndexStart,
inScopeDelimiters.length)
+ res
+ }
+
+ private def didNotFindExpectedDelimiter(foundDelimiter: ParseResult, start:
PState): Unit = {
+ val delimString = foundDelimiter.matchedDelimiterValue.get
+ val localDelims = localDelimiters(start)
+ val foundDFA = foundDelimiter.matchedDFAs(0)
+ PE(
+ start,
+ s"""Found enclosing delimiter: %s during scan for local delimiter(s): %s.
+ | The expected delimiter(s) were: %s.
+ | The enclosing delimiter was from %s.
+ |""".stripMargin,
+ foundDFA.strForDiagnostic,
+ localDelims.map { d => d.strForDiagnostic }.mkString(", "),
+ localDelims
+ .map { d =>
+ s" ${d.delimType.toString} ${d.strForDiagnostic} from location
${d.location}."
+ }
+ .mkString("\n", "\n", ""),
+ foundDFA.location,
Review Comment:
Thoughts on including `locationDescription` either instead of or in addition
to `.location`? `.location` just outputs the element name which can sometimes
make it hard to figure out where in the schema it actually is.
`locationDescription` includes file/line/col info so makes it real easy.
##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/DelimiterParsers.scala:
##########
@@ -73,6 +76,35 @@ class DelimiterTextParser(
foundLocalDFAIndex >= 0
}
+ private def localDelimiters(state: PState): Seq[DFADelimiter] = {
+ var foundLocalDFAIndex = -1
+ val localIndexStart = state.mpstate.delimitersLocalIndexStack.top
+ val inScopeDelimiters = state.mpstate.delimiters
+ val res = inScopeDelimiters.slice(localIndexStart,
inScopeDelimiters.length)
+ res
+ }
+
+ private def didNotFindExpectedDelimiter(foundDelimiter: ParseResult, start:
PState): Unit = {
+ val delimString = foundDelimiter.matchedDelimiterValue.get
+ val localDelims = localDelimiters(start)
+ val foundDFA = foundDelimiter.matchedDFAs(0)
+ PE(
+ start,
+ s"""Found enclosing delimiter: %s during scan for local delimiter(s): %s.
+ | The expected delimiter(s) were: %s.
+ | The enclosing delimiter was from %s.
+ |""".stripMargin,
+ foundDFA.strForDiagnostic,
+ localDelims.map { d => d.strForDiagnostic }.mkString(", "),
+ localDelims
Review Comment:
This duplicates localDelims. THoughts on an error message like
```scala
PE(
"Found enclosing delimiter %s from %s.
|Expected delimiter(s) were:
|%s",
foundDFA.strForDiagnostic,
foundDFA.locationDescription,
localDelims.map { d =>
s" ${d.delimType.toString} ${d.strForDiagnostic} from location
${d.location}."
}
)
```
##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/dfa/Runtime.scala:
##########
@@ -147,6 +159,9 @@ trait DFADelimiter extends DFA {
final val isES = lookingFor == "%ES;"
def unparseValue: String
+
+ def strForDiagnostic = DFADelimiter.strForDiagnostic(lookingFor)
Review Comment:
Why do we need to remap `lookingFor`? Doesn't that come from the DFDL
schema, which contains things like "%NL;" and don't need remapping? If
lookingFor needs remapping, maybe we have a bug somewhere where we are
incorrectly converting that to a string that isn't friendly for diagnostics?
##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/DelimiterParsers.scala:
##########
@@ -23,15 +23,18 @@ import org.apache.daffodil.lib.exceptions.Assert
import org.apache.daffodil.lib.util.Enum
import org.apache.daffodil.lib.util.Maybe
import org.apache.daffodil.lib.util.Maybe._
-import org.apache.daffodil.lib.util.Misc
import org.apache.daffodil.runtime1.processors.LocalTypedDelimiterIterator
import
org.apache.daffodil.runtime1.processors.RemoteTerminatingMarkupAndLocalTypedDelimiterIterator
import org.apache.daffodil.runtime1.processors.TermRuntimeData
import org.apache.daffodil.runtime1.processors.dfa.DFADelimiter
+import org.apache.daffodil.runtime1.processors.dfa.ParseResult
import org.apache.daffodil.runtime1.processors.dfa.TextParser
object DelimiterTextType extends Enum {
- abstract sealed trait Type extends EnumValueType
+ abstract sealed trait Type extends EnumValueType {
+ override def toString() =
Review Comment:
Can this be a lazy val? We may also want to lowercase the first char so it
better matches the DFDL property?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]