This is an automated email from the ASF dual-hosted git repository.
mbeckerle pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git
The following commit(s) were added to refs/heads/main by this push:
new f2a9bd652 Improve diagnostic from delimiter scanning
f2a9bd652 is described below
commit f2a9bd652b51d98c34d5055c16dfe79ae22239b7
Author: Michael Beckerle <[email protected]>
AuthorDate: Tue Mar 26 18:22:34 2024 -0400
Improve diagnostic from delimiter scanning
DAFFODIL-2345
---
.../daffodil/cli/cliTest/TestCLIParsing.scala | 5 +-
.../main/scala/org/apache/daffodil/io/Dump.scala | 6 +-
.../org/apache/daffodil/io/TestDumpDisplay.scala | 21 ++-
.../scala/org/apache/daffodil/lib/util/Misc.scala | 205 ++++++++++-----------
.../org/apache/daffodil/lib/xml/XMLUtils.scala | 4 +-
.../daffodil/runtime1/processors/dfa/Runtime.scala | 15 ++
.../processors/parsers/DelimiterParsers.scala | 42 ++++-
.../processors/parsers/SeparatedParseHelper.scala | 2 +-
.../runtime1/parser/TestCharsetBehavior.scala | 2 +-
.../org/apache/daffodil/tdml/TDMLRunner.scala | 4 +-
.../delimiter_properties/DelimiterProperties.tdml | 26 ++-
.../sequence_groups/SequenceGroupDelimiters.tdml | 2 +-
12 files changed, 192 insertions(+), 142 deletions(-)
diff --git
a/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/TestCLIParsing.scala
b/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/TestCLIParsing.scala
index 1de7a5397..b68e32bf0 100644
---
a/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/TestCLIParsing.scala
+++
b/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/TestCLIParsing.scala
@@ -151,14 +151,15 @@ class TestCLIParsing {
}(ExitCode.BadExternalVariable)
}
- @Test def test_CLI_Parsing_SimpleParse_DFDL1197_fix(): Unit = {
+ @Test
+ def test_CLI_Parsing_SimpleParse_DFDL1197_fix(): Unit = {
val schema = path(
"daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/testOptionalInfix.dfdl.xsd",
)
runCLI(args"-vv parse -s $schema") { cli =>
cli.sendLine("1/3", inputDone = true)
- cli.expectErr("<Sequence><Separator/><RepMinMax name='s1'>")
+ cli.expectErr("<Sequence><separator/><RepMinMax name='s1'>")
}(ExitCode.LeftOverData)
}
diff --git a/daffodil-io/src/main/scala/org/apache/daffodil/io/Dump.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/io/Dump.scala
index 049875727..21a84f839 100644
--- a/daffodil-io/src/main/scala/org/apache/daffodil/io/Dump.scala
+++ b/daffodil-io/src/main/scala/org/apache/daffodil/io/Dump.scala
@@ -640,7 +640,7 @@ class DataDumper {
homogenizeChars(uCodePoint)
}
} else {
- homogenizeChars(Misc.remapCodepointToVisibleGlyph(allChars(0)))
+
homogenizeChars(Misc.remapControlOrLineEndingToVisibleGlyphs(allChars(0)))
}
remapped = r
nCols = n
@@ -661,8 +661,8 @@ class DataDumper {
//
// FIXME: This will be really broken for EBCDIC-based encodings. Pass
the encoding
// so that the glyph routine can be ascii/ebcdic sensitive.
- val remapped = Misc.remapByteToVisibleGlyph(byteValue)
- (remapped.toChar.toString, 1, 1)
+ val remapped = Misc.remapOneByteToVisibleGlyph(byteValue)
+ (remapped.toString, 1, 1)
}
}
}
diff --git
a/daffodil-io/src/test/scala/org/apache/daffodil/io/TestDumpDisplay.scala
b/daffodil-io/src/test/scala/org/apache/daffodil/io/TestDumpDisplay.scala
index e1c57aec1..3d98580ee 100644
--- a/daffodil-io/src/test/scala/org/apache/daffodil/io/TestDumpDisplay.scala
+++ b/daffodil-io/src/test/scala/org/apache/daffodil/io/TestDumpDisplay.scala
@@ -17,9 +17,6 @@
package org.apache.daffodil.io
-import java.nio.ByteBuffer
-import java.nio.CharBuffer
-
import org.apache.daffodil.lib.util.Misc
import org.apache.daffodil.lib.xml.XMLUtils
@@ -41,12 +38,8 @@ class TestDumpDisplay {
*/
@Test def testAllPrintableChars() = {
- // val bytes = 0 to 255 map { _.toByte }
- val bb = ByteBuffer.allocate(256)
- (0 to 255).foreach { n => bb.put(n, n.toByte) }
- val cb = CharBuffer.allocate(256)
- Misc.remapBytesToVisibleGlyphs(bb, cb)
- val res = cb.toString
+ val bytes: Array[Byte] = (0 to 255).map { _.toByte }.toArray
+ val res = Misc.remapBytesToStringOfVisibleGlyphs(bytes)
val exp =
//
// C0 Controls - use unicode control picture characters.
@@ -72,7 +65,15 @@ class TestDumpDisplay {
"¡¢£¤¥¦§¨©ª«¬" +
"-" + // 0xAD soft hyphen mapped to regular hyphen (because soft
hyphen seems to be a zero-width in many fonts.
"®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ"
- assertEquals(exp, res)
+ assertEquals(exp.length, res.length)
+ val sb = new StringBuilder()
+ ((exp.zip(res)).zip(0 to res.length)).foreach { case ((exp, res), i) =>
+ if (exp != res) {
+ sb.append(s"At index $i expected '$exp' but actual was '$res'\n")
+ }
+ }
+ val msg = sb.toString()
+ if (msg.nonEmpty) fail(msg)
}
}
diff --git
a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/util/Misc.scala
b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/util/Misc.scala
index ff828f49a..2e78cefbb 100644
--- a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/util/Misc.scala
+++ b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/util/Misc.scala
@@ -24,9 +24,9 @@ import java.io.IOException
import java.net.URI
import java.net.URLClassLoader
import java.nio.ByteBuffer
-import java.nio.CharBuffer
import java.nio.channels.ReadableByteChannel
import java.nio.channels.WritableByteChannel
+import java.nio.charset.Charset
import java.nio.charset.CodingErrorAction
import java.nio.charset.StandardCharsets
import java.nio.charset.{ Charset => JavaCharset }
@@ -38,6 +38,8 @@ import scala.io.Source
import org.apache.daffodil.lib.equality._
import org.apache.daffodil.lib.exceptions.Assert
+import passera.unsigned.UByte
+
/**
* Various reusable utilities that I couldn't easily find a better place for.
*/
@@ -424,7 +426,7 @@ object Misc {
/**
* This function creates a representation of data which doesn't
- * contain any whitespace characters that jump around the screen.
+ * contain any control or whitespace characters that jump around the screen.
* It replaces those with characters that have a simple glyph.
*
* The point of this is when you display the stream of data for
@@ -434,13 +436,40 @@ object Misc {
* Replacing these with the picture characters (designed for this purpose)
* in the unicode x2400 block helps.
*/
- def remapControlsAndLineEndingsToVisibleGlyphs(s: String) = {
- s.map { remapControlOrLineEndingToVisibleGlyphs(_) }.mkString
+ def remapStringToVisibleGlyphs(s: String) =
+ nonGlyphToVisibleGlyphsRemapper.remap(s)
+
+ object nonGlyphToVisibleGlyphsRemapper extends CharacterSetRemapper {
+ override protected def remap(prev: Char, curr: Char, next: Char): Int =
+ remapControlOrLineEndingToVisibleGlyphs(curr)
}
- def remapControlOrLineEndingToVisibleGlyphs(c: Char) = {
+ /**
+ * For debugger displays, data dumps, etc.
+ *
+ * Control characters, line-endings, spaces, and various others do not have
a glyph that is displayed.
+ * They also can cause text to be split across lines, bells to ring,
characters to be overwritten, etc.
+ *
+ * Convert to a character that has a glyph. Unicode provides some
control-picture
+ * characters for this purpose. Note that this by-definition loses
information, and many characters
+ * will map to the unicode replacement character. It is expected this
display would be complemented
+ * by a hex dump or other means of understanding the actual representation
of these remapped
+ * characters.
+ *
+ * Keep in mind this is a Unicode to Unicode transformation. It is not
starting from byte values.
+ * See `byteToChar(b)` for how to go from raw byte values to unicode chars.
+ * @param c a unicode character that may or may not have a glyph.
+ * @param replaceControlPictures when true means the Unicode control
picture characters are replaced by
+ * the substitution control picture (0x2426). When
false these characters are preserved.
+ * Defaults to false.
+ * @return a unicode character that definitely has a glyph.
+ */
+ def remapControlOrLineEndingToVisibleGlyphs(
+ c: Char,
+ replaceControlPictures: Boolean = false,
+ ): Char = {
val URC =
- 0x2426 // Unicode control picture character for substutition (also looks
like arabic q-mark)
+ 0x2426 // Unicode control picture character for substitution (also looks
like arabic q-mark)
val code = c.toInt match {
//
// C0 Control pictures
@@ -448,8 +477,15 @@ object Misc {
case 0x20 => 0x2423 // For space we use the SP we use the ␣ (Unicode
OPEN BOX)
case 0x7f => 0x2421 // DEL pic isn't at 0x247F, it's at 0x2421
//
+ // We remap these into the Unicode Latin Extended B codepoints by
+ // adding 0x100 to their basic value.
+ //
+ case n if (n >= 0x80 && n <= 0x9f) =>
+ n + 0x100
+ case 0xa0 => 0x2422 // non-break space => ␢ (blank symbol or little b
with stroke)
+ case 0xad => 0x002d // soft hyphen => hyphen
+ //
// Unicode separators & joiners
- case 0x00a0 => URC // no-break space
case 0x200b => URC // zero width space
case 0x2028 => URC // line separator
case 0x2029 => URC // paragraph separator
@@ -477,123 +513,84 @@ object Misc {
//
//
// Special case - if incoming character is one of the glyph
- // characters we're remapping onto, then change to URC
+ // characters we're remapping onto, then we could issue
+ // a substitution character, but there are things that depend
+ // on these being preserved. So we have a flag to control this.
//
- case n if (n > 0x2400 && n < 0x2423) => URC
+ case n if (n > 0x2400 && n < 0x2423 && replaceControlPictures) => URC
case _ => c
}
code.toChar
}
- private val bytesCharset =
- JavaCharset.forName("windows-1252") // same as iso-8859-1 but has a few
more glyphs.
- private val bytesDecoder = {
- val decoder = bytesCharset.newDecoder()
- decoder.onMalformedInput(CodingErrorAction.REPLACE)
- decoder.onUnmappableCharacter(CodingErrorAction.REPLACE)
- decoder
+ private lazy val byteToCharTable = {
+ val cs = Charset.forName("windows-1252")
+ val dec = cs
+ .newDecoder()
+ .onUnmappableCharacter(CodingErrorAction.REPLACE)
+ .onMalformedInput(CodingErrorAction.REPORT)
+ val bb = ByteBuffer.wrap((0 to 255).map { i => i.toByte }.toArray)
+ val cb = dec.decode(bb)
+ assert(cb.position == 0)
+ assert(cb.limit == 256)
+ // These 5 are unmapped by Windows-1252 but we want to turn any
+ // byte into a legit character. So for these we add 0x100
+ // to get unicode codepoints.
+ cb.put(0x81, 0x181.toChar)
+ cb.put(0x8d, 0x18d.toChar)
+ cb.put(0x8f, 0x18f.toChar)
+ cb.put(0x90, 0x190.toChar)
+ cb.put(0x9d, 0x19d.toChar)
+ val res = cb.toString
+ res
}
/**
- * Used when creating a debugging dump of data, where the data might be
binary stuff
- * but we want to show some sort of glyph for each byte.
+ * Convert a byte to a unicode character assuming the byte is iso-8859-1
+ * (or really, windows-1252 which has a few more glyph chars but is otherwise
+ * the same as iso-8859-1)
*
- * This uses windows-1252 for all the places it has glyphs, and other unicode
- * glyph characters to replace the control characters and unused characters.
+ * This is a super pain to do using Java charsets because they
+ * don't provide an API to convert one character, only byte buffers
+ * into char buffers.
*
- * This allows printing a data dump to the screen, without worry that the
control
- * characters will ring bells or cause the text to jump around, and unmapped
- * characters will not look like spaces, nor all look like the same unicde
replacement
- * character.
+ * So we just use a lookup table.
+ * @param b a byte containing a code point of windows-1252 encoding
+ * @return a unicode equivalent character
*/
- def remapBytesToVisibleGlyphs(bb: ByteBuffer, cb: CharBuffer): Unit = {
- val numBytes = bb.remaining()
- bytesDecoder.decode(bb, cb, true)
- cb.flip
- var i = 0
- while (i < numBytes) {
- val newCodepoint = remapOneByteToVisibleGlyph(bb.get(i))
- if (newCodepoint != -1) {
- cb.put(i, newCodepoint.toChar)
- }
- i += 1
- }
- }
-
- /**
- * For unicode codepoints in the range 0 to 255, or signed -128 to 127,
- * make sure there is a visible glyph.
- */
- def remapCodepointToVisibleGlyph(codepoint: Int): Int = {
- if (codepoint > 255 || codepoint < -128) return codepoint
- val b = codepoint.toByte
- val r = remapOneByteToVisibleGlyph(b)
- if (r == -1) codepoint else r
- }
-
- def remapStringToVisibleGlyphs(s: String) = {
- s.map { c => remapCodepointToVisibleGlyph(c.toInt).toChar }
- }
-
- def remapBytesToStringOfVisibleGlyphs(ba: Array[Byte]): String = {
- ba.map { b => remapCodepointToVisibleGlyph(b.toInt).toChar }.mkString
- }
-
- def remapByteToVisibleGlyph(b: Byte): Int = {
- val bb = ByteBuffer.allocate(1)
- bb.put(0, b)
- val cb = CharBuffer.allocate(1)
- remapBytesToVisibleGlyphs(bb, cb)
- cb.get(0).toChar.toInt
+ def byteToChar(b: Byte): Char = {
+ byteToCharTable(UByte(b).toInt)
}
/**
- * Remaps a byte to a unicode codepoint for a visible picture, or -1 if
- * no remapping is needed.
- *
- * A difficulty is that there do not seem to be generally available Unicode
fonts
- * which are truly monospaced for every Unicode character. So since we are
- * trying to produce data dumps that are monospaced, the tabular layout is
off a bit.
- *
- * Even if there was such a font, it wouldn't be the default font.
+ * This function creates a representation of data which doesn't
+ * contain any control or whitespace characters that jump around the screen.
+ * It replaces those with characters that have a simple glyph.
*
- * Courier New seems to work well. It is monospaced for every character we
use
- * in this remap stuff. But not for the "double wide" Kanji or other wide
oriental
- * characters.
+ * The point of this is when you display the stream of data for
+ * debugging, or for a diagnostic message, and it is mostly single-byte text
+ * characters, then the characters which control position like CR, LF, FF,
+ * VT, HT, BS, etc. all make it hard to figure out what is going on.
+ * Replacing these with the picture characters (designed for this purpose)
+ * in the unicode x2400 block helps.
*/
- private def remapOneByteToVisibleGlyph(b: Byte): Int = {
- Bits.asUnsignedByte(b) match {
- //
- // replace C0 controls with unicode control pictures
- //
- case n if (n <= 0x1f) => n + 0x2400
- //
- // replace space and DEL with control pictures
- //
- case 0x20 => 0x2423 // For space we use the SP we use the ␣ (Unicode
OPEN BOX)
- case 0x7f => 0x2421 // DEL pic isn't at 0x247F, it's at 0x2421
- //
- // replace undefined characters in the C1 control space with
- // glyph characters. These are the only codepoints in the C1
- // space which do not have a glyph defined by windows-1252
- //
- // We remap these into the Unicode Latin Extended B codepoints by
- // adding 0x100 to their basic value.
- //
- case 0x81 => 0x0181
- case 0x8d => 0x018d
- case 0x8f => 0x018f
- case 0x90 => 0x0190
- case 0x9d => 0x019d
- //
- // Non-break space
- //
- case 0xa0 => 0x2422 // little b with stroke
- case 0xad => 0x002d // soft hyphen becomes hyphen
- case regular => -1 // all other cases -1 means we just use the regular
character glyph.
+ def remapBytesToStringOfVisibleGlyphs(ba: Array[Byte]): String = {
+ val len = ba.length
+ if (len == 0) ""
+ else {
+ val sb = new StringBuilder(ba.length)
+ var i: Int = 0
+ while (i < ba.length) {
+ sb.append(remapControlOrLineEndingToVisibleGlyphs(byteToChar(ba(i))))
+ i += 1
+ }
+ sb.toString()
}
}
+ def remapOneByteToVisibleGlyph(b: Byte) =
+ remapControlOrLineEndingToVisibleGlyphs(byteToChar(b))
+
/**
* True if this charset encoding is suitable for display using the
* all-visible-glyph stuff above.
diff --git
a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
index 280d0e118..5d6996c55 100644
--- a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
+++ b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/XMLUtils.scala
@@ -1136,10 +1136,10 @@ Differences were (path, expected, actual):
// prefix of the other and index is where the prefix ends, or index is
// the first difference found. Either way, we can safely use slice() to
// get at most some number of characters at that index for context.
- val contextA = Misc.remapControlsAndLineEndingsToVisibleGlyphs(
+ val contextA = Misc.remapStringToVisibleGlyphs(
dataA.slice(index, index + CHARS_TO_SHOW_AFTER_DIFF),
)
- val contextB = Misc.remapControlsAndLineEndingsToVisibleGlyphs(
+ val contextB = Misc.remapStringToVisibleGlyphs(
dataB.slice(index, index + CHARS_TO_SHOW_AFTER_DIFF),
)
val path = zPath + ".charAt(" + (index + 1) + ")"
diff --git
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/dfa/Runtime.scala
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/dfa/Runtime.scala
index 263b1bb90..899bcc45f 100644
---
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/dfa/Runtime.scala
+++
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/dfa/Runtime.scala
@@ -19,6 +19,7 @@ package org.apache.daffodil.runtime1.processors.dfa
import org.apache.daffodil.lib.exceptions.Assert
import org.apache.daffodil.lib.exceptions.SchemaFileLocation
+import org.apache.daffodil.lib.util.Misc
import org.apache.daffodil.runtime1.processors.parsers.DelimiterTextType
/**
@@ -133,6 +134,17 @@ trait DFAField extends DFA {
final override def run(r: Registers): Unit = runLoop(r, DFA.EndOfData,
StateKind.EndOfData)
}
+object DFADelimiter {
+ private val controlOrWhitespace = "\\p{C}|\\p{Z}".r
+
+ private def containsCtrlOrWS(s: String) =
controlOrWhitespace.findFirstMatchIn(s).isDefined
+
+ def strForDiagnostic(s: String) =
+ if (containsCtrlOrWS(s))
+ s"'$s' ('${Misc.remapStringToVisibleGlyphs(s)}')"
+ else s"'$s'"
+}
+
trait DFADelimiter extends DFA {
def delimType: DelimiterTextType.Type
def lookingFor: String
@@ -147,6 +159,9 @@ trait DFADelimiter extends DFA {
final val isES = lookingFor == "%ES;"
def unparseValue: String
+
+ lazy val strForDiagnostic: String = DFADelimiter.strForDiagnostic(lookingFor)
+
}
/**
diff --git
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/DelimiterParsers.scala
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/DelimiterParsers.scala
index 058d457eb..d8be2ef01 100644
---
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/DelimiterParsers.scala
+++
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/DelimiterParsers.scala
@@ -28,10 +28,14 @@ import
org.apache.daffodil.runtime1.processors.LocalTypedDelimiterIterator
import
org.apache.daffodil.runtime1.processors.RemoteTerminatingMarkupAndLocalTypedDelimiterIterator
import org.apache.daffodil.runtime1.processors.TermRuntimeData
import org.apache.daffodil.runtime1.processors.dfa.DFADelimiter
+import org.apache.daffodil.runtime1.processors.dfa.ParseResult
import org.apache.daffodil.runtime1.processors.dfa.TextParser
object DelimiterTextType extends Enum {
- abstract sealed trait Type extends EnumValueType
+ abstract sealed trait Type extends EnumValueType {
+ override lazy val toString =
+ Misc.initialLowerCase(getClass().getSimpleName.replace("$", ""))
+ }
case object Initiator extends Type
case object Separator extends Type
case object Terminator extends Type
@@ -73,6 +77,34 @@ class DelimiterTextParser(
foundLocalDFAIndex >= 0
}
+ private def localDelimiters(state: PState): Seq[DFADelimiter] = {
+ val localIndexStart = state.mpstate.delimitersLocalIndexStack.top
+ val inScopeDelimiters = state.mpstate.delimiters
+ val res = inScopeDelimiters.slice(localIndexStart,
inScopeDelimiters.length)
+ res
+ }
+
+ private def didNotFindExpectedDelimiter(foundDelimiter: ParseResult, start:
PState): Unit = {
+ val localDelims = localDelimiters(start)
+ val foundDFA = foundDelimiter.matchedDFAs(0)
+ PE(
+ start,
+ """Found enclosing delimiter: %s during scan for local delimiter(s): %s.
+ | The expected delimiter(s) were: %s.
+ | The enclosing delimiter was from %s %s.
+ |""".stripMargin,
+ foundDFA.strForDiagnostic,
+ localDelims.map { d => d.strForDiagnostic }.mkString(", "),
+ localDelims
+ .map { d =>
+ s" ${d.delimType.toString} ${d.strForDiagnostic} from ${d.location}
${d.location.locationDescription}."
+ }
+ .mkString("\n", "\n", ""),
+ foundDFA.location,
+ foundDFA.location.locationDescription,
+ )
+ }
+
override def parse(start: PState): Unit = {
val maybeDelimIter =
@@ -108,15 +140,9 @@ class DelimiterTextParser(
if (foundDelimiter.isDefined) {
if (!containsLocalMatch(foundDelimiter.get.matchedDFAs, start)) {
// It was a remote delimiter but we should have found a local one.
- PE(
- start,
- "Found out of scope delimiter: '%s' '%s'",
- foundDelimiter.get.matchedDFAs(0).lookingFor,
-
Misc.remapStringToVisibleGlyphs(foundDelimiter.get.matchedDelimiterValue.get),
- )
+ didNotFindExpectedDelimiter(foundDelimiter.get, start)
return
}
-
// Consume the found local delimiter but also check if it was supposed
to match
// a non-zero number of bits and throw a runtime SDE if necessary
val nChars = foundDelimiter.get.matchedDelimiterValue.get.length
diff --git
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/SeparatedParseHelper.scala
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/SeparatedParseHelper.scala
index b120d5463..78db82041 100644
---
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/SeparatedParseHelper.scala
+++
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/SeparatedParseHelper.scala
@@ -63,7 +63,7 @@ sealed abstract class SeparatorParseHelper(
cause,
)
case _ =>
- sep.PE(pstate, "Failed to parse %s separator. Cause: %s.", kind, cause)
+ sep.PE(pstate, "Failed to find %s separator. Cause: %s.", kind, cause)
}
}
}
diff --git
a/daffodil-runtime1/src/test/scala/org/apache/daffodil/runtime1/parser/TestCharsetBehavior.scala
b/daffodil-runtime1/src/test/scala/org/apache/daffodil/runtime1/parser/TestCharsetBehavior.scala
index d26147534..77a2912c0 100644
---
a/daffodil-runtime1/src/test/scala/org/apache/daffodil/runtime1/parser/TestCharsetBehavior.scala
+++
b/daffodil-runtime1/src/test/scala/org/apache/daffodil/runtime1/parser/TestCharsetBehavior.scala
@@ -476,7 +476,7 @@ class TestUnicodeErrorTolerance {
val inreader = new InputStreamReader(input, decoder)
val cb = new StringBuffer;
for (i <- 0 to 255) cb.appendCodePoint(inreader.read())
- val act = Misc.remapControlsAndLineEndingsToVisibleGlyphs(
+ val act = Misc.remapStringToVisibleGlyphs(
XMLUtils.remapXMLIllegalCharactersToPUA(cb.toString()),
)
//
diff --git
a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
index fd26368d4..0499c5c29 100644
--- a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
+++ b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
@@ -1981,13 +1981,13 @@ object VerifyTestCase {
lazy val actual8859String =
StandardCharsets.ISO_8859_1.newDecoder().decode(ByteBuffer.wrap(actualBytes)).toString()
lazy val displayableActual =
- Misc.remapControlsAndLineEndingsToVisibleGlyphs(actual8859String)
+ Misc.remapStringToVisibleGlyphs(actual8859String)
val expectedBytes = IOUtils.toByteArray(expectedData)
lazy val expected8859String =
StandardCharsets.ISO_8859_1.newDecoder().decode(ByteBuffer.wrap(expectedBytes)).toString()
lazy val displayableExpected =
- Misc.remapControlsAndLineEndingsToVisibleGlyphs(expected8859String)
+ Misc.remapStringToVisibleGlyphs(expected8859String)
lazy val expectedAndActualDisplayStrings = "\n" +
"Excected data (as iso8859-1): " + displayableExpected + "\n" +
diff --git
a/daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/DelimiterProperties.tdml
b/daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/DelimiterProperties.tdml
index 471126389..ba2391d69 100644
---
a/daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/DelimiterProperties.tdml
+++
b/daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/DelimiterProperties.tdml
@@ -742,9 +742,12 @@
model="ReqFieldMissingAndSepIsPrefixOfTerminator">
<tdml:document><![CDATA[$A$$]]></tdml:document>
<tdml:errors>
- <tdml:error>Parse Error</tdml:error>
- <tdml:error>Found out of scope delimiter</tdml:error>
- <tdml:error>$$</tdml:error>
+ <tdml:error>Parse Error</tdml:error>
+ <tdml:error>prefix separator</tdml:error>
+ <tdml:error>Found enclosing delimiter: '$$'</tdml:error>
+ <tdml:error>during scan for local delimiter(s): '$'</tdml:error>
+ <tdml:error>Separator '$' from</tdml:error>
+ <tdml:error>ex:E1</tdml:error>
</tdml:errors>
</tdml:parserTestCase>
@@ -752,9 +755,12 @@
model="ReqFieldMissingAndSepIsPrefixOfTerminator">
<tdml:document><![CDATA[A$$B$$]]></tdml:document>
<tdml:errors>
- <tdml:error>Parse Error</tdml:error>
- <tdml:error>Found out of scope delimiter</tdml:error>
- <tdml:error>$$</tdml:error>
+ <tdml:error>Parse Error</tdml:error>
+ <tdml:error>infix separator</tdml:error>
+ <tdml:error>Found enclosing delimiter: '$$'</tdml:error>
+ <tdml:error>during scan for local delimiter(s): '$'</tdml:error>
+ <tdml:error>Separator '$' from</tdml:error>
+ <tdml:error>ex:E2</tdml:error>
</tdml:errors>
</tdml:parserTestCase>
@@ -762,8 +768,12 @@
model="ReqFieldMissingAndSepIsPrefixOfTerminator">
<tdml:document><![CDATA[A$$B$$]]></tdml:document>
<tdml:errors>
- <tdml:error>Found out of scope delimiter</tdml:error>
- <tdml:error>$$</tdml:error>
+ <tdml:error>Parse Error</tdml:error>
+ <tdml:error>postfix separator</tdml:error>
+ <tdml:error>Found enclosing delimiter: '$$'</tdml:error>
+ <tdml:error>during scan for local delimiter(s): '$'</tdml:error>
+ <tdml:error>Separator '$' from</tdml:error>
+ <tdml:error>ex:E3</tdml:error>
</tdml:errors>
</tdml:parserTestCase>
diff --git
a/daffodil-test/src/test/resources/org/apache/daffodil/section14/sequence_groups/SequenceGroupDelimiters.tdml
b/daffodil-test/src/test/resources/org/apache/daffodil/section14/sequence_groups/SequenceGroupDelimiters.tdml
index 190438d2b..d3f10849f 100644
---
a/daffodil-test/src/test/resources/org/apache/daffodil/section14/sequence_groups/SequenceGroupDelimiters.tdml
+++
b/daffodil-test/src/test/resources/org/apache/daffodil/section14/sequence_groups/SequenceGroupDelimiters.tdml
@@ -588,7 +588,7 @@
<tdml:errors>
<tdml:error>Parse Error</tdml:error>
- <tdml:error>Failed to parse infix separator</tdml:error>
+ <tdml:error>Failed to find infix separator</tdml:error>
<tdml:error>Separator ';' not found</tdml:error>
</tdml:errors>