This is an automated email from the ASF dual-hosted git repository.
slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git
The following commit(s) were added to refs/heads/main by this push:
new b4f9cec Ensure we use UTF-8 when outputting and comparing SAX output
b4f9cec is described below
commit b4f9cec512783920a78cdfbfc4610e9e10a11e5f
Author: Steve Lawrence <[email protected]>
AuthorDate: Mon Dec 6 14:56:33 2021 -0500
Ensure we use UTF-8 when outputting and comparing SAX output
Whenever Daffodil outputs an infoset, it always does so with UTF-8
encoding. The one exception to this is when converting SAX events to
output in the CLI and TDML runner. This means that depending on a user's
system encoding, tests may act differently and lead to failure. This kind
of system-dependent behavior is not desirable. So instead, this modifies
SAX-related output to use UTF-8 so that a user's environment does not
affect the results of tests or results of a parser.
Also use StandardCharsets instead of an encoding name string where
possible.
DAFFODIL-2600
---
.../src/main/scala/org/apache/daffodil/Main.scala | 3 ++-
.../daffodil/processors/charset/CharsetUtils.scala | 4 ++--
.../scala/org/apache/daffodil/io/TestDecoder.scala | 12 +++++------
.../org/apache/daffodil/io/TestISO8859_1.scala | 4 ++--
.../main/scala/org/apache/daffodil/util/Misc.scala | 3 ++-
.../scala/org/apache/daffodil/xml/XMLUtils.scala | 3 ++-
.../daffodil/infoset/JsonInfosetOutputter.scala | 4 ++--
.../daffodil/infoset/XMLTextInfosetOutputter.scala | 4 ++--
.../DaffodilParseOutputStreamContentHandler.scala | 3 ++-
.../apache/daffodil/processors/DataProcessor.scala | 3 ++-
.../daffodil/processors/parsers/PState.scala | 3 ++-
.../daffodil/parser/TestCharsetBehavior.scala | 24 ++++++++++-----------
.../org/apache/daffodil/tdml/TDMLRunner.scala | 8 +++----
.../daffodil/tdml/TDMLInfosetOutputter.scala | 4 +---
.../tdml/processor/DaffodilTDMLDFDLProcessor.scala | 25 ++++++++++------------
15 files changed, 53 insertions(+), 54 deletions(-)
diff --git a/daffodil-cli/src/main/scala/org/apache/daffodil/Main.scala
b/daffodil-cli/src/main/scala/org/apache/daffodil/Main.scala
index be22d07..7f1b162 100644
--- a/daffodil-cli/src/main/scala/org/apache/daffodil/Main.scala
+++ b/daffodil-cli/src/main/scala/org/apache/daffodil/Main.scala
@@ -26,6 +26,7 @@ import java.net.URI
import java.nio.ByteBuffer
import java.nio.channels.Channels
import java.nio.file.Paths
+import java.nio.charset.StandardCharsets
import java.util.Scanner
import java.util.concurrent.Executors
@@ -944,7 +945,7 @@ object Main {
// InfosetOutputters must manually get the result and write it
to the stream below
eitherOutputterOrHandler match {
case Left(sxml: ScalaXMLInfosetOutputter) => {
- val writer = new java.io.OutputStreamWriter(output,
"UTF-8")
+ val writer = new java.io.OutputStreamWriter(output,
StandardCharsets.UTF_8)
scala.xml.XML.write(writer, sxml.getResult, "UTF-8", true,
null)
writer.flush()
}
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/CharsetUtils.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/CharsetUtils.scala
index b3ad930..a64fb15 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/CharsetUtils.scala
+++
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/CharsetUtils.scala
@@ -19,9 +19,9 @@ package org.apache.daffodil.processors.charset
import java.nio.ByteBuffer
import java.nio.CharBuffer
-import java.nio.charset.{ Charset => JavaCharset }
import java.nio.charset.CoderResult
import java.nio.charset.CodingErrorAction
+import java.nio.charset.StandardCharsets
import org.apache.daffodil.exceptions.Assert
import org.apache.daffodil.io.LocalBufferMixin
import org.apache.daffodil.util.MaybeInt
@@ -48,7 +48,7 @@ object CharsetUtils {
* Java 7 at some point in the future.
*/
lazy val hasJava7DecoderBug = {
- val decoder = JavaCharset.forName("utf-8").newDecoder()
+ val decoder = StandardCharsets.UTF_8.newDecoder()
decoder.onMalformedInput(CodingErrorAction.REPORT)
decoder.onUnmappableCharacter(CodingErrorAction.REPORT)
val bb = ByteBuffer.allocate(6)
diff --git
a/daffodil-io/src/test/scala/org/apache/daffodil/io/TestDecoder.scala
b/daffodil-io/src/test/scala/org/apache/daffodil/io/TestDecoder.scala
index 8c7e673..c036ccc 100644
--- a/daffodil-io/src/test/scala/org/apache/daffodil/io/TestDecoder.scala
+++ b/daffodil-io/src/test/scala/org/apache/daffodil/io/TestDecoder.scala
@@ -20,7 +20,7 @@
//
package org.apache.daffodil.io
//
-import java.nio.charset.{ Charset => JavaCharset }
+import java.nio.charset.StandardCharsets
import java.nio.charset.CodingErrorAction
import java.nio.ByteBuffer
import java.nio.CharBuffer
@@ -66,7 +66,7 @@ class TestDecoder {
* checked for enough room for a surrogate pair.
*/
@Test def testDecoder1: Unit = {
- val originalDecoder = JavaCharset.forName("utf-8").newDecoder()
+ val originalDecoder = StandardCharsets.UTF_8.newDecoder()
originalDecoder.onMalformedInput(CodingErrorAction.REPORT)
originalDecoder.onUnmappableCharacter(CodingErrorAction.REPORT)
val decoder = originalDecoder
@@ -133,7 +133,7 @@ class TestDecoder {
}
@Test def testDecoderWorkaround1: Unit = {
- val originalDecoder = JavaCharset.forName("utf-8").newDecoder()
+ val originalDecoder = StandardCharsets.UTF_8.newDecoder()
originalDecoder.onMalformedInput(CodingErrorAction.REPORT)
originalDecoder.onUnmappableCharacter(CodingErrorAction.REPORT)
val decoder = originalDecoder
@@ -183,7 +183,7 @@ class TestDecoder {
// Delete once it's clear we're never going to have to do that.
//
// @Test def testDecoder2 {
- // val originalDecoder = Charset.forName("utf-8").newDecoder()
+ // val originalDecoder = StandardCharsets.UTF_8.newDecoder()
// originalDecoder.onMalformedInput(CodingErrorAction.REPORT)
// originalDecoder.onUnmappableCharacter(CodingErrorAction.REPORT)
// val decoder = DecoderWrapper(originalDecoder)
@@ -205,7 +205,7 @@ class TestDecoder {
// }
//
// @Test def testDecoder3 {
- // val originalDecoder = Charset.forName("utf-8").newDecoder()
+ // val originalDecoder = StandardCharsets.UTF_8.newDecoder()
// originalDecoder.onMalformedInput(CodingErrorAction.REPLACE)
// originalDecoder.onUnmappableCharacter(CodingErrorAction.REPLACE)
// val decoder = DecoderWrapper(originalDecoder)
@@ -229,7 +229,7 @@ class TestDecoder {
// }
//
// @Test def testDecoder4 {
- // val originalDecoder = Charset.forName("utf-8").newDecoder()
+ // val originalDecoder = StandardCharsets.UTF_8.newDecoder()
// originalDecoder.onMalformedInput(CodingErrorAction.REPLACE)
// originalDecoder.onUnmappableCharacter(CodingErrorAction.REPLACE)
// val decoder = DecoderWrapper(originalDecoder)
diff --git
a/daffodil-io/src/test/scala/org/apache/daffodil/io/TestISO8859_1.scala
b/daffodil-io/src/test/scala/org/apache/daffodil/io/TestISO8859_1.scala
index ebcde04..0ed3b51 100644
--- a/daffodil-io/src/test/scala/org/apache/daffodil/io/TestISO8859_1.scala
+++ b/daffodil-io/src/test/scala/org/apache/daffodil/io/TestISO8859_1.scala
@@ -49,7 +49,7 @@ class TestISO8859_1 {
@Test def
test_ISO_8859_1_has256CodepointsIsomorphicToUnicodeCodepointsU0000toU00FF():
Unit = {
val byteArray = (0 to 255).map { _.toByte }.toArray
val bb = ByteBuffer.wrap(byteArray)
- val cs = Charset.forName("iso-8859-1")
+ val cs = StandardCharsets.ISO_8859_1
val decoder = cs.newDecoder()
decoder.onMalformedInput(CodingErrorAction.REPORT)
decoder.onUnmappableCharacter(CodingErrorAction.REPORT)
@@ -76,4 +76,4 @@ class TestISO8859_1 {
assertArrayEquals(byteArray, actualByteArray)
}
-}
\ No newline at end of file
+}
diff --git a/daffodil-lib/src/main/scala/org/apache/daffodil/util/Misc.scala
b/daffodil-lib/src/main/scala/org/apache/daffodil/util/Misc.scala
index 7fe35c4..4383165 100644
--- a/daffodil-lib/src/main/scala/org/apache/daffodil/util/Misc.scala
+++ b/daffodil-lib/src/main/scala/org/apache/daffodil/util/Misc.scala
@@ -30,6 +30,7 @@ import java.nio.channels.ReadableByteChannel
import java.nio.channels.WritableByteChannel
import java.nio.charset.CodingErrorAction
import java.nio.charset.{ Charset => JavaCharset }
+import java.nio.charset.StandardCharsets
import java.nio.file.Files
import java.nio.file.Paths
@@ -340,7 +341,7 @@ object Misc {
// Moved here from Compiler object.
def stringToReadableByteChannel(s: String): ReadableByteChannel = {
- val bytes = s.getBytes("utf-8") // never use default charset. NEVER.
+ val bytes = s.getBytes(StandardCharsets.UTF_8) // never use default
charset. NEVER.
byteArrayToReadableByteChannel(bytes)
}
diff --git a/daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala
b/daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala
index b183599..ae8744c 100644
--- a/daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala
+++ b/daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala
@@ -18,6 +18,7 @@
package org.apache.daffodil.xml
import java.io.File
+import java.nio.charset.StandardCharsets
import java.nio.file.Files
import java.nio.file.Paths
import java.nio.file.StandardOpenOption
@@ -1214,7 +1215,7 @@ Differences were (path, expected, actual):
val pp = new org.apache.daffodil.xml.PrettyPrinter(2)
val xmlString = pp.format(xml)
val fos = new java.io.FileOutputStream(tmpSchemaFile)
- val fw = new java.io.OutputStreamWriter(fos, "utf-8")
+ val fw = new java.io.OutputStreamWriter(fos, StandardCharsets.UTF_8)
fw.write(xmlString)
fw.close()
tmpSchemaFile
diff --git
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/JsonInfosetOutputter.scala
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/JsonInfosetOutputter.scala
index c521e37..306e54f 100644
---
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/JsonInfosetOutputter.scala
+++
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/JsonInfosetOutputter.scala
@@ -17,7 +17,7 @@
package org.apache.daffodil.infoset
-import java.nio.charset.Charset
+import java.nio.charset.StandardCharsets
import com.fasterxml.jackson.core.io.JsonStringEncoder
@@ -34,7 +34,7 @@ class JsonInfosetOutputter private (writer: java.io.Writer,
pretty: Boolean, dum
}
def this(os: java.io.OutputStream, pretty: Boolean) = {
- this(new java.io.OutputStreamWriter(os, Charset.forName("UTF-8")), pretty,
0)
+ this(new java.io.OutputStreamWriter(os, StandardCharsets.UTF_8), pretty, 0)
}
// Keeps track of if the next element we see is the first child or not of a
diff --git
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLTextInfosetOutputter.scala
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLTextInfosetOutputter.scala
index 0369424..b88e9a3 100644
---
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLTextInfosetOutputter.scala
+++
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLTextInfosetOutputter.scala
@@ -17,7 +17,7 @@
package org.apache.daffodil.infoset
-import java.nio.charset.Charset
+import java.nio.charset.StandardCharsets
import org.apache.daffodil.util.Indentable
import org.apache.daffodil.dpath.NodeInfo
@@ -38,7 +38,7 @@ class XMLTextInfosetOutputter private (writer:
java.io.Writer, pretty: Boolean,
}
def this(os: java.io.OutputStream, pretty: Boolean) = {
- this(new java.io.OutputStreamWriter(os, Charset.forName("UTF-8")), pretty,
0)
+ this(new java.io.OutputStreamWriter(os, StandardCharsets.UTF_8), pretty, 0)
}
private val sb = new StringBuilder()
diff --git
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
index fc639c1..5121483 100644
---
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
+++
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
@@ -19,6 +19,7 @@ package org.apache.daffodil.processors
import java.io.OutputStream
import java.io.OutputStreamWriter
+import java.nio.charset.StandardCharsets
import scala.xml.NamespaceBinding
@@ -42,7 +43,7 @@ import org.xml.sax.Locator
*/
class DaffodilParseOutputStreamContentHandler(out: OutputStream, pretty:
Boolean = false)
extends ContentHandler with Indentable {
- private val writer = new OutputStreamWriter(out)
+ private val writer = new OutputStreamWriter(out, StandardCharsets.UTF_8)
/**
* represents the currently active prefix mappings (i.e all mappings include
from parent element),
* which is usefully for doing lookups
diff --git
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DataProcessor.scala
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DataProcessor.scala
index 98d4a3f..52292fd 100644
---
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DataProcessor.scala
+++
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DataProcessor.scala
@@ -23,6 +23,7 @@ import java.io.ObjectOutputStream
import java.nio.CharBuffer
import java.nio.LongBuffer
import java.nio.channels.Channels
+import java.nio.charset.StandardCharsets
import java.nio.file.Files
import java.util.zip.GZIPOutputStream
import scala.collection.immutable.Queue
@@ -361,7 +362,7 @@ class DataProcessor private (
// write a null-terminated UTF-8 string as a simple version identifier
val headerString = "DAFFODIL " + Misc.getDaffodilVersion + "\u0000"
- os.write(headerString.getBytes("utf-8"))
+ os.write(headerString.getBytes(StandardCharsets.UTF_8))
// serialize and compress the data processor to the outputstream
val oos = new ObjectOutputStream(new GZIPOutputStream(os))
diff --git
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/PState.scala
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/PState.scala
index b39ad4e..a398dc0 100644
---
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/PState.scala
+++
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/PState.scala
@@ -18,6 +18,7 @@
package org.apache.daffodil.processors.parsers
import java.nio.channels.Channels
+import java.nio.charset.StandardCharsets
import java.nio.file.Files
import java.nio.file.Path
import scala.collection.mutable
@@ -719,7 +720,7 @@ object PState {
output: InfosetOutputter,
dataProc: DFDL.DataProcessor,
areDebugging: Boolean): PState = {
- val in = InputSourceDataInputStream(data.getBytes("utf-8"))
+ val in = InputSourceDataInputStream(data.getBytes(StandardCharsets.UTF_8))
createInitialPState(root, in, output, dataProc, areDebugging)
}
diff --git
a/daffodil-runtime1/src/test/scala/org/apache/daffodil/parser/TestCharsetBehavior.scala
b/daffodil-runtime1/src/test/scala/org/apache/daffodil/parser/TestCharsetBehavior.scala
index e371412..6d64182 100644
---
a/daffodil-runtime1/src/test/scala/org/apache/daffodil/parser/TestCharsetBehavior.scala
+++
b/daffodil-runtime1/src/test/scala/org/apache/daffodil/parser/TestCharsetBehavior.scala
@@ -114,7 +114,7 @@ class TestUnicodeErrorTolerance {
*/
@Test def testUTF8Decode3ByteSurrogateIsMalformed(): Unit = {
// val exp = "\udcd0" // that's the trailing surrogate in the surrogate
pair for U+1d4d0
- val cs = Charset.forName("utf-8")
+ val cs = StandardCharsets.UTF_8
val dn = cs.displayName()
assertEquals("UTF-8", dn)
val decoder = cs.newDecoder()
@@ -165,7 +165,7 @@ class TestUnicodeErrorTolerance {
*/
@Test def testUTF8Encode3ByteSurrogateIsMalformed(): Unit = {
val s = "\udcd0\udcd0\udcd0\udcd0" // that's the 2nd half of a surrogate
pair for U+1d4d0
- val cs = Charset.forName("utf-8")
+ val cs = StandardCharsets.UTF_8
val dn = cs.displayName()
assertEquals("UTF-8", dn)
val encoder = cs.newEncoder()
@@ -184,7 +184,7 @@ class TestUnicodeErrorTolerance {
*/
@Test def testUTF8ToSurrogatePair(): Unit = {
val exp = "\ud800\udc00" // surrogate pair for U+010000
- val cs = Charset.forName("utf-8")
+ val cs = StandardCharsets.UTF_8
val dn = cs.displayName()
assertEquals("UTF-8", dn)
val decoder = cs.newDecoder()
@@ -204,7 +204,7 @@ class TestUnicodeErrorTolerance {
* of this code point possible.
*/
@Test def testUTF8Extreme6ByteToSurrogatePair(): Unit = {
- val cs = Charset.forName("utf-8")
+ val cs = StandardCharsets.UTF_8
val dn = cs.displayName()
assertEquals("UTF-8", dn)
val decoder = cs.newDecoder()
@@ -219,7 +219,7 @@ class TestUnicodeErrorTolerance {
}
@Test def testUTF8Extreme4ByteToSurrogatePair(): Unit = {
- val cs = Charset.forName("utf-8")
+ val cs = StandardCharsets.UTF_8
val dn = cs.displayName()
assertEquals("UTF-8", dn)
val decoder = cs.newDecoder()
@@ -239,7 +239,7 @@ class TestUnicodeErrorTolerance {
*/
@Test def testUTF8Decode6ByteSurrogatePairIsMalformed(): Unit = {
// val exp = "\ud4d0" // that's the 2nd half of a surrogate pair for
U+1d4d0
- val cs = Charset.forName("utf-8")
+ val cs = StandardCharsets.UTF_8
val dn = cs.displayName()
assertEquals("UTF-8", dn)
val decoder = cs.newDecoder()
@@ -271,7 +271,7 @@ class TestUnicodeErrorTolerance {
*/
@Test def testUTF16DecodeBadSurrogate(): Unit = {
val exp = "\ud4d0" // that's the 2nd half of a surrogate pair for U+1d4d0
- val cs = Charset.forName("utf-16BE")
+ val cs = StandardCharsets.UTF_16BE
val dn = cs.displayName()
assertEquals("UTF-16BE", dn)
val decoder = cs.newDecoder()
@@ -286,7 +286,7 @@ class TestUnicodeErrorTolerance {
*/
@Test def testUTF16DecodeBOMsInMidString(): Unit = {
val exp = "\uFEFF@\uFEFF@" // BOM, then @ then ZWNBS (aka BOM), then @
- val cs = Charset.forName("utf-16BE")
+ val cs = StandardCharsets.UTF_16BE
val dn = cs.displayName()
assertEquals("UTF-16BE", dn)
val decoder = cs.newDecoder()
@@ -298,7 +298,7 @@ class TestUnicodeErrorTolerance {
def howManyBadBytes(inBuf: Array[Byte]): Int = {
- val cs = Charset.forName("utf-8")
+ val cs = StandardCharsets.UTF_8
val dn = cs.displayName()
var counter: Int = 0
@@ -356,7 +356,7 @@ class TestUnicodeErrorTolerance {
}
def replaceBadCharacters(inBuf: Array[Byte]): String = {
- val cs = Charset.forName("utf-8")
+ val cs = StandardCharsets.UTF_8
val dn = cs.displayName()
assertEquals("UTF-8", dn)
val decoder = cs.newDecoder()
@@ -367,7 +367,7 @@ class TestUnicodeErrorTolerance {
}
def replaceBadCharactersEncoding(s: String): Array[Byte] = {
- val cs = Charset.forName("utf-8")
+ val cs = StandardCharsets.UTF_8
val dn = cs.displayName()
assertEquals("UTF-8", dn)
val encoder = cs.newEncoder()
@@ -424,7 +424,7 @@ class TestUnicodeErrorTolerance {
* This test shows that Java ISO-8859-1 can decode any byte at all.
*/
@Test def testISO8859HandlesAllBytes(): Unit = {
- val cs = Charset.forName("iso-8859-1")
+ val cs = StandardCharsets.ISO_8859_1
val decoder = cs.newDecoder()
val inBuf = Array[Int](
diff --git
a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
index 8a1afb8..d26fbf5 100644
--- a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
+++ b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
@@ -27,7 +27,7 @@ import java.nio.ByteBuffer
import java.nio.CharBuffer
import java.nio.LongBuffer
import java.nio.charset.CoderResult
-import java.nio.charset.{Charset => JavaCharset}
+import java.nio.charset.StandardCharsets
import scala.collection.mutable
import scala.language.postfixOps
@@ -1781,16 +1781,14 @@ object VerifyTestCase {
}
}
- private val cs8859 = JavaCharset.forName("iso-8859-1")
-
def verifyBinaryOrMixedData(expectedData: InputStream, actualOutStream:
java.io.ByteArrayOutputStream,
implString: Option[String]): Unit = {
val actualBytes = actualOutStream.toByteArray
- lazy val actual8859String =
cs8859.newDecoder().decode(ByteBuffer.wrap(actualBytes)).toString()
+ lazy val actual8859String =
StandardCharsets.ISO_8859_1.newDecoder().decode(ByteBuffer.wrap(actualBytes)).toString()
lazy val displayableActual =
Misc.remapControlsAndLineEndingsToVisibleGlyphs(actual8859String)
val expectedBytes = IOUtils.toByteArray(expectedData)
- lazy val expected8859String =
cs8859.newDecoder().decode(ByteBuffer.wrap(expectedBytes)).toString()
+ lazy val expected8859String =
StandardCharsets.ISO_8859_1.newDecoder().decode(ByteBuffer.wrap(expectedBytes)).toString()
lazy val displayableExpected =
Misc.remapControlsAndLineEndingsToVisibleGlyphs(expected8859String)
lazy val expectedAndActualDisplayStrings = "\n" +
diff --git
a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/TDMLInfosetOutputter.scala
b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/TDMLInfosetOutputter.scala
index 0259c79..fa8d1a9 100644
---
a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/TDMLInfosetOutputter.scala
+++
b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/TDMLInfosetOutputter.scala
@@ -40,7 +40,7 @@ class TDMLInfosetOutputter() extends InfosetOutputter {
private def implString: String = "daffodil"
private val jsonStream = new ByteArrayOutputStream()
- private val xmlStream = new ByteArrayOutputStream()
+ val xmlStream = new ByteArrayOutputStream()
private val scalaOut = new ScalaXMLInfosetOutputter()
private val jdomOut = new JDOMInfosetOutputter()
@@ -104,8 +104,6 @@ class TDMLInfosetOutputter() extends InfosetOutputter {
def getResult() = scalaOut.getResult
- def getXmlString() = xmlStream.toString
-
def toInfosetInputter() = {
val scalaIn = new ScalaXMLInfosetInputter(scalaOut.getResult)
val jdomIn = new JDOMInfosetInputter(jdomOut.getResult)
diff --git
a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/processor/DaffodilTDMLDFDLProcessor.scala
b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/processor/DaffodilTDMLDFDLProcessor.scala
index 54bcb35..fe4b625 100644
---
a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/processor/DaffodilTDMLDFDLProcessor.scala
+++
b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/processor/DaffodilTDMLDFDLProcessor.scala
@@ -21,6 +21,7 @@ import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
import java.io.OutputStreamWriter
import java.nio.channels.Channels
+import java.nio.charset.StandardCharsets
import java.nio.file.Files
import java.nio.file.Path
import java.nio.file.Paths
@@ -269,10 +270,9 @@ class DaffodilTDMLDFDLProcessor private (private var dp:
DataProcessor) extends
unparse(inputter, resNode, outStream)
}
- def unparse(inputter: TDMLInfosetInputter, infosetXML: scala.xml.Node,
outStream: java.io
- .OutputStream): TDMLUnparseResult = {
+ def unparse(inputter: TDMLInfosetInputter, infosetXML: scala.xml.Node,
outStream: java.io.OutputStream): TDMLUnparseResult = {
val bos = new ByteArrayOutputStream()
- val osw = new OutputStreamWriter(bos)
+ val osw = new OutputStreamWriter(bos, StandardCharsets.UTF_8)
scala.xml.XML.write(osw, infosetXML, "UTF-8", xmlDecl = true, null)
osw.flush()
osw.close()
@@ -312,7 +312,7 @@ class DaffodilTDMLDFDLProcessor private (private var dp:
DataProcessor) extends
xri.parse(sis)
if (!actual.isError && !errorHandler.isError) {
- verifySameParseOutput(outputter, saxOutputStream)
+ verifySameParseOutput(outputter.xmlStream, saxOutputStream)
}
val dpParseDiag = actual.getDiagnostics.map(_.getMessage())
val saxParseDiag = errorHandler.getDiagnostics.map(_.getMessage())
@@ -367,18 +367,16 @@ class DaffodilTDMLDFDLProcessor private (private var dp:
DataProcessor) extends
new DaffodilTDMLUnparseResult(actualDP, dpOutputStream)
}
- def verifySameParseOutput(dpOutputter: TDMLInfosetOutputter, outputStream:
ByteArrayOutputStream): Unit = {
- val dpParseOutputString = dpOutputter.getXmlString()
- val saxParseOutputString = outputStream.toString
+ def verifySameParseOutput(dpOutputStream: ByteArrayOutputStream,
saxOutputStream: ByteArrayOutputStream): Unit = {
+ val dpParseOutputString = dpOutputStream.toString("UTF-8")
+ val saxParseOutputString = saxOutputStream.toString("UTF-8")
+
+ val dpParseXMLNodeOutput = scala.xml.XML.loadString(dpParseOutputString)
val saxParseXMLNodeOutput = scala.xml.XML.loadString(saxParseOutputString)
- // scala.xml.XML.loadString reverses the order of the namespace mappings,
so we call it for the
- // dpParseXMLNodeOutput as well so the reversal is mirrored and we can do
a proper prefixes and namespaces
- // comparison. dpOutputter.getOutput returns it in the right order, which
is why we don't use it
- val dpParseXMLNodeOutputReloaded =
scala.xml.XML.loadString(dpParseOutputString)
try {
XMLUtils.compareAndReport(
- dpParseXMLNodeOutputReloaded,
+ dpParseXMLNodeOutput,
saxParseXMLNodeOutput,
checkNamespaces = true,
checkPrefixes = true)
@@ -391,8 +389,7 @@ class DaffodilTDMLDFDLProcessor private (private var dp:
DataProcessor) extends
}
}
- private def verifySameDiagnostics(seqDiagExpected: Seq[String],
seqDiagActual: Seq[String]): Unit
- = {
+ private def verifySameDiagnostics(seqDiagExpected: Seq[String],
seqDiagActual: Seq[String]): Unit = {
val expected = seqDiagExpected.sorted
val actual = seqDiagActual.sorted