This is an automated email from the ASF dual-hosted git repository.

slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git


The following commit(s) were added to refs/heads/main by this push:
     new b4f9cec  Ensure we use UTF-8 when outputting and comparing SAX output
b4f9cec is described below

commit b4f9cec512783920a78cdfbfc4610e9e10a11e5f
Author: Steve Lawrence <[email protected]>
AuthorDate: Mon Dec 6 14:56:33 2021 -0500

    Ensure we use UTF-8 when outputting and comparing SAX output
    
    Whenever Daffodil outputs an infoset, it always does so with UTF-8
    encoding. The one exception to this is when converting SAX events to
    output in the CLI and TDML runner. This means that depending on a user's
    system encoding, tests may act differently and lead to failure. This kind
    of system-dependent behavior is not desirable. So instead, this modifies
    SAX-related output to use UTF-8 so that a user's environment does not
    affect the results of tests or results of a parser.
    
    Also use StandardCharsets instead of an encoding name string where
    possible.
    
    DAFFODIL-2600
---
 .../src/main/scala/org/apache/daffodil/Main.scala  |  3 ++-
 .../daffodil/processors/charset/CharsetUtils.scala |  4 ++--
 .../scala/org/apache/daffodil/io/TestDecoder.scala | 12 +++++------
 .../org/apache/daffodil/io/TestISO8859_1.scala     |  4 ++--
 .../main/scala/org/apache/daffodil/util/Misc.scala |  3 ++-
 .../scala/org/apache/daffodil/xml/XMLUtils.scala   |  3 ++-
 .../daffodil/infoset/JsonInfosetOutputter.scala    |  4 ++--
 .../daffodil/infoset/XMLTextInfosetOutputter.scala |  4 ++--
 .../DaffodilParseOutputStreamContentHandler.scala  |  3 ++-
 .../apache/daffodil/processors/DataProcessor.scala |  3 ++-
 .../daffodil/processors/parsers/PState.scala       |  3 ++-
 .../daffodil/parser/TestCharsetBehavior.scala      | 24 ++++++++++-----------
 .../org/apache/daffodil/tdml/TDMLRunner.scala      |  8 +++----
 .../daffodil/tdml/TDMLInfosetOutputter.scala       |  4 +---
 .../tdml/processor/DaffodilTDMLDFDLProcessor.scala | 25 ++++++++++------------
 15 files changed, 53 insertions(+), 54 deletions(-)

diff --git a/daffodil-cli/src/main/scala/org/apache/daffodil/Main.scala 
b/daffodil-cli/src/main/scala/org/apache/daffodil/Main.scala
index be22d07..7f1b162 100644
--- a/daffodil-cli/src/main/scala/org/apache/daffodil/Main.scala
+++ b/daffodil-cli/src/main/scala/org/apache/daffodil/Main.scala
@@ -26,6 +26,7 @@ import java.net.URI
 import java.nio.ByteBuffer
 import java.nio.channels.Channels
 import java.nio.file.Paths
+import java.nio.charset.StandardCharsets
 import java.util.Scanner
 import java.util.concurrent.Executors
 
@@ -944,7 +945,7 @@ object Main {
                 // InfosetOutputters must manually get the result and write it 
to the stream below
                 eitherOutputterOrHandler match {
                   case Left(sxml: ScalaXMLInfosetOutputter) => {
-                    val writer = new java.io.OutputStreamWriter(output, 
"UTF-8")
+                    val writer = new java.io.OutputStreamWriter(output, 
StandardCharsets.UTF_8)
                     scala.xml.XML.write(writer, sxml.getResult, "UTF-8", true, 
null)
                     writer.flush()
                   }
diff --git 
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/CharsetUtils.scala
 
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/CharsetUtils.scala
index b3ad930..a64fb15 100644
--- 
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/CharsetUtils.scala
+++ 
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/CharsetUtils.scala
@@ -19,9 +19,9 @@ package org.apache.daffodil.processors.charset
 
 import java.nio.ByteBuffer
 import java.nio.CharBuffer
-import java.nio.charset.{ Charset => JavaCharset }
 import java.nio.charset.CoderResult
 import java.nio.charset.CodingErrorAction
+import java.nio.charset.StandardCharsets
 import org.apache.daffodil.exceptions.Assert
 import org.apache.daffodil.io.LocalBufferMixin
 import org.apache.daffodil.util.MaybeInt
@@ -48,7 +48,7 @@ object CharsetUtils {
    * Java 7 at some point in the future.
    */
   lazy val hasJava7DecoderBug = {
-    val decoder = JavaCharset.forName("utf-8").newDecoder()
+    val decoder = StandardCharsets.UTF_8.newDecoder()
     decoder.onMalformedInput(CodingErrorAction.REPORT)
     decoder.onUnmappableCharacter(CodingErrorAction.REPORT)
     val bb = ByteBuffer.allocate(6)
diff --git 
a/daffodil-io/src/test/scala/org/apache/daffodil/io/TestDecoder.scala 
b/daffodil-io/src/test/scala/org/apache/daffodil/io/TestDecoder.scala
index 8c7e673..c036ccc 100644
--- a/daffodil-io/src/test/scala/org/apache/daffodil/io/TestDecoder.scala
+++ b/daffodil-io/src/test/scala/org/apache/daffodil/io/TestDecoder.scala
@@ -20,7 +20,7 @@
 //
 package org.apache.daffodil.io
 //
-import java.nio.charset.{ Charset => JavaCharset }
+import java.nio.charset.StandardCharsets
 import java.nio.charset.CodingErrorAction
 import java.nio.ByteBuffer
 import java.nio.CharBuffer
@@ -66,7 +66,7 @@ class TestDecoder {
    * checked for enough room for a surrogate pair.
    */
   @Test def testDecoder1: Unit = {
-    val originalDecoder = JavaCharset.forName("utf-8").newDecoder()
+    val originalDecoder = StandardCharsets.UTF_8.newDecoder()
     originalDecoder.onMalformedInput(CodingErrorAction.REPORT)
     originalDecoder.onUnmappableCharacter(CodingErrorAction.REPORT)
     val decoder = originalDecoder
@@ -133,7 +133,7 @@ class TestDecoder {
   }
 
   @Test def testDecoderWorkaround1: Unit = {
-    val originalDecoder = JavaCharset.forName("utf-8").newDecoder()
+    val originalDecoder = StandardCharsets.UTF_8.newDecoder()
     originalDecoder.onMalformedInput(CodingErrorAction.REPORT)
     originalDecoder.onUnmappableCharacter(CodingErrorAction.REPORT)
     val decoder = originalDecoder
@@ -183,7 +183,7 @@ class TestDecoder {
   // Delete once it's clear we're never going to have to do that.
   //
   //  @Test def testDecoder2 {
-  //    val originalDecoder = Charset.forName("utf-8").newDecoder()
+  //    val originalDecoder = StandardCharsets.UTF_8.newDecoder()
   //    originalDecoder.onMalformedInput(CodingErrorAction.REPORT)
   //    originalDecoder.onUnmappableCharacter(CodingErrorAction.REPORT)
   //    val decoder = DecoderWrapper(originalDecoder)
@@ -205,7 +205,7 @@ class TestDecoder {
   //  }
   //
   //  @Test def testDecoder3 {
-  //    val originalDecoder = Charset.forName("utf-8").newDecoder()
+  //    val originalDecoder = StandardCharsets.UTF_8.newDecoder()
   //    originalDecoder.onMalformedInput(CodingErrorAction.REPLACE)
   //    originalDecoder.onUnmappableCharacter(CodingErrorAction.REPLACE)
   //    val decoder = DecoderWrapper(originalDecoder)
@@ -229,7 +229,7 @@ class TestDecoder {
   //  }
   //
   //  @Test def testDecoder4 {
-  //    val originalDecoder = Charset.forName("utf-8").newDecoder()
+  //    val originalDecoder = StandardCharsets.UTF_8.newDecoder()
   //    originalDecoder.onMalformedInput(CodingErrorAction.REPLACE)
   //    originalDecoder.onUnmappableCharacter(CodingErrorAction.REPLACE)
   //    val decoder = DecoderWrapper(originalDecoder)
diff --git 
a/daffodil-io/src/test/scala/org/apache/daffodil/io/TestISO8859_1.scala 
b/daffodil-io/src/test/scala/org/apache/daffodil/io/TestISO8859_1.scala
index ebcde04..0ed3b51 100644
--- a/daffodil-io/src/test/scala/org/apache/daffodil/io/TestISO8859_1.scala
+++ b/daffodil-io/src/test/scala/org/apache/daffodil/io/TestISO8859_1.scala
@@ -49,7 +49,7 @@ class TestISO8859_1 {
   @Test def 
test_ISO_8859_1_has256CodepointsIsomorphicToUnicodeCodepointsU0000toU00FF(): 
Unit = {
     val byteArray = (0 to 255).map { _.toByte }.toArray
     val bb = ByteBuffer.wrap(byteArray)
-    val cs = Charset.forName("iso-8859-1")
+    val cs = StandardCharsets.ISO_8859_1
     val decoder = cs.newDecoder()
     decoder.onMalformedInput(CodingErrorAction.REPORT)
     decoder.onUnmappableCharacter(CodingErrorAction.REPORT)
@@ -76,4 +76,4 @@ class TestISO8859_1 {
     assertArrayEquals(byteArray, actualByteArray)
   }
 
-}
\ No newline at end of file
+}
diff --git a/daffodil-lib/src/main/scala/org/apache/daffodil/util/Misc.scala 
b/daffodil-lib/src/main/scala/org/apache/daffodil/util/Misc.scala
index 7fe35c4..4383165 100644
--- a/daffodil-lib/src/main/scala/org/apache/daffodil/util/Misc.scala
+++ b/daffodil-lib/src/main/scala/org/apache/daffodil/util/Misc.scala
@@ -30,6 +30,7 @@ import java.nio.channels.ReadableByteChannel
 import java.nio.channels.WritableByteChannel
 import java.nio.charset.CodingErrorAction
 import java.nio.charset.{ Charset => JavaCharset }
+import java.nio.charset.StandardCharsets
 import java.nio.file.Files
 import java.nio.file.Paths
 
@@ -340,7 +341,7 @@ object Misc {
   // Moved here from Compiler object.
 
   def stringToReadableByteChannel(s: String): ReadableByteChannel = {
-    val bytes = s.getBytes("utf-8") // never use default charset. NEVER.
+    val bytes = s.getBytes(StandardCharsets.UTF_8) // never use default 
charset. NEVER.
     byteArrayToReadableByteChannel(bytes)
   }
 
diff --git a/daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala 
b/daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala
index b183599..ae8744c 100644
--- a/daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala
+++ b/daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala
@@ -18,6 +18,7 @@
 package org.apache.daffodil.xml
 
 import java.io.File
+import java.nio.charset.StandardCharsets
 import java.nio.file.Files
 import java.nio.file.Paths
 import java.nio.file.StandardOpenOption
@@ -1214,7 +1215,7 @@ Differences were (path, expected, actual):
     val pp = new org.apache.daffodil.xml.PrettyPrinter(2)
     val xmlString = pp.format(xml)
     val fos = new java.io.FileOutputStream(tmpSchemaFile)
-    val fw = new java.io.OutputStreamWriter(fos, "utf-8")
+    val fw = new java.io.OutputStreamWriter(fos, StandardCharsets.UTF_8)
     fw.write(xmlString)
     fw.close()
     tmpSchemaFile
diff --git 
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/JsonInfosetOutputter.scala
 
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/JsonInfosetOutputter.scala
index c521e37..306e54f 100644
--- 
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/JsonInfosetOutputter.scala
+++ 
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/JsonInfosetOutputter.scala
@@ -17,7 +17,7 @@
 
 package org.apache.daffodil.infoset
 
-import java.nio.charset.Charset
+import java.nio.charset.StandardCharsets
 
 import com.fasterxml.jackson.core.io.JsonStringEncoder
 
@@ -34,7 +34,7 @@ class JsonInfosetOutputter private (writer: java.io.Writer, 
pretty: Boolean, dum
   }
 
   def this(os: java.io.OutputStream, pretty: Boolean) = {
-    this(new java.io.OutputStreamWriter(os, Charset.forName("UTF-8")), pretty, 
0)
+    this(new java.io.OutputStreamWriter(os, StandardCharsets.UTF_8), pretty, 0)
   }
 
   // Keeps track of if the next element we see is the first child or not of a
diff --git 
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLTextInfosetOutputter.scala
 
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLTextInfosetOutputter.scala
index 0369424..b88e9a3 100644
--- 
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLTextInfosetOutputter.scala
+++ 
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLTextInfosetOutputter.scala
@@ -17,7 +17,7 @@
 
 package org.apache.daffodil.infoset
 
-import java.nio.charset.Charset
+import java.nio.charset.StandardCharsets
 
 import org.apache.daffodil.util.Indentable
 import org.apache.daffodil.dpath.NodeInfo
@@ -38,7 +38,7 @@ class XMLTextInfosetOutputter private (writer: 
java.io.Writer, pretty: Boolean,
   }
 
   def this(os: java.io.OutputStream, pretty: Boolean) = {
-    this(new java.io.OutputStreamWriter(os, Charset.forName("UTF-8")), pretty, 
0)
+    this(new java.io.OutputStreamWriter(os, StandardCharsets.UTF_8), pretty, 0)
   }
 
   private val sb = new StringBuilder()
diff --git 
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
 
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
index fc639c1..5121483 100644
--- 
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
+++ 
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
@@ -19,6 +19,7 @@ package org.apache.daffodil.processors
 
 import java.io.OutputStream
 import java.io.OutputStreamWriter
+import java.nio.charset.StandardCharsets
 
 import scala.xml.NamespaceBinding
 
@@ -42,7 +43,7 @@ import org.xml.sax.Locator
  */
 class DaffodilParseOutputStreamContentHandler(out: OutputStream, pretty: 
Boolean = false)
   extends ContentHandler with Indentable {
-  private val writer = new OutputStreamWriter(out)
+  private val writer = new OutputStreamWriter(out, StandardCharsets.UTF_8)
   /**
    * represents the currently active prefix mappings (i.e all mappings include 
from parent element),
    * which is usefully for doing lookups
diff --git 
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DataProcessor.scala
 
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DataProcessor.scala
index 98d4a3f..52292fd 100644
--- 
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DataProcessor.scala
+++ 
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DataProcessor.scala
@@ -23,6 +23,7 @@ import java.io.ObjectOutputStream
 import java.nio.CharBuffer
 import java.nio.LongBuffer
 import java.nio.channels.Channels
+import java.nio.charset.StandardCharsets
 import java.nio.file.Files
 import java.util.zip.GZIPOutputStream
 import scala.collection.immutable.Queue
@@ -361,7 +362,7 @@ class DataProcessor private (
 
     // write a null-terminated UTF-8 string as a simple version identifier
     val headerString = "DAFFODIL " + Misc.getDaffodilVersion + "\u0000"
-    os.write(headerString.getBytes("utf-8"))
+    os.write(headerString.getBytes(StandardCharsets.UTF_8))
 
     // serialize and compress the data processor to the outputstream
     val oos = new ObjectOutputStream(new GZIPOutputStream(os))
diff --git 
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/PState.scala
 
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/PState.scala
index b39ad4e..a398dc0 100644
--- 
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/PState.scala
+++ 
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/PState.scala
@@ -18,6 +18,7 @@
 package org.apache.daffodil.processors.parsers
 
 import java.nio.channels.Channels
+import java.nio.charset.StandardCharsets
 import java.nio.file.Files
 import java.nio.file.Path
 import scala.collection.mutable
@@ -719,7 +720,7 @@ object PState {
     output: InfosetOutputter,
     dataProc: DFDL.DataProcessor,
     areDebugging: Boolean): PState = {
-    val in = InputSourceDataInputStream(data.getBytes("utf-8"))
+    val in = InputSourceDataInputStream(data.getBytes(StandardCharsets.UTF_8))
     createInitialPState(root, in, output, dataProc, areDebugging)
   }
 
diff --git 
a/daffodil-runtime1/src/test/scala/org/apache/daffodil/parser/TestCharsetBehavior.scala
 
b/daffodil-runtime1/src/test/scala/org/apache/daffodil/parser/TestCharsetBehavior.scala
index e371412..6d64182 100644
--- 
a/daffodil-runtime1/src/test/scala/org/apache/daffodil/parser/TestCharsetBehavior.scala
+++ 
b/daffodil-runtime1/src/test/scala/org/apache/daffodil/parser/TestCharsetBehavior.scala
@@ -114,7 +114,7 @@ class TestUnicodeErrorTolerance {
    */
   @Test def testUTF8Decode3ByteSurrogateIsMalformed(): Unit = {
     //    val exp = "\udcd0" // that's the trailing surrogate in the surrogate 
pair for U+1d4d0
-    val cs = Charset.forName("utf-8")
+    val cs = StandardCharsets.UTF_8
     val dn = cs.displayName()
     assertEquals("UTF-8", dn)
     val decoder = cs.newDecoder()
@@ -165,7 +165,7 @@ class TestUnicodeErrorTolerance {
    */
   @Test def testUTF8Encode3ByteSurrogateIsMalformed(): Unit = {
     val s = "\udcd0\udcd0\udcd0\udcd0" // that's the 2nd half of a surrogate 
pair for U+1d4d0
-    val cs = Charset.forName("utf-8")
+    val cs = StandardCharsets.UTF_8
     val dn = cs.displayName()
     assertEquals("UTF-8", dn)
     val encoder = cs.newEncoder()
@@ -184,7 +184,7 @@ class TestUnicodeErrorTolerance {
    */
   @Test def testUTF8ToSurrogatePair(): Unit = {
     val exp = "\ud800\udc00" // surrogate pair for U+010000
-    val cs = Charset.forName("utf-8")
+    val cs = StandardCharsets.UTF_8
     val dn = cs.displayName()
     assertEquals("UTF-8", dn)
     val decoder = cs.newDecoder()
@@ -204,7 +204,7 @@ class TestUnicodeErrorTolerance {
    * of this code point possible.
    */
   @Test def testUTF8Extreme6ByteToSurrogatePair(): Unit = {
-    val cs = Charset.forName("utf-8")
+    val cs = StandardCharsets.UTF_8
     val dn = cs.displayName()
     assertEquals("UTF-8", dn)
     val decoder = cs.newDecoder()
@@ -219,7 +219,7 @@ class TestUnicodeErrorTolerance {
   }
 
   @Test def testUTF8Extreme4ByteToSurrogatePair(): Unit = {
-    val cs = Charset.forName("utf-8")
+    val cs = StandardCharsets.UTF_8
     val dn = cs.displayName()
     assertEquals("UTF-8", dn)
     val decoder = cs.newDecoder()
@@ -239,7 +239,7 @@ class TestUnicodeErrorTolerance {
    */
   @Test def testUTF8Decode6ByteSurrogatePairIsMalformed(): Unit = {
     // val exp = "\ud4d0" // that's the 2nd half of a surrogate pair for 
U+1d4d0
-    val cs = Charset.forName("utf-8")
+    val cs = StandardCharsets.UTF_8
     val dn = cs.displayName()
     assertEquals("UTF-8", dn)
     val decoder = cs.newDecoder()
@@ -271,7 +271,7 @@ class TestUnicodeErrorTolerance {
    */
   @Test def testUTF16DecodeBadSurrogate(): Unit = {
     val exp = "\ud4d0" // that's the 2nd half of a surrogate pair for U+1d4d0
-    val cs = Charset.forName("utf-16BE")
+    val cs = StandardCharsets.UTF_16BE
     val dn = cs.displayName()
     assertEquals("UTF-16BE", dn)
     val decoder = cs.newDecoder()
@@ -286,7 +286,7 @@ class TestUnicodeErrorTolerance {
    */
   @Test def testUTF16DecodeBOMsInMidString(): Unit = {
     val exp = "\uFEFF@\uFEFF@" // BOM, then @ then ZWNBS (aka BOM), then @
-    val cs = Charset.forName("utf-16BE")
+    val cs = StandardCharsets.UTF_16BE
     val dn = cs.displayName()
     assertEquals("UTF-16BE", dn)
     val decoder = cs.newDecoder()
@@ -298,7 +298,7 @@ class TestUnicodeErrorTolerance {
 
   def howManyBadBytes(inBuf: Array[Byte]): Int = {
 
-    val cs = Charset.forName("utf-8")
+    val cs = StandardCharsets.UTF_8
     val dn = cs.displayName()
     var counter: Int = 0
 
@@ -356,7 +356,7 @@ class TestUnicodeErrorTolerance {
   }
 
   def replaceBadCharacters(inBuf: Array[Byte]): String = {
-    val cs = Charset.forName("utf-8")
+    val cs = StandardCharsets.UTF_8
     val dn = cs.displayName()
     assertEquals("UTF-8", dn)
     val decoder = cs.newDecoder()
@@ -367,7 +367,7 @@ class TestUnicodeErrorTolerance {
   }
 
   def replaceBadCharactersEncoding(s: String): Array[Byte] = {
-    val cs = Charset.forName("utf-8")
+    val cs = StandardCharsets.UTF_8
     val dn = cs.displayName()
     assertEquals("UTF-8", dn)
     val encoder = cs.newEncoder()
@@ -424,7 +424,7 @@ class TestUnicodeErrorTolerance {
    * This test shows that Java ISO-8859-1 can decode any byte at all.
    */
   @Test def testISO8859HandlesAllBytes(): Unit = {
-    val cs = Charset.forName("iso-8859-1")
+    val cs = StandardCharsets.ISO_8859_1
     val decoder = cs.newDecoder()
 
     val inBuf = Array[Int](
diff --git 
a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala 
b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
index 8a1afb8..d26fbf5 100644
--- a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
+++ b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
@@ -27,7 +27,7 @@ import java.nio.ByteBuffer
 import java.nio.CharBuffer
 import java.nio.LongBuffer
 import java.nio.charset.CoderResult
-import java.nio.charset.{Charset => JavaCharset}
+import java.nio.charset.StandardCharsets
 
 import scala.collection.mutable
 import scala.language.postfixOps
@@ -1781,16 +1781,14 @@ object VerifyTestCase {
     }
   }
 
-  private val cs8859 = JavaCharset.forName("iso-8859-1")
-
   def verifyBinaryOrMixedData(expectedData: InputStream, actualOutStream: 
java.io.ByteArrayOutputStream,
     implString: Option[String]): Unit = {
     val actualBytes = actualOutStream.toByteArray
-    lazy val actual8859String = 
cs8859.newDecoder().decode(ByteBuffer.wrap(actualBytes)).toString()
+    lazy val actual8859String = 
StandardCharsets.ISO_8859_1.newDecoder().decode(ByteBuffer.wrap(actualBytes)).toString()
     lazy val displayableActual = 
Misc.remapControlsAndLineEndingsToVisibleGlyphs(actual8859String)
 
     val expectedBytes = IOUtils.toByteArray(expectedData)
-    lazy val expected8859String = 
cs8859.newDecoder().decode(ByteBuffer.wrap(expectedBytes)).toString()
+    lazy val expected8859String = 
StandardCharsets.ISO_8859_1.newDecoder().decode(ByteBuffer.wrap(expectedBytes)).toString()
     lazy val displayableExpected = 
Misc.remapControlsAndLineEndingsToVisibleGlyphs(expected8859String)
 
     lazy val expectedAndActualDisplayStrings = "\n" +
diff --git 
a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/TDMLInfosetOutputter.scala
 
b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/TDMLInfosetOutputter.scala
index 0259c79..fa8d1a9 100644
--- 
a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/TDMLInfosetOutputter.scala
+++ 
b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/TDMLInfosetOutputter.scala
@@ -40,7 +40,7 @@ class TDMLInfosetOutputter() extends InfosetOutputter {
   private def implString: String = "daffodil"
 
   private val jsonStream = new ByteArrayOutputStream()
-  private val xmlStream = new ByteArrayOutputStream()
+  val xmlStream = new ByteArrayOutputStream()
 
   private val scalaOut = new ScalaXMLInfosetOutputter()
   private val jdomOut = new JDOMInfosetOutputter()
@@ -104,8 +104,6 @@ class TDMLInfosetOutputter() extends InfosetOutputter {
 
   def getResult() = scalaOut.getResult
 
-  def getXmlString() = xmlStream.toString
-
   def toInfosetInputter() = {
     val scalaIn = new ScalaXMLInfosetInputter(scalaOut.getResult)
     val jdomIn = new JDOMInfosetInputter(jdomOut.getResult)
diff --git 
a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/processor/DaffodilTDMLDFDLProcessor.scala
 
b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/processor/DaffodilTDMLDFDLProcessor.scala
index 54bcb35..fe4b625 100644
--- 
a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/processor/DaffodilTDMLDFDLProcessor.scala
+++ 
b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/processor/DaffodilTDMLDFDLProcessor.scala
@@ -21,6 +21,7 @@ import java.io.ByteArrayInputStream
 import java.io.ByteArrayOutputStream
 import java.io.OutputStreamWriter
 import java.nio.channels.Channels
+import java.nio.charset.StandardCharsets
 import java.nio.file.Files
 import java.nio.file.Path
 import java.nio.file.Paths
@@ -269,10 +270,9 @@ class DaffodilTDMLDFDLProcessor private (private var dp: 
DataProcessor) extends
     unparse(inputter, resNode, outStream)
   }
 
-  def unparse(inputter: TDMLInfosetInputter, infosetXML: scala.xml.Node, 
outStream: java.io
-  .OutputStream): TDMLUnparseResult = {
+  def unparse(inputter: TDMLInfosetInputter, infosetXML: scala.xml.Node, 
outStream: java.io.OutputStream): TDMLUnparseResult = {
     val bos = new ByteArrayOutputStream()
-    val osw = new OutputStreamWriter(bos)
+    val osw = new OutputStreamWriter(bos, StandardCharsets.UTF_8)
     scala.xml.XML.write(osw, infosetXML, "UTF-8", xmlDecl = true, null)
     osw.flush()
     osw.close()
@@ -312,7 +312,7 @@ class DaffodilTDMLDFDLProcessor private (private var dp: 
DataProcessor) extends
     xri.parse(sis)
 
     if (!actual.isError && !errorHandler.isError) {
-      verifySameParseOutput(outputter, saxOutputStream)
+      verifySameParseOutput(outputter.xmlStream, saxOutputStream)
     }
     val dpParseDiag = actual.getDiagnostics.map(_.getMessage())
     val saxParseDiag = errorHandler.getDiagnostics.map(_.getMessage())
@@ -367,18 +367,16 @@ class DaffodilTDMLDFDLProcessor private (private var dp: 
DataProcessor) extends
     new DaffodilTDMLUnparseResult(actualDP, dpOutputStream)
   }
 
-  def verifySameParseOutput(dpOutputter: TDMLInfosetOutputter, outputStream: 
ByteArrayOutputStream): Unit = {
-    val dpParseOutputString = dpOutputter.getXmlString()
-    val saxParseOutputString = outputStream.toString
+  def verifySameParseOutput(dpOutputStream: ByteArrayOutputStream, 
saxOutputStream: ByteArrayOutputStream): Unit = {
+    val dpParseOutputString = dpOutputStream.toString("UTF-8")
+    val saxParseOutputString = saxOutputStream.toString("UTF-8")
+
+    val dpParseXMLNodeOutput = scala.xml.XML.loadString(dpParseOutputString)
     val saxParseXMLNodeOutput = scala.xml.XML.loadString(saxParseOutputString)
-    // scala.xml.XML.loadString reverses the order of the namespace mappings, 
so we call it for the
-    // dpParseXMLNodeOutput as well so the reversal is mirrored and we can do 
a proper prefixes and namespaces
-    // comparison. dpOutputter.getOutput returns it in the right order, which 
is why we don't use it
-    val dpParseXMLNodeOutputReloaded = 
scala.xml.XML.loadString(dpParseOutputString)
 
     try {
       XMLUtils.compareAndReport(
-        dpParseXMLNodeOutputReloaded,
+        dpParseXMLNodeOutput,
         saxParseXMLNodeOutput,
         checkNamespaces = true,
         checkPrefixes = true)
@@ -391,8 +389,7 @@ class DaffodilTDMLDFDLProcessor private (private var dp: 
DataProcessor) extends
     }
   }
 
-  private def verifySameDiagnostics(seqDiagExpected: Seq[String], 
seqDiagActual: Seq[String]): Unit
-  = {
+  private def verifySameDiagnostics(seqDiagExpected: Seq[String], 
seqDiagActual: Seq[String]): Unit = {
     val expected = seqDiagExpected.sorted
     val actual = seqDiagActual.sorted
 

Reply via email to