This is an automated email from the ASF dual-hosted git repository.

slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git


The following commit(s) were added to refs/heads/main by this push:
     new 27fc0c4a6 Ensure UTF-8 is used during CLI tests
27fc0c4a6 is described below

commit 27fc0c4a64911f41a46bfee8f96e3546298f5cc4
Author: Steve Lawrence <[email protected]>
AuthorDate: Mon Sep 11 14:30:53 2023 -0400

    Ensure UTF-8 is used during CLI tests
    
    Daffodil should ignore the Java file.encoding property, only using the
    encoding specified in a DFDL schema. This goes for all of our tests as
    well. The one place where this isn't the case is with CLI tests, which
    can lead to some tests failing or hanging if run on a system with a
    different encoding.
    
    To fix this, this modifies the CLI test infrastructure to run CLI tests
    with a UTF-8 encoding, regardless of the file.encoding property provided
    to Java, since parts of the CLI, specifically the debugger, use special
    UTF-8 characters and have tests that expect them.
    
    This also modifies the Windows/Java 8 GitHub action to set the Java
    file.encoding property to US-ASCII. This ensures that if we ever do add
    file.encoding specific behavior to Daffodil then hopefully a test will
    fail. Note that newer versions of SBT do not get the file.encoding
    property from LANG so this also modifies the GitHub actions so that we
    can specify the encoding and LANG separately.
    
    This also uncovered tests for stringAsXml that relied on file.encoding
    instead of specifying an encoding. Fortunately, these were just broken
    tests and not a broken stringAsXml implementation.
    
    DAFFODIL-2612
---
 .github/workflows/main.yml                         | 37 ++++++++++++++--------
 .../org/apache/daffodil/cli/cliTest/Util.scala     |  7 ++--
 .../apache/daffodil/infoset/TestStringAsXml.scala  |  5 +--
 3 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 1bf267e1f..838198db2 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -41,15 +41,15 @@ jobs:
         java_distribution: [ temurin ]
         java_version: [ 8, 11, 17 ]
         scala_version: [ 2.12.18 ]
-        os: [ ubuntu-22.04, windows-2022 ]
-        include:
+        os: [ ubuntu-22.04, windows-2022, macos-12 ]
+        exclude:
+          # only run macos on java 17
           - os: macos-12
-            shell: bash
-            java_distribution: temurin
-            java_version: 17
-            scala_version: 2.12.18
-            env_cc: cc
-            env_ar: ar
+            java_version: 8
+          - os: macos-12
+            java_version: 11
+        include:
+          # configure shell/cc/ar for all OSes
           - os: ubuntu-22.04
             shell: bash
             env_cc: clang
@@ -58,14 +58,23 @@ jobs:
             shell: msys2 {0}
             env_cc: clang
             env_ar: llvm-ar
-          # configure different languages for some jobs, defaulting to en_US.UTF-8
-          - env_lang: en_US.UTF-8
+          - os: macos-12
+            shell: bash
+            env_cc: cc
+            env_ar: ar
+          # configure different languages/encodings for some jobs, defaulting to en_US/UTF-8
+          - lang: en_US
+          - encoding: UTF-8
           - os: ubuntu-22.04
             java_version: 8
-            env_lang: de_DE.UTF-8
+            lang: de_DE
           - os: ubuntu-22.04
             java_version: 11
-            env_lang: ja_JP.UTF-8
+            lang: ja_JP
+          - os: windows-2022
+            java_version: 8
+            encoding: US-ASCII
+
 
     runs-on: ${{ matrix.os }}
     defaults:
@@ -74,8 +83,8 @@ jobs:
     env:
       AR: ${{ matrix.env_ar }}
       CC: ${{ matrix.env_cc }}
-      LANG: ${{ matrix.env_lang }}
-      SBT: sbt -J-Xms1024m -J-Xmx5120m -J-XX:ReservedCodeCacheSize=512m -J-XX:MaxMetaspaceSize=1024m ++${{ matrix.scala_version }} coverage
+      LANG: ${{ matrix.lang }}.${{ matrix.encoding }}
+      SBT: sbt -J-Xms1024m -J-Xmx5120m -J-XX:ReservedCodeCacheSize=512m -J-XX:MaxMetaspaceSize=1024m -J-Dfile.encoding=${{ matrix.encoding }} ++${{ matrix.scala_version }} coverage
       SONARSCAN: ${{
                      matrix.os == 'ubuntu-22.04' &&
                      matrix.java_version == '11' &&
diff --git a/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/Util.scala b/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/Util.scala
index 9eb857012..74ac79651 100644
--- a/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/Util.scala
+++ b/daffodil-cli/src/test/scala/org/apache/daffodil/cli/cliTest/Util.scala
@@ -25,6 +25,7 @@ import java.io.PipedOutputStream
 import java.io.PrintStream
 import java.lang.ProcessBuilder
 import java.math.BigInteger
+import java.nio.charset.StandardCharsets
 import java.nio.file.Files
 import java.nio.file.Path
 import java.nio.file.Paths
@@ -224,6 +225,7 @@ object Util {
     eb.withOutput(toIn)
     eb.withInputs(fromOut, fromErr)
     eb.withInputFilters(replaceInString("\r\n", "\n"))
+    eb.withCharset(StandardCharsets.UTF_8)
     // Disable timeouts on expect calls. We do this because often times the CLI can
     // take a while to start up (e.g. spawning a thread, schema compilation) which
     // can lead to timeouts and false negatives. Instead, we will spawn the expect
@@ -367,8 +369,9 @@ object Util {
     var exitCode: ExitCode.Value = _
 
     override def run(): Unit = {
-      val psOut = new PrintStream(out)
-      val psErr = new PrintStream(err)
+      // require UTF-8 output streams when testing the CLI
+      val psOut = new PrintStream(out, false, StandardCharsets.UTF_8.name)
+      val psErr = new PrintStream(err, false, StandardCharsets.UTF_8.name)
 
       try {
         // Run a thread-safe CLI instance that uses our custom streams that
diff --git a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXml.scala b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXml.scala
index 719e23738..b57f4ab4b 100644
--- a/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXml.scala
+++ b/daffodil-test/src/test/scala/org/apache/daffodil/infoset/TestStringAsXml.scala
@@ -55,7 +55,7 @@ class TestStringAsXml {
     val parseOut = new XMLTextInfosetOutputter(parseBos, pretty = true)
     val parseRes = dp.parse(parseIn, parseOut)
     val parseDiags = parseRes.getDiagnostics.map(_.toString)
-    val parseStrOpt = if (parseRes.isProcessingError) None else Some(parseBos.toString)
+    val parseStrOpt = if (parseRes.isProcessingError) None else Some(parseBos.toString("UTF-8"))
     (parseDiags, parseStrOpt)
   }
 
@@ -65,7 +65,8 @@ class TestStringAsXml {
     val unparseOut = java.nio.channels.Channels.newChannel(unparseBos)
     val unparseRes = dp.unparse(unparseIn, unparseOut)
     val unparseDiags = unparseRes.getDiagnostics.map(_.toString)
-    val unparseStrOpt = if (unparseRes.isProcessingError) None else Some(unparseBos.toString)
+    val unparseStrOpt =
+      if (unparseRes.isProcessingError) None else Some(unparseBos.toString("UTF-8"))
     (unparseDiags, unparseStrOpt)
   }
 

Reply via email to