stevedlawrence commented on code in PR #821:
URL: https://github.com/apache/daffodil/pull/821#discussion_r1051043153


##########
daffodil-japi/src/main/java/org/apache/daffodil/japi/XMLOutputStyle.java:
##########
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.japi;
+
+/**
+ * XMLOutputStyles for determining whether to wrap info in CDATA chunks
+ */
+public enum XMLOutputStyle {
+  /**
+   * No CDATA Wrapping

Review Comment:
   We should probably describe what this does do instead of what it doesn't do. 
In this case, infoset strings are written as is, and special characters (e.g. < 
> & ", maybe others?) are escaped using XML &-escaping. Where as 
PrettyPrintSafe does not do that escaping, but instead wraps infoset strings in 
CDATA tags.



##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLTextInfosetOutputter.scala:
##########
@@ -160,7 +163,13 @@ class XMLTextInfosetOutputter private (writer: 
java.io.Writer, pretty: Boolean)
         if (simple.erd.runtimeProperties.get(XMLTextInfoset.stringAsXml) == 
"true") {
           writeStringAsXml(simpleVal)
         } else {
-          writer.write(scala.xml.Utility.escape(remapped(simpleVal)))
+          val simpleValEscaped = xmlOutputStyle match {
+            case XMLOutputStyle.PrettyPrintSafe => {
+              "<![CDATA[%s]]>".format(remapped(simpleVal).replaceAll("]]>", 
"]]]]><![CDATA[>"))
+            }
+            case XMLOutputStyle.Standard => 
scala.xml.Utility.escape(remapped(simpleVal))

Review Comment:
   I just now realize that this new enum is really just describing how we 
escape XML strings. `PrettyPrintSafe` is maybe the intention and a good reason 
why to use this, but I wonder if somethign like `InfosetStringEscapeStyle` with 
values `CDATA` and `Standard` might be more accurate? Or maybe staying generic 
allows us come up with other changes in the future that make things more safe 
for pretty printing?  



##########
daffodil-sapi/src/main/scala/org/apache/daffodil/sapi/infoset/Infoset.scala:
##########
@@ -248,8 +251,14 @@ class XMLTextInfosetOutputter private (outputter: 
SXMLTextInfosetOutputter)
    * @param pretty enable or disable pretty printing. Pretty printing will only
    *               insert indentation and newlines where it will not affect the
    *               content of the XML.
+   * @param xmlOutputStyle determine whether to wrap values of elements of type
+   *                       xs:string in CDATA chunks in order to preserve
+   *                       whitespace.
    */
-  def this(os: java.io.OutputStream, pretty: Boolean) = this(new 
SXMLTextInfosetOutputter(os, pretty))
+  def this(os: java.io.OutputStream, pretty: Boolean,
+    xmlOutputStyle: XMLOutputStyle.Value = XMLOutputStyle.Standard) = {
+    this(new SXMLTextInfosetOutputter(os, pretty, 
XMLOutputStyleConversions.modeToScala(xmlOutputStyle)))

Review Comment:
   We aren't doing the null usage check here. If we are going to do that, we 
should be consistent.



##########
daffodil-sapi/src/test/scala/org/apache/daffodil/example/TestScalaAPI.scala:
##########
@@ -1127,44 +1129,101 @@ class TestScalaAPI {
   }
 
 
-    @Test
-    def testScalaAPI25(): Unit = {
-      // Demonstrates the use of a custom InfosetInputter/Outputter
-
-      val expectedData = "42"
-      val expectedEvents = Array(
-        TestInfosetEvent.startDocument(),
-        TestInfosetEvent.startComplex("e1", "http://example.com";),
-        TestInfosetEvent.startSimple("e2", "http://example.com";, expectedData),
-        TestInfosetEvent.endSimple("e2", "http://example.com";),
-        TestInfosetEvent.endComplex("e1", "http://example.com";),
-        TestInfosetEvent.endDocument()
-      )
-
-      val c = Daffodil.compiler()
-
-      val schemaFile = getResource("/test/sapi/mySchema1.dfdl.xsd")
-      val pf = c.compileFile(schemaFile)
-      val dp = pf.onPath("/")
-
-      val file = getResource("/test/sapi/myData.dat")
-      val fis = new java.io.FileInputStream(file)
-      val dis = new InputSourceDataInputStream(fis)
-      val outputter = new TestInfosetOutputter()
-      val pr = dp.parse(dis, outputter)
-
-      assertFalse(pr.isError())
-      assertArrayEquals(
-        expectedEvents.asInstanceOf[Array[Object]],
-        outputter.events.toArray.asInstanceOf[Array[Object]]
-      )
-
-      val bos = new java.io.ByteArrayOutputStream()
-      val wbc = java.nio.channels.Channels.newChannel(bos)
-      val inputter = new TestInfosetInputter(expectedEvents: _*)
-
-      val ur = dp.unparse(inputter, wbc)
-      assertFalse(ur.isError)
-      assertEquals(expectedData, bos.toString())
+  @Test
+  def testScalaAPI25(): Unit = {
+    // Demonstrates the use of a custom InfosetInputter/Outputter
+
+    val expectedData = "42"
+    val expectedEvents = Array(
+      TestInfosetEvent.startDocument(),
+      TestInfosetEvent.startComplex("e1", "http://example.com";),
+      TestInfosetEvent.startSimple("e2", "http://example.com";, expectedData),
+      TestInfosetEvent.endSimple("e2", "http://example.com";),
+      TestInfosetEvent.endComplex("e1", "http://example.com";),
+      TestInfosetEvent.endDocument()
+    )
+
+    val c = Daffodil.compiler()
+
+    val schemaFile = getResource("/test/sapi/mySchema1.dfdl.xsd")
+    val pf = c.compileFile(schemaFile)
+    val dp = pf.onPath("/")
+
+    val file = getResource("/test/sapi/myData.dat")
+    val fis = new java.io.FileInputStream(file)
+    val dis = new InputSourceDataInputStream(fis)
+    val outputter = new TestInfosetOutputter()
+    val pr = dp.parse(dis, outputter)
+
+    assertFalse(pr.isError())
+    assertArrayEquals(
+      expectedEvents.asInstanceOf[Array[Object]],
+      outputter.events.toArray.asInstanceOf[Array[Object]]
+    )
+
+    val bos = new java.io.ByteArrayOutputStream()
+    val wbc = java.nio.channels.Channels.newChannel(bos)
+    val inputter = new TestInfosetInputter(expectedEvents: _*)
+
+    val ur = dp.unparse(inputter, wbc)
+    assertFalse(ur.isError)
+    assertEquals(expectedData, bos.toString())
+  }
+
+  @Test
+  def testScalaAPICDATA1(): Unit = {
+    val expected = "<![CDATA[NO_WHITESPACE_AT_ALL]]>"
+    val data = "NO_WHITESPACE_AT_ALL$"
+    val schemaType = "string"
+    doXMLOutputStyleTest(expected, data, schemaType)
+  }
+
+  @Test
+  def testScalaAPICDATA2(): Unit = {
+    val expected = "<![CDATA[   'some' stuff   here &#xE000; and 
]]]]><![CDATA[> even]]>"
+    val data = "   'some' stuff   here &#xE000; and ]]> even$"
+    val schemaType = "string"
+    doXMLOutputStyleTest(expected, data, schemaType)
+  }
+
+  @Test
+  def testScalaAPICDATA3(): Unit = {
+    val expected = "6.892"
+    val data = "6.892"
+    val schemaType = "float"
+    doXMLOutputStyleTest(expected, data, schemaType)
+  }
+
+  @Test
+  def testScalaAPICDATA4(): Unit = {
+    val expected = "<![CDATA[this contains a CRLF\nline ending]]>"
+    val data = "this contains a CRLF\r\nline ending$"
+    val schemaType = "string"
+    doXMLOutputStyleTest(expected, data, schemaType)
+  }
+
+  def doXMLOutputStyleTest(expect: String, data: String, schemaType: String): 
Unit = {
+    val c = Daffodil.compiler()
+    val schemaFile = schemaType match {
+      case "string" => getResource("/test/sapi/mySchemaCDATAString.dfdl.xsd")
+      case "float" => getResource("/test/sapi/mySchemaCDATAFloat.dfdl.xsd")
     }
+    val pf = c.compileFile(schemaFile)
+    var dp = pf.onPath("/")
+
+    val is = new ByteArrayInputStream(data.getBytes(StandardCharsets.UTF_8))
+    val input = new InputSourceDataInputStream(is)
+    val bosDP = new ByteArrayOutputStream()
+    val outputter = new XMLTextInfosetOutputter(bosDP, true, 
XMLOutputStyle.PrettyPrintSafe)
+    val res = dp.parse(input, outputter)
+    val err = res.isError()
+
+    val infosetDPString = bosDP.toString()
+    val start = infosetDPString.indexOf(".com\">") + 6
+    val end = infosetDPString.indexOf("</tns")
+    val value = infosetDPString.substring(start, end)
+
+    assertFalse(err)
+    assertEquals(expect, value)
+  }
 }

Review Comment:
   Should we add similar JAPI tests to ensure this all works and doesn't 
accidentally break by a future change?



##########
daffodil-japi/src/main/java/org/apache/daffodil/japi/XMLOutputStyle.java:
##########
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.japi;

Review Comment:
   Suggest this be the `org.apache.daffodil.japi.infoset` package, since this 
is pretty specific to infoset InfosetOutputter behavior. And then you won't 
need to import it in Infoset.scala. Same goes for he SAPI.



##########
daffodil-sapi/src/main/scala/org/apache/daffodil/sapi/packageprivate/Utils.scala:
##########
@@ -53,6 +55,25 @@ private[sapi] object ValidationConversions {
   }
 }
 
+private[sapi] object XMLOutputStyleConversions {
+
+  def modeToScala(mode: XMLOutputStyle.Value): SXMLOutputStyle.Type = {
+    val smode: SXMLOutputStyle.Type = mode match {
+      case XMLOutputStyle.Standard => SXMLOutputStyle.Standard
+      case XMLOutputStyle.PrettyPrintSafe => SXMLOutputStyle.PrettyPrintSafe
+    }
+    smode
+  }
+
+  def modeFromScala(smode: SXMLOutputStyle.Type): XMLOutputStyle.Value = {

Review Comment:
   Nothing uses this function. We should delete it.



##########
daffodil-japi/src/main/scala/org/apache/daffodil/japi/packageprivate/Utils.scala:
##########
@@ -42,6 +44,17 @@ private[japi] object ValidationConversions {
   }
 }
 
+private[japi] object XMLOutputStyleConversions {
+
+  def modeToScala(mode: XMLOutputStyle): SXMLOutputStyle.Type = {

Review Comment:
   `mode` doesn't make sense in this context. We use mode in 
ValidationConversions because it is converting a `ValidationMode`. In this 
case, if you want a short name, maybe `style` is more appropriate?



##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/XMLTextInfosetOutputter.scala:
##########
@@ -31,12 +31,15 @@ import org.apache.daffodil.util.Indentable
  * @param writer The writer to write the XML text to
  * @param pretty Whether or to enable pretty printing. Set to true, XML
  *               elements are indented and newlines are inserted.
+ * @param xmlOutputStyle Determine whether to wrap values of elements of type
+ *        xs:string in CDATA chunks in order to preserve whitespace.
  */
-class XMLTextInfosetOutputter private (writer: java.io.Writer, pretty: Boolean)
+class XMLTextInfosetOutputter private (writer: java.io.Writer, pretty: 
Boolean, xmlOutputStyle: XMLOutputStyle.Type)
   extends InfosetOutputter with Indentable with XMLInfosetOutputter {
 
-  def this(os: java.io.OutputStream, pretty: Boolean) = {
-    this(new java.io.OutputStreamWriter(os, StandardCharsets.UTF_8), pretty)
+  def this(os: java.io.OutputStream, pretty: Boolean, xmlOutputStyle: 
XMLOutputStyle.Type = XMLOutputStyle.Standard) = {
+    this(new java.io.OutputStreamWriter(os, StandardCharsets.UTF_8), pretty, 
xmlOutputStyle)
+    Assert.usage(xmlOutputStyle != null, "Null is not a valid value for 
parameter xmlOutputStyle")

Review Comment:
   What's our standard for validating API constructor/function argumens? Users 
could pass in null to a bunch of our APIs and they would end up with a 
NullPointerException. This particular parameter doesn't feel any different than 
any other parameters, so should we even be checking null here? Or should we 
open a bug to validate non-nullness for all parameters?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to