mbeckerle commented on code in PR #797:
URL: https://github.com/apache/daffodil/pull/797#discussion_r915149798


##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/EXIInfosetInputter.scala:
##########
@@ -28,15 +28,15 @@ import java.io.ByteArrayInputStream
 import java.io.ByteArrayOutputStream
 import java.io.BufferedOutputStream
 import javax.xml.transform.stream.StreamResult
-import org.apache.daffodil.util.MaybeBoolean
+import org.apache.daffodil.util.{ MaybeBoolean, Misc }
 import org.apache.daffodil.dpath.NodeInfo
 
 object EXIInfosetInputter {
-  def ConvertEXIToXMLWithExificient(input: java.io.InputStream): 
java.io.InputStream = {
-    val xsdLocation = 
"daffodil-lib/src/main/resources/org/apache/daffodil/xsd/XMLSchema.xsd"
+  def ConvertEXIToXML(input: java.io.InputStream): java.io.InputStream = {
+    val xsdLocation = 
Misc.getRequiredResource("org/apache/daffodil/xsd/XMLSchema.xsd")

Review Comment:
   Wha? You are using the XMLSchema for XML Schema? What for?



##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/EXIInfosetOutputter.scala:
##########
@@ -17,25 +17,26 @@
 
 package org.apache.daffodil.infoset
 
-import org.apache.daffodil.util.Indentable
+import org.apache.daffodil.util.{ Indentable, Misc }
 import org.apache.daffodil.dpath.NodeInfo
 import com.siemens.ct.exi.core.helpers.DefaultEXIFactory
 import com.siemens.ct.exi.main.api.sax.EXIResult
 import com.siemens.ct.exi.grammars.GrammarFactory
 import com.siemens.ct.exi.core.FidelityOptions
 import org.xml.sax.helpers.XMLReaderFactory
+import org.xml.sax.InputSource
 
 /**
  * Writes the infoset to a java.io.Writer as XML text.
  *
- * @param writer The writer to write the XML text to
+ * @param os The OutputStream to write the XML text to
  * @param pretty Whether or to enable pretty printing. Set to true, XML
  *               elements are indented and newlines are inserted.
  */
-class EXIInfosetOutputter (writer: java.io.FileOutputStream, pretty: Boolean)
+class EXIInfosetOutputter (os: java.io.OutputStream, pretty: Boolean)

Review Comment:
   If we have to keep the `pretty` parameter in this constructor, then I would 
name it "ignored". If we can remove it, we should. I'm not sure we need the 
Indentable mixin, given that indenting makes no sense for EXI, but maybe we 
have to have it for method signature reasons. 



##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/EXIInfosetOutputter.scala:
##########
@@ -17,25 +17,26 @@
 
 package org.apache.daffodil.infoset
 
-import org.apache.daffodil.util.Indentable
+import org.apache.daffodil.util.{ Indentable, Misc }
 import org.apache.daffodil.dpath.NodeInfo
 import com.siemens.ct.exi.core.helpers.DefaultEXIFactory
 import com.siemens.ct.exi.main.api.sax.EXIResult
 import com.siemens.ct.exi.grammars.GrammarFactory
 import com.siemens.ct.exi.core.FidelityOptions
 import org.xml.sax.helpers.XMLReaderFactory
+import org.xml.sax.InputSource
 
 /**
  * Writes the infoset to a java.io.Writer as XML text.
  *
- * @param writer The writer to write the XML text to
+ * @param os The OutputStream to write the XML text to

Review Comment:
   Update comment. EXI is what is being written, and there is no such thing as 
pretty printing for that. 



##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/EXIInfosetOutputter.scala:
##########
@@ -172,17 +173,18 @@ class EXIInfosetOutputter (writer: 
java.io.FileOutputStream, pretty: Boolean)
     sw.write(System.lineSeparator())
     sw.flush()
 
-    val xsdLocation = 
"daffodil-lib/src/main/resources/org/apache/daffodil/xsd/XMLSchema.xsd"
+    val xsdLocation = 
Misc.getRequiredResource("org/apache/daffodil/xsd/XMLSchema.xsd")
     lazy val exiFactory = DefaultEXIFactory.newInstance()
                val grammarFactory = GrammarFactory.newInstance()
-               val g = grammarFactory.createGrammars(xsdLocation)
+               val g = grammarFactory.createGrammars(xsdLocation.toString)
                exiFactory.setGrammars(g);
     
exiFactory.getFidelityOptions().setFidelity(FidelityOptions.FEATURE_PREFIX,true)

Review Comment:
   I was expecting to see other options here also to enable schema awareness, 
but then again I have no clue what's happening with the XMLSchema.xsd above.  
   
   There are perhaps some options for telling it to include the identity of the 
schema that is needed, so that the data going across tells the EXI reader 
exactly which schema the data is for? I think?



##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/EXIInfosetOutputter.scala:
##########
@@ -17,25 +17,26 @@
 
 package org.apache.daffodil.infoset
 
-import org.apache.daffodil.util.Indentable
+import org.apache.daffodil.util.{ Indentable, Misc }
 import org.apache.daffodil.dpath.NodeInfo
 import com.siemens.ct.exi.core.helpers.DefaultEXIFactory
 import com.siemens.ct.exi.main.api.sax.EXIResult
 import com.siemens.ct.exi.grammars.GrammarFactory
 import com.siemens.ct.exi.core.FidelityOptions
 import org.xml.sax.helpers.XMLReaderFactory
+import org.xml.sax.InputSource
 
 /**
  * Writes the infoset to a java.io.Writer as XML text.
  *
- * @param writer The writer to write the XML text to
+ * @param os The OutputStream to write the XML text to
  * @param pretty Whether or to enable pretty printing. Set to true, XML
  *               elements are indented and newlines are inserted.
  */
-class EXIInfosetOutputter (writer: java.io.FileOutputStream, pretty: Boolean)
+class EXIInfosetOutputter (os: java.io.OutputStream, pretty: Boolean)
   extends InfosetOutputter with Indentable with XMLInfosetOutputter {
 
-  def this(os: java.io.FileOutputStream) = {
+  def this(os: java.io.OutputStream) = {

Review Comment:
   Everything below about indentation can be removed. 



##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/EXIInfosetOutputter.scala:
##########
@@ -172,17 +173,18 @@ class EXIInfosetOutputter (writer: 
java.io.FileOutputStream, pretty: Boolean)
     sw.write(System.lineSeparator())
     sw.flush()
 
-    val xsdLocation = 
"daffodil-lib/src/main/resources/org/apache/daffodil/xsd/XMLSchema.xsd"
+    val xsdLocation = 
Misc.getRequiredResource("org/apache/daffodil/xsd/XMLSchema.xsd")

Review Comment:
   Why XMLSchema.xsd? That's the XML schema for XML Schemas. Don't we want the 
DFDL schema of the data being parsed here in order to do schema-aware EXI 
creation?
   
   However, I'm ok with a first cut being a non-schema aware EXI outputter. 



##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/EXIInfosetOutputter.scala:
##########
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.infoset
+
+import org.apache.daffodil.util.{ Indentable, Misc }
+import org.apache.daffodil.dpath.NodeInfo
+import com.siemens.ct.exi.core.helpers.DefaultEXIFactory
+import com.siemens.ct.exi.main.api.sax.EXIResult
+import com.siemens.ct.exi.grammars.GrammarFactory
+import com.siemens.ct.exi.core.FidelityOptions
+import org.xml.sax.helpers.XMLReaderFactory
+import org.xml.sax.InputSource
+
+/**
+ * Writes the infoset to a java.io.Writer as XML text.
+ *
+ * @param os The OutputStream to write the XML text to
+ * @param pretty Whether or to enable pretty printing. Set to true, XML
+ *               elements are indented and newlines are inserted.
+ */
+class EXIInfosetOutputter (os: java.io.OutputStream, pretty: Boolean)
+  extends InfosetOutputter with Indentable with XMLInfosetOutputter {
+
+  def this(os: java.io.OutputStream) = {
+    this(os, false)
+  }
+
+  private val sb = new StringBuilder()
+  private val sw = new java.io.StringWriter()

Review Comment:
   Definitely not right. We should not be creating any string here. Everywhere 
we write to this string writer we should instead be delegating to the EXI 
content handler. 



##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/EXIInfosetInputter.scala:
##########
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.infoset
+
+import com.siemens.ct.exi.core.helpers.DefaultEXIFactory
+import javax.xml.transform.TransformerFactory
+import com.siemens.ct.exi.main.api.sax.EXISource
+import com.siemens.ct.exi.grammars.GrammarFactory
+import com.siemens.ct.exi.core.FidelityOptions
+import javax.xml.transform.sax.SAXSource
+import org.xml.sax.InputSource
+import java.io.ByteArrayInputStream
+import java.io.ByteArrayOutputStream
+import java.io.BufferedOutputStream
+import javax.xml.transform.stream.StreamResult
+import org.apache.daffodil.util.{ MaybeBoolean, Misc }
+import org.apache.daffodil.dpath.NodeInfo
+
+object EXIInfosetInputter {
+  def ConvertEXIToXML(input: java.io.InputStream): java.io.InputStream = {
+    val xsdLocation = 
Misc.getRequiredResource("org/apache/daffodil/xsd/XMLSchema.xsd")
+    lazy val exiFactory = DefaultEXIFactory.newInstance()
+    val grammarFactory = GrammarFactory.newInstance()
+    val g = grammarFactory.createGrammars(xsdLocation.toString)
+    exiFactory.setGrammars(g)
+    
exiFactory.getFidelityOptions().setFidelity(FidelityOptions.FEATURE_PREFIX, 
true)
+    val saxSource = new EXISource(exiFactory)
+    val xmlReader = saxSource.getXMLReader()
+
+    val tf = TransformerFactory.newInstance()
+    val transformer = tf.newTransformer()
+
+    val exiSource = new SAXSource(new InputSource(input))
+    exiSource.setXMLReader(xmlReader);

Review Comment:
   This is again first creating textual XML from the EXI, and then using the 
textual XML for input to daffodil unparse. 
   
   We want to go directly from EXI data to daffodil unparse. 
   
   You *could* do this via the SAXInfosetInputter, and given the symmetry with 
the SAXInfosetOutputter probably that should be done first.
   
   For performance reasons, however, the natural control model for the unparser 
is not SAX (event calls), but more like StAX, which is pulling XML events. 
   
   So we'll want to use an XML pull-style API (i.e., StAX) if EXIficient 
provides one. In fact, a generic StAXInfosetInputter that takes a 
StAX-API-implementing object passed to its constructor would be the best way to 
do that, leveraging the StAX standard. 
   



##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/EXIInfosetOutputter.scala:
##########
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.infoset
+
+import org.apache.daffodil.util.{ Indentable, Misc }
+import org.apache.daffodil.dpath.NodeInfo
+import com.siemens.ct.exi.core.helpers.DefaultEXIFactory
+import com.siemens.ct.exi.main.api.sax.EXIResult
+import com.siemens.ct.exi.grammars.GrammarFactory
+import com.siemens.ct.exi.core.FidelityOptions
+import org.xml.sax.helpers.XMLReaderFactory
+import org.xml.sax.InputSource
+
+/**
+ * Writes the infoset to a java.io.Writer as XML text.
+ *
+ * @param os The OutputStream to write the XML text to
+ * @param pretty Whether or to enable pretty printing. Set to true, XML
+ *               elements are indented and newlines are inserted.
+ */
+class EXIInfosetOutputter (os: java.io.OutputStream, pretty: Boolean)
+  extends InfosetOutputter with Indentable with XMLInfosetOutputter {
+
+  def this(os: java.io.OutputStream) = {
+    this(os, false)
+  }
+
+  private val sb = new StringBuilder()
+  private val sw = new java.io.StringWriter()
+
+  /**
+   * Used to keep determine if the in-scope complex element has children, and
+   * thus if we should output a newline or not when closing that complex type.
+   * A complex type with no children should just be output like
+   *
+   *   <complex></complex>
+   *
+   * This value is initialized to false when a complex type is started, since
+   * we don't know if it has children yet. This value is then set to true
+   * either when a simple type is started (i.e. the current complex type must
+   * have at least one child), or when a complex type is ended (i.e. the parent
+   * and all subsequent parents of the ended complex must have at least one
+   * child, which is the complex that just eneded).
+   */
+  private var inScopeComplexElementHasChildren = false
+
+  override def reset(): Unit = {
+    resetIndentation()
+    inScopeComplexElementHasChildren = false
+  }
+
+  private def outputTagName(elem: DIElement): Unit = {
+    val prefix = elem.erd.prefix
+    if (prefix != null && prefix != "") {
+      sw.write(prefix)
+      sw.write(":")
+    }
+    sw.write(elem.erd.name)
+  }
+
+  private def outputStartTag(elem: DIElement): Unit = {
+    sw.write("<")
+
+    outputTagName(elem)
+
+    val nsbStart = elem.erd.minimizedScope
+    val nsbEnd = if (elem.isRoot) scala.xml.TopScope else 
elem.diParent.erd.minimizedScope
+    if (nsbStart != nsbEnd) {
+      sb.setLength(0) // reset the stringbuilder
+      nsbStart.buildString(sb, nsbEnd)
+      sw.write(sb.toString)

Review Comment:
   Ok, so this is first writing an XML string, and then calling the EXI 
library once to convert that to EXI.
   
   That is not what we want to do. We want to completely avoid the creation of 
any XML strings. Just delegate over to the EXI library. 
   
   If the EXIficient EXI library provides a SAX Content Handler, then I think 
you can use the SAXInfosetOutputter passing it an initialized EXIficient 
content handler and that's it. It should just work. 



##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/EXIInfosetInputter.scala:
##########
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.infoset
+
+import com.siemens.ct.exi.core.helpers.DefaultEXIFactory
+import javax.xml.transform.TransformerFactory
+import com.siemens.ct.exi.main.api.sax.EXISource
+import com.siemens.ct.exi.grammars.GrammarFactory
+import com.siemens.ct.exi.core.FidelityOptions
+import javax.xml.transform.sax.SAXSource
+import org.xml.sax.InputSource
+import java.io.ByteArrayInputStream
+import java.io.ByteArrayOutputStream
+import java.io.BufferedOutputStream
+import javax.xml.transform.stream.StreamResult
+import org.apache.daffodil.util.{ MaybeBoolean, Misc }
+import org.apache.daffodil.dpath.NodeInfo
+
+object EXIInfosetInputter {
+  def ConvertEXIToXML(input: java.io.InputStream): java.io.InputStream = {
+    val xsdLocation = 
Misc.getRequiredResource("org/apache/daffodil/xsd/XMLSchema.xsd")

Review Comment:
   Wha? Why XMLSchema.xsd?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to