mbeckerle commented on code in PR #797:
URL: https://github.com/apache/daffodil/pull/797#discussion_r915149798
##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/EXIInfosetInputter.scala:
##########
@@ -28,15 +28,15 @@ import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
import java.io.BufferedOutputStream
import javax.xml.transform.stream.StreamResult
-import org.apache.daffodil.util.MaybeBoolean
+import org.apache.daffodil.util.{ MaybeBoolean, Misc }
import org.apache.daffodil.dpath.NodeInfo
object EXIInfosetInputter {
- def ConvertEXIToXMLWithExificient(input: java.io.InputStream):
java.io.InputStream = {
- val xsdLocation =
"daffodil-lib/src/main/resources/org/apache/daffodil/xsd/XMLSchema.xsd"
+ def ConvertEXIToXML(input: java.io.InputStream): java.io.InputStream = {
+ val xsdLocation =
Misc.getRequiredResource("org/apache/daffodil/xsd/XMLSchema.xsd")
Review Comment:
What? You are using the XML Schema for XML Schemas (XMLSchema.xsd)? What for?
##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/EXIInfosetOutputter.scala:
##########
@@ -17,25 +17,26 @@
package org.apache.daffodil.infoset
-import org.apache.daffodil.util.Indentable
+import org.apache.daffodil.util.{ Indentable, Misc }
import org.apache.daffodil.dpath.NodeInfo
import com.siemens.ct.exi.core.helpers.DefaultEXIFactory
import com.siemens.ct.exi.main.api.sax.EXIResult
import com.siemens.ct.exi.grammars.GrammarFactory
import com.siemens.ct.exi.core.FidelityOptions
import org.xml.sax.helpers.XMLReaderFactory
+import org.xml.sax.InputSource
/**
* Writes the infoset to a java.io.Writer as XML text.
*
- * @param writer The writer to write the XML text to
+ * @param os The OutputStream to write the XML text to
* @param pretty Whether or to enable pretty printing. Set to true, XML
* elements are indented and newlines are inserted.
*/
-class EXIInfosetOutputter (writer: java.io.FileOutputStream, pretty: Boolean)
+class EXIInfosetOutputter (os: java.io.OutputStream, pretty: Boolean)
Review Comment:
If we have to keep the "pretty" parameter for the constructor, then I would
name the parameter "ignored" in this constructor. If we can remove it, we
should. Not sure we need the Indentable mixin, given that indenting makes no
sense for EXI, but maybe we have to have it for method signature reasons.
##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/EXIInfosetOutputter.scala:
##########
@@ -17,25 +17,26 @@
package org.apache.daffodil.infoset
-import org.apache.daffodil.util.Indentable
+import org.apache.daffodil.util.{ Indentable, Misc }
import org.apache.daffodil.dpath.NodeInfo
import com.siemens.ct.exi.core.helpers.DefaultEXIFactory
import com.siemens.ct.exi.main.api.sax.EXIResult
import com.siemens.ct.exi.grammars.GrammarFactory
import com.siemens.ct.exi.core.FidelityOptions
import org.xml.sax.helpers.XMLReaderFactory
+import org.xml.sax.InputSource
/**
* Writes the infoset to a java.io.Writer as XML text.
*
- * @param writer The writer to write the XML text to
+ * @param os The OutputStream to write the XML text to
Review Comment:
Update comment. EXI is what is being written, and there is no such thing as
pretty printing for that.
##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/EXIInfosetOutputter.scala:
##########
@@ -172,17 +173,18 @@ class EXIInfosetOutputter (writer:
java.io.FileOutputStream, pretty: Boolean)
sw.write(System.lineSeparator())
sw.flush()
- val xsdLocation =
"daffodil-lib/src/main/resources/org/apache/daffodil/xsd/XMLSchema.xsd"
+ val xsdLocation =
Misc.getRequiredResource("org/apache/daffodil/xsd/XMLSchema.xsd")
lazy val exiFactory = DefaultEXIFactory.newInstance()
val grammarFactory = GrammarFactory.newInstance()
- val g = grammarFactory.createGrammars(xsdLocation)
+ val g = grammarFactory.createGrammars(xsdLocation.toString)
exiFactory.setGrammars(g);
exiFactory.getFidelityOptions().setFidelity(FidelityOptions.FEATURE_PREFIX,true)
Review Comment:
I was expecting to see other options here also to enable schema awareness,
but then again I have no clue what's happening with the XMLSchema.xsd above.
Perhaps there are some options for telling it to include the identity of the
schema that is needed, so that the data going across tells the EXI
reader the exact schema the data is for? I think?
##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/EXIInfosetOutputter.scala:
##########
@@ -17,25 +17,26 @@
package org.apache.daffodil.infoset
-import org.apache.daffodil.util.Indentable
+import org.apache.daffodil.util.{ Indentable, Misc }
import org.apache.daffodil.dpath.NodeInfo
import com.siemens.ct.exi.core.helpers.DefaultEXIFactory
import com.siemens.ct.exi.main.api.sax.EXIResult
import com.siemens.ct.exi.grammars.GrammarFactory
import com.siemens.ct.exi.core.FidelityOptions
import org.xml.sax.helpers.XMLReaderFactory
+import org.xml.sax.InputSource
/**
* Writes the infoset to a java.io.Writer as XML text.
*
- * @param writer The writer to write the XML text to
+ * @param os The OutputStream to write the XML text to
* @param pretty Whether or to enable pretty printing. Set to true, XML
* elements are indented and newlines are inserted.
*/
-class EXIInfosetOutputter (writer: java.io.FileOutputStream, pretty: Boolean)
+class EXIInfosetOutputter (os: java.io.OutputStream, pretty: Boolean)
extends InfosetOutputter with Indentable with XMLInfosetOutputter {
- def this(os: java.io.FileOutputStream) = {
+ def this(os: java.io.OutputStream) = {
Review Comment:
Everything below about indentation can be removed.
##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/EXIInfosetOutputter.scala:
##########
@@ -172,17 +173,18 @@ class EXIInfosetOutputter (writer:
java.io.FileOutputStream, pretty: Boolean)
sw.write(System.lineSeparator())
sw.flush()
- val xsdLocation =
"daffodil-lib/src/main/resources/org/apache/daffodil/xsd/XMLSchema.xsd"
+ val xsdLocation =
Misc.getRequiredResource("org/apache/daffodil/xsd/XMLSchema.xsd")
Review Comment:
Why XMLSchema.xsd? That's the XML schema for XML Schemas. Don't we want the
DFDL schema of the data being parsed here in order to do schema-aware EXI
creation?
However, I'm ok with a first cut being a non-schema aware EXI outputter.
##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/EXIInfosetOutputter.scala:
##########
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.infoset
+
+import org.apache.daffodil.util.{ Indentable, Misc }
+import org.apache.daffodil.dpath.NodeInfo
+import com.siemens.ct.exi.core.helpers.DefaultEXIFactory
+import com.siemens.ct.exi.main.api.sax.EXIResult
+import com.siemens.ct.exi.grammars.GrammarFactory
+import com.siemens.ct.exi.core.FidelityOptions
+import org.xml.sax.helpers.XMLReaderFactory
+import org.xml.sax.InputSource
+
+/**
+ * Writes the infoset to a java.io.Writer as XML text.
+ *
+ * @param os The OutputStream to write the XML text to
+ * @param pretty Whether or to enable pretty printing. Set to true, XML
+ * elements are indented and newlines are inserted.
+ */
+class EXIInfosetOutputter (os: java.io.OutputStream, pretty: Boolean)
+ extends InfosetOutputter with Indentable with XMLInfosetOutputter {
+
+ def this(os: java.io.OutputStream) = {
+ this(os, false)
+ }
+
+ private val sb = new StringBuilder()
+ private val sw = new java.io.StringWriter()
Review Comment:
Definitely not right. We should not be creating any string here. Every place
we write to this string writer, we should instead be delegating to the EXI
content handler.
##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/EXIInfosetInputter.scala:
##########
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.infoset
+
+import com.siemens.ct.exi.core.helpers.DefaultEXIFactory
+import javax.xml.transform.TransformerFactory
+import com.siemens.ct.exi.main.api.sax.EXISource
+import com.siemens.ct.exi.grammars.GrammarFactory
+import com.siemens.ct.exi.core.FidelityOptions
+import javax.xml.transform.sax.SAXSource
+import org.xml.sax.InputSource
+import java.io.ByteArrayInputStream
+import java.io.ByteArrayOutputStream
+import java.io.BufferedOutputStream
+import javax.xml.transform.stream.StreamResult
+import org.apache.daffodil.util.{ MaybeBoolean, Misc }
+import org.apache.daffodil.dpath.NodeInfo
+
+object EXIInfosetInputter {
+ def ConvertEXIToXML(input: java.io.InputStream): java.io.InputStream = {
+ val xsdLocation =
Misc.getRequiredResource("org/apache/daffodil/xsd/XMLSchema.xsd")
+ lazy val exiFactory = DefaultEXIFactory.newInstance()
+ val grammarFactory = GrammarFactory.newInstance()
+ val g = grammarFactory.createGrammars(xsdLocation.toString)
+ exiFactory.setGrammars(g)
+
exiFactory.getFidelityOptions().setFidelity(FidelityOptions.FEATURE_PREFIX,
true)
+ val saxSource = new EXISource(exiFactory)
+ val xmlReader = saxSource.getXMLReader()
+
+ val tf = TransformerFactory.newInstance()
+ val transformer = tf.newTransformer()
+
+ val exiSource = new SAXSource(new InputSource(input))
+ exiSource.setXMLReader(xmlReader);
Review Comment:
This is again first creating textual XML from the EXI, and then using the
textual XML for input to daffodil unparse.
We want to go directly from EXI data to daffodil unparse.
You *could* do this via the SAXInfosetInputter, and given the symmetry with
the SAXInfosetOutputter probably that should be done first.
For performance reasons, however, the natural control model for the unparser
is not SAX (event calls), but more like StAX, which is pulling XML events.
So we'll want to use an XML pull-style API (i.e., StAX) if EXIficient
provides one. In fact a generic StAXInfosetInputter that takes a
StAX-API-Implementing object passed to its constructor would be the best way to
do that leveraging the StAX standard.
##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/EXIInfosetOutputter.scala:
##########
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.infoset
+
+import org.apache.daffodil.util.{ Indentable, Misc }
+import org.apache.daffodil.dpath.NodeInfo
+import com.siemens.ct.exi.core.helpers.DefaultEXIFactory
+import com.siemens.ct.exi.main.api.sax.EXIResult
+import com.siemens.ct.exi.grammars.GrammarFactory
+import com.siemens.ct.exi.core.FidelityOptions
+import org.xml.sax.helpers.XMLReaderFactory
+import org.xml.sax.InputSource
+
+/**
+ * Writes the infoset to a java.io.Writer as XML text.
+ *
+ * @param os The OutputStream to write the XML text to
+ * @param pretty Whether or to enable pretty printing. Set to true, XML
+ * elements are indented and newlines are inserted.
+ */
+class EXIInfosetOutputter (os: java.io.OutputStream, pretty: Boolean)
+ extends InfosetOutputter with Indentable with XMLInfosetOutputter {
+
+ def this(os: java.io.OutputStream) = {
+ this(os, false)
+ }
+
+ private val sb = new StringBuilder()
+ private val sw = new java.io.StringWriter()
+
+ /**
+ * Used to keep determine if the in-scope complex element has children, and
+ * thus if we should output a newline or not when closing that complex type.
+ * A complex type with no children should just be output like
+ *
+ * <complex></complex>
+ *
+ * This value is initialized to false when a complex type is started, since
+ * we don't know if it has children yet. This value is then set to true
+ * either when a simple type is started (i.e. the current complex type must
+ * have at least one child), or when a complex type is ended (i.e. the parent
+ * and all subsequent parents of the ended complex must have at least one
+ * child, which is the complex that just eneded).
+ */
+ private var inScopeComplexElementHasChildren = false
+
+ override def reset(): Unit = {
+ resetIndentation()
+ inScopeComplexElementHasChildren = false
+ }
+
+ private def outputTagName(elem: DIElement): Unit = {
+ val prefix = elem.erd.prefix
+ if (prefix != null && prefix != "") {
+ sw.write(prefix)
+ sw.write(":")
+ }
+ sw.write(elem.erd.name)
+ }
+
+ private def outputStartTag(elem: DIElement): Unit = {
+ sw.write("<")
+
+ outputTagName(elem)
+
+ val nsbStart = elem.erd.minimizedScope
+ val nsbEnd = if (elem.isRoot) scala.xml.TopScope else
elem.diParent.erd.minimizedScope
+ if (nsbStart != nsbEnd) {
+ sb.setLength(0) // reset the stringbuilder
+ nsbStart.buildString(sb, nsbEnd)
+ sw.write(sb.toString)
Review Comment:
Ok, so this is first writing an XML string, and then calling the EXI
library once to convert that to EXI.
That is not what we want to do. We want to completely avoid the creation of
any XML strings. Just delegate over to the EXI library.
If the EXIficient EXI library provides a SAX Content Handler, then I think
you can use the SAXInfosetOutputter passing it an initialized EXIficient
content handler and that's it. It should just work.
##########
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/EXIInfosetInputter.scala:
##########
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.infoset
+
+import com.siemens.ct.exi.core.helpers.DefaultEXIFactory
+import javax.xml.transform.TransformerFactory
+import com.siemens.ct.exi.main.api.sax.EXISource
+import com.siemens.ct.exi.grammars.GrammarFactory
+import com.siemens.ct.exi.core.FidelityOptions
+import javax.xml.transform.sax.SAXSource
+import org.xml.sax.InputSource
+import java.io.ByteArrayInputStream
+import java.io.ByteArrayOutputStream
+import java.io.BufferedOutputStream
+import javax.xml.transform.stream.StreamResult
+import org.apache.daffodil.util.{ MaybeBoolean, Misc }
+import org.apache.daffodil.dpath.NodeInfo
+
+object EXIInfosetInputter {
+ def ConvertEXIToXML(input: java.io.InputStream): java.io.InputStream = {
+ val xsdLocation =
Misc.getRequiredResource("org/apache/daffodil/xsd/XMLSchema.xsd")
Review Comment:
What? Why XMLSchema.xsd?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]