stevedlawrence commented on a change in pull request #478:
URL: https://github.com/apache/daffodil/pull/478#discussion_r616625305
##########
File path:
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -22,17 +22,44 @@ import java.io.OutputStreamWriter
import scala.xml.NamespaceBinding
-import org.apache.daffodil.infoset.XMLInfosetOutputter
+import org.apache.daffodil.exceptions.Assert
import org.apache.daffodil.util.Indentable
+import org.apache.daffodil.util.MStackOf
import org.apache.daffodil.util.MStackOfBoolean
+import org.apache.daffodil.util.Maybe
+import org.apache.daffodil.util.Maybe.Nope
+import org.apache.daffodil.xml.XMLUtils
import org.xml.sax.Attributes
import org.xml.sax.ContentHandler
import org.xml.sax.Locator
+/**
+ * ContentHandler implementation that receives SAX events from
DaffodilParseXMLReader to output
+ * XML to the specified outputStream. Depending on the features set in the
XMLReader, it uses either
+ * prefixMappings or attributes to determine the prefix of the XML element.
This means it will always
+ * try to find and print a prefix if an element has a URI.
+ *
+ * @param out outputStream object to write generated XML to
+ * @param pretty boolean to pretty print XML if true, or not if false
+ */
class DaffodilParseOutputStreamContentHandler(out: OutputStream, pretty:
Boolean = false)
- extends ContentHandler with Indentable with XMLInfosetOutputter {
+ extends ContentHandler with Indentable {
private val writer = new OutputStreamWriter(out)
- private var prefixMapping: NamespaceBinding = null
+ /**
+ * represents the currently active prefix mappings (i.e all mappings include
from parent element),
+ * which is usefully for doing lookups
+ */
+ private var activePrefixMapping: NamespaceBinding = null
+ /**
+ * represents only the prefix mapping of the current element. We use this to
generate the the prefix mappings
Review comment:
Duplicate "the the"
##########
File path:
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -84,21 +180,42 @@ class DaffodilParseOutputStreamContentHandler(out:
OutputStream, pretty: Boolean
writer.write(System.lineSeparator())
outputIndentation(writer)
}
+
+ // extracts attributes (or new mappings in the case where namespacePrefix
== true) from atts
+ val pmAttrRes = processAttributes(atts)
+ /**
+ * represents the attributes for the current element. We use this to
generate the attributes list
+ * within the start tag
+ */
+ val currentElementAttributes = pmAttrRes._1
+ currentElementPrefixMapping =
+ if (pmAttrRes._2 != null) {
+ currentElementPrefixMapping
+ } else {
+ NamespaceBinding(pmAttrRes._2.prefix, pmAttrRes._2.uri,
currentElementPrefixMapping)
+ }
+
+ // we always push, but activePrefixMapping won't always be populated with
new information
Review comment:
Might be worth pointing out that processAttributes may have mutated
activePrefixMapping so it's clear where that gets populated, since it's not
very obvious.
##########
File path:
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseXMLReader.scala
##########
@@ -194,8 +198,10 @@ class DaffodilParseXMLReader(dp: DataProcessor) extends
DFDL.DaffodilParseXMLRea
*
* @return SAXInfosetOutputter object with or without blob Attributes set
*/
- private def createSAXInfosetOutputter(xmlReader: DaffodilParseXMLReader):
SAXInfosetOutputter = {
- val sioo = new SAXInfosetOutputter(xmlReader)
+ private def createSAXInfosetOutputter(xmlReader: DaffodilParseXMLReader,
+ namespacesFeature: Boolean,
+ namespacePrefixesFeature: Boolean): SAXInfosetOutputter = {
Review comment:
Minor inconsisency here. We pass in these two features to this funciton
(which are private members) but don't pass in the blob properties (which are
also private members). So it's not clear why some things are passed in and
other very simialr things aren't.
I guess really, we don't even need to pass in anything to this function
since everything it accesses is a private member of the class.
In fact, this function is only called in one place, and it's pretty small.
Thoughts on just moving this function logic to the on place it's called and
not having to worry about passing anything in? So the one ``parse`` function is
responsible for creating the infoset outputter and gett the properties/features
values?
##########
File path:
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetOutputter.scala
##########
@@ -144,29 +149,25 @@ class SAXInfosetOutputter(xmlReader:
DFDL.DaffodilParseXMLReader)
}
private def doStartPrefixMapping(diElem: DIElement, contentHandler:
ContentHandler): Unit = {
- val nsbStart = diElem.erd.minimizedScope
- val nsbEnd = if (diElem.isRoot) {
- scala.xml.TopScope
- } else {
- diElem.diParent.erd.minimizedScope
- }
+ val (nsbStart: NamespaceBinding, nsbEnd: NamespaceBinding) =
getNsbStartAndEnd(diElem)
var n = nsbStart
+ var mappingsList: Seq[(String, String)] = Seq()
while (n != nsbEnd && n != null && n != scala.xml.TopScope) {
val prefix = if (n.prefix == null) "" else n.prefix
val uri = if (n.uri == null) "" else n.uri
- contentHandler.startPrefixMapping(prefix, uri)
+ // we generate a list here by prepending so can build the prefixMapping
in the
+ // same order as it is in minimizedScope when we call
startPrefixMapping; we do this because
+ // getPrefix in the contentHandler is order dependent
+ mappingsList +:= (prefix, uri)
n = n.parent
}
+ mappingsList.foreach{ case (prefix, uri) =>
+ contentHandler.startPrefixMapping(prefix, uri)
+ }
}
private def doEndPrefixMapping(diElem: DIElement, contentHandler:
ContentHandler): Unit = {
- val nsbStart = diElem.erd.minimizedScope
- val nsbEnd = if (diElem.isRoot) {
- scala.xml.TopScope
- } else {
- diElem.diParent.erd
- .minimizedScope
- }
+ val (nsbStart: NamespaceBinding, nsbEnd: NamespaceBinding) =
getNsbStartAndEnd(diElem)
var n = nsbStart
Review comment:
I don't think think this comment was resolved. Same issue exists above
in start prefix mapping and below in doAttributesPrefixMapping. Anytime we
iterate over a NamespaceBinding from a start to an end, we should be using
ne/eq.
##########
File path:
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -66,13 +93,82 @@ class DaffodilParseOutputStreamContentHandler(out:
OutputStream, pretty: Boolean
override def startPrefixMapping(prefix: String, uri: String): Unit = {
val _prefix = if (prefix == "") null else prefix
- prefixMapping = NamespaceBinding(_prefix, uri, prefixMapping)
+ activePrefixMapping = NamespaceBinding(_prefix, uri, activePrefixMapping)
+ currentElementPrefixMapping = NamespaceBinding(_prefix, uri,
currentElementPrefixMapping)
}
override def endPrefixMapping(prefix: String): Unit = {
// do nothing
}
+ /**
+ * Uses Attributes, which is passed in to the startElement callback, to
gather element attributes
+ * or in the case where namespacePrefixes is true, prefix mappings. Any new
prefix mappings are used
+ * to update the activePrefixMapping.
+ *
+ * @return tuple of Seq of string attribute=val pairings and a
NamespaceBinding of any extracted
+ * prefixMappings
+ */
+ def processAttributes(atts: Attributes): (Seq[String], NamespaceBinding) = {
+ var newPrefixMappings: NamespaceBinding = null
+ var attrPairings: Seq[String] = Seq()
+ var i = 0
+ var newMappingsList: Seq[(String, String)] = Seq()
+ while (i < atts.getLength) {
+ val qName = atts.getQName(i)
+ val attrVal = atts.getValue(i)
+ if (qName.nonEmpty) {
+ // if qName is populated; as in when namespacePrefixes == true
+ // describing namespace mapping
+ if (qName.startsWith("xmlns:") || qName == "xmlns") {
+ // get prefix
+ val pre = if (qName.startsWith("xmlns:")) {
+ qName.substring(6)
+ } else {
+ null
+ }
+ // we make this call to check if the prefix already exists. If it
doesn't exist, we get a
+ // Nope, so we can add it to our list, but if it does exist, nothing
happens and it doesn't
+ // get re-added and we instead proceed to the next item in Attributes
+ val maybeUri = XMLUtils.maybeURI(activePrefixMapping, pre)
+ if (maybeUri.isEmpty) { // not found yet, add it
+ newMappingsList +:= (pre, attrVal)
+ }
Review comment:
Is this correct? If XML attempts to redefine a prefix mapping, then we
don't consider it a new mapping? For example:
```xml
<pre:foo xmlns:pre="uri_one">
<pre:foo xmlns:pre="uri_two">test</pre:foo>
</pre:foo>
```
This is insane and I hope to never actually see XML like that, but If this
function was called to process the attributes of the inner foo, then we
wouldn't consider the ``uri_two`` mapping as a new mapping, right? And so that
would be lost? Should we just always unconditionally add this mapping to the
newMappingsList? Or maybe the goal here is to remove duplicate/unnecessary
mappings, in which case maybe the logic should be something like this?
```scala
if (maybeUri.isEmpty || maybeUri.get != attrVal)
```
So if the prefix isn't already mapped to anything, or it is but it's mapped
to a different URI, then we do output it?
##########
File path:
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseXMLReader.scala
##########
@@ -144,7 +142,13 @@ class DaffodilParseXMLReader(dp: DataProcessor) extends
DFDL.DaffodilParseXMLRea
}
def parse(isdis: InputSourceDataInputStream): Unit = {
- val sio = createSAXInfosetOutputter(this)
+ val sio = createSAXInfosetOutputter(this,
+ saxNamespaceFeatureValue,
+ saxNamespacePrefixesFeatureValue)
+ // validate that the features are not false/false
+ if (!sio.namespacesFeature && !sio.namespacePrefixesFeature) {
+ throw new SAXException("Illegal State: Features cannot both be false")
Review comment:
Might be worth mentioning specifically which features, that might not
always be obvious to people not faimilar with SAX. Probably not too confusing
right now since Daffodil only supports two features, but if ever add more in
the future it could be helpful.
##########
File path:
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -116,9 +256,19 @@ class DaffodilParseOutputStreamContentHandler(out:
OutputStream, pretty: Boolean
}
}
writer.write("</")
- writer.write(qName)
+ outputTagName(uri, localName, qName)
writer.write(">")
outputNewlineStack.pop()
+
+ // throw out current prefix mapping context as we're done with it
+ if (!activePrefixMappingContextStack.isEmpty)
activePrefixMappingContextStack.pop
Review comment:
Shouldn't this condition always be true? I think we always push onto
this stack when startElement is called, and so there should always be an
element to pop when endELement is called. I guess the only time that wouldn't
be true is if the XMLReader messed up and called endElement without calling a
matching startElement but that should be an error. Perhaps this condition that
the stack isn't empty should just be changed to an Assert.invariant?
##########
File path:
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -45,7 +72,7 @@ class DaffodilParseOutputStreamContentHandler(out:
OutputStream, pretty: Boolean
def reset(): Unit = {
resetIndentation()
writer.flush()
- prefixMapping = null
+ activePrefixMapping = null
Review comment:
Should we also reset ``currentElementPrefixMapping`` to null? I think
theortetcially it should always be null when finished, but maybe at somepoint
an exception could be thrown, in which case the state might be incorrect and
reset() would need to fix that.
Do we also need to empty the ``activePrefixMappingContextStack`` for the
same reason?
##########
File path:
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -84,21 +180,42 @@ class DaffodilParseOutputStreamContentHandler(out:
OutputStream, pretty: Boolean
writer.write(System.lineSeparator())
outputIndentation(writer)
}
+
+ // extracts attributes (or new mappings in the case where namespacePrefix
== true) from atts
+ val pmAttrRes = processAttributes(atts)
+ /**
+ * represents the attributes for the current element. We use this to
generate the attributes list
+ * within the start tag
+ */
+ val currentElementAttributes = pmAttrRes._1
+ currentElementPrefixMapping =
+ if (pmAttrRes._2 != null) {
+ currentElementPrefixMapping
+ } else {
+ NamespaceBinding(pmAttrRes._2.prefix, pmAttrRes._2.uri,
currentElementPrefixMapping)
+ }
Review comment:
Is this right? Say ``processAttributes`` does process some xmlns:pre
mappings. In that case, it will return a ``NamespaceBinding`` of those local
prefix mappings, and ``pmAttrRes._2`` will not be null and we fall into the
first block. But if we got local mappings, dont' we want the
``currentELementPrefixMapping`` to be set to those, i.e. ``pmAttrrRes._2``?
And in the case when ``pmAttrsRes._2`` is null, then we fall into the else
block and then try to get .prefix and .uri from that, but isn't it null? Won't
that be a NullPointerException?
Why don't we just unconditionally set ``currentElementPrefixMapping =
pmAttrRes._2``? Doesn't that represent the prefix mappings procesed for just
this element?
##########
File path:
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseXMLReader.scala
##########
@@ -144,7 +142,13 @@ class DaffodilParseXMLReader(dp: DataProcessor) extends
DFDL.DaffodilParseXMLRea
}
def parse(isdis: InputSourceDataInputStream): Unit = {
- val sio = createSAXInfosetOutputter(this)
+ val sio = createSAXInfosetOutputter(this,
Review comment:
Unrelated, but I noticed above the setProperty function just calls
value.asInstanceOf. If value isn't an instance of the right thing, I think that
will throw a ClassCastException? Should that first check if it's the right
instance, and then throw SAXNotRecognizedException or SAXNotSupportedException?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]