stevedlawrence commented on a change in pull request #478:
URL: https://github.com/apache/daffodil/pull/478#discussion_r616625305



##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -22,17 +22,44 @@ import java.io.OutputStreamWriter
 
 import scala.xml.NamespaceBinding
 
-import org.apache.daffodil.infoset.XMLInfosetOutputter
+import org.apache.daffodil.exceptions.Assert
 import org.apache.daffodil.util.Indentable
+import org.apache.daffodil.util.MStackOf
 import org.apache.daffodil.util.MStackOfBoolean
+import org.apache.daffodil.util.Maybe
+import org.apache.daffodil.util.Maybe.Nope
+import org.apache.daffodil.xml.XMLUtils
 import org.xml.sax.Attributes
 import org.xml.sax.ContentHandler
 import org.xml.sax.Locator
 
+/**
+ * ContentHandler implementation that receives SAX events from 
DaffodilParseXMLReader to output
+ * XML to the specified outputStream. Depending on the features set in the 
XMLReader, it uses either
+ * prefixMappings or attributes to determine the prefix of the XML element. 
This means it will always
+ * try to find and print a prefix if an element has a URI.
+ *
+ * @param out outputStream object to write generated XML to
+ * @param pretty boolean to pretty print XML if true, or not if false
+ */
 class DaffodilParseOutputStreamContentHandler(out: OutputStream, pretty: 
Boolean = false)
-  extends ContentHandler with Indentable with XMLInfosetOutputter {
+  extends ContentHandler with Indentable {
   private val writer = new OutputStreamWriter(out)
-  private var prefixMapping: NamespaceBinding = null
+  /**
+   * represents the currently active prefix mappings (i.e all mappings include 
from parent element),
+   * which is usefully for doing lookups
+   */
+  private var activePrefixMapping: NamespaceBinding = null
+  /**
+   * represents only the prefix mapping of the current element. We use this to 
generate the the prefix mappings

Review comment:
       Duplicate "the the"

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -84,21 +180,42 @@ class DaffodilParseOutputStreamContentHandler(out: 
OutputStream, pretty: Boolean
       writer.write(System.lineSeparator())
       outputIndentation(writer)
     }
+
+    // extracts attributes (or new mappings in the case where namespacePrefix 
== true) from atts
+    val pmAttrRes = processAttributes(atts)
+    /**
+     * represents the attributes for the current element. We use this to 
generate the attributes list
+     * within the start tag
+     */
+    val currentElementAttributes = pmAttrRes._1
+    currentElementPrefixMapping =
+      if (pmAttrRes._2 != null) {
+        currentElementPrefixMapping
+      } else {
+        NamespaceBinding(pmAttrRes._2.prefix, pmAttrRes._2.uri, 
currentElementPrefixMapping)
+      }
+
+    // we always push, but activePrefixMapping won't always be populated with 
new information

Review comment:
       Might be worth pointing out that processAttributes may have mutated 
activePrefixMapping so it's clear where that gets populated, since it's not 
very obvious.

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseXMLReader.scala
##########
@@ -194,8 +198,10 @@ class DaffodilParseXMLReader(dp: DataProcessor) extends 
DFDL.DaffodilParseXMLRea
    *
    * @return SAXInfosetOutputter object with or without blob Attributes set
    */
-  private def createSAXInfosetOutputter(xmlReader: DaffodilParseXMLReader): 
SAXInfosetOutputter = {
-    val sioo = new SAXInfosetOutputter(xmlReader)
+  private def createSAXInfosetOutputter(xmlReader: DaffodilParseXMLReader,
+    namespacesFeature: Boolean,
+    namespacePrefixesFeature: Boolean): SAXInfosetOutputter = {

Review comment:
       Minor inconsisency here. We pass in these two features to this funciton 
(which are private members) but don't pass in the blob properties (which are 
also private members). So it's not clear why some things are passed in and 
other very simialr things aren't.
   
   I guess really, we don't even need to pass in anything to this function 
since everything it accesses is a private member of the class.
   
   In fact, this function is only called in one place, and it's pretty small. 
Thoughts on just moving this function  logic to the on place it's called and 
not having to worry about passing anything in? So the one ``parse`` function is 
responsible for creating the infoset outputter and gett the properties/features 
values?

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetOutputter.scala
##########
@@ -144,29 +149,25 @@ class SAXInfosetOutputter(xmlReader: 
DFDL.DaffodilParseXMLReader)
   }
 
   private def doStartPrefixMapping(diElem: DIElement, contentHandler: 
ContentHandler): Unit = {
-    val nsbStart = diElem.erd.minimizedScope
-    val nsbEnd = if (diElem.isRoot) {
-      scala.xml.TopScope
-    } else {
-      diElem.diParent.erd.minimizedScope
-    }
+    val (nsbStart: NamespaceBinding, nsbEnd: NamespaceBinding) = 
getNsbStartAndEnd(diElem)
     var n = nsbStart
+    var mappingsList: Seq[(String, String)] = Seq()
     while (n != nsbEnd && n != null && n != scala.xml.TopScope) {
       val prefix = if (n.prefix == null) "" else n.prefix
       val uri = if (n.uri == null) "" else n.uri
-      contentHandler.startPrefixMapping(prefix, uri)
+      // we generate a list here by prepending so can build the prefixMapping 
in the
+      // same order as it is in minimizedScope when we call 
startPrefixMapping; we do this because
+      // getPrefix in the contentHandler is order dependent
+      mappingsList +:= (prefix, uri)
       n = n.parent
     }
+    mappingsList.foreach{ case (prefix, uri) =>
+      contentHandler.startPrefixMapping(prefix, uri)
+    }
   }
 
   private def doEndPrefixMapping(diElem: DIElement, contentHandler: 
ContentHandler): Unit = {
-    val nsbStart = diElem.erd.minimizedScope
-    val nsbEnd = if (diElem.isRoot) {
-      scala.xml.TopScope
-    } else {
-      diElem.diParent.erd
-        .minimizedScope
-    }
+    val (nsbStart: NamespaceBinding, nsbEnd: NamespaceBinding) = 
getNsbStartAndEnd(diElem)
     var n = nsbStart

Review comment:
       I don't think think this comment was resolved. Same issue exists above 
in start prefix mapping and below in doAttributesPrefixMapping. Anytime we 
iterate over a NamespaceBinding from a start to an end, we should be using 
ne/eq.

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -66,13 +93,82 @@ class DaffodilParseOutputStreamContentHandler(out: 
OutputStream, pretty: Boolean
 
   override def startPrefixMapping(prefix: String, uri: String): Unit = {
     val _prefix = if (prefix == "") null else prefix
-    prefixMapping = NamespaceBinding(_prefix, uri, prefixMapping)
+    activePrefixMapping = NamespaceBinding(_prefix, uri, activePrefixMapping)
+    currentElementPrefixMapping = NamespaceBinding(_prefix, uri, 
currentElementPrefixMapping)
   }
 
   override def endPrefixMapping(prefix: String): Unit = {
     // do nothing
   }
 
+  /**
+   * Uses Attributes, which is passed in to the startElement callback, to 
gather element attributes
+   * or in the case where namespacePrefixes is true, prefix mappings. Any new 
prefix mappings are used
+   * to update the activePrefixMapping.
+   *
+   * @return tuple of Seq of string attribute=val pairings and a 
NamespaceBinding of any extracted
+   *         prefixMappings
+   */
+  def processAttributes(atts: Attributes): (Seq[String], NamespaceBinding) = {
+    var newPrefixMappings: NamespaceBinding = null
+    var attrPairings: Seq[String] = Seq()
+    var i = 0
+    var newMappingsList: Seq[(String, String)] = Seq()
+    while (i < atts.getLength) {
+      val qName = atts.getQName(i)
+      val attrVal =  atts.getValue(i)
+      if (qName.nonEmpty) {
+        // if qName is populated; as in when namespacePrefixes == true
+        // describing namespace mapping
+        if (qName.startsWith("xmlns:") || qName == "xmlns") {
+          // get prefix
+          val pre = if (qName.startsWith("xmlns:")) {
+            qName.substring(6)
+          } else {
+            null
+          }
+          // we make this call to check if the prefix already exists. If it 
doesn't exist, we get a
+          // Nope, so we can add it to our list, but if it does exist, nothing 
happens and it doesn't
+          // get re-added and we instead proceed to the next item in Attributes
+          val maybeUri = XMLUtils.maybeURI(activePrefixMapping, pre)
+          if (maybeUri.isEmpty) { // not found yet, add it
+            newMappingsList +:= (pre, attrVal)
+          }

Review comment:
       Is this correct? If XML attempts to redefine a prefix mapping, then we 
don't consider it a new mapping? For example:
   ```xml
   <pre:foo xmlns:pre="uri_one">
     <pre:foo xmlns:pre="uri_two">test</pre:foo>
   </pre:foo>
   ```
   This is insane and I hope to never actually see XML like that, but If this 
function was called to process the attributes of the inner foo, then we 
wouldn't consider the ``uri_two`` mapping as a new mapping, right? And so that 
would be lost? Should we just always unconditionally add this mapping to the 
newMappingsList? Or maybe the goal here is to remove duplicate/unnecessary 
mappings, in which case maybe the logic should be something like this?
   ```scala
   if (maybeUri.isEmpty || maybeUri.get != attrVal)
   ```
   So if the prefix isn't already mapped to anything, or it is but it's mapped 
to a different URI, then we do output it?

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseXMLReader.scala
##########
@@ -144,7 +142,13 @@ class DaffodilParseXMLReader(dp: DataProcessor) extends 
DFDL.DaffodilParseXMLRea
   }
 
   def parse(isdis: InputSourceDataInputStream): Unit = {
-    val sio = createSAXInfosetOutputter(this)
+    val sio = createSAXInfosetOutputter(this,
+      saxNamespaceFeatureValue,
+      saxNamespacePrefixesFeatureValue)
+    // validate that the features are not false/false
+    if (!sio.namespacesFeature && !sio.namespacePrefixesFeature) {
+      throw new SAXException("Illegal State: Features cannot both be false")

Review comment:
       Might be worth mentioning specifically which features, that might not 
always be obvious to people not faimilar with SAX. Probably not too confusing 
right now since Daffodil only supports two features, but if ever add more in 
the future it could be helpful.

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -116,9 +256,19 @@ class DaffodilParseOutputStreamContentHandler(out: 
OutputStream, pretty: Boolean
       }
     }
     writer.write("</")
-    writer.write(qName)
+    outputTagName(uri, localName, qName)
     writer.write(">")
     outputNewlineStack.pop()
+
+    // throw out current prefix mapping context as we're done with it
+    if (!activePrefixMappingContextStack.isEmpty) 
activePrefixMappingContextStack.pop

Review comment:
       Shouldn't this condition always be true? I think we always push onto 
this stack when startElement is called, and so there should always be an 
element to pop when endELement is called. I guess the only time that wouldn't 
be true is if the XMLReader messed up and called endElement without calling a 
matching startElement but that should be an error. Perhaps this condition that 
the stack isn't empty should just be changed to an Assert.invariant?

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -45,7 +72,7 @@ class DaffodilParseOutputStreamContentHandler(out: 
OutputStream, pretty: Boolean
   def reset(): Unit = {
     resetIndentation()
     writer.flush()
-    prefixMapping = null
+    activePrefixMapping = null

Review comment:
       Should we also reset ``currentElementPrefixMapping`` to null? I think 
theortetcially it should always be null when finished, but maybe at somepoint 
an exception could be thrown, in which case the state might be incorrect and 
reset() would need to fix that.
   
   Do we also need to empty the ``activePrefixMappingContextStack`` for the 
same reason?

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -84,21 +180,42 @@ class DaffodilParseOutputStreamContentHandler(out: 
OutputStream, pretty: Boolean
       writer.write(System.lineSeparator())
       outputIndentation(writer)
     }
+
+    // extracts attributes (or new mappings in the case where namespacePrefix 
== true) from atts
+    val pmAttrRes = processAttributes(atts)
+    /**
+     * represents the attributes for the current element. We use this to 
generate the attributes list
+     * within the start tag
+     */
+    val currentElementAttributes = pmAttrRes._1
+    currentElementPrefixMapping =
+      if (pmAttrRes._2 != null) {
+        currentElementPrefixMapping
+      } else {
+        NamespaceBinding(pmAttrRes._2.prefix, pmAttrRes._2.uri, 
currentElementPrefixMapping)
+      }

Review comment:
       Is this right? Say ``processAttributes`` does process some xmlns:pre 
mappings. In that case, it will return a ``NamespaceBinding`` of those local 
prefix mappings, and ``pmAttrRes._2`` will not be null and we fall into the 
first block. But if we got local mappings, dont' we want the 
``currentELementPrefixMapping`` to be set to those, i.e. ``pmAttrrRes._2``?
   
   And in the case when ``pmAttrsRes._2`` is null, then we fall into the else 
block and then try to get .prefix and .uri from that, but isn't it null? Won't 
that be a NullPointerException?
   
   Why don't we just unconditionally set ``currentElementPrefixMapping = 
pmAttrRes._2``? Doesn't that represent the prefix mappings procesed for just 
this element?

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseXMLReader.scala
##########
@@ -144,7 +142,13 @@ class DaffodilParseXMLReader(dp: DataProcessor) extends 
DFDL.DaffodilParseXMLRea
   }
 
   def parse(isdis: InputSourceDataInputStream): Unit = {
-    val sio = createSAXInfosetOutputter(this)
+    val sio = createSAXInfosetOutputter(this,

Review comment:
       Unrelated, but I noticed above the setProperty function just calls 
value.asInstanceOf. If value isn't an instance of the right thing, I think that 
will throw a ClassCastException? Should that first check if it's the right 
instance, and then throw SAXNotRecognizedException or SAXNotSupportedException?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to