stevedlawrence commented on a change in pull request #436:
URL: https://github.com/apache/incubator-daffodil/pull/436#discussion_r505581860



##########
File path: daffodil-cli/src/main/scala/org/apache/daffodil/Main.scala
##########
@@ -779,24 +782,32 @@ object Main extends Logging {
           }
         }
       }
+      case "sax" => data

Review comment:
       This made me realize that for unparsing with the CLI, we read the entire 
infoset into a byte array. We really shouldn't be doing this as this makes it 
impossible to unparse infosets that are larger than memory using the CLI. You 
don't have to fix this as part of this change though--this isn't an issue with 
your SAX implementation. I've created 
[DAFFODIL-2421](https://issues.apache.org/jira/browse/DAFFODIL-2412) to track 
this issue.

##########
File path: daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala
##########
@@ -424,6 +424,9 @@ object XMLUtils {
   val DAFFODIL_SAX_URN_BLOBPREFIX: String = DAFFODIL_SAX_URN_ROOT + 
":BlobPrefix"
   val DAFFODIL_SAX_URN_BLOBSUFFIX: String = DAFFODIL_SAX_URN_ROOT + 
":BlobSuffix"
 
+  val DAFFODIL_SAX_NAMESPACE_FEATURE = "http://xml.org/sax/features/namespaces";
+  val DAFFODIL_SAX_NAMESPACE_PREFIX_FEATURE = 
"http://xml.org/sax/features/namespace-prefixes";
+

Review comment:
       These namespaces aren't specific to Daffodil, which the ``DAFFODIL_`` 
prefix sort of implies. I thought these were enabling some daffodil specific 
behavior at first. Dropping the prefix I think would make it more clear.

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/api/DFDLParserUnparser.scala
##########
@@ -202,6 +214,70 @@ object DFDL {
     def parse(ab: Array[Byte]): Unit
   }
 
+  trait DaffodilInputContentHandler extends org.xml.sax.ContentHandler with 
ProducerCoroutine {
+    def setUnparseResult(ur: UnparseResult): Unit

Review comment:
       Is this setter needed? We don't want external users to be able to change 
the UnparseResult. 

##########
File path: daffodil-cli/src/main/scala/org/apache/daffodil/Main.scala
##########
@@ -779,24 +782,32 @@ object Main extends Logging {
           }
         }
       }
+      case "sax" => data
     }
   }
 
-  def getInfosetInputter(infosetType: String, anyRef: AnyRef): InfosetInputter 
= {
+  def getInfosetInputter(
+    infosetType: String,
+    anyRef: AnyRef,
+    processor: Option[DFDL.DataProcessor] = None,
+    outChannel: Option[DFDL.Output] = None): Either[InfosetInputter, 
DFDL.DaffodilInputContentHandler] = {
     infosetType match {
       case "xml" => {
         val is = new ByteArrayInputStream(anyRef.asInstanceOf[Array[Byte]])
-        new XMLTextInfosetInputter(is)
+        Left(new XMLTextInfosetInputter(is))
       }
-      case "scala-xml" => new 
ScalaXMLInfosetInputter(anyRef.asInstanceOf[scala.xml.Node])
+      case "scala-xml" => Left(new 
ScalaXMLInfosetInputter(anyRef.asInstanceOf[scala.xml.Node]))
       case "json" => {
         val is = new ByteArrayInputStream(anyRef.asInstanceOf[Array[Byte]])
-        new JsonInfosetInputter(is)
+        Left(new JsonInfosetInputter(is))
       }
-      case "jdom" => new 
JDOMInfosetInputter(anyRef.asInstanceOf[org.jdom2.Document])
+      case "jdom" => Left(new 
JDOMInfosetInputter(anyRef.asInstanceOf[org.jdom2.Document]))
       case "w3cdom" => {
         val tl = anyRef.asInstanceOf[ThreadLocal[org.w3c.dom.Document]]
-        new W3CDOMInfosetInputter(tl.get)
+        Left(new W3CDOMInfosetInputter(tl.get))
+      }
+      case "sax" => {
+        Right(new DaffodilInputContentHandler(processor.get, outChannel.get))

Review comment:
       Rather than creating a DaffodilInputContentHandler, it probably makes 
sense to use the dp.newContentHandlerInstance function you've created.
   

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/api/DFDLParserUnparser.scala
##########
@@ -202,6 +214,70 @@ object DFDL {
     def parse(ab: Array[Byte]): Unit
   }
 
+  trait DaffodilInputContentHandler extends org.xml.sax.ContentHandler with 
ProducerCoroutine {
+    def setUnparseResult(ur: UnparseResult): Unit
+    def getUnparseResult: UnparseResult
+  }
+
+  case class DaffodilUnparseErrorSAXException(unparseResult: UnparseResult)
+    extends 
org.xml.sax.SAXException(unparseResult.getDiagnostics.head.getMessage(),
+      unparseResult.getDiagnostics.filter(_.isError).head) {

Review comment:
       I'm concerned we're going to lose diagnostics since this only gets the 
head. I would suggest just extending the ``SAXException(s: String)`` 
constructor and just provide your ``mkString("\n")`` message below. I don't 
think the extra Exception parameter adds anything, and I think you also 
wouldn't need to override toString.

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetInputter.scala
##########
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.infoset
+
+import java.net.URI
+import java.net.URISyntaxException
+
+import scala.util.Try
+
+import org.apache.daffodil.api.DFDL
+import org.apache.daffodil.api.DFDL.DaffodilInputContentHandler
+import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException
+import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException
+import org.apache.daffodil.dpath.NodeInfo
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument
+import org.apache.daffodil.util.Maybe.One
+import org.apache.daffodil.util.MaybeBoolean
+import org.apache.daffodil.util.Misc
+import org.apache.daffodil.xml.XMLUtils
+
+class SAXInfosetInputter(ch: DaffodilInputContentHandler, dp: 
DFDL.DataProcessor, output: DFDL
+.Output) extends InfosetInputter with DFDL.ConsumerCoroutine {
+
+  var isDone = false
+  var currentEvent: DFDL.SaxInfosetEvent = _
+  var nextEvent: DFDL.SaxInfosetEvent = _
+  val contentHandlerEvent: DFDL.SaxInfosetEvent = new DFDL.SaxInfosetEvent
+
+  override def getEventType(): InfosetInputterEventType = 
currentEvent.eventType.orNull
+
+  override def getLocalName(): String = currentEvent.localName.orNull
+
+  override def getNamespaceURI(): String = currentEvent.namespaceURI.orNull
+
+  override def getSimpleText(primType: NodeInfo.Kind): String = {
+    val res = if (currentEvent.simpleText.isDefined) {
+      currentEvent.simpleText.get
+    } else (
+      throw new NonTextFoundInSimpleContentException(getLocalName())
+    )
+    if (primType.isInstanceOf[NodeInfo.AnyURI.Kind] && res.nonEmpty) {
+      try {
+        val uri = new URI(res)
+        if (!uri.getPath.startsWith("/")) {
+          // TDML files must allow blob URI's to be relative, but Daffodil
+          // requires them to be absolute with a scheme. So search for the file
+          // using TDML semantics and convert to an absolute URI
+          val abs = Misc.searchResourceOption(uri.getPath, None)
+          abs.get.toString
+        } else {
+          res
+        }
+      } catch {
+        case _: URISyntaxException => res
+      }

Review comment:
       Hmm, I sort of don't like this. InfosetInputters shouldn't have to 
worry about handling absolute URI's in data. That's really something only the 
TDMLInfosetInputter should have to worry about. But, I guess the 
TDMLInfosetInputter isn't involved in this at all so can't do that.
   
   Perhaps we add a function to the ContentHandler that sets a flag to enable 
this capability. The TDML Runner can call this, but by default it's off so the 
normal behavior is the same as all other InfosetInputters?

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilInputContentHandler.scala
##########
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.processors
+
+import scala.util.Try
+import scala.xml.NamespaceBinding
+
+import javax.xml.XMLConstants
+import org.apache.daffodil.api.DFDL
+import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException
+import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException
+import org.apache.daffodil.infoset.IllegalContentWhereEventExpected
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndElement
+import org.apache.daffodil.infoset.InfosetInputterEventType.StartDocument
+import org.apache.daffodil.infoset.InfosetInputterEventType.StartElement
+import org.apache.daffodil.infoset.SAXInfosetInputter
+import org.apache.daffodil.util.Maybe.Nope
+import org.apache.daffodil.util.Maybe.One
+import org.xml.sax.Attributes
+import org.xml.sax.Locator
+
+class DaffodilInputContentHandler(dp: DFDL.DataProcessor, output: DFDL.Output)
+  extends DFDL.DaffodilInputContentHandler {
+  private val inputter = new SAXInfosetInputter(this, dp, output)
+  private var unparseResult: DFDL.UnparseResult = _
+  private var infosetEvent: DFDL.SaxInfosetEvent = _
+  private val characterData = new StringBuilder()
+
+  def setUnparseResult(ur: DFDL.UnparseResult): Unit = {
+    unparseResult = ur
+  }
+
+  def getUnparseResult: DFDL.UnparseResult = unparseResult
+
+  override def setDocumentLocator(locator: Locator): Unit = {
+    // do nothing
+  }
+
+  override def startDocument(): Unit = {
+    infosetEvent = new DFDL.SaxInfosetEvent
+    infosetEvent.eventType = One(StartDocument)
+    sendToInputter
+  }
+
+  override def endDocument(): Unit = {
+    if (infosetEvent == null) {
+      infosetEvent = new DFDL.SaxInfosetEvent
+    }
+    infosetEvent.eventType = One(EndDocument)
+    sendToInputter
+  }
+
+  // TODO not sure what to do with the prefix mappings, technically we don't 
need them since
+  // we have the attributes with startElement to look up any prefixes we need
+  override def startPrefixMapping(prefix: String, uri: String): Unit = {
+    val pre = if (prefix == "") null else prefix
+    infosetEvent.prefixMapping = One(NamespaceBinding(pre, uri, 
infosetEvent.prefixMapping
+      .orNull))
+  }

Review comment:
       Yeah, seems like we don't need it. Unless there are XMLReaders that don't 
provide certain parameters in startElement? Like maybe uri is always null, and 
they expect you to figure out the namespace uri based on the qName string?

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetInputter.scala
##########
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.infoset
+
+import java.net.URI
+import java.net.URISyntaxException
+
+import scala.util.Try
+
+import org.apache.daffodil.api.DFDL
+import org.apache.daffodil.api.DFDL.DaffodilInputContentHandler
+import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException
+import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException
+import org.apache.daffodil.dpath.NodeInfo
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument
+import org.apache.daffodil.util.Maybe.One
+import org.apache.daffodil.util.MaybeBoolean
+import org.apache.daffodil.util.Misc
+import org.apache.daffodil.xml.XMLUtils
+
+class SAXInfosetInputter(ch: DaffodilInputContentHandler, dp: 
DFDL.DataProcessor, output: DFDL
+.Output) extends InfosetInputter with DFDL.ConsumerCoroutine {
+
+  var isDone = false
+  var currentEvent: DFDL.SaxInfosetEvent = _
+  var nextEvent: DFDL.SaxInfosetEvent = _
+  val contentHandlerEvent: DFDL.SaxInfosetEvent = new DFDL.SaxInfosetEvent
+
+  override def getEventType(): InfosetInputterEventType = 
currentEvent.eventType.orNull
+
+  override def getLocalName(): String = currentEvent.localName.orNull
+
+  override def getNamespaceURI(): String = currentEvent.namespaceURI.orNull
+
+  override def getSimpleText(primType: NodeInfo.Kind): String = {
+    val res = if (currentEvent.simpleText.isDefined) {
+      currentEvent.simpleText.get
+    } else (
+      throw new NonTextFoundInSimpleContentException(getLocalName())
+    )
+    if (primType.isInstanceOf[NodeInfo.AnyURI.Kind] && res.nonEmpty) {
+      try {
+        val uri = new URI(res)
+        if (!uri.getPath.startsWith("/")) {
+          // TDML files must allow blob URI's to be relative, but Daffodil
+          // requires them to be absolute with a scheme. So search for the file
+          // using TDML semantics and convert to an absolute URI
+          val abs = Misc.searchResourceOption(uri.getPath, None)
+          abs.get.toString
+        } else {
+          res
+        }
+      } catch {
+        case _: URISyntaxException => res
+      }
+    } else {
+      if (primType.isInstanceOf[NodeInfo.String.Kind]) {
+        val remapped = XMLUtils.remapPUAToXMLIllegalCharacters(res)
+        remapped
+      } else {
+        res
+      }
+    }
+  }
+
+  override def isNilled(): MaybeBoolean = {
+    val _isNilled = if (currentEvent.nilValue.isDefined) {
+      val nilValue = currentEvent.nilValue.get
+      if (nilValue == "true" || nilValue == "1") {
+        MaybeBoolean(true)
+      } else if (nilValue == "false" || nilValue == "0") {
+        MaybeBoolean(false)
+      } else {
+        throw new InvalidInfosetException("xsi:nil property is not a valid 
boolean: '" + nilValue +
+          "' for element " + getLocalName())
+      }
+    } else {
+      MaybeBoolean.Nope
+    }
+    _isNilled
+  }
+
+  //called without changing any state
+  override def hasNext(): Boolean = {
+    if (isDone) {
+      false
+    } else {
+      val event = this.resume(ch, Try(currentEvent))
+      nextEvent = copyEvent(event.getOrElse(null))

Review comment:
       Can we just do ``nextEvent = event.getOrElse(null)`` and avoid the copy? 
Or does the ContentHandler keep a single event and just modify it?

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilInputContentHandler.scala
##########
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.processors
+
+import scala.util.Try
+import scala.xml.NamespaceBinding
+
+import javax.xml.XMLConstants
+import org.apache.daffodil.api.DFDL
+import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException
+import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException
+import org.apache.daffodil.infoset.IllegalContentWhereEventExpected
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndElement
+import org.apache.daffodil.infoset.InfosetInputterEventType.StartDocument
+import org.apache.daffodil.infoset.InfosetInputterEventType.StartElement
+import org.apache.daffodil.infoset.SAXInfosetInputter
+import org.apache.daffodil.util.Maybe.Nope
+import org.apache.daffodil.util.Maybe.One
+import org.xml.sax.Attributes
+import org.xml.sax.Locator
+
+class DaffodilInputContentHandler(dp: DFDL.DataProcessor, output: DFDL.Output)
+  extends DFDL.DaffodilInputContentHandler {
+  private val inputter = new SAXInfosetInputter(this, dp, output)
+  private var unparseResult: DFDL.UnparseResult = _
+  private var infosetEvent: DFDL.SaxInfosetEvent = _

Review comment:
       If we're copying events, this can probably just be a val. That way there 
would literally only be three instances of the SaxInfosetEvent: curEvent, 
nextEvent, and infosetEvent, and then we copy fields between them. 
   
   Might be worth adding some scala doc at the top of the 
DaffodilInputContentHandler and SAXInfosetInputter explaining how these two 
classes interact with one another. There is a somewhat complex interaction 
going on so adding comments about how this all works is very helpful.

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilInputContentHandler.scala
##########
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.processors
+
+import scala.util.Try
+import scala.xml.NamespaceBinding
+
+import javax.xml.XMLConstants
+import org.apache.daffodil.api.DFDL
+import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException
+import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException
+import org.apache.daffodil.infoset.IllegalContentWhereEventExpected
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndElement
+import org.apache.daffodil.infoset.InfosetInputterEventType.StartDocument
+import org.apache.daffodil.infoset.InfosetInputterEventType.StartElement
+import org.apache.daffodil.infoset.SAXInfosetInputter
+import org.apache.daffodil.util.Maybe.Nope
+import org.apache.daffodil.util.Maybe.One
+import org.xml.sax.Attributes
+import org.xml.sax.Locator
+
+class DaffodilInputContentHandler(dp: DFDL.DataProcessor, output: DFDL.Output)
+  extends DFDL.DaffodilInputContentHandler {
+  private val inputter = new SAXInfosetInputter(this, dp, output)
+  private var unparseResult: DFDL.UnparseResult = _
+  private var infosetEvent: DFDL.SaxInfosetEvent = _
+  private val characterData = new StringBuilder()
+
+  def setUnparseResult(ur: DFDL.UnparseResult): Unit = {
+    unparseResult = ur
+  }
+
+  def getUnparseResult: DFDL.UnparseResult = unparseResult
+
+  override def setDocumentLocator(locator: Locator): Unit = {
+    // do nothing
+  }
+
+  override def startDocument(): Unit = {
+    infosetEvent = new DFDL.SaxInfosetEvent
+    infosetEvent.eventType = One(StartDocument)
+    sendToInputter
+  }
+
+  override def endDocument(): Unit = {
+    if (infosetEvent == null) {
+      infosetEvent = new DFDL.SaxInfosetEvent
+    }
+    infosetEvent.eventType = One(EndDocument)
+    sendToInputter
+  }
+
+  // TODO not sure what to do with the prefix mappings, technically we don't 
need them since
+  // we have the attributes with startElement to look up any prefixes we need
+  override def startPrefixMapping(prefix: String, uri: String): Unit = {
+    val pre = if (prefix == "") null else prefix
+    infosetEvent.prefixMapping = One(NamespaceBinding(pre, uri, 
infosetEvent.prefixMapping
+      .orNull))
+  }
+
+  override def endPrefixMapping(prefix: String): Unit = {
+    // do nothing
+  }
+
+  override def startElement(uri: String, localName: String, qName: String, 
atts: Attributes): Unit = {
+    // we need to check if the characters info is all whitespace, if not, it 
is an error
+    if (characterData.nonEmpty && characterData.toString().trim.nonEmpty) {
+      throw new IllegalContentWhereEventExpected("Non-whitespace characters in 
complex " +
+        "Element: " + characterData.toString())
+    } else {
+      // reset since it was whitespace only
+      characterData.setLength(0)
+    }
+
+    if (infosetEvent == null) infosetEvent = new DFDL.SaxInfosetEvent
+    else if (!infosetEvent.isEmpty && infosetEvent.localName.isDefined) {
+      // we started another element while we were in the process of building 
another
+      // this most likely means the first element was complex, either ways, it 
is ready
+      // for the inputter queue
+      sendToInputter

Review comment:
       > most likely means
   
   I think this definitely means, right? I'm not sure I understand the isEmpty 
or localName.isDefined checks. Can you explain those?

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetInputter.scala
##########
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.infoset
+
+import java.net.URI
+import java.net.URISyntaxException
+
+import scala.util.Try
+
+import org.apache.daffodil.api.DFDL
+import org.apache.daffodil.api.DFDL.DaffodilInputContentHandler
+import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException
+import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException
+import org.apache.daffodil.dpath.NodeInfo
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument
+import org.apache.daffodil.util.Maybe.One
+import org.apache.daffodil.util.MaybeBoolean
+import org.apache.daffodil.util.Misc
+import org.apache.daffodil.xml.XMLUtils
+
+class SAXInfosetInputter(ch: DaffodilInputContentHandler, dp: 
DFDL.DataProcessor, output: DFDL
+.Output) extends InfosetInputter with DFDL.ConsumerCoroutine {

Review comment:
       Fix the wrapping here.

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/api/DFDLParserUnparser.scala
##########
@@ -185,6 +192,11 @@ object DFDL {
      */
     def newXMLReaderInstance: DaffodilXMLReader
 
+    /**
+     * Creates a new instance of XMLReader for SAX Unparsing

Review comment:
       Comment is incorrect, this creates a ContentHandler.

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetInputter.scala
##########
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.infoset
+
+import java.net.URI
+import java.net.URISyntaxException
+
+import scala.util.Try
+
+import org.apache.daffodil.api.DFDL
+import org.apache.daffodil.api.DFDL.DaffodilInputContentHandler
+import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException
+import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException
+import org.apache.daffodil.dpath.NodeInfo
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument
+import org.apache.daffodil.util.Maybe.One
+import org.apache.daffodil.util.MaybeBoolean
+import org.apache.daffodil.util.Misc
+import org.apache.daffodil.xml.XMLUtils
+
+class SAXInfosetInputter(ch: DaffodilInputContentHandler, dp: 
DFDL.DataProcessor, output: DFDL
+.Output) extends InfosetInputter with DFDL.ConsumerCoroutine {
+
+  var isDone = false
+  var currentEvent: DFDL.SaxInfosetEvent = _
+  var nextEvent: DFDL.SaxInfosetEvent = _
+  val contentHandlerEvent: DFDL.SaxInfosetEvent = new DFDL.SaxInfosetEvent
+
+  override def getEventType(): InfosetInputterEventType = 
currentEvent.eventType.orNull
+
+  override def getLocalName(): String = currentEvent.localName.orNull
+
+  override def getNamespaceURI(): String = currentEvent.namespaceURI.orNull
+
+  override def getSimpleText(primType: NodeInfo.Kind): String = {
+    val res = if (currentEvent.simpleText.isDefined) {
+      currentEvent.simpleText.get
+    } else (
+      throw new NonTextFoundInSimpleContentException(getLocalName())
+    )
+    if (primType.isInstanceOf[NodeInfo.AnyURI.Kind] && res.nonEmpty) {
+      try {
+        val uri = new URI(res)
+        if (!uri.getPath.startsWith("/")) {
+          // TDML files must allow blob URI's to be relative, but Daffodil
+          // requires them to be absolute with a scheme. So search for the file
+          // using TDML semantics and convert to an absolute URI
+          val abs = Misc.searchResourceOption(uri.getPath, None)
+          abs.get.toString
+        } else {
+          res
+        }
+      } catch {
+        case _: URISyntaxException => res
+      }
+    } else {
+      if (primType.isInstanceOf[NodeInfo.String.Kind]) {
+        val remapped = XMLUtils.remapPUAToXMLIllegalCharacters(res)
+        remapped
+      } else {
+        res
+      }
+    }
+  }
+
+  override def isNilled(): MaybeBoolean = {
+    val _isNilled = if (currentEvent.nilValue.isDefined) {
+      val nilValue = currentEvent.nilValue.get
+      if (nilValue == "true" || nilValue == "1") {
+        MaybeBoolean(true)
+      } else if (nilValue == "false" || nilValue == "0") {
+        MaybeBoolean(false)
+      } else {
+        throw new InvalidInfosetException("xsi:nil property is not a valid 
boolean: '" + nilValue +
+          "' for element " + getLocalName())
+      }
+    } else {
+      MaybeBoolean.Nope
+    }
+    _isNilled
+  }
+
+  //called without changing any state
+  override def hasNext(): Boolean = {
+    if (isDone) {
+      false
+    } else {
+      val event = this.resume(ch, Try(currentEvent))
+      nextEvent = copyEvent(event.getOrElse(null))
+      nextEvent != null
+    }
+  }
+
+  // done with this current event, move on to the next event
+  override def next(): Unit = {
+    currentEvent = copyEvent(nextEvent)
+    if (currentEvent.eventType.contains(EndDocument)) {
+      isDone = true
+    }

Review comment:
       I think this should have the logic that hasNext has. When we ask for the 
next() event, we also need to know if it hasNext() as well. So I would expect 
this to be something like
   ```suggestion
       currentEvent = copyEvent(nextEvent)
       if (currentEvent.eventType.contains(EndDocument)) {
         isDone = true
       } else {
         val event = this.resume(...)
         nextEvent = copyEvent(event)
       }
   ```
   And then hasNext just becomes something like
   ```scala
   nextEvent != null
   ```
   
   I'm also wondering if we should consider not copying and just moving things? 
e.g.
   ```
   currentEvent = nextEvent
   nextEvent = this.resume(...).getOrElse(null)
   ```
   I guess the question is do we want to avoid allocations or copies of data? 
I'm not sure at which point one is favored over the other.

##########
File path: 
daffodil-tdml-processor/src/main/scala/org/apache/daffodil/tdml/processor/DaffodilTDMLDFDLProcessor.scala
##########
@@ -311,15 +323,62 @@ class DaffodilTDMLDFDLProcessor private (private var dp: 
DataProcessor) extends
     xri.parse(sis)
     val actual = dp.parse(dis, outputter)
 
-
     if (!actual.isError && !errorHandler.isError) {
-      verifySameParseOutput(outputter, outputStream)
+      verifySameParseOutput(outputter, saxOutputStream)
     }
-    verifySameDiagnostics(actual, errorHandler)
+    val dpParseDiag = actual.getDiagnostics.map(_.getMessage())
+    val saxParseDiag = errorHandler.getDiagnostics.map(_.getMessage())
+    verifySameDiagnostics(dpParseDiag, saxParseDiag)
 
     new DaffodilTDMLParseResult(actual, outputter)
   }
 
+  def doUnparseWithBothApis(dpInputter: TDMLInfosetInputter, saxInputStream: 
java.io.InputStream,
+    dpOutputStream: java.io.OutputStream): DaffodilTDMLUnparseResult = {
+
+    val dpOutputChannel = java.nio.channels.Channels.newChannel(dpOutputStream)
+    val saxOutputStream = new ByteArrayOutputStream
+    val saxOutputChannel = 
java.nio.channels.Channels.newChannel(saxOutputStream)
+    val errorHandler = new DaffodilTDMLSAXErrorHandler()
+    val inputContentHandler = new DaffodilInputContentHandler(dp, 
saxOutputChannel)
+    val xmlReader = SAXParserFactory.newInstance.newSAXParser.getXMLReader
+    xmlReader.setContentHandler(inputContentHandler)
+    xmlReader.setErrorHandler(errorHandler)
+    xmlReader.setFeature(XMLUtils.DAFFODIL_SAX_NAMESPACE_FEATURE, true)
+    xmlReader.setFeature(XMLUtils.DAFFODIL_SAX_NAMESPACE_PREFIX_FEATURE, true)
+
+    val actualDP = dp.unparse(dpInputter, 
dpOutputChannel).asInstanceOf[UnparseResult]
+    dpOutputChannel.close()
+    // kick off SAX Unparsing
+    try {
+      xmlReader.parse(new InputSource(saxInputStream))
+    } catch {
+      case e: DaffodilUnhandledSAXException =>
+        // In the case of an unexpected errors, catch and throw as 
TDMLException
+        throw TDMLException("Unexpected error during SAX Unparse:" + e, None)
+      case _: DaffodilUnparseErrorSAXException =>
+        // TODO not sure what else to do with this error, unparseResult and 
its diagnostics
+        // will be handled below

Review comment:
       Yeah, I think this is safe to ignore since you get the unparse result from 
the ContentHandler.

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilInputContentHandler.scala
##########
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.processors
+
+import scala.util.Try
+import scala.xml.NamespaceBinding
+
+import javax.xml.XMLConstants
+import org.apache.daffodil.api.DFDL
+import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException
+import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException
+import org.apache.daffodil.infoset.IllegalContentWhereEventExpected
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndElement
+import org.apache.daffodil.infoset.InfosetInputterEventType.StartDocument
+import org.apache.daffodil.infoset.InfosetInputterEventType.StartElement
+import org.apache.daffodil.infoset.SAXInfosetInputter
+import org.apache.daffodil.util.Maybe.Nope
+import org.apache.daffodil.util.Maybe.One
+import org.xml.sax.Attributes
+import org.xml.sax.Locator
+
+class DaffodilInputContentHandler(dp: DFDL.DataProcessor, output: DFDL.Output)
+  extends DFDL.DaffodilInputContentHandler {
+  private val inputter = new SAXInfosetInputter(this, dp, output)
+  private var unparseResult: DFDL.UnparseResult = _
+  private var infosetEvent: DFDL.SaxInfosetEvent = _
+  private val characterData = new StringBuilder()
+
+  def setUnparseResult(ur: DFDL.UnparseResult): Unit = {
+    unparseResult = ur
+  }
+
+  def getUnparseResult: DFDL.UnparseResult = unparseResult
+
+  override def setDocumentLocator(locator: Locator): Unit = {
+    // do nothing
+  }
+
+  override def startDocument(): Unit = {
+    infosetEvent = new DFDL.SaxInfosetEvent
+    infosetEvent.eventType = One(StartDocument)
+    sendToInputter
+  }
+
+  override def endDocument(): Unit = {
+    if (infosetEvent == null) {
+      infosetEvent = new DFDL.SaxInfosetEvent
+    }
+    infosetEvent.eventType = One(EndDocument)
+    sendToInputter
+  }
+
+  // TODO not sure what to do with the prefix mappings, technically we don't 
need them since
+  // we have the attributes with startElement to look up any prefixes we need
+  override def startPrefixMapping(prefix: String, uri: String): Unit = {
+    val pre = if (prefix == "") null else prefix
+    infosetEvent.prefixMapping = One(NamespaceBinding(pre, uri, 
infosetEvent.prefixMapping
+      .orNull))
+  }
+
+  override def endPrefixMapping(prefix: String): Unit = {
+    // do nothing
+  }
+
+  override def startElement(uri: String, localName: String, qName: String, 
atts: Attributes): Unit = {
+    // we need to check if the characters info is all whitespace, if not, it 
is an error
+    if (characterData.nonEmpty && characterData.toString().trim.nonEmpty) {
+      throw new IllegalContentWhereEventExpected("Non-whitespace characters in 
complex " +
+        "Element: " + characterData.toString())
+    } else {
+      // reset since it was whitespace only
+      characterData.setLength(0)
+    }
+
+    if (infosetEvent == null) infosetEvent = new DFDL.SaxInfosetEvent
+    else if (!infosetEvent.isEmpty && infosetEvent.localName.isDefined) {
+      // we started another element while we were in the process of building 
another
+      // this most likely means the first element was complex, either ways, it 
is ready
+      // for the inputter queue
+      sendToInputter
+    }
+    // use Attributes to determine xsi:nil value
+    val nilIn = atts.getIndex(XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI, 
"nil")
+    infosetEvent.nilValue = if (nilIn >= 0) {
+      val nilValue = atts.getValue(nilIn)
+      One(nilValue)
+    } else {
+      Nope
+    }
+    // set localName and namespaceURI
+    if (localName.nonEmpty && uri.nonEmpty) {
+      infosetEvent.namespaceURI = One(uri)
+      infosetEvent.localName = One(localName)
+    } else if (qName.nonEmpty) {
+      val qNameArr = qName.split(":")
+
+      if (qNameArr.length > 1) {
+        infosetEvent.localName = qNameArr.lift(1)
+        val qNamePrefix = qNameArr.lift(0).get
+        val in = atts.getIndex(s"xmlns:$qNamePrefix")

Review comment:
       This implies that the namespace for this prefix is defined on this 
element. But it could have been defined on some parent element. I think perhaps 
this is where the prefix mapping comes into play. Perhaps we need to build a 
NamespaceBinding, and if we do not get uri + local name, then we have to look 
up the prefix (which could be the empty string) in the NamespaceBinding?

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetInputter.scala
##########
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.infoset
+
+import java.net.URI
+import java.net.URISyntaxException
+
+import scala.util.Try
+
+import org.apache.daffodil.api.DFDL
+import org.apache.daffodil.api.DFDL.DaffodilInputContentHandler
+import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException
+import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException
+import org.apache.daffodil.dpath.NodeInfo
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument
+import org.apache.daffodil.util.Maybe.One
+import org.apache.daffodil.util.MaybeBoolean
+import org.apache.daffodil.util.Misc
+import org.apache.daffodil.xml.XMLUtils
+
+class SAXInfosetInputter(ch: DaffodilInputContentHandler, dp: 
DFDL.DataProcessor, output: DFDL
+.Output) extends InfosetInputter with DFDL.ConsumerCoroutine {
+
+  var isDone = false
+  var currentEvent: DFDL.SaxInfosetEvent = _
+  var nextEvent: DFDL.SaxInfosetEvent = _
+  val contentHandlerEvent: DFDL.SaxInfosetEvent = new DFDL.SaxInfosetEvent
+
+  override def getEventType(): InfosetInputterEventType = 
currentEvent.eventType.orNull
+
+  override def getLocalName(): String = currentEvent.localName.orNull
+
+  override def getNamespaceURI(): String = currentEvent.namespaceURI.orNull
+
+  override def getSimpleText(primType: NodeInfo.Kind): String = {
+    val res = if (currentEvent.simpleText.isDefined) {
+      currentEvent.simpleText.get
+    } else (
+      throw new NonTextFoundInSimpleContentException(getLocalName())
+    )
+    if (primType.isInstanceOf[NodeInfo.AnyURI.Kind] && res.nonEmpty) {
+      try {
+        val uri = new URI(res)
+        if (!uri.getPath.startsWith("/")) {
+          // TDML files must allow blob URI's to be relative, but Daffodil
+          // requires them to be absolute with a scheme. So search for the file
+          // using TDML semantics and convert to an absolute URI
+          val abs = Misc.searchResourceOption(uri.getPath, None)
+          abs.get.toString
+        } else {
+          res
+        }
+      } catch {
+        case _: URISyntaxException => res
+      }
+    } else {
+      if (primType.isInstanceOf[NodeInfo.String.Kind]) {
+        val remapped = XMLUtils.remapPUAToXMLIllegalCharacters(res)
+        remapped
+      } else {
+        res
+      }
+    }
+  }
+
+  override def isNilled(): MaybeBoolean = {
+    val _isNilled = if (currentEvent.nilValue.isDefined) {
+      val nilValue = currentEvent.nilValue.get
+      if (nilValue == "true" || nilValue == "1") {
+        MaybeBoolean(true)
+      } else if (nilValue == "false" || nilValue == "0") {
+        MaybeBoolean(false)
+      } else {
+        throw new InvalidInfosetException("xsi:nil property is not a valid 
boolean: '" + nilValue +
+          "' for element " + getLocalName())
+      }
+    } else {
+      MaybeBoolean.Nope
+    }
+    _isNilled
+  }
+
+  //called without changing any state
+  override def hasNext(): Boolean = {
+    if (isDone) {
+      false
+    } else {
+      val event = this.resume(ch, Try(currentEvent))
+      nextEvent = copyEvent(event.getOrElse(null))
+      nextEvent != null
+    }
+  }
+
+  // done with this current event, move on to the next event
+  override def next(): Unit = {
+    currentEvent = copyEvent(nextEvent)
+    if (currentEvent.eventType.contains(EndDocument)) {
+      isDone = true
+    }
+    nextEvent = null
+  }
+
+  def copyEvent(sourceEvent: DFDL.SaxInfosetEvent): DFDL.SaxInfosetEvent = {
+    var destEvent: DFDL.SaxInfosetEvent = null
+    if (sourceEvent == null) destEvent
+    else {
+      destEvent = new DFDL.SaxInfosetEvent
+      destEvent.eventType = sourceEvent.eventType
+      destEvent.namespaceURI = sourceEvent.namespaceURI
+      destEvent.localName = sourceEvent.localName
+      destEvent.nilValue = sourceEvent.nilValue
+      destEvent.simpleText = sourceEvent.simpleText
+      destEvent
+    }
+  }
+
+  override val supportsNamespaces: Boolean = true
+
+  override def fini: Unit = {
+    currentEvent = null
+  }
+
+  override protected def run(): Unit = {
+    try {
+      currentEvent = copyEvent(this.waitForResume.getOrElse(null))

Review comment:
       So this is waiting for the startDocument event, and then sets that as 
the current event? So nothing actually kicks off until we get the 
startDocument event? Seems reasonable, but it might be worth documenting this.

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilInputContentHandler.scala
##########
@@ -0,0 +1,180 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.processors
+
+import scala.util.Try
+import scala.xml.NamespaceBinding
+
+import javax.xml.XMLConstants
+import org.apache.daffodil.api.DFDL
+import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException
+import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException
+import org.apache.daffodil.infoset.IllegalContentWhereEventExpected
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndElement
+import org.apache.daffodil.infoset.InfosetInputterEventType.StartDocument
+import org.apache.daffodil.infoset.InfosetInputterEventType.StartElement
+import org.apache.daffodil.infoset.SAXInfosetInputter
+import org.apache.daffodil.util.Maybe.Nope
+import org.apache.daffodil.util.Maybe.One
+import org.xml.sax.Attributes
+import org.xml.sax.Locator
+
+class DaffodilInputContentHandler(dp: DFDL.DataProcessor, output: DFDL.Output)
+  extends DFDL.DaffodilInputContentHandler {
+  private val inputter = new SAXInfosetInputter(this, dp, output)
+  private var unparseResult: DFDL.UnparseResult = _
+  private var infosetEvent: DFDL.SaxInfosetEvent = _
+  private val characterData = new StringBuilder()
+
+  def setUnparseResult(ur: DFDL.UnparseResult): Unit = {
+    unparseResult = ur
+  }
+
+  def getUnparseResult: DFDL.UnparseResult = unparseResult
+
+  override def setDocumentLocator(locator: Locator): Unit = {
+    // do nothing
+  }
+
+  override def startDocument(): Unit = {
+    infosetEvent = new DFDL.SaxInfosetEvent
+    infosetEvent.eventType = One(StartDocument)
+    sendToInputter
+  }
+
+  override def endDocument(): Unit = {
+    if (infosetEvent == null) {
+      infosetEvent = new DFDL.SaxInfosetEvent
+    }
+    infosetEvent.eventType = One(EndDocument)
+    sendToInputter
+  }
+
+  // TODO not sure what to do with the prefix mappings, technically we don't 
need them since
+  // we have the attributes with startElement to look up any prefixes we need
+  override def startPrefixMapping(prefix: String, uri: String): Unit = {
+    val pre = if (prefix == "") null else prefix
+    infosetEvent.prefixMapping = One(NamespaceBinding(pre, uri, 
infosetEvent.prefixMapping
+      .orNull))
+  }
+
+  override def endPrefixMapping(prefix: String): Unit = {
+    // do nothing
+  }
+
+  override def startElement(uri: String, localName: String, qName: String, 
atts: Attributes): Unit = {
+    // we need to check if the characters info is all whitespace, if not, it 
is an error
+    if (characterData.nonEmpty && characterData.toString().trim.nonEmpty) {
+      throw new IllegalContentWhereEventExpected("Non-whitespace characters in 
complex " +
+        "Element: " + characterData.toString())
+    } else {
+      // reset since it was whitespace only
+      characterData.setLength(0)
+    }
+
+    if (infosetEvent == null) infosetEvent = new DFDL.SaxInfosetEvent
+    else if (!infosetEvent.isEmpty && infosetEvent.localName.isDefined) {
+      // we started another element while we were in the process of building 
another
+      // this most likely means the first element was complex, either ways, it 
is ready
+      // for the inputter queue
+      sendToInputter
+    }
+    // use Attributes to determine xsi:nil value
+    val nilIn = atts.getIndex(XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI, 
"nil")
+    infosetEvent.nilValue = if (nilIn >= 0) {
+      val nilValue = atts.getValue(nilIn)
+      One(nilValue)
+    } else {
+      Nope
+    }
+    // set localName and namespaceURI
+    if (localName.nonEmpty && uri.nonEmpty) {
+      infosetEvent.namespaceURI = One(uri)
+      infosetEvent.localName = One(localName)
+    } else if (qName.nonEmpty) {
+      val qNameArr = qName.split(":")
+
+      if (qNameArr.length > 1) {
+        infosetEvent.localName = qNameArr.lift(1)
+        val qNamePrefix = qNameArr.lift(0).get
+        val in = atts.getIndex(s"xmlns:$qNamePrefix")
+        if (in >= 0) {
+          val attrUri = atts.getValue(in)
+          infosetEvent.namespaceURI = One(attrUri)
+        }
+      } else {
+        infosetEvent.localName = qNameArr.lift(0)
+      }
+    } else {
+      infosetEvent.localName = if(localName.nonEmpty) One(localName) else Nope
+      infosetEvent.namespaceURI = if(uri.nonEmpty) One(uri) else Nope
+    }
+    infosetEvent.eventType = One(StartElement)
+  }
+
+  override def endElement(uri: String, localName: String, qName: String): Unit 
= {
+    // if infosetEvent is a startElement, send that first
+    if (infosetEvent.eventType.contains(StartElement)) {
+      // any characterData that exists at this point is valid data as padding 
data has been
+      // taken care of in startElement
+      val maybeNewStr = One(characterData.toString())
+      infosetEvent.simpleText = maybeNewStr
+      characterData.setLength(0)
+      sendToInputter
+    }
+    infosetEvent.eventType = One(EndElement)
+    sendToInputter
+  }
+
+  override def characters(ch: Array[Char], start: Int, length: Int): Unit = {
+    if (infosetEvent == null) {
+      infosetEvent = new DFDL.SaxInfosetEvent
+    }

Review comment:
       Having a single infosetEvent avoids this kind of stuff. We could just 
always have an infoset event.

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetInputter.scala
##########
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.infoset
+
+import java.net.URI
+import java.net.URISyntaxException
+
+import scala.util.Try
+
+import org.apache.daffodil.api.DFDL
+import org.apache.daffodil.api.DFDL.DaffodilInputContentHandler
+import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException
+import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException
+import org.apache.daffodil.dpath.NodeInfo
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument
+import org.apache.daffodil.util.Maybe.One
+import org.apache.daffodil.util.MaybeBoolean
+import org.apache.daffodil.util.Misc
+import org.apache.daffodil.xml.XMLUtils
+
+class SAXInfosetInputter(ch: DaffodilInputContentHandler, dp: 
DFDL.DataProcessor, output: DFDL
+.Output) extends InfosetInputter with DFDL.ConsumerCoroutine {
+
+  var isDone = false
+  var currentEvent: DFDL.SaxInfosetEvent = _
+  var nextEvent: DFDL.SaxInfosetEvent = _
+  val contentHandlerEvent: DFDL.SaxInfosetEvent = new DFDL.SaxInfosetEvent
+
+  override def getEventType(): InfosetInputterEventType = 
currentEvent.eventType.orNull
+
+  override def getLocalName(): String = currentEvent.localName.orNull
+
+  override def getNamespaceURI(): String = currentEvent.namespaceURI.orNull
+
+  override def getSimpleText(primType: NodeInfo.Kind): String = {
+    val res = if (currentEvent.simpleText.isDefined) {
+      currentEvent.simpleText.get
+    } else (
+      throw new NonTextFoundInSimpleContentException(getLocalName())
+    )
+    if (primType.isInstanceOf[NodeInfo.AnyURI.Kind] && res.nonEmpty) {
+      try {
+        val uri = new URI(res)
+        if (!uri.getPath.startsWith("/")) {
+          // TDML files must allow blob URI's to be relative, but Daffodil
+          // requires them to be absolute with a scheme. So search for the file
+          // using TDML semantics and convert to an absolute URI
+          val abs = Misc.searchResourceOption(uri.getPath, None)
+          abs.get.toString
+        } else {
+          res
+        }
+      } catch {
+        case _: URISyntaxException => res
+      }
+    } else {
+      if (primType.isInstanceOf[NodeInfo.String.Kind]) {
+        val remapped = XMLUtils.remapPUAToXMLIllegalCharacters(res)
+        remapped
+      } else {
+        res
+      }
+    }
+  }
+
+  override def isNilled(): MaybeBoolean = {
+    val _isNilled = if (currentEvent.nilValue.isDefined) {
+      val nilValue = currentEvent.nilValue.get
+      if (nilValue == "true" || nilValue == "1") {
+        MaybeBoolean(true)
+      } else if (nilValue == "false" || nilValue == "0") {
+        MaybeBoolean(false)
+      } else {
+        throw new InvalidInfosetException("xsi:nil property is not a valid 
boolean: '" + nilValue +
+          "' for element " + getLocalName())
+      }
+    } else {
+      MaybeBoolean.Nope
+    }
+    _isNilled
+  }
+
+  //called without changing any state
+  override def hasNext(): Boolean = {
+    if (isDone) {
+      false
+    } else {
+      val event = this.resume(ch, Try(currentEvent))
+      nextEvent = copyEvent(event.getOrElse(null))
+      nextEvent != null
+    }
+  }
+
+  // done with this current event, move on to the next event
+  override def next(): Unit = {
+    currentEvent = copyEvent(nextEvent)
+    if (currentEvent.eventType.contains(EndDocument)) {
+      isDone = true
+    }
+    nextEvent = null
+  }
+
+  def copyEvent(sourceEvent: DFDL.SaxInfosetEvent): DFDL.SaxInfosetEvent = {
+    var destEvent: DFDL.SaxInfosetEvent = null
+    if (sourceEvent == null) destEvent
+    else {
+      destEvent = new DFDL.SaxInfosetEvent
+      destEvent.eventType = sourceEvent.eventType
+      destEvent.namespaceURI = sourceEvent.namespaceURI
+      destEvent.localName = sourceEvent.localName
+      destEvent.nilValue = sourceEvent.nilValue
+      destEvent.simpleText = sourceEvent.simpleText
+      destEvent
+    }
+  }
+
+  override val supportsNamespaces: Boolean = true
+
+  override def fini: Unit = {
+    currentEvent = null
+  }
+
+  override protected def run(): Unit = {
+    try {
+      currentEvent = copyEvent(this.waitForResume.getOrElse(null))
+      val unparseResult = dp.unparse(this, output)
+      ch.setUnparseResult(unparseResult)

Review comment:
       Ah, so this is where the UnparseResult is set in the content handler. 
Instead, can this be passed back in resumeFinal, and then the ContentHandler 
can change its internal state? Then you don't need the setUnparseResult 
function.

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetInputter.scala
##########
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.infoset
+
+import java.net.URI
+import java.net.URISyntaxException
+
+import scala.util.Try
+
+import org.apache.daffodil.api.DFDL
+import org.apache.daffodil.api.DFDL.DaffodilInputContentHandler
+import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException
+import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException
+import org.apache.daffodil.dpath.NodeInfo
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument
+import org.apache.daffodil.util.Maybe.One
+import org.apache.daffodil.util.MaybeBoolean
+import org.apache.daffodil.util.Misc
+import org.apache.daffodil.xml.XMLUtils
+
+class SAXInfosetInputter(ch: DaffodilInputContentHandler, dp: 
DFDL.DataProcessor, output: DFDL
+.Output) extends InfosetInputter with DFDL.ConsumerCoroutine {
+
+  var isDone = false
+  var currentEvent: DFDL.SaxInfosetEvent = _
+  var nextEvent: DFDL.SaxInfosetEvent = _
+  val contentHandlerEvent: DFDL.SaxInfosetEvent = new DFDL.SaxInfosetEvent
+
+  override def getEventType(): InfosetInputterEventType = 
currentEvent.eventType.orNull
+
+  override def getLocalName(): String = currentEvent.localName.orNull
+
+  override def getNamespaceURI(): String = currentEvent.namespaceURI.orNull
+
+  override def getSimpleText(primType: NodeInfo.Kind): String = {
+    val res = if (currentEvent.simpleText.isDefined) {
+      currentEvent.simpleText.get
+    } else (
+      throw new NonTextFoundInSimpleContentException(getLocalName())
+    )
+    if (primType.isInstanceOf[NodeInfo.AnyURI.Kind] && res.nonEmpty) {
+      try {
+        val uri = new URI(res)
+        if (!uri.getPath.startsWith("/")) {
+          // TDML files must allow blob URI's to be relative, but Daffodil
+          // requires them to be absolute with a scheme. So search for the file
+          // using TDML semantics and convert to an absolute URI
+          val abs = Misc.searchResourceOption(uri.getPath, None)
+          abs.get.toString
+        } else {
+          res
+        }
+      } catch {
+        case _: URISyntaxException => res
+      }
+    } else {
+      if (primType.isInstanceOf[NodeInfo.String.Kind]) {
+        val remapped = XMLUtils.remapPUAToXMLIllegalCharacters(res)
+        remapped
+      } else {
+        res
+      }
+    }
+  }
+
+  override def isNilled(): MaybeBoolean = {
+    val _isNilled = if (currentEvent.nilValue.isDefined) {
+      val nilValue = currentEvent.nilValue.get
+      if (nilValue == "true" || nilValue == "1") {
+        MaybeBoolean(true)
+      } else if (nilValue == "false" || nilValue == "0") {
+        MaybeBoolean(false)
+      } else {
+        throw new InvalidInfosetException("xsi:nil property is not a valid 
boolean: '" + nilValue +
+          "' for element " + getLocalName())
+      }
+    } else {
+      MaybeBoolean.Nope
+    }
+    _isNilled
+  }
+
+  //called without changing any state
+  override def hasNext(): Boolean = {
+    if (isDone) {
+      false
+    } else {
+      val event = this.resume(ch, Try(currentEvent))

Review comment:
       This feels incorrect to me. hasNext should be allowed to be called 
multiple times, only changing state when next() is called. If you call 
hasNext() multiple times, we'll keep getting a new event and replacing the 
next event. I think we should only be changing curEvent, nextEvent when 
next() is called.
   
   This likely just works right now because Daffodil is careful not to call 
hasNext() multiple times.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to