mbeckerle commented on a change in pull request #436:
URL: https://github.com/apache/incubator-daffodil/pull/436#discussion_r507936536



##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetInputter.scala
##########
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.infoset
+
+import java.net.URI
+import java.net.URISyntaxException
+
+import scala.util.Try
+
+import org.apache.daffodil.api.DFDL
+import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException
+import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException
+import org.apache.daffodil.dpath.NodeInfo
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument
+import org.apache.daffodil.util.Maybe.One
+import org.apache.daffodil.util.MaybeBoolean
+import org.apache.daffodil.util.Misc
+import org.apache.daffodil.xml.XMLUtils
+
+/**
+ * The SAXInfosetInputter consumes infosetEvent objects from the 
DaffodilInputContentHandler class
+ * and converts it to an event that the Dataprocessor unparse can use. This 
class contains two
+ * infosetEvent objects that contain the current event the unparse method is 
processing, and the
+ * next event to be processed after.
+ *
+ * This class together with the DaffodilInputContentHandler use coroutines to 
ensure that only one event,
+ * at a time, is passed between the two classes. The following is the general 
process:
+ *
+ * - the run method is called, with a StartDocument event already loaded on 
the inputter's queue.
+ * This is collected and stored in the nextEvent method, and the inputter's 
next method is called
+ * to populate the currentEvent and load the nextEvent
+ * - The dp.unparse method is called, and it calls hasNext to make sure an 
event exists to be
+ * processed and then queries the currentEvent, after it is done with the 
currentEvent, it calls
+ * inputter.next to get the next event, and that copies the nextEvent into the 
currentEvent and
+ * transfers control to the contentHandler to load the nextEvent
+ * - That continues until the currentEvent contains an EndDocument event, at 
which point, the
+ * nextEvent is clear, endDocumentReceived is set to true and hasNext is set 
to false
+ * - This ends the unparse process, and the unparseResult and/or any Errors 
are set on the event,
+ * and we call resumeFinal passing along that element, terminating this thread 
and resuming the
+ * contentHandler for the last time.
+ *
+ * @param inputContentHandler producer coroutine that sends infosetEvent to 
this class
+ * @param dp dataprocessor that we use to kickstart the unparse process and 
that consumes the
+ *           currentEvent
+ * @param output  outputChannel of choice where the unparsed data is stored
+ */
+class SAXInfosetInputter(
+  inputContentHandler: DFDL.DaffodilInputContentHandler,
+  dp: DFDL.DataProcessor,
+  output: DFDL.Output)
+  extends InfosetInputter with DFDL.ConsumerCoroutine {
+  // allows support for converting relative URIs in data to absolute URIs, 
this is mainly used
+  // for TDML as the tests allow relative URIs. This can be set to true by 
calling the
+  // inputContentHandler.enableInputterUriAbsolutization()
+  var enableUriAbsolutization: Boolean = false
+
+  var endDocumentReceived = false
+  val currentEvent: DFDL.SaxInfosetEvent = new DFDL.SaxInfosetEvent
+  val nextEvent: DFDL.SaxInfosetEvent = new DFDL.SaxInfosetEvent
+
+  override def getEventType(): InfosetInputterEventType = 
currentEvent.eventType.orNull
+
+  override def getLocalName(): String = currentEvent.localName.orNull
+
+  override def getNamespaceURI(): String = currentEvent.namespaceURI.orNull
+
+  override def getSimpleText(primType: NodeInfo.Kind): String = {
+    val res = if (currentEvent.simpleText.isDefined) {
+      currentEvent.simpleText.get
+    } else (
+      throw new NonTextFoundInSimpleContentException(getLocalName())
+    )
+    primType match {
+      case _: NodeInfo.String.Kind =>
+        val remapped = XMLUtils.remapPUAToXMLIllegalCharacters(res)
+        remapped
+      case _: NodeInfo.AnyURI.Kind if enableUriAbsolutization && res.nonEmpty 
=>
+        val absUri = convertRelativeURItoAbsolute(res)
+        absUri
+      case _ =>
+        res
+    }
+  }
+
+  override def isNilled(): MaybeBoolean = {
+    val _isNilled = if (currentEvent.nilValue.isDefined) {
+      val nilValue = currentEvent.nilValue.get
+      if (nilValue == "true" || nilValue == "1") {
+        MaybeBoolean(true)
+      } else if (nilValue == "false" || nilValue == "0") {
+        MaybeBoolean(false)
+      } else {
+        throw new InvalidInfosetException("xsi:nil property is not a valid 
boolean: '" + nilValue +
+          "' for element " + getLocalName())
+      }
+    } else {
+      MaybeBoolean.Nope
+    }
+    _isNilled
+  }
+
+  //called without changing any state
+  override def hasNext(): Boolean = {
+    !endDocumentReceived && !nextEvent.isEmpty
+  }
+
+  // done with current event, move on to the next event
+  override def next(): Unit = {
+    copyEvent(source = nextEvent, dest = currentEvent)
+    if (currentEvent.eventType.contains(EndDocument)) {
+      endDocumentReceived = true
+      nextEvent.clear
+    } else {
+      val event = this.resume(inputContentHandler, Try(currentEvent))
+      copyEvent(source = event.getOrElse(null), dest = nextEvent)

Review comment:
       You are doing getOrElse(null) here, and then testing for null in the 
copyEvent method. Why not just pass the Option/Maybe object and do the 
"isDefined" test in the copyEvent method?
   
   That is to say, converting Option/Maybe objects to null/not-null is 
generally to be avoided. You really only want to do that if you are calling 
some method defined in a library that uses the null/not-null convention. 

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetInputter.scala
##########
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.infoset
+
+import java.net.URI
+import java.net.URISyntaxException
+
+import scala.util.Try
+
+import org.apache.daffodil.api.DFDL
+import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException
+import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException
+import org.apache.daffodil.dpath.NodeInfo
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument
+import org.apache.daffodil.util.Maybe.One
+import org.apache.daffodil.util.MaybeBoolean
+import org.apache.daffodil.util.Misc
+import org.apache.daffodil.xml.XMLUtils
+
+/**
+ * The SAXInfosetInputter consumes infosetEvent objects from the 
DaffodilInputContentHandler class
+ * and converts it to an event that the Dataprocessor unparse can use. This 
class contains two
+ * infosetEvent objects that contain the current event the unparse method is 
processing, and the
+ * next event to be processed after.
+ *
+ * This class together with the DaffodilInputContentHandler use coroutines to 
ensure that only one event,
+ * at a time, is passed between the two classes. The following is the general 
process:
+ *
+ * - the run method is called, with a StartDocument event already loaded on 
the inputter's queue.
+ * This is collected and stored in the nextEvent method, and the inputter's 
next method is called
+ * to populate the currentEvent and load the nextEvent
+ * - The dp.unparse method is called, and it calls hasNext to make sure an 
event exists to be
+ * processed and then queries the currentEvent, after it is done with the 
currentEvent, it calls
+ * inputter.next to get the next event, and that copies the nextEvent into the 
currentEvent and
+ * transfers control to the contentHandler to load the nextEvent
+ * - That continues until the currentEvent contains an EndDocument event, at 
which point, the
+ * nextEvent is clear, endDocumentReceived is set to true and hasNext is set 
to false
+ * - This ends the unparse process, and the unparseResult and/or any Errors 
are set on the event,
+ * and we call resumeFinal passing along that element, terminating this thread 
and resuming the
+ * contentHandler for the last time.
+ *
+ * @param inputContentHandler producer coroutine that sends infosetEvent to 
this class
+ * @param dp dataprocessor that we use to kickstart the unparse process and 
that consumes the
+ *           currentEvent
+ * @param output  outputChannel of choice where the unparsed data is stored
+ */
+class SAXInfosetInputter(
+  inputContentHandler: DFDL.DaffodilInputContentHandler,
+  dp: DFDL.DataProcessor,
+  output: DFDL.Output)
+  extends InfosetInputter with DFDL.ConsumerCoroutine {
+  // allows support for converting relative URIs in data to absolute URIs, 
this is mainly used
+  // for TDML as the tests allow relative URIs. This can be set to true by 
calling the
+  // inputContentHandler.enableInputterUriAbsolutization()
+  var enableUriAbsolutization: Boolean = false
+
+  var endDocumentReceived = false
+  val currentEvent: DFDL.SaxInfosetEvent = new DFDL.SaxInfosetEvent
+  val nextEvent: DFDL.SaxInfosetEvent = new DFDL.SaxInfosetEvent
+
+  override def getEventType(): InfosetInputterEventType = 
currentEvent.eventType.orNull
+
+  override def getLocalName(): String = currentEvent.localName.orNull
+
+  override def getNamespaceURI(): String = currentEvent.namespaceURI.orNull
+
+  override def getSimpleText(primType: NodeInfo.Kind): String = {
+    val res = if (currentEvent.simpleText.isDefined) {
+      currentEvent.simpleText.get
+    } else (
+      throw new NonTextFoundInSimpleContentException(getLocalName())
+    )
+    primType match {
+      case _: NodeInfo.String.Kind =>
+        val remapped = XMLUtils.remapPUAToXMLIllegalCharacters(res)
+        remapped
+      case _: NodeInfo.AnyURI.Kind if enableUriAbsolutization && res.nonEmpty 
=>
+        val absUri = convertRelativeURItoAbsolute(res)
+        absUri
+      case _ =>
+        res
+    }
+  }
+
+  override def isNilled(): MaybeBoolean = {
+    val _isNilled = if (currentEvent.nilValue.isDefined) {
+      val nilValue = currentEvent.nilValue.get
+      if (nilValue == "true" || nilValue == "1") {
+        MaybeBoolean(true)
+      } else if (nilValue == "false" || nilValue == "0") {
+        MaybeBoolean(false)
+      } else {
+        throw new InvalidInfosetException("xsi:nil property is not a valid 
boolean: '" + nilValue +
+          "' for element " + getLocalName())
+      }
+    } else {
+      MaybeBoolean.Nope
+    }
+    _isNilled
+  }
+
+  //called without changing any state
+  override def hasNext(): Boolean = {
+    !endDocumentReceived && !nextEvent.isEmpty
+  }
+
+  // done with current event, move on to the next event
+  override def next(): Unit = {
+    copyEvent(source = nextEvent, dest = currentEvent)
+    if (currentEvent.eventType.contains(EndDocument)) {
+      endDocumentReceived = true
+      nextEvent.clear
+    } else {
+      val event = this.resume(inputContentHandler, Try(currentEvent))
+      copyEvent(source = event.getOrElse(null), dest = nextEvent)
+    }
+  }
+
+  /**  */
+  def copyEvent(source: DFDL.SaxInfosetEvent, dest: DFDL.SaxInfosetEvent): 
Unit= {
+    if (source == null) dest.clear
+    else {
+      dest.eventType = source.eventType
+      dest.namespaceURI = source.namespaceURI
+      dest.localName = source.localName
+      dest.nilValue = source.nilValue
+      dest.simpleText = source.simpleText
+    }
+  }
+
+  // TDML files must allow blob URI's to be relative, but Daffodil

Review comment:
       This comment could be a scaladoc comment rather than just line comments.

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilInputContentHandler.scala
##########
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.processors
+
+import scala.util.Try
+import scala.xml.NamespaceBinding
+
+import javax.xml.XMLConstants
+import org.apache.daffodil.api.DFDL
+import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException
+import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException
+import org.apache.daffodil.infoset.IllegalContentWhereEventExpected
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndElement
+import org.apache.daffodil.infoset.InfosetInputterEventType.StartDocument
+import org.apache.daffodil.infoset.InfosetInputterEventType.StartElement
+import org.apache.daffodil.infoset.SAXInfosetInputter
+import org.apache.daffodil.util.Maybe.Nope
+import org.apache.daffodil.util.Maybe.One
+import org.xml.sax.Attributes
+import org.xml.sax.Locator
+
+/**
+ * DaffodilInputContentHandler produces InfosetEvent objects for the 
SAXInfosetInputter to
+ * consume and convert to a event that the Dataprocessor unparse can use. The 
infosetEvent object
+ * is built from information that is passed to the ContentHandler from an 
XMLReader parser. In
+ * order to receive the uri and prefix information from the XMLReader, the 
following features
+ * must be set to true on whatever XMLReader is used: 
http://xml.org/sax/features/namespaces and
+ * http://xml.org/sax/features/namespace-prefixes
+ *
+ * This class, together with the SAXInfosetInputter, uses coroutines to ensure 
that only one event,
+ * at a time, is passed between the two classes. The following is the general 
process:
+ *
+ * - an external call is made to parse an XML Documents
+ * - this class receives a StartDocument call, which is the first infosetEvent 
that is sent to
+ * the SAXInfosetInputter. That event is put on the inputter's queue, this 
thread is paused, and
+ * that inputter's thread is run
+ * - when the SAXInfosetInputter is done processing an event and is ready for 
a new event, it
+ * sends the completed event via the coroutine system, and loads it on the 
contentHandler's
+ * queue, which restarts this thread and pauses that one. In the expected 
case, the events will
+ * contain no new information, until the unparse is completed.
+ * -  this process continues until the EndDocument method is called. Once that 
infosetEvent is
+ * sent to the inputter, it signals the end of events coming from the 
contentHandler. This
+ * ends the unparseProcess and returns the event with the unparseResult and/or 
any error
+ * information
+ *
+ * @param dp dataprocessor object that will be used to call the parse
+ * @param output outputChannel of choice where the unparsed data is stored
+ */
+class DaffodilInputContentHandler(
+  dp: DFDL.DataProcessor,
+  output: DFDL.Output)
+  extends DFDL.DaffodilInputContentHandler {
+  private val inputter = new SAXInfosetInputter(this, dp, output)
+  private var unparseResult: DFDL.UnparseResult = _
+  private val infosetEvent: DFDL.SaxInfosetEvent = new DFDL.SaxInfosetEvent
+  private val characterData = new StringBuilder()
+  private var prefixMapping: NamespaceBinding = _
+
+  def getUnparseResult: DFDL.UnparseResult = unparseResult
+
+  def enableInputterUriAbsolutization(): Unit = 
inputter.enableUriAbsolutization = true
+
+  override def setDocumentLocator(locator: Locator): Unit = {
+    // do nothing
+  }
+
+  override def startDocument(): Unit = {
+    infosetEvent.eventType = One(StartDocument)
+    sendToInputter()
+  }
+
+  override def endDocument(): Unit = {
+    infosetEvent.eventType = One(EndDocument)
+    sendToInputter()
+  }
+
+  override def startPrefixMapping(prefix: String, uri: String): Unit = {
+    val pre = if (prefix == "") null else prefix
+    prefixMapping = NamespaceBinding(pre, uri, prefixMapping)
+  }
+
+  override def endPrefixMapping(prefix: String): Unit = {
+    // do nothing
+  }
+
+  override def startElement(uri: String, localName: String, qName: String, 
atts: Attributes): Unit = {
+    // we need to check if the characters data is all whitespace, if it is we 
drop the whitespace
+    // data, if it is not, it is an error as starting a new element with 
actual characterData means
+    // we haven't hit an endElement yet, which means we're in a complexElement 
and a complexElement
+    // cannot have character content
+    if (characterData.nonEmpty && characterData.toString().trim.nonEmpty) {
+      throw new IllegalContentWhereEventExpected("Non-whitespace characters in 
complex " +
+        "Element: " + characterData.toString()
+      )
+    } else {
+      // reset since it was whitespace only
+      characterData.setLength(0)
+    }
+
+    if (!infosetEvent.isEmpty && infosetEvent.localName.isDefined) {
+      // we started another element while we were in the process of building a 
startElement
+      // this means the first element was complex and we are ready for the 
inputter queue
+      sendToInputter()
+    }
+    // use Attributes to determine xsi:nil value
+    val nilIn = atts.getIndex(XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI, 
"nil")
+    infosetEvent.nilValue = if (nilIn >= 0) {
+      val nilValue = atts.getValue(nilIn)
+      One(nilValue)
+    } else {
+      Nope
+    }
+    // set localName and namespaceURI
+    val qNameArr = qName.split(":")
+    infosetEvent.localName =
+      if (localName.nonEmpty) {
+        One(localName)
+      } else if (qNameArr.length > 1) {
+        qNameArr.lift(1)
+      } else if (qNameArr.nonEmpty) {
+        qNameArr.lift(0)
+      } else {
+        Nope
+      }
+    infosetEvent.namespaceURI =
+      if (uri.nonEmpty) {
+        One(uri)
+      } else if (qNameArr.length > 1) { // has a prefix
+        // get the prefix off the qname
+        val qNamePrefix = qNameArr.lift(0).get
+        // look up prefix with and without xmlns prefix
+        val in = atts.getIndex(s"xmlns:$qNamePrefix")
+        val otherIn = if (in == -1) atts.getIndex("qNamePrefix") else in

Review comment:
       Should this be "$qNamePrefix", which is the same thing as just 
qNamePrefix? The literal "qNamePrefix" seems like it has to be wrong, as you're 
searching for that literal string. 

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetInputter.scala
##########
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.infoset
+
+import java.net.URI
+import java.net.URISyntaxException
+
+import scala.util.Try
+
+import org.apache.daffodil.api.DFDL
+import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException
+import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException
+import org.apache.daffodil.dpath.NodeInfo
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument
+import org.apache.daffodil.util.Maybe.One
+import org.apache.daffodil.util.MaybeBoolean
+import org.apache.daffodil.util.Misc
+import org.apache.daffodil.xml.XMLUtils
+
+/**
+ * The SAXInfosetInputter consumes infosetEvent objects from the 
DaffodilInputContentHandler class
+ * and converts it to an event that the Dataprocessor unparse can use. This 
class contains two
+ * infosetEvent objects that contain the current event the unparse method is 
processing, and the
+ * next event to be processed after.
+ *
+ * This class together with the DaffodilInputContentHandler use coroutines to 
ensure that only one event,
+ * at a time, is passed between the two classes. The following is the general 
process:
+ *
+ * - the run method is called, with a StartDocument event already loaded on 
the inputter's queue.
+ * This is collected and stored in the nextEvent method, and the inputter's 
next method is called
+ * to populate the currentEvent and load the nextEvent
+ * - The dp.unparse method is called, and it calls hasNext to make sure an 
event exists to be
+ * processed and then queries the currentEvent, after it is done with the 
currentEvent, it calls
+ * inputter.next to get the next event, and that copies the nextEvent into the 
currentEvent and
+ * transfers control to the contentHandler to load the nextEvent
+ * - That continues until the currentEvent contains an EndDocument event, at 
which point, the
+ * nextEvent is clear, endDocumentReceived is set to true and hasNext is set 
to false
+ * - This ends the unparse process, and the unparseResult and/or any Errors 
are set on the event,
+ * and we call resumeFinal passing along that element, terminating this thread 
and resuming the
+ * contentHandler for the last time.
+ *
+ * @param inputContentHandler producer coroutine that sends infosetEvent to 
this class
+ * @param dp dataprocessor that we use to kickstart the unparse process and 
that consumes the
+ *           currentEvent
+ * @param output  outputChannel of choice where the unparsed data is stored
+ */
+class SAXInfosetInputter(
+  inputContentHandler: DFDL.DaffodilInputContentHandler,
+  dp: DFDL.DataProcessor,
+  output: DFDL.Output)
+  extends InfosetInputter with DFDL.ConsumerCoroutine {
+  // allows support for converting relative URIs in data to absolute URIs, 
this is mainly used

Review comment:
       This should be scaladoc. Also, can you explain just a bit more of the 
motivation here? Is this purely for TDML support? If this is just for BLOB 
URIs, then maybe the name should have BLOB in it?

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetInputter.scala
##########
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.infoset
+
+import java.net.URI
+import java.net.URISyntaxException
+
+import scala.util.Try
+
+import org.apache.daffodil.api.DFDL
+import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException
+import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException
+import org.apache.daffodil.dpath.NodeInfo
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument
+import org.apache.daffodil.util.Maybe.One
+import org.apache.daffodil.util.MaybeBoolean
+import org.apache.daffodil.util.Misc
+import org.apache.daffodil.xml.XMLUtils
+
+/**
+ * The SAXInfosetInputter consumes infosetEvent objects from the 
DaffodilInputContentHandler class
+ * and converts it to an event that the Dataprocessor unparse can use. This 
class contains two
+ * infosetEvent objects that contain the current event the unparse method is 
processing, and the
+ * next event to be processed after.
+ *
+ * This class together with the DaffodilInputContentHandler use coroutines to 
ensure that only one event,
+ * at a time, is passed between the two classes. The following is the general 
process:
+ *
+ * - the run method is called, with a StartDocument event already loaded on 
the inputter's queue.
+ * This is collected and stored in the nextEvent method, and the inputter's 
next method is called
+ * to populate the currentEvent and load the nextEvent
+ * - The dp.unparse method is called, and it calls hasNext to make sure an 
event exists to be
+ * processed and then queries the currentEvent, after it is done with the 
currentEvent, it calls
+ * inputter.next to get the next event, and that copies the nextEvent into the 
currentEvent and
+ * transfers control to the contentHandler to load the nextEvent
+ * - That continues until the currentEvent contains an EndDocument event, at 
which point, the
+ * nextEvent is clear, endDocumentReceived is set to true and hasNext is set 
to false
+ * - This ends the unparse process, and the unparseResult and/or any Errors 
are set on the event,
+ * and we call resumeFinal passing along that element, terminating this thread 
and resuming the
+ * contentHandler for the last time.
+ *
+ * @param inputContentHandler producer coroutine that sends infosetEvent to 
this class
+ * @param dp dataprocessor that we use to kickstart the unparse process and 
that consumes the
+ *           currentEvent
+ * @param output  outputChannel of choice where the unparsed data is stored
+ */
+class SAXInfosetInputter(
+  inputContentHandler: DFDL.DaffodilInputContentHandler,
+  dp: DFDL.DataProcessor,
+  output: DFDL.Output)
+  extends InfosetInputter with DFDL.ConsumerCoroutine {
+  // allows support for converting relative URIs in data to absolute URIs, 
this is mainly used
+  // for TDML as the tests allow relative URIs. This can be set to true by 
calling the
+  // inputContentHandler.enableInputterUriAbsolutization()
+  var enableUriAbsolutization: Boolean = false
+
+  var endDocumentReceived = false
+  val currentEvent: DFDL.SaxInfosetEvent = new DFDL.SaxInfosetEvent
+  val nextEvent: DFDL.SaxInfosetEvent = new DFDL.SaxInfosetEvent
+
+  override def getEventType(): InfosetInputterEventType = 
currentEvent.eventType.orNull
+
+  override def getLocalName(): String = currentEvent.localName.orNull
+
+  override def getNamespaceURI(): String = currentEvent.namespaceURI.orNull
+
+  override def getSimpleText(primType: NodeInfo.Kind): String = {
+    val res = if (currentEvent.simpleText.isDefined) {
+      currentEvent.simpleText.get
+    } else (
+      throw new NonTextFoundInSimpleContentException(getLocalName())
+    )
+    primType match {
+      case _: NodeInfo.String.Kind =>
+        val remapped = XMLUtils.remapPUAToXMLIllegalCharacters(res)
+        remapped
+      case _: NodeInfo.AnyURI.Kind if enableUriAbsolutization && res.nonEmpty 
=>
+        val absUri = convertRelativeURItoAbsolute(res)
+        absUri
+      case _ =>
+        res
+    }
+  }
+
+  override def isNilled(): MaybeBoolean = {
+    val _isNilled = if (currentEvent.nilValue.isDefined) {
+      val nilValue = currentEvent.nilValue.get
+      if (nilValue == "true" || nilValue == "1") {
+        MaybeBoolean(true)
+      } else if (nilValue == "false" || nilValue == "0") {
+        MaybeBoolean(false)
+      } else {
+        throw new InvalidInfosetException("xsi:nil property is not a valid 
boolean: '" + nilValue +
+          "' for element " + getLocalName())
+      }
+    } else {
+      MaybeBoolean.Nope
+    }
+    _isNilled
+  }
+
+  //called without changing any state

Review comment:
       The next/hasNext Java iterator idiom usually requires implementing the 
logic to fetch the next item in the "hasNext" routine, so that if you call 
hasNext and it has to try to get an item to see whether there is in fact one 
left, then next just returns that item and clears the state. 

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilInputContentHandler.scala
##########
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.processors
+
+import scala.util.Try
+import scala.xml.NamespaceBinding
+
+import javax.xml.XMLConstants
+import org.apache.daffodil.api.DFDL
+import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException
+import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException
+import org.apache.daffodil.infoset.IllegalContentWhereEventExpected
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndElement
+import org.apache.daffodil.infoset.InfosetInputterEventType.StartDocument
+import org.apache.daffodil.infoset.InfosetInputterEventType.StartElement
+import org.apache.daffodil.infoset.SAXInfosetInputter
+import org.apache.daffodil.util.Maybe.Nope
+import org.apache.daffodil.util.Maybe.One
+import org.xml.sax.Attributes
+import org.xml.sax.Locator
+
+/**
+ * DaffodilInputContentHandler produces InfosetEvent objects for the 
SAXInfosetInputter to
+ * consume and convert to a event that the Dataprocessor unparse can use. The 
infosetEvent object
+ * is built from information that is passed to the ContentHandler from an 
XMLReader parser. In
+ * order to receive the uri and prefix information from the XMLReader, the 
following features
+ * must be set to true on whatever XMLReader is used: 
http://xml.org/sax/features/namespaces and
+ * http://xml.org/sax/features/namespace-prefixes
+ *
+ * This class, together with the SAXInfosetInputter, uses coroutines to ensure 
that only one event,
+ * at a time, is passed between the two classes. The following is the general 
process:
+ *
+ * - an external call is made to parse an XML Documents
+ * - this class receives a StartDocument call, which is the first infosetEvent 
that is sent to
+ * the SAXInfosetInputter. That event is put on the inputter's queue, this 
thread is paused, and
+ * that inputter's thread is run
+ * - when the SAXInfosetInputter is done processing an event and is ready for 
a new event, it
+ * sends the completed event via the coroutine system, and loads it on the 
contentHandler's
+ * queue, which restarts this thread and pauses that one. In the expected 
case, the events will
+ * contain no new information, until the unparse is completed.
+ * -  this process continues until the EndDocument method is called. Once that 
infosetEvent is
+ * sent to the inputter, it signals the end of events coming from the 
contentHandler. This
+ * ends the unparseProcess and returns the event with the unparseResult and/or 
any error
+ * information
+ *
+ * @param dp dataprocessor object that will be used to call the parse
+ * @param output outputChannel of choice where the unparsed data is stored
+ */
+class DaffodilInputContentHandler(
+  dp: DFDL.DataProcessor,
+  output: DFDL.Output)
+  extends DFDL.DaffodilInputContentHandler {
+  private val inputter = new SAXInfosetInputter(this, dp, output)
+  private var unparseResult: DFDL.UnparseResult = _
+  private val infosetEvent: DFDL.SaxInfosetEvent = new DFDL.SaxInfosetEvent
+  private val characterData = new StringBuilder()
+  private var prefixMapping: NamespaceBinding = _
+
+  def getUnparseResult: DFDL.UnparseResult = unparseResult
+
+  def enableInputterUriAbsolutization(): Unit = 
inputter.enableUriAbsolutization = true
+
+  override def setDocumentLocator(locator: Locator): Unit = {
+    // do nothing
+  }
+
+  override def startDocument(): Unit = {
+    infosetEvent.eventType = One(StartDocument)
+    sendToInputter()
+  }
+
+  override def endDocument(): Unit = {
+    infosetEvent.eventType = One(EndDocument)
+    sendToInputter()
+  }
+
+  override def startPrefixMapping(prefix: String, uri: String): Unit = {
+    val pre = if (prefix == "") null else prefix
+    prefixMapping = NamespaceBinding(pre, uri, prefixMapping)
+  }
+
+  override def endPrefixMapping(prefix: String): Unit = {
+    // do nothing
+  }
+
+  override def startElement(uri: String, localName: String, qName: String, 
atts: Attributes): Unit = {
+    // we need to check if the characters data is all whitespace, if it is we 
drop the whitespace
+    // data, if it is not, it is an error as starting a new element with 
actual characterData means
+    // we haven't hit an endElement yet, which means we're in a complexElement 
and a complexElement
+    // cannot have character content
+    if (characterData.nonEmpty && characterData.toString().trim.nonEmpty) {

Review comment:
       Can we test the characterData without calling toString on it? You are 
using a StringBuilder for a reason; it seems to me that if you are going to 
call toString on it every time, that probably defeats the purpose of the 
StringBuilder.

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilInputContentHandler.scala
##########
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.processors
+
+import scala.util.Try
+import scala.xml.NamespaceBinding
+
+import javax.xml.XMLConstants
+import org.apache.daffodil.api.DFDL
+import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException
+import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException
+import org.apache.daffodil.infoset.IllegalContentWhereEventExpected
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndElement
+import org.apache.daffodil.infoset.InfosetInputterEventType.StartDocument
+import org.apache.daffodil.infoset.InfosetInputterEventType.StartElement
+import org.apache.daffodil.infoset.SAXInfosetInputter
+import org.apache.daffodil.util.Maybe.Nope
+import org.apache.daffodil.util.Maybe.One
+import org.xml.sax.Attributes
+import org.xml.sax.Locator
+
+/**
+ * DaffodilInputContentHandler produces InfosetEvent objects for the 
SAXInfosetInputter to
+ * consume and convert to a event that the Dataprocessor unparse can use. The 
infosetEvent object
+ * is built from information that is passed to the ContentHandler from an 
XMLReader parser. In
+ * order to receive the uri and prefix information from the XMLReader, the 
following features
+ * must be set to true on whatever XMLReader is used: 
http://xml.org/sax/features/namespaces and
+ * http://xml.org/sax/features/namespace-prefixes
+ *
+ * This class, together with the SAXInfosetInputter, uses coroutines to ensure 
that only one event,
+ * at a time, is passed between the two classes. The following is the general 
process:
+ *
+ * - an external call is made to parse an XML Documents
+ * - this class receives a StartDocument call, which is the first infosetEvent 
that is sent to
+ * the SAXInfosetInputter. That event is put on the inputter's queue, this 
thread is paused, and
+ * that inputter's thread is run
+ * - when the SAXInfosetInputter is done processing an event and is ready for 
a new event, it
+ * sends the completed event via the coroutine system, and loads it on the 
contentHandler's
+ * queue, which restarts this thread and pauses that one. In the expected 
case, the events will
+ * contain no new information, until the unparse is completed.
+ * -  this process continues until the EndDocument method is called. Once that 
infosetEvent is
+ * sent to the inputter, it signals the end of events coming from the 
contentHandler. This
+ * ends the unparseProcess and returns the event with the unparseResult and/or 
any error
+ * information
+ *
+ * @param dp dataprocessor object that will be used to call the parse
+ * @param output outputChannel of choice where the unparsed data is stored
+ */
+class DaffodilInputContentHandler(
+  dp: DFDL.DataProcessor,
+  output: DFDL.Output)
+  extends DFDL.DaffodilInputContentHandler {
+  private val inputter = new SAXInfosetInputter(this, dp, output)
+  private var unparseResult: DFDL.UnparseResult = _
+  private val infosetEvent: DFDL.SaxInfosetEvent = new DFDL.SaxInfosetEvent
+  private val characterData = new StringBuilder()
+  private var prefixMapping: NamespaceBinding = _
+
+  def getUnparseResult: DFDL.UnparseResult = unparseResult
+
+  def enableInputterUriAbsolutization(): Unit = 
inputter.enableUriAbsolutization = true
+
+  override def setDocumentLocator(locator: Locator): Unit = {
+    // do nothing
+  }
+
+  override def startDocument(): Unit = {
+    infosetEvent.eventType = One(StartDocument)
+    sendToInputter()
+  }
+
+  override def endDocument(): Unit = {
+    infosetEvent.eventType = One(EndDocument)
+    sendToInputter()
+  }
+
+  override def startPrefixMapping(prefix: String, uri: String): Unit = {
+    val pre = if (prefix == "") null else prefix
+    prefixMapping = NamespaceBinding(pre, uri, prefixMapping)
+  }
+
+  override def endPrefixMapping(prefix: String): Unit = {
+    // do nothing
+  }
+
+  override def startElement(uri: String, localName: String, qName: String, 
atts: Attributes): Unit = {
+    // we need to check if the characters data is all whitespace, if it is we 
drop the whitespace
+    // data, if it is not, it is an error as starting a new element with 
actual characterData means
+    // we haven't hit an endElement yet, which means we're in a complexElement 
and a complexElement
+    // cannot have character content
+    if (characterData.nonEmpty && characterData.toString().trim.nonEmpty) {
+      throw new IllegalContentWhereEventExpected("Non-whitespace characters in 
complex " +
+        "Element: " + characterData.toString()
+      )
+    } else {
+      // reset since it was whitespace only
+      characterData.setLength(0)
+    }
+
+    if (!infosetEvent.isEmpty && infosetEvent.localName.isDefined) {
+      // we started another element while we were in the process of building a 
startElement
+      // this means the first element was complex and we are ready for the 
inputter queue
+      sendToInputter()
+    }
+    // use Attributes to determine xsi:nil value
+    val nilIn = atts.getIndex(XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI, 
"nil")
+    infosetEvent.nilValue = if (nilIn >= 0) {
+      val nilValue = atts.getValue(nilIn)
+      One(nilValue)
+    } else {
+      Nope
+    }
+    // set localName and namespaceURI
+    val qNameArr = qName.split(":")
+    infosetEvent.localName =
+      if (localName.nonEmpty) {
+        One(localName)
+      } else if (qNameArr.length > 1) {
+        qNameArr.lift(1)
+      } else if (qNameArr.nonEmpty) {
+        qNameArr.lift(0)
+      } else {
+        Nope
+      }
+    infosetEvent.namespaceURI =
+      if (uri.nonEmpty) {
+        One(uri)
+      } else if (qNameArr.length > 1) { // has a prefix
+        // get the prefix off the qname
+        val qNamePrefix = qNameArr.lift(0).get
+        // look up prefix with and without xmlns prefix
+        val in = atts.getIndex(s"xmlns:$qNamePrefix")
+        val otherIn = if (in == -1) atts.getIndex("qNamePrefix") else in
+        if (in >= 0) {
+          val attrUri = atts.getValue(in)
+          One(attrUri)
+        } else if (otherIn >= 0) {
+          val attrUri = atts.getValue(otherIn)
+          One(attrUri)
+        } else {
+          One(prefixMapping.getURI(qNamePrefix))
+        }
+      } else {
+        try {
+          val in = atts.getIndex("xmlns")
+          if (in >= 0) {
+            val attrUri = atts.getValue(in)
+            One(attrUri)
+          } else {
+            One(prefixMapping.getURI(null))
+          }
+        } catch {
+          case _: NullPointerException => Nope
+        }
+      }
+    infosetEvent.eventType = One(StartElement)
+  }
+
+  override def endElement(uri: String, localName: String, qName: String): Unit 
= {
+    // if infosetEvent is a startElement, send that first
+    if (infosetEvent.eventType.contains(StartElement)) {
+      // any characterData that exists at this point is valid data as padding 
data has been
+      // taken care of in startElement
+      val maybeNewStr = One(characterData.toString())
+      infosetEvent.simpleText = maybeNewStr
+      characterData.setLength(0)
+      sendToInputter()
+    }
+    val qNameArr = qName.split(":")
+    infosetEvent.localName =
+      if (localName.nonEmpty) {
+        One(localName)
+      } else if (qNameArr.nonEmpty) {
+        One(qNameArr.last)
+      } else {
+        Nope
+      }
+    infosetEvent.namespaceURI =
+      if (uri.nonEmpty) {
+        One(uri)
+      } else if (qNameArr.length > 1) {
+        One(prefixMapping.getURI(qNameArr.head))
+      } else {
+        try {
+          One(prefixMapping.getURI(null))
+        } catch {
+          case _: NullPointerException => Nope
+        }
+      }
+    infosetEvent.eventType = One(EndElement)
+    sendToInputter()
+  }
+
+  override def characters(ch: Array[Char], start: Int, length: Int): Unit = {
+    characterData.appendAll(ch, start, length)
+  }
+
+  private def sendToInputter(): Unit = {
+    val infosetEventWithResponse = this.resume(inputter, Try(infosetEvent))
+    infosetEvent.clear
+    // if it is failure, we will not have an unparseResult, so we only set 
unparseResults for
+    // events wrapped in Success, which can include events wrapped in Success, 
that have

Review comment:
       "events wrapped in Success, which can include events wrapped in 
Success"...?

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilInputContentHandler.scala
##########
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.processors
+
+import scala.util.Try
+import scala.xml.NamespaceBinding
+
+import javax.xml.XMLConstants
+import org.apache.daffodil.api.DFDL
+import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException
+import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException
+import org.apache.daffodil.infoset.IllegalContentWhereEventExpected
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndElement
+import org.apache.daffodil.infoset.InfosetInputterEventType.StartDocument
+import org.apache.daffodil.infoset.InfosetInputterEventType.StartElement
+import org.apache.daffodil.infoset.SAXInfosetInputter
+import org.apache.daffodil.util.Maybe.Nope
+import org.apache.daffodil.util.Maybe.One
+import org.xml.sax.Attributes
+import org.xml.sax.Locator
+
+/**
+ * DaffodilInputContentHandler produces InfosetEvent objects for the 
SAXInfosetInputter to
+ * consume and convert to a event that the Dataprocessor unparse can use. The 
infosetEvent object
+ * is built from information that is passed to the ContentHandler from an 
XMLReader parser. In
+ * order to receive the uri and prefix information from the XMLReader, the 
following features
+ * must be set to true on whatever XMLReader is used: 
http://xml.org/sax/features/namespaces and
+ * http://xml.org/sax/features/namespace-prefixes
+ *
+ * This class, together with the SAXInfosetInputter, uses coroutines to ensure 
that only one event,
+ * at a time, is passed between the two classes. The following is the general 
process:
+ *
+ * - an external call is made to parse an XML Documents
+ * - this class receives a StartDocument call, which is the first infosetEvent 
that is sent to
+ * the SAXInfosetInputter. That event is put on the inputter's queue, this 
thread is paused, and
+ * that inputter's thread is run
+ * - when the SAXInfosetInputter is done processing an event and is ready for 
a new event, it
+ * sends the completed event via the coroutine system, and loads it on the 
contentHandler's
+ * queue, which restarts this thread and pauses that one. In the expected 
case, the events will
+ * contain no new information, until the unparse is completed.
+ * -  this process continues until the EndDocument method is called. Once that 
infosetEvent is
+ * sent to the inputter, it signals the end of events coming from the 
contentHandler. This
+ * ends the unparseProcess and returns the event with the unparseResult and/or 
any error
+ * information
+ *
+ * @param dp dataprocessor object that will be used to call the parse
+ * @param output outputChannel of choice where the unparsed data is stored
+ */
+class DaffodilInputContentHandler(
+  dp: DFDL.DataProcessor,
+  output: DFDL.Output)
+  extends DFDL.DaffodilInputContentHandler {
+  private val inputter = new SAXInfosetInputter(this, dp, output)
+  private var unparseResult: DFDL.UnparseResult = _
+  private val infosetEvent: DFDL.SaxInfosetEvent = new DFDL.SaxInfosetEvent
+  private val characterData = new StringBuilder()
+  private var prefixMapping: NamespaceBinding = _
+
+  def getUnparseResult: DFDL.UnparseResult = unparseResult
+
+  def enableInputterUriAbsolutization(): Unit = 
inputter.enableUriAbsolutization = true
+
+  override def setDocumentLocator(locator: Locator): Unit = {
+    // do nothing
+  }
+
+  override def startDocument(): Unit = {
+    infosetEvent.eventType = One(StartDocument)
+    sendToInputter()
+  }
+
+  override def endDocument(): Unit = {
+    infosetEvent.eventType = One(EndDocument)
+    sendToInputter()
+  }
+
+  override def startPrefixMapping(prefix: String, uri: String): Unit = {
+    val pre = if (prefix == "") null else prefix
+    prefixMapping = NamespaceBinding(pre, uri, prefixMapping)
+  }
+
+  override def endPrefixMapping(prefix: String): Unit = {
+    // do nothing
+  }
+
+  override def startElement(uri: String, localName: String, qName: String, 
atts: Attributes): Unit = {
+    // we need to check if the characters data is all whitespace, if it is we 
drop the whitespace
+    // data, if it is not, it is an error as starting a new element with 
actual characterData means
+    // we haven't hit an endElement yet, which means we're in a complexElement 
and a complexElement
+    // cannot have character content
+    if (characterData.nonEmpty && characterData.toString().trim.nonEmpty) {
+      throw new IllegalContentWhereEventExpected("Non-whitespace characters in 
complex " +
+        "Element: " + characterData.toString()
+      )
+    } else {
+      // reset since it was whitespace only
+      characterData.setLength(0)
+    }
+
+    if (!infosetEvent.isEmpty && infosetEvent.localName.isDefined) {
+      // we started another element while we were in the process of building a 
startElement
+      // this means the first element was complex and we are ready for the 
inputter queue
+      sendToInputter()
+    }
+    // use Attributes to determine xsi:nil value
+    val nilIn = atts.getIndex(XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI, 
"nil")
+    infosetEvent.nilValue = if (nilIn >= 0) {
+      val nilValue = atts.getValue(nilIn)
+      One(nilValue)
+    } else {
+      Nope
+    }
+    // set localName and namespaceURI
+    val qNameArr = qName.split(":")
+    infosetEvent.localName =
+      if (localName.nonEmpty) {
+        One(localName)
+      } else if (qNameArr.length > 1) {
+        qNameArr.lift(1)
+      } else if (qNameArr.nonEmpty) {
+        qNameArr.lift(0)
+      } else {
+        Nope
+      }
+    infosetEvent.namespaceURI =
+      if (uri.nonEmpty) {
+        One(uri)
+      } else if (qNameArr.length > 1) { // has a prefix
+        // get the prefix off the qname
+        val qNamePrefix = qNameArr.lift(0).get
+        // look up prefix with and without xmlns prefix
+        val in = atts.getIndex(s"xmlns:$qNamePrefix")
+        val otherIn = if (in == -1) atts.getIndex("qNamePrefix") else in
+        if (in >= 0) {
+          val attrUri = atts.getValue(in)
+          One(attrUri)
+        } else if (otherIn >= 0) {
+          val attrUri = atts.getValue(otherIn)
+          One(attrUri)
+        } else {
+          One(prefixMapping.getURI(qNamePrefix))
+        }
+      } else {
+        try {
+          val in = atts.getIndex("xmlns")
+          if (in >= 0) {
+            val attrUri = atts.getValue(in)
+            One(attrUri)
+          } else {
+            One(prefixMapping.getURI(null))
+          }
+        } catch {
+          case _: NullPointerException => Nope
+        }
+      }
+    infosetEvent.eventType = One(StartElement)
+  }
+
+  override def endElement(uri: String, localName: String, qName: String): Unit 
= {
+    // if infosetEvent is a startElement, send that first
+    if (infosetEvent.eventType.contains(StartElement)) {
+      // any characterData that exists at this point is valid data as padding 
data has been
+      // taken care of in startElement
+      val maybeNewStr = One(characterData.toString())
+      infosetEvent.simpleText = maybeNewStr
+      characterData.setLength(0)
+      sendToInputter()
+    }
+    val qNameArr = qName.split(":")
+    infosetEvent.localName =
+      if (localName.nonEmpty) {
+        One(localName)
+      } else if (qNameArr.nonEmpty) {
+        One(qNameArr.last)
+      } else {
+        Nope
+      }
+    infosetEvent.namespaceURI =
+      if (uri.nonEmpty) {
+        One(uri)
+      } else if (qNameArr.length > 1) {
+        One(prefixMapping.getURI(qNameArr.head))
+      } else {
+        try {
+          One(prefixMapping.getURI(null))
+        } catch {
+          case _: NullPointerException => Nope
+        }
+      }
+    infosetEvent.eventType = One(EndElement)
+    sendToInputter()
+  }
+
+  override def characters(ch: Array[Char], start: Int, length: Int): Unit = {
+    characterData.appendAll(ch, start, length)
+  }
+
+  private def sendToInputter(): Unit = {
+    val infosetEventWithResponse = this.resume(inputter, Try(infosetEvent))
+    infosetEvent.clear
+    // if it is failure, we will not have an unparseResult, so we only set 
unparseResults for
+    // events wrapped in Success, which can include events wrapped in Success, 
that have
+    // expected errors.
+    if (infosetEventWithResponse.isSuccess && 
infosetEventWithResponse.get.unparseResult.isDefined) {
+      unparseResult = infosetEventWithResponse.get.unparseResult.get
+    }
+    // the exception from events wrapped in failures and events wrapped in 
Success with an unparse

Review comment:
       Can you please clarify what "wrap in Success" or "wrap in failure" means?

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilInputContentHandler.scala
##########
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.processors
+
+import scala.util.Try
+import scala.xml.NamespaceBinding
+
+import javax.xml.XMLConstants
+import org.apache.daffodil.api.DFDL
+import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException
+import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException
+import org.apache.daffodil.infoset.IllegalContentWhereEventExpected
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndElement
+import org.apache.daffodil.infoset.InfosetInputterEventType.StartDocument
+import org.apache.daffodil.infoset.InfosetInputterEventType.StartElement
+import org.apache.daffodil.infoset.SAXInfosetInputter
+import org.apache.daffodil.util.Maybe.Nope
+import org.apache.daffodil.util.Maybe.One
+import org.xml.sax.Attributes
+import org.xml.sax.Locator
+
+/**
+ * DaffodilInputContentHandler produces InfosetEvent objects for the 
SAXInfosetInputter to
+ * consume and convert to a event that the Dataprocessor unparse can use. The 
infosetEvent object
+ * is built from information that is passed to the ContentHandler from an 
XMLReader parser. In
+ * order to receive the uri and prefix information from the XMLReader, the 
following features
+ * must be set to true on whatever XMLReader is used: 
http://xml.org/sax/features/namespaces and
+ * http://xml.org/sax/features/namespace-prefixes
+ *
+ * This class, together with the SAXInfosetInputter, uses coroutines to ensure 
that only one event,
+ * at a time, is passed between the two classes. The following is the general 
process:
+ *
+ * - an external call is made to parse an XML Documents
+ * - this class receives a StartDocument call, which is the first infosetEvent 
that is sent to
+ * the SAXInfosetInputter. That event is put on the inputter's queue, this 
thread is paused, and
+ * that inputter's thread is run
+ * - when the SAXInfosetInputter is done processing an event and is ready for 
a new event, it
+ * sends the completed event via the coroutine system, and loads it on the 
contentHandler's
+ * queue, which restarts this thread and pauses that one. In the expected 
case, the events will
+ * contain no new information, until the unparse is completed.
+ * -  this process continues until the EndDocument method is called. Once that 
infosetEvent is
+ * sent to the inputter, it signals the end of events coming from the 
contentHandler. This
+ * ends the unparseProcess and returns the event with the unparseResult and/or 
any error
+ * information
+ *
+ * @param dp dataprocessor object that will be used to call the parse
+ * @param output outputChannel of choice where the unparsed data is stored
+ */
+class DaffodilInputContentHandler(
+  dp: DFDL.DataProcessor,
+  output: DFDL.Output)
+  extends DFDL.DaffodilInputContentHandler {
+  private val inputter = new SAXInfosetInputter(this, dp, output)
+  private var unparseResult: DFDL.UnparseResult = _
+  private val infosetEvent: DFDL.SaxInfosetEvent = new DFDL.SaxInfosetEvent
+  private val characterData = new StringBuilder()
+  private var prefixMapping: NamespaceBinding = _
+
+  def getUnparseResult: DFDL.UnparseResult = unparseResult
+
+  def enableInputterUriAbsolutization(): Unit = 
inputter.enableUriAbsolutization = true
+
+  override def setDocumentLocator(locator: Locator): Unit = {
+    // do nothing
+  }
+
+  override def startDocument(): Unit = {
+    infosetEvent.eventType = One(StartDocument)
+    sendToInputter()
+  }
+
+  override def endDocument(): Unit = {
+    infosetEvent.eventType = One(EndDocument)
+    sendToInputter()
+  }
+
+  override def startPrefixMapping(prefix: String, uri: String): Unit = {
+    val pre = if (prefix == "") null else prefix
+    prefixMapping = NamespaceBinding(pre, uri, prefixMapping)
+  }
+
+  override def endPrefixMapping(prefix: String): Unit = {
+    // do nothing
+  }
+
+  override def startElement(uri: String, localName: String, qName: String, 
atts: Attributes): Unit = {
+    // we need to check if the characters data is all whitespace, if it is we 
drop the whitespace
+    // data, if it is not, it is an error as starting a new element with 
actual characterData means
+    // we haven't hit an endElement yet, which means we're in a complexElement 
and a complexElement
+    // cannot have character content
+    if (characterData.nonEmpty && characterData.toString().trim.nonEmpty) {
+      throw new IllegalContentWhereEventExpected("Non-whitespace characters in 
complex " +
+        "Element: " + characterData.toString()
+      )
+    } else {
+      // reset since it was whitespace only
+      characterData.setLength(0)
+    }
+
+    if (!infosetEvent.isEmpty && infosetEvent.localName.isDefined) {
+      // we started another element while we were in the process of building a 
startElement
+      // this means the first element was complex and we are ready for the 
inputter queue
+      sendToInputter()
+    }
+    // use Attributes to determine xsi:nil value
+    val nilIn = atts.getIndex(XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI, 
"nil")
+    infosetEvent.nilValue = if (nilIn >= 0) {
+      val nilValue = atts.getValue(nilIn)
+      One(nilValue)
+    } else {
+      Nope
+    }
+    // set localName and namespaceURI
+    val qNameArr = qName.split(":")
+    infosetEvent.localName =
+      if (localName.nonEmpty) {
+        One(localName)
+      } else if (qNameArr.length > 1) {
+        qNameArr.lift(1)
+      } else if (qNameArr.nonEmpty) {
+        qNameArr.lift(0)
+      } else {
+        Nope
+      }
+    infosetEvent.namespaceURI =
+      if (uri.nonEmpty) {
+        One(uri)
+      } else if (qNameArr.length > 1) { // has a prefix
+        // get the prefix off the qname
+        val qNamePrefix = qNameArr.lift(0).get
+        // look up prefix with and without xmlns prefix
+        val in = atts.getIndex(s"xmlns:$qNamePrefix")
+        val otherIn = if (in == -1) atts.getIndex("qNamePrefix") else in
+        if (in >= 0) {
+          val attrUri = atts.getValue(in)
+          One(attrUri)
+        } else if (otherIn >= 0) {
+          val attrUri = atts.getValue(otherIn)
+          One(attrUri)
+        } else {
+          One(prefixMapping.getURI(qNamePrefix))
+        }
+      } else {
+        try {
+          val in = atts.getIndex("xmlns")
+          if (in >= 0) {
+            val attrUri = atts.getValue(in)
+            One(attrUri)
+          } else {
+            One(prefixMapping.getURI(null))
+          }
+        } catch {
+          case _: NullPointerException => Nope
+        }
+      }
+    infosetEvent.eventType = One(StartElement)
+  }
+
+  override def endElement(uri: String, localName: String, qName: String): Unit 
= {
+    // if infosetEvent is a startElement, send that first
+    if (infosetEvent.eventType.contains(StartElement)) {
+      // any characterData that exists at this point is valid data as padding 
data has been
+      // taken care of in startElement
+      val maybeNewStr = One(characterData.toString())
+      infosetEvent.simpleText = maybeNewStr
+      characterData.setLength(0)
+      sendToInputter()
+    }
+    val qNameArr = qName.split(":")
+    infosetEvent.localName =
+      if (localName.nonEmpty) {
+        One(localName)
+      } else if (qNameArr.nonEmpty) {
+        One(qNameArr.last)
+      } else {
+        Nope
+      }
+    infosetEvent.namespaceURI =
+      if (uri.nonEmpty) {
+        One(uri)
+      } else if (qNameArr.length > 1) {
+        One(prefixMapping.getURI(qNameArr.head))
+      } else {
+        try {
+          One(prefixMapping.getURI(null))
+        } catch {
+          case _: NullPointerException => Nope

Review comment:
       If prefixMapping.getURI(null) can throw an NPE, can the call to 
prefixMapping.getURI(qNameArr.head) above throw one as well? If so, should 
this try/catch surround that call too?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to