tuxji commented on a change in pull request #436:
URL: https://github.com/apache/incubator-daffodil/pull/436#discussion_r511509810



##########
File path: 
daffodil-core/src/test/scala/org/apache/daffodil/processor/TestSAXUnparseAPI.scala
##########
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.processor
+
+import java.io.ByteArrayInputStream
+import java.io.ByteArrayOutputStream
+
+import scala.xml.Elem
+
+import javax.xml.parsers.SAXParserFactory
+import org.apache.daffodil.compiler.Compiler
+import org.apache.daffodil.processors.DataProcessor
+import org.apache.daffodil.util.SchemaUtils
+import org.apache.daffodil.xml.XMLUtils
+import org.junit.Assert.assertEquals
+import org.junit.Assert.assertTrue
+import org.junit.Assert.fail
+import org.junit.Test
+import org.xml.sax.InputSource
+import org.xml.sax.XMLReader
+
+object TestSAXUnparseAPI {
+
+  lazy val testSchema: Elem = SchemaUtils.dfdlTestSchema(
+      <xs:include 
schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd"/>,
+      <dfdl:format ref="tns:GeneralFormat"/>,
+      <xs:element name="list" type="tns:example1"/>
+      <xs:complexType name="example1">
+        <xs:sequence>
+          <xs:element name="w" type="xs:int" dfdl:length="1" 
dfdl:lengthKind="explicit" maxOccurs="unbounded"/>
+        </xs:sequence>
+      </xs:complexType>
+  )
+  lazy val testInfoset: Elem =
+    <list xmlns="http://example.com";><w>9</w><w>5</w><w>3</w><w>0</w></list>
+  lazy val testInfosetString: String = testInfoset.toString()
+  lazy val testData = "9530"
+
+  lazy val dp: DataProcessor = testDataprocessor(testSchema)
+  lazy val xmlReader: XMLReader = 
SAXParserFactory.newInstance.newSAXParser.getXMLReader
+
+  def testDataprocessor(testSchema: scala.xml.Elem): DataProcessor = {
+    val schemaCompiler = Compiler()
+    val pf = schemaCompiler.compileNode(testSchema)
+    if (pf.isError) {
+      val msgs = pf.getDiagnostics.map { _.getMessage() }.mkString("\n")
+      fail("pf compile errors: " + msgs)
+    }
+    pf.sset.root.erd.preSerialization // force evaluation of all compile-time 
constructs
+    val dp = pf.onPath("/").asInstanceOf[DataProcessor]
+    if (dp.isError) {
+      val msgs = dp.getDiagnostics.map { _.getMessage() }.mkString("\n")
+      fail("dp compile errors: " + msgs)
+    }
+    dp
+  }
+}
+
+class TestSAXUnparseAPI {
+  import TestSAXUnparseAPI._
+
+  @Test def testUnparseContentHandler_unparse(): Unit = {
+    val bao = new ByteArrayOutputStream()
+    val wbc = java.nio.channels.Channels.newChannel(bao)
+    val unparseContentHandler = dp.newContentHandlerInstance(wbc)
+    xmlReader.setContentHandler(unparseContentHandler)
+    xmlReader.setFeature(XMLUtils.SAX_NAMESPACES_FEATURE, true)
+    xmlReader.setFeature(XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE, true)
+    val bai = new ByteArrayInputStream(testInfosetString.getBytes)
+    xmlReader.parse(new InputSource(bai))
+    val ur = unparseContentHandler.getUnparseResult
+    assertTrue(!ur.isError)
+    assertEquals(testData, bao.toString)
+  }
+
+  @Test def testUnparseContentHandler_unparse_namespace_feature(): Unit = {
+    val bao = new ByteArrayOutputStream()
+    val wbc = java.nio.channels.Channels.newChannel(bao)
+    val unparseContentHandler = dp.newContentHandlerInstance(wbc)
+    xmlReader.setContentHandler(unparseContentHandler)
+    xmlReader.setFeature(XMLUtils.SAX_NAMESPACES_FEATURE, true)
+    xmlReader.setFeature(XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE, false)
+    val bai = new ByteArrayInputStream(testInfosetString.getBytes)
+    xmlReader.parse(new InputSource(bai))
+    val ur = unparseContentHandler.getUnparseResult
+    assertTrue(!ur.isError)
+    assertEquals(testData, bao.toString)
+  }
+
+  @Test def testUnparseContentHandler_unparse_namespace_prefix_feature(): Unit 
= {
+    val testSchema1 = SchemaUtils.dfdlTestSchema(
+      <xs:include 
schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd"/>,
+      <dfdl:format ref="tns:GeneralFormat"/>,
+        <h:hello xmlns:h="http://www.greeting.com/ns/"; id="a1" 
h:person="David"/>
+    )

Review comment:
       Looks to me like testSchema1 is defined but not used afterwards.

##########
File path: daffodil-lib/src/main/scala/org/apache/daffodil/util/Coroutines.scala
##########
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ package org.apache.daffodil.util
+
+ import java.util.concurrent.ArrayBlockingQueue
+
+ import scala.util.Try
+ import scala.util.Success
+ import scala.util.Failure
+
+ import org.apache.daffodil.exceptions.Assert
+ import org.apache.daffodil.exceptions.UnsuppressableException
+
+ /**
+  * General purpose Co-routines.
+  *
+  * Some design concerns: if these are used along with lazy vals and other 
things
+  * that make use of synchronized methods, there could be interactions.
+  *
+  * Definition of Coroutine - separate stacks, but NO CONCURRENCY. Only one
+  * of a set of coroutines is running at any given time.
+  */
+ trait Coroutine[T] {
+
+   private val queueCapacity: Int = 1
+   private val inboundQueue = new ArrayBlockingQueue[Try[T]](queueCapacity)
+
+   private val self = this
+
+   /**
+    * Override this in the main thread to be
+    *
+    * `override final def isMain = true`
+    *
+    * This suppresses creation of a thread for when the main
+    * thread is itself one of the co-routines.
+    */
+   protected def isMain: Boolean = false
+
+   private var thread_ : Option[Thread] = None
+
+   private final def init: Unit = {
+     if (!isMain && thread_.isEmpty) {
+       val thr = new Thread {
+         override def run() = self.run()
+       }
+       thread_ = Some(thr)
+       thr.start
+     }
+   }
+
+   /**
+    * Call when a co-routine resumes another (to provide a result of some sort)
+    * and then terminates. The coroutine calling this must return from the 
run()
+    * method immediately after calling this.
+    */
+   final def resumeFinal(coroutine: Coroutine[T], in: Try[T]): Unit = {
+     coroutine.init
+     coroutine.inboundQueue.put(in) // allows other to run  final
+   }
+
+   /**
+    * Call when one co-routine wants to resume another, tranmitting a
+    * argument value to it.
+    *
+    * The current co-routine will be suspended until it is resumed later.
+    */
+   final def resume(coroutine: Coroutine[T], in: Try[T]): Try[T] = {
+     resumeFinal(coroutine, in)
+     val res = waitForResume // blocks until it is resumed
+     res
+   }
+
+   final def waitForResume: Try[T] = {
+     inboundQueue.take
+   }
+
+   protected def run(): Unit
+ }
+
+ /**
+  * Convert something that has callbacks (e.g., SAX-like parser that calls 
back on events)
+  * into a pull-style API aka Iterator.
+  *
+  * Exceptions are reported on the thread doing the pulling, aka the consumer.
+  *
+  * Rules:
+  * (1) you have no access to the thing that generates call-backs other than
+  * you can start it. It can be an opaque library you cannot modify.
+  * (2) the generator code does not have to be thread safe. Hence, only one 
thread can
+  * be executing at a time here. There MUST BE NO CONCURRENCY. Two threads are 
necessary here,
+  * but only one will be executing at a time.
+  * (3) Finite storage - no building up of an arbitrary list/stream.
+  *
+  * Concepts adapted from
+  * https://gist.github.com/dportabella/5766099
+  * and
+  * 
https://scalaenthusiast.wordpress.com/2013/06/12/transform-a-callback-function-to-an-iteratorlist-in-scala/
+  */
+
+ final class InvertControl[S](body: => Unit) extends Iterator[S] with 
Coroutine[S] {
+
+   private object EndMarker extends Throwable
+   private val EndOfData = Failure(EndMarker)
+
+   /**
+    * The producer will run the body function, and from within it,
+    * calls to setNext() will
+    * produce the values for the consumer. The consumer (main thread)
+    * just uses ordinary next/hasNext calls to get the values.
+    *
+    * After the last value is produced, the consumer is resumed with EndOfData
+    * and the producer terminates.
+    */
+   class Producer(val consumer: Coroutine[S]) extends Coroutine[S] {
+     override final def run(): Unit = {
+       try {
+         waitForResume

Review comment:
       Should this Coroutines.scala file follow the Scala style convention 
that function calls with side effects should have parentheses?  I see a number 
of calls without any parentheses to functions with side effects (`init`, 
`start`, `waitForResume`) scattered throughout this entire file.

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilUnparseContentHandler.scala
##########
@@ -0,0 +1,309 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.processors
+
+import scala.util.Try
+import scala.xml.NamespaceBinding
+
+import javax.xml.XMLConstants
+import org.apache.daffodil.api.DFDL
+import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException
+import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException
+import org.apache.daffodil.infoset.IllegalContentWhereEventExpected
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndElement
+import org.apache.daffodil.infoset.InfosetInputterEventType.StartDocument
+import org.apache.daffodil.infoset.InfosetInputterEventType.StartElement
+import org.apache.daffodil.infoset.SAXInfosetInputter
+import org.apache.daffodil.util.MStackOf
+import org.apache.daffodil.util.Maybe.Nope
+import org.apache.daffodil.util.Maybe.One
+import org.apache.daffodil.util.Misc
+import org.xml.sax.Attributes
+import org.xml.sax.Locator
+
+/**
+ * DaffodilUnparseContentHandler produces InfosetEvent objects for the 
SAXInfosetInputter to

Review comment:
       SaxInfosetEvent, not InfosetEvent

##########
File path: 
daffodil-japi/src/main/java/org/apache/daffodil/japi/package-info.java
##########
@@ -38,44 +38,48 @@
  * contain information about the parse/unparse, such as whether or not the
  * processing succeeded with any diagnostic information.
  *
- * The {@link org.apache.daffodil.japi.DataProcessor} also provides a function 
to create a
- * {@link org.apache.daffodil.japi.DaffodilXMLReader} that can be used to 
perform parsing via the
- * SAX API.
+ * The {@link org.apache.daffodil.japi.DataProcessor} also provides two 
functions that can be used to
+ * perform parsing/unparsing via the SAX API. The first creates a
+ * {@link org.apache.daffodil.japi.DaffodilParseXMLReader} which is used for 
parsing, and the
+ * second creates a {@link 
org.apache.daffodil.japi.DaffodilUnparseContentHandler} which is used for
+ * unparsing.
  *
  * <pre>
  * {@code
- * DaffodilXMLReader xmlRdr = dp.newXMLReaderInstance();
+ * DaffodilParseXMLReader xmlReader = dp.newXMLReaderInstance();
+ * DaffodilUnparseContentHandler unparseContentHandler = 
dp.newContentHandlerInstance(output);
  * }</pre>
  *
- * The {@link org.apache.daffodil.japi.DaffodilXMLReader} has several methods 
that allow one to
+ * The {@link org.apache.daffodil.japi.DaffodilParseXMLReader} has several 
methods that allow one to
  * set properties and handlers (such as ContentHandlers or ErrorHandlers) for 
the reader. One can
  * use any contentHandler/errorHandler as long as they extend the
  * {@link org.xml.sax.ContentHandler} and {@link org.xml.sax.ErrorHandler} 
interfaces
- * respectively. One can also set properties for the {@link 
org.apache.daffodil.japi.DaffodilXMLReader}
- * using {@link 
org.apache.daffodil.japi.DaffodilXMLReader#setProperty(java.lang.String, 
java.lang.Object)}.
+ * respectively. One can also set properties for the {@link 
org.apache.daffodil.japi.DaffodilParseXMLReader}
+ * using {@link 
org.apache.daffodil.japi.DaffodilParseXMLReader#setProperty(java.lang.String, 
java.lang.Object)}.
  *
  * The following properties can be set as follows:
+ *
+ * <p><i>The variables below resolve to 
"urn:ogf:dfdl:2013:imp:daffodil.apache.org:2018:sax:" and
+ * "BlobDirectory", "BlobPrefix" and "BlobSuffix" respectively.</i></p>
+ *
  * <pre>
  * {@code
- * xmlRdr.setProperty(XMLUtils.DAFFODIL_SAX_URN_BLOBDIRECTORY(), "/tmp/");
- * xmlRdr.setProperty(XMLUtils.DAFFODIL_SAX_URN_BLOBPREFIX(), "daffodil-sax-");
- * xmlRdr.setProperty(XMLUtils.DAFFODIL_SAX_URN_BLOBSUFFIX(), ".bin");
+ * xmlReader.setProperty(xmlReader.DAFFODIL_SAX_URN_BLOBDIRECTORY(),
+ *  Paths.get(System.getProperty("java.io.tmpdir"))); // value type: 
java.nio.file.Paths
+ * xmlReader.setProperty(xmlReader.DAFFODIL_SAX_URN_BLOBPREFIX(), 
"daffodil-sax-"); // value type String
+ * xmlReader.setProperty(xmlReader.DAFFODIL_SAX_URN_BLOBSUFFIX(), ".bin"); // 
value type String

Review comment:
       Constants shouldn't need parentheses; these `()` can be removed, 
correct?  If you search through the diff, you will find 6 more places where 
`()` can be removed from these constants too.

##########
File path: 
daffodil-japi/src/main/java/org/apache/daffodil/japi/package-info.java
##########
@@ -206,6 +214,8 @@
  *
  * <h4>Unparse</h4>
  *
+ * <h5>Dataprocessor Unparse</h5>
+ *
  * The same {@link org.apache.daffodil.japi.DataProcessor} used for parse can 
be used to unparse an infoset
  * via the {@link 
org.apache.daffodil.japi.DataProcessor#unparse(org.apache.daffodil.japi.infoset.InfosetInputter,
 java.nio.channels.WritableByteChannel)} method. An {@link 
org.apache.daffodil.japi.infoset.InfosetInputter}

Review comment:
       Above line is very long, please break it in two.  The diff cuts it off 
and I have to drag / scroll the diff to see the rest.

##########
File path: 
daffodil-japi/src/main/java/org/apache/daffodil/japi/package-info.java
##########
@@ -217,6 +227,52 @@
  * UnparseResult ur = dp.unparse(jdomInputter, wbc)
  * }</pre>
  *
+ * <h5>SAX Unparse</h5>
+ *
+ * In order to kick off an unparse via the SAX API, one must register the
+ * {@link org.apache.daffodil.japi.DaffodilUnparseContentHandler} as the 
contentHandler for an XMLReader
+ * implementation. The call to the
+ * {@link 
org.apache.daffodil.japi.DataProcessor#newContentHandlerInstance(java.nio.channels.WritableByteChannel)}
 method must be provided with the {@link java.nio.channels.WritableByteChannel},
+ * where the unparsed data ought to be written to. Any XMLReader 
implementation is permissible, as
+ * long as they allow support for the namespace and namespace-prefixes 
features of XMLReader. The
+ * namespaces feature MUST be set to true, and namespace-prefixes is only 
optional if the former is true.

Review comment:
       What bad thing happens if the namespaces feature isn't set to true, or 
the XMLReader doesn't support setting the namespaces feature?  Also, who sets 
the namespaces feature, the caller or the Daffodil code?  I see a lot of calls 
in the Daffodil code that already set the namespaces feature to true, but the 
above comment makes it sound like the user has to do it too if passing in their 
own XMLReader implementation.  Please clarify.

##########
File path: 
daffodil-japi/src/main/java/org/apache/daffodil/japi/package-info.java
##########
@@ -38,44 +38,48 @@
  * contain information about the parse/unparse, such as whether or not the
  * processing succeeded with any diagnostic information.
  *
- * The {@link org.apache.daffodil.japi.DataProcessor} also provides a function 
to create a
- * {@link org.apache.daffodil.japi.DaffodilXMLReader} that can be used to 
perform parsing via the
- * SAX API.
+ * The {@link org.apache.daffodil.japi.DataProcessor} also provides two 
functions that can be used to
+ * perform parsing/unparsing via the SAX API. The first creates a
+ * {@link org.apache.daffodil.japi.DaffodilParseXMLReader} which is used for 
parsing, and the
+ * second creates a {@link 
org.apache.daffodil.japi.DaffodilUnparseContentHandler} which is used for
+ * unparsing.
  *
  * <pre>
  * {@code
- * DaffodilXMLReader xmlRdr = dp.newXMLReaderInstance();
+ * DaffodilParseXMLReader xmlReader = dp.newXMLReaderInstance();
+ * DaffodilUnparseContentHandler unparseContentHandler = 
dp.newContentHandlerInstance(output);
  * }</pre>
  *
- * The {@link org.apache.daffodil.japi.DaffodilXMLReader} has several methods 
that allow one to
+ * The {@link org.apache.daffodil.japi.DaffodilParseXMLReader} has several 
methods that allow one to
  * set properties and handlers (such as ContentHandlers or ErrorHandlers) for 
the reader. One can
  * use any contentHandler/errorHandler as long as they extend the
  * {@link org.xml.sax.ContentHandler} and {@link org.xml.sax.ErrorHandler} 
interfaces
- * respectively. One can also set properties for the {@link 
org.apache.daffodil.japi.DaffodilXMLReader}
- * using {@link 
org.apache.daffodil.japi.DaffodilXMLReader#setProperty(java.lang.String, 
java.lang.Object)}.
+ * respectively. One can also set properties for the {@link 
org.apache.daffodil.japi.DaffodilParseXMLReader}
+ * using {@link 
org.apache.daffodil.japi.DaffodilParseXMLReader#setProperty(java.lang.String, 
java.lang.Object)}.
  *
  * The following properties can be set as follows:
+ *
+ * <p><i>The variables below resolve to 
"urn:ogf:dfdl:2013:imp:daffodil.apache.org:2018:sax:" and
+ * "BlobDirectory", "BlobPrefix" and "BlobSuffix" respectively.</i></p>

Review comment:
       I think the original comment (tweaking it slightly) was better, or maybe 
users don't even need to know?
   
       <p><i>The constants below have literal values starting with 
"urn:ogf:dfdl:2013:imp:daffodil.apache.org:2018:sax:"
       and ending with "BlobDirectory", "BlobPrefix" and "BlobSuffix" 
respectively.</i></p>
   
   If you do change the japi comment here, the sapi comment will need the same 
change too.

##########
File path: daffodil-japi/src/main/scala/org/apache/daffodil/japi/Daffodil.scala
##########
@@ -953,3 +989,72 @@ class DaffodilXMLReader private[japi] (xmlrdr: 
SDaffodilXMLReader) extends org.x
    */
   def parse(arr: Array[Byte]): Unit = xmlrdr.parse(arr)
 }
+
+/**
+ * Accepts SAX callback events from any SAX XMLReader for unparsing
+ */
+class DaffodilUnparseContentHandler private[japi] (sContentHandler: 
SDaffodilUnparseContentHandler)
+  extends ContentHandlerProxy {
+
+  override protected val contentHandler: org.xml.sax.ContentHandler = 
sContentHandler
+
+  /**
+   * Returns the result of the SAX unparse containing diagnostic information. 
In the case of an
+   * DaffodilUnhandledSAXException, this will return null.
+   */
+  def getUnparseResult: UnparseResult =
+    new 
UnparseResult(sContentHandler.getUnparseResult.asInstanceOf[SUnparseResult])

Review comment:
       The comment says that getUnparseResult might return null, but this 
function always returns a new UnparseResult instance.  Does the comment need to 
be changed?  If so, please search through the diff for the words "return null" 
since there are 3 or 4 other similar comments that may need to be changed too.

##########
File path: 
daffodil-japi/src/main/java/org/apache/daffodil/japi/package-info.java
##########
@@ -217,6 +227,52 @@
  * UnparseResult ur = dp.unparse(jdomInputter, wbc)
  * }</pre>
  *
+ * <h5>SAX Unparse</h5>
+ *
+ * In order to kick off an unparse via the SAX API, one must register the
+ * {@link org.apache.daffodil.japi.DaffodilUnparseContentHandler} as the 
contentHandler for an XMLReader
+ * implementation. The call to the
+ * {@link 
org.apache.daffodil.japi.DataProcessor#newContentHandlerInstance(java.nio.channels.WritableByteChannel)}
 method must be provided with the {@link java.nio.channels.WritableByteChannel},

Review comment:
       Another long line (the diff cuts it off).

##########
File path: 
daffodil-japi/src/main/java/org/apache/daffodil/japi/package-info.java
##########
@@ -145,41 +153,41 @@
  * }</pre>
  *
  * <h5>SAX Parse</h5>
- * The {@link org.apache.daffodil.japi.DaffodilXMLReader#parse(
+ * The {@link org.apache.daffodil.japi.DaffodilParseXMLReader#parse(
  * org.apache.daffodil.japi.io.InputSourceDataInputStream)} method accepts 
input data to parse in
  * the form of a {@link 
org.apache.daffodil.japi.io.InputSourceDataInputStream}. The output
  * representation of the infoset, as well as how parse errors are handled, are 
dependent on the
- * content handler and the error handler provided to the {@link 
org.apache.daffodil.japi.DaffodilXMLReader}. For example the
+ * content handler and the error handler provided to the {@link 
org.apache.daffodil.japi.DaffodilParseXMLReader}. For example the
  * {@link org.jdom2.input.sax.SAXHandler} provides a JDOM representation, 
whereas other Content
- * Handlers may output directly to an {@link java.io.OutputStream} or {@link 
java.io.Writer}.
+ * Handlers may output directly to a {@link java.io.OutputStream} or {@link 
java.io.Writer}.
  *
  * <pre>
  * {@code
  * SAXHandler contentHandler = new SAXHandler();
- * xmlRdr.setContentHandler(contentHandler);
+ * xmlReader.setContentHandler(contentHandler);
  * InputSourceDataInputStream is = new InputSourceDataInputStream(data);
  * xmlReader.parse(is);
- * ParseResult pr = (ParseResult) 
xmlRdr.getProperty(XMLUtils.DAFFODIL_SAX_URN_PARSERESULT());
+ * ParseResult pr = (ParseResult) 
xmlReader.getProperty(xmlReader.DAFFODIL_SAX_URN_PARSERESULT());
  * Document doc = saxHandler.getDocument();
  * }</pre>
  *
- * The The {@link org.apache.daffodil.japi.DaffodilXMLReader#parse(
+ * The The {@link org.apache.daffodil.japi.DaffodilParseXMLReader#parse(
  * org.apache.daffodil.japi.io.InputSourceDataInputStream)} method is not 
thread-safe and may
  * only be called again/reused once a parse operation is completed. This can 
be done multiple
- * times without the need to create new DaffodilXMLReaders, ContentHandlers or 
ErrorHandlers. It
+ * times without the need to create new DaffodilParseXMLReaders, 
ContentHandlers or ErrorHandlers. It
  * might be necessary to reset whatever ContentHandler is used (or allocate a 
new one). A
- * thread-safe implementation would require unique instances of the 
DaffodilXMLReader and its
+ * thread-safe implementation would require unique instances of the 
DaffodilParseXMLReader and its
  * components. For example:
  *
  * <pre>
  * {@code
  * SAXHandler contentHandler = new SAXHandler();
- * xmlRdr.setContentHandler(contentHandler);
+ * xmlReader.setContentHandler(contentHandler);
  * for (File f : inputFiles) {
  *   contentHandler.reset();
  *   InputSourceDataInputStream is = new InputSourceDataInputStream(new 
FileInputStream(f));
  *   xmlReader.parse(is);
- *   ParseResult pr = (ParseResult) 
xmlRdr.getProperty("urn:ogf:dfdl:2013:imp:daffodil.apache.org:2018:sax:ParseResult");
+ *   ParseResult pr = (ParseResult) 
xmlReader.getProperty("urn:ogf:dfdl:2013:imp:daffodil.apache.org:2018:sax:ParseResult");

Review comment:
       Shouldn't this literal be replaced with 
`xmlReader.DAFFODIL_SAX_URN_PARSERESULT`?

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetInputter.scala
##########
@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.infoset
+
+import java.net.URI
+import java.net.URISyntaxException
+
+import scala.util.Try
+
+import org.apache.daffodil.api.DFDL
+import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException
+import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException
+import org.apache.daffodil.dpath.NodeInfo
+import org.apache.daffodil.exceptions.Assert
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument
+import org.apache.daffodil.util.Maybe.One
+import org.apache.daffodil.util.MaybeBoolean
+import org.apache.daffodil.util.Misc
+import org.apache.daffodil.xml.XMLUtils
+
+/**
+ * The SAXInfosetInputter consumes infosetEvent objects from the 
DaffodilUnparseContentHandler class
+ * and converts it to an event that the Dataprocessor unparse can use. This 
class contains two
+ * infosetEvent objects that contain the current event the unparse method is 
processing, and the
+ * next event to be processed after.

Review comment:
       Might read better as:
   
        * The SAXInfosetInputter consumes SaxInfosetEvent objects from the 
DaffodilUnparseContentHandler
        * class and converts them to events that the DataProcessor unparse can 
use. This class contains two
        * SaxInfosetEvent objects, the current event the unparse method is 
processing and the next event
        * to be processed later.
   
   I also notice we're not using a consistent case for SAX itself.  It looks 
like we're using SAX in most places, but also Sax in some places.  It would be 
nice if Daffodil would follow the Scala naming conventions 
(<https://docs.scala-lang.org/style/naming-conventions.html>) in which case we 
would use Sax instead of SAX, but I see Daffodil preferring to use entirely 
upper-case words for some acronyms (API, CLI, DFDL, JDOM, OOLAG, PUA, SDE, SDW, 
TDML, URI, URL, XML, etc.) so SAX fits that pattern.

##########
File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilUnparseContentHandler.scala
##########
@@ -0,0 +1,309 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.processors
+
+import scala.util.Try
+import scala.xml.NamespaceBinding
+
+import javax.xml.XMLConstants
+import org.apache.daffodil.api.DFDL
+import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException
+import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException
+import org.apache.daffodil.infoset.IllegalContentWhereEventExpected
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument
+import org.apache.daffodil.infoset.InfosetInputterEventType.EndElement
+import org.apache.daffodil.infoset.InfosetInputterEventType.StartDocument
+import org.apache.daffodil.infoset.InfosetInputterEventType.StartElement
+import org.apache.daffodil.infoset.SAXInfosetInputter
+import org.apache.daffodil.util.MStackOf
+import org.apache.daffodil.util.Maybe.Nope
+import org.apache.daffodil.util.Maybe.One
+import org.apache.daffodil.util.Misc
+import org.xml.sax.Attributes
+import org.xml.sax.Locator
+
+/**
+ * DaffodilUnparseContentHandler produces InfosetEvent objects for the SAXInfosetInputter to
+ * consume and convert to an event that the Dataprocessor unparse can use. The infosetEvent object
+ * is built from information that is passed to the ContentHandler from an XMLReader parser. In
+ * order to receive the uri and prefix information from the XMLReader, the following features
+ * must be set to true on whatever XMLReader is used: http://xml.org/sax/features/namespaces and
+ * http://xml.org/sax/features/namespace-prefixes
+ *
+ * This class, together with the SAXInfosetInputter, uses coroutines to ensure that only one event,
+ * at a time, is passed between the two classes. The following is the general process:
+ *
+ * - an external call is made to parse an XML document
+ * - this class receives a StartDocument call, which is the first infosetEvent that is sent to
+ * the SAXInfosetInputter. That event is put on the inputter's queue, this thread is paused, and
+ * that inputter's thread is run
+ * - when the SAXInfosetInputter is done processing an event and is ready for a new event, it
+ * sends the completed event via the coroutine system, and loads it on the contentHandler's
+ * queue, which restarts this thread and pauses that one. In the expected case, the events will
+ * contain no new information, until the unparse is completed.
+ * - this process continues until the EndDocument method is called. Once that infosetEvent is
+ * sent to the inputter, it signals the end of events coming from the contentHandler. This
+ * ends the unparseProcess and returns the event with the unparseResult and/or any error
+ * information
+ *
+ * @param dp dataprocessor object that will be used to call the parse
+ * @param output outputChannel of choice where the unparsed data is stored
+ */
+class DaffodilUnparseContentHandler(
+  dp: DFDL.DataProcessor,
+  output: DFDL.Output)
+  extends DFDL.DaffodilUnparseContentHandler {
+  private lazy val inputter = new SAXInfosetInputter(this, dp, output)
+  private var unparseResult: DFDL.UnparseResult = _
+  private lazy val infosetEvent: DFDL.SaxInfosetEvent = new 
DFDL.SaxInfosetEvent
+  private lazy val characterData = new StringBuilder
+  private var prefixMapping: NamespaceBinding = _
+  private lazy val prefixMappingTrackingStack = new MStackOf[NamespaceBinding]
+  private var contentHandlerPrefixMappingUsed = false
+
+  /**
+   * returns null in the case of a DaffodilUnhandledSAXException
+   */
+  def getUnparseResult: DFDL.UnparseResult = unparseResult
+
+  def enableInputterResolutionOfRelativeInfosetBlobURIs(): Unit = 
inputter.enableResolutionOfRelativeInfosetBlobURIs()
+
+  override def setDocumentLocator(locator: Locator): Unit = {
+    // do nothing
+  }
+
+  override def startDocument(): Unit = {
+    infosetEvent.eventType = One(StartDocument)
+    sendToInputter()
+  }
+
+  override def endDocument(): Unit = {
+    infosetEvent.eventType = One(EndDocument)
+    sendToInputter()
+  }
+
+  override def startPrefixMapping(prefix: String, uri: String): Unit = {
+    contentHandlerPrefixMappingUsed = true
+    val pre = if (prefix == "") null else prefix
+    prefixMapping = NamespaceBinding(pre, uri, prefixMapping)
+  }
+
+  /**
+   * XMLReader does not guarantee the order of the prefixes called for this function, but it does
+   * guarantee that this method is called after its corresponding endElement, which means we can
+   * just take off the top mappings, because the element that might have cared about the order
+   * is already done using the prefixMappings
+   */
+  override def endPrefixMapping(prefix: String): Unit = {
+    prefixMapping = if (prefixMapping == null) prefixMapping else 
prefixMapping.parent
+  }
+
+  /**
+   * Uses Attributes, which is passed in to the startElement callback, to 
extract prefix mappings and
+   * populate the global prefixMapping
+   */
+  def mapPrefixMappingFromAttributesImpl(atts:Attributes): Unit = {
+    var i = 0
+    while (i < atts.getLength) {
+      val qName = atts.getQName(i)
+      if (qName.startsWith("xmlns")) {
+        val uri =  atts.getValue(i)
+        val prefix = if(qName.contains(":")) {
+          val pref = qName.split(":").last
+          pref
+        } else {
+         null // NamespaceBinding does not allow blanks so return null instead
+        }
+        prefixMapping = NamespaceBinding(prefix, uri, prefixMapping)
+      }
+      i += 1
+    }
+  }
+
+  override def startElement(uri: String, localName: String, qName: String, 
atts: Attributes): Unit = {
+    // we need to check if the characters data is all whitespace, if it is we 
drop the whitespace
+    // data, if it is not, it is an error as starting a new element with 
actual characterData means
+    // we haven't hit an endElement yet, which means we're in a complexElement 
and a complexElement
+    // cannot have character content
+    if (characterData.nonEmpty && !Misc.isAllWhitespace(characterData)) {
+      throw new IllegalContentWhereEventExpected("Non-whitespace characters in 
complex " +
+        "Element: " + characterData.toString
+      )
+    } else {
+      // reset since it was whitespace only
+      characterData.setLength(0)
+    }
+
+    if (!contentHandlerPrefixMappingUsed) {
+      // always pushes but doesn't always add a mapping since atts can be empty
+      prefixMappingTrackingStack.push(prefixMapping)
+      mapPrefixMappingFromAttributesImpl(atts)
+    }
+
+    if (!infosetEvent.isEmpty && infosetEvent.localName.isDefined) {
+      // we started another element while we were in the process of building a 
startElement
+      // this means the first element was complex and we are ready for the 
inputter queue
+      sendToInputter()
+    }
+    // use Attributes to determine xsi:nil value
+    val nilIn = atts.getIndex(XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI, 
"nil")
+    infosetEvent.nilValue = if (nilIn >= 0) {
+      val nilValue = atts.getValue(nilIn)
+      One(nilValue)
+    } else {
+      Nope
+    }
+    // set localName and namespaceURI
+    lazy val qNameArr = qName.split(":")
+    infosetEvent.localName =
+      if (localName.nonEmpty) {
+        One(localName)
+      } else if (qNameArr.length > 1) {
+        One(qNameArr.last)
+      } else if (qNameArr.nonEmpty) {
+        One(qNameArr.head)
+      } else {

Review comment:
       What is the difference between the `qNameArr.length > 1` and 
`qNameArr.nonEmpty` cases which requires returning `One(qNameArr.last)` on one 
hand and `One(qNameArr.head)` on the other hand?  Could you merge both cases 
into `qNameArr.nonEmpty` and return `One(qNameArr.last)` since `last == head` 
when `qNameArr.length == 1`?  I suspect there must be a reason since I see the 
simpler logic used in endElement at lines 232-239 below.

##########
File path: daffodil-sapi/src/main/scala/org/apache/daffodil/sapi/package.scala
##########
@@ -134,36 +139,37 @@ package org.apache.daffodil
  *
  * <h5>SAX Parse</h5>
  *
- * The [[DaffodilXMLReader.parse(isdis:org\.apache\.daffodil\.sapi\.io\.InputSourceDataInputStream* DaffodilXMLReader.parse]] method accepts input data to parse in the form of a
+ * The [[DaffodilParseXMLReader.parse(isdis:org\.apache\.daffodil\.sapi\.io\.InputSourceDataInputStream* DaffodilParseXMLReader.parse]] method accepts input data to parse in the form of a

Review comment:
       Very long line here.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to