stevedlawrence commented on a change in pull request #436: URL: https://github.com/apache/incubator-daffodil/pull/436#discussion_r511918773
########## File path: daffodil-core/src/test/scala/org/apache/daffodil/processor/TestSAXUnparseAPI.scala ########## @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.daffodil.processor + +import java.io.ByteArrayInputStream +import java.io.ByteArrayOutputStream + +import scala.xml.Elem + +import javax.xml.parsers.SAXParserFactory +import org.apache.daffodil.compiler.Compiler +import org.apache.daffodil.processors.DataProcessor +import org.apache.daffodil.util.SchemaUtils +import org.apache.daffodil.xml.XMLUtils +import org.junit.Assert.assertEquals +import org.junit.Assert.assertTrue +import org.junit.Assert.fail +import org.junit.Test +import org.xml.sax.InputSource +import org.xml.sax.XMLReader + +object TestSAXUnparseAPI { + + lazy val testSchema: Elem = SchemaUtils.dfdlTestSchema( + <xs:include schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd"/>, + <dfdl:format ref="tns:GeneralFormat"/>, + <xs:element name="list" type="tns:example1"/> + <xs:complexType name="example1"> + <xs:sequence> + <xs:element name="w" type="xs:int" dfdl:length="1" dfdl:lengthKind="explicit" maxOccurs="unbounded"/> + </xs:sequence> + </xs:complexType> + ) + lazy val testInfoset: Elem = 
+ <list xmlns="http://example.com"><w>9</w><w>5</w><w>3</w><w>0</w></list> + lazy val testInfosetString: String = testInfoset.toString() + lazy val testData = "9530" + + lazy val dp: DataProcessor = testDataprocessor(testSchema) + lazy val xmlReader: XMLReader = SAXParserFactory.newInstance.newSAXParser.getXMLReader + + def testDataprocessor(testSchema: scala.xml.Elem): DataProcessor = { + val schemaCompiler = Compiler() + val pf = schemaCompiler.compileNode(testSchema) + if (pf.isError) { + val msgs = pf.getDiagnostics.map { _.getMessage() }.mkString("\n") + fail("pf compile errors: " + msgs) + } + pf.sset.root.erd.preSerialization // force evaluation of all compile-time constructs + val dp = pf.onPath("/").asInstanceOf[DataProcessor] + if (dp.isError) { + val msgs = dp.getDiagnostics.map { _.getMessage() }.mkString("\n") + fail("dp compile errors: " + msgs) + } + dp + } +} + +class TestSAXUnparseAPI { + import TestSAXUnparseAPI._ + + @Test def testUnparseContentHandler_unparse(): Unit = { + val bao = new ByteArrayOutputStream() + val wbc = java.nio.channels.Channels.newChannel(bao) + val unparseContentHandler = dp.newContentHandlerInstance(wbc) + xmlReader.setContentHandler(unparseContentHandler) + xmlReader.setFeature(XMLUtils.SAX_NAMESPACES_FEATURE, true) + xmlReader.setFeature(XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE, true) Review comment: All these tests share an XMLReader. Are XMLReaders thread-safe? I was under the impression they weren't. If these tests get executed in parallel I suspect things could break. Should we create a unique XMLReader per test? ########## File path: daffodil-core/src/test/scala/org/apache/daffodil/processor/TestSAXParseAPI.scala ########## @@ -0,0 +1,218 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.daffodil.processor + +import java.io.ByteArrayInputStream +import java.io.ByteArrayOutputStream +import java.io.IOException + +import scala.xml.Elem +import scala.xml.SAXParseException + +import org.apache.daffodil.Implicits.intercept +import org.apache.daffodil.compiler.Compiler +import org.apache.daffodil.infoset.DaffodilOutputContentHandler +import org.apache.daffodil.processors.DataProcessor +import org.apache.daffodil.processors.ParseResult +import org.apache.daffodil.util.SchemaUtils +import org.apache.daffodil.xml.XMLUtils +import org.jdom2.input.sax.BuilderErrorHandler +import org.jdom2.input.sax.SAXHandler +import org.junit.Assert.assertEquals +import org.junit.Assert.assertFalse +import org.junit.Assert.assertNotEquals +import org.junit.Assert.assertNull +import org.junit.Assert.assertTrue +import org.junit.Assert.fail +import org.junit.Test +import org.xml.sax.InputSource +import org.xml.sax.SAXNotRecognizedException +import org.xml.sax.SAXNotSupportedException + + +object TestSAXParseAPI { + + val testSchema: Elem = SchemaUtils.dfdlTestSchema( + <xs:include schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd"/>, + <dfdl:format ref="tns:GeneralFormat"/>, + <xs:element name="list" type="tns:example1"/> + <xs:complexType name="example1"> + <xs:sequence> + <xs:element name="w" type="xs:int" dfdl:length="1" dfdl:lengthKind="explicit" 
maxOccurs="unbounded"/> + </xs:sequence> + </xs:complexType> + ) + val testInfoset: Elem = <list xmlns="http://example.com"><w>9</w><w>1</w><w>0</w></list> + val testInfosetString: String = testInfoset.toString() + val testData = "910" + + lazy val dp: DataProcessor = testDataprocessor(testSchema) + + def testDataprocessor(testSchema: scala.xml.Elem): DataProcessor = { + val schemaCompiler = Compiler() + val pf = schemaCompiler.compileNode(testSchema) + if (pf.isError) { + val msgs = pf.getDiagnostics.map { _.getMessage() }.mkString("\n") + fail("pf compile errors: " + msgs) + } + pf.sset.root.erd.preSerialization // force evaluation of all compile-time constructs + val dp = pf.onPath("/").asInstanceOf[DataProcessor] + if (dp.isError) { + val msgs = dp.getDiagnostics.map { _.getMessage() }.mkString("\n") + fail("dp compile errors: " + msgs) + } + dp + } +} + +class TestSAXParseAPI { + import TestSAXParseAPI._ + + @Test def testDaffodilParseXMLReader_setFeatureUnsupported(): Unit = { + val xmlReader = dp.newXMLReaderInstance + val snr = intercept[SAXNotRecognizedException] { + xmlReader.setFeature("http://xml.org/sax/features/validation", true) + } + assertTrue(snr.getMessage.contains("Feature unsupported")) + assertTrue(snr.getMessage.contains("Supported features are:")) + } + + @Test def testDaffodilParseXMLReader_get_setFeature(): Unit = { + val xmlReader = dp.newXMLReaderInstance + val feature = "http://xml.org/sax/features/namespace-prefixes" + val origValue = xmlReader.getFeature(feature) + assertFalse(origValue) + xmlReader.setFeature(feature, true) + val newValue = xmlReader.getFeature(feature) + assertTrue(newValue) + } + + @Test def testDaffodilParseXMLReader_setProperty_unsupported(): Unit = { + val xmlReader = dp.newXMLReaderInstance + val property: AnyRef = "Hello" + val snr = intercept[SAXNotRecognizedException] { + xmlReader.setProperty("http://xml.org/sax/properties/xml-string", property) + } + assertTrue(snr.getMessage.contains("Property 
unsupported")) + } + + @Test def testDaffodilParseXMLReader_setProperty_badValue(): Unit = { + val xmlReader = dp.newXMLReaderInstance + val property: String = XMLUtils.DAFFODIL_SAX_URN_BLOBDIRECTORY + val propertyVal: AnyRef = "/tmp/i/am/a/directory" + val sns = intercept[SAXNotSupportedException]( + xmlReader.setProperty(property, propertyVal) + ) + assertTrue(sns.getMessage.contains("Unsupported value for property")) + } + + @Test def testDaffodilParseXMLReader_get_setProperty(): Unit = { + val xmlReader = dp.newXMLReaderInstance + val property: String = XMLUtils.DAFFODIL_SAX_URN_BLOBPREFIX + val propertyVal: AnyRef ="testing-blobs" + val origValue = xmlReader.getProperty(property) + assertNotEquals(propertyVal, origValue) + xmlReader.setProperty(property, propertyVal) + val newValue = xmlReader.getProperty(property) + assertEquals(propertyVal, newValue.asInstanceOf[String]) + } + + @Test def testDaffodilParseXMLReader_get_setContentHandler(): Unit = { + val xmlReader = dp.newXMLReaderInstance + val ch = new SAXHandler() + val origValue = xmlReader.getContentHandler + assertNull(origValue) + xmlReader.setContentHandler(ch) + val newValue = xmlReader.getContentHandler + assertTrue(newValue.isInstanceOf[SAXHandler]) + } + + @Test def testDaffodilParseXMLReader_get_setErrorHandler(): Unit = { + val xmlReader = dp.newXMLReaderInstance + val eh = new BuilderErrorHandler() + val origValue = xmlReader.getErrorHandler + assertNull(origValue) + xmlReader.setErrorHandler(eh) + val newValue = xmlReader.getErrorHandler + assertTrue(newValue.isInstanceOf[BuilderErrorHandler]) + } + + @Test def testDaffodilParseXMLReader_parse_inputSource_no_backing_stream(): Unit = { + val xmlReader = dp.newXMLReaderInstance + val input = new InputSource() + val ioe = intercept[IOException]( + xmlReader.parse(input) + ) + assertTrue(ioe.getMessage.contains("Inputsource must be backed by Inputstream")) Review comment: Should be a capital 'S' in InputStream and InputSource error message. 
########## File path: daffodil-core/src/test/scala/org/apache/daffodil/processor/TestSAXParseAPI.scala ########## @@ -0,0 +1,218 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.daffodil.processor + +import java.io.ByteArrayInputStream +import java.io.ByteArrayOutputStream +import java.io.IOException + +import scala.xml.Elem +import scala.xml.SAXParseException + +import org.apache.daffodil.Implicits.intercept +import org.apache.daffodil.compiler.Compiler +import org.apache.daffodil.infoset.DaffodilOutputContentHandler +import org.apache.daffodil.processors.DataProcessor +import org.apache.daffodil.processors.ParseResult +import org.apache.daffodil.util.SchemaUtils +import org.apache.daffodil.xml.XMLUtils +import org.jdom2.input.sax.BuilderErrorHandler +import org.jdom2.input.sax.SAXHandler +import org.junit.Assert.assertEquals +import org.junit.Assert.assertFalse +import org.junit.Assert.assertNotEquals +import org.junit.Assert.assertNull +import org.junit.Assert.assertTrue +import org.junit.Assert.fail +import org.junit.Test +import org.xml.sax.InputSource +import org.xml.sax.SAXNotRecognizedException +import org.xml.sax.SAXNotSupportedException + + +object TestSAXParseAPI { + + val 
testSchema: Elem = SchemaUtils.dfdlTestSchema( + <xs:include schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd"/>, + <dfdl:format ref="tns:GeneralFormat"/>, + <xs:element name="list" type="tns:example1"/> + <xs:complexType name="example1"> + <xs:sequence> + <xs:element name="w" type="xs:int" dfdl:length="1" dfdl:lengthKind="explicit" maxOccurs="unbounded"/> + </xs:sequence> + </xs:complexType> + ) + val testInfoset: Elem = <list xmlns="http://example.com"><w>9</w><w>1</w><w>0</w></list> + val testInfosetString: String = testInfoset.toString() + val testData = "910" + + lazy val dp: DataProcessor = testDataprocessor(testSchema) + + def testDataprocessor(testSchema: scala.xml.Elem): DataProcessor = { + val schemaCompiler = Compiler() + val pf = schemaCompiler.compileNode(testSchema) + if (pf.isError) { + val msgs = pf.getDiagnostics.map { _.getMessage() }.mkString("\n") + fail("pf compile errors: " + msgs) + } + pf.sset.root.erd.preSerialization // force evaluation of all compile-time constructs + val dp = pf.onPath("/").asInstanceOf[DataProcessor] + if (dp.isError) { + val msgs = dp.getDiagnostics.map { _.getMessage() }.mkString("\n") + fail("dp compile errors: " + msgs) + } + dp + } +} + +class TestSAXParseAPI { + import TestSAXParseAPI._ + + @Test def testDaffodilParseXMLReader_setFeatureUnsupported(): Unit = { + val xmlReader = dp.newXMLReaderInstance + val snr = intercept[SAXNotRecognizedException] { + xmlReader.setFeature("http://xml.org/sax/features/validation", true) + } + assertTrue(snr.getMessage.contains("Feature unsupported")) + assertTrue(snr.getMessage.contains("Supported features are:")) + } + + @Test def testDaffodilParseXMLReader_get_setFeature(): Unit = { + val xmlReader = dp.newXMLReaderInstance + val feature = "http://xml.org/sax/features/namespace-prefixes" + val origValue = xmlReader.getFeature(feature) + assertFalse(origValue) + xmlReader.setFeature(feature, true) + val newValue = xmlReader.getFeature(feature) + 
assertTrue(newValue) + } + + @Test def testDaffodilParseXMLReader_setProperty_unsupported(): Unit = { + val xmlReader = dp.newXMLReaderInstance + val property: AnyRef = "Hello" + val snr = intercept[SAXNotRecognizedException] { + xmlReader.setProperty("http://xml.org/sax/properties/xml-string", property) + } + assertTrue(snr.getMessage.contains("Property unsupported")) + } + + @Test def testDaffodilParseXMLReader_setProperty_badValue(): Unit = { + val xmlReader = dp.newXMLReaderInstance + val property: String = XMLUtils.DAFFODIL_SAX_URN_BLOBDIRECTORY + val propertyVal: AnyRef = "/tmp/i/am/a/directory" + val sns = intercept[SAXNotSupportedException]( + xmlReader.setProperty(property, propertyVal) + ) + assertTrue(sns.getMessage.contains("Unsupported value for property")) + } + + @Test def testDaffodilParseXMLReader_get_setProperty(): Unit = { + val xmlReader = dp.newXMLReaderInstance + val property: String = XMLUtils.DAFFODIL_SAX_URN_BLOBPREFIX + val propertyVal: AnyRef ="testing-blobs" + val origValue = xmlReader.getProperty(property) + assertNotEquals(propertyVal, origValue) + xmlReader.setProperty(property, propertyVal) + val newValue = xmlReader.getProperty(property) + assertEquals(propertyVal, newValue.asInstanceOf[String]) + } + + @Test def testDaffodilParseXMLReader_get_setContentHandler(): Unit = { + val xmlReader = dp.newXMLReaderInstance + val ch = new SAXHandler() + val origValue = xmlReader.getContentHandler + assertNull(origValue) + xmlReader.setContentHandler(ch) + val newValue = xmlReader.getContentHandler + assertTrue(newValue.isInstanceOf[SAXHandler]) + } + + @Test def testDaffodilParseXMLReader_get_setErrorHandler(): Unit = { + val xmlReader = dp.newXMLReaderInstance + val eh = new BuilderErrorHandler() + val origValue = xmlReader.getErrorHandler + assertNull(origValue) + xmlReader.setErrorHandler(eh) + val newValue = xmlReader.getErrorHandler + assertTrue(newValue.isInstanceOf[BuilderErrorHandler]) + } + + @Test def 
testDaffodilParseXMLReader_parse_inputSource_no_backing_stream(): Unit = { + val xmlReader = dp.newXMLReaderInstance + val input = new InputSource() + val ioe = intercept[IOException]( + xmlReader.parse(input) + ) + assertTrue(ioe.getMessage.contains("Inputsource must be backed by Inputstream")) + } + + @Test def testDaffodilParseXMLReader_parse_inputSource_with_backing_stream(): Unit = { + val xmlReader = dp.newXMLReaderInstance + val baos = new ByteArrayOutputStream() + val ch = new DaffodilOutputContentHandler(baos) Review comment: Instead of directly creating a DaffodilOutputContentHandler (I thought this was changed to DaffodilUnparseContentHandler?), should we use the dp.newContentHandlerInstance, to match dp.newXMLReaderInstance? This is a cool test, btw. Directly connecting parse and unparse using the SAX API. Might also be worth ensuring the baos contains the correct unparse data. ########## File path: daffodil-japi/src/main/java/org/apache/daffodil/japi/package-info.java ########## @@ -217,6 +227,52 @@ * UnparseResult ur = dp.unparse(jdomInputter, wbc) * }</pre> * + * <h5>SAX Unparse</h5> + * + * In order to kick off an unparse via the SAX API, one must register the + * {@link org.apache.daffodil.japi.DaffodilUnparseContentHandler} as the contentHandler for an XMLReader + * implementation. The call to the + * {@link org.apache.daffodil.japi.DataProcessor#newContentHandlerInstance(java.nio.channels.WritableByteChannel)} method must be provided with the {@link java.nio.channels.WritableByteChannel}, + * where the unparsed data ought to be written to. Any XMLReader implementation is permissible, as + * long as they allow support for the namespace and namespace-prefixes features of XMLReader. The + * namespaces feature MUST be set to true, and namespace-prefixes is only optional if the former is true. 
+ * + * <pre> + * {@code + * ByteArrayInputStream is = new ByteArrayInputStream(data); + * DaffodilUnparseContentHandler unparseContentHandler = dp.newContentHandlerInstance(wbc); + * try { + * XMLReader xmlReader = SAXParserFactory.newInstance().newSAXParser().getXMLReader(); + * xmlReader.setContentHandler(unparseContentHandler) + * } catch (ParserConfigurationException | SAXException e) { Review comment: The ``is`` ByteArrayInputStream is never used. And there is a ``wbc`` variable that is never defined. Can you please define wbc so it's more clear that the parameter to ``newContentHandlerInstance`` is the output channel? Also, should we add a ``xmlReader.parse(is)`` so it's clear that the 'data' in this example is an infoset that will be unparsed by the content handler? ########## File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetInputter.scala ########## @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.daffodil.infoset + +import java.net.URI +import java.net.URISyntaxException + +import scala.util.Try + +import org.apache.daffodil.api.DFDL +import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException +import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException +import org.apache.daffodil.dpath.NodeInfo +import org.apache.daffodil.exceptions.Assert +import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument +import org.apache.daffodil.util.Maybe.One +import org.apache.daffodil.util.MaybeBoolean +import org.apache.daffodil.util.Misc +import org.apache.daffodil.xml.XMLUtils + +/** + * The SAXInfosetInputter consumes infosetEvent objects from the DaffodilUnparseContentHandler class + * and converts it to an event that the Dataprocessor unparse can use. This class contains two + * infosetEvent objects that contain the current event the unparse method is processing, and the + * next event to be processed after. + * + * This class together with the DaffodilUnparseContentHandler use coroutines to ensure that only one event, + * at a time, is passed between the two classes. The following is the general process: + * + * - the run method is called, with a StartDocument event already loaded on the inputter's queue. 
+ * This is collected and stored in the nextEvent method, and the inputter's next method is called + * to populate the currentEvent and load the nextEvent + * - The dp.unparse method is called, and it calls hasNext to make sure an event exists to be + * processed and then queries the currentEvent, after it is done with the currentEvent, it calls + * inputter.next to get the next event, and that copies the nextEvent into the currentEvent and + * transfser control to the contentHandler to load the nextEvent Review comment: transfers* ########## File path: daffodil-japi/src/main/scala/org/apache/daffodil/japi/Daffodil.scala ########## @@ -953,3 +989,72 @@ class DaffodilXMLReader private[japi] (xmlrdr: SDaffodilXMLReader) extends org.x */ def parse(arr: Array[Byte]): Unit = xmlrdr.parse(arr) } + +/** + * Accepts SAX callback events from any SAX XMLReader for unparsing + */ +class DaffodilUnparseContentHandler private[japi] (sContentHandler: SDaffodilUnparseContentHandler) + extends ContentHandlerProxy { + + override protected val contentHandler: org.xml.sax.ContentHandler = sContentHandler + + /** + * Returns the result of the SAX unparse containing diagnostic information. In the case of an + * DaffodilUnhandledSAXException, this will return null. + */ + def getUnparseResult: UnparseResult = + new UnparseResult(sContentHandler.getUnparseResult.asInstanceOf[SUnparseResult]) +} + +/* A proxy for existing contentHandlers */ +abstract class ContentHandlerProxy extends org.xml.sax.ContentHandler { Review comment: I'm wondering if we even gain anything by having this ContentHandlerProxy. It's not going to be used by any other ContentHandlers and its logic is very specific to Daffodil and the exceptions it throws. I suggest removing this and moving all the logic into the DaffodilUnparseContentHandler. Then the contentHandler variable can become private. 
########## File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilUnparseContentHandler.scala ########## @@ -0,0 +1,309 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.daffodil.processors + +import scala.util.Try +import scala.xml.NamespaceBinding + +import javax.xml.XMLConstants +import org.apache.daffodil.api.DFDL +import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException +import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException +import org.apache.daffodil.infoset.IllegalContentWhereEventExpected +import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument +import org.apache.daffodil.infoset.InfosetInputterEventType.EndElement +import org.apache.daffodil.infoset.InfosetInputterEventType.StartDocument +import org.apache.daffodil.infoset.InfosetInputterEventType.StartElement +import org.apache.daffodil.infoset.SAXInfosetInputter +import org.apache.daffodil.util.MStackOf +import org.apache.daffodil.util.Maybe.Nope +import org.apache.daffodil.util.Maybe.One +import org.apache.daffodil.util.Misc +import org.xml.sax.Attributes +import org.xml.sax.Locator + +/** + * DaffodilUnparseContentHandler produces InfosetEvent objects for the 
SAXInfosetInputter to + * consume and convert to a event that the Dataprocessor unparse can use. The infosetEvent object + * is built from information that is passed to the ContentHandler from an XMLReader parser. In + * order to receive the uri and prefix information from the XMLReader, the following features + * must be set to true on whatever XMLReader is used: http://xml.org/sax/features/namespaces and + * http://xml.org/sax/features/namespace-prefixes + * + * This class, together with the SAXInfosetInputter, uses coroutines to ensure that only one event, + * at a time, is passed between the two classes. The following is the general process: + * + * - an external call is made to parse an XML Documents + * - this class receives a StartDocument call, which is the first infosetEvent that is sent to + * the SAXInfosetInputter. That event is put on the inputter's queue, this thread is paused, and + * that inputter's thread is run + * - when the SAXInfosetInputter is done processing an event and is ready for a new event, it + * sends the completed event via the coroutine system, and loads it on the contentHandler's + * queue, which restarts this thread and pauses that one. In the expected case, the events will + * contain no new information, until the unparse is completed. + * - this process continues until the EndDocument method is called. Once that infosetEvent is + * sent to the inputter, it signals the end of events coming from the contentHandler. 
This + * ends the unparseProcess and returns the event with the unparseResult and/or any error + * information + * + * @param dp dataprocessor object that will be used to call the parse + * @param output outputChannel of choice where the unparsed data is stored + */ +class DaffodilUnparseContentHandler( + dp: DFDL.DataProcessor, + output: DFDL.Output) + extends DFDL.DaffodilUnparseContentHandler { + private lazy val inputter = new SAXInfosetInputter(this, dp, output) + private var unparseResult: DFDL.UnparseResult = _ + private lazy val infosetEvent: DFDL.SaxInfosetEvent = new DFDL.SaxInfosetEvent + private lazy val characterData = new StringBuilder + private var prefixMapping: NamespaceBinding = _ + private lazy val prefixMappingTrackingStack = new MStackOf[NamespaceBinding] + private var contentHandlerPrefixMappingUsed = false + + /** + * returns null in the case of an DaffodilUnhandledSAXException + */ + def getUnparseResult: DFDL.UnparseResult = unparseResult + + def enableInputterResolutionOfRelativeInfosetBlobURIs(): Unit = inputter.enableResolutionOfRelativeInfosetBlobURIs() + + override def setDocumentLocator(locator: Locator): Unit = { + // do nothing + } + + override def startDocument(): Unit = { + infosetEvent.eventType = One(StartDocument) + sendToInputter() + } + + override def endDocument(): Unit = { + infosetEvent.eventType = One(EndDocument) + sendToInputter() + } + + override def startPrefixMapping(prefix: String, uri: String): Unit = { + contentHandlerPrefixMappingUsed = true + val pre = if (prefix == "") null else prefix + prefixMapping = NamespaceBinding(pre, uri, prefixMapping) + } + + /** + * XMLReader does not guarantee the order of the prefixes called for this function, but it does + * guarantee that this method is called after its corresponding endElement, which means we can + * can just take off the top mappings, because the element that might have cared about the order + * is already done using the prefixMappings + */ + override def 
endPrefixMapping(prefix: String): Unit = { + prefixMapping = if (prefixMapping == null) prefixMapping else prefixMapping.parent + } + + /** + * Uses Attributes, which is passed in to the startElement callback, to extract prefix mappings and + * populate the global prefixMapping + */ + def mapPrefixMappingFromAttributesImpl(atts:Attributes): Unit = { + var i = 0 + while (i < atts.getLength) { + val qName = atts.getQName(i) + if (qName.startsWith("xmlns")) { + val uri = atts.getValue(i) + val prefix = if(qName.contains(":")) { + val pref = qName.split(":").last + pref + } else { Review comment: I think there's potential for very subtle, though rare, bugs. Which are some of the worst. For example, what if we have an attribute that's ``xmlnsblah:foo=whatever``. That's not an xmlns attribute, but this logic would think it is. I think maybe instead we need something more like ```scala if (qname == "xmlns") { prefixMapping = NamespaceBinding(null, uri, prefixMapping) // no prefix } else if (qname.startsWith("xmlns:")) { prefixMapping = NamespaceBinding(qname.substring(6), uri, prefixMapping) } else { // no-op, not a namespace mapping attribute } ``` This also avoids ``contains`` and ``split``, which I think are relatively expensive as far as string functions go--I think they both treat the string parameter as a regex behind the scenes. ########## File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilUnparseContentHandler.scala ########## @@ -0,0 +1,309 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.daffodil.processors + +import scala.util.Try +import scala.xml.NamespaceBinding + +import javax.xml.XMLConstants +import org.apache.daffodil.api.DFDL +import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException +import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException +import org.apache.daffodil.infoset.IllegalContentWhereEventExpected +import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument +import org.apache.daffodil.infoset.InfosetInputterEventType.EndElement +import org.apache.daffodil.infoset.InfosetInputterEventType.StartDocument +import org.apache.daffodil.infoset.InfosetInputterEventType.StartElement +import org.apache.daffodil.infoset.SAXInfosetInputter +import org.apache.daffodil.util.MStackOf +import org.apache.daffodil.util.Maybe.Nope +import org.apache.daffodil.util.Maybe.One +import org.apache.daffodil.util.Misc +import org.xml.sax.Attributes +import org.xml.sax.Locator + +/** + * DaffodilUnparseContentHandler produces InfosetEvent objects for the SAXInfosetInputter to + * consume and convert to a event that the Dataprocessor unparse can use. The infosetEvent object + * is built from information that is passed to the ContentHandler from an XMLReader parser. 
In + * order to receive the uri and prefix information from the XMLReader, the following features + * must be set to true on whatever XMLReader is used: http://xml.org/sax/features/namespaces and + * http://xml.org/sax/features/namespace-prefixes + * + * This class, together with the SAXInfosetInputter, uses coroutines to ensure that only one event, + * at a time, is passed between the two classes. The following is the general process: + * + * - an external call is made to parse an XML Documents + * - this class receives a StartDocument call, which is the first infosetEvent that is sent to + * the SAXInfosetInputter. That event is put on the inputter's queue, this thread is paused, and + * that inputter's thread is run + * - when the SAXInfosetInputter is done processing an event and is ready for a new event, it + * sends the completed event via the coroutine system, and loads it on the contentHandler's + * queue, which restarts this thread and pauses that one. In the expected case, the events will + * contain no new information, until the unparse is completed. + * - this process continues until the EndDocument method is called. Once that infosetEvent is + * sent to the inputter, it signals the end of events coming from the contentHandler. 
This + * ends the unparseProcess and returns the event with the unparseResult and/or any error + * information + * + * @param dp dataprocessor object that will be used to call the parse + * @param output outputChannel of choice where the unparsed data is stored + */ +class DaffodilUnparseContentHandler( + dp: DFDL.DataProcessor, + output: DFDL.Output) + extends DFDL.DaffodilUnparseContentHandler { + private lazy val inputter = new SAXInfosetInputter(this, dp, output) + private var unparseResult: DFDL.UnparseResult = _ + private lazy val infosetEvent: DFDL.SaxInfosetEvent = new DFDL.SaxInfosetEvent + private lazy val characterData = new StringBuilder + private var prefixMapping: NamespaceBinding = _ + private lazy val prefixMappingTrackingStack = new MStackOf[NamespaceBinding] + private var contentHandlerPrefixMappingUsed = false + + /** + * returns null in the case of an DaffodilUnhandledSAXException + */ + def getUnparseResult: DFDL.UnparseResult = unparseResult + + def enableInputterResolutionOfRelativeInfosetBlobURIs(): Unit = inputter.enableResolutionOfRelativeInfosetBlobURIs() + + override def setDocumentLocator(locator: Locator): Unit = { + // do nothing + } + + override def startDocument(): Unit = { + infosetEvent.eventType = One(StartDocument) + sendToInputter() + } + + override def endDocument(): Unit = { + infosetEvent.eventType = One(EndDocument) + sendToInputter() + } + + override def startPrefixMapping(prefix: String, uri: String): Unit = { + contentHandlerPrefixMappingUsed = true + val pre = if (prefix == "") null else prefix + prefixMapping = NamespaceBinding(pre, uri, prefixMapping) + } + + /** + * XMLReader does not guarantee the order of the prefixes called for this function, but it does + * guarantee that this method is called after its corresponding endElement, which means we can + * can just take off the top mappings, because the element that might have cared about the order + * is already done using the prefixMappings + */ + override def 
endPrefixMapping(prefix: String): Unit = { + prefixMapping = if (prefixMapping == null) prefixMapping else prefixMapping.parent + } + + /** + * Uses Attributes, which is passed in to the startElement callback, to extract prefix mappings and + * populate the global prefixMapping + */ + def mapPrefixMappingFromAttributesImpl(atts:Attributes): Unit = { + var i = 0 + while (i < atts.getLength) { + val qName = atts.getQName(i) + if (qName.startsWith("xmlns")) { + val uri = atts.getValue(i) + val prefix = if(qName.contains(":")) { + val pref = qName.split(":").last + pref + } else { + null // NamespaceBinding does not allow blanks so return null instead + } + prefixMapping = NamespaceBinding(prefix, uri, prefixMapping) + } + i += 1 + } + } + + override def startElement(uri: String, localName: String, qName: String, atts: Attributes): Unit = { + // we need to check if the characters data is all whitespace, if it is we drop the whitespace + // data, if it is not, it is an error as starting a new element with actual characterData means + // we haven't hit an endElement yet, which means we're in a complexElement and a complexElement + // cannot have character content + if (characterData.nonEmpty && !Misc.isAllWhitespace(characterData)) { + throw new IllegalContentWhereEventExpected("Non-whitespace characters in complex " + + "Element: " + characterData.toString + ) + } else { + // reset since it was whitespace only + characterData.setLength(0) + } + + if (!contentHandlerPrefixMappingUsed) { + // always pushes but doesn't always add a mapping since atts can be empty + prefixMappingTrackingStack.push(prefixMapping) + mapPrefixMappingFromAttributesImpl(atts) + } + + if (!infosetEvent.isEmpty && infosetEvent.localName.isDefined) { + // we started another element while we were in the process of building a startElement + // this means the first element was complex and we are ready for the inputter queue + sendToInputter() + } + // use Attributes to determine xsi:nil value + val 
nilIn = atts.getIndex(XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI, "nil") + infosetEvent.nilValue = if (nilIn >= 0) { + val nilValue = atts.getValue(nilIn) + One(nilValue) + } else { + Nope + } + // set localName and namespaceURI + lazy val qNameArr = qName.split(":") + infosetEvent.localName = + if (localName.nonEmpty) { + One(localName) + } else if (qNameArr.length > 1) { + One(qNameArr.last) + } else if (qNameArr.nonEmpty) { + One(qNameArr.head) + } else { + Nope + } + infosetEvent.namespaceURI = + if (uri.nonEmpty) { + One(uri) + } else if (qNameArr.length > 1) { // has a prefix + // this use case is for the situation where the XMLReader doesn't pass in uri/localname, + // so prefix information must be determined via AttributeImpl attribute. This can usually Review comment: Do you mean Attribute instead of AttributeImpl here? The parameter passed in in just Attribute. ########## File path: daffodil-japi/src/main/scala/org/apache/daffodil/japi/Daffodil.scala ########## @@ -953,3 +989,72 @@ class DaffodilXMLReader private[japi] (xmlrdr: SDaffodilXMLReader) extends org.x */ def parse(arr: Array[Byte]): Unit = xmlrdr.parse(arr) } + +/** + * Accepts SAX callback events from any SAX XMLReader for unparsing + */ +class DaffodilUnparseContentHandler private[japi] (sContentHandler: SDaffodilUnparseContentHandler) + extends ContentHandlerProxy { + + override protected val contentHandler: org.xml.sax.ContentHandler = sContentHandler + + /** + * Returns the result of the SAX unparse containing diagnostic information. In the case of an + * DaffodilUnhandledSAXException, this will return null. + */ + def getUnparseResult: UnparseResult = + new UnparseResult(sContentHandler.getUnparseResult.asInstanceOf[SUnparseResult]) Review comment: Rather than changing the comment, since this just wraps the internal UnparseResult in an SAPI UnparseResult, this probably should be something like ```scala val res = sContentHandler.getUnparseResult(...) 
if (res == null) null else new UnparseResult(res) ``` So we still return null if there is no unparse result, but if there is then we wrap it with the SAPI wrapper. ########## File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilUnparseContentHandler.scala ########## @@ -0,0 +1,309 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.daffodil.processors + +import scala.util.Try +import scala.xml.NamespaceBinding + +import javax.xml.XMLConstants +import org.apache.daffodil.api.DFDL +import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException +import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException +import org.apache.daffodil.infoset.IllegalContentWhereEventExpected +import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument +import org.apache.daffodil.infoset.InfosetInputterEventType.EndElement +import org.apache.daffodil.infoset.InfosetInputterEventType.StartDocument +import org.apache.daffodil.infoset.InfosetInputterEventType.StartElement +import org.apache.daffodil.infoset.SAXInfosetInputter +import org.apache.daffodil.util.MStackOf +import org.apache.daffodil.util.Maybe.Nope +import org.apache.daffodil.util.Maybe.One +import org.apache.daffodil.util.Misc +import org.xml.sax.Attributes +import org.xml.sax.Locator + +/** + * DaffodilUnparseContentHandler produces InfosetEvent objects for the SAXInfosetInputter to + * consume and convert to a event that the Dataprocessor unparse can use. The infosetEvent object + * is built from information that is passed to the ContentHandler from an XMLReader parser. In + * order to receive the uri and prefix information from the XMLReader, the following features + * must be set to true on whatever XMLReader is used: http://xml.org/sax/features/namespaces and + * http://xml.org/sax/features/namespace-prefixes + * + * This class, together with the SAXInfosetInputter, uses coroutines to ensure that only one event, + * at a time, is passed between the two classes. The following is the general process: + * + * - an external call is made to parse an XML Documents + * - this class receives a StartDocument call, which is the first infosetEvent that is sent to + * the SAXInfosetInputter. 
That event is put on the inputter's queue, this thread is paused, and + * that inputter's thread is run + * - when the SAXInfosetInputter is done processing an event and is ready for a new event, it + * sends the completed event via the coroutine system, and loads it on the contentHandler's + * queue, which restarts this thread and pauses that one. In the expected case, the events will + * contain no new information, until the unparse is completed. + * - this process continues until the EndDocument method is called. Once that infosetEvent is + * sent to the inputter, it signals the end of events coming from the contentHandler. This + * ends the unparseProcess and returns the event with the unparseResult and/or any error + * information + * + * @param dp dataprocessor object that will be used to call the parse + * @param output outputChannel of choice where the unparsed data is stored + */ +class DaffodilUnparseContentHandler( + dp: DFDL.DataProcessor, + output: DFDL.Output) + extends DFDL.DaffodilUnparseContentHandler { + private lazy val inputter = new SAXInfosetInputter(this, dp, output) + private var unparseResult: DFDL.UnparseResult = _ + private lazy val infosetEvent: DFDL.SaxInfosetEvent = new DFDL.SaxInfosetEvent + private lazy val characterData = new StringBuilder + private var prefixMapping: NamespaceBinding = _ + private lazy val prefixMappingTrackingStack = new MStackOf[NamespaceBinding] + private var contentHandlerPrefixMappingUsed = false + + /** + * returns null in the case of an DaffodilUnhandledSAXException + */ + def getUnparseResult: DFDL.UnparseResult = unparseResult + + def enableInputterResolutionOfRelativeInfosetBlobURIs(): Unit = inputter.enableResolutionOfRelativeInfosetBlobURIs() + + override def setDocumentLocator(locator: Locator): Unit = { + // do nothing + } + + override def startDocument(): Unit = { + infosetEvent.eventType = One(StartDocument) + sendToInputter() + } + + override def endDocument(): Unit = { + infosetEvent.eventType = 
One(EndDocument) + sendToInputter() + } + + override def startPrefixMapping(prefix: String, uri: String): Unit = { + contentHandlerPrefixMappingUsed = true + val pre = if (prefix == "") null else prefix + prefixMapping = NamespaceBinding(pre, uri, prefixMapping) + } + + /** + * XMLReader does not guarantee the order of the prefixes called for this function, but it does + * guarantee that this method is called after its corresponding endElement, which means we can + * can just take off the top mappings, because the element that might have cared about the order + * is already done using the prefixMappings + */ + override def endPrefixMapping(prefix: String): Unit = { + prefixMapping = if (prefixMapping == null) prefixMapping else prefixMapping.parent + } + + /** + * Uses Attributes, which is passed in to the startElement callback, to extract prefix mappings and + * populate the global prefixMapping + */ + def mapPrefixMappingFromAttributesImpl(atts:Attributes): Unit = { + var i = 0 + while (i < atts.getLength) { + val qName = atts.getQName(i) + if (qName.startsWith("xmlns")) { + val uri = atts.getValue(i) + val prefix = if(qName.contains(":")) { + val pref = qName.split(":").last + pref + } else { + null // NamespaceBinding does not allow blanks so return null instead + } + prefixMapping = NamespaceBinding(prefix, uri, prefixMapping) + } + i += 1 + } + } + + override def startElement(uri: String, localName: String, qName: String, atts: Attributes): Unit = { + // we need to check if the characters data is all whitespace, if it is we drop the whitespace + // data, if it is not, it is an error as starting a new element with actual characterData means + // we haven't hit an endElement yet, which means we're in a complexElement and a complexElement + // cannot have character content + if (characterData.nonEmpty && !Misc.isAllWhitespace(characterData)) { + throw new IllegalContentWhereEventExpected("Non-whitespace characters in complex " + + "Element: " + 
characterData.toString + ) + } else { + // reset since it was whitespace only + characterData.setLength(0) + } + + if (!contentHandlerPrefixMappingUsed) { + // always pushes but doesn't always add a mapping since atts can be empty + prefixMappingTrackingStack.push(prefixMapping) + mapPrefixMappingFromAttributesImpl(atts) + } + + if (!infosetEvent.isEmpty && infosetEvent.localName.isDefined) { + // we started another element while we were in the process of building a startElement + // this means the first element was complex and we are ready for the inputter queue + sendToInputter() + } + // use Attributes to determine xsi:nil value + val nilIn = atts.getIndex(XMLConstants.W3C_XML_SCHEMA_INSTANCE_NS_URI, "nil") + infosetEvent.nilValue = if (nilIn >= 0) { + val nilValue = atts.getValue(nilIn) + One(nilValue) + } else { + Nope + } + // set localName and namespaceURI + lazy val qNameArr = qName.split(":") + infosetEvent.localName = + if (localName.nonEmpty) { + One(localName) + } else if (qNameArr.length > 1) { + One(qNameArr.last) + } else if (qNameArr.nonEmpty) { + One(qNameArr.head) + } else { + Nope + } + infosetEvent.namespaceURI = + if (uri.nonEmpty) { + One(uri) + } else if (qNameArr.length > 1) { // has a prefix + // this use case is for the situation where the XMLReader doesn't pass in uri/localname, + // so prefix information must be determined via AttributeImpl attribute. This can usually + // be remedied by setting the SAX_NAMESPACES_FEATURE of the XMLReader that has this class + // as its contentHandler. + + // get the prefix off the qname + lazy val qNamePrefix = qNameArr.head + // look up prefix with and without xmlns prefix + if (prefixMapping != null) { + try { + One(prefixMapping.getURI(qNamePrefix)) + } catch { + case _: NullPointerException => Nope + } + } else { + Nope + } + } else { + // this case is a last ditch attempt to try to get the namespace when the prefix is "" + // since namespacebinding doesn't take "", we convert to null. 
+ try { + if (prefixMapping != null) { + One(prefixMapping.getURI(null)) + } else { + Nope + } + } catch { + case _: NullPointerException => Nope + } + } + infosetEvent.eventType = One(StartElement) + } + + override def endElement(uri: String, localName: String, qName: String): Unit = { + // if infosetEvent is a startElement, send that first + if (infosetEvent.eventType.contains(StartElement)) { + // any characterData that exists at this point is valid data as padding data has been + // taken care of in startElement + val maybeNewStr = One(characterData.toString) + infosetEvent.simpleText = maybeNewStr + characterData.setLength(0) + sendToInputter() + } + + lazy val qNameArr = qName.split(":") + infosetEvent.localName = + if (localName.nonEmpty) { + One(localName) + } else if (qNameArr.nonEmpty) { + One(qNameArr.last) + } else { + Nope + } + infosetEvent.namespaceURI = + if (uri.nonEmpty) { + One(uri) + } else if (qNameArr.length > 1) { + try { + One(prefixMapping.getURI(qNameArr.head)) + } catch { + case _: NullPointerException => Nope + } + } else { + try { + One(prefixMapping.getURI(null)) + } catch { + case _: NullPointerException => Nope + } + } + infosetEvent.eventType = One(EndElement) + + if (!contentHandlerPrefixMappingUsed) { + // always pops + prefixMapping = prefixMappingTrackingStack.pop + } + sendToInputter() + } + Review comment: Can the logic involving uri, localname and qname be factored out into a helper function? It looks like the logic in startElement and endElement is the same? ########## File path: daffodil-japi/src/main/scala/org/apache/daffodil/japi/Daffodil.scala ########## @@ -835,11 +845,29 @@ class InvalidParserException private[japi] (cause: org.apache.daffodil.compiler. 
*/ class InvalidUsageException private[japi] (cause: org.apache.daffodil.processors.InvalidUsageException) extends Exception(cause.getMessage(), cause.getCause()) +/** + * This exception will be thrown when when the unparseResult.isError during a SAX Unparse Review comment: Should be something like ``... when the unparseResult.isError is true during ...``. ########## File path: daffodil-japi/src/main/scala/org/apache/daffodil/japi/Daffodil.scala ########## @@ -835,11 +845,29 @@ class InvalidParserException private[japi] (cause: org.apache.daffodil.compiler. */ class InvalidUsageException private[japi] (cause: org.apache.daffodil.processors.InvalidUsageException) extends Exception(cause.getMessage(), cause.getCause()) +/** + * This exception will be thrown when when the unparseResult.isError during a SAX Unparse + */ +class DaffodilUnparseErrorSAXException private[japi] (exception: SDaffodilUnparseErrorSAXException) + extends org.xml.sax.SAXException(exception.getMessage) + +/** + * This exception will be thrown when an unexpected error occurs during the SAX unparse + */ +class DaffodilUnhandledSAXException private[japi] (exception: SDaffodilUnhandledSAXException) + extends org.xml.sax.SAXException(exception.getMessage, new Exception(exception.getCause)) + /** * SAX method of parsing schema and getting the DFDL Infoset via some * org.xml.sax.ContentHandler, based on the org.xml.sax.XMLReader interface */ -class DaffodilXMLReader private[japi] (xmlrdr: SDaffodilXMLReader) extends org.xml.sax.XMLReader { +class DaffodilParseXMLReader private[japi] (xmlrdr: SDaffodilParseXMLReader) extends org.xml.sax.XMLReader { + + lazy val DAFFODIL_SAX_URN_PARSERESULT = "urn:ogf:dfdl:2013:imp:daffodil.apache.org:2018:sax:ParseResult" + lazy val DAFFODIL_SAX_URN_BLOBDIRECTORY= "urn:ogf:dfdl:2013:imp:daffodil.apache.org:2018:sax:BlobDirectory" + lazy val DAFFODIL_SAX_URN_BLOBPREFIX = "urn:ogf:dfdl:2013:imp:daffodil.apache.org:2018:sax:BlobPrefix" + lazy val DAFFODIL_SAX_URN_BLOBSUFFIX 
= "urn:ogf:dfdl:2013:imp:daffodil.apache.org:2018:sax:BlobSuffix" + Review comment: Can these be moved to an ``object DaffodilParseXMLReader`` so that they become static and so are not defined for every XMLReader instance. Then accessing this becomes ``DaffodilParseXMLReader.DAFFODIL_SAX_....``. Also, no need for them to be lazy if defined in an object since they'll only ever be evaluated once. Also, it's probably a good idea for the values to be set to ``XMLUtils.whatever`` so that we don't have to change them multiple times. API users shouldn't necessarily directly access XMLUtils, but it's fine for our API to access them. ########## File path: daffodil-cli/src/main/scala/org/apache/daffodil/Main.scala ########## @@ -779,24 +782,32 @@ object Main extends Logging { } } } + case "sax" => data Review comment: Note that this change DAFFODIL-2421 has been fixed and merged and will probably cause a merge conflict here. To deal with the merge conflict for this function, the "sax" implementation will want to just be a copy/paste of the xml or json cases, so that this function can return either a byte array or an InputStream. The unparseWithSAX function will then need to be able to handle an AnyRef value that is either an InputStream or an Array[Byte] ########## File path: daffodil-japi/src/main/scala/org/apache/daffodil/japi/Daffodil.scala ########## @@ -953,3 +989,72 @@ class DaffodilXMLReader private[japi] (xmlrdr: SDaffodilXMLReader) extends org.x */ def parse(arr: Array[Byte]): Unit = xmlrdr.parse(arr) } + +/** + * Accepts SAX callback events from any SAX XMLReader for unparsing + */ +class DaffodilUnparseContentHandler private[japi] (sContentHandler: SDaffodilUnparseContentHandler) + extends ContentHandlerProxy { + + override protected val contentHandler: org.xml.sax.ContentHandler = sContentHandler + + /** + * Returns the result of the SAX unparse containing diagnostic information. In the case of an + * DaffodilUnhandledSAXException, this will return null. 
+ */ + def getUnparseResult: UnparseResult = + new UnparseResult(sContentHandler.getUnparseResult.asInstanceOf[SUnparseResult]) +} + +/* A proxy for existing contentHandlers */ +abstract class ContentHandlerProxy extends org.xml.sax.ContentHandler { + + protected val contentHandler: org.xml.sax.ContentHandler + + override def setDocumentLocator(locator: org.xml.sax.Locator): Unit = + contentHandler.setDocumentLocator(locator) + + override def startDocument(): Unit = + try { + contentHandler.startDocument() + } catch { + case e: SDaffodilUnparseErrorSAXException => throw new DaffodilUnparseErrorSAXException(e) + case e: SDaffodilUnhandledSAXException => throw new DaffodilUnhandledSAXException(e) + } + + override def endDocument(): Unit = + try { + contentHandler.endDocument() + } catch { + case e: SDaffodilUnparseErrorSAXException => throw new DaffodilUnparseErrorSAXException(e) + case e: SDaffodilUnhandledSAXException => throw new DaffodilUnhandledSAXException(e) + } + + override def startPrefixMapping(prefix: String, uri: String): Unit = + contentHandler.startPrefixMapping(prefix, uri) + override def endPrefixMapping(prefix: String): Unit = contentHandler.endPrefixMapping(prefix) + + override def startElement(uri: String, localName: String, qName: String, atts: org.xml.sax.Attributes): Unit = + try { + contentHandler.startElement(uri, localName, qName, atts) + } catch { + case e: SDaffodilUnparseErrorSAXException => throw new DaffodilUnparseErrorSAXException(e) + case e: SDaffodilUnhandledSAXException => throw new DaffodilUnhandledSAXException(e) + } + + override def endElement(uri: String, localName: String, qName: String): Unit = + try { + contentHandler.endElement(uri, localName, qName) + } catch { + case e: SDaffodilUnparseErrorSAXException => throw new DaffodilUnparseErrorSAXException(e) + case e: SDaffodilUnhandledSAXException => throw new DaffodilUnhandledSAXException(e) + } + + override def characters(ch: Array[Char], start: Int, length: Int): Unit = + 
contentHandler.characters(ch, start, length) + override def ignorableWhitespace(ch: Array[Char], start: Int, length: Int): Unit = + contentHandler.ignorableWhitespace(ch, start, length) + override def processingInstruction(target: String, data: String): Unit = + contentHandler.processingInstruction(target, data) + override def skippedEntity(name: String): Unit = contentHandler.skippedEntity(name) Review comment: Just for consistency and ease of scanning code, it's probably wise to move the skippedEntity on a new line like the others. ########## File path: daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilUnparseContentHandler.scala ########## @@ -0,0 +1,309 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.daffodil.processors + +import scala.util.Try +import scala.xml.NamespaceBinding + +import javax.xml.XMLConstants +import org.apache.daffodil.api.DFDL +import org.apache.daffodil.api.DFDL.DaffodilUnhandledSAXException +import org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException +import org.apache.daffodil.infoset.IllegalContentWhereEventExpected +import org.apache.daffodil.infoset.InfosetInputterEventType.EndDocument +import org.apache.daffodil.infoset.InfosetInputterEventType.EndElement +import org.apache.daffodil.infoset.InfosetInputterEventType.StartDocument +import org.apache.daffodil.infoset.InfosetInputterEventType.StartElement +import org.apache.daffodil.infoset.SAXInfosetInputter +import org.apache.daffodil.util.MStackOf +import org.apache.daffodil.util.Maybe.Nope +import org.apache.daffodil.util.Maybe.One +import org.apache.daffodil.util.Misc +import org.xml.sax.Attributes +import org.xml.sax.Locator + +/** + * DaffodilUnparseContentHandler produces InfosetEvent objects for the SAXInfosetInputter to + * consume and convert to a event that the Dataprocessor unparse can use. The infosetEvent object + * is built from information that is passed to the ContentHandler from an XMLReader parser. In + * order to receive the uri and prefix information from the XMLReader, the following features + * must be set to true on whatever XMLReader is used: http://xml.org/sax/features/namespaces and + * http://xml.org/sax/features/namespace-prefixes + * + * This class, together with the SAXInfosetInputter, uses coroutines to ensure that only one event, + * at a time, is passed between the two classes. The following is the general process: + * + * - an external call is made to parse an XML Documents + * - this class receives a StartDocument call, which is the first infosetEvent that is sent to + * the SAXInfosetInputter. 
That event is put on the inputter's queue, this thread is paused, and + * that inputter's thread is run + * - when the SAXInfosetInputter is done processing an event and is ready for a new event, it + * sends the completed event via the coroutine system, and loads it on the contentHandler's + * queue, which restarts this thread and pauses that one. In the expected case, the events will + * contain no new information, until the unparse is completed. + * - this process continues until the EndDocument method is called. Once that infosetEvent is + * sent to the inputter, it signals the end of events coming from the contentHandler. This + * ends the unparseProcess and returns the event with the unparseResult and/or any error + * information + * + * @param dp dataprocessor object that will be used to call the parse + * @param output outputChannel of choice where the unparsed data is stored + */ +class DaffodilUnparseContentHandler( + dp: DFDL.DataProcessor, + output: DFDL.Output) + extends DFDL.DaffodilUnparseContentHandler { + private lazy val inputter = new SAXInfosetInputter(this, dp, output) + private var unparseResult: DFDL.UnparseResult = _ + private lazy val infosetEvent: DFDL.SaxInfosetEvent = new DFDL.SaxInfosetEvent + private lazy val characterData = new StringBuilder + private var prefixMapping: NamespaceBinding = _ + private lazy val prefixMappingTrackingStack = new MStackOf[NamespaceBinding] + private var contentHandlerPrefixMappingUsed = false + + /** + * returns null in the case of an DaffodilUnhandledSAXException + */ + def getUnparseResult: DFDL.UnparseResult = unparseResult + + def enableInputterResolutionOfRelativeInfosetBlobURIs(): Unit = inputter.enableResolutionOfRelativeInfosetBlobURIs() + + override def setDocumentLocator(locator: Locator): Unit = { + // do nothing + } + + override def startDocument(): Unit = { + infosetEvent.eventType = One(StartDocument) + sendToInputter() + } + + override def endDocument(): Unit = { + infosetEvent.eventType = 
One(EndDocument) + sendToInputter() + } + + override def startPrefixMapping(prefix: String, uri: String): Unit = { + contentHandlerPrefixMappingUsed = true Review comment: Does the SAX API say anything about not mixing use of startPrefixMapping and xmlns in the Attributes? It seems reasonable to only have one or the other, but if they are allowed to be mixed, we might not want to disable the mapPrefixMapping stuff if we see startPrefixMapping. It's definitely nice if they can't be mixed, because then we can avoid that stack logic and parsing Attributes for xmlns. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected]
