stevedlawrence commented on code in PR #873: URL: https://github.com/apache/daffodil/pull/873#discussion_r1021549856
########## daffodil-cli/src/main/scala/org/apache/daffodil/InfosetTypes.scala: ########## @@ -0,0 +1,697 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.daffodil + +import java.io.ByteArrayInputStream +import java.io.InputStream +import java.io.OutputStream +import java.net.URI +import java.nio.charset.StandardCharsets +import javax.xml.parsers.DocumentBuilderFactory +import javax.xml.transform.TransformerFactory +import javax.xml.transform.dom.DOMSource +import javax.xml.transform.stream.StreamResult + +import scala.collection.mutable.ArrayBuffer +import scala.xml.SAXParser + +import com.siemens.ct.exi.core.EXIFactory +import com.siemens.ct.exi.core.helpers.DefaultEXIFactory +import com.siemens.ct.exi.grammars.GrammarFactory +import com.siemens.ct.exi.main.api.sax.EXIResult +import com.siemens.ct.exi.main.api.sax.EXISource + +import org.apache.commons.io.IOUtils + +import org.xml.sax.Attributes +import org.xml.sax.ContentHandler +import org.xml.sax.InputSource +import org.xml.sax.XMLReader +import org.xml.sax.Locator +import org.xml.sax.helpers.DefaultHandler + +import org.apache.daffodil.api.DFDL +import org.apache.daffodil.api.DFDL.DataProcessor +import 
org.apache.daffodil.api.DFDL.DaffodilUnparseErrorSAXException +import org.apache.daffodil.api.DFDL.ParseResult +import org.apache.daffodil.api.DFDL.UnparseResult +import org.apache.daffodil.infoset.InfosetInputter +import org.apache.daffodil.infoset.InfosetOutputter +import org.apache.daffodil.infoset.JDOMInfosetInputter +import org.apache.daffodil.infoset.JDOMInfosetOutputter +import org.apache.daffodil.infoset.JsonInfosetInputter +import org.apache.daffodil.infoset.JsonInfosetOutputter +import org.apache.daffodil.infoset.NullInfosetInputter +import org.apache.daffodil.infoset.NullInfosetOutputter +import org.apache.daffodil.infoset.ScalaXMLInfosetInputter +import org.apache.daffodil.infoset.ScalaXMLInfosetOutputter +import org.apache.daffodil.infoset.W3CDOMInfosetInputter +import org.apache.daffodil.infoset.W3CDOMInfosetOutputter +import org.apache.daffodil.infoset.XMLTextInfosetInputter +import org.apache.daffodil.infoset.XMLTextInfosetOutputter +import org.apache.daffodil.io.InputSourceDataInputStream +import org.apache.daffodil.processors.DaffodilParseOutputStreamContentHandler +import org.apache.daffodil.xml.DFDLCatalogResolver +import org.apache.daffodil.xml.DaffodilSAXParserFactory +import org.apache.daffodil.xml.XMLUtils + +object InfosetType extends Enumeration { + type Type = Value + + val EXI = Value("exi") + val EXISA = Value("exisa") + val JDOM = Value("jdom") + val JSON = Value("json") + val NULL = Value("null") + val SAX = Value("sax") + val SCALA_XML = Value("scala-xml") + val W3CDOM = Value("w3cdom") + val XML = Value("xml") + + /** + * Get an InfosetHandler, with the goal of doing as much initialization/work + * prior to calling the parse() or unparse() methods to improve accuracy of + * performance metrics + * + * @param infosetType the type of InfosetHandler to create + * @param dataProcessor the dataProcessor that the InfosetHandler should user + * during parse/unparse operations + * @param schemaUri only used for EXISA, to support schema 
aware + * parsing/unparsing + * @param forPerformance only used for SAX. If true, the + * SAXInfosetHandler will drop all SAX events on parse, and will + * pre-process the infoset into an array of SAX events and replay them on + * unparse. If false, it directly parses and unparses to/from XML + * text--this allows the caller to visualize the SAX as XML, similar + * to how JDOM, SCALA_XML, etc can be serialized to strings + */ + def getInfosetHandler( + infosetType: InfosetType.Type, + dataProcessor: DFDL.DataProcessor, + schemaUri: Option[URI], + forPerformance: Boolean): InfosetHandler = { + + infosetType match { + case InfosetType.EXI => EXIInfosetHandler(dataProcessor) + case InfosetType.EXISA => EXIInfosetHandler(dataProcessor, schemaUri.get) + case InfosetType.JDOM => JDOMInfosetHandler(dataProcessor) + case InfosetType.JSON => JsonInfosetHandler(dataProcessor) + case InfosetType.NULL => NULLInfosetHandler(dataProcessor) + case InfosetType.SAX => SAXInfosetHandler(dataProcessor, forPerformance) + case InfosetType.SCALA_XML => ScalaXMLInfosetHandler(dataProcessor) + case InfosetType.W3CDOM => W3CDOMInfosetHandler(dataProcessor) + case InfosetType.XML => XMLTextInfosetHandler(dataProcessor) + } + } +} + +sealed trait InfosetHandler { + + /** + * Parse data using the provided DataProcessor and InputSourceDataInputStream. + * + * This may optionally write to the OutputStream if it is required for + * parsing. If this writes to the OutputStream, it should just return a new + * InfosetParseResult instance. If this does not write to the output stream + * and there is a reasonable serialization to XML, it should return a custom + * implementation of InfosetToString that overrides the write() function to + * do so. 
The write function will only be called if the ParseResult is + * successful + * + * This will be called in a performance loop, so as much preprocessing and + * initialization as possible should be done in the InfosetHandler constructor Review Comment: This is much improved, and is an accurate description. Thanks! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
