This is an automated email from the ASF dual-hosted git repository.
olabusayo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git
The following commit(s) were added to refs/heads/main by this push:
new fd4be9255 Add use of DAFFODIL_TDML_API_INFOSETS environment variable
fd4be9255 is described below
commit fd4be9255408438dddb197221edb8e948ed14304
Author: olabusayoT <[email protected]>
AuthorDate: Tue Nov 5 21:20:56 2024 -0500
Add use of DAFFODIL_TDML_API_INFOSETS environment variable
- currently, during testing, we parse/unparse using both the SAX and non-SAX
APIs, which leads to confusing behavior such as trace output being printed
twice for the same test. We also run the parse with all of our infoset
outputters. With this environment variable, we default to the more efficient
combination of a single infoset outputter (scalaxml) and a single API (non-SAX)
for parse/unparse.
- the DAFFODIL_TDML_API_INFOSETS environment variable has two options, 'scala'
and 'all', with 'scala' being the default and 'all' preserving the current
behavior of running both APIs and all infoset outputters
- we convert TDMLInfosetOutputter to a trait so that the All and Scala
subclasses can extend both it and TeeInfosetOutputter
- get rid of unused and inaccessible parse function
- set CI mode to all for regression testing
- add integration tests showing use of the 'scala' and 'all' modes
DAFFODIL-2904
---
.github/workflows/main.yml | 1 +
.../processor/tdml/DaffodilTDMLDFDLProcessor.scala | 200 +++++++++++----------
.../processor/tdml/TDMLInfosetOutputter.scala | 48 ++++-
.../org/apache/daffodil/cliTest/TestCLITdml.scala | 58 ++++++
4 files changed, 210 insertions(+), 97 deletions(-)
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 332203c3f..55ec18377 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -95,6 +95,7 @@ jobs:
github.repository == 'apache/daffodil' &&
github.ref == 'refs/heads/main'
}}
+ DAFFODIL_TDML_API_INFOSETS: all
steps:
diff --git
a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala
b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala
index 6c5a40f85..4ae2a2e84 100644
---
a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala
+++
b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/DaffodilTDMLDFDLProcessor.scala
@@ -176,6 +176,8 @@ class DaffodilTDMLDFDLProcessor private (private var dp:
DataProcessor)
private def blobPrefix = ""
private def blobSuffix = ".bin"
+ private lazy val tdmlApiInfosetsEnv =
sys.env.getOrElse("DAFFODIL_TDML_API_INFOSETS", "scala")
+
override def withDebugging(b: Boolean): DaffodilTDMLDFDLProcessor =
copy(dp = dp.withDebugging(b))
@@ -206,22 +208,16 @@ class DaffodilTDMLDFDLProcessor private (private var dp:
DataProcessor)
): DaffodilTDMLDFDLProcessor =
copy(dp = dp.withExternalVariables(externalVarBindings))
- def parse(uri: java.net.URI, lengthLimitInBits: Long): TDMLParseResult = {
- val url = uri.toURL
- val dpInputStream = url.openStream()
- val saxInputStream = url.openStream()
- doParseWithBothApis(dpInputStream, saxInputStream, lengthLimitInBits)
- }
-
- def parse(arr: Array[Byte], lengthLimitInBits: Long): TDMLParseResult = {
- val dpInputStream = new ByteArrayInputStream(arr)
- val saxInputStream = new ByteArrayInputStream(arr)
- doParseWithBothApis(dpInputStream, saxInputStream, lengthLimitInBits)
- }
-
override def parse(is: java.io.InputStream, lengthLimitInBits: Long):
TDMLParseResult = {
- val arr = IOUtils.toByteArray(is)
- parse(arr, lengthLimitInBits)
+ val (dpInputStream, optSaxInputStream) = if (tdmlApiInfosetsEnv == "all") {
+ val arr = IOUtils.toByteArray(is)
+ val saxInputStream = new ByteArrayInputStream(arr)
+ val dpInputStream = new ByteArrayInputStream(arr)
+ (dpInputStream, Some(saxInputStream))
+ } else {
+ (is, None)
+ }
+ doParse(dpInputStream, optSaxInputStream, lengthLimitInBits)
}
override def unparse(
@@ -252,104 +248,126 @@ class DaffodilTDMLDFDLProcessor private (private var
dp: DataProcessor)
infosetXML: scala.xml.Node,
outStream: java.io.OutputStream
): TDMLUnparseResult = {
- val bos = new ByteArrayOutputStream()
- val osw = new OutputStreamWriter(bos, StandardCharsets.UTF_8)
- scala.xml.XML.write(osw, infosetXML, "UTF-8", xmlDecl = true, null)
- osw.flush()
- osw.close()
- val saxInstream = new ByteArrayInputStream(bos.toByteArray)
- doUnparseWithBothApis(inputter, saxInstream, outStream)
+ val optSaxInstream = if (tdmlApiInfosetsEnv == "all") {
+ val bos = new ByteArrayOutputStream()
+ val osw = new OutputStreamWriter(bos, StandardCharsets.UTF_8)
+ scala.xml.XML.write(osw, infosetXML, "UTF-8", xmlDecl = true, null)
+ osw.flush()
+ osw.close()
+ val sis = new ByteArrayInputStream(bos.toByteArray)
+ Some(sis)
+ } else {
+ None
+ }
+ doUnparse(inputter, optSaxInstream, outStream)
}
- def doParseWithBothApis(
+ def doParse(
dpInputStream: java.io.InputStream,
- saxInputStream: java.io.InputStream,
+ optSaxInputStream: Option[java.io.InputStream] = None,
lengthLimitInBits: Long
): TDMLParseResult = {
- val outputter = new TDMLInfosetOutputter()
+ val outputter = if (tdmlApiInfosetsEnv == "all") {
+ new TDMLInfosetOutputterAll
+ } else {
+ new TDMLInfosetOutputterScala
+ }
outputter.setBlobAttributes(blobDir, blobPrefix, blobSuffix)
- val xri = dp.newXMLReaderInstance
- val errorHandler = new DaffodilTDMLSAXErrorHandler()
- val saxOutputStream = new ByteArrayOutputStream()
- val saxHandler =
- new DaffodilParseOutputStreamContentHandler(saxOutputStream, pretty =
false)
- xri.setContentHandler(saxHandler)
- xri.setErrorHandler(errorHandler)
- xri.setProperty(XMLUtils.DAFFODIL_SAX_URN_BLOBDIRECTORY, blobDir)
- xri.setProperty(XMLUtils.DAFFODIL_SAX_URN_BLOBPREFIX, blobPrefix)
- xri.setProperty(XMLUtils.DAFFODIL_SAX_URN_BLOBSUFFIX, blobSuffix)
-
using(InputSourceDataInputStream(dpInputStream)) { dis =>
- using(InputSourceDataInputStream(saxInputStream)) { sis =>
- // The length limit here should be the length of the document
- // under test. Only set a limit when the end of the document
- // do not match a byte boundary.
- if (lengthLimitInBits % 8 != 0) {
- Assert.usage(lengthLimitInBits >= 0)
- dis.setBitLimit0b(MaybeULong(lengthLimitInBits))
- sis.setBitLimit0b(MaybeULong(lengthLimitInBits))
- }
-
- val actual = dp.parse(dis, outputter)
- xri.parse(sis)
+ // The length limit here should be the length of the document
+ // under test. Only set a limit when the end of the document
+ // do not match a byte boundary.
+ if (lengthLimitInBits % 8 != 0) {
+ Assert.usage(lengthLimitInBits >= 0)
+ dis.setBitLimit0b(MaybeULong(lengthLimitInBits))
+ }
- if (!actual.isError && !errorHandler.isError) {
- verifySameParseOutput(outputter.xmlStream, saxOutputStream)
+ val actual = dp.parse(dis, outputter)
+ if (tdmlApiInfosetsEnv == "all") {
+ val saxInputStream = optSaxInputStream.get
+ using(InputSourceDataInputStream(saxInputStream)) { sis =>
+ // The length limit here should be the length of the document
+ // under test. Only set a limit when the end of the document
+ // do not match a byte boundary.
+ if (lengthLimitInBits % 8 != 0) {
+ Assert.usage(lengthLimitInBits >= 0)
+ sis.setBitLimit0b(MaybeULong(lengthLimitInBits))
+ }
+
+ val xri = dp.newXMLReaderInstance
+ val errorHandler = new DaffodilTDMLSAXErrorHandler()
+ val saxOutputStream = new ByteArrayOutputStream()
+ val saxHandler =
+ new DaffodilParseOutputStreamContentHandler(saxOutputStream,
pretty = false)
+ xri.setContentHandler(saxHandler)
+ xri.setErrorHandler(errorHandler)
+ xri.setProperty(XMLUtils.DAFFODIL_SAX_URN_BLOBDIRECTORY, blobDir)
+ xri.setProperty(XMLUtils.DAFFODIL_SAX_URN_BLOBPREFIX, blobPrefix)
+ xri.setProperty(XMLUtils.DAFFODIL_SAX_URN_BLOBSUFFIX, blobSuffix)
+
+ xri.parse(sis)
+
+ if (!actual.isError && !errorHandler.isError) {
+ verifySameParseOutput(outputter.xmlStream, saxOutputStream)
+ }
+ val dpParseDiag = actual.getDiagnostics.map(_.getMessage())
+ val saxParseDiag = errorHandler.getDiagnostics.map(_.getMessage())
+ verifySameDiagnostics(dpParseDiag, saxParseDiag)
}
- val dpParseDiag = actual.getDiagnostics.map(_.getMessage())
- val saxParseDiag = errorHandler.getDiagnostics.map(_.getMessage())
- verifySameDiagnostics(dpParseDiag, saxParseDiag)
-
- new DaffodilTDMLParseResult(actual, outputter)
}
+ new DaffodilTDMLParseResult(actual, outputter)
}
}
- def doUnparseWithBothApis(
+ def doUnparse(
dpInputter: TDMLInfosetInputter,
- saxInputStream: java.io.InputStream,
+ optSaxInputStream: Option[java.io.InputStream] = None,
dpOutputStream: java.io.OutputStream
): DaffodilTDMLUnparseResult = {
val dpOutputChannel = java.nio.channels.Channels.newChannel(dpOutputStream)
- val saxOutputStream = new ByteArrayOutputStream
- val saxOutputChannel =
java.nio.channels.Channels.newChannel(saxOutputStream)
- val unparseContentHandler = dp.newContentHandlerInstance(saxOutputChannel)
- unparseContentHandler.enableResolutionOfRelativeInfosetBlobURIs()
- val xmlReader = DaffodilSAXParserFactory().newSAXParser.getXMLReader
- xmlReader.setContentHandler(unparseContentHandler)
- xmlReader.setFeature(XMLUtils.SAX_NAMESPACES_FEATURE, true)
- xmlReader.setFeature(XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE, true)
-
val actualDP = dp.unparse(dpInputter,
dpOutputChannel).asInstanceOf[UnparseResult]
dpOutputChannel.close()
- // kick off SAX Unparsing
- try {
- xmlReader.parse(new InputSource(saxInputStream))
- } catch {
- case e: DaffodilUnhandledSAXException =>
- // In the case of an unexpected errors, catch and throw as
TDMLException
- throw TDMLException("Unexpected error during SAX Unparse:" + e, None)
- case _: DaffodilUnparseErrorSAXException =>
- // do nothing as unparseResult and its diagnostics will be handled below
- }
- val actualSAX = unparseContentHandler.getUnparseResult
- saxOutputChannel.close()
- if (!actualDP.isError && !actualSAX.isError) {
- val dpis = new ByteArrayInputStream(
- dpOutputStream.asInstanceOf[ByteArrayOutputStream].toByteArray
- )
- if (actualDP.isScannable && actualSAX.isScannable) {
- VerifyTestCase.verifyTextData(dpis, saxOutputStream,
actualSAX.encodingName, None)
- } else {
- VerifyTestCase.verifyBinaryOrMixedData(dpis, saxOutputStream, None)
+ if (tdmlApiInfosetsEnv == "all") {
+ val saxInputStream = optSaxInputStream.get
+ val saxOutputStream = new ByteArrayOutputStream
+ val saxOutputChannel =
java.nio.channels.Channels.newChannel(saxOutputStream)
+ val unparseContentHandler =
dp.newContentHandlerInstance(saxOutputChannel)
+ unparseContentHandler.enableResolutionOfRelativeInfosetBlobURIs()
+ val xmlReader = DaffodilSAXParserFactory().newSAXParser.getXMLReader
+ xmlReader.setContentHandler(unparseContentHandler)
+ xmlReader.setFeature(XMLUtils.SAX_NAMESPACES_FEATURE, true)
+ xmlReader.setFeature(XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE, true)
+
+ // kick off SAX Unparsing
+ try {
+ xmlReader.parse(new InputSource(saxInputStream))
+ } catch {
+ case e: DaffodilUnhandledSAXException =>
+ // In the case of an unexpected errors, catch and throw as
TDMLException
+ throw TDMLException("Unexpected error during SAX Unparse:" + e, None)
+ case _: DaffodilUnparseErrorSAXException =>
+ // do nothing as unparseResult and its diagnostics will be handled
below
+ }
+
+ val actualSAX = unparseContentHandler.getUnparseResult
+ saxOutputChannel.close()
+ if (!actualDP.isError && !actualSAX.isError) {
+ val dpis = new ByteArrayInputStream(
+ dpOutputStream.asInstanceOf[ByteArrayOutputStream].toByteArray
+ )
+ if (actualDP.isScannable && actualSAX.isScannable) {
+ VerifyTestCase.verifyTextData(dpis, saxOutputStream,
actualSAX.encodingName, None)
+ } else {
+ VerifyTestCase.verifyBinaryOrMixedData(dpis, saxOutputStream, None)
+ }
}
+ val dpUnparseDiag = actualDP.getDiagnostics.map(_.getMessage())
+ val saxUnparseDiag = actualSAX.getDiagnostics.map(_.getMessage())
+ verifySameDiagnostics(dpUnparseDiag, saxUnparseDiag)
}
- val dpUnparseDiag = actualDP.getDiagnostics.map(_.getMessage())
- val saxUnparseDiag = actualSAX.getDiagnostics.map(_.getMessage())
- verifySameDiagnostics(dpUnparseDiag, saxUnparseDiag)
new DaffodilTDMLUnparseResult(actualDP, dpOutputStream)
}
@@ -408,11 +426,11 @@ class DaffodilTDMLDFDLProcessor private (private var dp:
DataProcessor)
final class DaffodilTDMLParseResult(actual: DFDL.ParseResult, outputter:
TDMLInfosetOutputter)
extends TDMLParseResult {
- override def getResult: Node = outputter.getResult()
+ override def getResult: Node = outputter.getResult
override def getBlobPaths: Seq[Path] = outputter.getBlobPaths()
- def inputter = outputter.toInfosetInputter()
+ def inputter = outputter.toInfosetInputter
override def isProcessingError: Boolean = actual.isProcessingError
diff --git
a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala
b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala
index ea569918b..40a8e495b 100644
---
a/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala
+++
b/daffodil-tdml-processor/src/main/scala/org/apache/daffodil/processor/tdml/TDMLInfosetOutputter.scala
@@ -19,7 +19,10 @@ package org.apache.daffodil.processor.tdml
import java.io.ByteArrayInputStream
import java.io.ByteArrayOutputStream
+import java.nio.charset.Charset
+import scala.xml.Node
+import org.apache.daffodil.runtime1.infoset.InfosetOutputter
import org.apache.daffodil.runtime1.infoset.JDOMInfosetInputter
import org.apache.daffodil.runtime1.infoset.JDOMInfosetOutputter
import org.apache.daffodil.runtime1.infoset.JsonInfosetInputter
@@ -33,10 +36,32 @@ import
org.apache.daffodil.runtime1.infoset.W3CDOMInfosetOutputter
import org.apache.daffodil.runtime1.infoset.XMLTextInfosetInputter
import org.apache.daffodil.runtime1.infoset.XMLTextInfosetOutputter
-class TDMLInfosetOutputter
+class TDMLInfosetOutputterScala
+ extends {
+ private val scalaOut = new ScalaXMLInfosetOutputter()
+ private val outputters: Seq[InfosetOutputter] = Seq(scalaOut)
+ }
+ with TeeInfosetOutputter(outputters: _*)
+ with TDMLInfosetOutputter {
+
+ override def getResult: Node = scalaOut.getResult
+
+ override lazy val xmlStream: ByteArrayOutputStream = {
+ val bos = new ByteArrayOutputStream()
+ bos.write(getResult.toString().getBytes(Charset.defaultCharset()))
+ bos
+ }
+
+ override def toInfosetInputter: TDMLInfosetInputter = {
+ val scalaIn = new ScalaXMLInfosetInputter(scalaOut.getResult)
+ new TDMLInfosetInputter(scalaIn, Seq())
+ }
+}
+
+class TDMLInfosetOutputterAll
extends {
private val jsonStream = new ByteArrayOutputStream()
- val xmlStream = new ByteArrayOutputStream()
+ override val xmlStream = new ByteArrayOutputStream()
private val scalaOut = new ScalaXMLInfosetOutputter()
private val jdomOut = new JDOMInfosetOutputter()
@@ -44,13 +69,15 @@ class TDMLInfosetOutputter
private val jsonOut = new JsonInfosetOutputter(jsonStream, false)
private val xmlOut = new XMLTextInfosetOutputter(xmlStream, false)
- private val outputters = Seq(xmlOut, scalaOut, jdomOut, w3cdomOut, jsonOut)
+ private val outputters: Seq[InfosetOutputter] =
+ Seq(xmlOut, scalaOut, jdomOut, w3cdomOut, jsonOut)
}
- with TeeInfosetOutputter(outputters: _*) {
+ with TeeInfosetOutputter(outputters: _*)
+ with TDMLInfosetOutputter {
- def getResult() = scalaOut.getResult
+ override def getResult: Node = scalaOut.getResult
- def toInfosetInputter() = {
+ override def toInfosetInputter: TDMLInfosetInputter = {
val scalaIn = new ScalaXMLInfosetInputter(scalaOut.getResult)
val jdomIn = new JDOMInfosetInputter(jdomOut.getResult)
val w3cdomIn = new W3CDOMInfosetInputter(w3cdomOut.getResult)
@@ -63,3 +90,12 @@ class TDMLInfosetOutputter
new TDMLInfosetInputter(scalaIn, Seq(jdomIn, w3cdomIn, jsonIn, xmlIn,
nullIn))
}
}
+
+trait TDMLInfosetOutputter extends InfosetOutputter {
+
+ def xmlStream: ByteArrayOutputStream
+
+ def getResult: Node
+
+ def toInfosetInputter: TDMLInfosetInputter
+}
diff --git
a/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLITdml.scala
b/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLITdml.scala
new file mode 100644
index 000000000..7435696f2
--- /dev/null
+++
b/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLITdml.scala
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.cliTest
+
+import org.apache.daffodil.cli.Main.ExitCode
+import org.apache.daffodil.cli.cliTest.Util._
+
+import org.junit.Test
+
+class TestCLITdml {
+
+ @Test def test_CLI_Tdml_Trace_singleTest1(): Unit = {
+ val tdml = path(
+
"daffodil-test/src/test/resources/org/apache/daffodil/section06/entities/Entities.tdml"
+ )
+
+ val envs = Map("DAFFODIL_TDML_API_INFOSETS" -> "all")
+
+ runCLI(args"test -i -t $tdml byte_entities_6_08", envs = envs) { cli =>
+ // legacy parse
+ cli.expect("parser: <Element name='e3'>")
+ // sax parse
+ cli.expect("parser: <Element name='e3'>")
+ cli.expect("[Pass] byte_entities_6_08")
+ }(ExitCode.Success)
+ }
+
+ @Test def test_CLI_Tdml_Trace_singleTest2(): Unit = {
+ val tdml = path(
+
"daffodil-test/src/test/resources/org/apache/daffodil/section06/entities/Entities.tdml"
+ )
+
+ val envs = Map("DAFFODIL_TDML_API_INFOSETS" -> "scala")
+
+ runCLI(args"test -i -t $tdml byte_entities_6_08", envs = envs) { cli =>
+ // parse
+ cli.expect("parser: <Element name='e3'>")
+ // unparse
+ cli.expect("parser: not available")
+ cli.expect("[Pass] byte_entities_6_08")
+ }(ExitCode.Success)
+ }
+}