stevedlawrence commented on a change in pull request #478:
URL: https://github.com/apache/daffodil/pull/478#discussion_r589426532
##########
File path:
daffodil-core/src/test/scala/org/apache/daffodil/processor/TestSAXParseAPI.scala
##########
@@ -120,58 +163,215 @@ class TestSAXParseAPI {
assertTrue(ioe.getMessage.contains("InputSource must be backed by
InputStream"))
}
+ /**
+ * tests that we can parse using an inputSource with a backing inputStream
+ */
@Test def
testDaffodilParseXMLReader_parse_inputSource_with_backing_stream(): Unit = {
- val xmlReader = dp.newXMLReaderInstance
- val baos = new ByteArrayOutputStream()
- val parseOutputStreamContentHandler = new
DaffodilParseOutputStreamContentHandler(baos)
- xmlReader.setContentHandler(parseOutputStreamContentHandler)
- val inArray = testData.getBytes()
+ val (xmlReader: DFDL.DaffodilParseXMLReader,
+ baos: ByteArrayOutputStream,
+ inArray: Array[Byte]) = setupSAXParserTest(dp, testData)
val bais = new ByteArrayInputStream(inArray)
val input = new InputSource(bais)
xmlReader.parse(input)
val pr =
xmlReader.getProperty(XMLUtils.DAFFODIL_SAX_URN_PARSERESULT).asInstanceOf[ParseResult]
assertTrue(!pr.isError)
- assertEquals(testInfoset, scala.xml.XML.loadString(baos.toString))
+ assertEquals(expectedInfoset, scala.xml.XML.loadString(baos.toString))
}
+ /**
+ * tests that we can parse using an inputStream
+ */
@Test def testDaffodilParseXMLReader_parse_inputStream(): Unit = {
- val xmlReader = dp.newXMLReaderInstance
- val baos = new ByteArrayOutputStream()
- val parseOutputStreamContentHandler = new
DaffodilParseOutputStreamContentHandler(baos)
- xmlReader.setContentHandler(parseOutputStreamContentHandler)
- val inArray = testData.getBytes()
+ val (xmlReader: DFDL.DaffodilParseXMLReader,
+ baos: ByteArrayOutputStream,
+ inArray: Array[Byte]) = setupSAXParserTest(dp, testData)
val bais = new ByteArrayInputStream(inArray)
xmlReader.parse(bais)
val pr =
xmlReader.getProperty(XMLUtils.DAFFODIL_SAX_URN_PARSERESULT).asInstanceOf[ParseResult]
assertTrue(!pr.isError)
- assertEquals(testInfoset, scala.xml.XML.loadString(baos.toString))
+ assertEquals(expectedInfoset, scala.xml.XML.loadString(baos.toString))
}
+ /**
+ * tests that we can parse using a byte array
+ */
@Test def testDaffodilParseXMLReader_parse_byteArray(): Unit = {
- val xmlReader = dp.newXMLReaderInstance
- val baos = new ByteArrayOutputStream()
- val parseOutputStreamContentHandler = new
DaffodilParseOutputStreamContentHandler(baos)
- xmlReader.setContentHandler(parseOutputStreamContentHandler)
- val inArray = testData.getBytes()
+ val (xmlReader: DFDL.DaffodilParseXMLReader,
+ baos: ByteArrayOutputStream,
+ inArray: Array[Byte]) = setupSAXParserTest(dp, testData)
xmlReader.parse(inArray)
+ val actualInfoset = scala.xml.XML.loadString(baos.toString)
val pr =
xmlReader.getProperty(XMLUtils.DAFFODIL_SAX_URN_PARSERESULT).asInstanceOf[ParseResult]
assertTrue(!pr.isError)
- assertEquals(testInfoset, scala.xml.XML.loadString(baos.toString))
+ assertEquals(expectedInfoset, actualInfoset)
}
+ /**
+ * tests that the error handler is populated if we try to parse an empty
input
+ */
@Test def testDaffodilParseXMLReader_parse_errorHandler_empty_byteArray():
Unit = {
- val xmlReader = dp.newXMLReaderInstance
- val baos = new ByteArrayOutputStream()
- val parseOutputStreamContentHandler = new
DaffodilParseOutputStreamContentHandler(baos)
- xmlReader.setContentHandler(parseOutputStreamContentHandler)
+ val (xmlReader: DFDL.DaffodilParseXMLReader, _, inArray: Array[Byte]) =
+ setupSAXParserTest(dp, "")
val eh = new BuilderErrorHandler
xmlReader.setErrorHandler(eh)
- val inArray = "".getBytes()
val spe = intercept[SAXParseException](
xmlReader.parse(inArray)
)
val pr =
xmlReader.getProperty(XMLUtils.DAFFODIL_SAX_URN_PARSERESULT).asInstanceOf[ParseResult]
assertTrue(pr.isError)
assertTrue(spe.getMessage.contains("Insufficient bits in data"))
}
+
+ /*
+ * tests that the output from the parser is as expected, when the namespaces
feature is set
+ * to false and the namespace prefix feature is set to true.
+ */
+ @Test def testDaffodilParseXMLReader_parse_features_prefixes_only(): Unit = {
+ val (pr, actualInfoset) = saxParseWithFeatures(namespaces = false,
namespacePrefixes = true)
+ assertTrue(!pr.isError)
+ assertEquals(namespacesExpectedInfoset1, actualInfoset)
+ }
+
+ /*
+ * tests that the output from the parser is as expected, when the namespaces
feature is set
+ * to true and the namespace prefix false is set to false.
+ */
+ @Test def testDaffodilParseXMLReader_parse_features_namespace_only(): Unit =
{
+ val (pr, actualInfoset) = saxParseWithFeatures(namespaces = true,
namespacePrefixes = false)
+ assertTrue(!pr.isError)
+ assertEquals(namespacesExpectedInfoset1, actualInfoset)
+ }
+
+ /*
+ * tests that the output from the parser is as expected, when the namespaces
and the namespace
+ * prefix feature is set to true.
+ */
+ @Test def testDaffodilParseXMLReader_parse_features_and_prefixes(): Unit = {
+ val (pr, actualInfoset) = saxParseWithFeatures(namespaces = true,
namespacePrefixes = true)
+ assertTrue(!pr.isError)
+ assertEquals(namespacesExpectedInfoset1, actualInfoset)
+ }
+
+ /*
+ * tests that the output from the parser is as expected, when the namespaces
feature is set
+ * to true and the namespace prefix feature is set to false.
+ */
+ @Test def testDaffodilParseXMLReader_trace_features_default(): Unit = {
+ val baos = saxTraceParseWithFeatures(namespaces = true, namespacePrefixes
= false)
+ val actualOutput = baos.toString
+ val xsiUri = XMLUtils.XSI_NAMESPACE
+ val a02Uri = "http://a02.com"
+ val b02Uri = "http://b02.com"
+ val expectedOutput =
+s"""startDocument
+startPrefixMapping(a02, $a02Uri)
+startPrefixMapping(b02, $b02Uri)
+startPrefixMapping(xsi, ${XMLUtils.XSI_NAMESPACE})
+startElement($b02Uri, seq, , Attributes())
+startElement($b02Uri, seq2, , Attributes())
+startElement($a02Uri, intx, ,
Attributes((${XMLUtils.XSI_NAMESPACE},nil,,true)))
+endElement($a02Uri, intx, )
+endElement($b02Uri, seq2, )
+startElement($b02Uri, seq2, , Attributes())
+startElement($a02Uri, inty, , Attributes())
+character(Array(3), 0, 1)
+endElement($a02Uri, inty, )
+endElement($b02Uri, seq2, )
+startElement($b02Uri, seq2, , Attributes())
+startElement($b02Uri, inty, , Attributes())
+character(Array(4), 0, 1)
+endElement($b02Uri, inty, )
+endElement($b02Uri, seq2, )
+startElement($b02Uri, seq2, , Attributes())
+startElement($a02Uri, intx, , Attributes())
+character(Array(7), 0, 1)
+endElement($a02Uri, intx, )
+endElement($b02Uri, seq2, )
+endElement($b02Uri, seq, )
+endPrefixMapping(xsi)
+endPrefixMapping(b02)
+endPrefixMapping(a02)
+endDocument"""
Review comment:
For things like this, I find ``stripMargin`` to be useful to keep normal
indenting, e.g.:
```scala
val expectedOutput =
"""startDocument
|startPrefixMapping(a02, $a02Uri)
|startPrefixMapping(b02, $b02Uri)
|...
|endDOcument""".stripMargin
##########
File path:
daffodil-core/src/test/scala/org/apache/daffodil/processor/TestSAXParseAPI.scala
##########
@@ -120,58 +163,215 @@ class TestSAXParseAPI {
assertTrue(ioe.getMessage.contains("InputSource must be backed by
InputStream"))
}
+ /**
+ * tests that we can parse using an inputSource with a backing inputStream
+ */
@Test def
testDaffodilParseXMLReader_parse_inputSource_with_backing_stream(): Unit = {
- val xmlReader = dp.newXMLReaderInstance
- val baos = new ByteArrayOutputStream()
- val parseOutputStreamContentHandler = new
DaffodilParseOutputStreamContentHandler(baos)
- xmlReader.setContentHandler(parseOutputStreamContentHandler)
- val inArray = testData.getBytes()
+ val (xmlReader: DFDL.DaffodilParseXMLReader,
+ baos: ByteArrayOutputStream,
+ inArray: Array[Byte]) = setupSAXParserTest(dp, testData)
val bais = new ByteArrayInputStream(inArray)
val input = new InputSource(bais)
xmlReader.parse(input)
val pr =
xmlReader.getProperty(XMLUtils.DAFFODIL_SAX_URN_PARSERESULT).asInstanceOf[ParseResult]
assertTrue(!pr.isError)
- assertEquals(testInfoset, scala.xml.XML.loadString(baos.toString))
+ assertEquals(expectedInfoset, scala.xml.XML.loadString(baos.toString))
}
+ /**
+ * tests that we can parse using an inputStream
+ */
@Test def testDaffodilParseXMLReader_parse_inputStream(): Unit = {
- val xmlReader = dp.newXMLReaderInstance
- val baos = new ByteArrayOutputStream()
- val parseOutputStreamContentHandler = new
DaffodilParseOutputStreamContentHandler(baos)
- xmlReader.setContentHandler(parseOutputStreamContentHandler)
- val inArray = testData.getBytes()
+ val (xmlReader: DFDL.DaffodilParseXMLReader,
+ baos: ByteArrayOutputStream,
+ inArray: Array[Byte]) = setupSAXParserTest(dp, testData)
val bais = new ByteArrayInputStream(inArray)
xmlReader.parse(bais)
val pr =
xmlReader.getProperty(XMLUtils.DAFFODIL_SAX_URN_PARSERESULT).asInstanceOf[ParseResult]
assertTrue(!pr.isError)
- assertEquals(testInfoset, scala.xml.XML.loadString(baos.toString))
+ assertEquals(expectedInfoset, scala.xml.XML.loadString(baos.toString))
}
+ /**
+ * tests that we can parse using a byte array
+ */
@Test def testDaffodilParseXMLReader_parse_byteArray(): Unit = {
- val xmlReader = dp.newXMLReaderInstance
- val baos = new ByteArrayOutputStream()
- val parseOutputStreamContentHandler = new
DaffodilParseOutputStreamContentHandler(baos)
- xmlReader.setContentHandler(parseOutputStreamContentHandler)
- val inArray = testData.getBytes()
+ val (xmlReader: DFDL.DaffodilParseXMLReader,
+ baos: ByteArrayOutputStream,
+ inArray: Array[Byte]) = setupSAXParserTest(dp, testData)
xmlReader.parse(inArray)
+ val actualInfoset = scala.xml.XML.loadString(baos.toString)
val pr =
xmlReader.getProperty(XMLUtils.DAFFODIL_SAX_URN_PARSERESULT).asInstanceOf[ParseResult]
assertTrue(!pr.isError)
- assertEquals(testInfoset, scala.xml.XML.loadString(baos.toString))
+ assertEquals(expectedInfoset, actualInfoset)
}
+ /**
+ * tests that the error handler is populated if we try to parse an empty
input
+ */
@Test def testDaffodilParseXMLReader_parse_errorHandler_empty_byteArray():
Unit = {
- val xmlReader = dp.newXMLReaderInstance
- val baos = new ByteArrayOutputStream()
- val parseOutputStreamContentHandler = new
DaffodilParseOutputStreamContentHandler(baos)
- xmlReader.setContentHandler(parseOutputStreamContentHandler)
+ val (xmlReader: DFDL.DaffodilParseXMLReader, _, inArray: Array[Byte]) =
+ setupSAXParserTest(dp, "")
val eh = new BuilderErrorHandler
xmlReader.setErrorHandler(eh)
- val inArray = "".getBytes()
val spe = intercept[SAXParseException](
xmlReader.parse(inArray)
)
val pr =
xmlReader.getProperty(XMLUtils.DAFFODIL_SAX_URN_PARSERESULT).asInstanceOf[ParseResult]
assertTrue(pr.isError)
assertTrue(spe.getMessage.contains("Insufficient bits in data"))
}
+
+ /*
+ * tests that the output from the parser is as expected, when the namespaces
feature is set
+ * to false and the namespace prefix feature is set to true.
+ */
+ @Test def testDaffodilParseXMLReader_parse_features_prefixes_only(): Unit = {
+ val (pr, actualInfoset) = saxParseWithFeatures(namespaces = false,
namespacePrefixes = true)
+ assertTrue(!pr.isError)
+ assertEquals(namespacesExpectedInfoset1, actualInfoset)
+ }
+
+ /*
+ * tests that the output from the parser is as expected, when the namespaces
feature is set
+ * to true and the namespace prefix false is set to false.
+ */
+ @Test def testDaffodilParseXMLReader_parse_features_namespace_only(): Unit =
{
+ val (pr, actualInfoset) = saxParseWithFeatures(namespaces = true,
namespacePrefixes = false)
+ assertTrue(!pr.isError)
+ assertEquals(namespacesExpectedInfoset1, actualInfoset)
+ }
+
+ /*
+ * tests that the output from the parser is as expected, when the namespaces
and the namespace
+ * prefix feature is set to true.
+ */
+ @Test def testDaffodilParseXMLReader_parse_features_and_prefixes(): Unit = {
+ val (pr, actualInfoset) = saxParseWithFeatures(namespaces = true,
namespacePrefixes = true)
+ assertTrue(!pr.isError)
+ assertEquals(namespacesExpectedInfoset1, actualInfoset)
+ }
+
+ /*
+ * tests that the output from the parser is as expected, when the namespaces
feature is set
+ * to true and the namespace prefix feature is set to false.
+ */
+ @Test def testDaffodilParseXMLReader_trace_features_default(): Unit = {
+ val baos = saxTraceParseWithFeatures(namespaces = true, namespacePrefixes
= false)
+ val actualOutput = baos.toString
+ val xsiUri = XMLUtils.XSI_NAMESPACE
+ val a02Uri = "http://a02.com"
+ val b02Uri = "http://b02.com"
+ val expectedOutput =
+s"""startDocument
+startPrefixMapping(a02, $a02Uri)
+startPrefixMapping(b02, $b02Uri)
+startPrefixMapping(xsi, ${XMLUtils.XSI_NAMESPACE})
+startElement($b02Uri, seq, , Attributes())
+startElement($b02Uri, seq2, , Attributes())
+startElement($a02Uri, intx, ,
Attributes((${XMLUtils.XSI_NAMESPACE},nil,,true)))
+endElement($a02Uri, intx, )
+endElement($b02Uri, seq2, )
+startElement($b02Uri, seq2, , Attributes())
+startElement($a02Uri, inty, , Attributes())
+character(Array(3), 0, 1)
+endElement($a02Uri, inty, )
+endElement($b02Uri, seq2, )
+startElement($b02Uri, seq2, , Attributes())
+startElement($b02Uri, inty, , Attributes())
+character(Array(4), 0, 1)
+endElement($b02Uri, inty, )
+endElement($b02Uri, seq2, )
+startElement($b02Uri, seq2, , Attributes())
+startElement($a02Uri, intx, , Attributes())
+character(Array(7), 0, 1)
Review comment:
I think it would be worth it to output the contents of the arrays to
ensure the arrays contain what we expect them to.
##########
File path: daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala
##########
@@ -1228,6 +1238,32 @@ Differences were (path, expected, actual):
sb.append(s)
sb.append(";")
}
+
+ /**
+ * Return optional uri from NamespaceBinding based on some input prefix
+ * @param nsb NamespaceBinding we wish to search for the prefix's uri
+ * @param prefix Prefix whose URI we search through the NamespaceBinding for
+ * @return the uri string wrapped in a Some, or None, if not found
+ */
+ def getURIOption(nsb: NamespaceBinding, prefix: String): Option[String] = {
Review comment:
Should we return a ``Maybe`` instead of ``Option`` to avoid the
allocation?
##########
File path:
daffodil-japi/src/main/java/org/apache/daffodil/japi/package-info.java
##########
@@ -200,6 +200,11 @@
* }
* </pre>
*
+ * The value of the supported features cannot be changed during a parse, and
the parse will run
+ * with the value of the features as they were when the parse was kicked off.
To run a parse with
+ * different feature values, one must wait till the running parse finishes,
set the feature values
Review comment:
s/till/until/
##########
File path:
daffodil-runtime1/src/main/scala/org/apache/daffodil/infoset/SAXInfosetOutputter.scala
##########
@@ -144,29 +149,25 @@ class SAXInfosetOutputter(xmlReader:
DFDL.DaffodilParseXMLReader)
}
private def doStartPrefixMapping(diElem: DIElement, contentHandler:
ContentHandler): Unit = {
- val nsbStart = diElem.erd.minimizedScope
- val nsbEnd = if (diElem.isRoot) {
- scala.xml.TopScope
- } else {
- diElem.diParent.erd.minimizedScope
- }
+ val (nsbStart: NamespaceBinding, nsbEnd: NamespaceBinding) =
getNsbStartAndEnd(diElem)
var n = nsbStart
+ var mappingsList: Seq[(String, String)] = Seq()
while (n != nsbEnd && n != null && n != scala.xml.TopScope) {
val prefix = if (n.prefix == null) "" else n.prefix
val uri = if (n.uri == null) "" else n.uri
- contentHandler.startPrefixMapping(prefix, uri)
+ // we generate a list here by prepending so can build the prefixMapping
in the
+ // same order as it is in minimizedScope when we call
startPrefixMapping; we do this because
+ // getPrefix in the contentHandler is order dependent
+ mappingsList +:= (prefix, uri)
n = n.parent
}
+ mappingsList.foreach{ case (prefix, uri) =>
+ contentHandler.startPrefixMapping(prefix, uri)
+ }
}
private def doEndPrefixMapping(diElem: DIElement, contentHandler:
ContentHandler): Unit = {
- val nsbStart = diElem.erd.minimizedScope
- val nsbEnd = if (diElem.isRoot) {
- scala.xml.TopScope
- } else {
- diElem.diParent.erd
- .minimizedScope
- }
+ val (nsbStart: NamespaceBinding, nsbEnd: NamespaceBinding) =
getNsbStartAndEnd(diElem)
var n = nsbStart
Review comment:
Curious about this, it looks like scala-xml does implement it's own
equality stuff, and I think it does compare the full tree. Agreed that ne/eq is
more appropriate and should have better performance. We might want to inspect
places where we are performance this NamespaceBinding equality check and fix. I
suspect we do something similar in other InfosetOutputters. Might be worth
create a bug for that.
##########
File path:
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -116,9 +247,16 @@ class DaffodilParseOutputStreamContentHandler(out:
OutputStream, pretty: Boolean
}
}
writer.write("</")
- writer.write(qName)
+ outputTagName(uri, localName, qName)
writer.write(">")
outputNewlineStack.pop()
+
+ if (!globalPrefixMappingContextStack.isEmpty)
globalPrefixMappingContextStack.pop
+ if (globalPrefixMappingContextStack.isEmpty) {
+ globalPrefixMapping = null
+ } else {
+ globalPrefixMapping = globalPrefixMappingContextStack.top
+ }
Review comment:
I think we searched for one and actually couldn't find many that wrote
out to an OutputStream. And those that did pulled in tons of dependencies or
didn't work the way we expected. Might be worth a second look. I think we also
recently added SaxonHE as a new dependency, so that might be worth checking as
well.
##########
File path:
daffodil-core/src/test/scala/org/apache/daffodil/processor/TestSAXUtils.scala
##########
@@ -0,0 +1,236 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.processor
+
+import java.io.ByteArrayOutputStream
+import java.io.File
+import java.io.OutputStream
+import java.io.OutputStreamWriter
+
+import scala.xml.Elem
+
+import org.apache.daffodil.api.DFDL
+import org.apache.daffodil.compiler.Compiler
+import org.apache.daffodil.processors.DaffodilParseOutputStreamContentHandler
+import org.apache.daffodil.processors.DataProcessor
+import org.apache.daffodil.processors.ParseResult
+import org.apache.daffodil.util.Misc
+import org.apache.daffodil.util.SchemaUtils
+import org.apache.daffodil.xml.XMLUtils
+import org.junit.Assert.fail
+import org.xml.sax.Attributes
+import org.xml.sax.ContentHandler
+import org.xml.sax.Locator
+
+object TestSAXUtils {
+ lazy val testSchema: Elem = SchemaUtils.dfdlTestSchema(
+ <xs:include
schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd"/>,
+ <dfdl:format ref="tns:GeneralFormat"/>,
+ <xs:element name="list" type="tns:example1"/>
+ <xs:complexType name="example1">
+ <xs:sequence>
+ <xs:element name="w" type="xs:int" dfdl:length="1"
dfdl:lengthKind="explicit" maxOccurs="unbounded"/>
+ </xs:sequence>
+ </xs:complexType>
+ )
+ lazy val expectedInfoset: Elem = <list
xmlns="http://example.com"><w>9</w><w>1</w><w>0</w></list>
+ lazy val testInfosetString: String = expectedInfoset.toString()
+ lazy val testData: String = "910"
+
+ lazy val schemaFile: File = new
File(Misc.getRequiredResource("/test/example_nested_namespaces.dfdl.xsd"))
+ lazy val testNamespacesSchema1: Elem = scala.xml.XML.loadFile(schemaFile)
+ lazy val namespacesExpectedInfoset1: Elem = {
+<b02:seq xmlns:b02="http://b02.com" xmlns:a02="http://a02.com">
+ <b02:seq2>
+ <a02:inty>3</a02:inty>
+ </b02:seq2>
+ <b02:seq2>
+ <b02:inty>4</b02:inty>
+ </b02:seq2>
+ <b02:seq2>
+ <a02:inty>2</a02:inty>
+ </b02:seq2>
+ <b02:seq2>
+ <b02:inty>1</b02:inty>
+ </b02:seq2>
+ <b02:seq2>
+ <b02:inty>44</b02:inty>
+ </b02:seq2>
+ <b02:seq2>
+ <b02:inty>643</b02:inty>
+ </b02:seq2>
+ <b02:seq2>
+ <a02:inty>3</a02:inty>
+ </b02:seq2>
+ <b02:seq2>
+ <a02:inty>5</a02:inty>
+ </b02:seq2>
+ <b02:seq2>
+ <b02:inty>1</b02:inty>
+ </b02:seq2>
+</b02:seq>
Review comment:
This XML an be indented in. It doesn't need to start at the beginning of
lines.
##########
File path:
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -107,6 +208,36 @@ class DaffodilParseOutputStreamContentHandler(out:
OutputStream, pretty: Boolean
outputNewlineStack.push(false)
}
+ private def outputTagName(uri: String, localName: String, qName: String,
atts: Maybe[Attributes] = Nope): Unit = {
+ val tagName = {
+ if (qName.nonEmpty) {
+ qName
+ } else {
+ // use attributes
Review comment:
This could be a bit more verbose to be more clear what "use attributes"
means. I think what's going on here is we don't have a qname string, so we need
to figure out what prefix to use so one case uses attributes and the other uses
globalPrefixMapping?
##########
File path: daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala
##########
@@ -1228,6 +1238,32 @@ Differences were (path, expected, actual):
sb.append(s)
sb.append(";")
}
+
+ /**
+ * Return optional uri from NamespaceBinding based on some input prefix
+ * @param nsb NamespaceBinding we wish to search for the prefix's uri
+ * @param prefix Prefix whose URI we search through the NamespaceBinding for
+ * @return the uri string wrapped in a Some, or None, if not found
+ */
+ def getURIOption(nsb: NamespaceBinding, prefix: String): Option[String] = {
+ if (nsb == null) None
+ else if (nsb.prefix == prefix) Some(nsb.uri)
+ else getURIOption(nsb.parent, prefix)
+ }
+
+ /**
+ * Return optional prefix from NamespaceBinding based on some input uri
+ *
+ * @param nsb NamespaceBinding we wish to search for the uri's prefix
+ * @param uri Prefix whose URI we search through the NamespaceBinding for
+ * @return the prefix string wrapped in a Some, or None, if not found
+ */
+ def getPrefixOption(nsb: NamespaceBinding, uri: String): Option[String] = {
+
+ if (nsb == null) None
+ else if (nsb.uri == uri) Some(nsb.prefix)
+ else getPrefixOption(nsb.parent, uri)
+ }
}
Review comment:
I think it's worth ading a comment to these that the NamespaceBinding
functions that do similar things don't handle the nsb.parent == null case and
could throw a NullPointerException.
##########
File path:
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -107,6 +208,36 @@ class DaffodilParseOutputStreamContentHandler(out:
OutputStream, pretty: Boolean
outputNewlineStack.push(false)
}
+ private def outputTagName(uri: String, localName: String, qName: String,
atts: Maybe[Attributes] = Nope): Unit = {
+ val tagName = {
+ if (qName.nonEmpty) {
+ qName
+ } else {
+ // use attributes
+ lazy val attrs = atts.get
+ lazy val in = attrs.getIndex(uri, localName)
+ if (atts.nonEmpty && in >= 0) {
+ val qn = attrs.getQName(in)
+ qn
Review comment:
This feels wrong, or I don't understand what's going on here. So in the
case where qname is empty, the uri is something like ``http://example.com`` and
localName is something like ``foo``, right? And we want to figure out the qname
(i.e. prefix + localName) to output for this element tag.
But doesn't that mean this code is looking for an attribute with name "foo"
and namespace "https://example.com" and has the value of the qname? So is this
expecting an attribute that looks like this to existt:
``{https://example.com}foo="ex:foo"``? Does that kind of attribute actually
exist, or am I reading this wrong?
##########
File path:
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -107,6 +208,36 @@ class DaffodilParseOutputStreamContentHandler(out:
OutputStream, pretty: Boolean
outputNewlineStack.push(false)
}
+ private def outputTagName(uri: String, localName: String, qName: String,
atts: Maybe[Attributes] = Nope): Unit = {
+ val tagName = {
+ if (qName.nonEmpty) {
+ qName
+ } else {
+ // use attributes
+ lazy val attrs = atts.get
+ lazy val in = attrs.getIndex(uri, localName)
+ if (atts.nonEmpty && in >= 0) {
+ val qn = attrs.getQName(in)
+ qn
+ } else {
+ // use globalPrefixMapping
+ val sanitizedUri = if (uri.isEmpty) null else uri
+ val optPrefix = XMLUtils.getPrefixOption(globalPrefixMapping,
sanitizedUri)
+ optPrefix match {
+ case Some(pre) =>
+ if (pre != null && pre.nonEmpty) {
+ s"$pre:$localName"
+ } else {
+ localName
+ }
+ case None => localName
Review comment:
Does this case imply something went wrong? I guess this would mean we
never got a prefix mapping for some URI, but then saw an element with that URI?
I'm not sure what SAX says about how to handle this. I could see an argument
that this is a SAXException or something rather than silenting ignoring the
uri, but maybe that's being too strict?
##########
File path: daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala
##########
@@ -826,11 +828,19 @@ object XMLUtils {
Comparison failed.
Expected (attributes stripped)
%s
+%s
Review comment:
I'm not we ever want the infoset printed twice, which will happen if
checkPrefixes or checkNamespaces are true. What about if one of
checkPrefixes/checkNamespaces are true (which should be rare) then we print the
full expected, otherwise we print the removedAttributes(expected)?
##########
File path: daffodil-lib/src/main/scala/org/apache/daffodil/xml/XMLUtils.scala
##########
@@ -812,6 +812,8 @@ object XMLUtils {
actual: Node,
ignoreProcInstr: Boolean = true,
checkPrefixes: Boolean = false,
+ // checkNamespaces isn't order specific; it only checks that they have the
same (element-wise)
Review comment:
This comment also feels more appropriate in scala-doc for this function.
##########
File path:
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -66,13 +88,74 @@ class DaffodilParseOutputStreamContentHandler(out:
OutputStream, pretty: Boolean
override def startPrefixMapping(prefix: String, uri: String): Unit = {
val _prefix = if (prefix == "") null else prefix
- prefixMapping = NamespaceBinding(_prefix, uri, prefixMapping)
+ globalPrefixMapping = NamespaceBinding(_prefix, uri, globalPrefixMapping)
+ currentElementPrefixMapping = NamespaceBinding(_prefix, uri,
currentElementPrefixMapping)
}
override def endPrefixMapping(prefix: String): Unit = {
// do nothing
}
+ /**
+ * Uses Attributes, which is passed in to the startElement callback, to
extract prefix mappings
+ * and other attributes and return a tuple of an updated globalPrefixMapping
and a Seq of attribute=val
+ * String pairings
+ */
+ def processAttributes(atts: Attributes, prefixMapping: NamespaceBinding):
(NamespaceBinding, Seq[String]) = {
+ var pm = prefixMapping
+ var attrPairings: Seq[String] = Seq()
+ var i = 0
+ var mappingsList: Seq[(String, String)] = Seq()
+ while (i < atts.getLength) {
+ val qName = atts.getQName(i)
+ val attrVal = atts.getValue(i)
+ if (qName.nonEmpty) { //if qName is populated; as in when
namespacePrefixes == true
+ if (qName.startsWith("xmlns:") || qName == "xmlns") { // describing
namespace mapping
+ // get prefix
+ val pre = if (qName.startsWith("xmlns:")) {
+ qName.substring(6)
+ } else {
+ null
+ }
+ // we make this call to check if the prefix already exists. If it
doesn't exist, we get the
+ // npe so we can add it, but if it does exist, nothing happens and
it doesn't get re-added
Review comment:
I think this comment is out of date? I think we used to call
NamespaceBinding which colud thrown an NPE if the binding didn't exist. I think
this new XMLUtils can never throw an NPE, so the comment should just be removed.
##########
File path:
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -84,21 +167,39 @@ class DaffodilParseOutputStreamContentHandler(out:
OutputStream, pretty: Boolean
writer.write(System.lineSeparator())
outputIndentation(writer)
}
+
+ // this is for the situation where the XMLReader doesn't use the
start/endPrefixMappings to
+ // pass along namespaceMapping information, so prefix information must be
determined via
+ // the Attribute param.
+ // we always push but processAttributes won't always add a mapping since
the Attributes param
+ // can be empty or the prefix can already exist
+ val pmAttrRes = processAttributes(atts, globalPrefixMapping)
+ globalPrefixMapping = pmAttrRes._1
+ currentElementAttributes = pmAttrRes._2
+ currentElementPrefixMapping = processAttributes(atts,
currentElementPrefixMapping)._1
+
+ globalPrefixMappingContextStack.push(globalPrefixMapping)
+
// handle start of tag
writer.write("<")
- writer.write(qName)
- // handle attributes
- for (i <- 0 until atts.getLength) {
- val attsValue = atts.getValue(i)
- val attsQName = atts.getQName(i)
- writer.write(s""" $attsQName="$attsValue"""")
- }
- // handle namespaces
- if (prefixMapping != null) {
- val pm = prefixMapping.toString()
+ outputTagName(uri, localName, qName, Some(atts))
+
+ // handle namespaces in the case where prefixMappings aren't populated
+ // this contains only xmlns prefixes and are populated via the
start/endPrefixMappings
+ // or Attributes via processAttributes()
+ if (currentElementPrefixMapping != null) {
+ val pm = currentElementPrefixMapping.toString()
writer.write(pm)
- prefixMapping = null
+ currentElementPrefixMapping = null
}
+
+ // handles attributes from the Attributes object. Example attributes is
xsi:nil
+ if (currentElementAttributes.nonEmpty) {
+ val attrs = currentElementAttributes.mkString(" ")
+ writer.write(attrs)
+ currentElementAttributes = Seq()
Review comment:
Looks like currentElementAttributes is only used in this function. Can
the member variable be removed and this changed to just a local variable of
this function?
##########
File path:
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -84,21 +167,39 @@ class DaffodilParseOutputStreamContentHandler(out:
OutputStream, pretty: Boolean
writer.write(System.lineSeparator())
outputIndentation(writer)
}
+
+ // this is for the situation where the XMLReader doesn't use the
start/endPrefixMappings to
+ // pass along namespaceMapping information, so prefix information must be
determined via
+ // the Attribute param.
+ // we always push but processAttributes won't always add a mapping since
the Attributes param
+ // can be empty or the prefix can already exist
+ val pmAttrRes = processAttributes(atts, globalPrefixMapping)
+ globalPrefixMapping = pmAttrRes._1
+ currentElementAttributes = pmAttrRes._2
+ currentElementPrefixMapping = processAttributes(atts,
currentElementPrefixMapping)._1
+
Review comment:
Not sure I understand this. We first process all the attributes using
the globalPrefixMapping, and then processAttributes again using the
currentElementPrefixMapping? Can you explian when we need to iterate over the
attributes twice with different mappings?
After reading some more code, my guess is that when we as we process
attributes, we need to add any new namespace bindings to the
globalPrefixMapping but also to the currentElementPrefixMapping? Can both be
done in the same function, such that processAttributes returns two mappings,
one is the new globalPrefixMaping and one is the new
currentElementPrefixMappin? Or is that going to get too messy?
##########
File path:
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseOutputStreamContentHandler.scala
##########
@@ -84,21 +167,39 @@ class DaffodilParseOutputStreamContentHandler(out:
OutputStream, pretty: Boolean
writer.write(System.lineSeparator())
outputIndentation(writer)
}
+
+ // this is for the situation where the XMLReader doesn't use the
start/endPrefixMappings to
+ // pass along namespaceMapping information, so prefix information must be
determined via
+ // the Attribute param.
+ // we always push but processAttributes won't always add a mapping since
the Attributes param
+ // can be empty or the prefix can already exist
+ val pmAttrRes = processAttributes(atts, globalPrefixMapping)
+ globalPrefixMapping = pmAttrRes._1
+ currentElementAttributes = pmAttrRes._2
+ currentElementPrefixMapping = processAttributes(atts,
currentElementPrefixMapping)._1
+
+ globalPrefixMappingContextStack.push(globalPrefixMapping)
+
// handle start of tag
writer.write("<")
- writer.write(qName)
- // handle attributes
- for (i <- 0 until atts.getLength) {
- val attsValue = atts.getValue(i)
- val attsQName = atts.getQName(i)
- writer.write(s""" $attsQName="$attsValue"""")
- }
- // handle namespaces
- if (prefixMapping != null) {
- val pm = prefixMapping.toString()
+ outputTagName(uri, localName, qName, Some(atts))
+
+ // handle namespaces in the case where prefixMappings aren't populated
+ // this contains only xmlns prefixes and are populated via the
start/endPrefixMappings
+ // or Attributes via processAttributes()
+ if (currentElementPrefixMapping != null) {
Review comment:
Not sure I understand the first line of this comment. Are there other
places other than start/endPrefixMappings and Attributes where prefix mappings
are populated?
##########
File path:
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/DaffodilParseXMLReader.scala
##########
@@ -43,35 +42,28 @@ class DaffodilParseXMLReader(dp: DataProcessor) extends
DFDL.DaffodilParseXMLRea
private var errorHandler: ErrorHandler = _
private var dtdHandler: DTDHandler = _
private var entityResolver: EntityResolver = _
- var saxParseResultPropertyValue: ParseResult = _
- var saxBlobDirectoryPropertyValue: Path =
Paths.get(System.getProperty("java.io.tmpdir"))
- var saxBlobPrefixPropertyValue: String = "daffodil-sax-"
- var saxBlobSuffixPropertyValue: String = ".blob"
-
- private val featureMap = mutable.Map[String, Boolean](
- XMLUtils.SAX_NAMESPACES_FEATURE -> false,
- XMLUtils.SAX_NAMESPACE_PREFIXES_FEATURE -> false
- )
+ private var saxParseResultPropertyValue: ParseResult = _
Review comment:
This is definitely not test code. This class implements the SAX
XMLReader interface and is what someone uses when they want to parse data using
SAX. This class essentially wraps a DataProcessor calling parse with a
SAXInfosetOutputter. As the SAXInfosetOutputter gets parse infoset events, it
then forwards them as SAX events to the SAX ContentHandler.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]