This is an automated email from the ASF dual-hosted git repository.
slawrence pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-daffodil.git
The following commit(s) were added to refs/heads/master by this push:
new c8b4d91 Ensure InputStreams created by includes/imports are closed
c8b4d91 is described below
commit c8b4d91b911e96adccb80bdf716c2d72a0df2f83
Author: Steve Lawrence <[email protected]>
AuthorDate: Thu May 16 11:04:38 2019 -0400
Ensure InputStreams created by includes/imports are closed
- When we open a stream from a DaffodilSchemaSource, make sure we close
the stream when we are done with it
- In our custom LSInput implementation (renamed to InputStreamLSInput),
we held an InputStream *and* provided an implementation of
getStringData(), while getByteStream() returned null. When
getStringData() returns non-null, Xerces ignores the fact that an
InputStream is provided and so Xerces never closes it. If we instead
make getStringData() return null and getByteStream() return the
InputStream, Xerces will then use the InputStream and will close it
once the stream is parsed. The added benefit here is that Xerces
*should* be able to look at the XML preamble and determine the
encoding, which we previously did not do with our getStringData
implementation.
These changes should allow Daffodil to support a much larger number of
includes/imports without hitting "Too many open files" errors.
DAFFODIL-2130
---
.../apache/daffodil/xml/DaffodilXMLLoader.scala | 77 +++++++++++++---------
1 file changed, 46 insertions(+), 31 deletions(-)
diff --git
a/daffodil-lib/src/main/scala/org/apache/daffodil/xml/DaffodilXMLLoader.scala
b/daffodil-lib/src/main/scala/org/apache/daffodil/xml/DaffodilXMLLoader.scala
index 5830f41..3c27ade 100644
---
a/daffodil-lib/src/main/scala/org/apache/daffodil/xml/DaffodilXMLLoader.scala
+++
b/daffodil-lib/src/main/scala/org/apache/daffodil/xml/DaffodilXMLLoader.scala
@@ -23,8 +23,15 @@ package org.apache.daffodil.xml
* See
http://stackoverflow.com/questions/4446137/how-to-track-the-source-line-location-of-an-xml-element
*/
+import java.io.BufferedInputStream
import java.io.File
+import java.io.InputStream
+import java.io.Reader
import java.net.URI
+
+import javax.xml.XMLConstants
+import javax.xml.transform.sax.SAXSource
+
import scala.collection.JavaConverters.asScalaBufferConverter
import scala.xml.Elem
import scala.xml.InputSource
@@ -33,20 +40,17 @@ import scala.xml.SAXParseException
import scala.xml.SAXParser
import scala.xml.TopScope
import scala.xml.parsing.NoBindingFactoryAdapter
-import org.apache.xerces.xni.parser.XMLInputSource
-import org.apache.xml.resolver.Catalog
-import org.apache.xml.resolver.CatalogManager
+
import org.w3c.dom.ls.LSInput
+
+import org.apache.daffodil.api.DaffodilSchemaSource
import org.apache.daffodil.exceptions.Assert
import org.apache.daffodil.util.LogLevel
import org.apache.daffodil.util.Logging
import org.apache.daffodil.util.Misc
-import org.apache.daffodil.api.DaffodilSchemaSource
-import javax.xml.XMLConstants
-import java.io.InputStream
-import java.io.BufferedInputStream
-import java.io.Reader
-import javax.xml.transform.sax.SAXSource
+import org.apache.xerces.xni.parser.XMLInputSource
+import org.apache.xml.resolver.Catalog
+import org.apache.xml.resolver.CatalogManager
/**
* Resolves URI/URL/URNs to loadable files/streams.
@@ -238,8 +242,8 @@ class DFDLCatalogResolver private ()
case None => null
case Some(uri) => {
try {
- val resourceAsStream = uri.toURL.openStream()
- val input = new Input(publicId, uri.toString, new
BufferedInputStream(resourceAsStream))
+ val resourceAsStream = new
BufferedInputStream(uri.toURL.openStream())
+ val input = new InputStreamLSInput(publicId, uri.toString,
resourceAsStream)
input
} catch {
case _: java.io.IOException => null
@@ -302,35 +306,43 @@ object DFDLCatalogResolver {
def get = d.get
}
-class Input(var pubId: String, var sysId: String, var inputStream:
BufferedInputStream)
+/**
+ * This LSInput implementation is tailored specifically for Daffodil's use with
+ * Xerces and has implementation details to ensure XML data is read correctly
+ * and InputStreams are closed.
+ *
+ * It is important here that the different data getters (e.g.
+ * getCharacterStream, getStringData, getEncoding) do not return a value. Only
+ * getByteStream should return a value representing the data. This ensures that
+ * Xerces will use the data from the InputStream, use the XML preamble to
+ * determine encoding, and close the InputStream upon completion. If any of the
+ * other getters return a value, Xerces might ignore the InputStream
+ * completely, which can lead to open file descriptors or errors in XML
decoding.
+ */
+class InputStreamLSInput(var pubId: String, var sysId: String, inputStream:
InputStream)
extends LSInput {
var myBaseURI: String = null
+ def getBaseURI = myBaseURI
def getPublicId = pubId
+ def getSystemId = sysId
+
+ def setBaseURI(baseURI: String) = myBaseURI = baseURI
def setPublicId(publicId: String) = pubId = publicId
- def getBaseURI = myBaseURI
- def getByteStream = null
+ def setSystemId(systemId: String) = sysId = systemId
+
+ def getByteStream = inputStream
def getCertifiedText = false
def getCharacterStream = null
def getEncoding = null
- def getStringData = {
- this.synchronized {
- val input: Array[Byte] = new Array[Byte](inputStream.available())
- inputStream.read(input)
- val contents = new String(input)
- contents
- }
- }
- def setBaseURI(baseURI: String) = myBaseURI = baseURI
+ def getStringData = null
+
def setByteStream(byteStream: InputStream) = {}
def setCertifiedText(certifiedText: Boolean) = {}
def setCharacterStream(characterStream: Reader) = {}
def setEncoding(encoding: String) = {}
def setStringData(stringData: String) = {}
- def getSystemId = sysId
- def setSystemId(systemId: String) = sysId = systemId
- def getInputStream: BufferedInputStream = inputStream
}
/**
@@ -463,9 +475,11 @@ trait SchemaAwareLoaderMixin {
* it a plain old file or resource, and not try to play games to get it to
* pick up the file/line/col information from attributes of the elements.
*/
- def validateSchema(source: DaffodilSchemaSource) = {
- val saxSource = new SAXSource(source.newInputSource())
+ def validateSchema(source: DaffodilSchemaSource): Unit = {
+ val inputSource = source.newInputSource()
+ val saxSource = new SAXSource(inputSource)
sf.newSchema(saxSource)
+ inputSource.getByteStream().close()
}
}
@@ -534,10 +548,10 @@ class DaffodilXMLLoader(val errorHandler:
org.xml.sax.ErrorHandler) {
* Does (optional) validation,
*/
def load(source: DaffodilSchemaSource): scala.xml.Node = {
- var xercesNode: Node = null
if (doValidation) {
- xercesNode =
- xercesAdapter.load(source.newInputSource()) // validates
+ val inputSource = source.newInputSource()
+ val xercesNode = xercesAdapter.load(inputSource) // validates
+ inputSource.getByteStream().close()
if (xercesNode == null) return null
// Note: we don't call xercesAdapter.validateSchema(source)
@@ -551,6 +565,7 @@ class DaffodilXMLLoader(val errorHandler:
org.xml.sax.ErrorHandler) {
//
val constructingLoader = new
DaffodilConstructingLoader(source.uriForLoading, errorHandler)
val res = constructingLoader.load() // construct the XML objects for us.
+ constructingLoader.input.close()
res
}