This is an automated email from the ASF dual-hosted git repository.
slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git
The following commit(s) were added to refs/heads/main by this push:
new 619b0b758 Remove many uses of ThreadLocals
619b0b758 is described below
commit 619b0b758fa550a2ee765b8346b711e745d4a2ac
Author: Steve Lawrence <[email protected]>
AuthorDate: Wed Aug 20 08:53:39 2025 -0400
Remove many uses of ThreadLocals
Currently we make use of ThreadLocal's as a way to create a per-thread
instance of a class that is not thread safe. One major drawback with
this approach is that values stored in ThreadLocals are never guaranteed
to be garbage collected unless you call ThreadLocal.remove, even if the
ThreadLocal has no more direct strong references--this is beacause the
Thread iself has a table of strong references to ThreadLocal values. And
few of our ThreadLocal uses ever call remove(). This can lead to pretty
significant memory leaks in some edge cases.
The reason we don't call remove is because in most cases we use
ThreadLocals as an efficient pool, where each thread gets its own slot
from the pool, and we don't necessarily know when a thread will no
longer need that instance. But that means these instances can leak.
To resolve this, this adds a new ThreadSafePool class, which is close to
a drop-in replacement for ThreadLocal. The only real difference is this
uses defines `allocate()` instead of `initialValue()`, and a
`withInstance` function must be used to get an instance and
automatically return it to the pool when withInstance ends.
The ThreadSafePool implementation makes use of a ConcurrentLinkedQueue
to support thread safe pool access--note that this does allow for an
unbounded number of pool instances, but in practice each pool should
never contain more than the number of threads. And unlike ThreadLocal,
this does allow different threads to reuse the same instance, so it
could potentially reduce memory. Additionally, if a pool no longer has
strong reference then its values can be garbage collected, so there is
no need to call `remove()` to avoid memory leaks.
The main drawback is that the ConcurrentLinkedQueue could add some
overhead since it does have to deal with potential thread contention,
and it does require allocations for the backing list. But hopefully this
is all relatively minor.
DAFFODIL-3030
---
.../org/apache/daffodil/cli/InfosetTypes.scala | 27 ++--
.../scala/org/apache/daffodil/io/FormatInfo.scala | 4 +-
.../daffodil/io/InputSourceDataInputStream.scala | 140 +++++++++++----------
.../scala/org/apache/daffodil/lib/util/Pool.scala | 54 ++++++++
.../runtime1/processors/DataProcessor.scala | 11 +-
.../runtime1/processors/EvCalendarLanguage.scala | 18 +--
.../runtime1/processors/ProcessorStateBases.scala | 5 +-
.../daffodil/runtime1/processors/RuntimeData.scala | 41 +++---
.../runtime1/processors/parsers/PState.scala | 3 +-
.../processors/parsers/PrimitivesDateTime1.scala | 9 +-
.../runtime1/processors/unparsers/UState.scala | 5 +-
.../runtime1/ConvertTextCalendarUnparser.scala | 9 +-
.../daffodil/validation/XercesValidator.scala | 68 +++++-----
.../apache/daffodil/io/FormatInfoForUnitTest.scala | 11 +-
.../schematron/SchematronValidator.scala | 23 ++--
.../org/apache/daffodil/tdml/TDMLRunner.scala | 5 +-
.../runtime1/layers/AISPayloadArmoringLayer.scala | 4 +-
17 files changed, 257 insertions(+), 180 deletions(-)
diff --git
a/daffodil-cli/src/main/scala/org/apache/daffodil/cli/InfosetTypes.scala
b/daffodil-cli/src/main/scala/org/apache/daffodil/cli/InfosetTypes.scala
index 501b0d8d0..d22085b3c 100644
--- a/daffodil-cli/src/main/scala/org/apache/daffodil/cli/InfosetTypes.scala
+++ b/daffodil-cli/src/main/scala/org/apache/daffodil/cli/InfosetTypes.scala
@@ -31,6 +31,7 @@ import scala.xml.SAXParser
import org.apache.daffodil.api
import org.apache.daffodil.io.InputSourceDataInputStream
+import org.apache.daffodil.lib.util.ThreadSafePool
import org.apache.daffodil.lib.xml.DFDLCatalogResolver
import org.apache.daffodil.lib.xml.DaffodilSAXParserFactory
import org.apache.daffodil.lib.xml.XMLUtils
@@ -427,20 +428,22 @@ case class W3CDOMInfosetHandler(dataProcessor:
api.DataProcessor) extends Infose
}
def unparse(data: AnyRef, output: DFDL.Output): api.UnparseResult = {
- val doc = data.asInstanceOf[ThreadLocal[org.w3c.dom.Document]].get
- val input = new W3CDOMInfosetInputter(doc)
- val ur = unparseWithInfosetInputter(input, output)
- ur
+ val domPool = data.asInstanceOf[ThreadSafePool[org.w3c.dom.Document]]
+ domPool.withInstance { doc =>
+ val input = new W3CDOMInfosetInputter(doc)
+ val ur = unparseWithInfosetInputter(input, output)
+ ur
+ }
}
def dataToInfoset(bytes: Array[Byte]): AnyRef = {
- // W3C Documents are not thread safe. So create a ThreadLocal so each
- // thread gets its own DOM tree. This has the unfortunate downside that we
- // don't actually convert the XML bytes to DOM until the first call to
- // unparse(), and we'll parse it multiple times if there are multiple
- // threads.
- val doc = new ThreadLocal[org.w3c.dom.Document] {
- override def initialValue = {
+ // W3C Documents are not thread safe. So we create a thread safe pool so
+ // each unparse gets its own DOM tree. This has the unfortunate downside
+ // that we don't actually convert the XML bytes to DOM until the first call
+ // to unparse(), and we'll parse it multiple times if there are multiple
+ // threads. Future unparses can reuse the already parsed Docuement though.
+ val domPool = new ThreadSafePool[org.w3c.dom.Document] {
+ override def allocate() = {
val dbf = DocumentBuilderFactory.newInstance()
dbf.setNamespaceAware(true)
dbf.setFeature(XMLUtils.XML_DISALLOW_DOCTYPE_FEATURE, true)
@@ -448,7 +451,7 @@ case class W3CDOMInfosetHandler(dataProcessor:
api.DataProcessor) extends Infose
db.parse(new ByteArrayInputStream(bytes))
}
}
- doc
+ domPool
}
def dataToInfoset(stream: InputStream): AnyRef =
dataToInfoset(IOUtils.toByteArray(stream))
diff --git
a/daffodil-core/src/main/scala/org/apache/daffodil/io/FormatInfo.scala
b/daffodil-core/src/main/scala/org/apache/daffodil/io/FormatInfo.scala
index d24699626..ac3ee9c05 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/io/FormatInfo.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/io/FormatInfo.scala
@@ -30,6 +30,7 @@ import
org.apache.daffodil.lib.schema.annotation.props.gen.EncodingErrorPolicy
import org.apache.daffodil.lib.schema.annotation.props.gen.UTF16Width
import org.apache.daffodil.lib.util.Maybe
import org.apache.daffodil.lib.util.MaybeInt
+import org.apache.daffodil.lib.util.ThreadSafePool
/**
* Abstract interface to obtain format properties or values derived from
@@ -119,6 +120,5 @@ trait FormatInfo {
/**
* Buffers used for regex matching
*/
- def regexMatchBuffer: CharBuffer
- def regexMatchBitPositionBuffer: LongBuffer
+ def regexMatchStatePool: ThreadSafePool[(CharBuffer, LongBuffer)]
}
diff --git
a/daffodil-core/src/main/scala/org/apache/daffodil/io/InputSourceDataInputStream.scala
b/daffodil-core/src/main/scala/org/apache/daffodil/io/InputSourceDataInputStream.scala
index 537df095d..8e39d7db4 100644
---
a/daffodil-core/src/main/scala/org/apache/daffodil/io/InputSourceDataInputStream.scala
+++
b/daffodil-core/src/main/scala/org/apache/daffodil/io/InputSourceDataInputStream.scala
@@ -681,84 +681,86 @@ final class InputSourceDataInputStream private (val
inputSource: InputSource)
if (!aligned) {
false
} else {
- var regexMatchBufferLimit =
finfo.tunable.initialRegexMatchLimitInCharacters
- val regexMatchBuffer = finfo.regexMatchBuffer
- val regexMatchBitPositionBuffer = finfo.regexMatchBitPositionBuffer
-
- regexMatchBuffer.position(0)
- regexMatchBuffer.limit(0)
- regexMatchBitPositionBuffer.position(0)
- regexMatchBitPositionBuffer.limit(0)
-
- val startingBitPos = bitPos0b
- var keepMatching = true
- var isMatch = false
-
- while (keepMatching) {
- // set the position to the last place data stopped decoding and
increase
- // the limit so we can fill more data
- regexMatchBuffer.position(regexMatchBuffer.limit())
- regexMatchBuffer.limit(regexMatchBufferLimit)
-
regexMatchBitPositionBuffer.position(regexMatchBitPositionBuffer.limit())
- regexMatchBitPositionBuffer.limit(regexMatchBufferLimit)
-
- val numDecoded =
- finfo.decoder.decode(this, finfo, regexMatchBuffer,
regexMatchBitPositionBuffer)
- val potentiallyMoreData = regexMatchBuffer.position() ==
regexMatchBuffer.limit()
-
- regexMatchBuffer.flip
- regexMatchBitPositionBuffer.flip
-
- if (numDecoded > 0) {
- // we decoded at least one extra characer than we had before, so the
- // match results could have changed. Try again.
- matcher.reset(regexMatchBuffer)
- isMatch = matcher.lookingAt()
- val hitEnd = matcher.hitEnd
- val requireEnd = matcher.requireEnd
-
- if (potentiallyMoreData && (hitEnd || (isMatch && requireEnd))) {
- // We filled the CharBuffer to its limit, so it's possible there is
- // more data available AND either 1) we hit the end of the char
- // buffer and more data might change the match or 2) we got a match
- // but require the end and more data could negate the match. In
- // either case, let's increase the match limit if possible, try to
- // decode more data, and try the match again if we got more data.
- if (regexMatchBufferLimit == regexMatchBuffer.capacity) {
- // consumed too much data, just give up
- keepMatching = false
+ finfo.regexMatchStatePool.withInstance { regexMatchState =>
+ var regexMatchBufferLimit =
finfo.tunable.initialRegexMatchLimitInCharacters
+ val regexMatchBuffer = regexMatchState._1
+ val regexMatchBitPositionBuffer = regexMatchState._2
+
+ regexMatchBuffer.position(0)
+ regexMatchBuffer.limit(0)
+ regexMatchBitPositionBuffer.position(0)
+ regexMatchBitPositionBuffer.limit(0)
+
+ val startingBitPos = bitPos0b
+ var keepMatching = true
+ var isMatch = false
+
+ while (keepMatching) {
+ // set the position to the last place data stopped decoding and
increase
+ // the limit so we can fill more data
+ regexMatchBuffer.position(regexMatchBuffer.limit())
+ regexMatchBuffer.limit(regexMatchBufferLimit)
+
regexMatchBitPositionBuffer.position(regexMatchBitPositionBuffer.limit())
+ regexMatchBitPositionBuffer.limit(regexMatchBufferLimit)
+
+ val numDecoded =
+ finfo.decoder.decode(this, finfo, regexMatchBuffer,
regexMatchBitPositionBuffer)
+ val potentiallyMoreData = regexMatchBuffer.position() ==
regexMatchBuffer.limit()
+
+ regexMatchBuffer.flip
+ regexMatchBitPositionBuffer.flip
+
+ if (numDecoded > 0) {
+ // we decoded at least one extra characer than we had before, so
the
+ // match results could have changed. Try again.
+ matcher.reset(regexMatchBuffer)
+ isMatch = matcher.lookingAt()
+ val hitEnd = matcher.hitEnd
+ val requireEnd = matcher.requireEnd
+
+ if (potentiallyMoreData && (hitEnd || (isMatch && requireEnd))) {
+ // We filled the CharBuffer to its limit, so it's possible there
is
+ // more data available AND either 1) we hit the end of the char
+ // buffer and more data might change the match or 2) we got a
match
+ // but require the end and more data could negate the match. In
+ // either case, let's increase the match limit if possible, try
to
+ // decode more data, and try the match again if we got more data.
+ if (regexMatchBufferLimit == regexMatchBuffer.capacity) {
+ // consumed too much data, just give up
+ keepMatching = false
+ } else {
+ regexMatchBufferLimit =
+ Math.min(regexMatchBufferLimit * 2,
regexMatchBuffer.capacity)
+ }
} else {
- regexMatchBufferLimit =
- Math.min(regexMatchBufferLimit * 2, regexMatchBuffer.capacity)
+ // no more data could affect the match result, so exit the loop
and
+ // figure out the match result
+ keepMatching = false
}
} else {
- // no more data could affect the match result, so exit the loop and
- // figure out the match result
+ // We failed to decode any data, that means that there is no more
+ // data to match against. Either we've already done a match or more
+ // data could have changed the outcome, or there was never any data
+ // to match in the first place. In either case, the state of
isMatch
+ // is correct--we are done.
keepMatching = false
}
+ }
+
+ if (isMatch && matcher.end != 0) {
+ // got a non-zero length match, set the bit position to the end of
the
+ // match, note that the end match position is the index *after* the
+ // last match, so we need the ending bitPosition of the previous
+ // character
+ val endingBitPos = regexMatchBitPositionBuffer.get(matcher.end - 1)
+ setBitPos0b(endingBitPos)
} else {
- // We failed to decode any data, that means that there is no more
- // data to match against. Either we've already done a match or more
- // data could have changed the outcome, or there was never any data
- // to match in the first place. In either case, the state of isMatch
- // is correct--we are done.
- keepMatching = false
+ // failed to match, set the bit position back to where we started
+ setBitPos0b(startingBitPos)
}
- }
- if (isMatch && matcher.end != 0) {
- // got a non-zero length match, set the bit position to the end of the
- // match, note that the end match position is the index *after* the
- // last match, so we need the ending bitPosition of the previous
- // character
- val endingBitPos = regexMatchBitPositionBuffer.get(matcher.end - 1)
- setBitPos0b(endingBitPos)
- } else {
- // failed to match, set the bit position back to where we started
- setBitPos0b(startingBitPos)
+ isMatch
}
-
- isMatch
}
}
diff --git
a/daffodil-core/src/main/scala/org/apache/daffodil/lib/util/Pool.scala
b/daffodil-core/src/main/scala/org/apache/daffodil/lib/util/Pool.scala
index bae35b891..96d94b092 100644
--- a/daffodil-core/src/main/scala/org/apache/daffodil/lib/util/Pool.scala
+++ b/daffodil-core/src/main/scala/org/apache/daffodil/lib/util/Pool.scala
@@ -17,6 +17,7 @@
package org.apache.daffodil.lib.util
+import java.util.concurrent.ConcurrentLinkedQueue
import scala.collection.mutable
import org.apache.daffodil.lib.equality.*
@@ -108,3 +109,56 @@ trait Pool[T <: Poolable] {
}
}
+
+/**
+ * Provides a fast and efficient thread safe pool of reusable instances
+ *
+ * This intentionally avoids the use of ThreadLocal, since ThreadLocals can
easily lead to
+ * memory leaks that are difficult to avoid. And for this reason, in general
it is best to
+ * avoid ThreadLocals entirely unless you really are storing Thread specific
data. If
+ * ThreadLocal is just being used as a pool of non-threadsafe instances, this
+ * ThreadSafePool should likely be used instead.
+ *
+ * Unlike ThreadLocals, the instances from this ThreadSafePool could be shared
with
+ * different threads, so it is important to take that into consideration if
instances are
+ * mutated.
+ *
+ * To ensure efficiency and thread safety, this makes use of a
ConcurrentLinkedQueue to
+ * store available instances. New instances are allocated by implementing the
allocate()
+ * method. The withInstance method will get an instance form the queue (or
create one if
+ * none are available), call the lamdba function, and return the instance back
to the
+ * queue. The instance must not be stored to a variable outside the scope of
withInstance.
+ *
+ * To avoid potential lambda allocations, this inlines the withInstance
function and
+ * parameters.
+ *
+ * Example usage:
+ *
+ * class Foo {
+ * val barPool = new ThreadSafePool[Bar] {
+ * override def allocate(): Bar = { new Bar() }
+ * }
+ *
+ * def doSomething(): Unit = {
+ * barPool.withInstance { bar =>
+ * bar.act()
+ * }
+ * }
+ * }
+ *
+ */
+abstract class ThreadSafePool[T]() extends Serializable {
+ protected def allocate(): T
+
+ private val queue = new ConcurrentLinkedQueue[T]()
+
+ inline def withInstance[A](inline f: T => A): A = {
+ val polled = queue.poll()
+ val inst = if (polled == null) allocate() else polled
+ try {
+ f(inst)
+ } finally {
+ queue.offer(inst)
+ }
+ }
+}
diff --git
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/DataProcessor.scala
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/DataProcessor.scala
index 9407eae8b..93909021e 100644
---
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/DataProcessor.scala
+++
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/DataProcessor.scala
@@ -57,6 +57,7 @@ import org.apache.daffodil.lib.oolag.ErrorAlreadyHandled
import org.apache.daffodil.lib.util.Maybe
import org.apache.daffodil.lib.util.Maybe.*
import org.apache.daffodil.lib.util.Misc
+import org.apache.daffodil.lib.util.ThreadSafePool
import org.apache.daffodil.runtime1.events.MultipleEventHandler
import org.apache.daffodil.runtime1.externalvars.ExternalVariablesLoader
import org.apache.daffodil.runtime1.infoset.DIElement
@@ -119,14 +120,12 @@ class DataProcessor(
diagnostics
)
- // This thread local state is used by the PState when it needs buffers for
+ // This thread safe state pool is used by the PState when it needs buffers
for
// regex matching. This cannot be in PState because a PState does not last
// beyond a single parse, but we want to share this among different parses to
- // avoid large memory allocations. The alternative is to use a ThreadLocal
- // companion object, but that would have not access to tunables, so one could
- // not configure the size of the regex match buffers.
- @transient lazy val regexMatchState = new ThreadLocal[(CharBuffer,
LongBuffer)] {
- override def initialValue = {
+ // avoid large memory allocations.
+ @transient lazy val regexMatchStatePool = new ThreadSafePool[(CharBuffer,
LongBuffer)] {
+ override def allocate() = {
val cb =
CharBuffer.allocate(tunables.maximumRegexMatchLengthInCharacters)
val lb =
LongBuffer.allocate(tunables.maximumRegexMatchLengthInCharacters)
(cb, lb)
diff --git
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/EvCalendarLanguage.scala
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/EvCalendarLanguage.scala
index 82e35439c..fd17beb55 100644
---
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/EvCalendarLanguage.scala
+++
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/EvCalendarLanguage.scala
@@ -19,6 +19,7 @@ package org.apache.daffodil.runtime1.processors
import org.apache.daffodil.lib.cookers.Converter
import org.apache.daffodil.lib.exceptions.*
+import org.apache.daffodil.lib.util.ThreadSafePool
import org.apache.daffodil.runtime1.dsom.*
import com.ibm.icu.text.SimpleDateFormat
@@ -103,8 +104,8 @@ class DateTimeFormatterEv(
localeEv: CalendarLanguageEv,
pattern: String,
eci: DPathElementCompileInfo
-) extends Evaluatable[ThreadLocal[SimpleDateFormat]](eci)
- with InfosetCachedEvaluatable[ThreadLocal[SimpleDateFormat]] {
+) extends Evaluatable[ThreadSafePool[SimpleDateFormat]](eci)
+ with InfosetCachedEvaluatable[ThreadSafePool[SimpleDateFormat]] {
override def runtimeDependencies = Seq(localeEv)
@@ -113,17 +114,18 @@ class DateTimeFormatterEv(
val locale = localeEv.evaluate(state)
// As per ICU4J documentation, "Date formats are not synchronized. If
multiple threads
- // access a format concurrently, it must be synchronized externally."
Rather than
- // synchronzing, we create a ThreadLocal so each thread gets their own
copy of the
- // SimpleDateFormat
- val dateFormatTL = new ThreadLocal[SimpleDateFormat] with Serializable {
- override def initialValue = {
+ // access a format concurrently, it must be synchronized externally." We
achieve thread
+ // saftey be creating a thread safe pool of SimpleDateFormats. This should
have minimal
+ // overhead and allows reuse of SimpleDateFormat instances, and avoids
things like memory
+ // leaks often created by ThreadLocals
+ val dateFormatPool = new ThreadSafePool[SimpleDateFormat] {
+ override def allocate() = {
val formatter = new SimpleDateFormat(pattern, locale)
formatter.setCalendar(calendar)
formatter.setLenient(calendar.isLenient)
formatter
}
}
- dateFormatTL
+ dateFormatPool
}
}
diff --git
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/ProcessorStateBases.scala
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/ProcessorStateBases.scala
index 701ac6180..fecc77fe1 100644
---
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/ProcessorStateBases.scala
+++
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/ProcessorStateBases.scala
@@ -49,6 +49,7 @@ import org.apache.daffodil.lib.util.Maybe.Nope
import org.apache.daffodil.lib.util.Maybe.One
import org.apache.daffodil.lib.util.MaybeInt
import org.apache.daffodil.lib.util.MaybeULong
+import org.apache.daffodil.lib.util.ThreadSafePool
import org.apache.daffodil.runtime1.dpath.DState
import org.apache.daffodil.runtime1.dsom.DPathCompileInfo
import org.apache.daffodil.runtime1.dsom.RuntimeSchemaDefinitionError
@@ -699,8 +700,8 @@ final class CompileState(
// do nothing
}
- def regexMatchBuffer: CharBuffer = Assert.usageError("Not to be used.")
- def regexMatchBitPositionBuffer: LongBuffer = Assert.usageError("Not to be
used.")
+ def regexMatchStatePool: ThreadSafePool[(CharBuffer, LongBuffer)] =
+ Assert.usageError("Not to be used.")
// $COVERAGE-OFF$
override def setVariable(
diff --git
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/RuntimeData.scala
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/RuntimeData.scala
index 545e5a58d..adf4c23b3 100644
---
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/RuntimeData.scala
+++
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/RuntimeData.scala
@@ -50,6 +50,7 @@ import org.apache.daffodil.lib.util.Maybe
import org.apache.daffodil.lib.util.Maybe.Nope
import org.apache.daffodil.lib.util.Misc
import org.apache.daffodil.lib.util.OKOrError
+import org.apache.daffodil.lib.util.ThreadSafePool
import org.apache.daffodil.lib.xml.GlobalQName
import org.apache.daffodil.lib.xml.LocalDeclQName
import org.apache.daffodil.lib.xml.NS
@@ -263,9 +264,8 @@ final class SimpleTypeRuntimeData(
* matchers, and we need this generally because matchers are stateful so
cannot
* be shared across threads.
*/
- def matchers: (Seq[Matcher], Option[Matcher]) = matcherTL.get()
- private lazy val matcherTL = new ThreadLocal[(Seq[Matcher],
Option[Matcher])] {
- protected final override def initialValue() = {
+ private lazy val matcherPool = new ThreadSafePool[(Seq[Matcher],
Option[Matcher])] {
+ protected final override def allocate() = {
val patternMatchers = patternValues.map { case (_, r) =>
r.pattern.matcher("") }
val optEnumMatcher = enumerationValues.map { en =>
en.r.pattern.matcher("") }
(patternMatchers, optEnumMatcher)
@@ -333,25 +333,30 @@ final class SimpleTypeRuntimeData(
val e = this
- lazy val (patternMatchers, optEnumMatcher) = this.matchers
+ if (e.patternValues.nonEmpty || e.enumerationValues.isDefined) {
+ matcherPool.withInstance { matchers =>
+ val (patternMatchers, optEnumMatcher) = matchers
+
+ if (e.patternValues.nonEmpty) {
+ val check = checkPatterns(currentElement, patternMatchers)
+ if (!check) {
+ // The escaping is important here as error messages were
impossible to figure out when control chars were involved.
+ val patternStrings = e.patternValues
+ .map { case (_, r: Regex) =>
XMLUtils.escape(r.pattern.pattern()) }
+ .mkString(",")
+ return Error("facet pattern(s): %s".format(patternStrings))
+ }
+ }
- if (e.patternValues.nonEmpty) {
- val check = checkPatterns(currentElement, patternMatchers)
- if (!check) {
- // The escaping is important here as error messages were impossible to
figure out when control chars were involved.
- val patternStrings = e.patternValues
- .map { case (_, r: Regex) => XMLUtils.escape(r.pattern.pattern()) }
- .mkString(",")
- return Error("facet pattern(s): %s".format(patternStrings))
+ if (e.enumerationValues.isDefined) {
+ val check = checkEnumerations(currentElement, optEnumMatcher.get)
+ if (!check) {
+ return Error("facet enumeration(s):
%s".format(e.enumerationValues.mkString(",")))
+ }
+ }
}
}
- if (e.enumerationValues.isDefined) {
- val check = checkEnumerations(currentElement, optEnumMatcher.get)
- if (!check) {
- return Error("facet enumeration(s):
%s".format(e.enumerationValues.mkString(",")))
- }
- }
// Check length
if (e.length.isDefined) {
val length = e.length.get
diff --git
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/PState.scala
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/PState.scala
index 11a54f326..8bda3425a 100644
---
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/PState.scala
+++
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/PState.scala
@@ -528,8 +528,7 @@ final class PState private (
}
}
- override lazy val (regexMatchBuffer, regexMatchBitPositionBuffer) =
- dataProcArg.regexMatchState.get
+ override lazy val regexMatchStatePool = dataProcArg.regexMatchStatePool
/**
* Verify that the state is left where we expect it to be after
diff --git
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/PrimitivesDateTime1.scala
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/PrimitivesDateTime1.scala
index e29602268..88002568e 100644
---
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/PrimitivesDateTime1.scala
+++
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/PrimitivesDateTime1.scala
@@ -62,8 +62,6 @@ case class ConvertTextCalendarParser(
// the case.
calendarOrig.clear()
- val df = dateTimeFormatterEv.evaluate(start).get
-
// When we evaluate calendarEV, if it is a constant we will always get back
// the same Calendar object. Because of this it is important here to clone
// this calendar and always use the clone below for two reasons:
@@ -78,8 +76,11 @@ case class ConvertTextCalendarParser(
// cloning, we ensure that they modify different objects.
val calendar = calendarOrig.clone().asInstanceOf[Calendar]
- df.setCalendar(calendar)
- df.parse(str, calendar, pos);
+ val dateTimeFormatterPool = dateTimeFormatterEv.evaluate(start)
+ dateTimeFormatterPool.withInstance { df =>
+ df.setCalendar(calendar)
+ df.parse(str, calendar, pos);
+ }
// Verify that we did not fail to parse and that we consumed the entire
string. Note that
// getErrorIndex is never set and is always -1. Only a getIndex value of
zero means there
diff --git
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/unparsers/UState.scala
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/unparsers/UState.scala
index 125fad645..fb0f627cd 100644
---
a/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/unparsers/UState.scala
+++
b/daffodil-core/src/main/scala/org/apache/daffodil/runtime1/processors/unparsers/UState.scala
@@ -41,6 +41,7 @@ import org.apache.daffodil.lib.util.MStackOfMaybe
import org.apache.daffodil.lib.util.Maybe
import org.apache.daffodil.lib.util.Maybe.Nope
import org.apache.daffodil.lib.util.Maybe.One
+import org.apache.daffodil.lib.util.ThreadSafePool
import org.apache.daffodil.runtime1.dpath.UnparserBlocking
import org.apache.daffodil.runtime1.iapi.DFDL
import org.apache.daffodil.runtime1.infoset.DIArray
@@ -398,8 +399,8 @@ abstract class UState(
}
}
- def regexMatchBuffer: CharBuffer = Assert.usageError("Not to be used.")
- def regexMatchBitPositionBuffer: LongBuffer = Assert.usageError("Not to be
used.")
+ def regexMatchStatePool: ThreadSafePool[(CharBuffer, LongBuffer)] =
+ Assert.usageError("Not to be used.")
def documentElement: DIDocument
diff --git
a/daffodil-core/src/main/scala/org/apache/daffodil/unparsers/runtime1/ConvertTextCalendarUnparser.scala
b/daffodil-core/src/main/scala/org/apache/daffodil/unparsers/runtime1/ConvertTextCalendarUnparser.scala
index 47c2a2bed..386ab5cf7 100644
---
a/daffodil-core/src/main/scala/org/apache/daffodil/unparsers/runtime1/ConvertTextCalendarUnparser.scala
+++
b/daffodil-core/src/main/scala/org/apache/daffodil/unparsers/runtime1/ConvertTextCalendarUnparser.scala
@@ -67,8 +67,6 @@ case class ConvertTextCalendarUnparser(
// the case.
calendarOrig.clear()
- val df = dateTimeFormatterEv.evaluate(state).get
-
// When we evaluate calendarEV, if it is a constant we will always get back
// the same Calendar object. Because of this it is important here to clone
// this calendar and always use the clone below for two reasons:
@@ -105,8 +103,11 @@ case class ConvertTextCalendarUnparser(
calendar.set(Calendar.MILLISECOND,
infosetCalendar.get(Calendar.MILLISECOND))
calendar.setTimeZone(infosetCalendar.getTimeZone)
- df.setCalendar(calendar)
- val str = df.format(calendar)
+ val dateFormatterPool = dateTimeFormatterEv.evaluate(state)
+ val str = dateFormatterPool.withInstance { df =>
+ df.setCalendar(calendar)
+ df.format(calendar)
+ }
node.overwriteDataValue(str)
}
diff --git
a/daffodil-core/src/main/scala/org/apache/daffodil/validation/XercesValidator.scala
b/daffodil-core/src/main/scala/org/apache/daffodil/validation/XercesValidator.scala
index 49ee64cd8..abe928e82 100644
---
a/daffodil-core/src/main/scala/org/apache/daffodil/validation/XercesValidator.scala
+++
b/daffodil-core/src/main/scala/org/apache/daffodil/validation/XercesValidator.scala
@@ -25,6 +25,7 @@ import javax.xml.transform.stream.StreamSource
import org.apache.daffodil.api
import org.apache.daffodil.lib.util.Misc
+import org.apache.daffodil.lib.util.ThreadSafePool
import org.apache.daffodil.lib.xml.DFDLCatalogResolver
import org.apache.daffodil.lib.xml.XMLUtils
@@ -72,15 +73,26 @@ object XercesValidatorFactory {
class XercesValidator(schemaSource: javax.xml.transform.Source)
extends api.validation.Validator {
- private val factory = new
org.apache.xerces.jaxp.validation.XMLSchemaFactory()
- private val resolver = DFDLCatalogResolver.get
- factory.setResourceResolver(resolver)
-
- private val schema = factory.newSchema(schemaSource)
+ private val schema = {
+ val factory = new org.apache.xerces.jaxp.validation.XMLSchemaFactory()
+ factory.setResourceResolver(DFDLCatalogResolver.get)
+ factory.newSchema(schemaSource)
+ }
- private val validator = new ThreadLocal[XercesValidatorImpl] {
- override def initialValue(): XercesValidatorImpl =
- initializeValidator(schema.newValidator, resolver)
+ // the Xerces Validator is not thread safe, so we use a ThreadSafePool. The
use of a pool
+ // allows reuse of objects that are expensive to create while ensuring each
Thread gets their
+ // own instance. Note that we do not use ThreadLocal, since that can lead to
memory leaks that
+ // cannot be easily cleaned up
+ private val validatorPool = new ThreadSafePool[XercesValidatorImpl] {
+ override def allocate(): XercesValidatorImpl = {
+ val v = schema.newValidator
+ v.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true)
+ v.setFeature(XMLUtils.XML_DISALLOW_DOCTYPE_FEATURE, true)
+ v.setFeature("http://xml.org/sax/features/validation", true)
+ v.setFeature("http://apache.org/xml/features/validation/schema", true)
+
v.setFeature("http://apache.org/xml/features/validation/schema-full-checking",
true)
+ v
+ }
}
def validateXML(
@@ -98,34 +110,22 @@ class XercesValidator(schemaSource:
javax.xml.transform.Source)
val documentSource = new StreamSource(document)
- // get the validator instance for this thread
- val xv = validator.get()
-
- xv.setErrorHandler(eh)
-
- // validate the document
- try {
- xv.validate(documentSource)
- } catch {
- // can be thrown by the resolver if it cannot
- // resolve the schemaLocation of an include/import.
- // Regular Xerces doesn't report this as an error.
- case spe: SAXParseException => eh.error(spe)
+ validatorPool.withInstance { xv =>
+ try {
+ // DFDLCatalogResolver.get returns a resolver for use by a thread, but
the validator in
+ // the pool might have been created with another thread. We need to
make sure this
+ // validator uses the resolver allocated for this thread.
+ xv.setResourceResolver(DFDLCatalogResolver.get)
+ xv.setErrorHandler(eh)
+ xv.validate(documentSource)
+ } catch {
+ // can be thrown by the resolver if it cannot
+ // resolve the schemaLocation of an include/import.
+ // Regular Xerces doesn't report this as an error.
+ case spe: SAXParseException => eh.error(spe)
+ }
}
}
-
- private def initializeValidator(
- validator: XercesValidatorImpl,
- resolver: DFDLCatalogResolver
- ): XercesValidatorImpl = {
- validator.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true)
- validator.setFeature(XMLUtils.XML_DISALLOW_DOCTYPE_FEATURE, true)
- validator.setFeature("http://xml.org/sax/features/validation", true)
- validator.setFeature("http://apache.org/xml/features/validation/schema",
true)
-
validator.setFeature("http://apache.org/xml/features/validation/schema-full-checking",
true)
- validator.setResourceResolver(resolver)
- validator
- }
}
object XercesValidator {
diff --git
a/daffodil-core/src/test/scala/org/apache/daffodil/io/FormatInfoForUnitTest.scala
b/daffodil-core/src/test/scala/org/apache/daffodil/io/FormatInfoForUnitTest.scala
index 17c511e25..4366569e1 100644
---
a/daffodil-core/src/test/scala/org/apache/daffodil/io/FormatInfoForUnitTest.scala
+++
b/daffodil-core/src/test/scala/org/apache/daffodil/io/FormatInfoForUnitTest.scala
@@ -34,6 +34,7 @@ import
org.apache.daffodil.lib.schema.annotation.props.gen.EncodingErrorPolicy
import org.apache.daffodil.lib.schema.annotation.props.gen.UTF16Width
import org.apache.daffodil.lib.util.Maybe
import org.apache.daffodil.lib.util.MaybeInt
+import org.apache.daffodil.lib.util.ThreadSafePool
object FormatInfoForUnitTest {
def apply() = {
@@ -58,8 +59,11 @@ class FormatInfoForUnitTest private () extends FormatInfo {
var encodingMandatoryAlignmentInBits: Int = 8
var encodingErrorPolicy: EncodingErrorPolicy = EncodingErrorPolicy.Replace
var tunable: DaffodilTunables = DaffodilTunables()
- var regexMatchBuffer = CharBuffer.allocate(1024)
- var regexMatchBitPositionBuffer = LongBuffer.allocate(1024)
+ var regexMatchStatePool = new ThreadSafePool[(CharBuffer, LongBuffer)] {
+ override def allocate() = {
+ (CharBuffer.allocate(1024), LongBuffer.allocate(1024))
+ }
+ }
def reset(cs: BitsCharset): Unit = {
priorEncoding = cs
@@ -107,8 +111,7 @@ class FakeFormatInfo(val bitOrder: BitOrder, val byteOrder:
ByteOrder) extends F
def encodingMandatoryAlignmentInBits: Int = ???
def encodingErrorPolicy: EncodingErrorPolicy = ???
def tunable: DaffodilTunables = ???
- def regexMatchBuffer: CharBuffer = ???
- def regexMatchBitPositionBuffer: LongBuffer = ???
+ def regexMatchStatePool: ThreadSafePool[(CharBuffer, LongBuffer)] = ???
}
object FakeFormatInfo_MSBF_BE
diff --git
a/daffodil-schematron/src/main/scala/org/apache/daffodil/validation/schematron/SchematronValidator.scala
b/daffodil-schematron/src/main/scala/org/apache/daffodil/validation/schematron/SchematronValidator.scala
index 4d9ad6abf..011546913 100644
---
a/daffodil-schematron/src/main/scala/org/apache/daffodil/validation/schematron/SchematronValidator.scala
+++
b/daffodil-schematron/src/main/scala/org/apache/daffodil/validation/schematron/SchematronValidator.scala
@@ -31,6 +31,7 @@ import scala.xml.Elem
import scala.xml.XML
import org.apache.daffodil.api
+import org.apache.daffodil.lib.util.ThreadSafePool
import org.apache.daffodil.lib.xml.DaffodilSAXParserFactory
import org.xml.sax.InputSource
@@ -44,11 +45,12 @@ final class SchematronValidator(
svrlPath: Option[URI]
) extends api.validation.Validator {
- // XMLReader and Transformer are not thread safe so are ThreadLocals.
Alternatively, they could be
- // created every time validateXML, but ThreadLocal allows reuse of objects
- // that are potentially expensive to create
- val readerTransformerTL = new ThreadLocal[(XMLReader, Transformer)] {
- override def initialValue(): (XMLReader, Transformer) = {
+ // XMLReader and Transformer are not thread safe, so we use a
ThreadSafePool. The use of a
+ // pool allows reuse of objects that are expensive to create while ensuring
each Thread gets
+ // their own instance. Note that we do not use ThreadLocal, since that can
lead to memory
+ // leaks that cannot be easily cleaned up
+ val readerTransformerPool = new ThreadSafePool[(XMLReader, Transformer)] {
+ override def allocate(): (XMLReader, Transformer) = {
val factory = DaffodilSAXParserFactory()
factory.setValidating(false)
val reader = factory.newSAXParser.getXMLReader
@@ -61,10 +63,15 @@ final class SchematronValidator(
document: InputStream,
handler: api.validation.ValidationHandler
): Unit = {
- val (reader, transformer) = readerTransformerTL.get
val writer = new StringWriter
- transformer
- .transform(new SAXSource(reader, new InputSource(document)), new
StreamResult(writer))
+
+ readerTransformerPool.withInstance { rt =>
+ val (reader, transformer) = rt
+ val source = new SAXSource(reader, new InputSource(document))
+ val result = new StreamResult(writer)
+ transformer.transform(source, result)
+ }
+
val svrlString = writer.toString
val svrl = XML.loadString(svrlString)
svrl.child.collect { case f @ Elem("svrl", "failed-assert", _, _, msg*) =>
diff --git
a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
index 8e8dacbae..e74a2ce33 100644
--- a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
+++ b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
@@ -72,6 +72,7 @@ import org.apache.daffodil.lib.util.Misc.bits2Bytes
import org.apache.daffodil.lib.util.Misc.hex2Bits
import org.apache.daffodil.lib.util.Misc.uriToDiagnosticFile
import org.apache.daffodil.lib.util.SchemaUtils
+import org.apache.daffodil.lib.util.ThreadSafePool
import org.apache.daffodil.lib.xml.DaffodilXMLLoader
import org.apache.daffodil.lib.xml.XMLUtils
import org.apache.daffodil.tdml.DiagnosticType.DiagnosticType
@@ -1954,9 +1955,7 @@ object VerifyTestCase {
override def tunable: DaffodilTunables = doNotUse
- override def regexMatchBuffer: CharBuffer = doNotUse
-
- override def regexMatchBitPositionBuffer: LongBuffer = doNotUse
+ override def regexMatchStatePool: ThreadSafePool[(CharBuffer,
LongBuffer)] = doNotUse
}
Using.resource(InputSourceDataInputStream(bytes)) { dis =>
diff --git
a/daffodil-test/src/test/scala/org/apache/daffodil/runtime1/layers/AISPayloadArmoringLayer.scala
b/daffodil-test/src/test/scala/org/apache/daffodil/runtime1/layers/AISPayloadArmoringLayer.scala
index 4cda6de4c..94b3ce444 100644
---
a/daffodil-test/src/test/scala/org/apache/daffodil/runtime1/layers/AISPayloadArmoringLayer.scala
+++
b/daffodil-test/src/test/scala/org/apache/daffodil/runtime1/layers/AISPayloadArmoringLayer.scala
@@ -38,6 +38,7 @@ import
org.apache.daffodil.lib.schema.annotation.props.gen.EncodingErrorPolicy
import org.apache.daffodil.lib.schema.annotation.props.gen.UTF16Width
import org.apache.daffodil.lib.util.Maybe
import org.apache.daffodil.lib.util.MaybeInt
+import org.apache.daffodil.lib.util.ThreadSafePool
import org.apache.commons.io.IOUtils
@@ -107,8 +108,7 @@ class AISPayloadArmoringOutputStream(jos:
java.io.OutputStream) extends OutputSt
override def maybeUTF16Width: Maybe[UTF16Width] = doNotUse
override def encodingMandatoryAlignmentInBits: Int = doNotUse
override def tunable: DaffodilTunables = doNotUse
- override def regexMatchBuffer: CharBuffer = doNotUse
- override def regexMatchBitPositionBuffer: LongBuffer = doNotUse
+ override def regexMatchStatePool: ThreadSafePool[(CharBuffer, LongBuffer)]
= doNotUse
}
override def close(): Unit = {