This is an automated email from the ASF dual-hosted git repository.
slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git
The following commit(s) were added to refs/heads/main by this push:
new 6eeae43d1 Allow for sharing of Validators between DataProcessors
6eeae43d1 is described below
commit 6eeae43d1f7ae9201815311910e7897c783ef25b
Author: Steve Lawrence <[email protected]>
AuthorDate: Mon Jun 3 12:59:25 2024 -0400
Allow for sharing of Validators between DataProcessors
Each DataProcessor currently creates and stores a unique instance of its
Validator. When a DataProcessor is copied with one of the withXYZ
functions, the validator is not copied and must be created again when
that new processor performs validation, even though it uses the same
schema. In most normal uses this isn't actually a big deal since withXYZ
functions are not called frequently and the validator won't actually be
created until validation is needed.
However, the TDML Runner often calls withXYZ for every test, which means
if validation is enabled then every test will recreate a unique
Validator. This can be very slow and expensive, especially for large
schemas.
To avoid this, this modifies the DataProcessor so the withXYZ functions
copy the validator so it is shared among DataProcessors. And the
withValidationMode function ensures we only create a new validator if
the mode actually changes, avoiding the need to create
unnecessary/expensive Validators.
This also modifies the TDML runner so that the cached DataProcessors are
built using the value of defaultValidation. This way each cached
DataProcessor contains the pre-built Validator and any test cases that
use the same validation mode will not need to rebuild the Validator.
Note that this means tests should run much quicker if you set
defaultValidation="on" and validation="off" for tests that don't need
validation, rather than setting defaultValidation="off" and
validation="on" for tests that do need it, since the former will build
the Validator once and share it with the tests that do not turn off
validation.
Another side effect of this change is we now build the Validator
immediately when withValidationMode is called rather than lazily waiting
for the validator to be used. This is arguably better since it means
there won't be a possible hiccup on the first parse.
DAFFODIL-2901
---
.../apache/daffodil/lib/api/ValidationMode.scala | 8 ++++
.../runtime1/processors/DataProcessor.scala | 49 +++++++++++++++-------
.../org/apache/daffodil/tdml/TDMLRunner.scala | 43 +++++++++++--------
3 files changed, 69 insertions(+), 31 deletions(-)
diff --git
a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/api/ValidationMode.scala
b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/api/ValidationMode.scala
index 48603b84c..b8892dd17 100644
---
a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/api/ValidationMode.scala
+++
b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/api/ValidationMode.scala
@@ -31,4 +31,12 @@ object ValidationMode extends Enum {
case object Full extends Type(30)
case class Custom(v: Validator) extends Type(100)
+
+ def fromString(str: String): ValidationMode.Type = {
+ str match {
+ case "on" => Full
+ case "limited" => Limited
+ case "off" => Off
+ }
+ }
}
diff --git
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/DataProcessor.scala
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/DataProcessor.scala
index 20e17dd25..98525bc07 100644
---
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/DataProcessor.scala
+++
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/DataProcessor.scala
@@ -137,6 +137,9 @@ class DataProcessor(
// back when the object is re-initialized.
//
val validationMode: ValidationMode.Type = ValidationMode.Off,
+ // The Validator API requires this to be thread-safe so this is safe to
share among different
+ // DataProcessors
+ val validator: Validator = null,
protected val areDebugging: Boolean = false,
protected val optDebugger: Option[Debugger] = None,
protected val diagnostics: Seq[Diagnostic] = Seq.empty
@@ -172,6 +175,7 @@ class DataProcessor(
tunables: DaffodilTunables = tunables,
variableMap: VariableMap = variableMap.copy(),
validationMode: ValidationMode.Type = validationMode,
+ validator: Validator = validator,
areDebugging: Boolean = areDebugging,
optDebugger: Option[Debugger] = optDebugger,
diagnostics: Seq[Diagnostic] = diagnostics
@@ -180,6 +184,7 @@ class DataProcessor(
tunables,
variableMap,
validationMode,
+ validator,
areDebugging,
optDebugger,
diagnostics
@@ -205,25 +210,38 @@ class DataProcessor(
override def clone(): DataProcessor = copy()
/**
- * Returns a data processor with all the same state, but the validation mode
changed to that of the argument.
+ * Returns a data processor with all the same state, but the validation mode
and validator changed to that of the argument.
*
* Note that the default validation mode is "off", that is, no validation is
performed.
*/
- def withValidationMode(mode: ValidationMode.Type): DataProcessor =
copy(validationMode = mode)
-
- def withValidator(validator: Validator): DataProcessor = withValidationMode(
- ValidationMode.Custom(validator)
- )
-
- lazy val validator: Validator = {
- validationMode match {
+ def withValidationMode(mode: ValidationMode.Type): DataProcessor = {
+ // create the appropriate validator for the mode. If the mode isn't
actually changing then
+ // we won't generate a new validator and just use the same one. The
Validator API requires
+ // the Validator to be thread safe, so it is fine to share the same
Validator with multiple
+ // DataProcessors
+ val newValidator = mode match {
+ case `validationMode` => validator
+ case ValidationMode.Off => null
+ case ValidationMode.Limited => null
case ValidationMode.Custom(cv) => cv
- case _ =>
+ case ValidationMode.Full => {
val cfg = XercesValidatorFactory.makeConfig(
ssrd.elementRuntimeData.schemaURIStringsForFullValidation
)
XercesValidatorFactory.makeValidator(cfg)
+ }
}
+ copy(
+ validationMode = mode,
+ validator = newValidator
+ )
+ }
+
+ def withValidator(validator: Validator): DataProcessor = {
+ copy(
+ validationMode = ValidationMode.Custom(validator),
+ validator = validator
+ )
}
def debugger = {
@@ -304,10 +322,13 @@ class DataProcessor(
// them back to their original values.
//
val dpToSave = this.copy(
- variableMap = ssrd.originalVariables, // reset to original variables
defined in schema
- validationMode =
- ValidationMode.Off, // explicitly turn off, so restored processor
won't be validating
- diagnostics = Seq.empty // don't save any warnings that were generated
+ // reset to original variables defined in schema
+ variableMap = ssrd.originalVariables,
+ // explicitly turn off validation, reloaded processors must call
withValidationMode
+ validationMode = ValidationMode.Off,
+ validator = null,
+ // don't save any warnings that were generated
+ diagnostics = Seq.empty
)
try {
diff --git
a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
index ee4830291..51a6a0ad1 100644
--- a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
+++ b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
@@ -401,6 +401,8 @@ class DFDLTestSuite private[tdml] (
val str = (ts \ "@defaultValidation").text
if (str == "") defaultValidationDefault else str
}
+ lazy val defaultValidationMode = ValidationMode.fromString(defaultValidation)
+
lazy val defaultConfig = {
val str = (ts \ "@defaultConfig").text
str
@@ -564,7 +566,11 @@ class DFDLTestSuite private[tdml] (
GlobalTDMLCompileResultCache.cache
}
- val compileResult = cache.getCompileResult(impl, key)
+ val compileResult = cache.getCompileResult(
+ impl,
+ key,
+ defaultValidationMode
+ )
compileResult
}
@@ -603,7 +609,7 @@ abstract class TestCase(testCaseXML: NodeSeq, val parent:
DFDLTestSuite) {
protected final var processor: TDMLDFDLProcessor = null
lazy val defaultRoundTrip: RoundTrip = parent.defaultRoundTrip
- lazy val defaultValidation: String = parent.defaultValidation
+ lazy val defaultValidationMode: ValidationMode.Type =
parent.defaultValidationMode
private lazy val defaultImplementations: Seq[String] =
parent.defaultImplementations
private lazy val tcImplementations = (testCaseXML \ "@implementations").text
@@ -713,18 +719,8 @@ abstract class TestCase(testCaseXML: NodeSeq, val parent:
DFDLTestSuite) {
case _ => false
}
lazy val validationMode: ValidationMode.Type = (testCaseXML \
"@validation").text match {
- case "on" => ValidationMode.Full
- case "limited" => ValidationMode.Limited
- case "off" => ValidationMode.Off
- case "" =>
- defaultValidation match {
- case "on" => ValidationMode.Full
- case "limited" => ValidationMode.Limited
- case "off" => ValidationMode.Off
- case other =>
- Assert.invariantFailed("unrecognized default validation enum string:
" + other)
- }
- case other => Assert.invariantFailed("unrecognized validation enum string:
" + other)
+ case "" => defaultValidationMode
+ case mode => ValidationMode.fromString(mode)
}
lazy val shouldValidate = validationMode != ValidationMode.Off
@@ -904,6 +900,7 @@ abstract class TestCase(testCaseXML: NodeSeq, val parent:
DFDLTestSuite) {
val useSerializedProcessor =
if (validationMode == ValidationMode.Full) false
+ else if (defaultValidationMode == ValidationMode.Full) false
else if (optExpectedWarnings.isDefined) false
else true
@@ -3006,7 +3003,8 @@ case class
TDMLCompileResultCache(entryExpireDurationSeconds: Option[Long]) {
def getCompileResult(
impl: AbstractTDMLDFDLProcessorFactory,
- key: TDMLCompileResultCacheKey
+ key: TDMLCompileResultCacheKey,
+ defaultValidationMode: ValidationMode.Type
): TDML.CompileResult = this.synchronized {
if (entryExpireDurationSeconds.isDefined) {
@@ -3032,7 +3030,6 @@ case class
TDMLCompileResultCache(entryExpireDurationSeconds: Option[Long]) {
key.optRootNamespace,
key.tunables
)
- cache += (key -> TDMLCompileResultCacheValue(compileResult, None))
compileResult match {
case Left(diags) => {
// a Left must have at least one error diagnostic if we don't get a
processor
@@ -3043,7 +3040,19 @@ case class
TDMLCompileResultCache(entryExpireDurationSeconds: Option[Long]) {
Assert.invariant(diags.forall(!_.isError))
}
}
- compileResult
+
+ // if compileResult is Right and we got a processor, set the processor
validation mode to
+ // the default validation mode of the test suite. This is useful in
cases where the
+ // default validation mode is on/full, in which case the Xerces
validator will be compiled
+ // and added to the DataProcessor that we cache. This way any tests that
do not change the
+ // validation mode from the default will be able to use the same
validator so we won't
+ // have to rebuild it for every test. This saves memory and should be
significantly
+ // faster.
+ val value = compileResult.map { case (diags, proc) =>
+ (diags, proc.withValidationMode(defaultValidationMode))
+ }
+ cache += (key -> TDMLCompileResultCacheValue(value, None))
+ value
}
}