This is an automated email from the ASF dual-hosted git repository.

slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git


The following commit(s) were added to refs/heads/main by this push:
     new 6eeae43d1 Allow for sharing of Validators between DataProcessors
6eeae43d1 is described below

commit 6eeae43d1f7ae9201815311910e7897c783ef25b
Author: Steve Lawrence <[email protected]>
AuthorDate: Mon Jun 3 12:59:25 2024 -0400

    Allow for sharing of Validators between DataProcessors
    
    Each DataProcessor currently creates and stores a unique instance of its
    Validator. When a DataProcessor is copied with one of the withXYZ
    functions, the validator is not copied and must be created again when
    that new processor performs validation, even though it uses the same
    schema. In most normal uses this isn't actually a big deal since withXYZ
    functions are not called frequently and the validator won't actually be
    created until validation is needed.
    
    However, the TDML Runner often calls withXYZ for every test, which means
    if validation is enabled then every test will recreate a unique
    Validator. This can be very slow and expensive, especially for large
    schemas.
    
    To avoid this, this modifies the DataProcessor so the withXYZ functions
    copy the validator so it is shared among DataProcessors. And the
    withValidationMode function ensures we only create a new validator if
    the mode actually changes, avoiding the need to create
    unnecessary/expensive Validators.
    
    This also modifies the TDML runner so that the cached DataProcessors are
    built using the value of defaultValidation. This way the cached
    DataProcessor contains the pre-built Validator and any test cases that
    use the same validation mode will not need to rebuild the Validator.
    
    Note that this means tests should run much quicker if you set
    defaultValidation="on" and validation="off" for tests that don't need
    validation, rather than setting defaultValidation="off" and
    validation="on" for tests that do need it, since the former will build
    the Validator once and share it with the tests that do not turn off
    validation.
    
    Another side effect of this change is we now build the Validator
    immediately when withValidationMode is called rather than lazily waiting
    for the validator to be used. This is arguably better since it means
    there won't be a possible hiccup on the first parse.
    
    DAFFODIL-2901
---
 .../apache/daffodil/lib/api/ValidationMode.scala   |  8 ++++
 .../runtime1/processors/DataProcessor.scala        | 49 +++++++++++++++-------
 .../org/apache/daffodil/tdml/TDMLRunner.scala      | 43 +++++++++++--------
 3 files changed, 69 insertions(+), 31 deletions(-)

diff --git 
a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/api/ValidationMode.scala 
b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/api/ValidationMode.scala
index 48603b84c..b8892dd17 100644
--- 
a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/api/ValidationMode.scala
+++ 
b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/api/ValidationMode.scala
@@ -31,4 +31,12 @@ object ValidationMode extends Enum {
   case object Full extends Type(30)
 
   case class Custom(v: Validator) extends Type(100)
+
+  def fromString(str: String): ValidationMode.Type = {
+    str match {
+      case "on" => Full
+      case "limited" => Limited
+      case "off" => Off
+    }
+  }
 }
diff --git 
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/DataProcessor.scala
 
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/DataProcessor.scala
index 20e17dd25..98525bc07 100644
--- 
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/DataProcessor.scala
+++ 
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/DataProcessor.scala
@@ -137,6 +137,9 @@ class DataProcessor(
   // back when the object is re-initialized.
   //
   val validationMode: ValidationMode.Type = ValidationMode.Off,
+  // The Validator API requires this to be thread-safe so this is safe to 
share among different
+  // DataProcessors
+  val validator: Validator = null,
   protected val areDebugging: Boolean = false,
   protected val optDebugger: Option[Debugger] = None,
   protected val diagnostics: Seq[Diagnostic] = Seq.empty
@@ -172,6 +175,7 @@ class DataProcessor(
     tunables: DaffodilTunables = tunables,
     variableMap: VariableMap = variableMap.copy(),
     validationMode: ValidationMode.Type = validationMode,
+    validator: Validator = validator,
     areDebugging: Boolean = areDebugging,
     optDebugger: Option[Debugger] = optDebugger,
     diagnostics: Seq[Diagnostic] = diagnostics
@@ -180,6 +184,7 @@ class DataProcessor(
     tunables,
     variableMap,
     validationMode,
+    validator,
     areDebugging,
     optDebugger,
     diagnostics
@@ -205,25 +210,38 @@ class DataProcessor(
   override def clone(): DataProcessor = copy()
 
   /**
-   * Returns a data processor with all the same state, but the validation mode 
changed to that of the argument.
+   * Returns a data processor with all the same state, but the validation mode 
and validator changed to that of the argument.
    *
    * Note that the default validation mode is "off", that is, no validation is 
performed.
    */
-  def withValidationMode(mode: ValidationMode.Type): DataProcessor = 
copy(validationMode = mode)
-
-  def withValidator(validator: Validator): DataProcessor = withValidationMode(
-    ValidationMode.Custom(validator)
-  )
-
-  lazy val validator: Validator = {
-    validationMode match {
+  def withValidationMode(mode: ValidationMode.Type): DataProcessor = {
+    // create the appropriate validator for the mode. If the mode isn't 
actually changing then
+    // we won't generate a new validator and just use the same one. The 
Validator API requires
+    // the Validator to be thread safe, so it is fine to share the same 
Validator with multiple
+    // DataProcessors
+    val newValidator = mode match {
+      case `validationMode` => validator
+      case ValidationMode.Off => null
+      case ValidationMode.Limited => null
       case ValidationMode.Custom(cv) => cv
-      case _ =>
+      case ValidationMode.Full => {
         val cfg = XercesValidatorFactory.makeConfig(
           ssrd.elementRuntimeData.schemaURIStringsForFullValidation
         )
         XercesValidatorFactory.makeValidator(cfg)
+      }
     }
+    copy(
+      validationMode = mode,
+      validator = newValidator
+    )
+  }
+
+  def withValidator(validator: Validator): DataProcessor = {
+    copy(
+      validationMode = ValidationMode.Custom(validator),
+      validator = validator
+    )
   }
 
   def debugger = {
@@ -304,10 +322,13 @@ class DataProcessor(
     // them back to their original values.
     //
     val dpToSave = this.copy(
-      variableMap = ssrd.originalVariables, // reset to original variables 
defined in schema
-      validationMode =
-        ValidationMode.Off, // explicitly turn off, so restored processor 
won't be validating
-      diagnostics = Seq.empty // don't save any warnings that were generated
+      // reset to original variables defined in schema
+      variableMap = ssrd.originalVariables,
+      // explicitly turn off validation, reloaded processors must call 
withValidationMode
+      validationMode = ValidationMode.Off,
+      validator = null,
+      // don't save any warnings that were generated
+      diagnostics = Seq.empty
     )
 
     try {
diff --git 
a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala 
b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
index ee4830291..51a6a0ad1 100644
--- a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
+++ b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
@@ -401,6 +401,8 @@ class DFDLTestSuite private[tdml] (
     val str = (ts \ "@defaultValidation").text
     if (str == "") defaultValidationDefault else str
   }
+  lazy val defaultValidationMode = ValidationMode.fromString(defaultValidation)
+
   lazy val defaultConfig = {
     val str = (ts \ "@defaultConfig").text
     str
@@ -564,7 +566,11 @@ class DFDLTestSuite private[tdml] (
         GlobalTDMLCompileResultCache.cache
       }
 
-    val compileResult = cache.getCompileResult(impl, key)
+    val compileResult = cache.getCompileResult(
+      impl,
+      key,
+      defaultValidationMode
+    )
     compileResult
   }
 
@@ -603,7 +609,7 @@ abstract class TestCase(testCaseXML: NodeSeq, val parent: 
DFDLTestSuite) {
   protected final var processor: TDMLDFDLProcessor = null
 
   lazy val defaultRoundTrip: RoundTrip = parent.defaultRoundTrip
-  lazy val defaultValidation: String = parent.defaultValidation
+  lazy val defaultValidationMode: ValidationMode.Type = 
parent.defaultValidationMode
 
   private lazy val defaultImplementations: Seq[String] = 
parent.defaultImplementations
   private lazy val tcImplementations = (testCaseXML \ "@implementations").text
@@ -713,18 +719,8 @@ abstract class TestCase(testCaseXML: NodeSeq, val parent: 
DFDLTestSuite) {
     case _ => false
   }
   lazy val validationMode: ValidationMode.Type = (testCaseXML \ 
"@validation").text match {
-    case "on" => ValidationMode.Full
-    case "limited" => ValidationMode.Limited
-    case "off" => ValidationMode.Off
-    case "" =>
-      defaultValidation match {
-        case "on" => ValidationMode.Full
-        case "limited" => ValidationMode.Limited
-        case "off" => ValidationMode.Off
-        case other =>
-          Assert.invariantFailed("unrecognized default validation enum string: 
" + other)
-      }
-    case other => Assert.invariantFailed("unrecognized validation enum string: 
" + other)
+    case "" => defaultValidationMode
+    case mode => ValidationMode.fromString(mode)
   }
 
   lazy val shouldValidate = validationMode != ValidationMode.Off
@@ -904,6 +900,7 @@ abstract class TestCase(testCaseXML: NodeSeq, val parent: 
DFDLTestSuite) {
 
       val useSerializedProcessor =
         if (validationMode == ValidationMode.Full) false
+        else if (defaultValidationMode == ValidationMode.Full) false
         else if (optExpectedWarnings.isDefined) false
         else true
 
@@ -3006,7 +3003,8 @@ case class 
TDMLCompileResultCache(entryExpireDurationSeconds: Option[Long]) {
 
   def getCompileResult(
     impl: AbstractTDMLDFDLProcessorFactory,
-    key: TDMLCompileResultCacheKey
+    key: TDMLCompileResultCacheKey,
+    defaultValidationMode: ValidationMode.Type
   ): TDML.CompileResult = this.synchronized {
 
     if (entryExpireDurationSeconds.isDefined) {
@@ -3032,7 +3030,6 @@ case class 
TDMLCompileResultCache(entryExpireDurationSeconds: Option[Long]) {
         key.optRootNamespace,
         key.tunables
       )
-      cache += (key -> TDMLCompileResultCacheValue(compileResult, None))
       compileResult match {
         case Left(diags) => {
           // a Left must have at least one error diagnostic if we don't get a 
processor
@@ -3043,7 +3040,19 @@ case class 
TDMLCompileResultCache(entryExpireDurationSeconds: Option[Long]) {
           Assert.invariant(diags.forall(!_.isError))
         }
       }
-      compileResult
+
+      // if compileResult is Right and we got a processor, set the processor 
validation mode to
+      // the default validation mode of the test suite. This is useful in 
cases where the
+      // default validation mode is on/full, in which case the Xerces 
validator will be compiled
+      // and added to the DataProcessor that we cache. This way any tests that 
do not change the
+      // validation mode from the default will be able to use the same 
validator so we won't
+      // have to rebuild it for every test. This saves memory and should be 
significantly
+      // faster.
+      val value = compileResult.map { case (diags, proc) =>
+        (diags, proc.withValidationMode(defaultValidationMode))
+      }
+      cache += (key -> TDMLCompileResultCacheValue(value, None))
+      value
     }
   }
 

Reply via email to