stevedlawrence commented on a change in pull request #129: Cross Testing
Capability with IBM DFDL
URL: https://github.com/apache/incubator-daffodil/pull/129#discussion_r229054897
##########
File path:
daffodil-tdml/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
##########
@@ -437,69 +449,121 @@ abstract class TestCase(testCaseXML: NodeSeq, val
parent: DFDLTestSuite)
lazy val defaultRoundTrip: RoundTrip = parent.defaultRoundTrip
lazy val defaultValidation: String = parent.defaultValidation
- /**
- * This doesn't fetch a serialized processor, it runs whatever the processor
is
- * through a serialize then unserialize path to get a processor as if
- * it were being fetched from a file.
- */
- private def generateProcessor(pf: DFDL.ProcessorFactory,
useSerializedProcessor: Boolean): DFDLTestSuite.CompileResult = {
- val p = pf.onPath("/")
- val diags = p.getDiagnostics
- if (p.isError) Left(diags)
- else {
- val dp =
- if (useSerializedProcessor) {
- val os = new java.io.ByteArrayOutputStream()
- val output = Channels.newChannel(os)
- p.save(output)
+ private lazy val defaultImplementations: Seq[String] =
parent.defaultImplementations
+ private lazy val tcImplementations = (testCaseXML \ "@implementations").text
+ private lazy val implementationStrings =
+ if (tcImplementations == "") defaultImplementations
+ else tcImplementations.split("""\s+""").toSeq
- val is = new java.io.ByteArrayInputStream(os.toByteArray)
- val input = Channels.newChannel(is)
- val compiler_ = Compiler()
- compiler_.reload(input)
- } else p
-
- Right((diags, dp))
+ lazy val implementations: Seq[TDMLDFDLProcessorFactory] = {
+ //
+ // just by way of eating our own dogfood, we're always going to dynaload
the
+ // processor, even for the built-in Daffodil one.
+ //
+ lazy val dafpfs =
dynamicLoadTDMLProcessorFactory(Runner.daffodilTDMLDFDLProcessorFactoryName)
+ val allImpls = {
+ if (parent.optDebugger.isDefined)
+ dafpfs // when debugging or tracing, we only run daffodil, ignore
other implementations
+ else {
+ implementationStrings.flatMap { s =>
+ s.toLowerCase match {
+ //
+ // When adding another processor implementation to a test case
+ // you can also use daffodil by just specifying
implementations="daffodil otherOne.full.class.name"
+ // That is, you don't have to use a class name for daffodil.
+ case "daffodil" => dafpfs
+ case _ => dynamicLoadTDMLProcessorFactory(s)
+ }
+ }
+ }
}
+ allImpls.foreach {
+ _.setValidateDFDLSchemas(parent.validateDFDLSchemas)
+ }
+ if (allImpls.isEmpty) throw new TDMLException("No TDML DFDL
implementations found for '%s'".format(implementationStrings.mkString(", ")))
+ allImpls
}
- private def compileProcessor(schemaSource: DaffodilSchemaSource,
useSerializedProcessor: Boolean): DFDLTestSuite.CompileResult = {
- val pf = compiler.compileSource(schemaSource)
- val diags = pf.getDiagnostics
- if (pf.isError) {
- Left(diags) // throw new TDMLException(diags)
- } else {
- val res = this.generateProcessor(pf, useSerializedProcessor)
- res
- }
+ private lazy val tdmlDFDLProcessorFactoryTraitName = {
+ import scala.language.reflectiveCalls
+ // Doing this so that if we decide to rename the verbose class name here
+ // it will handle this code too.
+ // Also we can rename the packages, etc.
+ val clazzTag = scala.reflect.classTag[TDMLDFDLProcessorFactory]
+ val cname = clazzTag.toString()
+ cname
}
- final protected def getProcessor(schemaSource: DaffodilSchemaSource,
useSerializedProcessor: Boolean): DFDLTestSuite.CompileResult = {
- val res: DFDLTestSuite.CompileResult = schemaSource match {
- case uss: URISchemaSource =>
- SchemaDataProcessorCache.compileAndCache(uss, useSerializedProcessor,
- parent.checkAllTopLevel,
- root,
- null) {
- compileProcessor(uss, useSerializedProcessor)
- }
- case _ => {
- compileProcessor(schemaSource, useSerializedProcessor)
+ def dynamicLoadTDMLProcessorFactory(className: String):
Seq[TDMLDFDLProcessorFactory] = {
+ import scala.language.reflectiveCalls
+ import scala.language.existentials
+
+ val res =
+ try {
+
Seq(Class.forName(className).newInstance().asInstanceOf[TDMLDFDLProcessorFactory])
+ } catch {
+ case cnf: ClassNotFoundException =>
+ System.err.println("TDML Runner did not find implementation '%s'. No
tests will be run against it.".format(className))
+ Seq()
+ case ie: InstantiationException => {
+ //
+ // In this case, we found it, but we couldn't create an instance of
it, so it's somehow broken,
+ // but there's no point in trying to use it.
+ //
+ System.err.println("TDML Runner found implementation '%s'. But was
unable to create an instance. No tests will be run against
it.".format(className))
+ Seq()
+ }
+ case cce: ClassCastException => {
+ //
+ // In this case, we found it, created an instance, but it's not the
right type.
+ // So there's no point in trying to use it.
+ //
+ System.err.println("TDML Runner found implementation '%s'. But it
was not an instance of %s. No tests will be run against it.".format(
+ className, tdmlDFDLProcessorFactoryTraitName
+ ))
+ Seq()
+ }
Review comment:
I sortof imagined this different implementation thing working slightly
differently.
The way it works as I understand it, every test case has a list of
"implementations" associated with it (which may come from some defaults if not
defined). Then when we run a test case, we try to load and run the test against
all listed implementations. If an implementation is not found, it prints out
one of the above errors and ignores the implementation and continues with the
remaining implementations. And the test is considered a fail if any one of the
implementations fail.
However, in most cases, I'm only going to care about just the Daffodil
implementation and I won't even have the IBM jars on the class path. So I'm
just going to get a bunch of messages about the missing implementation.
Instead, does it make sense to change it so the "implementations" attribute
just specifies which implementations a particular test is know to work for, but
not necessarily that the test should be run with that implementation. We then
have some way to specify which implementation you want to use when you run
tests. Tests that are run but do not work with that implementation can be
skipped with junit's assume() method, e.g.:
```scala
assumeTrue("Test is not compatible with current implementation, skipping.",
curTest.worksWith(curImpl))
```
Tests that fail the assumption are marked as "skipped" in the final count,
so you end up with a clear count of how many tests were run and passed, how
many were run but failed, and how many were skipped due to known
incompatibilities with the implementation.
Another potential benefit is that I think right now the "implementations"
attribute is a list of fully qualify class names (e.g.
org.apache.daffodil.tdml.processors.DaffodilTDMLDFDLProcessorFactory). That's
pretty verbose, especially if we start listing more implementations. We could
potentially make it so a TDMLDFDLProcessorFactory must provide a simple name
(e.g. "daffodil", "ibm-java", "dfdl4s"), and those short names are all that go
in the implementations attribute. Then the above becomes something like:
```scala
assumeTrue("Test is not compatible with current implementation, skipping.",
curTest.implementations.contains(curImpl.name))
```
Thoughts?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services