This is an automated email from the ASF dual-hosted git repository.
slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git
The following commit(s) were added to refs/heads/main by this push:
new be3fee7b2 Improve reload diagnosics when a character set plugin is
missing
be3fee7b2 is described below
commit be3fee7b241b3f0df577070bc43268cfa9d125ba
Author: Steve Lawrence <[email protected]>
AuthorDate: Thu Aug 22 16:05:09 2024 -0400
Improve reload diagnosics when a character set plugin is missing
Currently, reloading a saved parser when a neccessary character set
plugin is not on the classpath results in an exception with a very
helpful message that has nothing to do with character sets or plugins,
for example:
java.lang.ClassCastException: cannot assign instance of
scala.collection.immutable.List$SerializationProxy to field
org.apache.daffodil.runtime1.processors.ModelGroupRuntimeData.groupMembers
of type scala.collection.Seq in instance of
org.apache.daffodil.runtime1.processors.SequenceRuntimeData
This is a known issue with scala and its use of proxy when serializing
data structures like Lists and Maps.
To fix the exception and improve diagnostics, this uses writeReplace to
serialize a BitsCharset as a BitsCharsetSerializationProxy with
information about the needed BitsCharset. This proxy implements
readResolve to look up the BitsCharset from the bits charset registry
and restore the original BitsChraset. If not found in the registry, then
we throw an helpful exception that bubbles up to the reload function and
we can output a helpful diagnostic. Now we get something like:
[error] The saved parser was created with a different set of
dependencies containing a class no longer on the classpath: Charset
plugin com.example.MyCustomCharset for X-DFDL-MY-CUSTOM-CHARSET
Add an integraion test to make sure this errors with a reasonable
diagnostic, which require splitting a schema file into valid and invalid
parts so we can reuse it with the CLI.
Add tests for reloading UDFs and layers without a correct classpath.
UDF's work as is. For layers we just need to catch SDE exception when
reloading.
DAFFODIL-2915
---
.../apache/daffodil/core/compiler/Compiler.scala | 14 ++-
.../io/processors/charset/BitsCharset.scala | 31 +++++
.../apache/daffodil/cliTest/TestCLIPlugins.scala | 133 +++++++++++++++++++++
....xsd => TestBitsCharsetDefinition-DNE.dfdl.xsd} | 28 -----
.../charsets/TestBitsCharsetDefinition.dfdl.xsd | 9 --
.../charsets/TestBitsCharsetDefinition.tdml | 2 +-
6 files changed, 177 insertions(+), 40 deletions(-)
diff --git
a/daffodil-core/src/main/scala/org/apache/daffodil/core/compiler/Compiler.scala
b/daffodil-core/src/main/scala/org/apache/daffodil/core/compiler/Compiler.scala
index c16c71016..44b50ecc7 100644
---
a/daffodil-core/src/main/scala/org/apache/daffodil/core/compiler/Compiler.scala
+++
b/daffodil-core/src/main/scala/org/apache/daffodil/core/compiler/Compiler.scala
@@ -20,6 +20,7 @@ package org.apache.daffodil.core.compiler
import java.io.File
import java.io.FileInputStream
import java.io.InvalidClassException
+import java.io.InvalidObjectException
import java.io.ObjectInputStream
import java.io.StreamCorruptedException
import java.net.URI
@@ -40,6 +41,7 @@ import org.apache.daffodil.lib.api.UnitTestSchemaSource
import org.apache.daffodil.lib.exceptions.Assert
import org.apache.daffodil.lib.util.Misc
import org.apache.daffodil.runtime1.api.DFDL
+import org.apache.daffodil.runtime1.dsom.SchemaDefinitionError
import org.apache.daffodil.runtime1.processors.DataProcessor
/**
@@ -301,8 +303,9 @@ class Compiler private (
"The saved parser was created with a different version of " +
dependencyStr + " with incompatible class: " + ex.classname
)
//
- case ex @ (_: ClassNotFoundException | _: NoClassDefFoundError) =>
- // Both of these exception happens if a class that was used when saving
+ case ex @ (_: ClassNotFoundException | _: NoClassDefFoundError |
+ _: InvalidObjectException | _: SchemaDefinitionError) =>
+ // These exception happens if a class that was used when saving
// is no longer on the classpath when reloading.
//
// One example of this happening is saving a schema using Java 8 but
@@ -313,6 +316,13 @@ class Compiler private (
// when reloading a schema, and dependencies are just missing, or if a
// user switches depenency versions and the new version completely
// removes a class.
+ //
+ // Another example is we use a special BitsCharsetSerializationProxy to
+ // serialize charsets. This proxy lets us do our own existence checks
+ // during deserialization and avoids very confusions and unhelpful
+ // messages from the default Java/Scala deserialization when the class
+ // does not exist. This custom logic throws an InvalidObjectException
if
+ // the charset is not found
throw new InvalidParserException(
"The saved parser was created with a different set of dependencies
containing a class no longer on the classpath: " + ex.getMessage
)
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/io/processors/charset/BitsCharset.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/io/processors/charset/BitsCharset.scala
index d3c794eef..8a3b26bfd 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/io/processors/charset/BitsCharset.scala
+++
b/daffodil-io/src/main/scala/org/apache/daffodil/io/processors/charset/BitsCharset.scala
@@ -16,6 +16,8 @@
*/
package org.apache.daffodil.io.processors.charset
+import java.io.InvalidObjectException
+import java.io.ObjectStreamException
import java.nio.ByteBuffer
import java.nio.CharBuffer
import java.nio.charset.CoderResult
@@ -87,6 +89,35 @@ trait BitsCharset extends Serializable {
}
}
}
+
+ /**
+ * If we serialize a BitsCharset from a plugin and that plugin isn't on the
classpath when
+ * deserializing, then we get a really unhelpful and confusing error message
coming from
+ * Java/Scala internals. To avoid this, we use writeReplace to instead
serialize a proxy class
+ * containing information about the charset and will always exist since it's
part of Daffodil.
+ * When the proxy is deserialized, its readResolve function is called, which
finds the
+ * BitsCharset from the registry and replaces the proxy class with it. If
the charset does not
+ * exist, we can provide a helpful diagnostic.
+ */
+ @throws(classOf[ObjectStreamException])
+ protected def writeReplace(): Object =
+ new BitsCharsetSerializationProxy(this.getClass.getName, name)
+
+}
+
+class BitsCharsetSerializationProxy(className: String, charsetName: String)
+ extends Serializable {
+ @throws(classOf[ObjectStreamException])
+ def readResolve(): Object = {
+ val bitsCharset = CharsetUtils.getCharset(charsetName)
+ if (bitsCharset == null) {
+ // note that we cannot throw ClassNotFoundException since that is what
leads to the
+ // unhelpful error message. InvalidObjectException isn't technically
correct, but it's the
+ // best option of the ObjectStreamExceptions.
+ throw new InvalidObjectException(s"Charset plugin $className for
$charsetName")
+ }
+ bitsCharset
+ }
}
trait IsResetMixin {
diff --git
a/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLIPlugins.scala
b/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLIPlugins.scala
new file mode 100644
index 000000000..e6439c2b7
--- /dev/null
+++
b/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLIPlugins.scala
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.cliTest
+
+import java.nio.file.Path
+
+import org.apache.daffodil.cli.Main.ExitCode
+import org.apache.daffodil.cli.cliTest.Util._
+
+import org.junit.Test
+
+class TestCLIPlugins {
+
+ /**
+ * Return a sequence of paths, made up of the one classpath containing all
+ * compiled daffodil-test .class files, and any additional classpaths needed
for a
+ * specific test
+ */
+ private def testClasspath(extra: String*): Seq[Path] = {
+ val classes = path("daffodil-test/target/scala-2.12/test-classes/")
+ val paths = extra.map(path(_))
+ classes +: paths
+ }
+
+ /**
+ * Tests the case when reloading a saved parser when a needed charset is not
on the classpath.
+ */
+ @Test def test_reload_missing_charset(): Unit = {
+ val schema = path(
+
"daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd"
+ )
+ val classpath = testClasspath()
+
+ withTempFile { parser =>
+ // create a saved parser with the correct classpath
+ runCLI(args"save-parser -s $schema -r s2 $parser", classpath, fork =
true) { cli => }(
+ ExitCode.Success
+ )
+
+ // exclude classpath so reloading is missing the charset class
+ runCLI(args"parse -P $parser", fork = true) { cli =>
+ cli.expectErr(
+ "Charset plugin
org.apache.daffodil.charsets.BitsCharsetTest_ISO_8859_13$ for ISO-8859-13"
+ )
+ }(ExitCode.UnableToCreateProcessor)
+
+ // make sure it works with the correct classpath
+ runCLI(args"parse -P $parser", classpath, fork = true) { cli =>
+ val bytes = Array[Int](
+ 0xc0, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0xc0, 0x31, 0x32,
0x33, 0x34, 0x35,
+ 0x36, 0x37
+ ).map(_.toByte)
+ cli.sendBytes(bytes, inputDone = true)
+ cli.expect("<e1>À1234567</e1>")
+ cli.expect("<e2>Ą1234567</e2>")
+ }(ExitCode.Success)
+ }
+ }
+
+ /**
+ * Tests the case when reloading a saved parser when a needed layer is not
on the classpath.
+ */
+ @Test def test_reload_missing_layer(): Unit = {
+ val schema = path(
+
"daffodil-test/src/test/resources/org/apache/daffodil/layers/useAllTypesLayer.dfdl.xsd"
+ )
+ val classpath = testClasspath()
+
+ withTempFile { parser =>
+ // create a saved parser with the correct classpath
+ runCLI(args"save-parser -s $schema -r root $parser", classpath, fork =
true) { cli => }(
+ ExitCode.Success
+ )
+
+ // exclude classpath so reloading is missing the charset class
+ runCLI(args"parse -P $parser", fork = true) { cli =>
+
cli.expectErr("{urn:org.apache.daffodil.layers.xsd.AllTypesLayer}allTypesLayer")
+ }(ExitCode.UnableToCreateProcessor)
+
+ // make sure it works with the correct classpath
+ runCLI(args"parse -P $parser", classpath, fork = true) { cli =>
+ cli.send("", inputDone = true)
+ cli.expect("<integer2>-84</integer2>")
+ }(ExitCode.Success)
+ }
+ }
+
+ /**
+ * Tests the case when reloading a saved parser when a needed udf is not on
the classpath.
+ */
+ @Test def test_reload_missing_udf(): Unit = {
+ val schema = path(
+
"daffodil-udf/src/test/resources/org/apache/daffodil/udf/genericUdfSchema.xsd"
+ )
+ val classpath = testClasspath(
+ "daffodil-udf/target/scala-2.12/test-classes/"
+ )
+
+ withTempFile { parser =>
+ // create a saved parser with the correct classpath
+ runCLI(args"save-parser -s $schema -r user_func1 $parser", classpath,
fork = true) {
+ cli =>
+ }(ExitCode.Success)
+
+ // exclude classpath so reloading is missing the charset class
+ runCLI(args"parse -P $parser", fork = true) { cli =>
+ cli.expectErr("org.jgoodudfs.example.StringFunctions.Replace")
+ }(ExitCode.UnableToCreateProcessor)
+
+ // make sure it works with the correct classpath
+ runCLI(args"parse -P $parser", classpath, fork = true) { cli =>
+ cli.send("strng", inputDone = true)
+ cli.expect("<user_func1>")
+ }(ExitCode.Success)
+ }
+ }
+
+}
diff --git
a/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd
b/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition-DNE.dfdl.xsd
similarity index 61%
copy from
daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd
copy to
daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition-DNE.dfdl.xsd
index 5c6ce1654..6e99d83b4 100644
---
a/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd
+++
b/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition-DNE.dfdl.xsd
@@ -30,33 +30,6 @@
</appinfo>
</annotation>
- <element name="s1">
- <complexType>
- <sequence>
- <element name="e1" dfdl:encoding="ISO-8859-13"
dfdl:lengthKind="explicit" dfdl:length="8" type="xsd:string" />
- <element name="e2"
dfdl:encoding="X-DFDL-ISO-8859-1-8-BIT-PACKED-LSB-FIRST-REVERSE"
dfdl:lengthKind="delimited" type="xsd:string" />
- </sequence>
- </complexType>
- </element>
-
- <element name="s2">
- <complexType>
- <sequence>
- <element name="e1" dfdl:encoding="ISO-8859-1"
dfdl:lengthKind="explicit" dfdl:length="8" type="xsd:string" />
- <element name="e2" dfdl:encoding="ISO-8859-13"
dfdl:lengthKind="delimited" type="xsd:string" />
- </sequence>
- </complexType>
- </element>
-
- <element name="s3">
- <complexType>
- <sequence>
- <element name="e1" dfdl:encoding="ISO-8859-13"
dfdl:lengthKind="explicit" dfdl:length="8" type="xsd:string" />
- <element name="e2" dfdl:encoding="ISO-8859-1"
dfdl:lengthKind="delimited" type="xsd:string" />
- </sequence>
- </complexType>
- </element>
-
<element name="s4">
<complexType>
<sequence>
@@ -65,5 +38,4 @@
</complexType>
</element>
-
</schema>
diff --git
a/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd
b/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd
index 5c6ce1654..423f38010 100644
---
a/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd
+++
b/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd
@@ -57,13 +57,4 @@
</complexType>
</element>
- <element name="s4">
- <complexType>
- <sequence>
- <element name="e1" dfdl:encoding="ISO-DNE" dfdl:lengthKind="explicit"
dfdl:length="8" type="xsd:string" />
- </sequence>
- </complexType>
- </element>
-
-
</schema>
diff --git
a/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.tdml
b/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.tdml
index 656d00011..a5ba1295e 100644
---
a/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.tdml
+++
b/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.tdml
@@ -138,7 +138,7 @@
</tdml:infoset>
</tdml:unparserTestCase>
- <tdml:unparserTestCase name="verify_error_message" root="s4"
model="org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd">
+ <tdml:unparserTestCase name="verify_error_message" root="s4"
model="org/apache/daffodil/charsets/TestBitsCharsetDefinition-DNE.dfdl.xsd">
<tdml:document>
<tdml:documentPart type="byte"><![CDATA[C0 31 32 33 34 35 36 37]]>