This is an automated email from the ASF dual-hosted git repository.

slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git


The following commit(s) were added to refs/heads/main by this push:
     new be3fee7b2 Improve reload diagnosics when a character set plugin is 
missing
be3fee7b2 is described below

commit be3fee7b241b3f0df577070bc43268cfa9d125ba
Author: Steve Lawrence <[email protected]>
AuthorDate: Thu Aug 22 16:05:09 2024 -0400

    Improve reload diagnosics when a character set plugin is missing
    
    Currently, reloading a saved parser when a neccessary character set
    plugin is not on the classpath results in an exception with a very
    helpful message that has nothing to do with character sets or plugins,
    for example:
    
        java.lang.ClassCastException: cannot assign instance of
        scala.collection.immutable.List$SerializationProxy to field
        
org.apache.daffodil.runtime1.processors.ModelGroupRuntimeData.groupMembers
        of type scala.collection.Seq in instance of
        org.apache.daffodil.runtime1.processors.SequenceRuntimeData
    
    This is a known issue with scala and its use of proxy when serializing
    data structures like Lists and Maps.
    
    To fix the exception and improve diagnostics, this uses writeReplace to
    serialize a BitsCharset as a BitsCharsetSerializationProxy with
    information about the needed BitsCharset. This proxy implements
    readResolve to look up the BitsCharset from the bits charset registry
    and restore the original BitsChraset. If not found in the registry, then
    we throw an helpful exception that bubbles up to the reload function and
    we can output a helpful diagnostic. Now we get something like:
    
        [error] The saved parser was created with a different set of
        dependencies containing a class no longer on the classpath: Charset
        plugin com.example.MyCustomCharset for X-DFDL-MY-CUSTOM-CHARSET
    
    Add an integraion test to make sure this errors with a reasonable
    diagnostic, which require splitting a schema file into valid and invalid
    parts so we can reuse it with the CLI.
    
    Add tests for reloading UDFs and layers without a correct classpath.
    UDF's work as is. For layers we just need to catch SDE exception when
    reloading.
    
    DAFFODIL-2915
---
 .../apache/daffodil/core/compiler/Compiler.scala   |  14 ++-
 .../io/processors/charset/BitsCharset.scala        |  31 +++++
 .../apache/daffodil/cliTest/TestCLIPlugins.scala   | 133 +++++++++++++++++++++
 ....xsd => TestBitsCharsetDefinition-DNE.dfdl.xsd} |  28 -----
 .../charsets/TestBitsCharsetDefinition.dfdl.xsd    |   9 --
 .../charsets/TestBitsCharsetDefinition.tdml        |   2 +-
 6 files changed, 177 insertions(+), 40 deletions(-)

diff --git 
a/daffodil-core/src/main/scala/org/apache/daffodil/core/compiler/Compiler.scala 
b/daffodil-core/src/main/scala/org/apache/daffodil/core/compiler/Compiler.scala
index c16c71016..44b50ecc7 100644
--- 
a/daffodil-core/src/main/scala/org/apache/daffodil/core/compiler/Compiler.scala
+++ 
b/daffodil-core/src/main/scala/org/apache/daffodil/core/compiler/Compiler.scala
@@ -20,6 +20,7 @@ package org.apache.daffodil.core.compiler
 import java.io.File
 import java.io.FileInputStream
 import java.io.InvalidClassException
+import java.io.InvalidObjectException
 import java.io.ObjectInputStream
 import java.io.StreamCorruptedException
 import java.net.URI
@@ -40,6 +41,7 @@ import org.apache.daffodil.lib.api.UnitTestSchemaSource
 import org.apache.daffodil.lib.exceptions.Assert
 import org.apache.daffodil.lib.util.Misc
 import org.apache.daffodil.runtime1.api.DFDL
+import org.apache.daffodil.runtime1.dsom.SchemaDefinitionError
 import org.apache.daffodil.runtime1.processors.DataProcessor
 
 /**
@@ -301,8 +303,9 @@ class Compiler private (
           "The saved parser was created with a different version of " + 
dependencyStr + " with incompatible class: " + ex.classname
         )
       //
-      case ex @ (_: ClassNotFoundException | _: NoClassDefFoundError) =>
-        // Both of these exception happens if a class that was used when saving
+      case ex @ (_: ClassNotFoundException | _: NoClassDefFoundError |
+          _: InvalidObjectException | _: SchemaDefinitionError) =>
+        // These exception happens if a class that was used when saving
         // is no longer on the classpath when reloading.
         //
         // One example of this happening is saving a schema using Java 8 but
@@ -313,6 +316,13 @@ class Compiler private (
         // when reloading a schema, and dependencies are just missing, or if a
         // user switches depenency versions and the new version completely
         // removes a class.
+        //
+        // Another example is we use a special BitsCharsetSerializationProxy to
+        // serialize charsets. This proxy lets us do our own existence checks
+        // during deserialization and avoids very confusions and unhelpful
+        // messages from the default Java/Scala deserialization when the class
+        // does not exist. This custom logic throws an InvalidObjectException 
if
+        // the charset is not found
         throw new InvalidParserException(
           "The saved parser was created with a different set of dependencies 
containing a class no longer on the classpath: " + ex.getMessage
         )
diff --git 
a/daffodil-io/src/main/scala/org/apache/daffodil/io/processors/charset/BitsCharset.scala
 
b/daffodil-io/src/main/scala/org/apache/daffodil/io/processors/charset/BitsCharset.scala
index d3c794eef..8a3b26bfd 100644
--- 
a/daffodil-io/src/main/scala/org/apache/daffodil/io/processors/charset/BitsCharset.scala
+++ 
b/daffodil-io/src/main/scala/org/apache/daffodil/io/processors/charset/BitsCharset.scala
@@ -16,6 +16,8 @@
  */
 package org.apache.daffodil.io.processors.charset
 
+import java.io.InvalidObjectException
+import java.io.ObjectStreamException
 import java.nio.ByteBuffer
 import java.nio.CharBuffer
 import java.nio.charset.CoderResult
@@ -87,6 +89,35 @@ trait BitsCharset extends Serializable {
       }
     }
   }
+
+  /**
+   * If we serialize a BitsCharset from a plugin and that plugin isn't on the 
classpath when
+   * deserializing, then we get a really unhelpful and confusing error message 
coming from
+   * Java/Scala internals. To avoid this, we use writeReplace to instead 
serialize a proxy class
+   * containing information about the charset and will always exist since it's 
part of Daffodil.
+   * When the proxy is deserialized, its readResolve function is called, which 
finds the
+   * BitsCharset from the registry and replaces the proxy class with it. If 
the charset does not
+   * exist, we can provide a helpful diagnostic.
+   */
+  @throws(classOf[ObjectStreamException])
+  protected def writeReplace(): Object =
+    new BitsCharsetSerializationProxy(this.getClass.getName, name)
+
+}
+
+class BitsCharsetSerializationProxy(className: String, charsetName: String)
+  extends Serializable {
+  @throws(classOf[ObjectStreamException])
+  def readResolve(): Object = {
+    val bitsCharset = CharsetUtils.getCharset(charsetName)
+    if (bitsCharset == null) {
+      // note that we cannot throw ClassNotFoundException since that is what 
leads to the
+      // unhelpful error message. InvalidObjectException isn't technically 
correct, but it's the
+      // best option of the ObjectStreamExceptions.
+      throw new InvalidObjectException(s"Charset plugin $className for 
$charsetName")
+    }
+    bitsCharset
+  }
 }
 
 trait IsResetMixin {
diff --git 
a/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLIPlugins.scala
 
b/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLIPlugins.scala
new file mode 100644
index 000000000..e6439c2b7
--- /dev/null
+++ 
b/daffodil-test-integration/src/test/scala/org/apache/daffodil/cliTest/TestCLIPlugins.scala
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.cliTest
+
+import java.nio.file.Path
+
+import org.apache.daffodil.cli.Main.ExitCode
+import org.apache.daffodil.cli.cliTest.Util._
+
+import org.junit.Test
+
+class TestCLIPlugins {
+
+  /**
+   * Return a sequence of paths, made up of the one classpath containing all
+   * compiled daffodil-test .class files, and any additional classpaths needed 
for a
+   * specific test
+   */
+  private def testClasspath(extra: String*): Seq[Path] = {
+    val classes = path("daffodil-test/target/scala-2.12/test-classes/")
+    val paths = extra.map(path(_))
+    classes +: paths
+  }
+
+  /**
+   * Tests the case when reloading a saved parser when a needed charset is not 
on the classpath.
+   */
+  @Test def test_reload_missing_charset(): Unit = {
+    val schema = path(
+      
"daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd"
+    )
+    val classpath = testClasspath()
+
+    withTempFile { parser =>
+      // create a saved parser with the correct classpath
+      runCLI(args"save-parser -s $schema -r s2 $parser", classpath, fork = 
true) { cli => }(
+        ExitCode.Success
+      )
+
+      // exclude classpath so reloading is missing the charset class
+      runCLI(args"parse -P $parser", fork = true) { cli =>
+        cli.expectErr(
+          "Charset plugin 
org.apache.daffodil.charsets.BitsCharsetTest_ISO_8859_13$ for ISO-8859-13"
+        )
+      }(ExitCode.UnableToCreateProcessor)
+
+      // make sure it works with the correct classpath
+      runCLI(args"parse -P $parser", classpath, fork = true) { cli =>
+        val bytes = Array[Int](
+          0xc0, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0xc0, 0x31, 0x32, 
0x33, 0x34, 0x35,
+          0x36, 0x37
+        ).map(_.toByte)
+        cli.sendBytes(bytes, inputDone = true)
+        cli.expect("<e1>À1234567</e1>")
+        cli.expect("<e2>Ą1234567</e2>")
+      }(ExitCode.Success)
+    }
+  }
+
+  /**
+   * Tests the case when reloading a saved parser when a needed layer is not 
on the classpath.
+   */
+  @Test def test_reload_missing_layer(): Unit = {
+    val schema = path(
+      
"daffodil-test/src/test/resources/org/apache/daffodil/layers/useAllTypesLayer.dfdl.xsd"
+    )
+    val classpath = testClasspath()
+
+    withTempFile { parser =>
+      // create a saved parser with the correct classpath
+      runCLI(args"save-parser -s $schema -r root $parser", classpath, fork = 
true) { cli => }(
+        ExitCode.Success
+      )
+
+      // exclude classpath so reloading is missing the charset class
+      runCLI(args"parse -P $parser", fork = true) { cli =>
+        
cli.expectErr("{urn:org.apache.daffodil.layers.xsd.AllTypesLayer}allTypesLayer")
+      }(ExitCode.UnableToCreateProcessor)
+
+      // make sure it works with the correct classpath
+      runCLI(args"parse -P $parser", classpath, fork = true) { cli =>
+        cli.send("", inputDone = true)
+        cli.expect("<integer2>-84</integer2>")
+      }(ExitCode.Success)
+    }
+  }
+
+  /**
+   * Tests the case when reloading a saved parser when a needed udf is not on 
the classpath.
+   */
+  @Test def test_reload_missing_udf(): Unit = {
+    val schema = path(
+      
"daffodil-udf/src/test/resources/org/apache/daffodil/udf/genericUdfSchema.xsd"
+    )
+    val classpath = testClasspath(
+      "daffodil-udf/target/scala-2.12/test-classes/"
+    )
+
+    withTempFile { parser =>
+      // create a saved parser with the correct classpath
+      runCLI(args"save-parser -s $schema -r user_func1 $parser", classpath, 
fork = true) {
+        cli =>
+      }(ExitCode.Success)
+
+      // exclude classpath so reloading is missing the charset class
+      runCLI(args"parse -P $parser", fork = true) { cli =>
+        cli.expectErr("org.jgoodudfs.example.StringFunctions.Replace")
+      }(ExitCode.UnableToCreateProcessor)
+
+      // make sure it works with the correct classpath
+      runCLI(args"parse -P $parser", classpath, fork = true) { cli =>
+        cli.send("strng", inputDone = true)
+        cli.expect("<user_func1>")
+      }(ExitCode.Success)
+    }
+  }
+
+}
diff --git 
a/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd
 
b/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition-DNE.dfdl.xsd
similarity index 61%
copy from 
daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd
copy to 
daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition-DNE.dfdl.xsd
index 5c6ce1654..6e99d83b4 100644
--- 
a/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd
+++ 
b/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition-DNE.dfdl.xsd
@@ -30,33 +30,6 @@
     </appinfo>
   </annotation>
 
-  <element name="s1">
-    <complexType>
-      <sequence>
-        <element name="e1" dfdl:encoding="ISO-8859-13" 
dfdl:lengthKind="explicit" dfdl:length="8" type="xsd:string" />
-        <element name="e2" 
dfdl:encoding="X-DFDL-ISO-8859-1-8-BIT-PACKED-LSB-FIRST-REVERSE" 
dfdl:lengthKind="delimited" type="xsd:string" />
-      </sequence>
-    </complexType>
-  </element>
-
-  <element name="s2">
-    <complexType>
-      <sequence>
-        <element name="e1" dfdl:encoding="ISO-8859-1" 
dfdl:lengthKind="explicit" dfdl:length="8" type="xsd:string" />
-        <element name="e2" dfdl:encoding="ISO-8859-13" 
dfdl:lengthKind="delimited" type="xsd:string" />
-      </sequence>
-    </complexType>
-  </element>
-
-  <element name="s3">
-    <complexType>
-      <sequence>
-        <element name="e1" dfdl:encoding="ISO-8859-13" 
dfdl:lengthKind="explicit" dfdl:length="8" type="xsd:string" />
-        <element name="e2" dfdl:encoding="ISO-8859-1" 
dfdl:lengthKind="delimited" type="xsd:string" />
-      </sequence>
-    </complexType>
-  </element>
-
   <element name="s4">
     <complexType>
       <sequence>
@@ -65,5 +38,4 @@
     </complexType>
   </element>
 
-
 </schema>
diff --git 
a/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd
 
b/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd
index 5c6ce1654..423f38010 100644
--- 
a/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd
+++ 
b/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd
@@ -57,13 +57,4 @@
     </complexType>
   </element>
 
-  <element name="s4">
-    <complexType>
-      <sequence>
-        <element name="e1" dfdl:encoding="ISO-DNE" dfdl:lengthKind="explicit" 
dfdl:length="8" type="xsd:string" />
-      </sequence>
-    </complexType>
-  </element>
-
-
 </schema>
diff --git 
a/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.tdml
 
b/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.tdml
index 656d00011..a5ba1295e 100644
--- 
a/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.tdml
+++ 
b/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.tdml
@@ -138,7 +138,7 @@
   </tdml:infoset>
 </tdml:unparserTestCase>
 
-  <tdml:unparserTestCase name="verify_error_message" root="s4" 
model="org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd">
+  <tdml:unparserTestCase name="verify_error_message" root="s4" 
model="org/apache/daffodil/charsets/TestBitsCharsetDefinition-DNE.dfdl.xsd">
 
     <tdml:document>
       <tdml:documentPart type="byte"><![CDATA[C0 31 32 33 34 35 36 37]]>

Reply via email to