This is an automated email from the ASF dual-hosted git repository.

slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git


The following commit(s) were added to refs/heads/main by this push:
     new 5d314ab58 Replace Scala Maps with Java Maps in choice parsers to avoid 
bad diagnostics
5d314ab58 is described below

commit 5d314ab58b03bd9161c858436e4ae0391bcbae21
Author: Steve Lawrence <[email protected]>
AuthorDate: Mon Sep 18 14:18:06 2023 -0400

    Replace Scala Maps with Java Maps in choice parsers to avoid bad diagnostics
    
    When Scala serializes a Map, it uses writeReplace() to actually
    serialize a HashMap$SerializationProxy object. There appears to be a bug
    in the deserialization of this proxy object which can lead to very
    confusing diagnostics if classpaths are not set up correctly when
    reloading a saved parser.
    
    For example, if a layer transformer is not on the classpath when
    reloading, you could get an error mentioning only this
    HashMap$SerializationProxy and the ChoiceCombinator parser. It is not
    clear at all that the core issue is a layer transformer is not on the
    classpath.
    
    To fix this, this changes the choice combinator parsers and unparsers to
    use Java Maps instead. This do not have the deserialization issues that
    Scala Maps have. Now, if a layer transformer is not on the classpath you
    get a much more helpful diagnostic stating the saved parser was "created
    with a different set of dependencies containing a class no longer on the
    classpath", and includes the actual transformer factory class name,
    making it much more clear what the problem is.
    
    DAFFODIL-2728
---
 .../core/grammar/primitives/ChoiceCombinator.scala | 10 ++++-
 .../org/apache/daffodil/lib/util/Serialize.scala   | 43 ++++++++++++++++++++++
 .../runtime1/ChoiceAndOtherVariousUnparsers.scala  | 11 ++++--
 .../processors/parsers/ElementKindParsers.scala    | 13 ++++---
 4 files changed, 66 insertions(+), 11 deletions(-)

diff --git 
a/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/ChoiceCombinator.scala
 
b/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/ChoiceCombinator.scala
index 40478eca8..a0731b8a5 100644
--- 
a/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/ChoiceCombinator.scala
+++ 
b/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/ChoiceCombinator.scala
@@ -31,6 +31,8 @@ import org.apache.daffodil.lib.cookers.IntRangeCooker
 import org.apache.daffodil.lib.exceptions.Assert
 import org.apache.daffodil.lib.schema.annotation.props.gen.ChoiceLengthKind
 import org.apache.daffodil.lib.util.MaybeInt
+import org.apache.daffodil.lib.util.ProperlySerializableMap._
+import org.apache.daffodil.runtime1.infoset.ChoiceBranchEvent
 import org.apache.daffodil.runtime1.processors.RangeBound
 import org.apache.daffodil.runtime1.processors.parsers._
 import org.apache.daffodil.runtime1.processors.unparsers._
@@ -246,7 +248,9 @@ case class ChoiceCombinator(ch: ChoiceTermBase, 
alternatives: Seq[Gram])
           }
         (parser, isRepresented)
       }
-      val serializableMap: Map[String, (Parser, Boolean)] = 
dispatchBranchKeyMap.map(identity)
+
+      val serializableMap: ProperlySerializableMap[String, (Parser, Boolean)] =
+        dispatchBranchKeyMap.toProperlySerializableMap
       val serializableKeyRangeMap: Vector[(RangeBound, RangeBound, Parser, 
Boolean)] =
         dispatchBranchKeyRangeTuples.toVector.map(identity)
 
@@ -319,7 +323,9 @@ case class ChoiceCombinator(ch: ChoiceTermBase, 
alternatives: Seq[Gram])
         branchForUnparse.get
       }
     } else {
-      val cbm = ChoiceBranchMap(eventUnparserMap, branchForUnparse)
+      val serializableMap: ProperlySerializableMap[ChoiceBranchEvent, 
Unparser] =
+        eventUnparserMap.toProperlySerializableMap
+      val cbm = ChoiceBranchMap(serializableMap, branchForUnparse)
       new ChoiceCombinatorUnparser(ch.modelGroupRuntimeData, cbm, 
choiceLengthInBits)
     }
   }
diff --git 
a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/util/Serialize.scala 
b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/util/Serialize.scala
index c4ef43c76..47ca824e8 100644
--- a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/util/Serialize.scala
+++ b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/util/Serialize.scala
@@ -17,6 +17,7 @@
 
 package org.apache.daffodil.lib.util
 
+import scala.collection.JavaConverters._
 import scala.collection.mutable.HashMap
 
 import org.apache.daffodil.lib.exceptions.Assert
@@ -93,3 +94,45 @@ private object PreSerialization {
     hasIt
   }
 }
+
+/**
+ * Scala has a bug where failure to deserialize a Scala HashMap when classpath 
jars aren't
+ * correct can lead to very confusing and unhelpful error messages. This seems 
to be related to
+ * the fact that a HashMap serializes to a HashMap.SerializationProxy, which 
appears to have a
+ * bug when deserialization fails. Note that we cannot just use .asJava since 
that creates a
+ * Scala JavaCollectionWrappers.MapWrapper, which has the same serialization 
issues.
+ *
+ * Fortunately, the Java HashMap does not have these serialization issues. So 
anywhere we have a
+ * Scala HashMap that could easily fail deserialization, we should probably 
use a Java HashMap
+ * instead, and do so by create a new Java HashMap instance and copying in the 
keys/values from
+ * the Scala HashMap.
+ *
+ * To make to make it clear throughout the code where we actually do this, a 
new type alias is
+ * created for a Java Map called "ProperlySerializableMap". Although the 
compiler cannot enforce
+ * alias use instead of directly using a Java Map, as convention we should use 
this type alias
+ * anywhere we are using a Map for correct serialization purposes. 
Additionally, an implicit
+ * class with helper function is added to make it easy to correctly convert a 
Scala Map to a
+ * ProperlySerializableMap. For example:
+ *
+ *     import org.apache.daffodil.lib.util.ProperlySerializableMap._
+ *
+ *     val myScalaMap = Map((1,2), (3,4))
+ *     val serializableMap = myScalaMap.toProperlySerializableMap
+ *
+ * Note that the toProperlySerializableMap function creates a LinkedHashMap to 
guarantee
+ * repeatable order.
+ *
+ * See DAFFODIL-228 for examples of the confusing error messages and details 
of the issue.
+ */
+object ProperlySerializableMap {
+
+  type ProperlySerializableMap[K, V] = java.util.Map[K, V]
+
+  implicit class DecoratedWithToProperlySerialableMap[K, V](m: Map[K, V]) {
+    def toProperlySerializableMap: ProperlySerializableMap[K, V] = {
+      // This LinkedHashMap constructor copies the keys/values from the Scala 
MapWrapper, so
+      // anything using the result of this function will not have any Scala 
serialization issues
+      new java.util.LinkedHashMap[K, V](m.asJava)
+    }
+  }
+}
diff --git 
a/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/unparsers/runtime1/ChoiceAndOtherVariousUnparsers.scala
 
b/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/unparsers/runtime1/ChoiceAndOtherVariousUnparsers.scala
index a0efbb2f0..0a5b0b0be 100644
--- 
a/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/unparsers/runtime1/ChoiceAndOtherVariousUnparsers.scala
+++ 
b/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/unparsers/runtime1/ChoiceAndOtherVariousUnparsers.scala
@@ -17,23 +17,26 @@
 
 package org.apache.daffodil.unparsers.runtime1
 
+import scala.collection.JavaConverters._
+
 import org.apache.daffodil.lib.util.Maybe
 import org.apache.daffodil.lib.util.Maybe._
 import org.apache.daffodil.lib.util.MaybeInt
+import org.apache.daffodil.lib.util.ProperlySerializableMap._
 import org.apache.daffodil.runtime1.infoset._
 import org.apache.daffodil.runtime1.processors.RuntimeData
 import org.apache.daffodil.runtime1.processors._
 import org.apache.daffodil.runtime1.processors.unparsers._
 
 case class ChoiceBranchMap(
-  lookupTable: Map[ChoiceBranchEvent, Unparser],
+  lookupTable: ProperlySerializableMap[ChoiceBranchEvent, Unparser],
   unmappedDefault: Option[Unparser],
 ) extends Serializable {
 
   def get(cbe: ChoiceBranchEvent): Maybe[Unparser] = {
     val fromTable = lookupTable.get(cbe)
     val res =
-      if (fromTable.isDefined) One(fromTable.get)
+      if (fromTable != null) One(fromTable)
       else {
         //
         // There must be an unmapped default in this case
@@ -49,9 +52,9 @@ case class ChoiceBranchMap(
 
   def defaultUnparser = unmappedDefault
 
-  def childProcessors = lookupTable.values.toSeq ++ unmappedDefault
+  def childProcessors = lookupTable.values.iterator.asScala.toVector ++ 
unmappedDefault
 
-  def keys = lookupTable.keys
+  def keys = lookupTable.keySet.asScala
 }
 
 /*
diff --git 
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/ElementKindParsers.scala
 
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/ElementKindParsers.scala
index c04622ae6..6202a5eab 100644
--- 
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/ElementKindParsers.scala
+++ 
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/processors/parsers/ElementKindParsers.scala
@@ -18,9 +18,11 @@
 package org.apache.daffodil.runtime1.processors.parsers
 
 import java.math.{ BigInteger => JBigInt }
+import scala.collection.JavaConverters._
 
 import org.apache.daffodil.lib.util.Logger
 import org.apache.daffodil.lib.util.Maybe
+import org.apache.daffodil.lib.util.ProperlySerializableMap._
 import org.apache.daffodil.runtime1.processors.ChoiceDispatchKeyEv
 import org.apache.daffodil.runtime1.processors.DelimiterParseEv
 import org.apache.daffodil.runtime1.processors.EscapeSchemeParseEv
@@ -140,7 +142,7 @@ class ChoiceBranchEmptyParser(val context: RuntimeData) 
extends PrimParserNoData
 
 abstract class ChoiceDispatchCombinatorParserBase(
   rd: TermRuntimeData,
-  dispatchBranchKeyMap: Map[String, (Parser, Boolean)],
+  dispatchBranchKeyMap: ProperlySerializableMap[String, (Parser, Boolean)],
   dispatchKeyRangeMap: Vector[(RangeBound, RangeBound, Parser, Boolean)],
 ) extends CombinatorParser(rd) {
 
@@ -149,7 +151,8 @@ abstract class ChoiceDispatchCombinatorParserBase(
   override lazy val runtimeDependencies = Vector()
 
   override def childProcessors =
-    dispatchBranchKeyMap.values.map(_._1).toVector ++ 
dispatchKeyRangeMap.map(_._3)
+    dispatchBranchKeyMap.values.iterator.asScala.map(_._1).toVector ++
+      dispatchKeyRangeMap.map(_._3)
 
   /*
    * Returns a value if pstate.processorStatus eq Success
@@ -165,8 +168,8 @@ abstract class ChoiceDispatchCombinatorParserBase(
 
         val parserOpt1 = dispatchBranchKeyMap.get(key)
         val parserOpt2 =
-          if (parserOpt1.isDefined) {
-            parserOpt1
+          if (parserOpt1 != null) {
+            Some(parserOpt1)
           } else {
             if (!dispatchKeyRangeMap.isEmpty) {
               try {
@@ -235,7 +238,7 @@ abstract class ChoiceDispatchCombinatorParserBase(
 class ChoiceDispatchCombinatorParser(
   rd: TermRuntimeData,
   dispatchKeyEv: ChoiceDispatchKeyEv,
-  dispatchBranchKeyMap: Map[String, (Parser, Boolean)],
+  dispatchBranchKeyMap: ProperlySerializableMap[String, (Parser, Boolean)],
   dispatchKeyRangeMap: Vector[(RangeBound, RangeBound, Parser, Boolean)],
 ) extends ChoiceDispatchCombinatorParserBase(rd, dispatchBranchKeyMap, 
dispatchKeyRangeMap) {
   override def computeDispatchKey(pstate: PState): Maybe[String] = Maybe(

Reply via email to