This is an automated email from the ASF dual-hosted git repository. slawrence pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-daffodil.git
The following commit(s) were added to refs/heads/master by this push: new 1267898 Fix deep stack sizes when serializing some schemas 1267898 is described below commit 126789891ed5e834037198e5aa59e1f6b34dcb04 Author: Steve Lawrence <slawre...@apache.org> AuthorDate: Mon Mar 9 09:19:46 2020 -0400 Fix deep stack sizes when serializing some schemas The "parents" val in a DPathCompileInfo is a backpointer to all DPathCompileInfo's that reference it. The problem with this is that when elements are shared, these backpointers create a highly connected graph that requires a large stack to serialize using the default java serialization as it jumps around parents and children. To avoid this large stack requirement, we make the parents backpointer transient. This prevents jumping back up to parents during serialization and results in only needing a stack depth relative to the schema depth. Once all that serialization is completed and all the DPathCompileInfo's are serialized, we then manually traverse all the DPathCompileInfo's again and serialize the parent sequences (via the serializeParents method). Because all the DPathCompileInfo's are already serialized, this just serializes the Sequence objects and the stack depth is again relative to the schema depth. On complex schemas, this saw an order of magnitude reduction in stack size during serialization. 
DAFFODIL-2283 --- .../scala/org/apache/daffodil/util/Serialize.scala | 1 - .../apache/daffodil/dsom/CompiledExpression1.scala | 54 +++++++++++++++++++++- .../daffodil/processors/SchemaSetRuntimeData.scala | 10 ++++ 3 files changed, 62 insertions(+), 3 deletions(-) diff --git a/daffodil-lib/src/main/scala/org/apache/daffodil/util/Serialize.scala b/daffodil-lib/src/main/scala/org/apache/daffodil/util/Serialize.scala index 1a94fa8..20bb92e 100644 --- a/daffodil-lib/src/main/scala/org/apache/daffodil/util/Serialize.scala +++ b/daffodil-lib/src/main/scala/org/apache/daffodil/util/Serialize.scala @@ -41,7 +41,6 @@ trait PreSerialization extends Serializable { protected final def serializeObject(out: java.io.ObjectOutputStream) { try { - // println("serializing " + Misc.getNameFromClass(this)) // good place for a breakpoint preSerialization out.defaultWriteObject() } catch { diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/dsom/CompiledExpression1.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/dsom/CompiledExpression1.scala index da9c5a4..6a6c985 100644 --- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/dsom/CompiledExpression1.scala +++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/dsom/CompiledExpression1.scala @@ -212,7 +212,47 @@ class DPathCompileInfo( extends ImplementsThrowsSDE with PreSerialization with HasSchemaFileLocation { - lazy val parents = parentsArg + + /** + * This "parents" val is a backpointer to all DPathCompileInfo's that + * reference this DPathCompileInfo. The problem with this is that when + * elements are shared, these backpointers create a highly connected graph + * that requires a large stack to serialize using the default java + * serialization as it jumps around parents and children. To avoid this large + * stack requirement, we make the parents backpointer transient. 
This + * prevents jumping back up to parents during serialization and results in + * only needing a stack depth relative to the schema depth. Once all that + * serialization is completed and all the DPathCompileInfo's are serialized, + * we then manually traverse all the DPathCompileInfo's again and serialize + * the parent sequences (via the serailizeParents method). Because all the + * DPathCompileInfo's are already serialized, this just serializes the + * Sequence objects and the stack depth is again relative to the schema + * depth. + */ + @transient + val parents = parentsArg + + def serializeParents(oos: java.io.ObjectOutputStream): Unit = { + oos.writeObject(parents) + } + + def deserializeParents(ois: java.io.ObjectInputStream): Unit = { + val deserializedParents = ois.readObject().asInstanceOf[Seq[DPathCompileInfo]] + + // Set the parents field via reflection so that it can be a val rather than a var + val clazz = this.getClass + val parentsField = try { + clazz.getDeclaredField("parents") + } catch { + case e: java.lang.NoSuchFieldException => + clazz.getSuperclass.getDeclaredField("parents") + } + parentsField.setAccessible(true) + parentsField.set(this, deserializedParents) // set the value to the deserialized value + parentsField.setAccessible(false) + } + + lazy val variableMap = variableMapArg @@ -222,7 +262,6 @@ class DPathCompileInfo( lazy val typeCalcMap: TypeCalcMap = typeCalcMapArg.map(identity) override def preSerialization: Any = { - parents variableMap } @@ -301,6 +340,17 @@ class DPathElementCompileInfo( unqualifiedPathStepPolicy, typeCalcMap, lexicalContextRuntimeData) { + override def serializeParents(oos: java.io.ObjectOutputStream): Unit = { + super.serializeParents(oos) + elementChildrenCompileInfo.foreach { _.serializeParents(oos) } + } + + override def deserializeParents(ois: java.io.ObjectInputStream): Unit = { + super.deserializeParents(ois) + elementChildrenCompileInfo.foreach { _.deserializeParents(ois) } + } + + lazy val 
elementChildrenCompileInfo = elementChildrenCompileInfoArg override def preSerialization: Any = { diff --git a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/SchemaSetRuntimeData.scala b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/SchemaSetRuntimeData.scala index b974cb7..72e0768 100644 --- a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/SchemaSetRuntimeData.scala +++ b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/SchemaSetRuntimeData.scala @@ -40,4 +40,14 @@ final class SchemaSetRuntimeData( override def schemaFileLocation = elementRuntimeData.schemaFileLocation override def SDE(str: String, args: Any*) = elementRuntimeData.SDE(str, args) + private def writeObject(oos: java.io.ObjectOutputStream): Unit = { + oos.defaultWriteObject() + elementRuntimeData.dpathElementCompileInfo.serializeParents(oos) + } + + private def readObject(ois: java.io.ObjectInputStream): Unit = { + ois.defaultReadObject() + elementRuntimeData.dpathElementCompileInfo.deserializeParents(ois) + } + }