This is an automated email from the ASF dual-hosted git repository.
slawrence pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-daffodil.git
The following commit(s) were added to refs/heads/master by this push:
new 1267898 Fix deep stack sizes when serializing some schemas
1267898 is described below
commit 126789891ed5e834037198e5aa59e1f6b34dcb04
Author: Steve Lawrence <[email protected]>
AuthorDate: Mon Mar 9 09:19:46 2020 -0400
Fix deep stack sizes when serializing some schemas
The "parents" val in a DPathCompileInfo is a backpointer to all
DPathCompileInfo's that reference it. The problem with this is that when
elements are shared, these backpointers create a highly connected graph
that requires a large stack to serialize using the default java
serialization as it jumps around parents and children. To avoid this
large stack requirement, we make the parents backpointer transient. This
prevents jumping back up to parents during serialization and results in
only needing a stack depth relative to the schema depth. Once all that
serialization is completed and all the DPathCompileInfo's are
serialized, we then manually traverse all the DPathCompileInfo's again
and serialize the parent sequences (via the serializeParents method).
Because all the DPathCompileInfo's are already serialized, this just
serializes the Sequence objects and the stack depth is again relative to
the schema depth.
On complex schemas, this saw an order of magnitude reduction in stack
size during serialization.
DAFFODIL-2283
---
.../scala/org/apache/daffodil/util/Serialize.scala | 1 -
.../apache/daffodil/dsom/CompiledExpression1.scala | 54 +++++++++++++++++++++-
.../daffodil/processors/SchemaSetRuntimeData.scala | 10 ++++
3 files changed, 62 insertions(+), 3 deletions(-)
diff --git
a/daffodil-lib/src/main/scala/org/apache/daffodil/util/Serialize.scala
b/daffodil-lib/src/main/scala/org/apache/daffodil/util/Serialize.scala
index 1a94fa8..20bb92e 100644
--- a/daffodil-lib/src/main/scala/org/apache/daffodil/util/Serialize.scala
+++ b/daffodil-lib/src/main/scala/org/apache/daffodil/util/Serialize.scala
@@ -41,7 +41,6 @@ trait PreSerialization extends Serializable {
protected final def serializeObject(out: java.io.ObjectOutputStream) {
try {
- // println("serializing " + Misc.getNameFromClass(this)) // good place
for a breakpoint
preSerialization
out.defaultWriteObject()
} catch {
diff --git
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/dsom/CompiledExpression1.scala
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/dsom/CompiledExpression1.scala
index da9c5a4..6a6c985 100644
---
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/dsom/CompiledExpression1.scala
+++
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/dsom/CompiledExpression1.scala
@@ -212,7 +212,47 @@ class DPathCompileInfo(
extends ImplementsThrowsSDE with PreSerialization
with HasSchemaFileLocation {
- lazy val parents = parentsArg
+
+ /**
+ * This "parents" val is a backpointer to all DPathCompileInfo's that
+ * reference this DPathCompileInfo. The problem with this is that when
+ * elements are shared, these backpointers create a highly connected graph
+ * that requires a large stack to serialize using the default java
+ * serialization as it jumps around parents and children. To avoid this large
+ * stack requirement, we make the parents backpointer transient. This
+ * prevents jumping back up to parents during serialization and results in
+ * only needing a stack depth relative to the schema depth. Once all that
+ * serialization is completed and all the DPathCompileInfo's are serialized,
+ * we then manually traverse all the DPathCompileInfo's again and serialize
+ * the parent sequences (via the serializeParents method). Because all the
+ * DPathCompileInfo's are already serialized, this just serializes the
+ * Sequence objects and the stack depth is again relative to the schema
+ * depth.
+ */
+ @transient
+ val parents = parentsArg
+
+ def serializeParents(oos: java.io.ObjectOutputStream): Unit = {
+ oos.writeObject(parents)
+ }
+
+ def deserializeParents(ois: java.io.ObjectInputStream): Unit = {
+ val deserializedParents =
ois.readObject().asInstanceOf[Seq[DPathCompileInfo]]
+
+ // Set the parents field via reflection so that it can be a val rather
than a var
+ val clazz = this.getClass
+ val parentsField = try {
+ clazz.getDeclaredField("parents")
+ } catch {
+ case e: java.lang.NoSuchFieldException =>
+ clazz.getSuperclass.getDeclaredField("parents")
+ }
+ parentsField.setAccessible(true)
+ parentsField.set(this, deserializedParents) // set the value to the
deserialized value
+ parentsField.setAccessible(false)
+ }
+
+
lazy val variableMap =
variableMapArg
@@ -222,7 +262,6 @@ class DPathCompileInfo(
lazy val typeCalcMap: TypeCalcMap = typeCalcMapArg.map(identity)
override def preSerialization: Any = {
- parents
variableMap
}
@@ -301,6 +340,17 @@ class DPathElementCompileInfo(
unqualifiedPathStepPolicy,
typeCalcMap, lexicalContextRuntimeData) {
+ override def serializeParents(oos: java.io.ObjectOutputStream): Unit = {
+ super.serializeParents(oos)
+ elementChildrenCompileInfo.foreach { _.serializeParents(oos) }
+ }
+
+ override def deserializeParents(ois: java.io.ObjectInputStream): Unit = {
+ super.deserializeParents(ois)
+ elementChildrenCompileInfo.foreach { _.deserializeParents(ois) }
+ }
+
+
lazy val elementChildrenCompileInfo = elementChildrenCompileInfoArg
override def preSerialization: Any = {
diff --git
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/SchemaSetRuntimeData.scala
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/SchemaSetRuntimeData.scala
index b974cb7..72e0768 100644
---
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/SchemaSetRuntimeData.scala
+++
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/SchemaSetRuntimeData.scala
@@ -40,4 +40,14 @@ final class SchemaSetRuntimeData(
override def schemaFileLocation = elementRuntimeData.schemaFileLocation
override def SDE(str: String, args: Any*) = elementRuntimeData.SDE(str, args)
+ private def writeObject(oos: java.io.ObjectOutputStream): Unit = {
+ oos.defaultWriteObject()
+ elementRuntimeData.dpathElementCompileInfo.serializeParents(oos)
+ }
+
+ private def readObject(ois: java.io.ObjectInputStream): Unit = {
+ ois.defaultReadObject()
+ elementRuntimeData.dpathElementCompileInfo.deserializeParents(ois)
+ }
+
}