tuxji commented on a change in pull request #437: URL: https://github.com/apache/incubator-daffodil/pull/437#discussion_r512888336
########## File path: daffodil-core/src/test/scala/org/apache/daffodil/infoset/TestInfosetFree.scala ########## @@ -0,0 +1,280 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.daffodil.infoset + +import org.junit.Assert._ +import org.junit.Test + +import org.apache.daffodil.compiler.Compiler +import org.apache.daffodil.compiler.ProcessorFactory +import org.apache.daffodil.io.InputSourceDataInputStream +import org.apache.daffodil.processors.DataProcessor +import org.apache.daffodil.processors.parsers.PState +import org.apache.daffodil.processors.unparsers.UStateMain +import org.apache.daffodil.util.SchemaUtils + +object TestInfosetFree { + + /** + * Compiles an infoset with infoset releasing disabled. By disabling the + * releasing, the Scala infoset is marked with attributes signifying which + * elements *would* have been freed if we didn't disable it. This allows for + * a reliable way to validate which elements were freed. + * + * This first parses and unparses data, without freeing any infoset + * nodes--only marking as described above. It then walks both infosets, + * configured to add information to the infoset about which elements were + * freed. We validate that both the parse and unparse results are the + * same--any infoset elements freed during a parse should also be freed + * during unparse. We then return the infoset for the test to compare against + * the expected value. + */ + def test( + schema: scala.xml.Elem, + bytes: Array[Byte]): scala.xml.Node = { + + val compiler = Compiler() + .withTunable("releaseUnneededInfoset", "false") + + val pf = compiler.compileNode(schema).asInstanceOf[ProcessorFactory] Review comment: I don't think the .asInstanceOf[ProcessorFactory] is necessary, but I could be wrong. ########## File path: daffodil-core/src/test/scala/org/apache/daffodil/infoset/TestInfosetFree.scala ########## @@ -0,0 +1,280 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.daffodil.infoset + +import org.junit.Assert._ +import org.junit.Test + +import org.apache.daffodil.compiler.Compiler +import org.apache.daffodil.compiler.ProcessorFactory +import org.apache.daffodil.io.InputSourceDataInputStream +import org.apache.daffodil.processors.DataProcessor +import org.apache.daffodil.processors.parsers.PState +import org.apache.daffodil.processors.unparsers.UStateMain +import org.apache.daffodil.util.SchemaUtils + +object TestInfosetFree { + + /** + * Compiles an infoset with infoset releasing disabled. By disabling the + * releasing, the Scala infoset is marked with attributes signifying which + * elements *would* have been freed if we didn't disable it. This allows for + * a reliable way to validate which elements were freed. + * + * This first parses and unparses data, without freeing any infoset + * nodes--only marking as described above. It then walks both infosets, + * configured to add information to the infoset about which elements were + * freed. We validate that both the parse and unparse results are the + * same--any infoset elements freed during a parse should also be freed + * during unparse. We then return the infoset for the test to compare against + * the expected value. + */ + def test( + schema: scala.xml.Elem, + bytes: Array[Byte]): scala.xml.Node = { + + val compiler = Compiler() + .withTunable("releaseUnneededInfoset", "false") + + val pf = compiler.compileNode(schema).asInstanceOf[ProcessorFactory] + if (pf.isError) { + val msgs = pf.getDiagnostics.map { _.getMessage() }.mkString("\n") + fail("pf compile errors: " + msgs) + } + pf.sset.root.erd.preSerialization // force evaluation of all compile-time constructs + val dp = pf.onPath("/").asInstanceOf[DataProcessor] + if (dp.isError) { + val msgs = dp.getDiagnostics.map { _.getMessage() }.mkString("\n") + fail("dp compile errors: " + msgs) + } + + // Parse the data, note that we do no set showFreedInfo here because Review comment: *no -> not ########## File path: daffodil-core/src/test/scala/org/apache/daffodil/infoset/TestInfosetFree.scala ########## @@ -0,0 +1,280 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.daffodil.infoset + +import org.junit.Assert._ +import org.junit.Test + +import org.apache.daffodil.compiler.Compiler +import org.apache.daffodil.compiler.ProcessorFactory +import org.apache.daffodil.io.InputSourceDataInputStream +import org.apache.daffodil.processors.DataProcessor +import org.apache.daffodil.processors.parsers.PState +import org.apache.daffodil.processors.unparsers.UStateMain +import org.apache.daffodil.util.SchemaUtils + +object TestInfosetFree { + + /** + * Compiles an infoset with infoset releasing disabled. By disabling the + * releasing, the Scala infoset is marked with attributes signifying which + * elements *would* have been freed if we didn't disable it. This allows for + * a reliable way to validate which elements were freed. + * + * This first parses and unparses data, without freeing any infoset + * nodes--only marking as described above. It then walks both infosets, + * configured to add information to the infoset about which elements were + * freed. We validate that both the parse and unparse results are the + * same--any infoset elements freed during a parse should also be freed + * during unparse. We then return the infoset for the test to compare against + * the expected value. + */ + def test( + schema: scala.xml.Elem, + bytes: Array[Byte]): scala.xml.Node = { + + val compiler = Compiler() + .withTunable("releaseUnneededInfoset", "false") + + val pf = compiler.compileNode(schema).asInstanceOf[ProcessorFactory] + if (pf.isError) { + val msgs = pf.getDiagnostics.map { _.getMessage() }.mkString("\n") + fail("pf compile errors: " + msgs) + } + pf.sset.root.erd.preSerialization // force evaluation of all compile-time constructs + val dp = pf.onPath("/").asInstanceOf[DataProcessor] + if (dp.isError) { + val msgs = dp.getDiagnostics.map { _.getMessage() }.mkString("\n") + fail("dp compile errors: " + msgs) + } + + // Parse the data, note that we do no set showFreedInfo here because + // DINodes aren't freed until *after* the infoset walker walks them. So the + // maybeFreed state hasn't been set yet when the infoset outputter gets the + // events. We must walk the infoset again after the parse is complete + val parseInput = InputSourceDataInputStream(bytes) + val parseOutputter = new ScalaXMLInfosetOutputter() + val parseResult = dp.parse(parseInput, parseOutputter) + if (parseResult.isError) { + val msgs = parseResult.getDiagnostics.map { _.getMessage() }.mkString("\n") + fail("parse errors: " + msgs) + } + + val unparseInputter = new ScalaXMLInfosetInputter(parseOutputter.getResult) + val unparseOutput = new org.apache.commons.io.output.NullOutputStream + val unparseResult = dp.unparse(unparseInputter, unparseOutput) + if (unparseResult.isError) { + val msgs = unparseResult.getDiagnostics.map { _.getMessage() }.mkString("\n") + fail("unparse errors: " + msgs) + } + + // now walk the parse and unparse infosets and convert them to Scala XML + // with the showFreedInfoset set + + def docToXML(doc: DIDocument): scala.xml.Node = { + val detailedOutputter = new ScalaXMLInfosetOutputter( + showFormatInfo = false, + showFreedInfo = true) + + val infosetWalker = InfosetWalker( + doc.asInstanceOf[DIElement], + detailedOutputter, + walkHidden = true, // let's ensure any hidden elements are free + ignoreBlocks = true, // there should be no blocks, but ignore them just to be sure + releaseUnneededInfoset = false) // do not free the infoset + infosetWalker.walk(lastWalk = true) + + detailedOutputter.getResult() + } + + val parseDoc = parseResult.resultState.asInstanceOf[PState].infoset.asInstanceOf[DIDocument] + val unparseDoc = unparseResult.resultState.asInstanceOf[UStateMain].documentElement + + val parseXML = docToXML(parseDoc) + val unparseXML = docToXML(unparseDoc) + + if (parseXML.toString != unparseXML.toString) { + fail("parse and unparse XML did not match, infoset not freed the same") + } + + parseXML + } +} + +class TestInfosetFree { + + @Test def testInfosetFree1(): Unit = { + val testSchema = SchemaUtils.dfdlTestSchema( + <xs:include schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd" />, + <dfdl:format ref="tns:GeneralFormat" />, + <xs:element name="root"> + <xs:complexType> + <xs:sequence> + <xs:element name="e" type="xs:int" maxOccurs="unbounded" + dfdl:length="1" dfdl:lengthKind="explicit" /> + </xs:sequence> + </xs:complexType> + </xs:element>) + + val actualXML = TestInfosetFree.test(testSchema, "123".getBytes) + + // all elements freed, including array elements and the array itself + val expectedXML = + <root freed="self" xmlns="http://example.com"> + <e freed="self+array">1</e> + <e freed="self+array">2</e> + <e freed="self+array">3</e> + </root> + + assertEquals(scala.xml.Utility.trim(expectedXML).toString, actualXML.toString) + } + + @Test def testInfosetFree2(): Unit = { + val testSchema = SchemaUtils.dfdlTestSchema( + <xs:include schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd" />, + <dfdl:format ref="tns:GeneralFormat" />, + <xs:group name="hidden"> + <xs:sequence> + <xs:element name="e" type="xs:int" dfdl:length="1" dfdl:lengthKind="explicit" + dfdl:outputValueCalc="{ 1 }" /> + </xs:sequence> + </xs:group> + <xs:element name="root"> + <xs:complexType> + <xs:sequence> + <xs:sequence dfdl:hiddenGroupRef="tns:hidden" /> + </xs:sequence> + </xs:complexType> + </xs:element>) + + val actualXML = TestInfosetFree.test(testSchema, "1".getBytes) + + // all elements freed, including hidden elements + val expectedXML = + <root freed="self" xmlns="http://example.com"> + <e freed="self">1</e> + </root> + + assertEquals(scala.xml.Utility.trim(expectedXML).toString, actualXML.toString) + } + + @Test def testInfosetFree3(): Unit = { + val testSchema = SchemaUtils.dfdlTestSchema( + <xs:include schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd" />, + <dfdl:format ref="tns:GeneralFormat" />, + <xs:element name="root"> + <xs:complexType> + <xs:sequence> + <xs:element name="fieldLen" type="xs:int" dfdl:lengthKind="explicit" dfdl:length="1" /> + <xs:element name="field" type="xs:int" maxOccurs="unbounded" + dfdl:lengthKind="explicit" dfdl:length="{ ../tns:fieldLen }" /> + </xs:sequence> + </xs:complexType> + </xs:element>) + + val actualXML = TestInfosetFree.test(testSchema, "1123".getBytes) + + // all elements freed, execpted for fieldLen because it is used in an expression + val expectedXML = + <root freed="self" xmlns="http://example.com"> + <fieldLen>1</fieldLen> + <field freed="self+array">1</field> + <field freed="self+array">2</field> + <field freed="self+array">3</field> + </root> + + assertEquals(scala.xml.Utility.trim(expectedXML).toString, actualXML.toString) + } + + @Test def testInfosetFree4(): Unit = { + val testSchema = SchemaUtils.dfdlTestSchema( + <xs:include schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd" />, + <dfdl:format ref="tns:GeneralFormat" />, + <xs:element name="root"> + <xs:complexType> + <xs:sequence> + <xs:element name="fieldCount" type="xs:int" dfdl:lengthKind="explicit" dfdl:length="1" + dfdl:outputValueCalc="{ fn:count(../tns:field) }" /> + <xs:element name="field" type="xs:int" maxOccurs="unbounded" + dfdl:occursCountKind="expression" dfdl:occursCount="{ ../tns:fieldCount }" + dfdl:lengthKind="explicit" dfdl:length="1" /> + </xs:sequence> + </xs:complexType> + </xs:element>) + + val actualXML = TestInfosetFree.test(testSchema, "3123".getBytes) + + // Most elements are not freed. Both fieldCount and field are used in an + // expression. Even though only the count of the field array is needed, and + // we could theoretically free the field elements but no the array, they Review comment: *no -> not ########## File path: daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/processors/unparsers/ElementUnparser.scala ########## @@ -515,35 +561,60 @@ trait OVCStartEndStrategy Assert.invariant(endEv.isEnd && endEv.erd == erd) val e = new DISimple(erd) - state.currentInfosetNode.asComplex.addChild(e, state.tunable) // Remove any state that was set by what created this event. Later // code asserts that OVC elements do not have a value e.resetValue e } else { // Event was optional and didn't exist, create a new InfosetElement and add it val e = new DISimple(erd) - state.currentInfosetNode.asComplex.addChild(e, state.tunable) e } } else { // Event was hidden and will never exist, create a new InfosetElement and add it val e = new DISimple(erd) e.setHidden() - state.currentInfosetNode.asComplex.addChild(e, state.tunable) e } - val e = One(elem) - state.currentInfosetNodeStack.push(e) + // We are about to add a new OVC child to this complex element. Before we + // do that, if the last child added to this complex is a DIArray, that + // implies that the DIArray will have no more children added and should be + // marked as final, and we can attempt to free that array. + val parentNode = state.currentInfosetNode + val parentComplex = parentNode.asComplex + val lastChildMaybe = parentComplex.maybeLastChild + if (lastChildMaybe.isDefined) { + val lastChild = lastChildMaybe.get + if (lastChild.isArray) { + lastChild.isFinal = true + parentComplex.freeChildIfNoLongerNeeded(parentComplex.numChildren - 1, state.releaseUnneededInfoset) + } + } + + parentComplex.addChild(ovcElem, state.tunable) + state.currentInfosetNodeStack.push(One(ovcElem)) } protected final override def unparseEnd(state: UState): Unit = { - state.currentInfosetNodeStack.pop - // if an OVC element existed, the start AND end events were consumed in // unparseBegin. No need to advance the cursor here. + // ovcElem is finished, free it if possible. OVC elements are not allow in Review comment: *allow -> allowed ########## File path: daffodil-core/src/test/scala/org/apache/daffodil/infoset/TestInfosetFree.scala ########## @@ -0,0 +1,280 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.daffodil.infoset + +import org.junit.Assert._ +import org.junit.Test + +import org.apache.daffodil.compiler.Compiler +import org.apache.daffodil.compiler.ProcessorFactory +import org.apache.daffodil.io.InputSourceDataInputStream +import org.apache.daffodil.processors.DataProcessor +import org.apache.daffodil.processors.parsers.PState +import org.apache.daffodil.processors.unparsers.UStateMain +import org.apache.daffodil.util.SchemaUtils + +object TestInfosetFree { + + /** + * Compiles an infoset with infoset releasing disabled. By disabling the + * releasing, the Scala infoset is marked with attributes signifying which + * elements *would* have been freed if we didn't disable it. This allows for + * a reliable way to validate which elements were freed. + * + * This first parses and unparses data, without freeing any infoset + * nodes--only marking as described above. It then walks both infosets, + * configured to add information to the infoset about which elements were + * freed. We validate that both the parse and unparse results are the + * same--any infoset elements freed during a parse should also be freed + * during unparse. We then return the infoset for the test to compare against + * the expected value. + */ + def test( + schema: scala.xml.Elem, + bytes: Array[Byte]): scala.xml.Node = { + + val compiler = Compiler() + .withTunable("releaseUnneededInfoset", "false") + + val pf = compiler.compileNode(schema).asInstanceOf[ProcessorFactory] + if (pf.isError) { + val msgs = pf.getDiagnostics.map { _.getMessage() }.mkString("\n") + fail("pf compile errors: " + msgs) + } + pf.sset.root.erd.preSerialization // force evaluation of all compile-time constructs + val dp = pf.onPath("/").asInstanceOf[DataProcessor] Review comment: Likewise, is the asInstanceOf really needed here? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected]
