andrewjc2000 commented on a change in pull request #394: URL: https://github.com/apache/incubator-daffodil/pull/394#discussion_r455462202
########## File path: daffodil-core/src/test/scala/org/apache/daffodil/dsom/walker/TestWalker.scala ########## @@ -0,0 +1,335 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.daffodil.dsom.walker + +import scala.collection.mutable.ListBuffer + +/** + * This represents all possible types a Field could be; the View Mixin is pattern matched to one + * of these in TestNode + */ +object FieldType extends Enumeration { + type FieldType = Value + val RecordType, ChoiceType, ArrayType, DateTimeType, DateType, TimeType, BooleanType, ByteType, + ShortType, IntType, LongType, BigIntType, FloatType, DoubleType, StringType = Value +} + +import org.apache.daffodil.dsom.walker.FieldType._ + +/** + * Represents the DataType of a TestField; note that all of the Simple types manifest as + * a SimpleDataType with a single FieldType containing the FieldType enum for the simple. + * @param fieldType the particular FieldType Enum that this DataType backs. + */ +class DataType(val fieldType: FieldType.Value) +case class RecordDataType(var childSchema: TestSchema) extends DataType(RecordType) +case class ChoiceDataType(var possibleSubTypes: List[DataType]) extends DataType(ChoiceType) +case class ArrayDataType(var elementType: DataType) extends DataType(ArrayType) +case class SimpleDataType(simpleType: FieldType) extends DataType(simpleType) + +/** + * A field within a TestSchema + * @param fieldName name of the field + * @param dataType dataType of the field + */ +class TestField(val fieldName: String, val dataType: DataType) + +/** + * Base Schema object for this test class. The only part that is really used is the field list; + * there are some helper methods based around the list + * @param name the name for this TestSchema + * @param namespace the target namespace of this TestSchema + * @param fields the list of fields that define this TestSchema + */ +class TestSchema(val name: String, val namespace: String, val fields: List[TestField]) { + def getField(name: String): Option[TestField] = + fields.filter(field => field.fieldName == name) match { + case List(result) => Some(result) + case _ => None + } + val fieldCount: Int = fields.size + val fieldNames: List[String] = fields.map(field => field.fieldName) +} + + +/** + * @param fieldName the name for the RecordField + * @param dataType the DataType of this RecordField + */ +class OptionalTestField(fieldName: String, dataType: DataType) + extends TestField(fieldName, dataType) { + // this is the easiest way to construct it; essentially a "copy constructor" except to denote + // that we want to treat it as this special optional subclass. + def this(recordField: TestField) = this(recordField.fieldName, recordField.dataType) +} + +/** + * Concrete implementation of the AbstractDSOMWalker abstract class. + * This class produces a TestSchema that is intended to match the original DFDL file. + * + * The TestSchema is built in 3 primary stages: + * 1) A tree of TestNodes is created as the DFDL file is walked; this walk is performed + * through the various event handlers defined in the parent abstract class. + * 2) The tree of TestNodes undergoes some post-processing, mainly to remove redundant Record wrappers. + * 3) The tree of TestNodes is converted into a TestSchema; it is walked recursively within this class. + */ +class TestWalker extends AbstractDSOMWalker { + + // this is the critical data structure for managing the temporary TestNodes that are created + // when the Schema is initially walked. This will then be converted to the actual TestSchema. + private var objectStack: List[TestNode] = List() + + // this will contain the final TestSchema after step 3 as described above is complete. + private var result: Option[TestSchema] = None + def getResult: TestSchema = result.orNull + def stringRep: String = if (result.isDefined) result.get.toString else "" + + /** + * Every event handler for onXBegin is routed to this helper method. Individual properties of + * a given element are not actually handled here; these properties are extracted when a + * TestNode is instantiated + * @param dsomElement the particular element that was encountered. Could be essentially any Term element. + */ + private def onNodeBegin(dsomElement: TermView): Unit = { + val newNode: TestNode = new TestNode(dsomElement) + // we need add the new node as a new child of whatever is currently at the top of the stack + objectStack.head.addChild(newNode) + // however, we also add the node to the stack itself! We need to be able to add children to it + // if it is, say, another record or array. + objectStack = newNode +: objectStack + } + + /** + * Every event handler for onXEnd is routed to this helper method. We don't need to do anything other + * than remove the top element of the stack; this should be the current element. + */ + private def onNodeEnd(): Unit = objectStack = objectStack.tail + + override def onTermBegin(termElement: TermView): Unit = termElement match { + case _: SequenceView | _: ChoiceView | _: ElementBaseView => onNodeBegin(termElement) + case _ => + } + + override def onTermEnd(termElement: TermView): Unit = termElement match { + case _: SequenceView | _: ChoiceView | _: ElementBaseView => onNodeEnd() + case _ => + } + + override def onTypeBegin(typeElement: TypeView): Unit = {} + + override def onTypeEnd(typeElement: TypeView): Unit = {} + + override def onWalkBegin(root: RootView): Unit = objectStack = List(new TestNode(root)) + + override def onWalkEnd(root: RootView): Unit = { + // After the walk is over, we perform postProcessing and then convert the TestNode tree + // into a TestSchema. Also, if we are in dev. mode, we print out the TestNode tree + if (!RecordWalker.PRODUCTION_MODE) println(objectStack.head) + postProcessing() + if (!RecordWalker.PRODUCTION_MODE) println(objectStack.head) + result = Some(getRecordSchema(objectStack.head)) + } + + /** + * Determines if a Record is "extra"; that is, if it should be replaced with + * its list of children SchemaNodes within whatever parent TestNode it's a part of + * @param childNode the node to be considered + * @return true if this node is a Record and has no name, false otherwise + */ + private def isExtraRecord(parentNode: TestNode, childNode: TestNode): Boolean = { + // any no-name nodes with no children are immediately removed + (childNode.name.isEmpty && childNode.children.isEmpty) || { + parentNode.fieldType match { + case RecordType | ArrayType => + childNode.fieldType match { + // This removes extra wrapper records around children of records or arrays + // usually used to remove things like the DFDL complexType, simpleType elements + case RecordType => childNode.name.isEmpty + case _ => false + } + // Currently, all double choices are removed. This was mainly done to make GroupRefs work + // for the JPEG Schema, but may not be the correct approach for all cases. + case ChoiceType => + childNode.fieldType match { + case ChoiceType => true + case _ => false + } + case _ => false + } + } + + } + + /** + * Recursively replace any SchemaNodes that are of type record and do not have + * a name attribute with their children. These usually represent unnecessary wrapper nodes or empty + * records with no elements. + * + * Given a TestNode, we copy over all the children to a list, and initialize another empty list. + * Then, for every Node in the first list, if it is considered "extra", + * then its *children* are added to the second list. + * Otherwise, the Node itself is added. + * + * Then, we let the first list equal the second and we repeat this process + * until none of the children are "extra". + * This is necessary because sometimes we can have towers of extra Nodes that would + * never get resolved if we just took care of 1 or 2 layers; all must be dealt with at once. + * + * Then, the current TestNode's children list is replaced with this filtered one, + * and this method is called on all the items of that list. + * @param schemaNode the current Node undergoing the algorithm described above + */ + private def removeExtraRecords(schemaNode: TestNode): Unit = { + schemaNode.fieldType match { + // Only SchemaNodes that are these 3 types should be able to have child nodes. + case RecordType | ChoiceType | ArrayType => + if (schemaNode.children.exists(child => isExtraRecord(schemaNode, child))) { + var currChildren: ListBuffer[TestNode] = new ListBuffer[TestNode]() + for (child <- schemaNode.children) currChildren += child + while (currChildren.exists(child => isExtraRecord(schemaNode, child))) { + val toCopy: ListBuffer[TestNode] = new ListBuffer[TestNode]() + for (child <- currChildren) { + if (isExtraRecord(schemaNode, child)) { + for (subChild <- child.children) toCopy += subChild + } else toCopy += child + } + currChildren = toCopy + } + schemaNode.children = currChildren.toList + } + case _ => + } + // call this helper method on each of this Nodes's children. There is no infinite recursion because + // eventually a sub-child will not have any more children. This of course assumes that we are dealing + // with a rooted tree (there shouldn't be any cycles), which should be the case. + for (child <- schemaNode.children) removeExtraRecords(child) + } + + /** + * Perform postProcessing; this happens *after* the TestNode tree is created but *before* that tree + * gets converted to a TestSchema + */ + private def postProcessing(): Unit = { + removeExtraRecords(objectStack.head) + } + + private def getSimpleType(schemaNode: TestNode): FieldType = { + if (!schemaNode.isSimple) throw new IllegalArgumentException(s"Node $schemaNode is not a simple type!") + schemaNode.fieldType + } + + private def nodeToField(schemaNode: TestNode): TestField = { + // by default, if this node doesn't have a name, its data type is used as the field name. + // This should only ever be the case for anonymous choices. + val recordField: TestField = new TestField( + schemaNode.name.getOrElse(schemaNode.fieldType.toString), + schemaNodeToDataType(schemaNode) + ) + if (schemaNode.isOptional) new OptionalTestField(recordField) else recordField + } + + private def newRecordSchema(schemaNode: TestNode, fieldList: List[TestField]): TestSchema = { + new TestSchema(schemaNode.name.getOrElse(""), schemaNode.namespace, fieldList) + } + + /** + * Helper method to specifically convert a TestNode known to be a Record into a TestSchema. + * Each Child of the TestNode becomes a Field of the TestSchema + * @param schemaNode the node to convert to a TestSchema + * @return a tuple containing the finalized TestSchema and its corresponding Record Data Type + */ + private def schemaNodeToRecordType(schemaNode: TestNode): RecordDataType = { + val fieldList: ListBuffer[TestField] = ListBuffer() + for (child <- schemaNode.children) fieldList += nodeToField(child) + val TestSchema: TestSchema = newRecordSchema(schemaNode, fieldList.toList) + RecordDataType(TestSchema) + } + + /** + * Helper method to convert a TestNode known to be a choice into a Choice data type. + * This is able to handle a DFDL schema in which either named elements are directly sub-members of + * DFDL choices or if they are embedded in another element (which corresponds to being in a Record). + * In the end, if they are not already inside a Record, then they are put there. Choices cannot + * have fields, only possible sub-types, so anything that would be a "field" has a wrapper Record + * put around it. + * @param schemaNode the node to convert to a Choice data type + * @return a Choice data type as described above + */ + private def choiceNodeToChoiceSchema(schemaNode: TestNode): DataType = { + val childList: ListBuffer[DataType] = new ListBuffer() + for (child <- schemaNode.children) { + if (child.name.isEmpty) { + childList += schemaNodeToRecordType(child) + } else { + // In the case where the child has a name, we embed it in an additional TestSchema, + // since we must add some DataType rather than a Field. + val fieldList: ListBuffer[TestField] = ListBuffer() + fieldList += nodeToField(child) + val choiceOption: TestSchema = newRecordSchema(child, fieldList.toList) + childList += RecordDataType(choiceOption) + } + } + ChoiceDataType(childList.toList) + } + + /** + * Local helper method to appropriately convert a TestNode into an appropriate Record + * Data Type. Records and Choices get routed to other helper methods, and Arrays are handled in the method. + * @param schemaNode the node to convert to a Record Data Type + * @return the finalized Record Data type + */ + private def schemaNodeToDataType(schemaNode: TestNode): DataType = { + schemaNode.fieldType match { + case ArrayType => + if (schemaNode.isSimple) { + ArrayDataType(new DataType(getSimpleType(schemaNode))) + } else if (schemaNode.children.nonEmpty) { + ArrayDataType(schemaNodeToRecordType(schemaNode)) + } else { + throw new IllegalStateException( + s"Array Node $schemaNode either needs to be simple or have child nodes!" + ) + } + case RecordType => schemaNodeToRecordType(schemaNode) + case ChoiceType => choiceNodeToChoiceSchema(schemaNode) + case _ => schemaNode.simpleType.get + } + } + + /** + * In general, this method can be used to extract the TestSchema node from the tuple result + * of schemaNodeToRecordSchema. However, it is really only used to kick off the conversion from + * the root TestNode. + * @param schemaNode the node from which a TestSchema will be extracted + * @return the processed TestSchema represented by the input + */ + private def getRecordSchema(schemaNode: TestNode): TestSchema = { + schemaNodeToRecordType(schemaNode).childSchema + } + +} + +/** + * The sole purpose of this Companion Object is to contain this one static variable. + */ +object RecordWalker { + /** + * Used to suppress all of the extra printing from running test cases + */ + final val PRODUCTION_MODE: Boolean = true Review comment: I did not wind up ignoring things like complexType and simpleType in the testComplexTypes() test case, but I do have a flag for that in the new testing class if that is desired - let me know if you prefer that. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected]
