Github user mbeckerle commented on a diff in the pull request:

    https://github.com/apache/incubator-daffodil/pull/5#discussion_r150872917
  
    --- Diff: 
daffodil-core/src/main/scala/edu/illinois/ncsa/daffodil/dsom/GroupDef.scala ---
    @@ -34,70 +34,137 @@ package edu.illinois.ncsa.daffodil.dsom
     
     import scala.xml.Node
     import edu.illinois.ncsa.daffodil.exceptions.Assert
    -import edu.illinois.ncsa.daffodil.xml.QName
    +import scala.xml.Text
    +import scala.xml.Comment
     
    -class GlobalGroupDefFactory(xmlArg: Node, schemaDocumentArg: 
SchemaDocument)
    -  extends SchemaComponentFactory(xmlArg, schemaDocumentArg)
    +final class GlobalGroupDefFactory(defXML: Node, schemaDocument: 
SchemaDocument)
    +  extends SchemaComponentFactory(defXML, schemaDocument)
       with GlobalNonElementComponentMixin {
     
    -  private lazy val trimmedXml = scala.xml.Utility.trim(xmlArg)
    -
    -  def forGroupRef(gref: GroupRef, position: Int) = {
    +  def forGroupRef(refXML: Node, refLexicalParent: SchemaComponent, 
position: Int,
    +    isHidden: Boolean): (GroupRef, GlobalGroupDef) = {
    +    val trimmedXml = scala.xml.Utility.trim(defXML)
         trimmedXml match {
           case <group>{ contents @ _* }</group> => {
    -        val ggd = contents.collect {
    -          case <sequence>{ _* }</sequence> => new 
GlobalSequenceGroupDef(xml, schemaDocument, gref, position)
    -          case <choice>{ _* }</choice> => new GlobalChoiceGroupDef(xml, 
schemaDocument, gref, position)
    +        val list = contents.collect {
    +          case groupXML @ <sequence>{ _* }</sequence> => {
    +            lazy val gref: SequenceGroupRef = new SequenceGroupRef(gdef, 
refXML, refLexicalParent, position, isHidden)
    +            lazy val gdef: GlobalSequenceGroupDef = new 
GlobalSequenceGroupDef(defXML, groupXML, schemaDocument, gref)
    +            gref.groupDef
    +            gdef.groupRef
    +            (gref, gdef)
    +          }
    +          case groupXML @ <choice>{ _* }</choice> =>
    +            lazy val gref: ChoiceGroupRef = new ChoiceGroupRef(gdef, 
refXML, refLexicalParent, position, isHidden)
    +            lazy val gdef: GlobalChoiceGroupDef = new 
GlobalChoiceGroupDef(defXML, groupXML, schemaDocument, gref)
    +            gref.groupDef
    +            gdef.groupRef
    +            (gref, gdef)
             }
    -        Assert.invariant(ggd.length == 1)
    -        ggd(0)
    +        val res = list(0)
    +        res
           }
           case _ => Assert.invariantFailed("not a group")
         }
       }
    -
    -  override lazy val namedQName = QName.createGlobal(name, targetNamespace, 
xml.scope)
    -
     }
     
    -sealed abstract class GlobalGroupDef(xmlArg: Node, schemaDocumentArg: 
SchemaDocument, val groupRef: GroupRef, position: Int)
    -  extends SchemaComponent(xmlArg, schemaDocumentArg)
    -  with GlobalNonElementComponentMixin 
    -  with NestingTraversesToReferenceMixin {
    -
    -  requiredEvaluations(modelGroup)
    -  requiredEvaluations(validateChoiceBranchKey)
    -
    -  final lazy val referringComponent = {
    -    val res = Some(groupRef)
    -    res
    -  }
    +/**
    + * Many things can be contained either by a model group or a
    + * group definition (aka global sequence group definition or global choice
    + * group definition).
    + *
    + * These things are GroupLike.
    + */
    +trait GroupDefLike
    +  extends AnnotatedSchemaComponent {
     
    -  // final override protected def enclosingComponentDef = 
groupRef.enclosingComponent
    +  def xmlChildren: Seq[Node]
     
       //
    -  // Note: Dealing with XML can be fragile. It's easy to forget some of 
these children
    -  // might be annotations and Text nodes. Even if you trim the text nodes 
out, there are
    -  // places where annotations can be.
    -  //
    -  final lazy val <group>{ xmlChildren @ _* }</group> = xml
    -  //
       // So we have to flatMap, so that we can tolerate annotation objects 
(like documentation objects).
       // and our ModelGroup factory has to return Nil for annotations and Text 
nodes.
       //
    -  final lazy val Seq(modelGroup: ModelGroup) = xmlChildren.flatMap { 
ModelGroupFactory(_, this, position) }
    +  final lazy val groupMembers: Seq[Term] = LV('groupMembers) {
    +    val goodXmlChildren = LV('goodXMLChildren) { xmlChildren.flatMap { 
removeNonInteresting(_) } }.value
    +    val positions = List.range(1, goodXmlChildren.length + 1) // range is 
exclusive on 2nd arg. So +1.
    +    val pairs = goodXmlChildren zip positions
    +    pairs.flatMap {
    +      case (n, i) =>
    +        TermFactory(n, this, i)
    +    }
    +  }.value
    +
    +  /**
    +   * XML is full of uninteresting text nodes. We just want the element 
children, not all children.
    +   */
    +  private def removeNonInteresting(child: Node) = {
    +    val childList: List[Node] = child match {
    +      case _: Text => Nil
    +      case _: Comment => Nil
    +      case <annotation>{ _* }</annotation> => Nil
    +      case _ => List(child)
    +    }
    +    childList
    +  }
    --- End diff --
    
    I agree that we should profile it. 
    
    The alternative coding that wouldn't copy has to cope with the variety of 
XML nodes subsequently where children are processed. The copying here may be 
worth it to reduce the complexity. Keeping in mind everywhere one traverses XML 
 children that comments, whitespace text nodes, PIs, etc. can appear is very 
error prone. Basically you can't use XML pattern matching, because you'll get 
all these additional nodes. You'd have to use an XPath expression (scala 
formulation thereof) to pull the nodes you want, only.
    
    One thing we could do is walk the whole schema exactly once removing all 
these irrelevant nodes. I just think that would make debugging very tedious, 
because the XML would all be giant single lines.   


---

Reply via email to