stevedlawrence commented on a change in pull request #214: Sequences and
Separators Refactoring and Rewrite
URL: https://github.com/apache/incubator-daffodil/pull/214#discussion_r285166558
##########
File path:
daffodil-core/src/main/scala/org/apache/daffodil/grammar/primitives/SequenceChild.scala
##########
@@ -27,216 +27,434 @@ import
org.apache.daffodil.schema.annotation.props.gen.OccursCountKind
import org.apache.daffodil.schema.annotation.props.SeparatorSuppressionPolicy
import org.apache.daffodil.schema.annotation.props.gen.LengthKind
import org.apache.daffodil.schema.annotation.props.gen.Representation
+import SeparatorSuppressionPolicy._
+import org.apache.daffodil.util.Maybe
+import org.apache.daffodil.schema.annotation.props.EmptyElementPolicy
+import org.apache.daffodil.processors.parsers._
+import org.apache.daffodil.processors.TerminatorParseEv
/**
* A SequenceChild is exactly that, a child Term of a Sequence
*
+ * Methods/members of this class combine information about a child term
+ * with that about the sequence itself. This class is really a part of the
+ * state of the sequence object.
+ *
+ * This is the class primarily responsible for compilation of sequence
+ * and term information into digested form for the runtime, such
+ * that the runtime can properly implement complex DFDL behaviors like
+ * separator suppression and emptyElementPolicy.
+ *
* These objects are part of the Gram object hierarchy.
* They represent the use of a Term in a context. They are
* objects that belong to (are owned by exactly one) the enclosing sequence,
are part of it, and
* so it is reasonable for a SequenceChild to have a backpointer to the
- * enclosing Sequence object.
+ * enclosing Sequence object in all cases, and this is passed to them on
construction.
+ * This particular backpointer is
+ * not a challenge to sharing substructure as these objects cannot and are
+ * not intended to be shared. They really are state of the enclosing sequence
+ * broken out for convenience.
*
- * This allows the Term object that provides the definition of the
SequenceChild
- * to be shared/reused, in principle without having a backpointer to the
- * enclosing Sequence. That allows sharing, and removes lots of
duplication/copying
- * in the schema compiler data strucures.
+ * This allows the child (of the Sequence) Term object (dsom object) that
provides the
+ * definition of the SequenceChild to be shared/reused, in principle without
having
+ * a backpointer to the enclosing Sequence. That allows sharing, and removes
lots of
+ * duplication/copying that is otherwise needed in the schema compiler data
structures.
*
* Eventually things like alignment calculations should move from
* Terms to these objects. That is, those calculations should not be done
* on the DSOM objects, but on these SequenceChild objects in the Gram
* objects.
*/
-abstract class SequenceChild(
- protected val sq: SequenceTermBase, child: Term, groupIndex: Int)
+abstract class SequenceChild(protected val sq: SequenceTermBase, child: Term,
groupIndex: Int)
extends Terminal(child, true) {
+ import SeparatorSuppressionPolicy._
+ import SeparatedSequenceChildBehavior._
- protected def childParser = child.termContentBody.parser
- protected def childUnparser = child.termContentBody.unparser
+ private lazy val sgtb = sq.asInstanceOf[SequenceGroupTermBase]
- final override def parser = sequenceChildParser
- final override def unparser = sequenceChildUnparser
+ protected lazy val childParser = child.termContentBody.parser
+ protected lazy val childUnparser = child.termContentBody.unparser
+
+ final override lazy val parser = sequenceChildParser
+ final override lazy val unparser = sequenceChildUnparser
protected def sequenceChildParser: SequenceChildParser
protected def sequenceChildUnparser: SequenceChildUnparser
- def optSequenceChildParser: Option[SequenceChildParser] =
+ final lazy val optSequenceChildParser: Option[SequenceChildParser] =
if (childParser.isEmpty) None else Some(parser)
- def optSequenceChildUnparser: Option[SequenceChildUnparser] =
+ final lazy val optSequenceChildUnparser: Option[SequenceChildUnparser] =
if (childUnparser.isEmpty) None else Some(unparser)
- protected lazy val sepGram = sq.sequenceSeparator
+ /**
+ * There's only parse result helpers here, so let's abbreviate
+ */
+ protected type SeparatedHelper = SeparatedSequenceChildParseResultHelper
+ protected type UnseparatedHelper = UnseparatedSequenceChildParseResultHelper
+
+ protected def separatedHelper: SeparatedHelper
+ protected def unseparatedHelper: UnseparatedHelper
+
+ protected lazy val sepGram = {
+ sscb match {
+ case _: PositionalLike =>
+ sgtb.checkSeparatorTerminatorConflict
+ case _ => // ok
+ }
+ sq.sequenceSeparator
+ }
+
protected lazy val sepParser = sepGram.parser
protected lazy val sepUnparser = sepGram.unparser
- lazy val srd = sq.sequenceRuntimeData
- lazy val trd = child.termRuntimeData
+ final protected def srd = sq.sequenceRuntimeData
+ final protected def trd = child.termRuntimeData
+ final def termRuntimeData = trd
+ final protected lazy val mgrd =
child.asInstanceOf[ModelGroup].modelGroupRuntimeData
+ final protected lazy val erd =
child.asInstanceOf[ElementBase].elementRuntimeData
- /**
- * Used by unparsing algorithms that involve separator suppression for
- * zero-length data.
- */
- final lazy val zeroLengthDetector: ZeroLengthDetector = {
- val result: ZeroLengthDetector = child match {
- case e: ElementBase => {
- Assert.invariant(e.isRepresented)
- import LengthKind._
-
- lazy val couldBeZLSimpleType =
- !e.hasDelimiters &&
- e.isSimpleType &&
- (e.lengthKind match {
- case Delimited | EndOfParent => true
- //
- // When parsing, the pattern might not match zero-length data,
but
- // when unparsing we just output the data as it appears in the
infoset.
- // We don't check that it satisfies the pattern. So a zero-length
- // string in the infoset will be zero-length when output.
- //
- case Pattern => true
- case Explicit | Implicit => e.hasFixedLengthOf(0)
- case Prefixed => false
- }) &&
- //
- // Alignment stuff - can't be any aligning possible or we'd have
- // to do a runtime check for whether things are aligned or not.
- // In the unparser, that is a suspendable operation, since we may
- // not know the bit position.
- //
- // Separated sequences where there's any alignment possibly needed
- // are an obscure corner case we don't need to handle.
- //
- e.hasNoSkipRegions &&
- (((e.impliedRepresentation eq Representation.Text) &&
e.hasTextAlignment) ||
- // binary data is allowed to be delimited... so long as the
- // separators are recognizably not confused with binary bytes.
- ((e.impliedRepresentation eq Representation.Binary) &&
e.isKnownToBeAligned))
-
- val hasNilZL = (e.isNillable && !e.hasNilValueRequiredSyntax)
- val hasEmptyZL = (e.emptyIsAnObservableConcept &&
e.hasEmptyValueZLSyntax)
- val hasStringZL = e.isSimpleType && (e.simpleType.primType eq
NodeInfo.String) && (couldBeZLSimpleType || hasEmptyZL)
- val hasHexBinaryZL = e.isSimpleType && (e.simpleType.primType eq
NodeInfo.HexBinary) && (couldBeZLSimpleType || hasEmptyZL)
- (hasNilZL, hasStringZL, hasHexBinaryZL) match {
- case (true, false, false) => new NillableZeroLengthDetector
- case (false, true, false) => new StringZeroLengthDetector
- case (false, false, true) => new HexBinaryZeroLengthDetector
- case (true, true, false) => new NillableStringZeroLengthDetector
- case (true, false, true) => new NillableHexBinaryZeroLengthDetector
- case (_, true, true) => Assert.invariantFailed("Can't be both String
and HexBinary type")
- case _ => new NeverZeroLengthDetector
+ private lazy val childElement = child.asInstanceOf[ElementBase]
+ private lazy val childModelGroup = child.asInstanceOf[ModelGroup]
+
+ protected final def ssp = sq.separatorSuppressionPolicy
+
+ protected lazy val separatedSequenceChildBehavior:
SeparatedSequenceChildBehavior = {
+ import SeparatorSuppressionPolicy._
+ import SeparatedSequenceChildBehavior._
+ child match {
+ case m: ModelGroup =>
+ if (child.isPotentiallyTrailing) {
+ ssp match {
+ case AnyEmpty => NonPositional
+ case TrailingEmpty => PositionalTrailingLax
+ case TrailingEmptyStrict => PositionalTrailingStrict
+ case Never => Positional
+ }
+ } else {
+ ssp match {
+ // case AnyEmpty => NonPositional
+ case _ => Positional
+ }
+ }
+ case eb: ElementBase if (eb.isArray || eb.isOptional) => {
+ import OccursCountKind._
+ eb.occursCountKind match {
+ case Fixed => Positional
+ case Expression => Positional
+ case Parsed => NonPositional
+ case StopValue => NonPositional
+ case Implicit => {
+ val UNB = -1
+ (eb.isPotentiallyTrailing,
+ this.isDeclaredLast,
+ sq.separatorSuppressionPolicy,
+ eb.maxOccurs) match {
+ case (false, _, AnyEmpty, _) => NonPositional
+ case (false, _, _, UNB) if
!eb.isLastDeclaredRepresentedInSequence =>
+ Assert.invariantFailed("Should be SDE found elsewhere.")
+ case (false, _, _, _) => Positional
+ case (true, _, Never, UNB) => Assert.invariantFailed("Should
be SDE found elsewhere.")
+ case (true, false, AnyEmpty, UNB) => NonPositional
+ case (true, false, TrailingEmpty, UNB) if
!eb.isLastDeclaredRepresentedInSequence =>
+ Assert.invariantFailed("Should be SDE found elsewhere.")
+ case (true, false, TrailingEmptyStrict, UNB) if
!eb.isLastDeclaredRepresentedInSequence =>
+ Assert.invariantFailed("Should be SDE found elsewhere.")
+ case (true, _, Never, _) => Positional
+ case (true, _, AnyEmpty, _) => NonPositional
+ case (true, _, TrailingEmpty, _) => PositionalTrailingLax
+ case (true, _, TrailingEmptyStrict, _) =>
PositionalTrailingStrict
+ }
+ }
}
}
- case m: ModelGroup => {
- new NeverZeroLengthDetector
+ case eb: ElementBase if eb.isScalar => {
+ if (child.isPotentiallyTrailing) {
+ ssp match {
+ case AnyEmpty => NonPositional
+ case TrailingEmpty => PositionalTrailingLax
+ case TrailingEmptyStrict => PositionalTrailingStrict
+ case Never => Positional
+ }
+ } else {
+ ssp match {
+ // case AnyEmpty => NonPositional
+ case _ => Positional
+ }
+ }
}
+ case _ => Positional // obscure cases like maxOccurs 0 with lengthKind
'implicit'
+ }
+ }
+ protected final def sscb = separatedSequenceChildBehavior
+
+ final protected def isDeclaredLast: Boolean = {
+ child.isLastDeclaredRepresentedInSequence
+ }
+
+ final protected def isPositional: Boolean = {
+ separatedSequenceChildBehavior match {
+ case _: SeparatedSequenceChildBehavior.PositionalLike => true
+ case _ => false
}
- result
}
/**
- * Used when unparsing to determine which of several separator suppression
- * algorithms is required.
+ * Combines static knowledge of whether the term can
+ * be zero length, with issues like in order for trailing sep suppression to
+ * apply, the term must be potentially trailing.
+ *
+ * This combines the static information about the child term with that of
+ * the sequence child itself to answer, for this usage of the term, whether
+ * we know for sure that we should NOT suppress the separator.
*
- * Relevant only if the enclosing sequence is separated.
+ * True if we should never suppress separator i.e., always lay down
+ * associated separator. False if we may/may-not suppress separator
+ * depending on runtime characteristics like whether some thing(s) are zero
length.
*/
- final lazy val separatorSuppressionMode: SeparatorSuppressionMode = {
- Assert.invariant(sq.hasSeparator)
- import SeparatorSuppressionMode._
- import OccursCountKind._
- child match {
- case e: ElementBase => {
- if (e.isScalar) None
- else e.occursCountKind match {
- case Fixed | Expression => FixedOrExpression
- case Parsed => new SuppressAnyEmpty(zeroLengthDetector)
- case StopValue => Assert.notYetImplemented()
- case Implicit => {
- import SeparatorSuppressionPolicy._
- sq.separatorSuppressionPolicy match {
- case Never => ImplicitNeverOrNotPotentiallyTrailing
- case AnyEmpty => new ImplicitSuppressAnyEmpty(zeroLengthDetector)
- case _ if (!e.isPotentiallyTrailing) =>
ImplicitNeverOrNotPotentiallyTrailing
- case _ => new ImplicitPotentiallyTrailing(zeroLengthDetector)
- }
- }
+ final protected lazy val isKnownStaticallyNotToSuppressSeparator: Boolean = {
+ val neverSuppressSeparator = true
+ val sometimesSuppressSeparator = false
+ val res =
+ if (child.zeroLengthDetector eq NeverZeroLengthDetector)
+ neverSuppressSeparator // never zero length so we don't suppress
separator
+ else {
+ ssp match {
+ case TrailingEmpty | TrailingEmptyStrict =>
+ if (child.isPotentiallyTrailing)
+ sometimesSuppressSeparator
+ else
+ neverSuppressSeparator
+ case AnyEmpty =>
+ sometimesSuppressSeparator
+ case Never =>
+ neverSuppressSeparator
}
}
- case m: ModelGroup => {
- SeparatorSuppressionMode.None
+ res
+ }
+
+ /**
+ * Combines static information about the model groupsequence child's
definition
+ * with properties of the sequence to tell us if a zero-length
+ * after a parse attempt needs special treatment.
+ *
+ * This is the concept of "empty" that applies to model groups.
+ * As they are not elements, nothing about default values or EVDP or nil
reps applies
+ * here.
+ */
+
+ final protected lazy val (isModelGroupRepPossiblyZeroLength,
isModelGroupRepNonZeroLength): (Boolean, Boolean) = {
+ if (!childModelGroup.isRepresented) (false, false)
+ else childModelGroup match {
+ case mg if mg.isPotentiallyTrailing => (true, false)
+ case mg => {
+ val hasSyntax = mg.hasKnownRequiredSyntax
+ (!hasSyntax, hasSyntax)
}
}
}
+ final protected def isMGRPZL = isModelGroupRepPossiblyZeroLength
+ final protected def isMGRNZL = isModelGroupRepNonZeroLength
Review comment:
Looks like these aren't used anywhere, and the only difference is a single
character, which makes it difficult to differentiate. I'd suggest they be
removed.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services