mbeckerle commented on a change in pull request #88: Daffodil 1919 separators
URL: https://github.com/apache/incubator-daffodil/pull/88#discussion_r206584649
 
 

 ##########
 File path: 
daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/parsers/SeparatedSequenceParsers.scala
 ##########
 @@ -0,0 +1,541 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.daffodil.processors.parsers
+
+import org.apache.daffodil.exceptions.Assert
+import org.apache.daffodil.processors._
+import org.apache.daffodil.schema.annotation.props.SeparatorSuppressionPolicy
+import org.apache.daffodil.schema.annotation.props.gen.SeparatorPosition
+import ArrayIndexStatus._
+import org.apache.daffodil.dpath.NodeInfo
+
+/**
+ * Mixin for sequence child parsers that take part in a separated sequence.
+ *
+ * It carries the separator parser together with the separator position and
+ * separator suppression policy, and exposes the separator parser and the
+ * child parser as this parser's child processors.
+ */
+trait Separated { self: SequenceChildParser =>
+
+  /** Parser used to parse the separator. */
+  def sep: Parser
+
+  /** The separator position associated with this child. */
+  def spos: SeparatorPosition
+
+  /** The separator suppression policy associated with this child. */
+  def ssp: SeparatorSuppressionPolicy
+
+  /** The separator parser followed by the child parser, in that order. */
+  val childProcessors = Seq(sep, childParser)
+}
+
+/**
+ * Mixin for repeating children of a separated sequence that are parsed
+ * speculatively (with points of uncertainty).
+ *
+ * It answers two questions about zero-length (ZL) occurrences for the
+ * sequence parser: whether a successfully parsed ZL string/hexBinary value
+ * should be dropped from the infoset, and whether a ZL parse failure should
+ * be suppressed so that trailing separators are tolerated.
+ */
+sealed trait RepeatingSeparatedPoU extends Separated { self: RepeatingChildParser =>
+
+  /**
+   * Whether the element declaration is potentially trailing.
+   *
+   * Abstract here; the concrete parser class supplies the value.
+   */
+  def isPotentiallyTrailing: Boolean
+
+  /**
+   * Whether the element is declared last in its sequence.
+   *
+   * Abstract here; the concrete parser class supplies the value.
+   */
+  def isDeclaredLastInSequence: Boolean
+
+  /**
+   * True when the separator suppression policy is TrailingEmpty or
+   * TrailingEmptyStrict.
+   */
+  private def isTrailingEmptyPolicy: Boolean =
+    (ssp eq SeparatorSuppressionPolicy.TrailingEmpty) ||
+      (ssp eq SeparatorSuppressionPolicy.TrailingEmptyStrict)
+
+  /**
+   * Static (per-parser) part of the decision to remove ZL string/hexBinary
+   * values.
+   *
+   * The leading `false` deliberately disables this feature, as many tests in
+   * daffodil-core depend on NOT removing these empty strings. The remaining
+   * conditions are kept so the feature can be re-enabled by dropping it.
+   */
+  private lazy val shouldRemoveZLStringHexBinaryValue_ : Boolean =
+    false &&
+      isPotentiallyTrailing &&
+      isDeclaredLastInSequence &&
+      isTrailingEmptyPolicy
+
+  /**
+   * Tells us if we should remove a successfully parsed zero-length string
+   * or hexBinary from the infoset. The element is optional, so even though
+   * zero length may parse successfully and yield an empty string or hexBinary
+   * value, the optionality of the element wins out over the empty value, and
+   * the element is not put into the infoset as an array child.
+   *
+   * Neither argument is consulted by the current implementation; the answer
+   * depends only on the static properties of this parser.
+   */
+  final def shouldRemoveZLStringHexBinaryValue(ais: ArrayIndexStatus, erd: ElementRuntimeData): Boolean =
+    shouldRemoveZLStringHexBinaryValue_
+
+  /**
+   * Static (per-parser) part of the decision to suppress a zero-length
+   * parse failure, i.e., the cases where trailing separator toleration
+   * applies.
+   */
+  private lazy val shouldSuppressZLDelimitedParseFailures_ : Boolean =
+    if (erd.isComplexType) {
+      //
+      // A complex type that fails to parse can't fail on zero length.
+      // It can be successful on zero length, but if it fails, something
+      // must have been scanned/examined from the input data.
+      //
+      false
+    } else {
+      Assert.invariant(erd.isSimpleType)
+      erd.optPrimType.get match {
+        //
+        // If String or HexBinary FAIL on zero length, they must have
+        // assertions that cause the failure, because otherwise they would
+        // always parse successfully on ZL. So these are never ZL failures.
+        //
+        case NodeInfo.String | NodeInfo.HexBinary => false
+        //
+        // For any other simple type, a failure at zero length is the case
+        // to suppress, but only if the element declaration is potentially
+        // trailing and a trailing-empty policy is in effect.
+        // (Being declared last in the sequence is NOT required here.)
+        //
+        case _ => isPotentiallyTrailing && isTrailingEmptyPolicy
+      }
+    }
+
+  /**
+   * True for cases where we should deal with trailing separator toleration.
+   *
+   * Combines the runtime information needed with the static,
+   * schema-compile-time information about the sequence child.
+   *
+   * This is applicable only to OPTIONAL elements (as in between
+   * minOccurs and maxOccurs, i.e., having variable occurrences and
+   * speculative parsing/PoU).
+   *
+   * @param pstate the parse state; not consulted by the current implementation
+   * @param hasZLParseAttempt whether the parse attempt consumed zero bits
+   * @return true only if the static conditions hold and the attempt was
+   *         zero length
+   */
+  final def shouldSuppressZLDelimitedParseFailure(pstate: PState,
+    hasZLParseAttempt: Boolean): Boolean =
+    if (shouldSuppressZLDelimitedParseFailures_) {
+      // the static flag is only ever true for simple types
+      Assert.invariant(erd.isSimpleType)
+      hasZLParseAttempt
+    } else {
+      false
+    }
+}
+
+/**
+ * Sequence child parser for a scalar child (per the class name) of an
+ * ordered, separated sequence. Parsing simply delegates to the wrapped
+ * child parser; separator handling is left to the enclosing sequence parser.
+ */
+final class ScalarOrderedSeparatedSequenceChildParser(
+  childParser: Parser,
+  srd: SequenceRuntimeData,
+  trd: TermRuntimeData,
+  val sep: Parser,
+  val spos: SeparatorPosition,
+  val ssp: SeparatorSuppressionPolicy)
+  extends SequenceChildParser(childParser, srd, trd) with Separated {
+
+  /** Runs the wrapped child parser against the given state. */
+  override def parse(state: PState) = {
+    childParser.parse1(state)
+  }
+}
+
+/**
+ * Repeating child of an ordered, separated sequence whose behavior comes
+ * from OccursCountExactParser, with the separator information mixed in
+ * via Separated.
+ */
+final class RepOrderedExactlyNSeparatedSequenceChildParser(
+  childParser: Parser,
+  srd: SequenceRuntimeData,
+  erd: ElementRuntimeData,
+  val sep: Parser,
+  val spos: SeparatorPosition,
+  val ssp: SeparatorSuppressionPolicy)
+  extends OccursCountExactParser(childParser, srd, erd) with Separated
+
+/**
+ * Repeating child of an ordered, separated sequence whose behavior comes
+ * from OccursCountExpressionParser (which is handed the occurs-count
+ * evaluatable `ocEv`), with the separator information mixed in via Separated.
+ */
+final class RepOrderedExactlyTotalOccursCountSeparatedSequenceChildParser(
+  childParser: Parser,
+  ocEv: OccursCountEv,
+  srd: SequenceRuntimeData,
+  erd: ElementRuntimeData,
+  val sep: Parser,
+  val spos: SeparatorPosition,
+  val ssp: SeparatorSuppressionPolicy)
+  extends OccursCountExpressionParser(childParser, srd, erd, ocEv) with Separated
+
+/**
+ * Repeating child of an ordered, separated sequence whose behavior comes
+ * from OccursCountMinMaxParser. It mixes in RepeatingSeparatedPoU, and so
+ * supplies the two static flags (potentially trailing, declared last in
+ * sequence) that the zero-length handling decisions of that trait rely on.
+ */
+final class RepOrderedWithMinMaxSeparatedSequenceChildParser(
+  childParser: Parser,
+  srd: SequenceRuntimeData,
+  erd: ElementRuntimeData,
+  override val sep: Parser,
+  override val spos: SeparatorPosition,
+  override val ssp: SeparatorSuppressionPolicy,
+  override val isPotentiallyTrailing: Boolean,
+  override val isDeclaredLastInSequence: Boolean)
+  extends OccursCountMinMaxParser(childParser, srd, erd) with RepeatingSeparatedPoU
+
+final class OrderedSeparatedSequenceParser(rd: SequenceRuntimeData,
+  ssp: SeparatorSuppressionPolicy,
+  spos: SeparatorPosition,
+  sep: Parser,
+  childrenArg: Seq[SequenceChildParser])
+  extends OrderedSequenceParserBase(rd, childrenArg) {
+
+  override lazy val childProcessors: Seq[Parser] = sep +: 
childrenArg.asInstanceOf[Seq[Parser]]
+
+  /**
+   * Parses (1) one iteration of an array with fixed/expression occurs count.
+   * (2) a model group (3) a scalar element.
+   *
+   * Returns a status indicating success/failure and the nature of that 
success/failure.
+   *
+   * No backtracking supported.
+   */
+  final protected def parseOneWithoutPoU(
+    parserArg: SequenceChildParser,
+    trd: TermRuntimeData,
+    pstate: PState): ParseAttemptStatus = {
+
+    val parser = parserArg.asInstanceOf[SequenceChildParser with Separated]
+
+    val finalStatus: ParseAttemptStatus = {
+      // parse prefix sep if any
+      val prefixSepSuccessful =
+        if ((spos eq SeparatorPosition.Prefix) && trd.isRepresented) {
+          sep.parse1(pstate)
+          pstate.processorStatus eq Success
+        } else
+          true
+
+      if (!prefixSepSuccessful) {
+        failedSeparator(pstate, "prefix", trd)
+        ParseAttemptStatus.Failed_EntireArray
+      } else {
+        // except for the first position of the group, parse an infix separator
+
+        val infixSepSuccessful =
+          if ((spos eq SeparatorPosition.Infix) && pstate.mpstate.groupPos > 1 
&& trd.isRepresented) {
+            sep.parse1(pstate)
+            pstate.processorStatus eq Success
+          } else
+            true
+
+        if (!infixSepSuccessful) {
+          failedSeparator(pstate, "infix", trd)
+          ParseAttemptStatus.Failed_EntireArray
+        } else {
+          //
+          // now we parse the child
+          //
+          if (pstate.dataProc.isDefined) 
pstate.dataProc.get.beforeRepetition(pstate, this)
+
+          parser.parse1(pstate)
+
+          if (pstate.dataProc.isDefined) 
pstate.dataProc.get.afterRepetition(pstate, this)
+
+          val childSuccessful = pstate.processorStatus eq Success
+
+          val res: ParseAttemptStatus = {
+            if (!childSuccessful) {
+              ParseAttemptStatus.Failed_EntireArray
+            } else {
+              Assert.invariant(childSuccessful)
+              // parse postfix sep if any
+              val postfixSepSuccessful =
+                if ((spos eq SeparatorPosition.Postfix) && trd.isRepresented) {
+                  sep.parse1(pstate)
+                  pstate.processorStatus eq Success
+                } else
+                  true
+
+              if (!postfixSepSuccessful) {
+                failedSeparator(pstate, "postfix", trd)
+                ParseAttemptStatus.Failed_EntireArray
+              } else {
+
+                //
+                // successful parse of required element. Zero length or not.
+                // note that anyEmpty doesn't apply in this case, and
+                // that separatorSuppressionPolicy property should be ignored.
+                //
+                ParseAttemptStatus.Success_LengthUndetermined
+                // returning Success_LengthUndetermined indicates that because this was a
+                // required element, treat it just like it had content,
+                // even if it was zero length
+
+              } // end if postfix success/fail
+            } // end if child success/fail
+          }
+          res
+        } // end if infix
+      } // end if prefix
+    }
+    finalStatus
+  }
+
+  /**
+   * Parses one iteration of an array/optional element, and returns
+   * a status indicating success/failure and the nature of that 
success/failure.
+   */
+  final protected def parseOneWithPoU(
+    parserArg: RepeatingChildParser,
+    erd: ElementRuntimeData,
+    pstate: PState,
+    priorState: PState.Mark,
+    ais: GoArrayIndexStatus,
+    isBounded: Boolean): ParseAttemptStatus = {
+
+    val parser = parserArg.asInstanceOf[RepeatingChildParser with 
RepeatingSeparatedPoU]
+
+    val finalStatus: ParseAttemptStatus = {
+
+      val bitPosBeforeSeparator = pstate.bitPos0b
+
+      // parse prefix sep if any
+      val prefixSepSuccessful =
+        if (spos eq SeparatorPosition.Prefix) {
+          sep.parse1(pstate)
+          pstate.processorStatus eq Success
+        } else
+          true
+
+      if (!prefixSepSuccessful) {
+        failedSeparator(pstate, "prefix", erd)
+        processFailedSeparatorWithPoU(pstate)
+      } else {
+        // except for the first position of the group, parse an infix separator
+
+        val isInfix = spos eq SeparatorPosition.Infix
+
+        val infixSepShouldBePresent =
+          pstate.mpstate.groupPos > 1
+
+        val infixSepSuccessful =
+          if (isInfix && infixSepShouldBePresent) {
+            sep.parse1(pstate)
+            pstate.processorStatus eq Success
+          } else
+            true
+
+        //
+        // captures corner case when there is no separator because it's infix,
+        // and this optional/array element was first in the group
+        // so we don't put an infix separator in.
+        //
+        val wasInfixSepSkippedForInitialElement =
+          isInfix && !infixSepShouldBePresent
+
+        if (!infixSepSuccessful) {
+          failedSeparator(pstate, "infix", erd)
+          processFailedSeparatorWithPoU(pstate)
+        } else {
+          //
+          // now we parse the child
+          //
+          val prevBitPosBeforeChild = pstate.bitPos0b
+
+          pstate.pushDiscriminator
+
+          if (pstate.dataProc.isDefined) 
pstate.dataProc.get.beforeRepetition(pstate, this)
+
+          parser.parse1(pstate)
+
+          if (pstate.dataProc.isDefined) 
pstate.dataProc.get.afterRepetition(pstate, this)
+
+          val childSuccessful = pstate.processorStatus eq Success
+          val childFailure = !childSuccessful // just makes later logic easier 
to read
+
+          val bitPosAfterChildAttempt = pstate.bitPos0b
+
+          val hasZLChildAttempt = prevBitPosBeforeChild == 
bitPosAfterChildAttempt
+          val hasZLChildSuccess = childSuccessful && hasZLChildAttempt
+
+          /**
+           * This is what the DFDL Spec v1.0 calls the "absent" representation.
+           * (Section 9.2.4). This is a kind of missing representation 
(section 9.2.6)
+           */
+          val isRepresentationAbsent = childFailure && hasZLChildAttempt
+
+          val postfixSepSuccessful =
+            if ((spos eq SeparatorPosition.Postfix) && (
+              childSuccessful ||
+              isRepresentationAbsent)) {
+              // parse postfix sep if any
+              sep.parse1(pstate)
+              pstate.processorStatus eq Success
+            } else
+              true
+
+          val bitPosAfterSeparator = pstate.bitPos0b
+
+          if (!postfixSepSuccessful) {
+            failedSeparator(pstate, "postfix", erd)
+            processFailedSeparatorWithPoU(pstate)
+          } else {
+
+            //
+            // At this point we know the separator (wherever located) was
+            // successfully parsed, and we're past where separators are 
concerned
+            // so the separator was successful.
+            //
+            val hasMadeForwardProgress = bitPosAfterSeparator > 
bitPosBeforeSeparator
+
+            Assert.invariant(hasMadeForwardProgress || 
wasInfixSepSkippedForInitialElement || postfixSepSuccessful)
+
+            val res = {
+              if (childFailure) {
+                if (pstate.discriminator == true) {
+                  ParseAttemptStatus.Failed_WithDiscriminatorSet
+                } else {
+                  Assert.invariant(pstate.discriminator == false)
+                  ais match {
+                    case ArrayIndexStatus.Required => {
+                      //
+                      // failure of required element is handled by caller.
+                      //
+                      ParseAttemptStatus.Failed_SpeculativeParse
+                    }
+                    case opt: OptionalArrayIndexStatus => {
+                      //
+                      // failure of optional element
+                      //
+                      if (!wasInfixSepSkippedForInitialElement &&
+                        isRepresentationAbsent &&
+                        parser.shouldSuppressZLDelimitedParseFailure(pstate, 
hasZLChildAttempt)) {
+                        //
+                        // optional element failed, but for tolerating excess
+                        // trailing separators, we want to keep the separator.
+                        // But we must remove the speculated element.
+                        //
+                        // This will not work right if the separator and 
terminator are/can-be the same,
+                        // as we'll never find the terminator after a failure 
that happens
+                        // to have zero-length. We'll find the "separator", 
and fail to parse
+                        // but we'll suppress this failure in the case where 
we're supposed
+                        // to tolerate trailing additional separators. So we 
succeed, consume
+                        // the "separator", and the separator will never be 
reconsidered as the terminator.
+                        //
+                        // However, solving for "separator can be equal to 
terminator", so as to issue
+                        // an SDE is pretty difficult. It has to be done in an 
Evaluatable, so that
+                        // the delimiters can be run-time computed. Since 
delimiters can have
+                        // things like character class entities in them, deciding this is quite hard.
+                        //
+                        // In general, the ambiguity this creates is a 
"user-beware" situation
+                        // and the right thing for the runtime to do is make 
sure a trace/debug
+                        // session can help the user isolate what is actually 
happening.
+                        //
+
+                        // backtrack away this speculative element and any 
variable side-effects
+                        pstate.reset(priorState)
+                        // but that resets the position to before the separator
+                        // so we set position to after the separator
+                        
pstate.dataInputStream.setBitPos0b(bitPosAfterSeparator)
+                        //
+                        // use special return status that indicates we've 
taken care of the
+                        // priorState and repositioning after a skipped 
separator.
+                        //
+                        ParseAttemptStatus.Success_SkippedSeparator
+                      } else {
+                        //
+                        // optional element failed but separator was found (or 
it's the first of an infix-separated array)
+                        // this is regular speculative parse failure to find 
the element.
+                        //
+                        ParseAttemptStatus.Failed_SpeculativeParse
+                      }
+                    }
+                  }
+                }
+              } else {
+                Assert.invariant(childSuccessful)
+
+                val result: ParseAttemptStatus = ais match {
+                  case Required => {
+                    //
+                    // Success on a required element.
+                    // We don't actually care if it is zero length or non-zero 
length.
+                    //
+                    ParseAttemptStatus.Success_LengthUndetermined
+                  }
+                  case _: OptionalArrayIndexStatus => {
+                    //
+                    // Now we have to analyze the cases where ZL matters and 
needs special handling
+                    //
+                    if (hasZLChildSuccess) {
+                      val shouldRemoveZLElement = 
parser.shouldRemoveZLStringHexBinaryValue(ais, erd)
+                      if (shouldRemoveZLElement) {
+                        //
+                        // It's an optional element, type is string/hexBinary 
and length is zero
+                        // so we don't want to add it to the infoset
+                        //
+                        // Note: This does seem to be correct interpretation 
of the DFDL v1.0 spec.
+                        // However, the language is very subtle. Hopefully it 
gets clarified.
+                        //
+                        // However, we do want to keep trying to parse more, as they could be
+                        // trailing separators that are to be tolerated.
+                        //
+                        // So we don't backtrack here. We just remove the 
accumulated element.
+                        //
+                        // Note that we do NOT backout variable side-effects 
that occurred while parsing
+                        // and also a discriminator could be set to true.
+                        // So while we are suppressing adding the element to 
the infoset,
+                        // we're not suppressing side-effects that occurred 
during its parsing.
 
 Review comment:
   Tests are needed that illustrate and capture this behavior where 
side-effects such as discriminators/asserts and setVar are carried out even 
though the element is removed. 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to