This is an automated email from the ASF dual-hosted git repository.
olabusayo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git
The following commit(s) were added to refs/heads/main by this push:
new 146778413 Remove unnecessary truncation code
146778413 is described below
commit 146778413c84d1e6749d69d97b9d6596e2bc53c5
Author: olabusayoT <[email protected]>
AuthorDate: Wed Oct 9 18:35:53 2024 -0400
Remove unnecessary truncation code
- we were truncating and overwriting value, but we didn't actually need to
since we ended up truncating again right after. Removed the unnecessary and
buggy code which changed the dataValue. StringMaybeTruncateBitsUnparser.unparse
does not change the dataValue, instead it changes the DOS, resulting in the
value getting truncated but string-length returning the untruncated length when
it's called
- move subset/subsetError to ThrowsSDE trait so state has access to it
- refactor SpecifiedLengthExplicitImplicitUnparser to be more
straightforward and to remove unused cruft
- add tests
DAFFODIL-1592
---
.../core/grammar/primitives/SpecifiedLength.scala | 3 +-
.../apache/daffodil/lib/exceptions/ThrowsSDE.scala | 13 ++
.../runtime1/SpecifiedLengthUnparsers.scala | 240 ++-------------------
.../org/apache/daffodil/runtime1/dsom/SDE.scala | 15 --
.../section00/general/testUnparserGeneral.tdml | 127 +++++++++++
.../section00/general/TestUnparserGeneral.scala | 15 ++
6 files changed, 170 insertions(+), 243 deletions(-)
diff --git
a/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/SpecifiedLength.scala
b/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/SpecifiedLength.scala
index 0bd09d18c..00552021a 100644
---
a/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/SpecifiedLength.scala
+++
b/daffodil-core/src/main/scala/org/apache/daffodil/core/grammar/primitives/SpecifiedLength.scala
@@ -110,8 +110,7 @@ trait SpecifiedLengthExplicitImplicitUnparserMixin {
new SpecifiedLengthExplicitImplicitUnparser(
u,
e.elementRuntimeData,
- e.unparseTargetLengthInBitsEv,
- e.maybeUnparseTargetLengthInCharactersEv
+ e.unparseTargetLengthInBitsEv
)
}
}
diff --git
a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/exceptions/ThrowsSDE.scala
b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/exceptions/ThrowsSDE.scala
index e6801bedb..dc5df76c3 100644
---
a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/exceptions/ThrowsSDE.scala
+++
b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/exceptions/ThrowsSDE.scala
@@ -80,6 +80,19 @@ trait ThrowsSDE {
final def notYetImplemented(msg: String, args: Any*): Nothing =
SDE("Feature not yet implemented: " + msg, args: _*)
+ /**
+ * Use for cases where it is an SDE because of something we've chosen
+ * not to implement. Not merely short term (haven't coded it yet, but
intending to),
+ * more like things we've chosen to defer intentionally to some future
release.
+ */
+ def subset(testThatWillThrowIfFalse: Boolean, msg: String, args: Any*) = {
+ if (!testThatWillThrowIfFalse) subsetError(msg, args: _*)
+ }
+
+ def subsetError(msg: String, args: Any*) = {
+ val msgTxt = msg.format(args: _*)
+ SDE("Subset: " + msgTxt)
+ }
}
/**
diff --git
a/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/unparsers/runtime1/SpecifiedLengthUnparsers.scala
b/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/unparsers/runtime1/SpecifiedLengthUnparsers.scala
index 321e73831..f2ec32e9b 100644
---
a/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/unparsers/runtime1/SpecifiedLengthUnparsers.scala
+++
b/daffodil-runtime1-unparser/src/main/scala/org/apache/daffodil/unparsers/runtime1/SpecifiedLengthUnparsers.scala
@@ -20,42 +20,22 @@ package org.apache.daffodil.unparsers.runtime1
import org.apache.daffodil.lib.exceptions.Assert
import org.apache.daffodil.lib.schema.annotation.props.gen.LengthUnits
import org.apache.daffodil.lib.schema.annotation.props.gen.Representation
-import org.apache.daffodil.lib.util.Maybe
import org.apache.daffodil.lib.util.Maybe._
-import org.apache.daffodil.lib.util.MaybeJULong
-import org.apache.daffodil.runtime1.dpath.NodeInfo.PrimType
import org.apache.daffodil.runtime1.infoset.DIElement
import org.apache.daffodil.runtime1.infoset.DISimple
import org.apache.daffodil.runtime1.infoset.Infoset
-import org.apache.daffodil.runtime1.infoset.RetryableException
import org.apache.daffodil.runtime1.processors.CharsetEv
import org.apache.daffodil.runtime1.processors.ElementRuntimeData
-import org.apache.daffodil.runtime1.processors.Evaluatable
import org.apache.daffodil.runtime1.processors.ParseOrUnparseState
-import org.apache.daffodil.runtime1.processors.Success
import org.apache.daffodil.runtime1.processors.UnparseTargetLengthInBitsEv
-import
org.apache.daffodil.runtime1.processors.UnparseTargetLengthInCharactersEv
import org.apache.daffodil.runtime1.processors.unparsers._
import passera.unsigned.ULong
-/**
- * Restricts the bits available for unparsing to just those within
- * the specified length computed.
- *
- * If a unparser (supplied as arg) runs past the available space,
- * that's an unparse error.
- *
- * Truncation of strings - the only case where we truncate, and only when
- * dfdl:truncateSpecifiedLengthString is 'yes', is handled elsewhere.
- */
-sealed abstract class SpecifiedLengthUnparserBase(eUnparser: Unparser, erd:
ElementRuntimeData)
-
final class SpecifiedLengthExplicitImplicitUnparser(
eUnparser: Unparser,
erd: ElementRuntimeData,
- targetLengthInBitsEv: UnparseTargetLengthInBitsEv,
- maybeTargetLengthInCharactersEv: Maybe[UnparseTargetLengthInCharactersEv]
+ targetLengthInBitsEv: UnparseTargetLengthInBitsEv
) extends CombinatorUnparser(erd) {
override lazy val runtimeDependencies = Vector()
@@ -74,215 +54,23 @@ final class SpecifiedLengthExplicitImplicitUnparser(
}
override final def unparse(state: UState): Unit = {
-
- erd.impliedRepresentation match {
- case Representation.Binary =>
- unparseBits(state)
- case Representation.Text => {
- val dcs = getCharset(state)
- if (dcs.maybeFixedWidth.isDefined)
- unparseBits(state)
- else {
- // we know the encoding is variable width characters
- // but we don't know if the length units characters or bits/bytes.
- lengthUnits match {
- case LengthUnits.Bits | LengthUnits.Bytes =>
- unparseVarWidthCharactersInBits(state)
- case LengthUnits.Characters =>
- unparseVarWidthCharactersInCharacters(state)
- }
- }
- }
- }
- }
-
- /**
- * Encoding is variable width (e.g., utf-8, but the
- * target length is expressed in bits.
- *
- * Truncation, in this case, requires determining how many
- * of the string's characters fit within the available target length
- * bits.
- */
- def unparseVarWidthCharactersInBits(state: UState): Unit = {
- val maybeTLBits = getMaybeTL(state, targetLengthInBitsEv)
-
- if (maybeTLBits.isDefined) {
- //
- // We know the target length. We can use it.
- //
- if (areTruncating) {
- val diSimple = state.currentInfosetNode.asSimple
- val v = diSimple.dataValue.getString
- val tl = maybeTLBits.get
- val cs = getCharset(state)
- val newV = state.truncateToBits(v, cs, tl)
- //
- // JIRA DFDL-1592
- //
- // BUG: should not modify the actual dataset value.
- // as fn:string-length of the value should always return the same
- // value which is the un-truncated length.
- //
- diSimple.overwriteDataValue(newV)
- }
- eUnparser.unparse1(state)
+ lazy val dcs = getCharset(state)
+ if (
+ erd.impliedRepresentation == Representation.Text &&
+ lengthUnits == LengthUnits.Characters &&
+ dcs.maybeFixedWidth.isEmpty &&
+ erd.isComplexType
+ ) {
+ state.subsetError(
+ "Variable width character encoding '%s', dfdl:lengthKind '%s' and
dfdl:lengthUnits '%s' are not supported for complex types.",
+ getCharset(state).name,
+ lengthKind.toString,
+ lengthUnits.toString
+ )
} else {
- // target length unknown
- // ignore constraining the output length. Just unparse it.
- //
- // This happens when we're unparsing, and this element depends on a
prior element for
- // determining its length, but that prior element has
dfdl:outputValueCalc that depends
- // on this element.
- // This breaks the chicken-egg cycle.
- //
eUnparser.unparse1(state)
}
}
-
- private def areTruncating = {
- if (erd.isSimpleType && (erd.optPrimType.get eq PrimType.String)) {
- Assert.invariant(erd.optTruncateSpecifiedLengthString.isDefined)
- erd.optTruncateSpecifiedLengthString.get
- } else
- false
- }
-
- /**
- * Encoding is variable width (e.g., utf-8). The target
- * length is expressed in characters.
- */
- def unparseVarWidthCharactersInCharacters(state: UState): Unit = {
-
- //
- // variable-width encodings and lengthUnits characters, and lengthKind
explicit
- // is not supported (currently) for complex types
- //
- state.schemaDefinitionUnless(
- erd.isSimpleType,
- "Variable width character encoding '%s', dfdl:lengthKind '%s' and
dfdl:lengthUnits '%s' are not supported for complex types.",
- getCharset(state).name,
- lengthKind.toString,
- lengthUnits.toString
- )
-
- Assert.invariant(erd.isSimpleType)
- Assert.invariant(this.maybeTargetLengthInCharactersEv.isDefined)
- val tlEv = this.maybeTargetLengthInCharactersEv.get
- val tlChars = this.getMaybeTL(state, tlEv)
-
- if (tlChars.isDefined) {
- //
- // possibly truncate
- //
- if (areTruncating) {
- val diSimple = state.currentInfosetNode.asSimple
- val v = diSimple.dataValue.getString
- val tl = tlChars.get
- if (v.length > tl) {
- // string is too long, truncate to target length
- val newV = v.substring(0, tl.toInt)
- //
- // BUG: JIRA DFDL-1592 - should not be overwriting the value with
- // truncated value.
- //
- diSimple.overwriteDataValue(newV)
- }
- }
- eUnparser.unparse1(state, erd)
- } else {
- // target length unknown
- // ignore constraining the output length. Just unparse it.
- //
- // This happens when we're unparsing, and this element depends on a
prior element for
- // determining its length, but that prior element has
dfdl:outputValueCalc that depends
- // on this element.
- // This breaks the chicken-egg cycle.
- //
- eUnparser.unparse1(state, erd)
- }
- }
-
- private def getMaybeTL(state: UState, TLEv: Evaluatable[MaybeJULong]):
MaybeJULong = {
- val maybeTLBits =
- try {
- val tlRes = TLEv.evaluate(state)
- Assert.invariant(tlRes.isDefined) // otherwise we shouldn't be in this
method at all
- tlRes
- } catch {
- case e: RetryableException => {
- //
- // TargetLength expression couldn't be evaluated.
- //
- MaybeJULong.Nope
- }
- }
- maybeTLBits
- }
-
- /**
- * Regardless of the type (text or binary), the target length
- * will be provided in bits.
- */
- def unparseBits(state: UState): Unit = {
- val maybeTLBits = getMaybeTL(state, targetLengthInBitsEv)
-
- if (maybeTLBits.isDefined) {
- //
- // We know the target length. We can use it.
- //
- // val nBits = maybeTLBits.get
- // val dos = state.dataOutputStream
-
- //
- // withBitLengthLimit is incorrect. It doesn't take into account
- // that after the unparse, we could be looking at a current state
- // with a distinct DOS
- //
- // val isLimitOk = dos.withBitLengthLimit(nBits) {
- // eUnparser.unparse1(state, erd)
- // }
- //
- // if (!isLimitOk) {
- // val availBits = if (dos.remainingBits.isDefined)
dos.remainingBits.get.toString else "(unknown)"
- // UE(state, "Insufficient bits available. Required %s bits, but
only %s were available.", nBits, availBits)
- // }
-
- eUnparser.unparse1(state, erd)
-
- // at this point the recursive parse of the children is finished
-
- if (state.processorStatus ne Success) return
-
- // We might not have used up all the bits. So some bits may need to
- // be skipped and filled in by fillbyte.
- //
- // In the DFDL data grammar the region being skipped is either the
- // RightFill region, or the ElementUnused region. Skipping this is
handled
- // elsewhere, along with insertion of padding before/after a string.
- //
-
- } else {
- //
- // we couldn't get the target length
- //
- // This happens when we're unparsing, and this element depends on a
prior element for
- // determining its length via a length expression, but that prior element
- // has dfdl:outputValueCalc that depends on this element, typically by a
- // call to dfdl:valueLength of this, or of some structure that includes
- // this.
- //
- // This breaks the chicken-egg cycle, by just unparsing it without
- // constraint. That produces the value which (ignoring truncation)
- // can be unparsed to produce the dfdl:valueLength of this element.
- //
- // This does assume that the value will get truncated properly for the
- // case where we do truncation (type string, with
- // dfdl:truncateSpecifiedLengthString 'yes') by some other mechanism.
- //
- eUnparser.unparse1(state, erd)
- }
- }
}
// TODO: implement the capture length unparsers as just using this trait?
diff --git
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/dsom/SDE.scala
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/dsom/SDE.scala
index 145bccb3a..ff2535f01 100644
---
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/dsom/SDE.scala
+++
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/runtime1/dsom/SDE.scala
@@ -281,19 +281,4 @@ trait ImplementsThrowsOrSavesSDE extends
ImplementsThrowsSDE with SavesErrorsAnd
}
}
}
-
- /**
- * Use for cases where it is an SDE because of something we've chosen
- * not to implement. Not merely short term (haven't coded it yet, but
intending to),
- * more like things we've chosen to defer intentionally to some future
release.
- */
- def subset(testThatWillThrowIfFalse: Boolean, msg: String, args: Any*) = {
- if (!testThatWillThrowIfFalse) subsetError(msg, args: _*)
- }
-
- def subsetError(msg: String, args: Any*) = {
- val msgTxt = msg.format(args: _*)
- SDE("Subset: " + msgTxt)
- }
-
}
diff --git
a/daffodil-test/src/test/resources/org/apache/daffodil/section00/general/testUnparserGeneral.tdml
b/daffodil-test/src/test/resources/org/apache/daffodil/section00/general/testUnparserGeneral.tdml
index f4dee13dc..2b4691fe4 100644
---
a/daffodil-test/src/test/resources/org/apache/daffodil/section00/general/testUnparserGeneral.tdml
+++
b/daffodil-test/src/test/resources/org/apache/daffodil/section00/general/testUnparserGeneral.tdml
@@ -20,6 +20,7 @@
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:dfdl="http://www.ogf.org/dfdl/dfdl-1.0/"
xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:ex="http://example.com"
xmlns:daf="urn:ogf:dfdl:2013:imp:daffodil.apache.org:2018:ext"
+ xmlns:fn="http://www.w3.org/2005/xpath-functions"
suiteName="generalUnparserTests">
<tdml:defineSchema name="fixedLengthStrings">
@@ -40,6 +41,42 @@
</xs:complexType>
</xs:element>
+ <xs:element name="e5">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element name="e1" dfdl:lengthKind="explicit" dfdl:length="5"
type="xs:string" dfdl:truncateSpecifiedLengthString="yes"
dfdl:encoding="UTF-8"/>
+ <xs:element name="e2" type="xs:int" dfdl:lengthKind="explicit"
dfdl:length="1" dfdl:outputValueCalc="{fn:string-length(../ex:e1)}"/>
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
+
+ <xs:element name="e6">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element name="e1" dfdl:lengthKind="explicit" dfdl:length="5"
type="xs:string" dfdl:truncateSpecifiedLengthString="yes" dfdl:encoding="UTF-8"
dfdl:lengthUnits="characters"/>
+ <xs:element name="e2" type="xs:int" dfdl:lengthKind="explicit"
dfdl:length="1" dfdl:outputValueCalc="{fn:string-length(../ex:e1)}"/>
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
+
+ <xs:element name="e7">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element name="e1" dfdl:lengthKind="explicit" dfdl:length="5"
type="xs:string" dfdl:truncateSpecifiedLengthString="yes" />
+ <xs:element name="e2" type="xs:int" dfdl:lengthKind="explicit"
dfdl:length="1" dfdl:outputValueCalc="{fn:string-length(../ex:e1)}"/>
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
+
+ <xs:element name="e8">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element name="e1" dfdl:lengthKind="explicit" dfdl:length="5"
type="xs:string" dfdl:truncateSpecifiedLengthString="no" />
+ <xs:element name="e2" type="xs:int" dfdl:lengthKind="explicit"
dfdl:length="1" dfdl:outputValueCalc="{fn:string-length(../ex:e1)}"/>
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
+
</tdml:defineSchema>
<tdml:defineSchema name="illegalChars">
@@ -56,6 +93,14 @@
<xs:element name="e1" dfdl:lengthKind="delimited" type="xs:string"/>
+ <xs:element name="e2" dfdl:lengthUnits="characters"
dfdl:lengthKind="explicit" dfdl:length="3">
+ <xs:complexType>
+ <xs:sequence>
+ <xs:element name="e" dfdl:lengthKind="delimited" type="xs:string"/>
+ </xs:sequence>
+ </xs:complexType>
+ </xs:element>
+
</tdml:defineSchema>
<!--
@@ -202,6 +247,25 @@
</tdml:parserTestCase>
+ <tdml:unparserTestCase name="variableWidthComplexType" root="e2"
model="utf8Chars">
+ <tdml:infoset>
+ <tdml:dfdlInfoset>
+ <ex:e2>
+ <ex:e></ex:e>
+ </ex:e2>
+ </tdml:dfdlInfoset>
+ </tdml:infoset>
+
+ <tdml:errors>
+ <tdml:error>Runtime Schema Definition Error</tdml:error>
+ <tdml:error>Variable width character</tdml:error>
+ <tdml:error>lengthKind 'explicit'</tdml:error>
+ <tdml:error>lengthUnits 'characters'</tdml:error>
+ <tdml:error>not supported</tdml:error>
+ <tdml:error>complex types</tdml:error>
+ </tdml:errors>
+ </tdml:unparserTestCase>
+
<!--
Test Name: unparseFixedLengthString01
Schema: fixedLengthStrings
@@ -296,6 +360,69 @@
</tdml:unparserTestCase>
+ <tdml:unparserTestCase name="unparseFixedLengthString04" root="e5"
model="fixedLengthStrings" roundTrip="false">
+
+ <tdml:infoset>
+ <tdml:dfdlInfoset>
+ <ex:e5>
+ <ex:e1>Hellow</ex:e1>
+ <ex:e2></ex:e2>
+ </ex:e5>
+ </tdml:dfdlInfoset>
+ </tdml:infoset>
+
+ <tdml:document>Hello6</tdml:document>
+
+ </tdml:unparserTestCase>
+
+ <tdml:unparserTestCase name="unparseFixedLengthString05" root="e6"
model="fixedLengthStrings" roundTrip="false">
+
+ <tdml:infoset>
+ <tdml:dfdlInfoset>
+ <ex:e6>
+ <ex:e1>Hellow</ex:e1>
+ <ex:e2></ex:e2>
+ </ex:e6>
+ </tdml:dfdlInfoset>
+ </tdml:infoset>
+
+ <tdml:document>Hello6</tdml:document>
+
+ </tdml:unparserTestCase>
+
+ <tdml:unparserTestCase name="unparseFixedLengthString06" root="e7"
model="fixedLengthStrings" roundTrip="false">
+
+ <tdml:infoset>
+ <tdml:dfdlInfoset>
+ <ex:e7>
+ <ex:e1>Hellow</ex:e1>
+ <ex:e2></ex:e2>
+ </ex:e7>
+ </tdml:dfdlInfoset>
+ </tdml:infoset>
+
+ <tdml:document>Hello6</tdml:document>
+
+ </tdml:unparserTestCase>
+
+ <tdml:unparserTestCase name="unparseFixedLengthString07" root="e8"
model="fixedLengthStrings" roundTrip="false">
+
+ <tdml:infoset>
+ <tdml:dfdlInfoset>
+ <ex:e8>
+ <ex:e1>Hellow</ex:e1>
+ <ex:e2></ex:e2>
+ </ex:e8>
+ </tdml:dfdlInfoset>
+ </tdml:infoset>
+ <tdml:errors>
+ <tdml:error>Unparse Error</tdml:error>
+ <tdml:error>data too long</tdml:error>
+ <tdml:error>unable to truncate</tdml:error>
+ </tdml:errors>
+
+ </tdml:unparserTestCase>
+
<!--
Test Name: negativeUnparseTest01
Schema: fixedLengthStrings
diff --git
a/daffodil-test/src/test/scala/org/apache/daffodil/section00/general/TestUnparserGeneral.scala
b/daffodil-test/src/test/scala/org/apache/daffodil/section00/general/TestUnparserGeneral.scala
index 9dd4de267..f0d40c8c1 100644
---
a/daffodil-test/src/test/scala/org/apache/daffodil/section00/general/TestUnparserGeneral.scala
+++
b/daffodil-test/src/test/scala/org/apache/daffodil/section00/general/TestUnparserGeneral.scala
@@ -42,6 +42,9 @@ class TestUnparserGeneral {
@Test def test_puaPreexistingInfosetChars_remapped(): Unit = {
runner.runOneTest("puaPreexistingInfosetChars_remapped")
}
+ @Test def test_variableWidthComplexType(): Unit = {
+ runner.runOneTest("variableWidthComplexType")
+ }
@Test def test_puaInfosetChars_CR_CRLF_01(): Unit = {
runner.runOneTest("puaInfosetChars_CR_CRLF_01")
@@ -63,6 +66,18 @@ class TestUnparserGeneral {
@Test def test_unparseFixedLengthString03(): Unit = {
runner.runOneTest("unparseFixedLengthString03")
}
+ @Test def test_unparseFixedLengthString04(): Unit = {
+ runner.runOneTest("unparseFixedLengthString04")
+ }
+ @Test def test_unparseFixedLengthString05(): Unit = {
+ runner.runOneTest("unparseFixedLengthString05")
+ }
+ @Test def test_unparseFixedLengthString06(): Unit = {
+ runner.runOneTest("unparseFixedLengthString06")
+ }
+ @Test def test_unparseFixedLengthString07(): Unit = {
+ runner.runOneTest("unparseFixedLengthString07")
+ }
@Test def test_parseFixedLengthString01(): Unit = {
runner.runOneTest("parseFixedLengthString01")