This is an automated email from the ASF dual-hosted git repository.
slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git
The following commit(s) were added to refs/heads/main by this push:
new b5d0ec727 Allow empty leading/trailing whitespace for list-of-strings
properties
b5d0ec727 is described below
commit b5d0ec7278cfe42dcf706ba4970749b065de6f8d
Author: Steve Lawrence <[email protected]>
AuthorDate: Fri Oct 27 14:04:51 2023 -0400
Allow empty leading/trailing whitespace for list-of-strings properties
Currently we SDE if a property that accepts a list of DFDL string
literals starts or ends with whitespace. This removes that restriction in
the list cooker by splitting on whitespace and removing any strings that
are the empty string.
This has one side effect that the resulting list can now be
empty--previously an empty property would have a single element with the
empty string. An empty list is easier to reason about and detect than a
list with a single zero-length string, but it does require changing the
logic in a few places.
DAFFODIL-2858
---
.../org/apache/daffodil/lib/cookers/Cookers.scala | 5 +-
.../daffodil/lib/cookers/EntityReplacer.scala | 53 ++++++++++------------
.../daffodil/extensions/repType/repType.tdml | 33 ++++++++++++++
.../delimiter_properties/DelimiterProperties.tdml | 36 ++++++++++-----
.../DelimiterPropertiesUnparse.tdml | 8 +---
.../daffodil/section13/nillable/nillable.tdml | 7 +--
.../text_number_props/TextNumberProps.tdml | 2 +-
.../section23/dfdl_expressions/expressions.tdml | 36 +++++++--------
.../runtime_properties/dynamicSeparator.tdml | 3 +-
.../apache/daffodil/extensions/TestRepType.scala | 8 ++++
10 files changed, 117 insertions(+), 74 deletions(-)
diff --git
a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/cookers/Cookers.scala
b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/cookers/Cookers.scala
index 4a22f9d1e..fcadb5d27 100644
--- a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/cookers/Cookers.scala
+++ b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/cookers/Cookers.scala
@@ -53,7 +53,9 @@ object NilValueLiteralValueBinaryCooker
object NilValueLiteralValueTextCooker extends
NonEmptyListOfStringLiteral("nilValue", true)
-object NilValueRawListCooker extends ListOfStringLiteral("nilValue", true)
+object NilValueRawListCooker
+ extends ListOfStringLiteral("nilValue", true)
+ with ListOfStringOneOrMoreLiteral
object EscapeCharacterCooker extends
SingleCharacterLiteralNoCharClassEntitiesNoByteEntities()
@@ -79,6 +81,7 @@ object SeparatorCooker extends
DelimiterCookerNoES("separator")
object TextStandardDecimalSeparatorCooker
extends ListOfSingleCharacterLiteralNoCharClassEntitiesNoByteEntities()
+ with ListOfStringOneOrMoreLiteral
object TextStandardGroupingSeparatorCooker
extends SingleCharacterLiteralNoCharClassEntitiesNoByteEntities()
diff --git
a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/cookers/EntityReplacer.scala
b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/cookers/EntityReplacer.scala
index 67888ddcf..29ba77236 100755
---
a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/cookers/EntityReplacer.scala
+++
b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/cookers/EntityReplacer.scala
@@ -722,20 +722,9 @@ sealed abstract class ListOfStringLiteralBase(
private lazy val olc = oneLiteralCooker
protected def cook(raw: String, context: ThrowsSDE, forUnparse: Boolean):
List[String] = {
- if (raw.length != 0 && (raw.head.isWhitespace || raw.last.isWhitespace)) {
- val ws = if (raw.head.isWhitespace) raw.head else raw.last
- val wsVisible = Misc.remapCodepointToVisibleGlyph(ws.toChar).toChar
- val hexCodePoint = "%04x".format(ws.toInt)
- context.SDE(
- "The property '%s' cannot start or end with the string \"%s\"(Unicode
hex code point U+%s), or consist entirely of whitespace."
- + "\nDid you mean to use character entities like '%%SP;' or '%%NL;'
to indicate whitespace in the data format instead?",
- propName,
- wsVisible,
- hexCodePoint,
- )
- }
- val rawList = raw.split("\\s+").toList
+ // ignore leading, trailing, and repeating whitespace
+ val rawList = raw.split("\\s").filterNot(_ == "").toList
val cooked = {
val cookedList: ListBuffer[String] = ListBuffer.empty
@@ -774,18 +763,15 @@ class NonEmptyListOfStringLiteral(pn: String,
allowByteEntities: Boolean)
override def testCooked(cookedList: List[String], context: ThrowsSDE) = {
context.schemaDefinitionUnless(
- cookedList.exists { _.length > 0 },
- "Property dfdl:%s cannot be empty string. Use dfdl:nilValue='%%ES;' for
empty string as nil value.",
+ cookedList.length > 0,
+ "Property dfdl:%s cannot be empty string. Use dfdl:%s='%%ES;' for empty
string.",
+ propName,
propName,
)
}
}
-class ListOfString1OrMoreLiteral(pn: String, allowByteEntities: Boolean)
- extends ListOfStringLiteralBase(pn, allowByteEntities) {
-
- override protected val oneLiteralCooker: StringLiteralBase =
- new StringLiteral(propName, allowByteEntities)
+trait ListOfStringOneOrMoreLiteral { self: ListOfStringLiteralBase =>
override protected def testCooked(cooked: List[String], context: ThrowsSDE):
Unit = {
context.schemaDefinitionUnless(
@@ -889,16 +875,17 @@ class
NonEmptyListOfStringLiteralCharClass_ES_WithByteEntities(pn: String)
override def testCooked(cookedList: List[String], context: ThrowsSDE) = {
context.schemaDefinitionUnless(
- cookedList.exists { _.length > 0 },
- "Property dfdl:%s cannot be empty string. Use dfdl:nilValue='%%ES;' for
empty string as nil value.",
+ cookedList.length > 0,
+ "Property dfdl:%s cannot be empty string. Use dfdl:%s='%%ES;' for empty
string.",
+ propName,
propName,
)
}
}
-class DelimiterCookerNoES(pn: String) extends ListOfString1OrMoreLiteral(pn,
true) {
+class DelimiterCookerNoES(pn: String) extends DelimiterCooker(pn) {
- override val oneLiteralCooker: StringLiteralBase =
+ override def oneDelimiterLiteralCooker: StringLiteralBase =
new StringLiteralNoCharClassEntities(propName, true) with
DisallowedCharClassEntitiesMixin {
// Disallow "%ES" in the string raw. Disallow "%WSP*" when it is
@@ -915,9 +902,9 @@ class DelimiterCookerNoES(pn: String) extends
ListOfString1OrMoreLiteral(pn, tru
}
}
-class DelimiterCookerNoSoleES(pn: String) extends
ListOfString1OrMoreLiteral(pn, true) {
+class DelimiterCookerNoSoleES(pn: String) extends DelimiterCooker(pn) {
- override val oneLiteralCooker: StringLiteralBase =
+ override def oneDelimiterLiteralCooker: StringLiteralBase =
new StringLiteralBase(propName, true) {
override def testRaw(raw: String, context: ThrowsSDE): Unit = {
@@ -937,8 +924,18 @@ class DelimiterCookerNoSoleES(pn: String) extends
ListOfString1OrMoreLiteral(pn,
}
class DelimiterCooker(pn: String) extends ListOfStringLiteralBase(pn, true) {
- private val constantCooker = new ListOfStringLiteral(propName, true) // zero
length allowed
- private val runtimeCooker = new ListOfString1OrMoreLiteral(propName, true)
+
+ def oneDelimiterLiteralCooker: StringLiteralBase = new StringLiteral(pn,
true)
+
+ // zero length allowed for constants
+ private val constantCooker = new ListOfStringLiteral(propName, true) {
+ override val oneLiteralCooker = oneDelimiterLiteralCooker
+ }
+
+ private val runtimeCooker = new ListOfStringLiteral(propName, true)
+ with ListOfStringOneOrMoreLiteral {
+ override val oneLiteralCooker = oneDelimiterLiteralCooker
+ }
override def convertRuntime(
b: String,
diff --git
a/daffodil-test/src/test/resources/org/apache/daffodil/extensions/repType/repType.tdml
b/daffodil-test/src/test/resources/org/apache/daffodil/extensions/repType/repType.tdml
index 3f6be0393..b1b21f8b4 100644
---
a/daffodil-test/src/test/resources/org/apache/daffodil/extensions/repType/repType.tdml
+++
b/daffodil-test/src/test/resources/org/apache/daffodil/extensions/repType/repType.tdml
@@ -69,6 +69,16 @@
</xs:restriction>
</xs:simpleType>
+ <xs:element name="repValuesWithSpaces" dfdlx:repType="tns:uint8">
+ <xs:simpleType>
+ <xs:restriction base="xs:string">
+ <xs:enumeration value="zero" dfdlx:repValues=" 0 " />
+ <xs:enumeration value="one" dfdlx:repValues=" 1 " />
+ <xs:enumeration value="more" dfdlx:repValues=" 2 3 4 "
dfdlx:repValueRanges=" 5 100 101 200 " />
+ </xs:restriction>
+ </xs:simpleType>
+ </xs:element>
+
</tdml:defineSchema>
<tdml:parserTestCase name="repType_keysetValue_00"
@@ -197,6 +207,29 @@
</tdml:infoset>
</tdml:unparserTestCase>
+ <tdml:parserTestCase name="repValuesWithSpaces_01"
root="repValuesWithSpaces" model="repType-Embedded.dfdl.xsd">
+ <tdml:document>
+ <tdml:documentPart type="byte">01</tdml:documentPart>
+ </tdml:document>
+ <tdml:infoset>
+ <tdml:dfdlInfoset>
+ <repValuesWithSpaces>one</repValuesWithSpaces>
+ </tdml:dfdlInfoset>
+ </tdml:infoset>
+ </tdml:parserTestCase>
+
+ <tdml:parserTestCase name="repValuesWithSpaces_02"
root="repValuesWithSpaces" model="repType-Embedded.dfdl.xsd">
+ <tdml:document>
+ <tdml:documentPart type="byte">02</tdml:documentPart>
+ </tdml:document>
+ <tdml:infoset>
+ <tdml:dfdlInfoset>
+ <repValuesWithSpaces>more</repValuesWithSpaces>
+ </tdml:dfdlInfoset>
+ </tdml:infoset>
+ </tdml:parserTestCase>
+
+
<tdml:defineSchema name="repType-Inherited.dfdl.xsd">
<xs:include
schemaLocation="/org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd"/>
diff --git
a/daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/DelimiterProperties.tdml
b/daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/DelimiterProperties.tdml
index 1a6c94fba..471126389 100644
---
a/daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/DelimiterProperties.tdml
+++
b/daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/DelimiterProperties.tdml
@@ -388,32 +388,44 @@
<tdml:parserTestCase name="DelimProp_10_02" root="DP_04_02"
model="DelimiterProperties-Embedded.dfdl.xsd"
- description="Section 12 property delimiters -use of empty space as
terminator - DFDL-12-033R Should fail"
+ description="Section 12 property delimiters - terminator starts with space
- DFDL-12-033R Should pass, leading space ignored"
roundTrip="twoPass">
<tdml:document><![CDATA[.00300 -02.75
3.9900 7.33
]]></tdml:document>
- <tdml:errors>
- <tdml:error>Schema Definition Error</tdml:error>
- <tdml:error>cannot start or end</tdml:error>
- <tdml:error>'%SP;'</tdml:error>
- </tdml:errors>
+ <tdml:infoset>
+ <tdml:dfdlInfoset xmlns:xs="http://www.w3.org/2001/XMLSchema"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <DP_04_02>
+ <s1>0.003</s1>
+ <s1>-2.75</s1>
+ <s1>3.99</s1>
+ <s1>7.33</s1>
+ </DP_04_02>
+ </tdml:dfdlInfoset>
+ </tdml:infoset>
</tdml:parserTestCase>
<tdml:parserTestCase name="DelimProp_10_03" root="DP_04_03"
model="DelimiterProperties-Embedded.dfdl.xsd"
- description="Section 12 property delimiters -use of empty space as
terminator - DFDL-12-033R Should fail"
+ description="Section 12 property delimiters - terminator ends with space -
DFDL-12-033R Should pass, trailing space ignored"
roundTrip="twoPass">
<tdml:document><![CDATA[.00300 -02.75
3.9900 7.33
]]></tdml:document>
- <tdml:errors>
- <tdml:error>Schema Definition Error</tdml:error>
- <tdml:error>cannot start or end</tdml:error>
- <tdml:error>'%SP;'</tdml:error>
- </tdml:errors>
+ <tdml:infoset>
+ <tdml:dfdlInfoset xmlns:xs="http://www.w3.org/2001/XMLSchema"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+ <DP_04_03>
+ <s1>0.003</s1>
+ <s1>-2.75</s1>
+ <s1>3.99</s1>
+ <s1>7.33</s1>
+ </DP_04_03>
+ </tdml:dfdlInfoset>
+ </tdml:infoset>
</tdml:parserTestCase>
<tdml:parserTestCase name="OptionalWSPTermWithExplicitLength" root="e1"
diff --git
a/daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/DelimiterPropertiesUnparse.tdml
b/daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/DelimiterPropertiesUnparse.tdml
index f5b06f618..deffeaf11 100644
---
a/daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/DelimiterPropertiesUnparse.tdml
+++
b/daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/DelimiterPropertiesUnparse.tdml
@@ -342,7 +342,7 @@
Test Name: unparseSeparatorLeadingSpace
Schema: delimitedStrings
Root: e15
- Purpose: This test demonstrates the scenario where the list of
separators has a leading space
+ Purpose: This test demonstrates the scenario where the list of
separators has a leading space; the leading space should be ignored
-->
<tdml:unparserTestCase name="unparseSeparatorLeadingSpace"
model="delimitedStrings" root="e15" roundTrip="true">
@@ -354,11 +354,7 @@
</ex:e15>
</tdml:dfdlInfoset>
</tdml:infoset>
-
- <tdml:errors>
- <tdml:error>Schema Definition Error</tdml:error>
- </tdml:errors>
-
+ <tdml:document>,apple+.|banana1</tdml:document>
</tdml:unparserTestCase>
<!--
diff --git
a/daffodil-test/src/test/resources/org/apache/daffodil/section13/nillable/nillable.tdml
b/daffodil-test/src/test/resources/org/apache/daffodil/section13/nillable/nillable.tdml
index 7a41b6fd7..520a7d44d 100644
---
a/daffodil-test/src/test/resources/org/apache/daffodil/section13/nillable/nillable.tdml
+++
b/daffodil-test/src/test/resources/org/apache/daffodil/section13/nillable/nillable.tdml
@@ -229,13 +229,8 @@
</tdml:document>
<tdml:errors>
<tdml:error>Schema Definition Error</tdml:error>
- <tdml:error>nillable</tdml:error>
- <tdml:error>complex</tdml:error>
- <tdml:error>elements</tdml:error>
- <tdml:error>only</tdml:error>
- <tdml:error>ES</tdml:error>
<tdml:error>nilValue</tdml:error>
- <tdml:error>property</tdml:error>
+ <tdml:error>cannot be empty</tdml:error>
</tdml:errors>
</tdml:parserTestCase>
diff --git
a/daffodil-test/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml
b/daffodil-test/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml
index 272d3f623..cac85281f 100644
---
a/daffodil-test/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml
+++
b/daffodil-test/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml
@@ -1390,7 +1390,7 @@
<tdml:errors>
<tdml:error>Schema Definition Error</tdml:error>
<tdml:error>textStandardDecimalSeparator</tdml:error>
- <tdml:error>exactly 1</tdml:error>
+ <tdml:error>cannot be empty</tdml:error>
</tdml:errors>
</tdml:parserTestCase>
diff --git
a/daffodil-test/src/test/resources/org/apache/daffodil/section23/dfdl_expressions/expressions.tdml
b/daffodil-test/src/test/resources/org/apache/daffodil/section23/dfdl_expressions/expressions.tdml
index f0c514c83..6ca8c3621 100644
---
a/daffodil-test/src/test/resources/org/apache/daffodil/section23/dfdl_expressions/expressions.tdml
+++
b/daffodil-test/src/test/resources/org/apache/daffodil/section23/dfdl_expressions/expressions.tdml
@@ -1493,9 +1493,8 @@
<xs:complexType>
<xs:sequence dfdl:separator=",">
<xs:element name="value" type="xs:string"
dfdl:lengthKind="delimited" dfdl:initiator="{'
-3
-2
-1
+321
+blastoff
' }"/>
</xs:sequence>
</xs:complexType>
@@ -1956,20 +1955,20 @@
<!--
Test Name: internal_space_preserved4
Schema: expressions-Embedded.dfdl.xsd
- Purpose: This test demonstrates that in order to use newlines in an
expression and
- have them preserved, one must use dfdl:property.
+ Purpose: This test demonstrates that multiple spaces are allowed
using dfdl:property
-->
<tdml:parserTestCase name="internal_space_preserved4" root="expr_space5b"
model="expressions-Embedded.dfdl.xsd" description="">
<tdml:document><![CDATA[1blastoff]]></tdml:document>
- <tdml:errors>
- <tdml:error>Schema Definition Error</tdml:error>
- <tdml:error>property 'initiator'</tdml:error>
- <tdml:error>cannot start or end with the string</tdml:error>
- <tdml:error>U+0020</tdml:error>
- </tdml:errors>
+ <tdml:infoset>
+ <tdml:dfdlInfoset>
+ <expr_space5b>
+ <value>blastoff</value>
+ </expr_space5b>
+ </tdml:dfdlInfoset>
+ </tdml:infoset>
</tdml:parserTestCase>
<!--
@@ -2001,7 +2000,7 @@
Test Name: internal_space_not_preserved2
Schema: expressions-Embedded.dfdl.xsd
Purpose: This test demonstrates that when expressions appear in
attribute values
- as in short form dfdl:inputValueCalc, that newlines are not preserved.
+ as in short form dfdl:inputValueCalc, that newlines are not preserved
and are treated as spaces.
This is unavoidable in XML. One must use dfdl:property for expressions
where
whitespace is significant.
-->
@@ -2010,12 +2009,13 @@
model="expressions-Embedded.dfdl.xsd" description="">
<tdml:document><![CDATA[321blastoff]]></tdml:document>
- <tdml:errors>
- <tdml:error>Schema Definition Error</tdml:error>
- <tdml:error>property 'initiator'</tdml:error>
- <tdml:error>cannot start or end with the string</tdml:error>
- <tdml:error>U+0020</tdml:error>
- </tdml:errors>
+ <tdml:infoset>
+ <tdml:dfdlInfoset>
+ <expr_space5>
+ <value>blastoff</value>
+ </expr_space5>
+ </tdml:dfdlInfoset>
+ </tdml:infoset>
</tdml:parserTestCase>
<tdml:defineSchema name="expression-type-errors.dfdl.xsd">
diff --git
a/daffodil-test/src/test/resources/org/apache/daffodil/section23/runtime_properties/dynamicSeparator.tdml
b/daffodil-test/src/test/resources/org/apache/daffodil/section23/runtime_properties/dynamicSeparator.tdml
index dd9a6e866..ca20f72ca 100644
---
a/daffodil-test/src/test/resources/org/apache/daffodil/section23/runtime_properties/dynamicSeparator.tdml
+++
b/daffodil-test/src/test/resources/org/apache/daffodil/section23/runtime_properties/dynamicSeparator.tdml
@@ -55,8 +55,7 @@
</tdml:document>
<tdml:errors>
<tdml:error>Schema Definition Error</tdml:error><!-- at runtime -->
- <tdml:error>The property 'separator'</tdml:error>
- <tdml:error>U+000a</tdml:error>
+ <tdml:error>Property separator cannot be empty string</tdml:error>
</tdml:errors>
</tdml:parserTestCase>
diff --git
a/daffodil-test/src/test/scala/org/apache/daffodil/extensions/TestRepType.scala
b/daffodil-test/src/test/scala/org/apache/daffodil/extensions/TestRepType.scala
index d232a6a6d..bf5fe5abb 100644
---
a/daffodil-test/src/test/scala/org/apache/daffodil/extensions/TestRepType.scala
+++
b/daffodil-test/src/test/scala/org/apache/daffodil/extensions/TestRepType.scala
@@ -107,4 +107,12 @@ class TestRepType {
@Test def test_repType_different_namespaces_01(): Unit = {
runner.runOneTest("repType_different_namespaces_01")
}
+
+ @Test def test_repValuesWithSpaces_01(): Unit = {
+ runner.runOneTest("repValuesWithSpaces_01")
+ }
+
+ @Test def test_repValuesWithSpaces_02(): Unit = {
+ runner.runOneTest("repValuesWithSpaces_02")
+ }
}