This is an automated email from the ASF dual-hosted git repository.

slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git


The following commit(s) were added to refs/heads/main by this push:
     new b5d0ec727 Allow empty leading/trailing whitespace for list-of-strings 
properties
b5d0ec727 is described below

commit b5d0ec7278cfe42dcf706ba4970749b065de6f8d
Author: Steve Lawrence <[email protected]>
AuthorDate: Fri Oct 27 14:04:51 2023 -0400

    Allow empty leading/trailing whitespace for list-of-strings properties
    
    Currently we SDE if a property that accepts a list of DFDL string
    literals starts or ends with whitespace. This removes that restriction in
    the list cooker by splitting on whitespace and removing any strings that
    are the empty string.
    
    This has one side effect that the resulting list can now be
    empty--previously an empty property would have a single element with the
    empty string. An empty list is easier to reason about and detect than a
    list with a single zero-length string, but it does require changing the
    logic in a few places.
    
    DAFFODIL-2858
---
 .../org/apache/daffodil/lib/cookers/Cookers.scala  |  5 +-
 .../daffodil/lib/cookers/EntityReplacer.scala      | 53 ++++++++++------------
 .../daffodil/extensions/repType/repType.tdml       | 33 ++++++++++++++
 .../delimiter_properties/DelimiterProperties.tdml  | 36 ++++++++++-----
 .../DelimiterPropertiesUnparse.tdml                |  8 +---
 .../daffodil/section13/nillable/nillable.tdml      |  7 +--
 .../text_number_props/TextNumberProps.tdml         |  2 +-
 .../section23/dfdl_expressions/expressions.tdml    | 36 +++++++--------
 .../runtime_properties/dynamicSeparator.tdml       |  3 +-
 .../apache/daffodil/extensions/TestRepType.scala   |  8 ++++
 10 files changed, 117 insertions(+), 74 deletions(-)

diff --git 
a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/cookers/Cookers.scala 
b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/cookers/Cookers.scala
index 4a22f9d1e..fcadb5d27 100644
--- a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/cookers/Cookers.scala
+++ b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/cookers/Cookers.scala
@@ -53,7 +53,9 @@ object NilValueLiteralValueBinaryCooker
 
 object NilValueLiteralValueTextCooker extends 
NonEmptyListOfStringLiteral("nilValue", true)
 
-object NilValueRawListCooker extends ListOfStringLiteral("nilValue", true)
+object NilValueRawListCooker
+  extends ListOfStringLiteral("nilValue", true)
+  with ListOfStringOneOrMoreLiteral
 
 object EscapeCharacterCooker extends 
SingleCharacterLiteralNoCharClassEntitiesNoByteEntities()
 
@@ -79,6 +81,7 @@ object SeparatorCooker extends 
DelimiterCookerNoES("separator")
 
 object TextStandardDecimalSeparatorCooker
   extends ListOfSingleCharacterLiteralNoCharClassEntitiesNoByteEntities()
+  with ListOfStringOneOrMoreLiteral
 
 object TextStandardGroupingSeparatorCooker
   extends SingleCharacterLiteralNoCharClassEntitiesNoByteEntities()
diff --git 
a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/cookers/EntityReplacer.scala
 
b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/cookers/EntityReplacer.scala
index 67888ddcf..29ba77236 100755
--- 
a/daffodil-lib/src/main/scala/org/apache/daffodil/lib/cookers/EntityReplacer.scala
+++ 
b/daffodil-lib/src/main/scala/org/apache/daffodil/lib/cookers/EntityReplacer.scala
@@ -722,20 +722,9 @@ sealed abstract class ListOfStringLiteralBase(
   private lazy val olc = oneLiteralCooker
 
   protected def cook(raw: String, context: ThrowsSDE, forUnparse: Boolean): 
List[String] = {
-    if (raw.length != 0 && (raw.head.isWhitespace || raw.last.isWhitespace)) {
-      val ws = if (raw.head.isWhitespace) raw.head else raw.last
-      val wsVisible = Misc.remapCodepointToVisibleGlyph(ws.toChar).toChar
-      val hexCodePoint = "%04x".format(ws.toInt)
-      context.SDE(
-        "The property '%s' cannot start or end with the string \"%s\"(Unicode 
hex code point U+%s), or consist entirely of whitespace."
-          + "\nDid you mean to use character entities like '%%SP;' or '%%NL;' 
to indicate whitespace in the data format instead?",
-        propName,
-        wsVisible,
-        hexCodePoint,
-      )
-    }
 
-    val rawList = raw.split("\\s+").toList
+    // ignore leading, trailing, and repeating whitespace
+    val rawList = raw.split("\\s").filterNot(_ == "").toList
 
     val cooked = {
       val cookedList: ListBuffer[String] = ListBuffer.empty
@@ -774,18 +763,15 @@ class NonEmptyListOfStringLiteral(pn: String, 
allowByteEntities: Boolean)
 
   override def testCooked(cookedList: List[String], context: ThrowsSDE) = {
     context.schemaDefinitionUnless(
-      cookedList.exists { _.length > 0 },
-      "Property dfdl:%s cannot be empty string. Use dfdl:nilValue='%%ES;' for 
empty string as nil value.",
+      cookedList.length > 0,
+      "Property dfdl:%s cannot be empty string. Use dfdl:%s='%%ES;' for empty 
string.",
+      propName,
       propName,
     )
   }
 }
 
-class ListOfString1OrMoreLiteral(pn: String, allowByteEntities: Boolean)
-  extends ListOfStringLiteralBase(pn, allowByteEntities) {
-
-  override protected val oneLiteralCooker: StringLiteralBase =
-    new StringLiteral(propName, allowByteEntities)
+trait ListOfStringOneOrMoreLiteral { self: ListOfStringLiteralBase =>
 
   override protected def testCooked(cooked: List[String], context: ThrowsSDE): 
Unit = {
     context.schemaDefinitionUnless(
@@ -889,16 +875,17 @@ class 
NonEmptyListOfStringLiteralCharClass_ES_WithByteEntities(pn: String)
 
   override def testCooked(cookedList: List[String], context: ThrowsSDE) = {
     context.schemaDefinitionUnless(
-      cookedList.exists { _.length > 0 },
-      "Property dfdl:%s cannot be empty string. Use dfdl:nilValue='%%ES;' for 
empty string as nil value.",
+      cookedList.length > 0,
+      "Property dfdl:%s cannot be empty string. Use dfdl:%s='%%ES;' for empty 
string.",
+      propName,
       propName,
     )
   }
 }
 
-class DelimiterCookerNoES(pn: String) extends ListOfString1OrMoreLiteral(pn, 
true) {
+class DelimiterCookerNoES(pn: String) extends DelimiterCooker(pn) {
 
-  override val oneLiteralCooker: StringLiteralBase =
+  override def oneDelimiterLiteralCooker: StringLiteralBase =
     new StringLiteralNoCharClassEntities(propName, true) with 
DisallowedCharClassEntitiesMixin {
 
       // Disallow "%ES" in the string raw. Disallow "%WSP*" when it is
@@ -915,9 +902,9 @@ class DelimiterCookerNoES(pn: String) extends 
ListOfString1OrMoreLiteral(pn, tru
     }
 }
 
-class DelimiterCookerNoSoleES(pn: String) extends 
ListOfString1OrMoreLiteral(pn, true) {
+class DelimiterCookerNoSoleES(pn: String) extends DelimiterCooker(pn) {
 
-  override val oneLiteralCooker: StringLiteralBase =
+  override def oneDelimiterLiteralCooker: StringLiteralBase =
     new StringLiteralBase(propName, true) {
 
       override def testRaw(raw: String, context: ThrowsSDE): Unit = {
@@ -937,8 +924,18 @@ class DelimiterCookerNoSoleES(pn: String) extends 
ListOfString1OrMoreLiteral(pn,
 }
 
 class DelimiterCooker(pn: String) extends ListOfStringLiteralBase(pn, true) {
-  private val constantCooker = new ListOfStringLiteral(propName, true) // zero 
length allowed
-  private val runtimeCooker = new ListOfString1OrMoreLiteral(propName, true)
+
+  def oneDelimiterLiteralCooker: StringLiteralBase = new StringLiteral(pn, 
true)
+
+  // zero length allowed for constants
+  private val constantCooker = new ListOfStringLiteral(propName, true) {
+    override val oneLiteralCooker = oneDelimiterLiteralCooker
+  }
+
+  private val runtimeCooker = new ListOfStringLiteral(propName, true)
+    with ListOfStringOneOrMoreLiteral {
+    override val oneLiteralCooker = oneDelimiterLiteralCooker
+  }
 
   override def convertRuntime(
     b: String,
diff --git 
a/daffodil-test/src/test/resources/org/apache/daffodil/extensions/repType/repType.tdml
 
b/daffodil-test/src/test/resources/org/apache/daffodil/extensions/repType/repType.tdml
index 3f6be0393..b1b21f8b4 100644
--- 
a/daffodil-test/src/test/resources/org/apache/daffodil/extensions/repType/repType.tdml
+++ 
b/daffodil-test/src/test/resources/org/apache/daffodil/extensions/repType/repType.tdml
@@ -69,6 +69,16 @@
       </xs:restriction>
     </xs:simpleType>
 
+    <xs:element name="repValuesWithSpaces" dfdlx:repType="tns:uint8">
+      <xs:simpleType>
+        <xs:restriction base="xs:string">
+          <xs:enumeration value="zero" dfdlx:repValues="  0  " />
+          <xs:enumeration value="one" dfdlx:repValues="  1  " />
+          <xs:enumeration value="more" dfdlx:repValues="  2   3   4  " 
dfdlx:repValueRanges="  5   100  101  200  " />
+        </xs:restriction>
+      </xs:simpleType>
+    </xs:element>
+
   </tdml:defineSchema>
 
   <tdml:parserTestCase name="repType_keysetValue_00"
@@ -197,6 +207,29 @@
     </tdml:infoset>
   </tdml:unparserTestCase>
 
+  <tdml:parserTestCase name="repValuesWithSpaces_01" 
root="repValuesWithSpaces" model="repType-Embedded.dfdl.xsd">
+    <tdml:document>
+      <tdml:documentPart type="byte">01</tdml:documentPart>
+    </tdml:document>
+    <tdml:infoset>
+      <tdml:dfdlInfoset>
+        <repValuesWithSpaces>one</repValuesWithSpaces>
+      </tdml:dfdlInfoset>
+    </tdml:infoset>
+  </tdml:parserTestCase>
+
+  <tdml:parserTestCase name="repValuesWithSpaces_02" 
root="repValuesWithSpaces" model="repType-Embedded.dfdl.xsd">
+    <tdml:document>
+      <tdml:documentPart type="byte">02</tdml:documentPart>
+    </tdml:document>
+    <tdml:infoset>
+      <tdml:dfdlInfoset>
+        <repValuesWithSpaces>more</repValuesWithSpaces>
+      </tdml:dfdlInfoset>
+    </tdml:infoset>
+  </tdml:parserTestCase>
+
+
   <tdml:defineSchema name="repType-Inherited.dfdl.xsd">
 
     <xs:include 
schemaLocation="/org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd"/>
diff --git 
a/daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/DelimiterProperties.tdml
 
b/daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/DelimiterProperties.tdml
index 1a6c94fba..471126389 100644
--- 
a/daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/DelimiterProperties.tdml
+++ 
b/daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/DelimiterProperties.tdml
@@ -388,32 +388,44 @@
 
   <tdml:parserTestCase name="DelimProp_10_02" root="DP_04_02"
     model="DelimiterProperties-Embedded.dfdl.xsd"
-    description="Section 12 property delimiters -use of empty space as 
terminator - DFDL-12-033R Should fail"
+    description="Section 12 property delimiters - terminator starts with space 
- DFDL-12-033R Should pass, leading space ignored"
     roundTrip="twoPass">
 
     <tdml:document><![CDATA[.00300 -02.75
       3.9900 7.33
     ]]></tdml:document>
-    <tdml:errors>
-      <tdml:error>Schema Definition Error</tdml:error>
-      <tdml:error>cannot start or end</tdml:error>
-      <tdml:error>'%SP;'</tdml:error>
-    </tdml:errors>
+    <tdml:infoset>
+      <tdml:dfdlInfoset xmlns:xs="http://www.w3.org/2001/XMLSchema"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+        <DP_04_02>
+          <s1>0.003</s1>
+          <s1>-2.75</s1>
+          <s1>3.99</s1>
+          <s1>7.33</s1>
+        </DP_04_02>
+      </tdml:dfdlInfoset>
+    </tdml:infoset>
   </tdml:parserTestCase>
 
   <tdml:parserTestCase name="DelimProp_10_03" root="DP_04_03"
     model="DelimiterProperties-Embedded.dfdl.xsd"
-    description="Section 12 property delimiters -use of empty space as 
terminator - DFDL-12-033R Should fail"
+    description="Section 12 property delimiters - terminator ends with space - 
DFDL-12-033R Should pass, trailing space ignored"
     roundTrip="twoPass">
 
     <tdml:document><![CDATA[.00300 -02.75
       3.9900 7.33
     ]]></tdml:document>
-    <tdml:errors>
-      <tdml:error>Schema Definition Error</tdml:error>
-      <tdml:error>cannot start or end</tdml:error>
-      <tdml:error>'%SP;'</tdml:error>
-    </tdml:errors>
+    <tdml:infoset>
+      <tdml:dfdlInfoset xmlns:xs="http://www.w3.org/2001/XMLSchema"
+        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+        <DP_04_03>
+          <s1>0.003</s1>
+          <s1>-2.75</s1>
+          <s1>3.99</s1>
+          <s1>7.33</s1>
+        </DP_04_03>
+      </tdml:dfdlInfoset>
+    </tdml:infoset>
   </tdml:parserTestCase>
 
   <tdml:parserTestCase name="OptionalWSPTermWithExplicitLength" root="e1"
diff --git 
a/daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/DelimiterPropertiesUnparse.tdml
 
b/daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/DelimiterPropertiesUnparse.tdml
index f5b06f618..deffeaf11 100644
--- 
a/daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/DelimiterPropertiesUnparse.tdml
+++ 
b/daffodil-test/src/test/resources/org/apache/daffodil/section12/delimiter_properties/DelimiterPropertiesUnparse.tdml
@@ -342,7 +342,7 @@
       Test Name: unparseSeparatorLeadingSpace
       Schema: delimitedStrings
       Root: e15
-      Purpose: This test demonstrates the scenario where the list of 
separators has a leading space
+      Purpose: This test demonstrates the scenario where the list of 
separators has a leading space, the leading space should be ignored
 -->
 
   <tdml:unparserTestCase name="unparseSeparatorLeadingSpace" 
model="delimitedStrings" root="e15" roundTrip="true">
@@ -354,11 +354,7 @@
         </ex:e15>
       </tdml:dfdlInfoset>
     </tdml:infoset>
-
-    <tdml:errors>
-      <tdml:error>Schema Definition Error</tdml:error>
-    </tdml:errors>
-
+    <tdml:document>,apple+.|banana1</tdml:document>
   </tdml:unparserTestCase>
 
 <!--
diff --git 
a/daffodil-test/src/test/resources/org/apache/daffodil/section13/nillable/nillable.tdml
 
b/daffodil-test/src/test/resources/org/apache/daffodil/section13/nillable/nillable.tdml
index 7a41b6fd7..520a7d44d 100644
--- 
a/daffodil-test/src/test/resources/org/apache/daffodil/section13/nillable/nillable.tdml
+++ 
b/daffodil-test/src/test/resources/org/apache/daffodil/section13/nillable/nillable.tdml
@@ -229,13 +229,8 @@
     </tdml:document>
     <tdml:errors>
       <tdml:error>Schema Definition Error</tdml:error>
-      <tdml:error>nillable</tdml:error>
-      <tdml:error>complex</tdml:error>
-      <tdml:error>elements</tdml:error>
-      <tdml:error>only</tdml:error>
-      <tdml:error>ES</tdml:error>
       <tdml:error>nilValue</tdml:error>
-      <tdml:error>property</tdml:error>
+      <tdml:error>cannot be empty</tdml:error>
     </tdml:errors>
   </tdml:parserTestCase>
 
diff --git 
a/daffodil-test/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml
 
b/daffodil-test/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml
index 272d3f623..cac85281f 100644
--- 
a/daffodil-test/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml
+++ 
b/daffodil-test/src/test/resources/org/apache/daffodil/section13/text_number_props/TextNumberProps.tdml
@@ -1390,7 +1390,7 @@
     <tdml:errors>
       <tdml:error>Schema Definition Error</tdml:error>
       <tdml:error>textStandardDecimalSeparator</tdml:error>
-      <tdml:error>exactly 1</tdml:error>
+      <tdml:error>cannot be empty</tdml:error>
     </tdml:errors>
 
   </tdml:parserTestCase>
diff --git 
a/daffodil-test/src/test/resources/org/apache/daffodil/section23/dfdl_expressions/expressions.tdml
 
b/daffodil-test/src/test/resources/org/apache/daffodil/section23/dfdl_expressions/expressions.tdml
index f0c514c83..6ca8c3621 100644
--- 
a/daffodil-test/src/test/resources/org/apache/daffodil/section23/dfdl_expressions/expressions.tdml
+++ 
b/daffodil-test/src/test/resources/org/apache/daffodil/section23/dfdl_expressions/expressions.tdml
@@ -1493,9 +1493,8 @@
       <xs:complexType>
         <xs:sequence dfdl:separator=",">
           <xs:element name="value" type="xs:string" 
dfdl:lengthKind="delimited" dfdl:initiator="{'
-3
-2
-1
+321
+blastoff
 ' }"/>
         </xs:sequence>
       </xs:complexType>
@@ -1956,20 +1955,20 @@
 <!--
      Test Name: internal_space_preserved4
         Schema: expressions-Embedded.dfdl.xsd
-       Purpose: This test demonstrates that in order to use newlines in an 
expression and
-                have them preserved, one must use dfdl:property.
+       Purpose: This test demonstrates that multiple spaces are allowed 
using dfdl:property
 -->
   
   <tdml:parserTestCase name="internal_space_preserved4" root="expr_space5b"
     model="expressions-Embedded.dfdl.xsd" description="">
 
     <tdml:document><![CDATA[1blastoff]]></tdml:document>
-    <tdml:errors>
-      <tdml:error>Schema Definition Error</tdml:error>
-      <tdml:error>property 'initiator'</tdml:error>
-      <tdml:error>cannot start or end with the string</tdml:error>
-      <tdml:error>U+0020</tdml:error>
-    </tdml:errors>
+    <tdml:infoset>
+      <tdml:dfdlInfoset>
+        <expr_space5b>
+          <value>blastoff</value>
+        </expr_space5b>
+      </tdml:dfdlInfoset>
+    </tdml:infoset>
   </tdml:parserTestCase>
 
 <!--
@@ -2001,7 +2000,7 @@
      Test Name: internal_space_not_preserved2
         Schema: expressions-Embedded.dfdl.xsd
        Purpose: This test demonstrates that when expressions appear in 
attribute values
-       as in short form dfdl:inputValueCalc, that newlines are not preserved.
+       as in short form dfdl:inputValueCalc, that newlines are not preserved 
and are treated as spaces.
        This is unavoidable in XML. One must use dfdl:property for expressions 
where
        whitespace is significant.
 -->
@@ -2010,12 +2009,13 @@
     model="expressions-Embedded.dfdl.xsd" description="">
 
     <tdml:document><![CDATA[321blastoff]]></tdml:document>
-    <tdml:errors>
-      <tdml:error>Schema Definition Error</tdml:error>
-      <tdml:error>property 'initiator'</tdml:error>
-      <tdml:error>cannot start or end with the string</tdml:error>
-      <tdml:error>U+0020</tdml:error>
-    </tdml:errors>
+    <tdml:infoset>
+      <tdml:dfdlInfoset>
+        <expr_space5>
+          <value>blastoff</value>
+        </expr_space5>
+      </tdml:dfdlInfoset>
+    </tdml:infoset>
   </tdml:parserTestCase>
 
   <tdml:defineSchema name="expression-type-errors.dfdl.xsd">
diff --git 
a/daffodil-test/src/test/resources/org/apache/daffodil/section23/runtime_properties/dynamicSeparator.tdml
 
b/daffodil-test/src/test/resources/org/apache/daffodil/section23/runtime_properties/dynamicSeparator.tdml
index dd9a6e866..ca20f72ca 100644
--- 
a/daffodil-test/src/test/resources/org/apache/daffodil/section23/runtime_properties/dynamicSeparator.tdml
+++ 
b/daffodil-test/src/test/resources/org/apache/daffodil/section23/runtime_properties/dynamicSeparator.tdml
@@ -55,8 +55,7 @@
     </tdml:document>
     <tdml:errors>
       <tdml:error>Schema Definition Error</tdml:error><!-- at runtime -->
-      <tdml:error>The property 'separator'</tdml:error>
-      <tdml:error>U+000a</tdml:error>
+      <tdml:error>Property separator cannot be empty string</tdml:error>
     </tdml:errors>
   </tdml:parserTestCase>
 
diff --git 
a/daffodil-test/src/test/scala/org/apache/daffodil/extensions/TestRepType.scala 
b/daffodil-test/src/test/scala/org/apache/daffodil/extensions/TestRepType.scala
index d232a6a6d..bf5fe5abb 100644
--- 
a/daffodil-test/src/test/scala/org/apache/daffodil/extensions/TestRepType.scala
+++ 
b/daffodil-test/src/test/scala/org/apache/daffodil/extensions/TestRepType.scala
@@ -107,4 +107,12 @@ class TestRepType {
   @Test def test_repType_different_namespaces_01(): Unit = {
     runner.runOneTest("repType_different_namespaces_01")
   }
+
+  @Test def test_repValuesWithSpaces_01(): Unit = {
+    runner.runOneTest("repValuesWithSpaces_01")
+  }
+
+  @Test def test_repValuesWithSpaces_02(): Unit = {
+    runner.runOneTest("repValuesWithSpaces_02")
+  }
 }

Reply via email to