This is an automated email from the ASF dual-hosted git repository.

slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git


The following commit(s) were added to refs/heads/main by this push:
     new 899fd431e Add support for comment syntax in 
BitDocumentPart/ByteDocumentPart
899fd431e is described below

commit 899fd431e9b37e0f07fe737937d665dc149003a9
Author: Varun Zaver <[email protected]>
AuthorDate: Tue May 31 09:26:56 2022 -0500

    Add support for comment syntax in BitDocumentPart/ByteDocumentPart
    
    Adding a comment syntax lets users put labels inside documentPart
    elements of type bits and byte.
    
    Two regexes are used to recognize the comment syntax, // and /* */.
    Anything from // to the end of the line and anything from /* to */ is
    ignored and not interpreted as user data. The remaining data is
    parsed; any invalid character is skipped, and a log warning is
    generated notifying the user that the data contains invalid
    characters and that a comment should be considered. Some invalid
    characters (|, (, ), [, ], ., X, x, space, and line endings) are
    skipped without triggering a warning because they are already used
    extensively.
    
    DAFFODIL-2369
---
 .../org/apache/daffodil/tdml/TDMLRunner.scala      |  54 +++-
 .../apache/daffodil/tdml/UnitTestTDMLRunner.scala  | 316 ++++++++++++++++++++-
 2 files changed, 358 insertions(+), 12 deletions(-)

diff --git 
a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala 
b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
index a1b3d9353..de897236b 100644
--- a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
+++ b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
@@ -2208,20 +2208,14 @@ class ByteDocumentPart(part: Node, parent: Document) 
extends DataDocumentPart(pa
     bits
   }
 
-  // Note: anything that is not a valid hex digit (or binary digit for binary) 
is simply skipped
-  // TODO: we should check for whitespace and other characters we want to 
allow, and verify them.
-  // TODO: Or better, validate this in the XML Schema for tdml via a pattern 
facet
-  // TODO: Consider whether to support a comment syntax. When showing data 
examples this may be useful.
-  //
-  lazy val hexDigits = partRawContent.flatMap { ch => if 
(validHexDigits.contains(ch)) List(ch) else Nil }
+  lazy val hexDigits = CanonData.canonicalizeData(validHexDigits, 
partRawContent)
 
 }
 
-class BitsDocumentPart(part: Node, parent: Document) extends 
DataDocumentPart(part, parent) {
-  lazy val bitDigits = {
-    val res = partRawContent.split("[^01]").mkString
-    res
-  }
+final class BitsDocumentPart(part: Node, parent: Document) extends 
DataDocumentPart(part, parent) {
+  val validBits = "01"
+
+  lazy val bitDigits = CanonData.canonicalizeData(validBits, partRawContent)
 
   lazy val dataBits = partByteOrder match {
     case LTR => {
@@ -2360,6 +2354,44 @@ sealed abstract class DocumentPart(part: Node, parent: 
Document) {
 
 }
 
+object CanonData {
+  // Both comment styles are matched in ONE left-to-right pass. Stripping
+  // "//" comments first (as a separate pass) would also cut text that sits
+  // inside a block comment, e.g. a URL, and could delete the block's
+  // terminator, making well-formed input look malformed.
+  // DOTALL is scoped to the block-comment alternative only; in the "//"
+  // alternative "." still stops at the end of the line.
+  private lazy val commentPattern = "(?s:/[*].*?[*]/)|//.*".r
+
+  // Non-data characters that are dropped silently (no warning is logged).
+  private lazy val noWarnCharsSet = "|()[].Xx \n\r"
+
+  /**
+   * Reduce the raw text of a documentPart to only its data characters.
+   *
+   * Line comments (from "//" to the end of the line) and block comments
+   * ("/* */", possibly spanning lines) are removed first. Of what is left,
+   * only characters contained in validCharactersSet are kept, in order.
+   * Every other character is dropped; if any dropped character is not in
+   * the silent-separator set, a single warning is logged.
+   *
+   * @param validCharactersSet the characters that count as data
+   * @param userData the raw documentPart text
+   * @return userData with comments and all non-data characters removed
+   * @throws TDMLException if a block-comment opener or terminator is left
+   *                       over once the comments have been removed
+   */
+  def canonicalizeData(validCharactersSet: String, userData: String): String = {
+    val withoutComments = commentPattern.replaceAllIn(userData, "")
+
+    // A leftover opener or terminator means a block comment was unbalanced.
+    if (withoutComments.contains("/*") || withoutComments.contains("*/")) {
+      throw TDMLException("Improper formatting of /* */ style comment", None)
+    }
+
+    // Split into kept data and skipped characters without mutable state.
+    val (validData, skipped) =
+      withoutComments.partition(ch => validCharactersSet.contains(ch))
+
+    if (skipped.exists(ch => !noWarnCharsSet.contains(ch))) {
+      Logger.log.warn("Data contains invalid character(s). Consider using a comment (// or /* */).")
+    }
+
+    validData
+  }
+}
+
 case class Infoset(i: NodeSeq, parent: TestCase) {
   lazy val Seq(dfdlInfoset) = (i \ "dfdlInfoset").map { node => 
DFDLInfoset(node, this) }
   lazy val contents = dfdlInfoset.contents
diff --git 
a/daffodil-tdml-lib/src/test/scala/org/apache/daffodil/tdml/UnitTestTDMLRunner.scala
 
b/daffodil-tdml-lib/src/test/scala/org/apache/daffodil/tdml/UnitTestTDMLRunner.scala
index 25301ac49..3b75e38cf 100644
--- 
a/daffodil-tdml-lib/src/test/scala/org/apache/daffodil/tdml/UnitTestTDMLRunner.scala
+++ 
b/daffodil-tdml-lib/src/test/scala/org/apache/daffodil/tdml/UnitTestTDMLRunner.scala
@@ -490,4 +490,318 @@ class UnitTestTDMLRunner {
     assertTrue(dataElem ne null)
     runner.reset
   }
-}
+
+  @Test def testCommentBit(): Unit = {
+    val xml = <document bitOrder="LSBFirst"><documentPart type="bits">00000010 
//this is a label111</documentPart></document>
+    val doc = new Document(xml, null)
+    val dp = doc.documentParts.collect { case x: BitsDocumentPart => x }
+    val firstPart = dp(0)
+    assertEquals("00000010", firstPart.bitDigits)
+  }
+
+  @Test def testCommentBitWithNewLine(): Unit = {
+    val xml = <document bitOrder="LSBFirst">
+              <documentPart type="bits">01 01 11 //flagByte1
+                                        1 //bool2</documentPart>
+              </document>
+    val doc = new Document(xml, null)
+    val dp = doc.documentParts.collect { case x: BitsDocumentPart => x }
+    val firstPart = dp(0)
+    assertEquals("0101111", firstPart.bitDigits)
+  }
+
+    @Test def testCommentBitJustComments(): Unit = {
+      val xml = <document bitOrder="LSBFirst">
+                  <documentPart type="bits">
+                  // this doc part contains no bits
+                  // at all. It is just comments.
+                  // 101010101
+                  </documentPart>
+                </document>
+    val doc = new Document(xml, null)
+    val dp = doc.documentParts.collect { case x: BitsDocumentPart => x }
+    val firstPart = dp(0)
+    assertEquals("", firstPart.bitDigits)
+  }
+
+  @Test def testCommentBitNoLineEnding(): Unit = {
+    val xml = <document bitOrder="LSBFirst"><documentPart type="bits">01011010 
// just a comment here no line ending </documentPart>
+              </document>
+    val doc = new Document(xml, null)
+    val dp = doc.documentParts.collect { case x: BitsDocumentPart => x }
+    val firstPart = dp(0)
+    assertEquals("01011010", firstPart.bitDigits)
+  }
+
+  @Test def testCommentBitBothCommentFormatsNewLine(): Unit = {
+    val xml = <document bitOrder="LSBFirst">
+              <documentPart type="bits">0100110110 /*C0mment 01011111 _01*/11
+100111//D1fferent sty1e c0mment</documentPart>
+              </document>
+    val doc = new Document(xml, null)
+    val dp = doc.documentParts.collect { case x: BitsDocumentPart => x }
+    val firstPart = dp(0)
+    assertEquals("010011011011100111", firstPart.bitDigits)
+  }
+
+  @Test def testCommentBitBothCommentFormats(): Unit = {
+    val xml = <document bitOrder="LSBFirst">
+              <documentPart type="bits">0100110110 /*C0mment 01011111 
_01*/100111//D1fferent sty1e c0mment</documentPart>
+              </document>
+    val doc = new Document(xml, null)
+    val dp = doc.documentParts.collect { case x: BitsDocumentPart => x }
+    val firstPart = dp(0)
+    assertEquals("0100110110100111", firstPart.bitDigits)
+  }
+
+  @Test def testBitBadCommentFormatException(): Unit = {
+    val xml = <document bitOrder="LSBFirst">
+              <documentPart type="bits">0100110110 C0mment 01011111 
_01100*/111//D1fferent sty1e c0mment</documentPart>
+              </document>
+    val exc = intercept[TDMLException] {
+      val doc = new Document(xml, null)
+      doc.documentParts.collect { case x: BitsDocumentPart => x }
+      val dp = doc.documentParts.collect { case x: BitsDocumentPart => x }
+      val firstPart = dp(0).bitDigits
+    }
+    assertTrue(exc.getMessage().contains("Improper formatting of /* */ style 
comment"))
+  }
+
+  @Test def testCommentBitNoWarningCharacters(): Unit = {
+    val xml = <document bitOrder="LSBFirst">
+              <documentPart type="bits">01|01|00
+                                        (10).[01]</documentPart></document>
+    val doc = new Document(xml, null)
+    val dp = doc.documentParts.collect { case x: BitsDocumentPart => x }
+    val firstPart = dp(0)
+    assertEquals("0101001001", firstPart.bitDigits)
+  }
+
+  @Test def testCommentBitNoWarningCharactersWithInvalid(): Unit = {
+    val xml = <document bitOrder="LSBFirst">
+              <documentPart type="bits">01|01|00 !!
+                                        (10).[01]</documentPart></document>
+    val doc = new Document(xml, null)
+    val dp = doc.documentParts.collect { case x: BitsDocumentPart => x }
+    val firstPart = dp(0)
+    assertEquals("0101001001", firstPart.bitDigits)
+  }
+
+  @Test def testCommentBitNonGreedy(): Unit = {
+    val xml = <document bitOrder="LSBFirst">
+              <documentPart type="bits">0101 /*Data 1*/ 0101 /*Data 
2*/</documentPart></document>
+    val doc = new Document(xml, null)
+    val dp = doc.documentParts.collect { case x: BitsDocumentPart => x }
+    val firstPart = dp(0)
+    assertEquals("01010101", firstPart.bitDigits)
+  }
+
+  @Test def testCommentBitNonGreedyNewLine(): Unit = {
+    val xml = <document bitOrder="LSBFirst">
+              <documentPart type="bits">0101 /*Data 1
+                                              Explanation*/
+                                        0101 /*Data 2
+                                              Explanation*/
+              </documentPart>
+              </document>
+    val doc = new Document(xml, null)
+    val dp = doc.documentParts.collect { case x: BitsDocumentPart => x }
+    val firstPart = dp(0)
+    assertEquals("01010101", firstPart.bitDigits)
+  }
+
+  @Test def testCommentBitCarriageReturn(): Unit = {
+    val xml = <document bitOrder="LSBFirst">
+              <documentPart type="bits">0101&#13;00 /*Data 1*/
+                                        0101 /*Data 2*/
+              </documentPart>
+              </document>
+    val doc = new Document(xml, null)
+    val dp = doc.documentParts.collect { case x: BitsDocumentPart => x }
+    val firstPart = dp(0)
+    assertEquals("0101000101", firstPart.bitDigits)
+  }
+
+  @Test def testCommentByte(): Unit = {
+    val xml = <document><documentPart type="byte">12 3A BC.abc //Label 
(ABCDEF123456789</documentPart></document>
+    val doc = new Document(xml, null)
+    val dp = doc.documentParts.collect { case x: ByteDocumentPart => x }
+    val hexDigits = dp(0).hexDigits
+    assertEquals("123ABCabc", hexDigits)
+  }
+
+    @Test def testCommentByteWithNewLine(): Unit = {
+    val xml = <document><documentPart type="byte">123ABCabc //Label 
(ABCDEF123456789
+      456DEFdef //New Label</documentPart></document>
+    val doc = new Document(xml, null)
+    val dp = doc.documentParts.collect { case x: ByteDocumentPart => x }
+    val hexDigits = dp(0).hexDigits
+    assertEquals("123ABCabc456DEFdef", hexDigits)
+  }
+
+  @Test def testCommentByteBothCommentFormatsNewLine(): Unit = {
+    val xml = <document><documentPart type="byte">12AB3C /*Comment ABC123 ** */
+    45D6d//Different style comment</documentPart></document>
+    val doc = new Document(xml, null)
+    val dp = doc.documentParts.collect { case x: ByteDocumentPart => x }
+    val hexDigits = dp(0).hexDigits
+    assertEquals("12AB3C45D6d", hexDigits)
+  }
+
+  @Test def testCommentByteBothCommentFormats(): Unit = {
+    val xml = <document><documentPart type="byte">12AB3C /*Comment ABC123 ** 
*/45D6d//Different style comment</documentPart></document>
+    val doc = new Document(xml, null)
+    val dp = doc.documentParts.collect { case x: ByteDocumentPart => x }
+    val hexDigits = dp(0).hexDigits
+    assertEquals("12AB3C45D6d", hexDigits)
+  }
+
+  @Test def testByteBadCommentFormatException(): Unit = {
+    val xml = <document><documentPart type="byte">12AB3C Comment ABC123 ** 
*/45D6d//Different style comment</documentPart></document>
+    val exc = intercept[TDMLException] {
+      val doc = new Document(xml, null)
+      val dp = doc.documentParts.collect { case x: ByteDocumentPart => x }
+      val hexDigits = dp(0).hexDigits
+    }
+    assertTrue(exc.getMessage().contains("Improper formatting of /* */ style 
comment"))
+  }
+
+  @Test def testCommentByteNoWarningCharacters(): Unit = {
+    val xml = <document><documentPart type="byte">01|01|00
+                                                  
(AB).[AB]</documentPart></document>
+    val doc = new Document(xml, null)
+    val dp = doc.documentParts.collect { case x: ByteDocumentPart => x }
+    val hexDigits = dp(0).hexDigits
+    assertEquals("010100ABAB", hexDigits)
+  }
+
+  @Test def testCommentByteNoWarningCharactersWithInvalid(): Unit = {
+    val xml = <document><documentPart type="byte">01|01|00 !!
+                                                  
(AB).[AB]</documentPart></document>
+    val doc = new Document(xml, null)
+    val dp = doc.documentParts.collect { case x: ByteDocumentPart => x }
+    val hexDigits = dp(0).hexDigits
+    assertEquals("010100ABAB", hexDigits)
+  }
+
+  @Test def testCommentByteCommentNonGreedy(): Unit = {
+    val xml = <document><documentPart type="byte">0101AB /*Data 1*/ 0101ab 
/*Data 2*/</documentPart></document>
+    val doc = new Document(xml, null)
+    val dp = doc.documentParts.collect { case x: ByteDocumentPart => x }
+    val hexDigits = dp(0).hexDigits
+    assertEquals("0101AB0101ab", hexDigits)
+  }
+
+  @Test def testCommentByteNonGreedyNewLine(): Unit = {
+    val xml = <document>
+                <documentPart type="byte">
+                  <![CDATA[
+                    0101AB /*Data 1
+                            Explanation
+                            `0123456789
+                            [;,]'.\/'
+                            */
+                    0101ab /*Data 2
+                            Explanation
+                            ~)!@#$%^&*(
+                            {:<}">|?
+                            */
+                  ]]>
+                </documentPart>
+              </document>
+    val doc = new Document(xml, null)
+    val dp = doc.documentParts.collect { case x: ByteDocumentPart => x }
+    val hexDigits = dp(0).hexDigits
+    assertEquals("0101AB0101ab", hexDigits)
+  }
+
+  @Test def testCommentByteCarriageReturn(): Unit = {
+    val xml = <document>
+                <documentPart type="byte">
+                    01&#13;01AB /*Data 1*/
+                    0101ab /*Data 2*/
+                </documentPart>
+              </document>
+    val doc = new Document(xml, null)
+    val dp = doc.documentParts.collect { case x: ByteDocumentPart => x }
+    val hexDigits = dp(0).hexDigits
+    assertEquals("0101AB0101ab", hexDigits)
+  }
+
+  @Test def testMIL2045_47001D_Page70_TableB_I_With_Comment_Syntax_1(): Unit = 
{
+    val doc = new Document(
+      <document bitOrder="LSBFirst">
+        <documentPart type="bits" byteOrder="RTL">/*Version*/                  
       XXXX 0011</documentPart>
+        <documentPart type="bits" byteOrder="RTL">/*FPI*/                      
       XXX0 XXXX</documentPart>
+        <documentPart type="bits" byteOrder="RTL">/*Compression*/              
       NA       </documentPart>
+        <documentPart type="bits" byteOrder="RTL">/*GPI for Originator 
Address*/      XX1X XXXX</documentPart>
+        <documentPart type="bits" byteOrder="RTL">/*FPI for URN*/              
       X1XX XXXX</documentPart>
+        <documentPart type="bits" byteOrder="RTL">/*URN*/  X0000000 00000000 
01100111 1XXX XXXX</documentPart>
+        <documentPart type="bits" byteOrder="RTL">/*FPI for Unit Name*/        
       1XXX XXXX</documentPart>
+        <documentPart type="bits" byteOrder="RTL">/*Unit Name*/                
       X101 0101</documentPart>
+        <documentPart type="bits" byteOrder="RTL">                             
       0XXX XXXX</documentPart>
+        <documentPart type="bits" byteOrder="RTL">                             
       XX10 0111</documentPart>
+        <documentPart type="bits" byteOrder="RTL">                             
       01XX XXXX</documentPart>
+        <documentPart type="bits" byteOrder="RTL">                             
       XXX1 0010</documentPart>
+        <documentPart type="bits" byteOrder="RTL">                             
       100X XXXX</documentPart>
+        <documentPart type="bits" byteOrder="RTL">                             
       XXXX 1010</documentPart>
+        <documentPart type="bits" byteOrder="RTL">                             
       0001 XXXX</documentPart>
+        <documentPart type="bits" byteOrder="RTL">                             
       XXXX X100</documentPart>
+        <documentPart type="bits" byteOrder="RTL">                             
       1111 1XXX</documentPart>
+        <documentPart type="bits" byteOrder="RTL">                             
       XXXX XX11</documentPart>
+        <documentPart type="bits" byteOrder="RTL">/*GPI for Recip. Addr 
Group*/       XXXX X1XX</documentPart>
+        <documentPart type="bits" byteOrder="RTL">/*GRI for R_ONE*/            
       XXXX 0XXX</documentPart>
+        <documentPart type="bits" byteOrder="RTL">/*FPI for URN*/              
       XXX1 XXXX</documentPart>
+        <documentPart type="bits" byteOrder="RTL">/*URN*/ XXXX00000 00000000 
00000000 011X XXXX</documentPart>
+      </document>, null)
+    val doc1bits = doc.documentBits
+    doc1bits.length
+    val doc2 = new Document(
+      <document bitOrder="LSBFirst">
+        <documentPart type="byte"><![CDATA[
+            E3 67 00 80 55 67 92 1A FC 77 00 00 00
+         ]]></documentPart>
+      </document>, null)
+    val doc2bits = doc2.documentBits
+    assertEquals(doc2bits, doc1bits)
+  }
+
+  @Test def testMIL2045_47001D_Page70_TableB_I_With_Comment_Syntax_2(): Unit = 
{
+    val doc = new Document(
+      <document bitOrder="LSBFirst">
+        <documentPart type="bits" byteOrder="RTL">XXXX 0011                    
      //Version </documentPart>
+        <documentPart type="bits" byteOrder="RTL">XXX0 XXXX                    
          //FPI </documentPart>
+        <documentPart type="bits" byteOrder="RTL">NA                           
  //Compression </documentPart>
+        <documentPart type="bits" byteOrder="RTL">XX1X XXXX       //GPI for 
Originator Address </documentPart>
+        <documentPart type="bits" byteOrder="RTL">X1XX XXXX                    
  //FPI for URN </documentPart>
+        <documentPart type="bits" byteOrder="RTL">X0000000 00000000 01100111 
1XXX XXXX   //URN </documentPart>
+        <documentPart type="bits" byteOrder="RTL">1XXX XXXX                
//FPI for Unit Name </documentPart>
+        <documentPart type="bits" byteOrder="RTL">X101 0101                    
    //Unit Name </documentPart>
+        <documentPart type="bits" byteOrder="RTL">0XXX XXXX                    
                </documentPart>
+        <documentPart type="bits" byteOrder="RTL">XX10 0111                    
                </documentPart>
+        <documentPart type="bits" byteOrder="RTL">01XX XXXX                    
                </documentPart>
+        <documentPart type="bits" byteOrder="RTL">XXX1 0010                    
                </documentPart>
+        <documentPart type="bits" byteOrder="RTL">100X XXXX                    
                </documentPart>
+        <documentPart type="bits" byteOrder="RTL">XXXX 1010                    
                </documentPart>
+        <documentPart type="bits" byteOrder="RTL">0001 XXXX                    
                </documentPart>
+        <documentPart type="bits" byteOrder="RTL">XXXX X100                    
                </documentPart>
+        <documentPart type="bits" byteOrder="RTL">1111 1XXX                    
                </documentPart>
+        <documentPart type="bits" byteOrder="RTL">XXXX XX11                    
                </documentPart>
+        <documentPart type="bits" byteOrder="RTL">XXXX X1XX        //GPI for 
Recip. Addr Group </documentPart>
+        <documentPart type="bits" byteOrder="RTL">XXXX 0XXX                    
//GRI for R_ONE </documentPart>
+        <documentPart type="bits" byteOrder="RTL">XXX1 XXXX                    
  //FPI for URN </documentPart>
+        <documentPart type="bits" byteOrder="RTL">XXXX00000 00000000 00000000 
011X XXXX  //URN </documentPart>
+      </document>, null)
+    val doc1bits = doc.documentBits
+    doc1bits.length
+    val doc2 = new Document(
+      <document bitOrder="LSBFirst">
+        <documentPart type="byte"><![CDATA[
+            E3 67 00 80 55 67 92 1A FC 77 00 00 00
+         ]]></documentPart>
+      </document>, null)
+    val doc2bits = doc2.documentBits
+    assertEquals(doc2bits, doc1bits)
+  }
+
+}
\ No newline at end of file

Reply via email to