This is an automated email from the ASF dual-hosted git repository.
slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git
The following commit(s) were added to refs/heads/main by this push:
new 899fd431e Add support for comment syntax in
BitDocumentPart/ByteDocumentPart
899fd431e is described below
commit 899fd431e9b37e0f07fe737937d665dc149003a9
Author: Varun Zaver <[email protected]>
AuthorDate: Tue May 31 09:26:56 2022 -0500
Add support for comment syntax in BitDocumentPart/ByteDocumentPart
By creating a comment syntax, we allow users to add labels within
documentPart elements of type bits and byte.
Two regexes are used to detect the comment syntax, // and /* */.
Anything from // to the end of the line and anything from /* to */ is
ignored and not interpreted as user data. The remaining data is parsed;
any invalid character is skipped, and a warning is logged notifying the
user that the data contains invalid characters and that a comment
should be considered. Some invalid characters (| ( ) [ ] . X x, space,
and line endings) are skipped without triggering a warning because they
are already used extensively.
DAFFODIL-2369
---
.../org/apache/daffodil/tdml/TDMLRunner.scala | 54 +++-
.../apache/daffodil/tdml/UnitTestTDMLRunner.scala | 316 ++++++++++++++++++++-
2 files changed, 358 insertions(+), 12 deletions(-)
diff --git
a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
index a1b3d9353..de897236b 100644
--- a/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
+++ b/daffodil-tdml-lib/src/main/scala/org/apache/daffodil/tdml/TDMLRunner.scala
@@ -2208,20 +2208,14 @@ class ByteDocumentPart(part: Node, parent: Document)
extends DataDocumentPart(pa
bits
}
- // Note: anything that is not a valid hex digit (or binary digit for binary)
is simply skipped
- // TODO: we should check for whitespace and other characters we want to
allow, and verify them.
- // TODO: Or better, validate this in the XML Schema for tdml via a pattern
facet
- // TODO: Consider whether to support a comment syntax. When showing data
examples this may be useful.
- //
- lazy val hexDigits = partRawContent.flatMap { ch => if
(validHexDigits.contains(ch)) List(ch) else Nil }
+ lazy val hexDigits = CanonData.canonicalizeData(validHexDigits,
partRawContent)
}
-class BitsDocumentPart(part: Node, parent: Document) extends
DataDocumentPart(part, parent) {
- lazy val bitDigits = {
- val res = partRawContent.split("[^01]").mkString
- res
- }
+final class BitsDocumentPart(part: Node, parent: Document) extends
DataDocumentPart(part, parent) {
+ val validBits = "01"
+
+ lazy val bitDigits = CanonData.canonicalizeData(validBits, partRawContent)
lazy val dataBits = partByteOrder match {
case LTR => {
@@ -2360,6 +2354,44 @@ sealed abstract class DocumentPart(part: Node, parent:
Document) {
}
+object CanonData {
+ private lazy val doubleForwardPattern = "//.*".r
+ private lazy val openClosePattern = "(?s)/[*].*?[*]/".r
+ private lazy val noWarnCharsSet = "|()[].Xx \n\r"
+
+ /*
+ * Allow "//" and "/* */" to act as comments.
+ * Any valid XML characters not explicitly allowed are also considered
comments and are removed.
+ */
+ def canonicalizeData(validCharactersSet: String, userData: String): String =
{
+ var doWarning: Boolean = false
+
+ //Remove the comments (//) and (/* */)
+ val noCommentsForward = doubleForwardPattern.replaceAllIn(userData, "")
+ val noCommentsBothFormats =
openClosePattern.replaceAllIn(noCommentsForward, "")
+
+ //Throw exception if /* or */ still found. This means user input was not
formatted correctly.
+ if (noCommentsBothFormats.contains("/*") ||
noCommentsBothFormats.contains("*/")) {
+ throw TDMLException("Improper formatting of /* */ style comment", None)
+ }
+
+ //Check value of the characters, if invalid character found create log and
skip over it
+ val validData = noCommentsBothFormats.filter { ch =>
+ if (validCharactersSet.contains(ch)) true
+ else {
+ if (!noWarnCharsSet.contains(ch)) doWarning = true
+ false
+ }
+ }
+
+ if (doWarning) {
+ Logger.log.warn("Data contains invalid character(s). Consider using a
comment (// or /* */).")
+ }
+
+ validData
+ }
+}
+
case class Infoset(i: NodeSeq, parent: TestCase) {
lazy val Seq(dfdlInfoset) = (i \ "dfdlInfoset").map { node =>
DFDLInfoset(node, this) }
lazy val contents = dfdlInfoset.contents
diff --git
a/daffodil-tdml-lib/src/test/scala/org/apache/daffodil/tdml/UnitTestTDMLRunner.scala
b/daffodil-tdml-lib/src/test/scala/org/apache/daffodil/tdml/UnitTestTDMLRunner.scala
index 25301ac49..3b75e38cf 100644
---
a/daffodil-tdml-lib/src/test/scala/org/apache/daffodil/tdml/UnitTestTDMLRunner.scala
+++
b/daffodil-tdml-lib/src/test/scala/org/apache/daffodil/tdml/UnitTestTDMLRunner.scala
@@ -490,4 +490,318 @@ class UnitTestTDMLRunner {
assertTrue(dataElem ne null)
runner.reset
}
-}
+
+ @Test def testCommentBit(): Unit = {
+ val xml = <document bitOrder="LSBFirst"><documentPart type="bits">00000010
//this is a label111</documentPart></document>
+ val doc = new Document(xml, null)
+ val dp = doc.documentParts.collect { case x: BitsDocumentPart => x }
+ val firstPart = dp(0)
+ assertEquals("00000010", firstPart.bitDigits)
+ }
+
+ @Test def testCommentBitWithNewLine(): Unit = {
+ val xml = <document bitOrder="LSBFirst">
+ <documentPart type="bits">01 01 11 //flagByte1
+ 1 //bool2</documentPart>
+ </document>
+ val doc = new Document(xml, null)
+ val dp = doc.documentParts.collect { case x: BitsDocumentPart => x }
+ val firstPart = dp(0)
+ assertEquals("0101111", firstPart.bitDigits)
+ }
+
+ @Test def testCommentBitJustComments(): Unit = {
+ val xml = <document bitOrder="LSBFirst">
+ <documentPart type="bits">
+ // this doc part contains no bits
+ // at all. It is just comments.
+ // 101010101
+ </documentPart>
+ </document>
+ val doc = new Document(xml, null)
+ val dp = doc.documentParts.collect { case x: BitsDocumentPart => x }
+ val firstPart = dp(0)
+ assertEquals("", firstPart.bitDigits)
+ }
+
+ @Test def testCommentBitNoLineEnding(): Unit = {
+ val xml = <document bitOrder="LSBFirst"><documentPart type="bits">01011010
// just a comment here no line ending </documentPart>
+ </document>
+ val doc = new Document(xml, null)
+ val dp = doc.documentParts.collect { case x: BitsDocumentPart => x }
+ val firstPart = dp(0)
+ assertEquals("01011010", firstPart.bitDigits)
+ }
+
+ @Test def testCommentBitBothCommentFormatsNewLine(): Unit = {
+ val xml = <document bitOrder="LSBFirst">
+ <documentPart type="bits">0100110110 /*C0mment 01011111 _01*/11
+100111//D1fferent sty1e c0mment</documentPart>
+ </document>
+ val doc = new Document(xml, null)
+ val dp = doc.documentParts.collect { case x: BitsDocumentPart => x }
+ val firstPart = dp(0)
+ assertEquals("010011011011100111", firstPart.bitDigits)
+ }
+
+ @Test def testCommentBitBothCommentFormats(): Unit = {
+ val xml = <document bitOrder="LSBFirst">
+ <documentPart type="bits">0100110110 /*C0mment 01011111
_01*/100111//D1fferent sty1e c0mment</documentPart>
+ </document>
+ val doc = new Document(xml, null)
+ val dp = doc.documentParts.collect { case x: BitsDocumentPart => x }
+ val firstPart = dp(0)
+ assertEquals("0100110110100111", firstPart.bitDigits)
+ }
+
+ @Test def testBitBadCommentFormatException(): Unit = {
+ val xml = <document bitOrder="LSBFirst">
+ <documentPart type="bits">0100110110 C0mment 01011111
_01100*/111//D1fferent sty1e c0mment</documentPart>
+ </document>
+ val exc = intercept[TDMLException] {
+ val doc = new Document(xml, null)
+ doc.documentParts.collect { case x: BitsDocumentPart => x }
+ val dp = doc.documentParts.collect { case x: BitsDocumentPart => x }
+ val firstPart = dp(0).bitDigits
+ }
+ assertTrue(exc.getMessage().contains("Improper formatting of /* */ style
comment"))
+ }
+
+ @Test def testCommentBitNoWarningCharacters(): Unit = {
+ val xml = <document bitOrder="LSBFirst">
+ <documentPart type="bits">01|01|00
+ (10).[01]</documentPart></document>
+ val doc = new Document(xml, null)
+ val dp = doc.documentParts.collect { case x: BitsDocumentPart => x }
+ val firstPart = dp(0)
+ assertEquals("0101001001", firstPart.bitDigits)
+ }
+
+ @Test def testCommentBitNoWarningCharactersWithInvalid(): Unit = {
+ val xml = <document bitOrder="LSBFirst">
+ <documentPart type="bits">01|01|00 !!
+ (10).[01]</documentPart></document>
+ val doc = new Document(xml, null)
+ val dp = doc.documentParts.collect { case x: BitsDocumentPart => x }
+ val firstPart = dp(0)
+ assertEquals("0101001001", firstPart.bitDigits)
+ }
+
+ @Test def testCommentBitNonGreedy(): Unit = {
+ val xml = <document bitOrder="LSBFirst">
+ <documentPart type="bits">0101 /*Data 1*/ 0101 /*Data
2*/</documentPart></document>
+ val doc = new Document(xml, null)
+ val dp = doc.documentParts.collect { case x: BitsDocumentPart => x }
+ val firstPart = dp(0)
+ assertEquals("01010101", firstPart.bitDigits)
+ }
+
+ @Test def testCommentBitNonGreedyNewLine(): Unit = {
+ val xml = <document bitOrder="LSBFirst">
+ <documentPart type="bits">0101 /*Data 1
+ Explanation*/
+ 0101 /*Data 2
+ Explanation*/
+ </documentPart>
+ </document>
+ val doc = new Document(xml, null)
+ val dp = doc.documentParts.collect { case x: BitsDocumentPart => x }
+ val firstPart = dp(0)
+ assertEquals("01010101", firstPart.bitDigits)
+ }
+
+ @Test def testCommentBitCarriageReturn(): Unit = {
+ val xml = <document bitOrder="LSBFirst">
+ <documentPart type="bits">0101 00 /*Data 1*/
+ 0101 /*Data 2*/
+ </documentPart>
+ </document>
+ val doc = new Document(xml, null)
+ val dp = doc.documentParts.collect { case x: BitsDocumentPart => x }
+ val firstPart = dp(0)
+ assertEquals("0101000101", firstPart.bitDigits)
+ }
+
+ @Test def testCommentByte(): Unit = {
+ val xml = <document><documentPart type="byte">12 3A BC.abc //Label
(ABCDEF123456789</documentPart></document>
+ val doc = new Document(xml, null)
+ val dp = doc.documentParts.collect { case x: ByteDocumentPart => x }
+ val hexDigits = dp(0).hexDigits
+ assertEquals("123ABCabc", hexDigits)
+ }
+
+ @Test def testCommentByteWithNewLine(): Unit = {
+ val xml = <document><documentPart type="byte">123ABCabc //Label
(ABCDEF123456789
+ 456DEFdef //New Label</documentPart></document>
+ val doc = new Document(xml, null)
+ val dp = doc.documentParts.collect { case x: ByteDocumentPart => x }
+ val hexDigits = dp(0).hexDigits
+ assertEquals("123ABCabc456DEFdef", hexDigits)
+ }
+
+ @Test def testCommentByteBothCommentFormatsNewLine(): Unit = {
+ val xml = <document><documentPart type="byte">12AB3C /*Comment ABC123 ** */
+ 45D6d//Different style comment</documentPart></document>
+ val doc = new Document(xml, null)
+ val dp = doc.documentParts.collect { case x: ByteDocumentPart => x }
+ val hexDigits = dp(0).hexDigits
+ assertEquals("12AB3C45D6d", hexDigits)
+ }
+
+ @Test def testCommentByteBothCommentFormats(): Unit = {
+ val xml = <document><documentPart type="byte">12AB3C /*Comment ABC123 **
*/45D6d//Different style comment</documentPart></document>
+ val doc = new Document(xml, null)
+ val dp = doc.documentParts.collect { case x: ByteDocumentPart => x }
+ val hexDigits = dp(0).hexDigits
+ assertEquals("12AB3C45D6d", hexDigits)
+ }
+
+ @Test def testByteBadCommentFormatException(): Unit = {
+ val xml = <document><documentPart type="byte">12AB3C Comment ABC123 **
*/45D6d//Different style comment</documentPart></document>
+ val exc = intercept[TDMLException] {
+ val doc = new Document(xml, null)
+ val dp = doc.documentParts.collect { case x: ByteDocumentPart => x }
+ val hexDigits = dp(0).hexDigits
+ }
+ assertTrue(exc.getMessage().contains("Improper formatting of /* */ style
comment"))
+ }
+
+ @Test def testCommentByteNoWarningCharacters(): Unit = {
+ val xml = <document><documentPart type="byte">01|01|00
+
(AB).[AB]</documentPart></document>
+ val doc = new Document(xml, null)
+ val dp = doc.documentParts.collect { case x: ByteDocumentPart => x }
+ val hexDigits = dp(0).hexDigits
+ assertEquals("010100ABAB", hexDigits)
+ }
+
+ @Test def testCommentByteNoWarningCharactersWithInvalid(): Unit = {
+ val xml = <document><documentPart type="byte">01|01|00 !!
+
(AB).[AB]</documentPart></document>
+ val doc = new Document(xml, null)
+ val dp = doc.documentParts.collect { case x: ByteDocumentPart => x }
+ val hexDigits = dp(0).hexDigits
+ assertEquals("010100ABAB", hexDigits)
+ }
+
+ @Test def testCommentByteCommentNonGreedy(): Unit = {
+ val xml = <document><documentPart type="byte">0101AB /*Data 1*/ 0101ab
/*Data 2*/</documentPart></document>
+ val doc = new Document(xml, null)
+ val dp = doc.documentParts.collect { case x: ByteDocumentPart => x }
+ val hexDigits = dp(0).hexDigits
+ assertEquals("0101AB0101ab", hexDigits)
+ }
+
+ @Test def testCommentByteNonGreedyNewLine(): Unit = {
+ val xml = <document>
+ <documentPart type="byte">
+ <![CDATA[
+ 0101AB /*Data 1
+ Explanation
+ `0123456789
+ [;,]'.\/'
+ */
+ 0101ab /*Data 2
+ Explanation
+ ~)!@#$%^&*(
+ {:<}">|?
+ */
+ ]]>
+ </documentPart>
+ </document>
+ val doc = new Document(xml, null)
+ val dp = doc.documentParts.collect { case x: ByteDocumentPart => x }
+ val hexDigits = dp(0).hexDigits
+ assertEquals("0101AB0101ab", hexDigits)
+ }
+
+ @Test def testCommentByteCarriageReturn(): Unit = {
+ val xml = <document>
+ <documentPart type="byte">
+ 01 01AB /*Data 1*/
+ 0101ab /*Data 2*/
+ </documentPart>
+ </document>
+ val doc = new Document(xml, null)
+ val dp = doc.documentParts.collect { case x: ByteDocumentPart => x }
+ val hexDigits = dp(0).hexDigits
+ assertEquals("0101AB0101ab", hexDigits)
+ }
+
+ @Test def testMIL2045_47001D_Page70_TableB_I_With_Comment_Syntax_1(): Unit =
{
+ val doc = new Document(
+ <document bitOrder="LSBFirst">
+ <documentPart type="bits" byteOrder="RTL">/*Version*/
XXXX 0011</documentPart>
+ <documentPart type="bits" byteOrder="RTL">/*FPI*/
XXX0 XXXX</documentPart>
+ <documentPart type="bits" byteOrder="RTL">/*Compression*/
NA </documentPart>
+ <documentPart type="bits" byteOrder="RTL">/*GPI for Originator
Address*/ XX1X XXXX</documentPart>
+ <documentPart type="bits" byteOrder="RTL">/*FPI for URN*/
X1XX XXXX</documentPart>
+ <documentPart type="bits" byteOrder="RTL">/*URN*/ X0000000 00000000
01100111 1XXX XXXX</documentPart>
+ <documentPart type="bits" byteOrder="RTL">/*FPI for Unit Name*/
1XXX XXXX</documentPart>
+ <documentPart type="bits" byteOrder="RTL">/*Unit Name*/
X101 0101</documentPart>
+ <documentPart type="bits" byteOrder="RTL">
0XXX XXXX</documentPart>
+ <documentPart type="bits" byteOrder="RTL">
XX10 0111</documentPart>
+ <documentPart type="bits" byteOrder="RTL">
01XX XXXX</documentPart>
+ <documentPart type="bits" byteOrder="RTL">
XXX1 0010</documentPart>
+ <documentPart type="bits" byteOrder="RTL">
100X XXXX</documentPart>
+ <documentPart type="bits" byteOrder="RTL">
XXXX 1010</documentPart>
+ <documentPart type="bits" byteOrder="RTL">
0001 XXXX</documentPart>
+ <documentPart type="bits" byteOrder="RTL">
XXXX X100</documentPart>
+ <documentPart type="bits" byteOrder="RTL">
1111 1XXX</documentPart>
+ <documentPart type="bits" byteOrder="RTL">
XXXX XX11</documentPart>
+ <documentPart type="bits" byteOrder="RTL">/*GPI for Recip. Addr
Group*/ XXXX X1XX</documentPart>
+ <documentPart type="bits" byteOrder="RTL">/*GRI for R_ONE*/
XXXX 0XXX</documentPart>
+ <documentPart type="bits" byteOrder="RTL">/*FPI for URN*/
XXX1 XXXX</documentPart>
+ <documentPart type="bits" byteOrder="RTL">/*URN*/ XXXX00000 00000000
00000000 011X XXXX</documentPart>
+ </document>, null)
+ val doc1bits = doc.documentBits
+ doc1bits.length
+ val doc2 = new Document(
+ <document bitOrder="LSBFirst">
+ <documentPart type="byte"><![CDATA[
+ E3 67 00 80 55 67 92 1A FC 77 00 00 00
+ ]]></documentPart>
+ </document>, null)
+ val doc2bits = doc2.documentBits
+ assertEquals(doc2bits, doc1bits)
+ }
+
+ @Test def testMIL2045_47001D_Page70_TableB_I_With_Comment_Syntax_2(): Unit =
{
+ val doc = new Document(
+ <document bitOrder="LSBFirst">
+ <documentPart type="bits" byteOrder="RTL">XXXX 0011
//Version </documentPart>
+ <documentPart type="bits" byteOrder="RTL">XXX0 XXXX
//FPI </documentPart>
+ <documentPart type="bits" byteOrder="RTL">NA
//Compression </documentPart>
+ <documentPart type="bits" byteOrder="RTL">XX1X XXXX //GPI for
Originator Address </documentPart>
+ <documentPart type="bits" byteOrder="RTL">X1XX XXXX
//FPI for URN </documentPart>
+ <documentPart type="bits" byteOrder="RTL">X0000000 00000000 01100111
1XXX XXXX //URN </documentPart>
+ <documentPart type="bits" byteOrder="RTL">1XXX XXXX
//FPI for Unit Name </documentPart>
+ <documentPart type="bits" byteOrder="RTL">X101 0101
//Unit Name </documentPart>
+ <documentPart type="bits" byteOrder="RTL">0XXX XXXX
</documentPart>
+ <documentPart type="bits" byteOrder="RTL">XX10 0111
</documentPart>
+ <documentPart type="bits" byteOrder="RTL">01XX XXXX
</documentPart>
+ <documentPart type="bits" byteOrder="RTL">XXX1 0010
</documentPart>
+ <documentPart type="bits" byteOrder="RTL">100X XXXX
</documentPart>
+ <documentPart type="bits" byteOrder="RTL">XXXX 1010
</documentPart>
+ <documentPart type="bits" byteOrder="RTL">0001 XXXX
</documentPart>
+ <documentPart type="bits" byteOrder="RTL">XXXX X100
</documentPart>
+ <documentPart type="bits" byteOrder="RTL">1111 1XXX
</documentPart>
+ <documentPart type="bits" byteOrder="RTL">XXXX XX11
</documentPart>
+ <documentPart type="bits" byteOrder="RTL">XXXX X1XX //GPI for
Recip. Addr Group </documentPart>
+ <documentPart type="bits" byteOrder="RTL">XXXX 0XXX
//GRI for R_ONE </documentPart>
+ <documentPart type="bits" byteOrder="RTL">XXX1 XXXX
//FPI for URN </documentPart>
+ <documentPart type="bits" byteOrder="RTL">XXXX00000 00000000 00000000
011X XXXX //URN </documentPart>
+ </document>, null)
+ val doc1bits = doc.documentBits
+ doc1bits.length
+ val doc2 = new Document(
+ <document bitOrder="LSBFirst">
+ <documentPart type="byte"><![CDATA[
+ E3 67 00 80 55 67 92 1A FC 77 00 00 00
+ ]]></documentPart>
+ </document>, null)
+ val doc2bits = doc2.documentBits
+ assertEquals(doc2bits, doc1bits)
+ }
+
+}
\ No newline at end of file