bsloane1650 commented on a change in pull request #245: Added
X-DFDL-5-BIT-DFI-1661-DUI-001 char encoding
URL: https://github.com/apache/incubator-daffodil/pull/245#discussion_r297210463
##########
File path:
daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/X_DFDL_MIL_STD.scala
##########
@@ -43,7 +43,15 @@ object BitsCharset6BitDFI264DUI001 extends {
object BitsCharset6BitDFI311DUI002 extends {
override val name = "X-DFDL-6-BIT-DFI-311-DUI-002"
override val bitWidthOfACodeUnit = 6
- override val decodeString =
"""\u00A0ABCDEFGHIJKLMNOPQRSTuVWXYZ\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD
\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD0123456789\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"""
+ override val decodeString =
"""\u00A0ABCDEFGHIJKLMNOPQRSTUVWXYZ\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD
\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD0123456789\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"""
+ override val replacementCharCode = 0x0
+ override val requiredBitOrder = BitOrder.LeastSignificantBitFirst
+} with BitsCharsetNonByteSize
+
+object BitsCharset5BitDFI1661DUI001 extends {
+ override val name = "X-DFDL-5-BIT-DFI-1661-DUI-001"
+ override val bitWidthOfACodeUnit = 5
+ override val decodeString =
"""\u00A0ABCDEFGHIJKLMNOPQRSTUVWXYZ\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"""
Review comment:
How should I be doing this then?
To avoid issues of text encoding, I switched my schema to be a sequence of
5-bit unsigned integers:
```
<xs:element name="fiveBitDFI1661DUI001"
dfdl:bitOrder="leastSignificantBitFirst" dfdl:byteOrder="littleEndian">
<xs:complexType>
<xs:sequence>
<xs:element name="character" type="xs:unsignedInt"
dfdl:lengthKind="explicit" dfdl:lengthUnits="bits" dfdl:length="5"
dfdl:representation="binary"
dfdl:bitOrder="leastSignificantBitFirst"
dfdl:byteOrder="littleEndian" maxOccurs="unbounded"
dfdl:occursCountKind="parsed"
dfdl:alignmentUnits="bits"/>
</xs:sequence>
</xs:complexType>
</xs:element>
```
When the document is specified as LSBFirst, RTL, the behaviour is
consistent with what I was seeing with text encoding:
```
<?xml version="1.0"?>
<ex:fiveBitDFI1661DUI001 xmlns:ex="http://example.com">
<ex:character>31</ex:character>
<ex:character>30</ex:character>
<ex:character>29</ex:character>
<ex:character>28</ex:character>
<ex:character>27</ex:character>
<ex:character>26</ex:character>
<ex:character>25</ex:character>
<ex:character>24</ex:character>
<ex:character>23</ex:character>
<ex:character>22</ex:character>
<ex:character>21</ex:character>
<ex:character>20</ex:character>
<ex:character>19</ex:character>
<ex:character>18</ex:character>
<ex:character>17</ex:character>
<ex:character>16</ex:character>
<ex:character>15</ex:character>
<ex:character>14</ex:character>
<ex:character>13</ex:character>
<ex:character>12</ex:character>
<ex:character>11</ex:character>
<ex:character>10</ex:character>
<ex:character>9</ex:character>
<ex:character>8</ex:character>
<ex:character>7</ex:character>
<ex:character>6</ex:character>
<ex:character>5</ex:character>
<ex:character>4</ex:character>
<ex:character>3</ex:character>
<ex:character>2</ex:character>
<ex:character>1</ex:character>
<ex:character>0</ex:character>
</ex:fiveBitDFI1661DUI001>
```
However, when I switch to LTR, the behaviour becomes hard to explain:
```
<?xml version="1.0"?>
<ex:fiveBitDFI1661DUI001 xmlns:ex="http://example.com">
<ex:character>0</ex:character>
<ex:character>0</ex:character>
<ex:character>17</ex:character>
<ex:character>4</ex:character>
<ex:character>3</ex:character>
<ex:character>10</ex:character>
<ex:character>28</ex:character>
<ex:character>24</ex:character>
<ex:character>2</ex:character>
<ex:character>2</ex:character>
<ex:character>21</ex:character>
<ex:character>12</ex:character>
<ex:character>27</ex:character>
<ex:character>26</ex:character>
<ex:character>28</ex:character>
<ex:character>25</ex:character>
<ex:character>4</ex:character>
<ex:character>12</ex:character>
<ex:character>25</ex:character>
<ex:character>20</ex:character>
<ex:character>3</ex:character>
<ex:character>11</ex:character>
<ex:character>29</ex:character>
<ex:character>26</ex:character>
<ex:character>6</ex:character>
<ex:character>14</ex:character>
<ex:character>29</ex:character>
<ex:character>28</ex:character>
<ex:character>27</ex:character>
<ex:character>27</ex:character>
<ex:character>29</ex:character>
<ex:character>27</ex:character>
</ex:fiveBitDFI1661DUI001>
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services