This is an automated email from the ASF dual-hosted git repository.
slawrence pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/daffodil.git
The following commit(s) were added to refs/heads/main by this push:
new c81b213b0 Added support for pluggable charsets
c81b213b0 is described below
commit c81b213b02ac414a39290787cf2eea14d83fdc26
Author: alexanderrevello <[email protected]>
AuthorDate: Mon Mar 7 11:27:49 2022 -0500
Added support for pluggable charsets
Refactored the existing charsets to use BitsCharsetDefinition and added
support for loading such definitions. To implement a charset, follow the
same format and add the fully qualified class name of its definition to the
file in resources/META-INF/services. An example of this process is in
daffodil-test/.../charsets
DAFFODIL-2663
---
...ffodil.processors.charset.BitsCharsetDefinition | 53 +++++++
.../processors/charset/AISPayloadArmoring.scala | 3 +
.../apache/daffodil/processors/charset/Base4.scala | 6 +
.../daffodil/processors/charset/Binary.scala | 6 +
.../daffodil/processors/charset/BitsCharset.scala | 1 -
...BitPacked.scala => BitsCharsetDefinition.scala} | 21 ++-
...d.scala => BitsCharsetDefinitionRegistry.scala} | 32 ++--
.../daffodil/processors/charset/CharsetUtils.scala | 11 +-
.../charset/DaffodilCharsetProvider.scala | 83 -----------
.../apache/daffodil/processors/charset/Hex.scala | 6 +
.../daffodil/processors/charset/IBM037.scala | 11 +-
.../daffodil/processors/charset/ISO88591.scala | 2 +
.../processors/charset/ISO885918BitPacked.scala | 6 +
.../apache/daffodil/processors/charset/Octal.scala | 6 +
.../daffodil/processors/charset/USASCII.scala | 7 +-
.../processors/charset/USASCII5BitPacked.scala | 3 +
.../processors/charset/USASCII6BitPacked.scala | 10 +-
.../processors/charset/USASCII7BitPacked.scala | 3 +
.../daffodil/processors/charset/UTF16BE.scala | 7 +-
.../daffodil/processors/charset/UTF16LE.scala | 2 +
.../daffodil/processors/charset/UTF32BE.scala | 7 +-
.../daffodil/processors/charset/UTF32LE.scala | 3 +
.../apache/daffodil/processors/charset/UTF8.scala | 3 +
.../processors/charset/X_DFDL_MIL_STD.scala | 29 +++-
.../apache/daffodil/processors/EvEncoding.scala | 13 +-
...ffodil.processors.charset.BitsCharsetDefinition | 18 +++
.../charsets/TestBitsCharsetDefinition.dfdl.xsd | 69 +++++++++
.../charsets/TestBitsCharsetDefinition.tdml | 162 +++++++++++++++++++++
.../daffodil/charsets/ISO_8859_1_Reverse.scala | 28 ++--
.../charsets/TestBitsCharsetDefinition.scala | 45 ++++++
.../apache/daffodil/charsets/TestISO_8859_13.scala | 27 +++-
31 files changed, 527 insertions(+), 156 deletions(-)
diff --git
a/daffodil-io/src/main/resources/META-INF/services/org.apache.daffodil.processors.charset.BitsCharsetDefinition
b/daffodil-io/src/main/resources/META-INF/services/org.apache.daffodil.processors.charset.BitsCharsetDefinition
new file mode 100644
index 000000000..0fdbf83e9
--- /dev/null
+++
b/daffodil-io/src/main/resources/META-INF/services/org.apache.daffodil.processors.charset.BitsCharsetDefinition
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+org.apache.daffodil.processors.charset.BitsCharset3BitDFI336DUI001Definition
+org.apache.daffodil.processors.charset.BitsCharset3BitDFI746DUI002Definition
+org.apache.daffodil.processors.charset.BitsCharset3BitDFI747DUI001Definition
+org.apache.daffodil.processors.charset.BitsCharset4BitDFI746DUI002Definition
+org.apache.daffodil.processors.charset.BitsCharset5BitDFI1661DUI001Definition
+org.apache.daffodil.processors.charset.BitsCharset5BitDFI769DUI002Definition
+org.apache.daffodil.processors.charset.BitsCharset5BitPackedLSBFDefinition
+org.apache.daffodil.processors.charset.BitsCharset6BitDFI264DUI001Definition
+org.apache.daffodil.processors.charset.BitsCharset6BitDFI311DUI002Definition
+org.apache.daffodil.processors.charset.BitsCharset6BitICAOAircraftIDDefinition
+org.apache.daffodil.processors.charset.BitsCharsetAISPayloadArmoringDefinition
+org.apache.daffodil.processors.charset.BitsCharsetBase4LSBFDefinition
+org.apache.daffodil.processors.charset.BitsCharsetBase4MSBFDefinition
+org.apache.daffodil.processors.charset.BitsCharsetBinaryLSBFDefinition
+org.apache.daffodil.processors.charset.BitsCharsetBinaryMSBFDefinition
+org.apache.daffodil.processors.charset.BitsCharsetHexLSBFDefinition
+org.apache.daffodil.processors.charset.BitsCharsetHexMSBFDefinition
+org.apache.daffodil.processors.charset.BitsCharsetEBCDIC_CP_USDefinition
+org.apache.daffodil.processors.charset.BitsCharsetIBM037Definition
+org.apache.daffodil.processors.charset.BitsCharsetISO885918BitPackedLSBFDefinition
+org.apache.daffodil.processors.charset.BitsCharsetISO885918BitPackedMSBFDefinition
+org.apache.daffodil.processors.charset.BitsCharsetISO88591Definition
+org.apache.daffodil.processors.charset.BitsCharsetOctalLSBFDefinition
+org.apache.daffodil.processors.charset.BitsCharsetOctalMSBFDefinition
+org.apache.daffodil.processors.charset.BitsCharsetUSASCII6BitPackedDefinition
+org.apache.daffodil.processors.charset.BitsCharsetUSASCII6BitPackedLSBFDefinition
+org.apache.daffodil.processors.charset.BitsCharsetUSASCII6BitPackedMSBFDefinition
+org.apache.daffodil.processors.charset.BitsCharsetUSASCII7BitPackedDefinition
+org.apache.daffodil.processors.charset.BitsCharsetASCIIDefinition
+org.apache.daffodil.processors.charset.BitsCharsetUSASCIIDefinition
+org.apache.daffodil.processors.charset.BitsCharsetUTF16Definition
+org.apache.daffodil.processors.charset.BitsCharsetUTF16BEDefinition
+org.apache.daffodil.processors.charset.BitsCharsetUTF16LEDefinition
+org.apache.daffodil.processors.charset.BitsCharsetUTF32Definition
+org.apache.daffodil.processors.charset.BitsCharsetUTF32BEDefinition
+org.apache.daffodil.processors.charset.BitsCharsetUTF32LEDefinition
+org.apache.daffodil.processors.charset.BitsCharsetUTF8Definition
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/AISPayloadArmoring.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/AISPayloadArmoring.scala
index b6e934e6c..f50fdcf1d 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/AISPayloadArmoring.scala
+++
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/AISPayloadArmoring.scala
@@ -44,3 +44,6 @@ object BitsCharsetAISPayloadArmoring extends {
override val replacementCharCode = 0x30
override val requiredBitOrder = BitOrder.MostSignificantBitFirst
} with BitsCharsetNonByteSize
+
+final class BitsCharsetAISPayloadArmoringDefinition
+ extends BitsCharsetDefinition(BitsCharsetAISPayloadArmoring)
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/Base4.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/Base4.scala
index 2f0eec995..0f67a8e40 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/Base4.scala
+++
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/Base4.scala
@@ -31,6 +31,9 @@ object BitsCharsetBase4LSBF extends {
override val requiredBitOrder = BitOrder.LeastSignificantBitFirst
} with BitsCharsetNonByteSize
+final class BitsCharsetBase4LSBFDefinition
+ extends BitsCharsetDefinition(BitsCharsetBase4LSBF)
+
object BitsCharsetBase4MSBF extends {
override val name = "X-DFDL-BASE4-MSBF"
override val bitWidthOfACodeUnit = 2
@@ -38,3 +41,6 @@ object BitsCharsetBase4MSBF extends {
override val replacementCharCode = 0x0
override val requiredBitOrder = BitOrder.MostSignificantBitFirst
} with BitsCharsetNonByteSize
+
+final class BitsCharsetBase4MSBFDefinition
+ extends BitsCharsetDefinition(BitsCharsetBase4MSBF)
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/Binary.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/Binary.scala
index 450fd0a65..3bd2c38b8 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/Binary.scala
+++
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/Binary.scala
@@ -31,6 +31,9 @@ object BitsCharsetBinaryLSBF extends {
override val requiredBitOrder = BitOrder.LeastSignificantBitFirst
} with BitsCharsetNonByteSize
+final class BitsCharsetBinaryLSBFDefinition
+ extends BitsCharsetDefinition(BitsCharsetBinaryLSBF)
+
/**
* X-DFDL-BITS-MSBF occupies only 1 bit with each
* code unit.
@@ -42,3 +45,6 @@ object BitsCharsetBinaryMSBF extends {
override val replacementCharCode = 0x0
override val requiredBitOrder = BitOrder.MostSignificantBitFirst
} with BitsCharsetNonByteSize
+
+final class BitsCharsetBinaryMSBFDefinition
+ extends BitsCharsetDefinition(BitsCharsetBinaryMSBF)
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/BitsCharset.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/BitsCharset.scala
index 9520b4a2f..184e772e5 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/BitsCharset.scala
+++
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/BitsCharset.scala
@@ -52,7 +52,6 @@ trait BitsCharset extends Serializable {
case _ => false
}
def name: String
- def aliases: Seq[String] = Nil
def bitWidthOfACodeUnit: Int // in units of bits
def requiredBitOrder: BitOrder
def mandatoryBitAlignment: Int // ignored when dfdlx:alignmentKind is
'manual'
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/USASCII7BitPacked.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/BitsCharsetDefinition.scala
similarity index 60%
copy from
daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/USASCII7BitPacked.scala
copy to
daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/BitsCharsetDefinition.scala
index ca88a3ec5..72403faf0 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/USASCII7BitPacked.scala
+++
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/BitsCharsetDefinition.scala
@@ -14,19 +14,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.daffodil.processors.charset
-import org.apache.daffodil.schema.annotation.props.gen.BitOrder
-
/**
- * X-DFDL-US-ASCII-7-BIT-PACKED occupies only 7 bits with each
- * code unit.
+ * These are the classes which must be dynamically loaded in order to add a
charset implementation
+ * to Daffodil. Every charset must provide a subclass of this class that is listed in the
+ * org.apache.daffodil.processors.charset.BitsCharsetDefinition file in
+ * daffodil-io/src/main/resources/META-INF/services. name() always returns an
upper-case string (the alias if one is given, otherwise the charset name)
*/
-object BitsCharsetUSASCII7BitPacked extends {
- override val name = "X-DFDL-US-ASCII-7-BIT-PACKED"
- override val bitWidthOfACodeUnit = 7
- override val decodeString = (0 to 127).map { _.toChar }.mkString
- override val replacementCharCode = 0x3F
- override val requiredBitOrder = BitOrder.LeastSignificantBitFirst
-} with BitsCharsetNonByteSize
+abstract class BitsCharsetDefinition(charset: BitsCharset, alias:
Option[String] = None) {
+ final def name(): String = alias.getOrElse(charset.name).toUpperCase()
+
+ final def charset(): BitsCharset = charset
+}
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/USASCII5BitPacked.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/BitsCharsetDefinitionRegistry.scala
similarity index 55%
copy from
daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/USASCII5BitPacked.scala
copy to
daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/BitsCharsetDefinitionRegistry.scala
index d286115ba..8598e80b1 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/USASCII5BitPacked.scala
+++
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/BitsCharsetDefinitionRegistry.scala
@@ -14,27 +14,23 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
package org.apache.daffodil.processors.charset
-import org.apache.daffodil.schema.annotation.props.gen.BitOrder
-import org.apache.daffodil.util.MaybeInt
+import org.apache.daffodil.util.SimpleNamedServiceLoader
-/**
- * X-DFDL-5-BIT-PACKED-LSBF occupies only 5 bits with each
- * code unit.
+/*
+ * Finds all pluggable BitsCharsets and makes them available to Daffodil after
they have been
+ * set up as described in BitsCharsetDefinition.scala
*/
-object BitsCharset5BitPackedLSBF extends {
- override val name = "X-DFDL-5-BIT-PACKED-LSBF"
- override val bitWidthOfACodeUnit = 5
- override val decodeString = """01234567ABCDEFGHJKLMNPQRSTUVWXYZ"""
- override val replacementCharCode = 0x1D
- override val requiredBitOrder = BitOrder.LeastSignificantBitFirst
-} with BitsCharsetNonByteSize {
+object BitsCharsetDefinitionRegistry {
+
+ private lazy val bitsCharsetDefinitionMap: Map[String,
BitsCharsetDefinition] =
+
SimpleNamedServiceLoader.loadClass[BitsCharsetDefinition](classOf[BitsCharsetDefinition])
+
+ /**
+ * Given name, finds the BitsCharsetDefinition, or None if not found
+ */
+ def find(name: String): Option[BitsCharsetDefinition] =
bitsCharsetDefinitionMap.get(name)
- override def charToCode(char: Char) = {
- if (char == 'I') MaybeInt(1)
- else if (char == 'O') MaybeInt(0)
- else super.charToCode(char)
- }
+ def supportedEncodingsString = bitsCharsetDefinitionMap.keySet.mkString(", ")
}
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/CharsetUtils.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/CharsetUtils.scala
index a64fb15ae..c3b226778 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/CharsetUtils.scala
+++
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/CharsetUtils.scala
@@ -30,14 +30,17 @@ object CharsetUtils {
/**
* Call instead of Charset.forName to obtain Daffodil's less-than-byte-sized
- * encodings as well as the standard ones.
+ * encodings as well as the standard ones. This will return the charset if
it exists or null
*/
def getCharset(name: String): BitsCharset = {
- val cs = DaffodilCharsetProvider.charsetForName(name)
- cs
+ val cs =
BitsCharsetDefinitionRegistry.find(name.toUpperCase).getOrElse(null)
+ if (cs == null)
+ null
+ else
+ cs.charset
}
- def supportedEncodingsString = DaffodilCharsetProvider.charsets.map { _.name
}.mkString(", ")
+ def supportedEncodingsString =
BitsCharsetDefinitionRegistry.supportedEncodingsString
/**
* Subtle bug in decoders in Java 7 when there is room for only 1
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/DaffodilCharsetProvider.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/DaffodilCharsetProvider.scala
deleted file mode 100644
index 4ab32bd32..000000000
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/DaffodilCharsetProvider.scala
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.daffodil.processors.charset
-
-import org.apache.daffodil.exceptions.Assert
-
-object DaffodilCharsetProvider {
-
- lazy val charsets = Seq(
- BitsCharset3BitDFI336DUI001,
- BitsCharset3BitDFI746DUI002,
- BitsCharset3BitDFI747DUI001,
- BitsCharset4BitDFI746DUI002,
- BitsCharset5BitDFI769DUI002,
- BitsCharset5BitDFI1661DUI001,
- BitsCharset5BitPackedLSBF,
- BitsCharset6BitDFI264DUI001,
- BitsCharset6BitDFI311DUI002,
- BitsCharsetBase4LSBF,
- BitsCharsetBase4MSBF,
- BitsCharsetBinaryLSBF,
- BitsCharsetBinaryMSBF,
- BitsCharsetHexLSBF,
- BitsCharsetHexMSBF,
- BitsCharsetIBM037,
- BitsCharsetISO88591,
- BitsCharsetOctalLSBF,
- BitsCharsetOctalMSBF,
- BitsCharsetUSASCII,
- BitsCharsetUSASCII6BitPackedLSBF,
- BitsCharsetUSASCII6BitPackedMSBF,
- BitsCharsetUSASCII7BitPacked,
- BitsCharsetISO885918BitPackedLSBF,
- BitsCharsetISO885918BitPackedMSBF,
- BitsCharsetUTF16BE,
- BitsCharsetUTF16LE,
- BitsCharsetUTF32BE,
- BitsCharsetUTF32LE,
- BitsCharsetUTF8,
- BitsCharset6BitICAOAircraftID)
-
- private lazy val charsetMap = {
- val nameCharsetPairs = charsets.flatMap { cs =>
- val names = cs.name +: cs.aliases
- names.map { name => (name -> cs) }
- }
- nameCharsetPairs.toMap
- }
-
- /**
- * Retrieves a charset for the given charset name.
- *
- * @param charsetName
- * The name of the requested charset; may be either
- * a canonical name or an alias
- *
- * @return A charset object for the named charset,
- * or <tt>null</tt> if the named charset
- * is not supported by this provider
- */
- def charsetForName(charsetName: String): BitsCharset = {
- Assert.usage(charsetName != null);
- val lookupResult = charsetMap.get(charsetName)
- val cs = lookupResult.getOrElse(null)
- cs
- }
-
-}
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/Hex.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/Hex.scala
index bfcc9784a..b1d7af54c 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/Hex.scala
+++
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/Hex.scala
@@ -31,6 +31,9 @@ object BitsCharsetHexLSBF extends {
override val requiredBitOrder = BitOrder.LeastSignificantBitFirst
} with BitsCharsetNonByteSize
+final class BitsCharsetHexLSBFDefinition
+ extends BitsCharsetDefinition(BitsCharsetHexLSBF)
+
/**
* X-DFDL-HEX-MSBF occupies only 4 bits with each
* code unit.
@@ -42,3 +45,6 @@ object BitsCharsetHexMSBF extends {
override val replacementCharCode = 0x00
override val requiredBitOrder = BitOrder.MostSignificantBitFirst
} with BitsCharsetNonByteSize
+
+final class BitsCharsetHexMSBFDefinition
+ extends BitsCharsetDefinition(BitsCharsetHexMSBF)
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/IBM037.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/IBM037.scala
index b8a0710b7..f075e852c 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/IBM037.scala
+++
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/IBM037.scala
@@ -22,7 +22,6 @@ import org.apache.daffodil.io.FormatInfo
object BitsCharsetIBM037 extends {
override val name = "IBM037"
- override val aliases = Seq("EBCDIC-CP-US")
} with BitsCharsetJava {
val decodeStringTable =
@@ -51,7 +50,13 @@ class BitsCharsetDecoderIBM037
protected override def decodeOneChar(dis: InputSourceDataInputStream, finfo:
FormatInfo): Char = {
val byte = getByte(dis, 0)
- val dec = BitsCharsetIBM037.decodeStringTable(byte)
- dec
+ BitsCharsetIBM037.decodeStringTable(byte)
}
}
+
+final class BitsCharsetIBM037Definition
+ extends BitsCharsetDefinition(BitsCharsetIBM037)
+
+final class BitsCharsetEBCDIC_CP_USDefinition
+ extends BitsCharsetDefinition(BitsCharsetIBM037, Some("EBCDIC-CP-US"))
+
\ No newline at end of file
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/ISO88591.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/ISO88591.scala
index b45396908..d0bc689af 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/ISO88591.scala
+++
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/ISO88591.scala
@@ -37,3 +37,5 @@ class BitsCharsetDecoderISO88591
}
}
+final class BitsCharsetISO88591Definition
+ extends BitsCharsetDefinition(BitsCharsetISO88591)
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/ISO885918BitPacked.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/ISO885918BitPacked.scala
index 1e57b0824..16d36d376 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/ISO885918BitPacked.scala
+++
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/ISO885918BitPacked.scala
@@ -31,6 +31,9 @@ object BitsCharsetISO885918BitPackedLSBF extends {
override val requiredBitOrder = BitOrder.LeastSignificantBitFirst
} with BitsCharsetNonByteSize
+final class BitsCharsetISO885918BitPackedLSBFDefinition
+ extends BitsCharsetDefinition(BitsCharsetISO885918BitPackedLSBF)
+
/**
* X-DFDL-ISO-88591-8-BIT-PACKED-MSB-FIRST occupies only 8 bits with each
* code unit.
@@ -42,3 +45,6 @@ object BitsCharsetISO885918BitPackedMSBF extends {
override val replacementCharCode = 0x3F
override val requiredBitOrder = BitOrder.MostSignificantBitFirst
} with BitsCharsetNonByteSize
+
+final class BitsCharsetISO885918BitPackedMSBFDefinition
+ extends BitsCharsetDefinition(BitsCharsetISO885918BitPackedMSBF)
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/Octal.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/Octal.scala
index d09308571..3982c31c2 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/Octal.scala
+++
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/Octal.scala
@@ -31,6 +31,9 @@ object BitsCharsetOctalLSBF extends {
override val requiredBitOrder = BitOrder.LeastSignificantBitFirst
} with BitsCharsetNonByteSize
+final class BitsCharsetOctalLSBFDefinition
+ extends BitsCharsetDefinition(BitsCharsetOctalLSBF)
+
/**
* X-DFDL-OCTAL-MSBF occupies only 3 bits with each
* code unit.
@@ -42,3 +45,6 @@ object BitsCharsetOctalMSBF extends {
override val replacementCharCode = 0x0
override val requiredBitOrder = BitOrder.MostSignificantBitFirst
} with BitsCharsetNonByteSize
+
+final class BitsCharsetOctalMSBFDefinition
+ extends BitsCharsetDefinition(BitsCharsetOctalMSBF)
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/USASCII.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/USASCII.scala
index 414c0e9ba..75a8629e9 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/USASCII.scala
+++
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/USASCII.scala
@@ -22,7 +22,6 @@ import org.apache.daffodil.io.FormatInfo
object BitsCharsetUSASCII extends {
override val name = "US-ASCII"
- override val aliases = Seq("ASCII")
} with BitsCharsetJava {
override def newDecoder() = new BitsCharsetDecoderUSASCII()
@@ -39,3 +38,9 @@ class BitsCharsetDecoderUSASCII
byte.toChar
}
}
+
+final class BitsCharsetASCIIDefinition
+ extends BitsCharsetDefinition(BitsCharsetUSASCII, Some("ASCII"))
+
+final class BitsCharsetUSASCIIDefinition
+ extends BitsCharsetDefinition(BitsCharsetUSASCII)
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/USASCII5BitPacked.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/USASCII5BitPacked.scala
index d286115ba..176a191d7 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/USASCII5BitPacked.scala
+++
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/USASCII5BitPacked.scala
@@ -38,3 +38,6 @@ object BitsCharset5BitPackedLSBF extends {
else super.charToCode(char)
}
}
+
+final class BitsCharset5BitPackedLSBFDefinition
+ extends BitsCharsetDefinition(BitsCharset5BitPackedLSBF)
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/USASCII6BitPacked.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/USASCII6BitPacked.scala
index ef82b8f2e..945053385 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/USASCII6BitPacked.scala
+++
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/USASCII6BitPacked.scala
@@ -25,13 +25,18 @@ import
org.apache.daffodil.schema.annotation.props.gen.BitOrder
*/
object BitsCharsetUSASCII6BitPackedLSBF extends {
override val name = "X-DFDL-US-ASCII-6-BIT-PACKED-LSB-FIRST"
- override val aliases = Seq("X-DFDL-US-ASCII-6-BIT-PACKED")
override val bitWidthOfACodeUnit = 6
override val decodeString = """@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_
!"#$%&'()*+,-./0123456789:;<=>?"""
override val replacementCharCode = 0x1F
override val requiredBitOrder = BitOrder.LeastSignificantBitFirst
} with BitsCharsetNonByteSize
+final class BitsCharsetUSASCII6BitPackedLSBFDefinition
+ extends BitsCharsetDefinition(BitsCharsetUSASCII6BitPackedLSBF)
+
+final class BitsCharsetUSASCII6BitPackedDefinition
+ extends BitsCharsetDefinition(BitsCharsetUSASCII6BitPackedLSBF,
Some("X-DFDL-US-ASCII-6-BIT-PACKED"))
+
object BitsCharsetUSASCII6BitPackedMSBF extends {
override val name = "X-DFDL-US-ASCII-6-BIT-PACKED-MSB-FIRST"
override val bitWidthOfACodeUnit = 6
@@ -39,3 +44,6 @@ object BitsCharsetUSASCII6BitPackedMSBF extends {
override val replacementCharCode = 0x1F
override val requiredBitOrder = BitOrder.MostSignificantBitFirst
} with BitsCharsetNonByteSize
+
+final class BitsCharsetUSASCII6BitPackedMSBFDefinition
+ extends BitsCharsetDefinition(BitsCharsetUSASCII6BitPackedMSBF)
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/USASCII7BitPacked.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/USASCII7BitPacked.scala
index ca88a3ec5..62c4dae80 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/USASCII7BitPacked.scala
+++
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/USASCII7BitPacked.scala
@@ -30,3 +30,6 @@ object BitsCharsetUSASCII7BitPacked extends {
override val replacementCharCode = 0x3F
override val requiredBitOrder = BitOrder.LeastSignificantBitFirst
} with BitsCharsetNonByteSize
+
+final class BitsCharsetUSASCII7BitPackedDefinition
+ extends BitsCharsetDefinition(BitsCharsetUSASCII7BitPacked)
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/UTF16BE.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/UTF16BE.scala
index ae040877e..4f983f2a1 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/UTF16BE.scala
+++
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/UTF16BE.scala
@@ -22,7 +22,6 @@ import org.apache.daffodil.io.FormatInfo
object BitsCharsetUTF16BE extends {
override val name = "UTF-16BE"
- override val aliases = Seq("UTF-16")
} with BitsCharsetJava {
override def newDecoder() = new BitsCharsetDecoderUTF16BE()
@@ -54,3 +53,9 @@ class BitsCharsetDecoderUTF16BE
high.toChar
}
}
+
+final class BitsCharsetUTF16BEDefinition
+ extends BitsCharsetDefinition(BitsCharsetUTF16BE)
+
+final class BitsCharsetUTF16Definition
+ extends BitsCharsetDefinition(BitsCharsetUTF16BE, Some("UTF-16"))
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/UTF16LE.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/UTF16LE.scala
index 1d34fe822..d9c5fcdbf 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/UTF16LE.scala
+++
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/UTF16LE.scala
@@ -55,3 +55,5 @@ class BitsCharsetDecoderUTF16LE
}
}
+final class BitsCharsetUTF16LEDefinition
+ extends BitsCharsetDefinition(BitsCharsetUTF16LE)
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/UTF32BE.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/UTF32BE.scala
index 469120d77..58938b1c1 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/UTF32BE.scala
+++
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/UTF32BE.scala
@@ -22,7 +22,6 @@ import org.apache.daffodil.io.FormatInfo
object BitsCharsetUTF32BE extends {
override val name = "UTF-32BE"
- override val aliases = Seq("UTF-32")
} with BitsCharsetJava {
override def newDecoder() = new BitsCharsetDecoderUTF32BE()
@@ -49,3 +48,9 @@ class BitsCharsetDecoderUTF32BE
}
}
}
+
+final class BitsCharsetUTF32BEDefinition
+ extends BitsCharsetDefinition(BitsCharsetUTF32BE)
+
+final class BitsCharsetUTF32Definition
+ extends BitsCharsetDefinition(BitsCharsetUTF32BE, Some("UTF-32"))
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/UTF32LE.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/UTF32LE.scala
index 5411b7d07..993b21acb 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/UTF32LE.scala
+++
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/UTF32LE.scala
@@ -48,3 +48,6 @@ class BitsCharsetDecoderUTF32LE
}
}
}
+
+final class BitsCharsetUTF32LEDefinition
+ extends BitsCharsetDefinition(BitsCharsetUTF32LE)
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/UTF8.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/UTF8.scala
index a8c5cffb8..5c88624ec 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/UTF8.scala
+++
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/UTF8.scala
@@ -119,3 +119,6 @@ class BitsCharsetDecoderUTF8
if ((byte1 & mask1) == 0 && (byte2 & mask2) == 0) throw new
BitsCharsetDecoderMalformedException(bitsConsumedSoFar)
}
}
+
+final class BitsCharsetUTF8Definition
+ extends BitsCharsetDefinition(BitsCharsetUTF8)
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/X_DFDL_MIL_STD.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/X_DFDL_MIL_STD.scala
index c33d4acf9..2fbe005d9 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/X_DFDL_MIL_STD.scala
+++
b/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/X_DFDL_MIL_STD.scala
@@ -40,6 +40,9 @@ object BitsCharset6BitDFI264DUI001 extends {
override val requiredBitOrder = BitOrder.LeastSignificantBitFirst
} with BitsCharsetNonByteSize
+final class BitsCharset6BitDFI264DUI001Definition
+ extends BitsCharsetDefinition(BitsCharset6BitDFI264DUI001)
+
sealed abstract class BitsCharset6BitDFI311DUI002Base extends {
override val bitWidthOfACodeUnit = 6
override val decodeString =
"""\u00A0ABCDEFGHIJKLMNOPQRSTUVWXYZ\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD
\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD0123456789\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"""
@@ -51,12 +54,18 @@ object BitsCharset6BitDFI311DUI002 extends
BitsCharset6BitDFI311DUI002Base {
override val requiredBitOrder = BitOrder.LeastSignificantBitFirst
}
+final class BitsCharset6BitDFI311DUI002Definition
+ extends BitsCharsetDefinition(BitsCharset6BitDFI311DUI002)
+
object BitsCharset6BitICAOAircraftID extends BitsCharset6BitDFI311DUI002Base {
override val name = "X-DFDL-6-BIT-ICAO-Aircraft-ID"
- override val aliases = Seq("X-DFDL-6-BIT-ICAO-AIRCRAFT-ID")
override val requiredBitOrder = BitOrder.MostSignificantBitFirst
}
+final class BitsCharset6BitICAOAircraftIDDefinition
+ extends BitsCharsetDefinition(BitsCharset6BitICAOAircraftID)
+
+
object BitsCharset3BitDFI336DUI001 extends {
override val name = "X-DFDL-3-BIT-DFI-336-DUI-001"
override val bitWidthOfACodeUnit = 3
@@ -65,6 +74,9 @@ object BitsCharset3BitDFI336DUI001 extends {
override val requiredBitOrder = BitOrder.LeastSignificantBitFirst
} with BitsCharsetNonByteSize
+final class BitsCharset3BitDFI336DUI001Definition
+ extends BitsCharsetDefinition(BitsCharset3BitDFI336DUI001)
+
object BitsCharset4BitDFI746DUI002 extends {
override val name = "X-DFDL-4-BIT-DFI-746-DUI-002"
override val bitWidthOfACodeUnit = 4
@@ -73,6 +85,9 @@ object BitsCharset4BitDFI746DUI002 extends {
override val requiredBitOrder = BitOrder.LeastSignificantBitFirst
} with BitsCharsetNonByteSize
+final class BitsCharset4BitDFI746DUI002Definition
+ extends BitsCharsetDefinition(BitsCharset4BitDFI746DUI002)
+
object BitsCharset3BitDFI746DUI002 extends {
override val name = "X-DFDL-3-BIT-DFI-746-DUI-002"
override val bitWidthOfACodeUnit = 3
@@ -81,6 +96,9 @@ object BitsCharset3BitDFI746DUI002 extends {
override val requiredBitOrder = BitOrder.LeastSignificantBitFirst
} with BitsCharsetNonByteSize
+final class BitsCharset3BitDFI746DUI002Definition
+ extends BitsCharsetDefinition(BitsCharset3BitDFI746DUI002)
+
object BitsCharset3BitDFI747DUI001 extends {
override val name = "X-DFDL-3-BIT-DFI-747-DUI-001"
override val bitWidthOfACodeUnit = 3
@@ -89,6 +107,9 @@ object BitsCharset3BitDFI747DUI001 extends {
override val requiredBitOrder = BitOrder.LeastSignificantBitFirst
} with BitsCharsetNonByteSize
+final class BitsCharset3BitDFI747DUI001Definition
+ extends BitsCharsetDefinition(BitsCharset3BitDFI747DUI001)
+
object BitsCharset5BitDFI769DUI002 extends {
override val name = "X-DFDL-5-BIT-DFI-769-DUI-002"
override val bitWidthOfACodeUnit = 5
@@ -97,6 +118,9 @@ object BitsCharset5BitDFI769DUI002 extends {
override val requiredBitOrder = BitOrder.LeastSignificantBitFirst
} with BitsCharsetNonByteSize
+final class BitsCharset5BitDFI769DUI002Definition
+ extends BitsCharsetDefinition(BitsCharset5BitDFI769DUI002)
+
object BitsCharset5BitDFI1661DUI001 extends {
override val name = "X-DFDL-5-BIT-DFI-1661-DUI-001"
override val bitWidthOfACodeUnit = 5
@@ -104,3 +128,6 @@ object BitsCharset5BitDFI1661DUI001 extends {
override val replacementCharCode = 0x0
override val requiredBitOrder = BitOrder.LeastSignificantBitFirst
} with BitsCharsetNonByteSize
+
+final class BitsCharset5BitDFI1661DUI001Definition
+ extends BitsCharsetDefinition(BitsCharset5BitDFI1661DUI001)
diff --git
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/EvEncoding.scala
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/EvEncoding.scala
index 59fd172bb..36bc2a130 100644
---
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/EvEncoding.scala
+++
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/processors/EvEncoding.scala
@@ -21,11 +21,12 @@ import org.apache.daffodil.dsom._
import org.apache.daffodil.processors.charset.BitsCharset
import org.apache.daffodil.processors.charset.BitsCharsetJava
import org.apache.daffodil.processors.charset.BitsCharsetNonByteSize
-import org.apache.daffodil.processors.charset.CharsetUtils
import org.apache.daffodil.exceptions.Assert
import org.apache.daffodil.util.MaybeInt
import org.apache.daffodil.cookers.FillByteCooker
import org.apache.daffodil.cookers.EncodingCooker
+import org.apache.daffodil.processors.charset.BitsCharsetDefinitionRegistry
+import org.apache.daffodil.processors.charset.CharsetUtils
/*
* The way encoding works, is if a EncodingChangeParser or Unparser is
@@ -93,12 +94,12 @@ abstract class CharsetEvBase(encodingEv: EncodingEvBase,
tci: DPathCompileInfo)
override def compute(state: ParseOrUnparseState) = {
val encString = encodingEv.evaluate(state)
- val cs = CharsetUtils.getCharset(encString)
- if (cs == null) {
- tci.schemaDefinitionError("Unsupported encoding: %s. Supported
encodings: %s", encString, CharsetUtils.supportedEncodingsString)
+ val bc = CharsetUtils.getCharset(encString)
+ if (bc == null) {
+ tci.schemaDefinitionError("Unsupported encoding: %s. Supported
encodings: %s", encString,
BitsCharsetDefinitionRegistry.supportedEncodingsString)
}
- if (!encodingEv.isConstant) checkCharset(state, cs)
- cs
+ if (!encodingEv.isConstant) checkCharset(state, bc)
+ bc
}
}
diff --git
a/daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.processors.charset.BitsCharsetDefinition
b/daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.processors.charset.BitsCharsetDefinition
new file mode 100644
index 000000000..399c816dc
--- /dev/null
+++
b/daffodil-test/src/test/resources/META-INF/services/org.apache.daffodil.processors.charset.BitsCharsetDefinition
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+org.apache.daffodil.charsets.BitsCharset_ISO_8859_1_Reverse_Definition
+org.apache.daffodil.charsets.BitsCharsetTest_ISO_8859_13_Definition
diff --git
a/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd
b/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd
new file mode 100644
index 000000000..7bf7a12f1
--- /dev/null
+++
b/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd
@@ -0,0 +1,69 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema xmlns="http://www.w3.org/2001/XMLSchema"
targetNamespace="urn:org.apache.daffodil.charsets.TestBitsCharsetDefinition"
+ xmlns:tns="urn:org.apache.daffodil.charsets.TestBitsCharsetDefinition"
+ xmlns:dfdl="http://www.ogf.org/dfdl/dfdl-1.0/"
+ xmlns:xsd="http://www.w3.org/2001/XMLSchema"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+
+ <include
schemaLocation="org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd"/>
+
+ <annotation>
+ <appinfo source="http://www.ogf.org/dfdl/">
+ <dfdl:format ref="tns:GeneralFormat" />
+ </appinfo>
+ </annotation>
+
+ <element name="s1">
+ <complexType>
+ <sequence>
+ <element name="e1" dfdl:encoding="ISO-8859-13"
dfdl:lengthKind="explicit" dfdl:length="8" type="xsd:string" />
+ <element name="e2"
dfdl:encoding="X-DFDL-ISO-8859-1-8-BIT-PACKED-LSB-FIRST-REVERSE"
dfdl:lengthKind="delimited" type="xsd:string" />
+ </sequence>
+ </complexType>
+ </element>
+
+ <element name="s2">
+ <complexType>
+ <sequence>
+ <element name="e1" dfdl:encoding="ISO-8859-1"
dfdl:lengthKind="explicit" dfdl:length="8" type="xsd:string" />
+ <element name="e2" dfdl:encoding="ISO-8859-13"
dfdl:lengthKind="delimited" type="xsd:string" />
+ </sequence>
+ </complexType>
+ </element>
+
+ <element name="s3">
+ <complexType>
+ <sequence>
+ <element name="e1" dfdl:encoding="ISO-8859-13"
dfdl:lengthKind="explicit" dfdl:length="8" type="xsd:string" />
+ <element name="e2" dfdl:encoding="ISO-8859-1"
dfdl:lengthKind="delimited" type="xsd:string" />
+ </sequence>
+ </complexType>
+ </element>
+
+ <element name="s4">
+ <complexType>
+ <sequence>
+ <element name="e1" dfdl:encoding="ISO-DNE" dfdl:lengthKind="explicit"
dfdl:length="8" type="xsd:string" />
+ </sequence>
+ </complexType>
+ </element>
+
+
+</schema>
diff --git
a/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.tdml
b/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.tdml
new file mode 100644
index 000000000..656d00011
--- /dev/null
+++
b/daffodil-test/src/test/resources/org/apache/daffodil/charsets/TestBitsCharsetDefinition.tdml
@@ -0,0 +1,162 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<tdml:testSuite xmlns:tdml="http://www.ibm.com/xmlns/dfdl/testData"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xmlns:dfdl="http://www.ogf.org/dfdl/dfdl-1.0/"
+ xmlns:xs="http://www.w3.org/2001/XMLSchema"
+ xmlns:fn="http://www.w3.org/2005/xpath-functions"
+ xmlns:dfdlx="http://www.ogf.org/dfdl/dfdl-1.0/extensions"
+ xmlns:ex="http://example.com"
+ xmlns:tns="urn:org.apache.daffodil.charsets.TestBitsCharsetDefinition"
defaultRoundTrip="none">
+
+ <tdml:parserTestCase name="parse_charsets" root="s1"
model="org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd">
+
+ <tdml:infoset>
+ <tdml:dfdlInfoset>
+ <s1>
+ <!-- First char in e1 is unicode char 0x104 capital A with ogonek -->
+ <e1>Ą1234567</e1>
+ <e2>01234567</e2>
+ </s1>
+ </tdml:dfdlInfoset>
+ </tdml:infoset>
+ <tdml:document>
+ <tdml:documentPart type="byte"><![CDATA[C0 31 32 33 34 35 36 37
+ CF CE CD CC CB CA C9 C8]]>
+ </tdml:documentPart>
+ </tdml:document>
+
+ </tdml:parserTestCase>
+
+ <tdml:unparserTestCase name="unparse_charsets" root="s1"
model="org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd">
+
+ <tdml:document>
+ <tdml:documentPart type="byte"><![CDATA[C0 31 32 33 34 35 36 37
+ CF CE CD CC CB CA C9 C8]]>
+ </tdml:documentPart>
+ </tdml:document>
+ <tdml:infoset>
+ <tdml:dfdlInfoset>
+ <tns:s1>
+ <!-- First char in e1 is unicode char 0x104 capital A with
ogonek -->
+ <e1>Ą1234567</e1>
+ <e2>01234567</e2>
+ </tns:s1>
+ </tdml:dfdlInfoset>
+ </tdml:infoset>
+ </tdml:unparserTestCase>
+
+ <tdml:parserTestCase name="parse_charsets2" root="s2"
model="org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd"
roundTrip="true">
+ <tdml:infoset>
+ <tdml:dfdlInfoset>
+ <s2>
+ <!-- First char in e1 is unicode char 0xC0 capital A with grave -->
+ <e1>À1234567</e1>
+ <!-- First char in e2 is unicode char 0x104 capital A with
ogonek -->
+ <e2>Ą1234567</e2>
+ </s2>
+ </tdml:dfdlInfoset>
+ </tdml:infoset>
+ <tdml:document>
+ <tdml:documentPart type="byte"><![CDATA[C0 31 32 33 34 35 36 37
+ C0 31 32 33 34 35 36 37]]>
+ </tdml:documentPart>
+ </tdml:document>
+
+ </tdml:parserTestCase>
+
+ <tdml:unparserTestCase name="unparse_charsets2" root="s2"
model="org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd">
+
+ <tdml:document>
+ <tdml:documentPart type="byte"><![CDATA[C0 31 32 33 34 35 36 37
+ C0 31 32 33 34 35 36 37]]>
+ </tdml:documentPart>
+ </tdml:document>
+ <tdml:infoset>
+ <tdml:dfdlInfoset>
+ <tns:s2>
+ <!-- First char in e1 is unicode char 0xC0 capital A with
grave -->
+ <e1>À1234567</e1>
+ <!-- First char in e2 is unicode char 0x104 capital A with ogonek -->
+ <e2>Ą1234567</e2>
+ </tns:s2>
+ </tdml:dfdlInfoset>
+ </tdml:infoset>
+ </tdml:unparserTestCase>
+
+ <tdml:parserTestCase name="parse_charsets3" root="s3"
model="org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd">
+
+ <tdml:infoset>
+ <tdml:dfdlInfoset>
+ <s3>
+ <!-- First char in e1 is unicode char 0x104 capital A with ogonek -->
+ <e1>Ą1234567</e1>
+ <!-- First char in e2 is unicode char 0xC0 capital A with
grave -->
+ <e2>À1234567</e2>
+ </s3>
+ </tdml:dfdlInfoset>
+ </tdml:infoset>
+ <tdml:document>
+ <tdml:documentPart type="byte"><![CDATA[C0 31 32 33 34 35 36 37
+ C0 31 32 33 34 35 36 37]]>
+ </tdml:documentPart>
+ </tdml:document>
+
+</tdml:parserTestCase>
+
+<tdml:unparserTestCase name="unparse_charsets3" root="s3"
model="org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd">
+
+ <tdml:document>
+ <tdml:documentPart type="byte"><![CDATA[C0 31 32 33 34 35 36 37
+ C0 31 32 33 34 35 36 37]]>
+ </tdml:documentPart>
+ </tdml:document>
+ <tdml:infoset>
+ <tdml:dfdlInfoset>
+ <tns:s3>
+ <!-- First char in e1 is unicode char 0x104 capital A with
ogonek -->
+ <e1>Ą1234567</e1>
+ <!-- First char in e2 is unicode char 0xC0 capital A with grave -->
+ <e2>À1234567</e2>
+ </tns:s3>
+ </tdml:dfdlInfoset>
+ </tdml:infoset>
+</tdml:unparserTestCase>
+
+ <tdml:unparserTestCase name="verify_error_message" root="s4"
model="org/apache/daffodil/charsets/TestBitsCharsetDefinition.dfdl.xsd">
+
+ <tdml:document>
+ <tdml:documentPart type="byte"><![CDATA[C0 31 32 33 34 35 36 37]]>
+ </tdml:documentPart>
+ </tdml:document>
+ <tdml:infoset>
+ <tdml:dfdlInfoset>
+ <tns:s4>
+ <!-- First char in e1 is unicode char 0xC0 capital A with grave -->
+ <e1>À1234567</e1>
+ </tns:s4>
+ </tdml:dfdlInfoset>
+ </tdml:infoset>
+ <tdml:errors>
+ <tdml:error>Unsupported encoding: ISO-DNE.</tdml:error>
+ <tdml:error>Supported encodings:</tdml:error>
+ <tdml:error>UTF-32LE,</tdml:error>
+ </tdml:errors>
+ </tdml:unparserTestCase>
+
+</tdml:testSuite>
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/Base4.scala
b/daffodil-test/src/test/scala/org/apache/daffodil/charsets/ISO_8859_1_Reverse.scala
similarity index 65%
copy from
daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/Base4.scala
copy to
daffodil-test/src/test/scala/org/apache/daffodil/charsets/ISO_8859_1_Reverse.scala
index 2f0eec995..d27e8860a 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/Base4.scala
+++
b/daffodil-test/src/test/scala/org/apache/daffodil/charsets/ISO_8859_1_Reverse.scala
@@ -15,26 +15,20 @@
* limitations under the License.
*/
-package org.apache.daffodil.processors.charset
+package org.apache.daffodil.charsets
import org.apache.daffodil.schema.annotation.props.gen.BitOrder
+import org.apache.daffodil.processors.charset.BitsCharsetNonByteSize
+import org.apache.daffodil.processors.charset.BitsCharsetDefinition
-/**
- * Base 4 aka Quarternary
- */
-
-object BitsCharsetBase4LSBF extends {
- override val name = "X-DFDL-BASE4-LSBF"
- override val bitWidthOfACodeUnit = 2
- override val decodeString = "0123"
- override val replacementCharCode = 0x0
- override val requiredBitOrder = BitOrder.LeastSignificantBitFirst
-} with BitsCharsetNonByteSize
-
-object BitsCharsetBase4MSBF extends {
- override val name = "X-DFDL-BASE4-MSBF"
- override val bitWidthOfACodeUnit = 2
- override val decodeString = "0123"
+object BitsCharset_ISO_8859_1_Reverse extends{
+ override val name = "X-DFDL-ISO-8859-1-8-BIT-PACKED-LSB-FIRST-REVERSE"
+ override val bitWidthOfACodeUnit = 8
+ override val decodeString = (0 to 255).map { _.toChar }.mkString.reverse
override val replacementCharCode = 0x0
override val requiredBitOrder = BitOrder.MostSignificantBitFirst
} with BitsCharsetNonByteSize
+
+
+final class BitsCharset_ISO_8859_1_Reverse_Definition
+ extends BitsCharsetDefinition(BitsCharset_ISO_8859_1_Reverse)
diff --git
a/daffodil-test/src/test/scala/org/apache/daffodil/charsets/TestBitsCharsetDefinition.scala
b/daffodil-test/src/test/scala/org/apache/daffodil/charsets/TestBitsCharsetDefinition.scala
new file mode 100644
index 000000000..cf7219170
--- /dev/null
+++
b/daffodil-test/src/test/scala/org/apache/daffodil/charsets/TestBitsCharsetDefinition.scala
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.daffodil.charsets
+
+import org.junit.Test
+import org.apache.daffodil.tdml.Runner
+import org.junit.AfterClass
+
+object TestCharsets {
+
+ val testDir = "/org/apache/daffodil/charsets/"
+ val runner = Runner(testDir, "TestBitsCharsetDefinition.tdml")
+
+ @AfterClass def shutDown(): Unit = {
+ runner.reset
+ }
+}
+
+class TestCharsets{
+ import TestCharsets._
+
+ @Test def parse_loaded_charsets(): Unit = {
runner.runOneTest("parse_charsets") }
+ @Test def unparse_loaded_charsets(): Unit = {
runner.runOneTest("unparse_charsets") }
+ @Test def parse_loaded_charsets2(): Unit = {
runner.runOneTest("parse_charsets2") }
+ @Test def unparse_loaded_charsets2(): Unit = {
runner.runOneTest("unparse_charsets2") }
+ @Test def parse_loaded_charsets3(): Unit = {
runner.runOneTest("parse_charsets3") }
+ @Test def unparse_loaded_charsets3(): Unit = {
runner.runOneTest("unparse_charsets3") }
+ @Test def unparse_loaded_charsets_DNE(): Unit = {
runner.runOneTest("verify_error_message") }
+
+}
diff --git
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/ISO88591.scala
b/daffodil-test/src/test/scala/org/apache/daffodil/charsets/TestISO_8859_13.scala
similarity index 58%
copy from
daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/ISO88591.scala
copy to
daffodil-test/src/test/scala/org/apache/daffodil/charsets/TestISO_8859_13.scala
index b45396908..4569e4ef8 100644
---
a/daffodil-io/src/main/scala/org/apache/daffodil/processors/charset/ISO88591.scala
+++
b/daffodil-test/src/test/scala/org/apache/daffodil/charsets/TestISO_8859_13.scala
@@ -1,3 +1,4 @@
+
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -15,25 +16,37 @@
* limitations under the License.
*/
-package org.apache.daffodil.processors.charset
+package org.apache.daffodil.charsets
+import org.apache.daffodil.processors.charset.BitsCharsetJava
+import org.apache.daffodil.processors.charset.BitsCharsetDecoderByteSize
import org.apache.daffodil.io.InputSourceDataInputStream
import org.apache.daffodil.io.FormatInfo
+import org.apache.daffodil.processors.charset.BitsCharsetDefinition
+import java.nio.ByteBuffer
+import java.nio.charset.Charset
-object BitsCharsetISO88591 extends {
- override val name = "ISO-8859-1"
+object BitsCharsetTest_ISO_8859_13 extends {
+ override val name = "ISO-8859-13"
} with BitsCharsetJava {
- override def newDecoder() = new BitsCharsetDecoderISO88591()
-}
+ override def newDecoder() = new BitsCharsetTest_Decoder_ISO_8859_13()
+}
-class BitsCharsetDecoderISO88591
+class BitsCharsetTest_Decoder_ISO_8859_13
extends BitsCharsetDecoderByteSize {
+ val decodeString = {
+ val bytes = ByteBuffer.wrap((0 to 255).map{ _.toByte }.toArray)
+ Charset.forName("ISO-8859-13").newDecoder().decode(bytes).toString
+ }
+
protected override def decodeOneChar(dis: InputSourceDataInputStream, finfo:
FormatInfo): Char = {
val byte = getByte(dis, 0)
- byte.toChar
+ decodeString(byte)
}
}
+final class BitsCharsetTest_ISO_8859_13_Definition
+ extends BitsCharsetDefinition(BitsCharsetTest_ISO_8859_13)