mbeckerle commented on a change in pull request #30: Performance improvements 
around FormatInfo change.
URL: https://github.com/apache/incubator-daffodil/pull/30#discussion_r163698711
 
 

 ##########
 File path: 
daffodil-io/src/main/scala/edu/illinois/ncsa/daffodil/processors/charset/BitsCharset.scala
 ##########
 @@ -0,0 +1,243 @@
+package edu.illinois.ncsa.daffodil.processors.charset
+
+import edu.illinois.ncsa.daffodil.schema.annotation.props.gen.BitOrder
+import edu.illinois.ncsa.daffodil.exceptions.Assert
+import java.nio.charset.CoderResult
+import java.nio.charset.Charset
+import java.nio.charset.CodingErrorAction
+import java.nio.charset.CharsetEncoder
+import java.nio.charset.CharsetDecoder
+import java.nio.CharBuffer
+import java.nio.ByteBuffer
+import edu.illinois.ncsa.daffodil.util.MaybeULong
+import java.nio.charset.StandardCharsets
+
+/**
+ * A character set augmented so that it can be used with Daffodil's bit-wise
+ * DataInputStream and DataOutputStream.
+ *
+ * Identity is defined by `name` alone: two instances are equal, and hash
+ * alike, exactly when their names are equal.
+ */
+trait BitsCharset {
+
+  /** Name of the charset; the sole basis for `equals` and `hashCode`. */
+  def name: String
+
+  /** Width of a single code unit, in units of bits. */
+  def bitWidthOfACodeUnit: Int
+
+  /** Bit order this charset requires. */
+  def requiredBitOrder: BitOrder
+
+  /**
+   * Mandatory alignment for this charset (presumably expressed in bits,
+   * judging by the name; confirm against the implementations).
+   */
+  def mandatoryBitAlignment: Int
+
+  /** Obtains a decoder for this charset. */
+  def newDecoder(): BitsCharsetDecoder
+
+  /** Obtains an encoder for this charset. */
+  def newEncoder(): BitsCharsetEncoder
+
+  /** Two charsets are equal exactly when their names are equal. */
+  final override def equals(other: Any): Boolean = other match {
+    case charset: BitsCharset => name == charset.name
+    case _ => false
+  }
+
+  /** Hash of `name`, consistent with `equals`. */
+  final override def hashCode: Int = name.hashCode
+}
+
+/**
+ * Decoder side of a [[BitsCharset]]: converts bytes into characters.
+ *
+ * Most member names and signatures parallel those of
+ * `java.nio.charset.CharsetDecoder` (presumably with the same contracts;
+ * confirm against the implementations). The bit-position setters, the
+ * per-bit ratios and `isMandatoryAlignmentNeeded` are additions.
+ */
+trait BitsCharsetDecoder {
+
+  /** The charset this decoder belongs to. */
+  def bitsCharset: BitsCharset
+
+  // Bit-level positioning. NOTE(review): exact semantics are defined by the
+  // implementations and are not visible from this trait.
+  def setInitialBitOffset(offset: Int): Unit
+  def setFinalByteBitLimitOffset0b(bitLimitOffset0b: MaybeULong): Unit
+
+  // Expected and worst-case output size ratios, per byte and per bit.
+  def averageCharsPerByte(): Float
+  def maxCharsPerByte(): Float
+  def averageCharsPerBit(): Float
+  def maxCharsPerBit(): Float
+
+  // Replacement string and error-action configuration.
+  def replacement(): String
+  def replaceWith(newReplacement: String): BitsCharsetDecoder
+  def malformedInputAction(): CodingErrorAction
+  def onMalformedInput(action: CodingErrorAction): BitsCharsetDecoder
+  def unmappableCharacterAction(): CodingErrorAction
+  def onUnmappableCharacter(action: CodingErrorAction): BitsCharsetDecoder
+
+  // Decoder life cycle.
+  def flush(out: CharBuffer): CoderResult
+  def reset(): BitsCharsetDecoder
+
+  /**
+   * Used to determine if the data input stream must be aligned (if not
+   * already) for this encoding.
+   *
+   * The answer is based on whether the coder has been reset. If it has not
+   * been reset, it is assumed we are in the middle of decoding many
+   * characters, so no mandatory alignment is needed. If it was reset, it is
+   * assumed that we may be unaligned at the start of decoding characters, so
+   * we must check whether we are mandatory aligned.
+   */
+  def isMandatoryAlignmentNeeded(): Boolean
+
+  /** Decodes from `in` into `out`; `endOfInput` is passed by the caller. */
+  def decode(in: ByteBuffer, out: CharBuffer, endOfInput: Boolean): CoderResult
+
+  /** Implementation hook; not called from within this trait. */
+  protected def decodeLoop(in: ByteBuffer, out: CharBuffer): CoderResult
+
+  /**
+   * Convenience method: decodes the remaining content of `in` into a newly
+   * allocated buffer.
+   *
+   * The decoder is reset first, then the three-argument `decode` is called
+   * with `endOfInput = true` while input remains, followed by `flush`. On
+   * overflow the output buffer is replaced by a larger one and decoding
+   * continues. Any other non-underflow result is raised through
+   * `CoderResult.throwException()`.
+   *
+   * @param in the bytes to decode
+   * @return the decoded characters, flipped and ready for reading. When the
+   *         initial size estimate is 0 and `in` has nothing remaining, the
+   *         empty buffer is returned directly, without resetting the decoder.
+   */
+  final def decode(in: ByteBuffer): CharBuffer = {
+    // Initial size estimate; may be 0, in which case the overflow path below
+    // is what makes room.
+    val estimate = scala.math.ceil(in.remaining() * averageCharsPerByte()).toInt
+    val firstBuffer = CharBuffer.allocate(estimate)
+
+    // One decode/flush round per call; recurses until flush reports underflow.
+    @scala.annotation.tailrec
+    def drain(target: CharBuffer, capacity: Int): CharBuffer = {
+      val decoded =
+        if (in.hasRemaining()) decode(in, target, true)
+        else CoderResult.UNDERFLOW
+      val result = if (decoded.isUnderflow()) flush(target) else decoded
+
+      if (result.isUnderflow()) target
+      else if (result.isOverflow()) {
+        // Ensure progress; the capacity might be 0!
+        val grown = 2 * capacity + 1
+        val bigger = CharBuffer.allocate(grown)
+        target.flip()
+        bigger.put(target)
+        drain(bigger, grown)
+      } else {
+        result.throwException()
+        // Only reached if throwException returns; then simply go around again.
+        drain(target, capacity)
+      }
+    }
+
+    if (estimate == 0 && in.remaining() == 0) firstBuffer
+    else {
+      reset()
+      val filled = drain(firstBuffer, estimate)
+      filled.flip()
+      filled
+    }
+  }
+}
+
+/**
+ * Encoder side of a [[BitsCharset]]: converts characters into bytes.
+ *
+ * Most member names and signatures parallel those of
+ * `java.nio.charset.CharsetEncoder` (presumably with the same contracts;
+ * confirm against the implementations). The per-bit ratios and
+ * `isMandatoryAlignmentNeeded` are additions.
+ */
+abstract class BitsCharsetEncoder {
+
+  /** The charset this encoder belongs to. */
+  def bitsCharset: BitsCharset
+
+  // Expected and worst-case output size ratios, in bytes and in bits.
+  def averageBytesPerChar(): Float
+  def maxBytesPerChar(): Float
+  def averageBitsPerChar(): Float
+  def maxBitsPerChar(): Float
+
+  // Replacement bytes and error-action configuration.
+  def replacement(): Array[Byte]
+  def replaceWith(newReplacement: Array[Byte]): BitsCharsetEncoder
+  def malformedInputAction(): CodingErrorAction
+  def onMalformedInput(action: CodingErrorAction): BitsCharsetEncoder
+  def unmappableCharacterAction(): CodingErrorAction
+  def onUnmappableCharacter(action: CodingErrorAction): BitsCharsetEncoder
+
+  // Encoder life cycle.
+  def flush(out: ByteBuffer): CoderResult
+  def reset(): BitsCharsetEncoder
+
+  /**
+   * Used to determine if the data stream must be aligned (if not already)
+   * for this encoding.
+   *
+   * The answer is based on whether the coder has been reset. If it has not
+   * been reset, it is assumed we are in the middle of encoding many
+   * characters, so no mandatory alignment is needed. If it was reset, it is
+   * assumed that we may be unaligned at the start of encoding characters, so
+   * we must check whether we are mandatory aligned.
+   *
+   * NOTE(review): the earlier wording said "data input stream"; for an
+   * encoder this presumably means the output stream. Confirm.
+   */
+  def isMandatoryAlignmentNeeded(): Boolean
+
+  /** Encodes from `in` into `out`; `endOfInput` is passed by the caller. */
+  def encode(in: CharBuffer, out: ByteBuffer, endOfInput: Boolean): CoderResult
+
+  /** Implementation hook; not called from within this class. */
+  protected def encodeLoop(in: CharBuffer, out: ByteBuffer): CoderResult
+}
+
 
 Review comment:
   Scaladoc badly needed. This implements our BitsCharset, but by delegating to 
a Java Charset.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to