This is an automated email from the ASF dual-hosted git repository.
slawrence pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/daffodil.git
The following commit(s) were added to refs/heads/master by this push:
new 1fcf9f7 Changes to support Java 16
1fcf9f7 is described below
commit 1fcf9f70317e22df3b837c57453f9b878e587ef0
Author: Steve Lawrence <[email protected]>
AuthorDate: Tue Apr 13 13:00:54 2021 -0400
Changes to support Java 16
- Update GitHub actions to replace Java 15 with 16; 15 is no longer
supported
- Remove ByteSource from the data dumper. ByteSource is a structural type,
which requires a kind of reflection that is no longer allowed in Java 16.
Fortunately, the generality this provided was not actually used--all places
we dump data are trivially replaced with ByteBuffers
- Java 16 changes the GZIPOutputStream to always output 0xFF for the
GZIP header OS field, where it used to output 0x00. See Java bug
JDK-8244706. There is no simple way to override this value to use the
old behavior, or have older Java versions use the new behavior. To
resolve this, this adds a new GZIPFixedOutputStream class which acts
as a proxy between the GZIPOutputStream and the underlying
OutputStream. This proxy fixes the one incorrect byte in the header so
all older Java versions will use the correct 0xFF byte. Java 16+ has
fixed this issue, so this proxy is only used if an older version of
Java is detected.
DAFFODIL-2484
---
.github/workflows/main.yml | 2 +-
.../org/apache/daffodil/layers/TestLayers.scala | 4 +
.../main/scala/org/apache/daffodil/io/Dump.scala | 54 +++++------
.../scala/org/apache/daffodil/io/TestDump.scala | 105 +++++++++------------
.../apache/daffodil/layers/GZipTransformer.scala | 83 +++++++++++++++-
.../org/apache/daffodil/layers/layers.tdml | 4 +-
6 files changed, 160 insertions(+), 92 deletions(-)
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index a398844..e8d1b55 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -23,7 +23,7 @@ jobs:
strategy:
fail-fast: false
matrix:
- java_version: [ 8, 11, 15 ]
+ java_version: [ 8, 11, 16 ]
scala_version: [ 2.12.11 ]
os: [ 'ubuntu-latest', 'windows-latest' ]
env:
diff --git
a/daffodil-core/src/test/scala/org/apache/daffodil/layers/TestLayers.scala
b/daffodil-core/src/test/scala/org/apache/daffodil/layers/TestLayers.scala
index c323ef2..232c25b 100644
--- a/daffodil-core/src/test/scala/org/apache/daffodil/layers/TestLayers.scala
+++ b/daffodil-core/src/test/scala/org/apache/daffodil/layers/TestLayers.scala
@@ -111,6 +111,10 @@ class TestLayers {
IOUtils.write(text, gzos, StandardCharsets.UTF_8)
gzos.close()
val data = baos.toByteArray()
+ // Java 16+ sets byte index 9 (the gzip OS field) to 0xFF, but previous Java
+ // versions set it to 0x00. Daffodil always unparses with 0xFF regardless of
+ // Java version, so force the gzip data to 0xFF to make sure tests round trip
+ data(9) = 0xFF.toByte
data
}
diff --git a/daffodil-io/src/main/scala/org/apache/daffodil/io/Dump.scala
b/daffodil-io/src/main/scala/org/apache/daffodil/io/Dump.scala
index ed02b2d..0c52011 100644
--- a/daffodil-io/src/main/scala/org/apache/daffodil/io/Dump.scala
+++ b/daffodil-io/src/main/scala/org/apache/daffodil/io/Dump.scala
@@ -19,15 +19,17 @@ package org.apache.daffodil.io
import java.nio.ByteBuffer
import java.nio.CharBuffer
-import org.apache.daffodil.exceptions.Assert
+import java.nio.charset.CoderResult
+import java.nio.charset.{ Charset => JavaCharset }
+import java.nio.charset.{ CharsetDecoder => JavaCharsetDecoder }
+
import com.ibm.icu.lang.UCharacter
import com.ibm.icu.lang.UCharacterEnums
import com.ibm.icu.lang.UProperty
-import org.apache.daffodil.util.Misc
+
import org.apache.daffodil.equality._
-import java.nio.charset.{ CharsetDecoder => JavaCharsetDecoder }
-import java.nio.charset.{ Charset => JavaCharset }
-import java.nio.charset.CoderResult
+import org.apache.daffodil.exceptions.Assert
+import org.apache.daffodil.util.Misc
/**
* Hex/Bits and text dump formats for debug/trace purposes.
@@ -39,12 +41,6 @@ import java.nio.charset.CoderResult
*/
class DataDumper {
- import scala.language.reflectiveCalls
-
- type ByteSource = {
- def get(byteAddress0b: Int): Byte // arg must be Int, not Long, so
ByteBuffers satisfy this constract.
- }
-
val defaultMaxLineLength = 70
/**
@@ -99,12 +95,12 @@ class DataDumper {
* indicator info is a start position and length for the "region of
interest". The units
* are in bits.
*
- * The shamStartBitAddress0b is the location where the data in the
byteSource starts.
- * E.g., the byte at byteSource.get(0) is from the data stream at the
shamStartBitAddress0b.
+ * The shamStartBitAddress0b is the location where the data in the
byteBuffer starts.
+ * E.g., the byte at byteBuffer.get(0) is from the data stream at the
shamStartBitAddress0b.
*
* The byte source is a window into the data stream.
*/
- def dump(kind: Kind, shamStartBitAddress0b: Long, lengthInBits: Int,
byteSource: ByteSource, maxLineLength: Int = defaultMaxLineLength,
+ def dump(kind: Kind, shamStartBitAddress0b: Long, lengthInBits: Int,
byteBuffer: ByteBuffer, maxLineLength: Int = defaultMaxLineLength,
includeHeadingLine: Boolean = true,
indicatorInfo: Option[(Long, Int)] = None): Seq[String] = {
val (shamStartByteAddress0b, lengthInBytes, _) =
convertBitsToBytesUnits(shamStartBitAddress0b, lengthInBits)
@@ -119,12 +115,12 @@ class DataDumper {
}
kind match {
case TextOnly(enc) => {
- dumpTextLine(maxLineLength, shamStartByteAddress0b, lengthInBytes,
byteSource, enc, indicatorInfoInBytes)
+ dumpTextLine(maxLineLength, shamStartByteAddress0b, lengthInBytes,
byteBuffer, enc, indicatorInfoInBytes)
}
case MixedHexLTR(optionCS) =>
- dumpHexAndTextBytes(shamStartByteAddress0b, lengthInBytes, byteSource,
includeHeadingLine, optEncName, indicatorInfoInBytes)
+ dumpHexAndTextBytes(shamStartByteAddress0b, lengthInBytes, byteBuffer,
includeHeadingLine, optEncName, indicatorInfoInBytes)
case MixedHexRTL(None) =>
- dumpHexAndTextBytesLSBFirst(shamStartByteAddress0b, lengthInBytes,
byteSource, includeHeadingLine, optEncName)
+ dumpHexAndTextBytesLSBFirst(shamStartByteAddress0b, lengthInBytes,
byteBuffer, includeHeadingLine, optEncName)
case _ => Assert.usageError("unsupported dump kind")
}
}
@@ -137,13 +133,13 @@ class DataDumper {
var nPadBytesFromPriorLine = 0
private def textDump(addr: Long, rowStart0b: Int, txtsb: StringBuilder,
- limit0b: Int, endByteAddress0b: Long, byteSource: ByteSource, decoder:
Option[JavaCharsetDecoder],
+ limit0b: Int, endByteAddress0b: Long, byteBuffer: ByteBuffer, decoder:
Option[JavaCharsetDecoder],
textByteWidth: Int): Unit = {
var i = rowStart0b + nPadBytesFromPriorLine
txtsb ++= paddingFromPriorLine
while (i <= limit0b) {
val bytePos0b = addr + i
- val (charRep, nBytesConsumed, width) = convertToCharRepr(bytePos0b,
endByteAddress0b, byteSource, decoder)
+ val (charRep, nBytesConsumed, width) = convertToCharRepr(bytePos0b,
endByteAddress0b, byteBuffer, decoder)
Assert.invariant(nBytesConsumed > 0)
// some characters will print double width. It is assumed all such
// characters occupy at least one byte.
@@ -201,7 +197,7 @@ class DataDumper {
* For examples see the TestDump class.
*/
private[io] def dumpHexAndTextBytes(startByteAddress0b: Long, lengthInBytes:
Int,
- byteSource: ByteSource,
+ byteBuffer: ByteBuffer,
includeHeadingLine: Boolean,
optEncodingName: Option[String],
indicatorInfoInBytes: Option[(Long, Int)]): Seq[String] = {
@@ -277,7 +273,7 @@ class DataDumper {
rowStart0b to limit0b foreach { i =>
val bytePos0b = addr + i - startByteAddress0b
val byteValue = try {
- byteSource.get(bytePos0b.toInt)
+ byteBuffer.get(bytePos0b.toInt)
} catch {
case e: IndexOutOfBoundsException => 0.toByte
}
@@ -289,7 +285,7 @@ class DataDumper {
// Text dump
//
textDump(addr - startByteAddress0b, rowStart0b, txtsb,
- limit0b, endByteAddress0b, byteSource, decoder,
+ limit0b, endByteAddress0b, byteBuffer, decoder,
textByteWidth)
if (isLastRow) {
@@ -472,7 +468,7 @@ class DataDumper {
private def convertToCharRepr(
startingBytePos0b: Long,
endingBytePos0b: Long,
- bs: ByteSource,
+ byteBuffer: ByteBuffer,
decoder: Option[JavaCharsetDecoder]): (String, Int, Int) = {
Assert.invariant(decoder.map { d => Misc.isAsciiBased(d.charset())
}.getOrElse(true))
@@ -492,7 +488,7 @@ class DataDumper {
val thePos = (startingBytePos0b + i).toInt
Assert.invariant(thePos >= 0)
val theByte = try {
- bs.get(thePos)
+ byteBuffer.get(thePos)
} catch {
case e: IndexOutOfBoundsException => 0.toByte
}
@@ -573,7 +569,7 @@ class DataDumper {
// no encoding, so use the general one based on windows-1252 where
// every byte corresponds to a character with a glyph.
val byteValue = try {
- bs.get(startingBytePos0b.toInt)
+ byteBuffer.get(startingBytePos0b.toInt)
} catch {
case e: IndexOutOfBoundsException => 0.toByte
}
@@ -592,7 +588,7 @@ class DataDumper {
* If displaying ONLY text, then we just display one long line
* and replace any whitespace or non-glyph characters with glyph characters.
*/
- def dumpTextLine(maxLineLen: Int, startByteAddress0b: Long,
lengthInBytesRequested: Int, byteSource: ByteSource,
+ def dumpTextLine(maxLineLen: Int, startByteAddress0b: Long,
lengthInBytesRequested: Int, byteBuffer: ByteBuffer,
optEncodingName: Option[String] = None,
indicatorInfoInBytes: Option[(Long, Int)] = None): Seq[String] = {
Assert.usage(startByteAddress0b >= 0)
@@ -640,7 +636,7 @@ class DataDumper {
var i = startByteAddress0b
val sb = new StringBuilder
while (i <= endByteAddress0b) {
- val (cR, nBytesConsumed, _) = convertToCharRepr(i - startByteAddress0b,
endByteAddress0b, byteSource, decoder)
+ val (cR, nBytesConsumed, _) = convertToCharRepr(i - startByteAddress0b,
endByteAddress0b, byteBuffer, decoder)
sb ++= cR
i += nBytesConsumed
}
@@ -678,11 +674,11 @@ class DataDumper {
* little-endian data
*/
private[io] def dumpHexAndTextBytesLSBFirst(startByteAddress0b: Long,
lengthInBytes: Int,
- byteSource: ByteSource,
+ byteBuffer: ByteBuffer,
includeHeadingLine: Boolean = true,
optEncodingName: Option[String] = None): Seq[String] = {
val ltrDump = dumpHexAndTextBytes(startByteAddress0b, lengthInBytes,
- byteSource, includeHeadingLine, optEncodingName, None)
+ byteBuffer, includeHeadingLine, optEncodingName, None)
val ltrLines =
ltrDump.filterNot { _.length() == 0 }
val wholeLineRegex = """([0-9a-fA-F]{8})(:?\s+)([0-9a-fA-F
]+[0-9a-fA-F])(\s+)(.*)""".r
diff --git a/daffodil-io/src/test/scala/org/apache/daffodil/io/TestDump.scala
b/daffodil-io/src/test/scala/org/apache/daffodil/io/TestDump.scala
index 971527c..dec9390 100644
--- a/daffodil-io/src/test/scala/org/apache/daffodil/io/TestDump.scala
+++ b/daffodil-io/src/test/scala/org/apache/daffodil/io/TestDump.scala
@@ -17,36 +17,23 @@
package org.apache.daffodil.io
+import java.nio.ByteBuffer
+
import org.junit.Test
import org.junit.Assert._
-import org.apache.daffodil.schema.annotation.props.gen.BitOrder
-import org.apache.daffodil.util.Misc
class TestDump {
val Dump = new DataDumper
- class BS(val bytes: Array[Byte]) {
-
- def this(hex: String) = this(Misc.hex2Bytes(hex))
-
- def get(byteAddress0b: Int): Byte = {
- getRawByte(byteAddress0b * 8, java.nio.ByteOrder.BIG_ENDIAN,
BitOrder.MostSignificantBitFirst)
- }
-
- private def getRawByte(bitPos0b: Long, order: java.nio.ByteOrder,
bitOrder: BitOrder): Byte = {
- bytes((bitPos0b >> 3).toInt)
- }
- }
-
@Test def testDumpHexAndText1(): Unit = {
val bytes = "Date 年月日=2003年08月27日".getBytes("utf-8")
val lengthInBits = bytes.length * 8
- val bs = new BS(bytes)
+ val bb = ByteBuffer.wrap(bytes)
val dumpString =
- Dump.dump(Dump.MixedHexLTR(Some("utf-8")), 0, lengthInBits, bs,
+ Dump.dump(Dump.MixedHexLTR(Some("utf-8")), 0, lengthInBits, bb,
includeHeadingLine = true).mkString("\n")
val expected = """
87654321 0011 2233 4455 6677 8899 aabb ccdd eeff
0~1~2~3~4~5~6~7~8~9~a~b~c~d~e~f~
@@ -61,10 +48,10 @@ class TestDump {
val dateString = "Date 年月日=2003年08月27日"
val dateStringLengthInBytes = dateString.getBytes("utf-8").length
val bytes = dateString.getBytes("utf-8")
- val bs = new BS(bytes)
+ val bb = ByteBuffer.wrap(bytes)
val dumpString =
- Dump.dump(Dump.MixedHexLTR(Some("utf-8")), 1000 * 8,
dateStringLengthInBytes * 8, bs,
+ Dump.dump(Dump.MixedHexLTR(Some("utf-8")), 1000 * 8,
dateStringLengthInBytes * 8, bb,
includeHeadingLine = true,
indicatorInfo = Some(((1000 + 12) * 8), 6 * 8)).mkString("\n")
//
@@ -85,10 +72,10 @@ class TestDump {
val bytes = "Date 年月日=2003年08月27日".getBytes("utf-8")
val lengthInBits = bytes.length * 8
- val bs = new BS(bytes)
+ val bb = ByteBuffer.wrap(bytes)
val dumpString =
- Dump.dump(Dump.MixedHexLTR(Some("utf-8")), 0, lengthInBits, bs,
+ Dump.dump(Dump.MixedHexLTR(Some("utf-8")), 0, lengthInBits, bb,
includeHeadingLine = true,
indicatorInfo = Some((0, lengthInBits))).mkString("\n")
val expected = """
@@ -104,10 +91,10 @@ class TestDump {
val bytes = "Date 年月日=2003年08月27日".getBytes("utf-8")
val lengthInBits = bytes.length * 8
- val bs = new BS(bytes)
+ val bb = ByteBuffer.wrap(bytes)
val dumpString =
- Dump.dump(Dump.MixedHexLTR(Some("utf-8")), 0, lengthInBits, bs,
+ Dump.dump(Dump.MixedHexLTR(Some("utf-8")), 0, lengthInBits, bb,
includeHeadingLine = true,
indicatorInfo = Some((8, 14 * 8))).mkString("\n")
val expected = """
@@ -123,9 +110,9 @@ class TestDump {
val bytes = "Date 年月日=2003年08月27日".getBytes("utf-32BE")
val lengthInBits = bytes.length * 8
- val bs = new BS(bytes)
+ val bb = ByteBuffer.wrap(bytes)
- val dumpString = Dump.dump(Dump.MixedHexLTR(Some("utf-32BE")), 0,
lengthInBits, bs,
+ val dumpString = Dump.dump(Dump.MixedHexLTR(Some("utf-32BE")), 0,
lengthInBits, bb,
includeHeadingLine = true).mkString("\n")
val expected = """
87654321 0011 2233 4455 6677 8899 aabb ccdd eeff
0~1~2~3~4~5~6~7~8~9~a~b~c~d~e~f~
@@ -142,9 +129,9 @@ class TestDump {
val bytes = "Date 年月日=2003年08月27日".getBytes("utf-32BE")
val lengthInBits = bytes.length * 8
- val bs = new BS(bytes)
+ val bb = ByteBuffer.wrap(bytes)
- val dumpString = Dump.dump(Dump.MixedHexLTR(Some("utf-32BE")), 0,
lengthInBits, bs,
+ val dumpString = Dump.dump(Dump.MixedHexLTR(Some("utf-32BE")), 0,
lengthInBits, bb,
includeHeadingLine = true,
indicatorInfo = Some((0, lengthInBits))).mkString("\n")
val expected = """
@@ -163,9 +150,9 @@ class TestDump {
val bytes = "Date 年月日=2003年08月27日".getBytes("utf-32BE")
val lengthInBits = bytes.length * 8
- val bs = new BS(bytes)
+ val bb = ByteBuffer.wrap(bytes)
- val dumpString = Dump.dump(Dump.MixedHexLTR(Some("utf-32BE")), 0,
lengthInBits, bs,
+ val dumpString = Dump.dump(Dump.MixedHexLTR(Some("utf-32BE")), 0,
lengthInBits, bb,
includeHeadingLine = true,
indicatorInfo = Some((16, 12 * 8))).mkString("\n")
val expected = """
@@ -184,9 +171,9 @@ class TestDump {
val bytes = "Date 年月日=2003年08月27日".getBytes("utf-16LE")
val lengthInBits = bytes.length * 8
- val bs = new BS(bytes)
+ val bb = ByteBuffer.wrap(bytes)
- val dumpString = Dump.dump(Dump.MixedHexLTR(Some("utf-16LE")), 0,
lengthInBits, bs,
+ val dumpString = Dump.dump(Dump.MixedHexLTR(Some("utf-16LE")), 0,
lengthInBits, bb,
includeHeadingLine = true).mkString("\n")
val expected = """
87654321 0011 2233 4455 6677 8899 aabb ccdd eeff
0~1~2~3~4~5~6~7~8~9~a~b~c~d~e~f~
@@ -206,8 +193,8 @@ dada 0000 0101 0817 ece2 8017 ece2 dead beef cc7a 1234
.replaceAll("\\s+", "").grouped(2)
.map { Integer.parseInt(_, 16).toByte }.toArray
val lengthInBits = bytes.length * 8
- val bs = new BS(bytes)
- val dumpString = Dump.dump(Dump.MixedHexLTR(Some("utf-8")), 0,
lengthInBits, bs,
+ val bb = ByteBuffer.wrap(bytes)
+ val dumpString = Dump.dump(Dump.MixedHexLTR(Some("utf-8")), 0,
lengthInBits, bb,
includeHeadingLine = true).mkString("\n")
val u068b = Character.toChars(0x068b).mkString
val u10907 = Character.toChars(0x10907).mkString
@@ -225,9 +212,9 @@ dada 0000 0101 0817 ece2 8017 ece2 dead beef cc7a 1234
@Test def testDump1(): Unit = {
- val bs = new BS((0 to 255).map { _.toByte }.toArray)
+ val bb = ByteBuffer.wrap((0 to 255).map { _.toByte }.toArray)
- val dumpString = Dump.dump(Dump.MixedHexLTR(None), 0, 256 * 8, bs,
includeHeadingLine = true).mkString("\n")
+ val dumpString = Dump.dump(Dump.MixedHexLTR(None), 0, 256 * 8, bb,
includeHeadingLine = true).mkString("\n")
val expected = """
87654321 0011 2233 4455 6677 8899 aabb ccdd eeff 0123456789abcdef
00000000: 0001 0203 0405 0607 0809 0a0b 0c0d 0e0f ␀␁␂␃␄␅␆␇␈␉␊␋␌␍␎␏
@@ -252,9 +239,9 @@ dada 0000 0101 0817 ece2 8017 ece2 dead beef cc7a 1234
@Test def testDump2(): Unit = {
- val bs = new BS((0 to 255).map { _.toByte }.toArray)
+ val bb = ByteBuffer.wrap((0 to 255).map { _.toByte }.toArray)
- val dumpString = Dump.dump(Dump.MixedHexLTR(None), 8, 254 * 8, bs,
includeHeadingLine = true).mkString("\n")
+ val dumpString = Dump.dump(Dump.MixedHexLTR(None), 8, 254 * 8, bb,
includeHeadingLine = true).mkString("\n")
val expected = """
87654321 0011 2233 4455 6677 8899 aabb ccdd eeff 0123456789abcdef
@@ -281,9 +268,9 @@ dada 0000 0101 0817 ece2 8017 ece2 dead beef cc7a 1234
@Test def testDump3(): Unit = {
- val bs = new BS((0 to 255).map { _.toByte }.toArray)
+ val bb = ByteBuffer.wrap((0 to 255).map { _.toByte }.toArray)
- val dumpString = Dump.dump(Dump.MixedHexLTR(None), 50, (191 * 8) - 5, bs,
includeHeadingLine = true).mkString("\n")
+ val dumpString = Dump.dump(Dump.MixedHexLTR(None), 50, (191 * 8) - 5, bb,
includeHeadingLine = true).mkString("\n")
val expected = """
87654321 0011 2233 4455 6677 8899 aabb ccdd eeff 0123456789abcdef
00000000: 0001 0203 0405 0607 0809 ␀␁␂␃␄␅␆␇␈␉
@@ -306,9 +293,9 @@ dada 0000 0101 0817 ece2 8017 ece2 dead beef cc7a 1234
@Test def testDump4(): Unit = {
- val bs = new BS((0 to 255).map { _.toByte }.toArray)
+ val bb = ByteBuffer.wrap((0 to 255).map { _.toByte }.toArray)
- val dumpString = Dump.dump(Dump.MixedHexLTR(None), 50, 51, bs,
includeHeadingLine = true).mkString("\n")
+ val dumpString = Dump.dump(Dump.MixedHexLTR(None), 50, 51, bb,
includeHeadingLine = true).mkString("\n")
val expected = """
87654321 0011 2233 4455 6677 8899 aabb ccdd eeff 0123456789abcdef
00000000: 0001 0203 0405 0607 ␀␁␂␃␄␅␆␇
@@ -318,9 +305,9 @@ dada 0000 0101 0817 ece2 8017 ece2 dead beef cc7a 1234
@Test def testDumpLSBFirst1(): Unit = {
- val bs = new BS((0 to 255).map { _.toByte }.toArray)
+ val bb = ByteBuffer.wrap((0 to 255).map { _.toByte }.toArray)
- val dumpString = Dump.dump(Dump.MixedHexRTL(None), 50, 51, bs,
includeHeadingLine = true).mkString("\n")
+ val dumpString = Dump.dump(Dump.MixedHexRTL(None), 50, 51, bb,
includeHeadingLine = true).mkString("\n")
val expected = """
fedcba9876543210 ffee ddcc bbaa 9988 7766 5544 3322 1100 87654321
␇␆␅␄␃␂␁␀ 0706 0504 0302 0100 :00000000
@@ -337,9 +324,9 @@ fedcba9876543210 ffee ddcc bbaa 9988 7766 5544 3322 1100
87654321
.split("\\s+")
.map { Integer.parseInt(_, 16).toByte }
.toArray
- val bs = new BS(bytes)
+ val bb = ByteBuffer.wrap(bytes)
- val dumpString = Dump.dump(Dump.MixedHexRTL(None), 0, bytes.length * 8,
bs, includeHeadingLine = true).mkString("\n")
+ val dumpString = Dump.dump(Dump.MixedHexRTL(None), 0, bytes.length * 8,
bb, includeHeadingLine = true).mkString("\n")
val expected = """
fedcba9876543210 ffee ddcc bbaa 9988 7766 5544 3322 1100 87654321
cø€␀␀␀wü␚’gU€␀gä 63f8 8000 0000 77fc 1a92 6755 8000 67e4 :00000000
@@ -350,9 +337,9 @@ cø€␀␀␀wü␚’gU€␀gä 63f8 8000 0000 77fc 1a92 6755 8000 67e4
:00
@Test def testDumpTextLine1(): Unit = {
val data = (0 to 255).map { _.toByte }.toArray
- val bs = new BS(data)
+ val bb = ByteBuffer.wrap(data)
val lengthInbits = data.length * 8
- val dump = Dump.dump(Dump.TextOnly(None), 0, lengthInbits, bs)
+ val dump = Dump.dump(Dump.TextOnly(None), 0, lengthInbits, bb)
val dumpString = dump.mkString("\n")
val expected =
"""␀␁␂␃␄␅␆␇␈␉␊␋␌␍␎␏␐␑␒␓␔␕␖␗␘␙␚␛␜␝␞␟␣!"#$%&'()*+,-./0123456789:;<=>?@ABCDE"""
//
"""FGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~␡€Ɓ‚ƒ„…†‡ˆ‰Š‹ŒƍŽƏƐ‘’“”•–—˜™š›œƝžŸ␢¡¢£¤¥¦§¨©ª«¬-®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ""".replace("\r\n",
"\n")
@@ -361,11 +348,11 @@ cø€␀␀␀wü␚’gU€␀gä 63f8 8000 0000 77fc 1a92 6755 8000 67e4
:00
@Test def testDumpTextLine2(): Unit = {
val data = (0 to 255).map { _.toByte }.toArray
- val bs = new BS(data)
+ val bb = ByteBuffer.wrap(data)
val lengthInbits = data.length * 8
val indicatorStartAtByte0b = 32L
val indicatorLengthInBytes = 8
- val dump = Dump.dump(Dump.TextOnly(None), 0, lengthInbits, bs,
+ val dump = Dump.dump(Dump.TextOnly(None), 0, lengthInbits, bb,
indicatorInfo = Some((indicatorStartAtByte0b * 8, indicatorLengthInBytes
* 8)))
val dumpString = dump.mkString("\n")
val expected =
@@ -377,11 +364,11 @@ cø€␀␀␀wü␚’gU€␀gä 63f8 8000 0000 77fc 1a92 6755 8000 67e4
:00
@Test def testDumpTextLine3(): Unit = {
val data = (32 to 255).map { _.toByte }.toArray
- val bs = new BS(data)
+ val bb = ByteBuffer.wrap(data)
val lengthInbits = data.length * 8
val indicatorStartAtByte0b = 0L
val indicatorLengthInBytes = 1000
- val dump = Dump.dump(Dump.TextOnly(None), 0, lengthInbits, bs,
+ val dump = Dump.dump(Dump.TextOnly(None), 0, lengthInbits, bb,
indicatorInfo = Some((indicatorStartAtByte0b * 8, indicatorLengthInBytes
* 8)))
val dumpString = dump.mkString("\n")
@@ -404,11 +391,11 @@ cø€␀␀␀wü␚’gU€␀gä 63f8 8000 0000 77fc 1a92 6755 8000 67e4
:00
@Test def testDumpTextLine4(): Unit = {
val data = (32 to 63).map { _.toByte }.toArray
- val bs = new BS(data)
+ val bb = ByteBuffer.wrap(data)
val lengthInbits = data.length * 8
val indicatorStartAtByte0b = 0L
val indicatorLengthInBytes = 1000
- val dump = Dump.dump(Dump.TextOnly(None), 0, lengthInbits, bs,
+ val dump = Dump.dump(Dump.TextOnly(None), 0, lengthInbits, bb,
indicatorInfo = Some((indicatorStartAtByte0b * 8, indicatorLengthInBytes
* 8)))
val dumpString = dump.mkString("\n")
//
@@ -428,11 +415,11 @@ cø€␀␀␀wü␚’gU€␀gä 63f8 8000 0000 77fc 1a92 6755 8000 67e4
:00
@Test def testDumpTextLine5(): Unit = {
val data = (32 to 63).map { _.toByte }.toArray
- val bs = new BS(data)
+ val bb = ByteBuffer.wrap(data)
val lengthInbits = data.length * 8
val indicatorStartAtByte0b = 5L
val indicatorLengthInBytes = 0
- val dump = Dump.dump(Dump.TextOnly(None), 0, lengthInbits, bs,
+ val dump = Dump.dump(Dump.TextOnly(None), 0, lengthInbits, bb,
indicatorInfo = Some((indicatorStartAtByte0b * 8, indicatorLengthInBytes
* 8)))
val dumpString = dump.mkString("\n")
//
@@ -448,11 +435,11 @@ cø€␀␀␀wü␚’gU€␀gä 63f8 8000 0000 77fc 1a92 6755 8000 67e4
:00
@Test def testDumpTextLine6(): Unit = {
val data = (32 to 63).map { _.toByte }.toArray
- val bs = new BS(data)
+ val bb = ByteBuffer.wrap(data)
val lengthInbits = data.length * 8
val indicatorStartAtByte0b = 5L
val indicatorLengthInBytes = 1
- val dump = Dump.dump(Dump.TextOnly(None), 0, lengthInbits, bs,
+ val dump = Dump.dump(Dump.TextOnly(None), 0, lengthInbits, bb,
indicatorInfo = Some((indicatorStartAtByte0b * 8, indicatorLengthInBytes
* 8)))
val dumpString = dump.mkString("\n")
val expected =
@@ -467,9 +454,9 @@ cø€␀␀␀wü␚’gU€␀gä 63f8 8000 0000 77fc 1a92 6755 8000 67e4
:00
0000 0101 0817 dead beef cc7a"""
.replaceAll("\\s+", "").grouped(2)
.map { Integer.parseInt(_, 16).toByte }.toArray
- val bs = new BS(data)
+ val bb = ByteBuffer.wrap(data)
val lengthInbits = data.length * 8
- val dumpString = Dump.dump(Dump.TextOnly(Some("utf-8")), 0, lengthInbits,
bs).mkString("\n")
+ val dumpString = Dump.dump(Dump.TextOnly(Some("utf-8")), 0, lengthInbits,
bb).mkString("\n")
val uUnknown = 0xfffd
val arrayOfDecodedChars1 =
Array(0x068b, 0x10907, 0x10bf8b, uUnknown, 0x007a)
diff --git
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/layers/GZipTransformer.scala
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/layers/GZipTransformer.scala
index 83083b0..346c51b 100644
---
a/daffodil-runtime1/src/main/scala/org/apache/daffodil/layers/GZipTransformer.scala
+++
b/daffodil-runtime1/src/main/scala/org/apache/daffodil/layers/GZipTransformer.scala
@@ -43,7 +43,7 @@ class GZIPTransformer(layerLengthInBytesEv:
LayerLengthInBytesEv)
}
override protected def wrapLayerEncoder(jos: java.io.OutputStream):
java.io.OutputStream = {
- val s = new java.util.zip.GZIPOutputStream(jos)
+ val s = GZIPFixedOutputStream(jos)
s
}
@@ -69,3 +69,84 @@ object GZIPTransformerFactory
xformer
}
}
+
+
+object GZIPFixedOutputStream {
+
+ private val fixIsNeeded = !scala.util.Properties.isJavaAtLeast("16")
+
+ /**
+ * Create a GZIPOutputStream that, if necessary, proxies writes through an
+ * OutputStream that fixes inconsistencies between Java versions
+ */
+ def apply(os: java.io.OutputStream) = {
+ val fixedOS = if (fixIsNeeded) new GZIPFixedOutputStream(os) else os
+ new java.util.zip.GZIPOutputStream(fixedOS)
+ }
+}
+
+/**
+ * Prior to Java 16, the java.util.zip.GZIPOutputStream wrote a value of zero for
+ * the OS field in the header (byte index 9). In Java 16, this was changed to a
+ * value of 255 to better abide by the GZIP specification. Unfortunately, this
+ * means unparsed data using a GZIP layer might have a single byte difference,
+ * depending on the Java version used. This can lead to inconsistent behavior or
+ * failures in tests that expect a certain byte value.
+ *
+ * To resolve this issue, we create this GZIPFixedOutputStream. This should wrap
+ * the underlying OutputStream and be passed as the OutputStream to the
+ * GZIPOutputStream. When the GZIPOutputStream writes the byte at index 9 to this
+ * GZIPFixedOutputStream, this will always write a value of 255, making all Java
+ * versions prior to 16 consistent with Java 16+ behavior.
+ */
+class GZIPFixedOutputStream private (os: java.io.OutputStream) extends
java.io.OutputStream {
+
+ /**
+ * The position of the next byte to be written. If this is negative,
+ * that means we have already fixed the output and everything should just
+ * pass straight through.
+ */
+ private var bytePosition = 0
+
+ override def close(): Unit = os.close()
+ override def flush(): Unit = os.flush()
+
+ override def write(b: Array[Byte], off: Int, len: Int): Unit = {
+ if (bytePosition < 0) {
+ // The bad byte has been fixed, pass all writes directly through to the
+ // underlying OutputStream. This may be more efficient than the default
+ // OutputStream write() function, which writes the bytes from the array
+ // one at a time
+ os.write(b, off, len)
+ } else {
+ // The bad byte has not been fixed yet. Unless a newer version of Java
+ // has made changes, the GZIPOutputStream will have passed in a 10 byte
+ // array to this function that includes the bad byte. Let's just write
+ // that array using the default write(array, off, len) method that writes
+ // these bytes one at a time and will call the write(int) method that will fix
+ // that byte. Calling write() one at a time is maybe inefficient but for
+ // such a small array it should not have a noticeable effect.
+ super.write(b, off, len)
+ }
+ }
+
+ override def write(b: Int): Unit = {
+ if (bytePosition < 0) {
+ // The bad byte has already been fixed, simply pass this byte through to
+ // the underlying OutputStream
+ os.write(b)
+ } else if (bytePosition < 9) {
+ // The bad byte has not been fixed, and we haven't reached it yet, simply
+ // pass this byte through and increment our byte position
+ os.write(b)
+ bytePosition += 1
+ } else if (bytePosition == 9) {
+ // This is the bad byte, it is a 0 on some Java versions. Write 255
+ // instead to match Java 16+ behavior. Also, set bytePosition to -1 to
+ // signify that we have fixed the bad byte and that all other writes
+ // should just pass directly to the underlying OutputStream
+ os.write(255)
+ bytePosition = -1
+ }
+ }
+}
diff --git
a/daffodil-test/src/test/resources/org/apache/daffodil/layers/layers.tdml
b/daffodil-test/src/test/resources/org/apache/daffodil/layers/layers.tdml
index bd9cf75..90a264b 100644
--- a/daffodil-test/src/test/resources/org/apache/daffodil/layers/layers.tdml
+++ b/daffodil-test/src/test/resources/org/apache/daffodil/layers/layers.tdml
@@ -70,7 +70,7 @@
<tdml:parserTestCase name="layers1" root="root" model="s1">
<tdml:document>
<tdml:documentPart type="byte"><![CDATA[
-
000000D41F8B08000000000000004D904176C3200C44AF3207C8F33DBA6F0F40CCD8568391
+
000000D41F8B08000000000000FF4D904176C3200C44AF3207C8F33DBA6F0F40CCD8568391
8B44D3DC3EC2C9A2EFB1013EF3357C6E6288F5DDCD61BA137BCA443FE0FC73F8967C5C4B75
D6CC0C575C8984857714A93414ADEB848F25D800B794036045632A67C605E2B86B2F19553D
805FBE889F2ECE70E2AA4DEA3AA2E3519EF065842E58D2AEDD02530F8DB640832A8F26F3B9
@@ -383,7 +383,7 @@ jones,arya,cat,1986-02-19
We gzip that, and then we must prepend that with the length (as a binary
4-byte int) before we base64 encode.
-->
- <tdml:documentPart type="text"
replaceDFDLEntities="true"><![CDATA[AAAAcx+LCAAAAAAAAAAtyUEKgCAQheG94E1mIDWittG+M0xpaNQIo5tuX0Kb98P7LioVjiTf3sn7%CR;%LF;K8CyzlqVO9UIkrcgFTYh9pnBTOOInUPba3XmyOX7WiEGlqfxgJ1B6xpzKEDyEOxUf7JoJq1e/RI4%CR;%LF;wXIAAAA=--END--]]></tdml:documentPart>
+ <tdml:documentPart type="text"
replaceDFDLEntities="true"><![CDATA[AAAAcx+LCAAAAAAAAP8tyUEKgCAQheG94E1mIDWittG+M0xpaNQIo5tuX0Kb98P7LioVjiTf3sn7%CR;%LF;K8CyzlqVO9UIkrcgFTYh9pnBTOOInUPba3XmyOX7WiEGlqfxgJ1B6xpzKEDyEOxUf7JoJq1e/RI4%CR;%LF;wXIAAAA=--END--]]></tdml:documentPart>
</tdml:document>
<tdml:infoset>
<tdml:dfdlInfoset>