This is an automated email from the ASF dual-hosted git repository.
ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-compress.git
The following commit(s) were added to refs/heads/master by this push:
new 59dede8d0 Add
GzipCompressorInputStream.Builder.setOnMemberStart(Consumer) to monitor member
parsing
59dede8d0 is described below
commit 59dede8d0550d8b57eff15e018c4a5606b4bc334
Author: Gary Gregory <[email protected]>
AuthorDate: Thu Jan 2 09:59:35 2025 -0500
Add GzipCompressorInputStream.Builder.setOnMemberStart(Consumer) to
monitor member parsing
Add GzipCompressorInputStream.Builder.setOnMemberEnd(Consumer) to
monitor member parsing
---
src/changes/changes.xml | 2 +
.../gzip/GzipCompressorInputStream.java | 121 +++++++-----
.../gzip/GzipCompressorOutputStream.java | 2 +
.../compress/compressors/gzip/GzipParameters.java | 170 ++++++++++-------
.../gzip/GzipCompressorInputStreamTest.java | 208 +++++++++++++++++++++
.../gzip/GzipCompressorOutputStreamTest.java | 10 +-
.../apache/commons/compress/gzip/members-size-0.gz | Bin 0 -> 117 bytes
7 files changed, 395 insertions(+), 118 deletions(-)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index d3c5df614..6b35bee96 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -72,6 +72,8 @@ The <action> type attribute can be add,update,fix,remove.
<action type="add" dev="ggregory" due-to="Danny Deschenes, Gary
Gregory">GzipCompressorInputStream writes the modification time (MTIME) the
value incorrectly divided by 1,000.</action>
<action type="add" dev="ggregory" due-to="Danny Deschenes, Gary
Gregory">Add optional FHCRC to GZIP header #627.</action>
<action type="add" dev="ggregory" due-to="Gary Gregory">Add
GzipCompressorInputStream.Builder allowing to customize the file name and
comment Charsets.</action>
+ <action type="add" dev="ggregory" due-to="Gary Gregory">Add
GzipCompressorInputStream.Builder.setOnMemberStart(IOConsumer) to monitor member
parsing.</action>
+ <action type="add" dev="ggregory" due-to="Gary Gregory">Add
GzipCompressorInputStream.Builder.setOnMemberEnd(IOConsumer) to monitor member
parsing.</action>
<!-- UPDATE -->
<action type="update" dev="ggregory" due-to="Dependabot, Gary
Gregory">Bump org.apache.commons:commons-parent from 72 to 78 #563, #567, #574,
#582, #587, #595.</action>
<action type="update" dev="ggregory" due-to="Dependabot, Gary
Gregory">Bump com.github.luben:zstd-jni from 1.5.6-4 to 1.5.6-8 #565, #578,
#601, #616.</action>
diff --git
a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorInputStream.java
b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorInputStream.java
index 1ba1828ee..f03d82b56 100644
---
a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorInputStream.java
+++
b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorInputStream.java
@@ -34,7 +34,9 @@ import java.util.zip.Inflater;
import org.apache.commons.compress.compressors.CompressorInputStream;
import org.apache.commons.compress.utils.ByteUtils;
import org.apache.commons.compress.utils.InputStreamStatistics;
+import org.apache.commons.io.IOUtils;
import org.apache.commons.io.build.AbstractStreamBuilder;
+import org.apache.commons.io.function.IOConsumer;
import org.apache.commons.io.input.BoundedInputStream;
/**
@@ -45,29 +47,29 @@ import org.apache.commons.io.input.BoundedInputStream;
* </p>
*
* <p>
- * Instead of using {@code java.util.zip.GZIPInputStream}, this class has its
own GZIP member decoder.
- * The actual decompression is done with {@link java.util.zip.Inflater}.
+ * Instead of using {@code java.util.zip.GZIPInputStream}, this class has its
own GZIP member decoder. The actual decompression is done with
+ * {@link java.util.zip.Inflater}.
* </p>
*
* <p>
- * If you use the constructor {@code GzipCompressorInputStream(in)} or {@code
GzipCompressorInputStream(in, false)},
- * then {@link #read} will return -1 as soon as the first encoded GZIP member
has been completely read. In this case,
- * if the underlying input stream supports {@link InputStream#mark mark()} and
{@link InputStream#reset reset()},
- * then it will be left positioned just after the end of the encoded GZIP
member; otherwise, some indeterminate number
- * of extra bytes following the encoded GZIP member will have been consumed
and discarded.
+ * If you use the constructor {@code GzipCompressorInputStream(in)} or {@code
GzipCompressorInputStream(in, false)}, then {@link #read} will return -1 as soon
+ * as the first encoded GZIP member has been completely read. In this case, if
the underlying input stream supports {@link InputStream#mark mark()} and
+ * {@link InputStream#reset reset()}, then it will be left positioned just
after the end of the encoded GZIP member; otherwise, some indeterminate number
of
+ * extra bytes following the encoded GZIP member will have been consumed and
discarded.
* </p>
*
* <p>
- * If you use the constructor {@code GzipCompressorInputStream(in, true)} then
{@link #read} will return -1 only after
- * the entire input stream has been exhausted; any bytes that follow an
encoded GZIP member must constitute a new encoded
- * GZIP member, otherwise an {@link IOException} is thrown. The data read from
a stream constructed this way will consist
- * of the concatenated data of all of the encoded GZIP members in order.
+ * If you use the constructor {@code GzipCompressorInputStream(in, true)} then
{@link #read} will return -1 only after the entire input stream has been
+ * exhausted; any bytes that follow an encoded GZIP member must constitute a
new encoded GZIP member, otherwise an {@link IOException} is thrown. The data
read
+ * from a stream constructed this way will consist of the concatenated data of
all of the encoded GZIP members in order.
* </p>
*
* @see <a href="https://datatracker.ietf.org/doc/html/rfc1952">RFC 1952 GZIP
File Format Specification</a>
*/
public class GzipCompressorInputStream extends CompressorInputStream
implements InputStreamStatistics {
+ private static final IOConsumer<GzipCompressorInputStream> NOOP =
IOConsumer.noop();
+
/**
* Constructs a new builder of {@link GzipCompressorInputStream}.
*
@@ -80,6 +82,10 @@ public class GzipCompressorInputStream extends
CompressorInputStream implements
private Charset fileNameCharset = GzipUtils.GZIP_ENCODING;
+ private IOConsumer<GzipCompressorInputStream> onMemberStart;
+
+ private IOConsumer<GzipCompressorInputStream> onMemberEnd;
+
/**
* Constructs a new builder of {@link GzipCompressorInputStream}.
*/
@@ -87,10 +93,9 @@ public class GzipCompressorInputStream extends
CompressorInputStream implements
// empty
}
- @SuppressWarnings("resource") // caller closes
@Override
public GzipCompressorInputStream get() throws IOException {
- return new GzipCompressorInputStream(getInputStream(),
decompressConcatenated, fileNameCharset);
+ return new GzipCompressorInputStream(this);
}
/**
@@ -121,6 +126,34 @@ public class GzipCompressorInputStream extends
CompressorInputStream implements
this.fileNameCharset = fileNameCharset;
return this;
}
+
+ /**
+ * Sets the consumer called when a member trailer is parsed, that is, once
a member has been completely read and its trailer validated.
+ * The trailer values {@code trailerCrc} and {@code trailerISize} are set in the current {@link GzipParameters} before the consumer is called.
+ *
+ * @param onMemberEnd The consumer.
+ * @return this instance.
+ * @see GzipCompressorInputStream#getMetaData()
+ */
+ public Builder setOnMemberEnd(final
IOConsumer<GzipCompressorInputStream> onMemberEnd) {
+ this.onMemberEnd = onMemberEnd;
+ return this;
+ }
+
+ /**
+ * Sets the consumer called when a member header is parsed.
+ * <p>
+ * Note that the member size is unknown at call time, it is stored in a
member <em>trailer</em> and used for validation.
+ * </p>
+ *
+ * @param onMemberStart The consumer.
+ * @return this instance.
+ * @see GzipCompressorInputStream#getMetaData()
+ */
+ public Builder setOnMemberStart(final
IOConsumer<GzipCompressorInputStream> onMemberStart) {
+ this.onMemberStart = onMemberStart;
+ return this;
+ }
}
/**
@@ -175,8 +208,7 @@ public class GzipCompressorInputStream extends
CompressorInputStream implements
private final Charset fileNameCharset;
/**
- * Compressed input stream, possibly wrapped in a
- * BufferedInputStream, always wrapped in countingStream above
+ * Compressed input stream, possibly wrapped in a BufferedInputStream,
always wrapped in countingStream above
*/
private final InputStream in;
@@ -186,7 +218,11 @@ public class GzipCompressorInputStream extends
CompressorInputStream implements
/** Buffer for no-argument read method. */
private final byte[] oneByte = new byte[1];
- private final GzipParameters parameters = new GzipParameters();
+ private GzipParameters parameters;
+
+ private final IOConsumer<GzipCompressorInputStream> onMemberStart;
+
+ private final IOConsumer<GzipCompressorInputStream> onMemberEnd;
/**
* Constructs a new input stream that decompresses gzip-compressed data
from the specified input stream.
@@ -198,7 +234,7 @@ public class GzipCompressorInputStream extends
CompressorInputStream implements
* @throws IOException if the stream could not be created
*/
public GzipCompressorInputStream(final InputStream inputStream) throws
IOException {
- this(inputStream, false, GzipUtils.GZIP_ENCODING);
+ this(builder().setInputStream(inputStream));
}
/**
@@ -216,16 +252,19 @@ public class GzipCompressorInputStream extends
CompressorInputStream implements
*/
@Deprecated
public GzipCompressorInputStream(final InputStream inputStream, final
boolean decompressConcatenated) throws IOException {
- this(inputStream, decompressConcatenated, GzipUtils.GZIP_ENCODING);
+
this(builder().setInputStream(inputStream).setDecompressConcatenated(decompressConcatenated));
}
- private GzipCompressorInputStream(final InputStream inputStream, final
boolean decompressConcatenated, final Charset fileNameCharset) throws
IOException {
- countingStream =
BoundedInputStream.builder().setInputStream(inputStream).get();
+ @SuppressWarnings("resource") // caller closes
+ private GzipCompressorInputStream(final Builder builder) throws
IOException {
+ countingStream =
BoundedInputStream.builder().setInputStream(builder.getInputStream()).get();
// Mark support is strictly needed for concatenated files only,
// but it's simpler if it is always available.
in = countingStream.markSupported() ? countingStream : new
BufferedInputStream(countingStream);
- this.decompressConcatenated = decompressConcatenated;
- this.fileNameCharset = fileNameCharset;
+ this.decompressConcatenated = builder.decompressConcatenated;
+ this.fileNameCharset = builder.fileNameCharset;
+ this.onMemberStart = builder.onMemberStart != null ?
builder.onMemberStart : NOOP;
+ this.onMemberEnd = builder.onMemberEnd != null ? builder.onMemberEnd :
NOOP;
init(true);
}
@@ -269,32 +308,28 @@ public class GzipCompressorInputStream extends
CompressorInputStream implements
if (!isFirstMember && !decompressConcatenated) { // at least one must
be true
throw new IllegalStateException("Unexpected: isFirstMember and
decompressConcatenated are both false.");
}
- parameters.setFileNameCharset(fileNameCharset);
// Check the magic bytes without a possibility of EOFException.
final int magic0 = in.read();
-
// If end of input was reached after decompressing at least
// one .gz member, we have reached the end of the file successfully.
if (magic0 == -1 && !isFirstMember) {
return false;
}
-
if (magic0 != GzipUtils.ID1 || in.read() != GzipUtils.ID2) {
throw new IOException(isFirstMember ? "Input is not in the .gz
format." : "Unexpected data after a valid .gz stream.");
}
-
+ parameters = new GzipParameters();
+ parameters.setFileNameCharset(fileNameCharset);
// Parsing the rest of the header may throw EOFException.
final DataInput inData = new DataInputStream(in);
final int method = inData.readUnsignedByte();
if (method != Deflater.DEFLATED) {
throw new IOException("Unsupported compression method " + method +
" in the .gz header");
}
-
final int flg = inData.readUnsignedByte();
if ((flg & GzipUtils.FRESERVED) != 0) {
throw new IOException("Reserved flags are set in the .gz header.");
}
-
parameters.setModificationTime(ByteUtils.fromLittleEndian(inData, 4));
switch (inData.readUnsignedByte()) { // extra flags
case GzipUtils.XFL_MAX_COMPRESSION:
@@ -308,7 +343,6 @@ public class GzipCompressorInputStream extends
CompressorInputStream implements
break;
}
parameters.setOperatingSystem(inData.readUnsignedByte());
-
// Extra field
if ((flg & GzipUtils.FEXTRA) != 0) {
int xlen = inData.readUnsignedByte();
@@ -317,17 +351,14 @@ public class GzipCompressorInputStream extends
CompressorInputStream implements
inData.readFully(extra);
parameters.setExtraField(ExtraField.fromBytes(extra));
}
-
// Original file name
if ((flg & GzipUtils.FNAME) != 0) {
parameters.setFileName(new String(readToNull(inData),
parameters.getFileNameCharset()));
}
-
// Comment
if ((flg & GzipUtils.FCOMMENT) != 0) {
parameters.setComment(new String(readToNull(inData),
parameters.getFileNameCharset()));
}
-
// Header "CRC16" which is actually a truncated CRC32 (which isn't
// as good as real CRC16). I don't know if any encoder implementation
// sets this, so it's not worth trying to verify it. GNU gzip 1.4
@@ -337,11 +368,10 @@ public class GzipCompressorInputStream extends
CompressorInputStream implements
parameters.setHeaderCRC(true);
inData.readShort();
}
-
// Reset
inflater.reset();
crc.reset();
-
+ onMemberStart.accept(this);
return true;
}
@@ -397,30 +427,25 @@ public class GzipCompressorInputStream extends
CompressorInputStream implements
// We may have read too many bytes. Rewind the read
// position to match the actual amount used.
in.reset();
-
final int skipAmount = bufUsed - inflater.getRemaining();
- if (org.apache.commons.io.IOUtils.skip(in, skipAmount) !=
skipAmount) {
+ if (IOUtils.skip(in, skipAmount) != skipAmount) {
throw new IOException();
}
-
bufUsed = 0;
-
final DataInput inData = new DataInputStream(in);
-
// CRC32
- final long crcStored = ByteUtils.fromLittleEndian(inData, 4);
-
- if (crcStored != crc.getValue()) {
+ final long trailerCrc = ByteUtils.fromLittleEndian(inData, 4);
+ if (trailerCrc != crc.getValue()) {
throw new IOException("Gzip-compressed data is corrupt
(CRC32 error).");
}
-
- // Uncompressed size modulo 2^32 (ISIZE in the spec)
- final long isize = ByteUtils.fromLittleEndian(inData, 4);
-
- if (isize != (inflater.getBytesWritten() & 0xffffffffL)) {
+ // Uncompressed size modulo 2^32, ISIZE in the RFC.
+ final long iSize = ByteUtils.fromLittleEndian(inData, 4);
+ if (iSize != (inflater.getBytesWritten() & 0xffffffffL)) {
throw new IOException("Gzip-compressed data is corrupt
(uncompressed size mismatch).");
}
-
+ parameters.setTrailerCrc(trailerCrc);
+ parameters.setTrailerISize(iSize);
+ onMemberEnd.accept(this);
// See if this is the end of the file.
if (!decompressConcatenated || !init(false)) {
inflater.end();
diff --git
a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java
b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java
index 1f501e90e..05d823745 100644
---
a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java
+++
b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStream.java
@@ -105,6 +105,7 @@ public class GzipCompressorOutputStream extends
CompressorOutputStream<OutputStr
deflate();
}
writeMemberTrailer();
+ deflater.reset();
}
}
@@ -125,6 +126,7 @@ public class GzipCompressorOutputStream extends
CompressorOutputStream<OutputStr
*/
@Override
public void write(final byte[] buffer, final int offset, final int length)
throws IOException {
+ checkOpen();
if (deflater.finished()) {
throw new IOException("Cannot write more data, the end of the
compressed data stream has been reached.");
}
diff --git
a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipParameters.java
b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipParameters.java
index 7ed63d19c..516dc8451 100644
---
a/src/main/java/org/apache/commons/compress/compressors/gzip/GzipParameters.java
+++
b/src/main/java/org/apache/commons/compress/compressors/gzip/GzipParameters.java
@@ -64,11 +64,10 @@ public class GzipParameters {
*/
public enum OS {
- // @formatter:off
/**
- * 0: FAT filesystem (MS-DOS, OS/2, NT/Win32).
+ * 13: Acorn RISCOS.
*/
- FAT(OS_FAT),
+ ACORN_RISCOS(OS_ACORN_RISCOS),
/**
* 1: Amiga.
@@ -76,24 +75,20 @@ public class GzipParameters {
AMIGA(OS_AMIGA),
/**
- * 2: VMS (or OpenVMS).
- */
- VMS(OS_VMS),
-
- /**
- * 3: Unix.
+ * 5: Atari TOS.
*/
- UNIX(OS_UNIX),
+ ATARI_TOS(OS_ATARI_TOS),
/**
- * 4: VM/CMS.
+ * 9: CP/M.
*/
- VM_CMS(OS_VM_CMS),
+ CPM(OS_CPM),
+ // @formatter:off
/**
- * 5: Atari TOS.
+ * 0: FAT filesystem (MS-DOS, OS/2, NT/Win32).
*/
- ATARI_TOS(OS_ATARI_TOS),
+ FAT(OS_FAT),
/**
* 6: HPFS filesystem (OS/2, NT).
@@ -106,14 +101,14 @@ public class GzipParameters {
MACINTOSH(OS_MACINTOSH),
/**
- * 8: Z-System.
+ * 11: NTFS filesystem (NT).
*/
- Z_SYSTEM(OS_Z_SYSTEM),
+ NTFS(OS_NTFS),
/**
- * 9: CP/M.
+ * 12: QDOS.
*/
- CPM(OS_CPM),
+ QDOS(OS_QDOS),
/**
* 10: TOPS-20.
@@ -121,24 +116,29 @@ public class GzipParameters {
TOPS_20(OS_TOPS_20),
/**
- * 11: NTFS filesystem (NT).
+ * 3: Unix.
*/
- NTFS(OS_NTFS),
+ UNIX(OS_UNIX),
/**
- * 12: QDOS.
+ * 255: unknown.
*/
- QDOS(OS_QDOS),
+ UNKNOWN(OS_UNKNOWN),
/**
- * 13: Acorn RISCOS.
+ * 4: VM/CMS.
*/
- ACORN_RISCOS(OS_ACORN_RISCOS),
+ VM_CMS(OS_VM_CMS),
/**
- * 255: unknown.
+ * 2: VMS (or OpenVMS).
+ */
+ VMS(OS_VMS),
+
+ /**
+ * 8: Z-System.
*/
- UNKNOWN(OS_UNKNOWN);
+ Z_SYSTEM(OS_Z_SYSTEM);
// @formatter:on
/**
@@ -209,9 +209,9 @@ public class GzipParameters {
private static final int BUFFER_SIZE = 512;
/**
- * 0: FAT.
+ * 13: Acorn RISCOS.
*/
- private static final int OS_FAT = 0;
+ private static final int OS_ACORN_RISCOS = 13;
/**
* 1: Amiga.
@@ -219,24 +219,19 @@ public class GzipParameters {
private static final int OS_AMIGA = 1;
/**
- * 2: VMS (or OpenVMS).
- */
- private static final int OS_VMS = 2;
-
- /**
- * 3: Unix.
+ * 5: Atari TOS.
*/
- private static final int OS_UNIX = 3;
+ private static final int OS_ATARI_TOS = 5;
/**
- * 4: VM/CMS.
+ * 9: CP/M.
*/
- private static final int OS_VM_CMS = 4;
+ private static final int OS_CPM = 9;
/**
- * 5: Atari TOS.
+ * 0: FAT.
*/
- private static final int OS_ATARI_TOS = 5;
+ private static final int OS_FAT = 0;
/**
* 6: HPFS filesystem (OS/2, NT).
@@ -249,14 +244,14 @@ public class GzipParameters {
private static final int OS_MACINTOSH = 7;
/**
- * 8: Z-System.
+ * 11: NTFS filesystem (NT).
*/
- private static final int OS_Z_SYSTEM = 8;
+ private static final int OS_NTFS = 11;
/**
- * 9: CP/M.
+ * 12: QDOS.
*/
- private static final int OS_CPM = 9;
+ private static final int OS_QDOS = 12;
/**
* 10: TOPS-20.
@@ -264,27 +259,39 @@ public class GzipParameters {
private static final int OS_TOPS_20 = 10;
/**
- * 11: NTFS filesystem (NT).
+ * 3: Unix.
*/
- private static final int OS_NTFS = 11;
+ private static final int OS_UNIX = 3;
/**
- * 12: QDOS.
+ * 255: unknown.
*/
- private static final int OS_QDOS = 12;
+ private static final int OS_UNKNOWN = 255;
/**
- * 13: Acorn RISCOS.
+ * 4: VM/CMS.
*/
- private static final int OS_ACORN_RISCOS = 13;
+ private static final int OS_VM_CMS = 4;
/**
- * 255: unknown.
+ * 2: VMS (or OpenVMS).
*/
- private static final int OS_UNKNOWN = 255;
+ private static final int OS_VMS = 2;
- private int compressionLevel = Deflater.DEFAULT_COMPRESSION;
+ /**
+ * 8: Z-System.
+ */
+ private static final int OS_Z_SYSTEM = 8;
+
+ private int bufferSize = BUFFER_SIZE;
+ private String comment;
+ private int compressionLevel = Deflater.DEFAULT_COMPRESSION;
+ private int deflateStrategy = Deflater.DEFAULT_STRATEGY;
+ private ExtraField extraField;
+ private String fileName;
+ private Charset fileNameCharset = GzipUtils.GZIP_ENCODING;
+ private boolean headerCrc;
/**
* The most recent modification time (MTIME) of the original file being
compressed.
* <p>
@@ -294,14 +301,9 @@ public class GzipParameters {
* </p>
*/
private Instant modificationInstant = Instant.EPOCH;
- private ExtraField extraField;
- private String fileName;
- private Charset fileNameCharset = GzipUtils.GZIP_ENCODING;
- private String comment;
private OS operatingSystem = OS.UNKNOWN; // Unknown OS by default
- private int bufferSize = BUFFER_SIZE;
- private int deflateStrategy = Deflater.DEFAULT_STRATEGY;
- private boolean headerCRC;
+ private long trailerCrc;
+ private long trailerISize;
@Override
public boolean equals(final Object obj) {
@@ -314,8 +316,9 @@ public class GzipParameters {
final GzipParameters other = (GzipParameters) obj;
return bufferSize == other.bufferSize && Objects.equals(comment,
other.comment) && compressionLevel == other.compressionLevel
&& deflateStrategy == other.deflateStrategy &&
Objects.equals(extraField, other.extraField) && Objects.equals(fileName,
other.fileName)
- && Objects.equals(fileNameCharset, other.fileNameCharset) &&
headerCRC == other.headerCRC
- && Objects.equals(modificationInstant,
other.modificationInstant) && operatingSystem == other.operatingSystem;
+ && Objects.equals(fileNameCharset, other.fileNameCharset) &&
headerCrc == other.headerCrc
+ && Objects.equals(modificationInstant,
other.modificationInstant) && operatingSystem == other.operatingSystem &&
trailerCrc == other.trailerCrc
+ && trailerISize == other.trailerISize;
}
/**
@@ -414,7 +417,7 @@ public class GzipParameters {
* @since 1.28.0
*/
public boolean getHeaderCRC() {
- return headerCRC;
+ return headerCrc;
}
/**
@@ -460,10 +463,30 @@ public class GzipParameters {
return operatingSystem;
}
+ /**
+ * Gets the trailer CRC value.
+ *
+ * @return the trailer CRC value.
+ * @since 1.28.0
+ */
+ public long getTrailerCrc() {
+ return trailerCrc;
+ }
+
+ /**
+ * Gets the trailer ISIZE value.
+ *
+ * @return the trailer ISIZE value.
+ * @since 1.28.0
+ */
+ public long getTrailerISize() {
+ return trailerISize;
+ }
+
@Override
public int hashCode() {
- return Objects.hash(bufferSize, comment, compressionLevel,
deflateStrategy, extraField, fileName, fileNameCharset, headerCRC,
modificationInstant,
- operatingSystem);
+ return Objects.hash(bufferSize, comment, compressionLevel,
deflateStrategy, extraField, fileName, fileNameCharset, headerCrc,
modificationInstant,
+ operatingSystem, trailerCrc, trailerISize);
}
private String requireNonNulByte(final String text) {
@@ -581,7 +604,7 @@ public class GzipParameters {
* @since 1.28.0
*/
public void setHeaderCRC(final boolean headerCRC) {
- this.headerCRC = headerCRC;
+ this.headerCrc = headerCRC;
}
/**
@@ -644,13 +667,22 @@ public class GzipParameters {
this.operatingSystem = os != null ? os : OS.UNKNOWN;
}
+ void setTrailerCrc(final long trailerCrc) {
+ this.trailerCrc = trailerCrc;
+ }
+
+ void setTrailerISize(final long trailerISize) {
+ this.trailerISize = trailerISize;
+ }
+
@Override
public String toString() {
final StringBuilder builder = new StringBuilder();
- builder.append("GzipParameters
[compressionLevel=").append(compressionLevel).append(",
modificationInstant=").append(modificationInstant)
- .append(", extraField=").append(extraField).append(",
fileName=").append(fileName).append(",
fileNameCharset=").append(fileNameCharset)
- .append(", comment=").append(comment).append(",
operatingSystem=").append(operatingSystem).append(",
bufferSize=").append(bufferSize)
- .append(",
deflateStrategy=").append(deflateStrategy).append(",
headerCRC=").append(headerCRC).append("]");
+ builder.append("GzipParameters
[bufferSize=").append(bufferSize).append(",
comment=").append(comment).append(", compressionLevel=")
+ .append(compressionLevel).append(",
deflateStrategy=").append(deflateStrategy).append(",
extraField=").append(extraField).append(", fileName=")
+ .append(fileName).append(",
fileNameCharset=").append(fileNameCharset).append(",
headerCrc=").append(headerCrc).append(", modificationInstant=")
+ .append(modificationInstant).append(",
operatingSystem=").append(operatingSystem).append(",
trailerCrc=").append(trailerCrc)
+ .append(", trailerISize=").append(trailerISize).append("]");
return builder.toString();
}
}
diff --git
a/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorInputStreamTest.java
b/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorInputStreamTest.java
index 8869eb91a..389e68b2b 100644
---
a/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorInputStreamTest.java
+++
b/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorInputStreamTest.java
@@ -19,19 +19,111 @@
package org.apache.commons.compress.compressors.gzip;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException;
+import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.io.IOUtils;
+import org.apache.commons.io.file.PathUtils;
+import org.apache.commons.io.function.IOStream;
+import org.apache.commons.io.input.RandomAccessFileInputStream;
+import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
/**
* Tests {@link GzipCompressorInputStream}.
*/
public class GzipCompressorInputStreamTest {
+ @TempDir
+ Path tempDir;
+
+ /**
+ * Extracts members of a GZIP file to the temporary directory.
+ */
+ @SuppressWarnings("resource")
+ private List<Path> extractMembers(final String sourceGzipPath) throws
IOException {
+ final List<GzipParameters> members = new ArrayList<>();
+ final Path tempFile = tempDir.resolve("temp.bin");
+ // Extract GZIP members in one temp file.
+ // Callbacks are invoked while reading with the member size known only
after reading a member's trailer.
+ // @formatter:off
+ try (OutputStream fos = Files.newOutputStream(tempFile);
+ GzipCompressorInputStream gis =
GzipCompressorInputStream.builder()
+ .setFile(sourceGzipPath)
+ .setDecompressConcatenated(true)
+ .setOnMemberEnd(in -> members.add(in.getMetaData()))
+ .get()) {
+ // @formatter:on
+ IOUtils.copy(gis, fos);
+ }
+ final List<Path> resolved = new ArrayList<>(members.size());
+ final AtomicLong startPos = new AtomicLong();
+ // Read temp file and write each member file.
+ // @formatter:off
+ try (RandomAccessFileInputStream rafIs =
RandomAccessFileInputStream.builder()
+ .setPath(tempFile)
+ .setCloseOnClose(true)
+ .get()) {
+ // @formatter:on
+ IOStream.of(members).forEach(e -> {
+ final Path member = tempDir.resolve(e.getFileName());
+ resolved.add(member);
+ try (OutputStream os = Files.newOutputStream(member,
StandardOpenOption.CREATE_NEW, StandardOpenOption.TRUNCATE_EXISTING)) {
+ // TODO Commons IO 2.19.0
RandomAccessFileInputStream.copy()
+ rafIs.getRandomAccessFile().seek(startPos.get());
+ IOUtils.copyLarge(rafIs, os, 0, e.getTrailerISize());
+ startPos.addAndGet(e.getTrailerISize());
+ }
+ });
+ }
+ return resolved;
+ }
+
+ @Test
+ @Disabled
+ public void testGzipParametersMembersIo() throws IOException {
+ final Path targetFile = tempDir.resolve("test.gz");
+ final String sourceFileName1 = "file1";
+ final String sourceFileName2 = "file2";
+ final Path tempSourceFile1 = tempDir.resolve(sourceFileName1);
+ final Path tempSourceFile2 = tempDir.resolve(sourceFileName2);
+ final byte[] bytes1 = "<text>Hello World
1!</text>".getBytes(StandardCharsets.UTF_8);
+ final byte[] bytes2 = "<text>Hello World
2!</text>".getBytes(StandardCharsets.UTF_8);
+ Files.write(tempSourceFile1, bytes1);
+ Files.write(tempSourceFile2, bytes2);
+ final GzipParameters parameters1 = new GzipParameters();
+ final GzipParameters parameters2 = new GzipParameters();
+ parameters1.setFileName(sourceFileName1);
+ parameters2.setFileName(sourceFileName2);
+ try (OutputStream fos = Files.newOutputStream(targetFile);
+ GzipCompressorOutputStream gos = new
GzipCompressorOutputStream(fos, parameters1)) {
+ gos.write(tempSourceFile1);
+ gos.finish();
+ gos.write(tempSourceFile2);
+ gos.finish();
+ }
+ try (GzipCompressorInputStream gis =
GzipCompressorInputStream.builder().setPath(targetFile).setDecompressConcatenated(false).get())
{
+ assertEquals(parameters1, gis.getMetaData());
+ assertArrayEquals(bytes1, IOUtils.toByteArray(gis));
+ }
+ try (GzipCompressorInputStream gis =
GzipCompressorInputStream.builder().setPath(targetFile).setDecompressConcatenated(true).get())
{
+ assertEquals(parameters1, gis.getMetaData());
+ // assertArrayEquals(ArrayUtils.addAll(bytes1, bytes2),
IOUtils.toByteArray(gis));
+ }
+ }
+
/**
* Tests file from gzip 1.13.
*
@@ -39,6 +131,122 @@ public class GzipCompressorInputStreamTest {
* gzip --keep --name --best -c hello1.txt >members.gz
* gzip --keep --name --best -c hello2.txt >>members.gz
* }</pre>
+ *
+ * @throws IOException on test failure.
+ */
+ @Test
+ public void testOnMemberFirstAll() throws IOException {
+ final List<GzipParameters> parametersStart = new ArrayList<>();
+ final List<GzipParameters> parametersEnd = new ArrayList<>();
+ // Concatenated members, same file
+ // @formatter:off
+ try (GzipCompressorInputStream gis =
GzipCompressorInputStream.builder()
+
.setFile("src/test/resources/org/apache/commons/compress/gzip/members.gz")
+ .setDecompressConcatenated(true)
+ .setOnMemberStart(in -> parametersStart.add(in.getMetaData()))
+ .setOnMemberEnd(in -> parametersEnd.add(in.getMetaData()))
+ .get()) {
+ // @formatter:on
+ assertEquals("hello1.txt", gis.getMetaData().getFileName());
+ assertEquals("Hello1\nHello2\n", IOUtils.toString(gis,
StandardCharsets.ISO_8859_1));
+ assertEquals("hello2.txt", gis.getMetaData().getFileName());
+ }
+ assertEquals(2, parametersStart.size());
+ assertEquals(2, parametersEnd.size());
+ assertEquals(parametersStart, parametersEnd);
+ // Make sure we are not reusing GzipParameters anymore.
+ assertEquals(2, new HashSet<>(parametersStart).size());
+ assertEquals(2, new HashSet<>(parametersEnd).size());
+ // trailers
+ assertEquals(4202744527L, parametersEnd.get(0).getTrailerCrc());
+ assertEquals(7, parametersEnd.get(0).getTrailerISize());
+ assertEquals(3517815052L, parametersEnd.get(1).getTrailerCrc());
+ assertEquals(7, parametersEnd.get(1).getTrailerISize());
+ }
+
+ /**
+ * Tests file from gzip 1.13.
+ *
+ * <pre>{@code
+ * gzip --keep --name --best -c hello1.txt >members.gz
+ * gzip --keep --name --best -c hello2.txt >>members.gz
+ * }</pre>
+ *
+ * @throws IOException on test failure.
+ */
+ @Test
+ public void testOnMemberFirstOnly() throws IOException {
+ final List<GzipParameters> parametersStart = new ArrayList<>();
+ final List<GzipParameters> parametersEnd = new ArrayList<>();
+ // First member only
+ // @formatter:off
+ try (GzipCompressorInputStream gis =
GzipCompressorInputStream.builder()
+
.setFile("src/test/resources/org/apache/commons/compress/gzip/members.gz")
+ .setDecompressConcatenated(false)
+ .setOnMemberStart(in -> parametersStart.add(in.getMetaData()))
+ .setOnMemberEnd(in -> parametersEnd.add(in.getMetaData()))
+ .get()) {
+ // @formatter:on
+ assertEquals("hello1.txt", gis.getMetaData().getFileName());
+ assertEquals("Hello1\n", IOUtils.toString(gis,
StandardCharsets.ISO_8859_1));
+ assertEquals("hello1.txt", gis.getMetaData().getFileName());
+ }
+ assertEquals(1, parametersStart.size());
+ assertEquals(1, parametersEnd.size());
+ assertEquals(parametersStart, parametersEnd);
+ // trailer
+ assertEquals(4202744527L, parametersEnd.get(0).getTrailerCrc());
+ assertEquals(7, parametersEnd.get(0).getTrailerISize());
+ }
+
+ /**
+ * Tests file from gzip 1.13.
+ *
+ * <pre>{@code
+ * gzip --keep --name --best -c hello1.txt >members.gz
+ * gzip --keep --name --best -c hello2.txt >>members.gz
+ * }</pre>
+ *
+ * @throws IOException on test failure.
+ */
+ @Test
+ public void testOnMemberSaveAsFiles() throws IOException {
+ final List<Path> resolved =
extractMembers("src/test/resources/org/apache/commons/compress/gzip/members.gz");
+ assertEquals("Hello1\n", PathUtils.readString(resolved.get(0),
StandardCharsets.ISO_8859_1));
+ assertEquals("Hello2\n", PathUtils.readString(resolved.get(1),
StandardCharsets.ISO_8859_1));
+ }
+
+ /**
+ * Tests file from gzip 1.13 for input files of size 0.
+ *
+ * <pre>{@code
+ * gzip --keep --name --best -c hello-size-0-a.txt >members-size-0.gz
+ * gzip --keep --name --best -c hello-size-0-b.txt >>members-size-0.gz
+ * gzip --keep --name --best -c hello-size-0-c.txt >>members-size-0.gz
+ * }</pre>
+ *
+ * @throws IOException on test failure.
+ */
+ @SuppressWarnings("resource")
+ @Test
+ public void testOnMemberSaveAsSize0Files() throws IOException {
+ final List<Path> resolved =
extractMembers("src/test/resources/org/apache/commons/compress/gzip/members-size-0.gz");
+ assertEquals(3, resolved.size());
+ IOStream.of(resolved).forEach(p -> {
+ assertEquals(0, Files.size(p));
+ assertEquals("", PathUtils.readString(p,
StandardCharsets.ISO_8859_1));
+ });
+ }
+
+ /**
+ * Tests file from gzip 1.13.
+ *
+ * <pre>{@code
+ * gzip --keep --name --best -c hello1.txt >members.gz
+ * gzip --keep --name --best -c hello2.txt >>members.gz
+ * }</pre>
+ *
+ * @throws IOException on test failure.
*/
@Test
public void testReadGzipFileCreatedByCli() throws IOException {
diff --git
a/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java
b/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java
index f2779cd87..309dced34 100644
---
a/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java
+++
b/src/test/java/org/apache/commons/compress/compressors/gzip/GzipCompressorOutputStreamTest.java
@@ -85,11 +85,19 @@ public class GzipCompressorOutputStreamTest {
assertArrayEquals(bytes, IOUtils.toByteArray(gis));
}
// Construction allows configuration of reading the file name and
comment Charset.
- try (GzipCompressorInputStream gis =
GzipCompressorInputStream.builder().setPath(targetFile).setFileNameCharset(fileNameCharset).get())
{
+ // @formatter:off
+ try (GzipCompressorInputStream gis =
GzipCompressorInputStream.builder()
+ .setPath(targetFile)
+ .setFileNameCharset(fileNameCharset)
+ .get()) {
+ // @formatter:on
final byte[] fileNameBytes =
gis.getMetaData().getFileName().getBytes(fileNameCharset);
final String unicodeFileName = new String(fileNameBytes,
fileNameCharset);
assertEquals(expected, unicodeFileName);
assertArrayEquals(bytes, IOUtils.toByteArray(gis));
+ // reset trailer values for a simple assertion.
+ gis.getMetaData().setTrailerCrc(0);
+ gis.getMetaData().setTrailerISize(0);
assertEquals(parameters, gis.getMetaData());
}
}
diff --git
a/src/test/resources/org/apache/commons/compress/gzip/members-size-0.gz
b/src/test/resources/org/apache/commons/compress/gzip/members-size-0.gz
new file mode 100644
index 000000000..cedd0ff7a
Binary files /dev/null and
b/src/test/resources/org/apache/commons/compress/gzip/members-size-0.gz differ